diff --git a/src/main/java/org/flag4j/concurrency/Configurations.java b/src/main/java/org/flag4j/concurrency/Configurations.java
index 38a3a3d53..ea14fc6a3 100644
--- a/src/main/java/org/flag4j/concurrency/Configurations.java
+++ b/src/main/java/org/flag4j/concurrency/Configurations.java
@@ -29,7 +29,7 @@
 /**
  * Configurations for standard and concurrent operations.
  */
-public abstract class Configurations {
+public final class Configurations {
     private Configurations() {
         throw new IllegalStateException(ErrorMessages.getUtilityClassErrMsg());
     }
@@ -41,29 +41,24 @@ private Configurations() {
     /**
      * The default block size for blocked algorithms.
      */
-    private static final int DEFAULT_BLOCK_SIZE = 64;
+    public static final int DEFAULT_BLOCK_SIZE = 64;
     /**
      * The default minimum recursive size for recursive algorithms.
      */
-    private static final int DEFAULT_MIN_RECURSIVE_SIZE = 128;
-
+    public static final int DEFAULT_MIN_RECURSIVE_SIZE = 128;
     /**
      * The block size to use in blocked algorithms.
      */
     private static int blockSize = DEFAULT_BLOCK_SIZE;
 
-    /**
-     * The minimum size of tensor/matrix/vector to make recursive calls on in recursive algorithms.
-     */
-    private static int minRecursiveSize = DEFAULT_MIN_RECURSIVE_SIZE;
-
 
     /**
      * Sets the number of threads for use in concurrent operations as the number of processors available to the Java
      * virtual machine. Note that this value may change during runtime. This method will include logical cores so the value
      * returned may be higher than the number of physical cores on the machine if hyper-threading is enabled.
      * <br><br>
-     * This is implemented as: <code>numThreads = {@link Runtime#availableProcessors() Runtime.getRuntime().availableProcessors()};</code>
+     * @implNote This is implemented as:
+     * <code>numThreads = {@link Runtime#availableProcessors() Runtime.getRuntime().availableProcessors()};</code>
      * @return The new value of numThreads, i.e. the number of available processors.
      */
     public static int setNumThreadsAsAvailableProcessors() {
@@ -108,30 +103,11 @@ public static void setBlockSize(int blockSize) {
     }
 
 
-    /**
-     * Gets the minimum size of tensor/matrix/vector to make recursive calls on in recursive algorithms.
-     * @return minimum size of tensor/matrix/vector to make recursive calls on in recursive algorithms.
-     */
-    public static int getMinRecursiveSize() {
-        return minRecursiveSize;
-    }
-
-
-    /**
-     * Sets the minimum size of tensor/matrix/vector to make recursive calls on in recursive algorithms.
-     * @param minRecursiveSize New minimum size.
-     */
-    public static void setMinRecursiveSize(int minRecursiveSize) {
-        Configurations.minRecursiveSize = Math.max(1, minRecursiveSize);
-    }
-
-
     /**
      * Resets all configurations to their default values.
      */
     public static void resetAll() {
         ThreadManager.setParallelismLevel(DEFAULT_NUM_THREADS);
         blockSize = DEFAULT_BLOCK_SIZE;
-        minRecursiveSize = DEFAULT_MIN_RECURSIVE_SIZE;
     }
 }
diff --git a/src/main/java/org/flag4j/concurrency/TensorOperation.java b/src/main/java/org/flag4j/concurrency/TensorOperation.java
new file mode 100644
index 000000000..bed5f6e44
--- /dev/null
+++ b/src/main/java/org/flag4j/concurrency/TensorOperation.java
@@ -0,0 +1,40 @@
+/*
+ * MIT License
+ *
+ * Copyright (c) 2024. Jacob Watters
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+package org.flag4j.concurrency;
+
+
+/**
+ * Functional interface for a tensor operation which is applied to one contiguous range of indices at a time.
+ */
+@FunctionalInterface
+public interface TensorOperation {
+
+    /**
+     * Applies this tensor operation over the specified index range.
+     * @param startIdx Starting index for the operation (inclusive).
+     * @param endIdx Ending index for the operation (exclusive).
+     */
+    void apply(int startIdx, int endIdx);
+}
diff --git a/src/main/java/org/flag4j/concurrency/ThreadManager.java b/src/main/java/org/flag4j/concurrency/ThreadManager.java
index 5d7a038a6..4012e9731 100644
--- a/src/main/java/org/flag4j/concurrency/ThreadManager.java
+++ b/src/main/java/org/flag4j/concurrency/ThreadManager.java
@@ -26,20 +26,16 @@
 
 import org.flag4j.util.ErrorMessages;
 
-import java.util.concurrent.ExecutionException;
-import java.util.concurrent.ExecutorService;
-import java.util.concurrent.Executors;
-import java.util.concurrent.ThreadFactory;
-import java.util.function.IntConsumer;
-import java.util.logging.Level;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.concurrent.*;
 import java.util.logging.Logger;
-import java.util.stream.IntStream;
 
 /**
  * This class contains the base thread pool for all concurrent operations and several methods for managing the
  * pool.
  */
-public class ThreadManager {
+public final class ThreadManager {
     private ThreadManager() {
         // Hide default constructor for utility class.
         throw new IllegalStateException(ErrorMessages.getUtilityClassErrMsg());
@@ -68,7 +64,7 @@ private ThreadManager() {
     /**
      * Thread pool for managing threads executing concurrent operations.
      */
-    protected static ExecutorService threadPool = Executors.newFixedThreadPool(parallelismLevel, daemonFactory);
+    private static ThreadPoolExecutor threadPool = (ThreadPoolExecutor) Executors.newFixedThreadPool(parallelismLevel, daemonFactory);
 
 
     /**
@@ -78,7 +74,17 @@ private ThreadManager() {
      */
     protected static void setParallelismLevel(int parallelismLevel) {
         ThreadManager.parallelismLevel = Math.max(parallelismLevel, 1);
-        threadPool = Executors.newFixedThreadPool(parallelismLevel, daemonFactory);
+        final int poolSize = ThreadManager.parallelismLevel; // Clamped value, never the raw argument.
+
+        // Keep the pool fixed-size. ThreadPoolExecutor requires corePoolSize <= maximumPoolSize at all times,
+        // so the order of the two setters depends on whether the pool grows or shrinks.
+        if(poolSize > threadPool.getMaximumPoolSize()) {
+            threadPool.setMaximumPoolSize(poolSize);
+            threadPool.setCorePoolSize(poolSize);
+        } else {
+            threadPool.setCorePoolSize(poolSize);
+            threadPool.setMaximumPoolSize(poolSize);
+        }
     }
 
 
@@ -92,44 +88,110 @@ public static int getParallelismLevel() {
 
 
     /**
-     * Applies a concurrent loop to a function.
-     * @param startIndex Starting index for concurrent loop (inclusive).
-     * @param endIndex Ending index for concurrent loop (exclusive).
-     * @param function Function to apply each iteration. Function may be dependent on iteration index but should
-     *                 individual iterations should be independent of each other.
+     * Computes a specified tensor operation concurrently by evenly dividing work among available threads (specified by
+     * {@link Configurations#getNumThreads()}).
+     * @param totalSize Total size of the outer loop for the operation.
+     * @param operation Operation to be computed.
      */
-    public static void concurrentLoop(int startIndex, int endIndex, IntConsumer function) {
-        try {
-            threadPool.submit(() -> IntStream.range(startIndex, endIndex).parallel().forEach(function)).get();
-        } catch (InterruptedException | ExecutionException e) {
-            threadLogger.setLevel(Level.WARNING);
-            threadLogger.warning(e.getMessage());
-            Thread.currentThread().interrupt();
+    public static void concurrentOperation(final int totalSize, final TensorOperation operation) {
+        // Snapshot the shared setting so the chunk size and the loop bound below always agree.
+        final int numChunks = parallelismLevel;
+        // Ceiling division: the chunks together cover [0, totalSize) and the last one may be shorter.
+        final int chunkSize = (totalSize + numChunks - 1) / numChunks;
+        List<Future<?>> futures = new ArrayList<>(numChunks);
+
+        for(int chunk = 0; chunk < numChunks; chunk++) {
+            final int startIdx = chunk * chunkSize;
+            final int endIdx = Math.min(startIdx + chunkSize, totalSize);
+
+            if(startIdx >= endIdx) break; // No more indices to process.
+
+            futures.add(threadPool.submit(() -> operation.apply(startIdx, endIdx)));
+        }
+
+        // Block until every submitted chunk has finished so callers can safely read the results.
+        for(Future<?> future : futures) {
+            try {
+                future.get();
+            } catch (InterruptedException | ExecutionException e) {
+                threadLogger.warning(e.getMessage());
+                // Restore the flag only for a real interrupt; a failed chunk must not cut short the other waits.
+                if(e instanceof InterruptedException) Thread.currentThread().interrupt();
+            }
         }
     }
 
 
     /**
-     * Applies a concurrent strided-loop to a function.
-     * @param startIndex Starting index for concurrent loop (inclusive).
-     * @param endIndex Ending index for concurrent loop (exclusive).
-     * @param step Step size for the index variable of the loop (i.e. the stride size).
-     * @param function Function to apply each iteration. Function may be dependent on iteration index but should
-     *      individual iterations should be independent of each other.
+     * Computes a specified blocked tensor operation concurrently by evenly dividing work among available threads (specified by
+     * {@link Configurations#getNumThreads()}).
+     * @param totalSize Total size of the outer loop for the operation.
+     * @param blockSize Size of each block of indices passed to {@code blockedOperation}.
+     * @param blockedOperation Operation to be computed.
+     */
+    public static void concurrentBlockedOperation(final int totalSize, final int blockSize, final TensorOperation blockedOperation) {
+        if(blockSize < 1) throw new IllegalArgumentException("blockSize must be positive but got " + blockSize + ".");
+
+        // Ceiling division: one task per block; the final block may be shorter than blockSize.
+        final int numBlocks = (totalSize + blockSize - 1) / blockSize;
+        List<Future<?>> futures = new ArrayList<>(Math.max(numBlocks, 0));
+
+        for(int blockIndex = 0; blockIndex < numBlocks; blockIndex++) {
+            final int startBlock = blockIndex * blockSize;
+            final int endBlock = Math.min(startBlock + blockSize, totalSize);
+
+            futures.add(threadPool.submit(() -> blockedOperation.apply(startBlock, endBlock)));
+        }
+
+        // Block until every submitted block has finished so callers can safely read the results.
+        for(Future<?> future : futures) {
+            try {
+                future.get();
+            } catch (InterruptedException | ExecutionException e) {
+                threadLogger.warning(e.getMessage());
+                // Restore the flag only for a real interrupt; a failed block must not cut short the other waits.
+                if(e instanceof InterruptedException) Thread.currentThread().interrupt();
+            }
+        }
+    }
+
+    // TODO: TEMP FOR TESTING.
+    /**
+     * Splits the index range {@code [0, totalTasks)} into contiguous chunks, at most one per available processor, runs
+     * them on an executor created for this call, and blocks until that executor has terminated.
+     * @param totalTasks The total number of tasks (e.g., rows in a matrix) to be processed.
+     * @param task Callback receiving the start index (inclusive), end index (exclusive), and chunk ID of its range.
+     *             An exception thrown by it ends up in a discarded {@code Future}, so the caller never sees it.
+     * @throws RuntimeException If the calling thread is interrupted while waiting for the chunks to finish.
      */
-    public static void concurrentLoop(int startIndex, int endIndex, int step, IntConsumer function) {
-        if(step <= 0)
-            throw new IllegalArgumentException(ErrorMessages.getNegValueErr(startIndex));
+    public static void concurrentOperation(int totalTasks, TriConsumer<Integer, Integer, Integer> task) {
+        int numThreads = Runtime.getRuntime().availableProcessors();
+        ExecutorService executor = Executors.newFixedThreadPool(numThreads);
+
+        int tasksPerThread = (totalTasks + numThreads - 1) / numThreads;  // Ceiling division, so no index is left out.
+
+        for (int threadId = 0; threadId < numThreads; threadId++) {
+            int startIdx = threadId * tasksPerThread;
+            int endIdx = Math.min(startIdx + tasksPerThread, totalTasks);
+
+            if (startIdx < endIdx) {
+                final int finalThreadId = threadId; // Lambdas may only capture effectively final locals.
+                executor.submit(() -> task.accept(startIdx, endIdx, finalThreadId));
+            }
+        }
+
+        executor.shutdown(); // Accept no new tasks; the chunks already submitted still run to completion.
+
         try {
-            int range = endIndex - startIndex;
-            int iterations = range/step + ((range%step == 0) ? 0 : 1);
-            threadPool.submit(() -> IntStream.range(0, iterations).parallel().forEach(
-                    i -> function.accept(startIndex + i*step))
-            ).get();
-        } catch (InterruptedException | ExecutionException e) {
-            threadLogger.setLevel(Level.WARNING);
-            threadLogger.warning(e.getMessage());
+            executor.awaitTermination(Long.MAX_VALUE, TimeUnit.NANOSECONDS); // Effectively waits without a timeout.
+        } catch (InterruptedException e) {
             Thread.currentThread().interrupt();
+            throw new RuntimeException("Thread execution interrupted", e);
         }
     }
+    /** Callback that accepts three arguments and returns no result. */
+    @FunctionalInterface
+    public interface TriConsumer<T, U, V> {
+        void accept(T t, U u, V v);
+    }
 }
diff --git a/src/main/java/org/flag4j/core/Shape.java b/src/main/java/org/flag4j/core/Shape.java
index 5757e730d..ceb3d3d21 100644
--- a/src/main/java/org/flag4j/core/Shape.java
+++ b/src/main/java/org/flag4j/core/Shape.java
@@ -24,7 +24,6 @@
 
 package org.flag4j.core;
 
-import org.flag4j.arrays.dense.Tensor;
 import org.flag4j.util.ArrayUtils;
 import org.flag4j.util.ParameterChecks;
 
@@ -311,12 +310,4 @@ public String toString() {
 
         return joiner.toString();
     }
-
-
-    public static void main(String[] args) {
-        Shape s = new Shape();
-        Tensor t = new Tensor(s);
-
-        System.out.println(t.entries.length);
-    }
 }
diff --git a/src/main/java/org/flag4j/operations/dense/complex/ComplexDenseElemDiv.java b/src/main/java/org/flag4j/operations/dense/complex/ComplexDenseElemDiv.java
index 96beef75d..bffcc0037 100644
--- a/src/main/java/org/flag4j/operations/dense/complex/ComplexDenseElemDiv.java
+++ b/src/main/java/org/flag4j/operations/dense/complex/ComplexDenseElemDiv.java
@@ -81,8 +81,11 @@ public static CNumber[] elemDivConcurrent(CNumber[] src1, Shape shape1, CNumber[
         ParameterChecks.assertEqualShape(shape1, shape2);
         CNumber[] product = new CNumber[src1.length];
 
-        ThreadManager.concurrentLoop(0, product.length,
-                (i)->product[i] = src1[i].div(src2[i]));
+        ThreadManager.concurrentOperation(product.length, (start, end)->{
+            for(int i=start; i<end; i++) {
+                product[i] = src1[i].div(src2[i]);
+            }
+        });
 
         return product;
     }
diff --git a/src/main/java/org/flag4j/operations/dense/complex/ComplexDenseElemMult.java b/src/main/java/org/flag4j/operations/dense/complex/ComplexDenseElemMult.java
index 563046e7d..37b525c02 100644
--- a/src/main/java/org/flag4j/operations/dense/complex/ComplexDenseElemMult.java
+++ b/src/main/java/org/flag4j/operations/dense/complex/ComplexDenseElemMult.java
@@ -38,7 +38,7 @@ public class ComplexDenseElemMult {
     /**
      * Minimum number of entries in each tensor to apply concurrent algorithm.
      */
-    private static final int CONCURRENT_THRESHOLD = 50625;
+    private static final int CONCURRENT_THRESHOLD = 50_000;
 
 
     private ComplexDenseElemMult() {
@@ -81,9 +81,11 @@ public static CNumber[] elemMultConcurrent(CNumber[] src1, Shape shape1, CNumber
         ParameterChecks.assertEqualShape(shape1, shape2);
         CNumber[] product = new CNumber[src1.length];
 
-        ThreadManager.concurrentLoop(0, product.length,
-                (i)->product[i] = src1[i].mult(src2[i])
-        );
+        ThreadManager.concurrentOperation(product.length, ((startIdx, endIdx) -> {
+            for(int i=startIdx; i<endIdx; i++) {
+                product[i] = src1[i].mult(src2[i]);
+            }
+        }));
 
         return product;
     }
diff --git a/src/main/java/org/flag4j/operations/dense/complex/ComplexDenseMatrixMultTranspose.java b/src/main/java/org/flag4j/operations/dense/complex/ComplexDenseMatrixMultTranspose.java
index 2ef8caf4c..40928029d 100644
--- a/src/main/java/org/flag4j/operations/dense/complex/ComplexDenseMatrixMultTranspose.java
+++ b/src/main/java/org/flag4j/operations/dense/complex/ComplexDenseMatrixMultTranspose.java
@@ -75,10 +75,13 @@ public static CNumber[] multTranspose(CNumber[] src1, Shape shape1, CNumber[] sr
                 src1Index = src1IndexStart;
                 src2Index = j*cols2;
                 destIndex = destIndexStart + j;
+                CNumber sum = dest[destIndex];
 
                 while(src1Index<end) {
-                    dest[destIndex] = dest[destIndex].add(src1[src1Index++].mult(src2[src2Index++]));
+                    sum = sum.add(src1[src1Index++].mult(src2[src2Index++]));
                 }
+
+                dest[destIndex] = sum;
             }
         }
 
@@ -127,10 +130,13 @@ public static CNumber[] multTransposeBlocked(CNumber[] src1, Shape shape1, CNumb
                             destIndex = destStart + j;
                             src1Index = src1Start;
                             src2Index = j*cols2 + kk;
+                            CNumber sum = dest[destIndex];
 
                             while(src1Index<end) {
-                                dest[destIndex] = dest[destIndex].add(src1[src1Index++].mult(src2[src2Index++]));
+                                sum = sum.add(src1[src1Index++].mult(src2[src2Index++]));
                             }
+
+                            dest[destIndex] = sum;
                         }
                     }
                 }
@@ -159,18 +165,23 @@ public static CNumber[] multTransposeConcurrent(CNumber[] src1, Shape shape1, CN
         CNumber[] dest = new CNumber[rows1*rows2]; // Since second matrix is transposed, its columns will become rows.
         Arrays.fill(dest, CNumber.ZERO);
 
-        ThreadManager.concurrentLoop(0, rows1, (i) -> {
-            int src1IndexStart = i*cols2;
-            int destIndexStart = i*rows2;
-            int end = src1IndexStart + cols2;
+        ThreadManager.concurrentOperation(rows1, (startIdx, endIdx) -> {
+            for(int i=startIdx; i<endIdx; i++) {
+                int src1IndexStart = i*cols2;
+                int destIndexStart = i*rows2;
+                int end = src1IndexStart + cols2;
 
-            for(int j=0; j<rows2; j++) {
-                int src1Index = src1IndexStart;
-                int src2Index = j*cols2;
-                int destIndex = destIndexStart + j;
+                for(int j=0; j<rows2; j++) {
+                    int src1Index = src1IndexStart;
+                    int src2Index = j*cols2;
+                    int destIndex = destIndexStart + j;
+                    CNumber sum = dest[destIndex];
 
-                while(src1Index<end) {
-                    dest[destIndex] = dest[destIndex].add(src1[src1Index++].mult(src2[src2Index++]));
+                    while(src1Index<end) {
+                        sum = sum.add(src1[src1Index++].mult(src2[src2Index++]));
+                    }
+
+                    dest[destIndex] = sum;
                 }
             }
         });
@@ -198,28 +209,34 @@ public static CNumber[] multTransposeBlockedConcurrent(CNumber[] src1, Shape sha
 
         int blockSize = Configurations.getBlockSize();
 
-        ThreadManager.concurrentLoop(0, rows1, blockSize, (ii)->{
-            int iBound = Math.min(ii + blockSize, rows1);
+        ThreadManager.concurrentBlockedOperation(rows1, blockSize, (startIdx, endIdx) -> {
+            // Each task owns only the row block [startIdx, endIdx); touching any other rows would race with other tasks.
+            for(int ii=startIdx; ii<endIdx; ii+=blockSize) {
+                int iBound = Math.min(ii + blockSize, rows1);
 
-            for(int jj = 0; jj<rows2; jj+=blockSize) {
-                int jBound = Math.min(jj + blockSize, rows2);
+                for(int jj = 0; jj<rows2; jj+=blockSize) {
+                    int jBound = Math.min(jj + blockSize, rows2);
 
-                for(int kk = 0; kk<cols2; kk+=blockSize) {
-                    int kBound = Math.min(kk + blockSize, cols2);
+                    for(int kk = 0; kk<cols2; kk+=blockSize) {
+                        int kBound = Math.min(kk + blockSize, cols2);
 
-                    // Multiply the blocks
-                    for(int i=ii; i<iBound; i++) {
-                        int destStart = i*rows2;
-                        int src1Start = i*cols2 + kk;
-                        int end = src1Start + kBound - kk;
+                        // Multiply the blocks
+                        for(int i=ii; i<iBound; i++) {
+                            int destStart = i*rows2;
+                            int src1Start = i*cols2 + kk;
+                            int end = src1Start + kBound - kk;
 
-                        for(int j=jj; j<jBound; j++) {
-                            int destIndex = destStart + j;
-                            int src1Index = src1Start;
-                            int src2Index = j*cols2 + kk;
+                            for(int j=jj; j<jBound; j++) {
+                                int destIndex = destStart + j;
+                                int src1Index = src1Start;
+                                int src2Index = j*cols2 + kk;
+                                CNumber sum = dest[destIndex];
 
-                            while(src1Index<end) {
-                                dest[destIndex] = dest[destIndex].add(src1[src1Index++].mult(src2[src2Index++]));
+                                while(src1Index<end) {
+                                    sum = sum.add(src1[src1Index++].mult(src2[src2Index++]));
+                                }
+
+                                dest[destIndex] = sum;
                             }
                         }
                     }
diff --git a/src/main/java/org/flag4j/operations/dense/complex/ComplexDenseMatrixMultiplication.java b/src/main/java/org/flag4j/operations/dense/complex/ComplexDenseMatrixMultiplication.java
index 30b7f3238..a86e9b7a7 100644
--- a/src/main/java/org/flag4j/operations/dense/complex/ComplexDenseMatrixMultiplication.java
+++ b/src/main/java/org/flag4j/operations/dense/complex/ComplexDenseMatrixMultiplication.java
@@ -33,9 +33,10 @@
 
 
 /**
- * This class contains several low level methods for computing complex matrix-matrix multiplications. This includes transpose
- * multiplications. <br>
- * <b>WARNING:</b> These methods do not perform any sanity checks.
+ * <p>This class contains several low level methods for computing complex matrix-matrix multiplications. This includes transpose
+ * multiplications.</p>
+ *
+ * <p><b>WARNING:</b> These methods do not perform any sanity checks.</p>
  */
 public final class ComplexDenseMatrixMultiplication {
 
@@ -72,11 +73,14 @@ public static CNumber[] standard(CNumber[] src1, Shape shape1, CNumber[] src2, S
                 src1Index = src1IndexStart;
                 destIndex = destIndexStart + j;
                 end = src1Index + rows2;
+                CNumber sum = dest[destIndex];
 
                 while(src1Index<end) {
-                    dest[destIndex] = dest[destIndex].add(src1[src1Index++].mult(src2[src2Index]));
+                    sum = sum.add(src1[src1Index++].mult(src2[src2Index]));
                     src2Index += cols2;
                 }
+
+                dest[destIndex] = sum;
             }
         }
 
@@ -163,11 +167,14 @@ public static CNumber[] blocked(CNumber[] src1, Shape shape1, CNumber[] src2, Sh
                             destIndex = destStart + j;
                             src1Index = src1Start;
                             src2Index = kk*cols2 + j;
+                            CNumber sum = dest[destIndex];
 
                             while(src1Index < stopIndex) {
-                                dest[destIndex] = dest[destIndex].add(src1[src1Index++].mult(src2[src2Index]));
+                                sum = sum.add(src1[src1Index++].mult(src2[src2Index]));
                                 src2Index+=cols2;
                             }
+
+                            dest[destIndex] = sum;
                         }
                     }
                 }
@@ -252,18 +259,23 @@ public static CNumber[] concurrentStandard(CNumber[] src1, Shape shape1, CNumber
         CNumber[] dest = new CNumber[rows1*cols2];
         ArrayUtils.fill(dest, 0);
 
-        ThreadManager.concurrentLoop(0, rows1, (i) -> {
-            int src1IndexStart = i*cols1;
-            int destIndexStart = i*cols2;
+        ThreadManager.concurrentOperation(rows1, (startIdx, endIdx) -> {
+            for(int i=startIdx; i<endIdx; i++) {
+                int src1IndexStart = i*cols1;
+                int destIndexStart = i*cols2;
 
-            for(int j=0; j<cols2; j++) {
-                int src2Index = j;
-                int src1Index = src1IndexStart;
-                int destIndex = destIndexStart + j;
+                for(int j=0; j<cols2; j++) {
+                    int src2Index = j;
+                    int src1Index = src1IndexStart;
+                    int destIndex = destIndexStart + j;
+                    CNumber sum = dest[destIndex];
 
-                for(int k=0; k<cols1; k++) {
-                    dest[destIndex] = dest[destIndex].add(src1[src1Index++].mult(src2[src2Index]));
-                    src2Index += cols2;
+                    for(int k=0; k<cols1; k++) {
+                        sum = sum.add(src1[src1Index++].mult(src2[src2Index]));
+                        src2Index += cols2;
+                    }
+
+                    dest[destIndex] = sum;
                 }
             }
         });
@@ -289,18 +301,20 @@ public static CNumber[] concurrentReordered(CNumber[] src1, Shape shape1, CNumbe
         CNumber[] dest = new CNumber[rows1*cols2];
         ArrayUtils.fill(dest, 0);
 
-        ThreadManager.concurrentLoop(0, rows1, (i) -> {
-            int src1IndexStart = i*rows2;
-            int destIndexStart = i*cols2;
+        ThreadManager.concurrentOperation(rows1, (startIdx, endIdx) -> {
+            for(int i=startIdx; i<endIdx; i++) {
+                int src1IndexStart = i*rows2;
+                int destIndexStart = i*cols2;
 
-            for(int k=0; k<rows2; k++) {
-                int src2Index = k*cols2;
-                int destIndex = destIndexStart;
-                int end = src2Index + cols2;
+                for(int k=0; k<rows2; k++) {
+                    int src2Index = k*cols2;
+                    int destIndex = destIndexStart;
+                    int end = src2Index + cols2;
 
-                while(src2Index<end) {
-                    dest[destIndex] = dest[destIndex].add(src1[src1IndexStart + k].mult(src2[src2Index++]));
-                    destIndex++;
+                    while(src2Index<end) {
+                        dest[destIndex] = dest[destIndex].add(src1[src1IndexStart + k].mult(src2[src2Index++]));
+                        destIndex++;
+                    }
                 }
             }
         });
@@ -327,29 +341,34 @@ public static CNumber[] concurrentBlocked(CNumber[] src1, Shape shape1, CNumber[
         ArrayUtils.fill(dest, 0);
         int blockSize = Configurations.getBlockSize();
 
-        ThreadManager.concurrentLoop(0, rows1, blockSize, (ii) -> {
-            int iBound = Math.min(ii + blockSize, rows1);
+        ThreadManager.concurrentBlockedOperation(rows1, blockSize, (startIdx, endIdx) -> {
+            for(int ii=startIdx; ii<endIdx; ii+=blockSize) {
+                int iBound = Math.min(ii + blockSize, rows1);
 
-            for(int jj = 0; jj<cols2; jj+=blockSize) {
-                int jBound = Math.min(jj + blockSize, cols2);
+                for(int jj = 0; jj<cols2; jj+=blockSize) {
+                    int jBound = Math.min(jj + blockSize, cols2);
 
-                for(int kk = 0; kk<cols1; kk+=blockSize) {
-                    int kBound = Math.min(kk + blockSize, cols1);
+                    for(int kk = 0; kk<cols1; kk+=blockSize) {
+                        int kBound = Math.min(kk + blockSize, cols1);
 
-                    // Multiply current blocks.
-                    for(int i=ii; i<iBound; i++) {
-                        int src1Start = i*cols1 + kk;
-                        int stopIndex = src1Start+(kBound-kk);
-                        int destStart = i*cols2;
+                        // Multiply current blocks.
+                        for(int i=ii; i<iBound; i++) {
+                            int src1Start = i*cols1 + kk;
+                            int stopIndex = src1Start+(kBound-kk);
+                            int destStart = i*cols2;
 
-                        for (int j=jj; j<jBound; j++) {
-                            int destIndex = destStart + j;
-                            int src1Index = src1Start;
-                            int src2Index = kk*cols2 + j;
+                            for (int j=jj; j<jBound; j++) {
+                                int destIndex = destStart + j;
+                                int src1Index = src1Start;
+                                int src2Index = kk*cols2 + j;
+                                CNumber sum = dest[destIndex];
 
-                            while(src1Index < stopIndex) {
-                                dest[destIndex] = dest[destIndex].add(src1[src1Index++].mult(src2[src2Index]));
-                                src2Index+=cols2;
+                                while(src1Index < stopIndex) {
+                                    sum = sum.add(src1[src1Index++].mult(src2[src2Index]));
+                                    src2Index+=cols2;
+                                }
+
+                                dest[destIndex] = sum;
                             }
                         }
                     }
@@ -379,30 +398,32 @@ public static CNumber[] concurrentBlockedReordered(CNumber[] src1, Shape shape1,
         ArrayUtils.fill(dest, 0);
         int blockSize = Configurations.getBlockSize();
 
-        ThreadManager.concurrentLoop(0, rows1, blockSize, (ii) -> {
-            int iBound = Math.min(ii + blockSize, rows1);
-
-            for(int kk = 0; kk<cols1; kk+=blockSize) {
-                int kBound = Math.min(kk + blockSize, cols1);
-
-                for(int jj = 0; jj<cols2; jj+=blockSize) {
-                    int jBound = Math.min(jj + blockSize, cols2);
-
-                    // Multiply current blocks.
-                    for(int i=ii; i<iBound; i++) {
-                        int destStart = i*cols2;
-                        int src1Start = i*cols1;
-                        int stopIndex = destStart+jBound;
+        ThreadManager.concurrentBlockedOperation(rows1, blockSize, (startIdx, endIdx) -> {
+            for(int ii=startIdx; ii<endIdx; ii+=blockSize) {
+                int iBound = Math.min(ii + blockSize, rows1);
 
-                        for (int k=kk; k<kBound; k++) {
-                            int destIndex = destStart + jj;
-                            int src1Index = src1Start + k;
-                            int src2Index = k*cols2 + jj;
+                for(int kk = 0; kk<cols1; kk+=blockSize) {
+                    int kBound = Math.min(kk + blockSize, cols1);
 
-                            while(destIndex<stopIndex) {
-                                dest[destIndex] = dest[destIndex].add(src1[src1Index].mult(src2[src2Index]));
-                                destIndex++;
-                                src2Index++;
+                    for(int jj = 0; jj<cols2; jj+=blockSize) {
+                        int jBound = Math.min(jj + blockSize, cols2);
+
+                        // Multiply current blocks.
+                        for(int i=ii; i<iBound; i++) {
+                            int destStart = i*cols2;
+                            int src1Start = i*cols1;
+                            int stopIndex = destStart+jBound;
+
+                            for (int k=kk; k<kBound; k++) {
+                                int destIndex = destStart + jj;
+                                int src1Index = src1Start + k;
+                                int src2Index = k*cols2 + jj;
+
+                                while(destIndex<stopIndex) {
+                                    dest[destIndex] = dest[destIndex].add(src1[src1Index].mult(src2[src2Index]));
+                                    destIndex++;
+                                    src2Index++;
+                                }
                             }
                         }
                     }
@@ -434,10 +455,13 @@ public static CNumber[] standardVector(CNumber[] src1, Shape shape1, CNumber[] s
         for(int i=0; i<rows1; i++) {
             src1Index = i*cols1;
             src2Index = 0;
+            CNumber sum = dest[i];
 
             while(src2Index<rows2) {
-                dest[i] = dest[i].add(src1[src1Index++].mult(src2[src2Index++]));
+                sum = sum.add(src1[src1Index++].mult(src2[src2Index++]));
             }
+
+            dest[i] = sum;
         }
 
         return dest;
@@ -474,10 +498,13 @@ public static CNumber[] blockedVector(CNumber[] src1, Shape shape1, CNumber[] sr
                 for(int i=ii; i<iBound; i++) {
                     src1Index = i*cols1 + kk;
                     src2Index = kk;
+                    CNumber sum = dest[i];
 
                     while(src2Index<kBound) {
-                        dest[i] = dest[i].add(src1[src1Index++].mult(src2[src2Index++]));
+                        sum = sum.add(src1[src1Index++].mult(src2[src2Index++]));
                     }
+
+                    dest[i] = sum;
                 }
             }
         }
@@ -503,12 +530,17 @@ public static CNumber[] concurrentStandardVector(CNumber[] src1, Shape shape1, C
         CNumber[] dest = new CNumber[rows1];
         ArrayUtils.fill(dest, 0);
 
-        ThreadManager.concurrentLoop(0, rows1, (i) -> {
-            int src1Index = i*cols1;
-            int src2Index = 0;
+        ThreadManager.concurrentOperation(rows1, (startIdx, endIdx) -> {
+            for(int i=startIdx; i<endIdx; i++) {
+                int src1Index = i*cols1;
+                int src2Index = 0;
+                CNumber sum = dest[i];
 
-            while(src2Index<rows2) {
-                dest[i] = dest[i].add(src1[src1Index++].mult(src2[src2Index++]));
+                while(src2Index<rows2) {
+                    sum = sum.add(src1[src1Index++].mult(src2[src2Index++]));
+                }
+
+                dest[i] = sum;
             }
         });
 
@@ -534,19 +566,24 @@ public static CNumber[] concurrentBlockedVector(CNumber[] src1, Shape shape1, CN
         ArrayUtils.fill(dest, 0);
         int blockSize = Configurations.getBlockSize();
 
-        ThreadManager.concurrentLoop(0, rows1, blockSize, (ii) -> {
-            int iBound = Math.min(ii+blockSize, rows1);
+        ThreadManager.concurrentBlockedOperation(rows1, blockSize, (startIdx, endIdx) -> {
+            for(int ii=startIdx; ii<endIdx; ii+=blockSize) {
+                int iBound = Math.min(ii+blockSize, rows1);
 
-            for(int kk=0; kk<rows2; kk+=blockSize) {
-                int kBound = Math.min(kk+blockSize, rows2);
+                for(int kk=0; kk<rows2; kk+=blockSize) {
+                    int kBound = Math.min(kk+blockSize, rows2);
 
-                // Multiply the current blocks
-                for(int i=ii; i<iBound; i++) {
-                    int src1Index = i*cols1 + kk;
-                    int src2Index = kk;
+                    // Multiply the current blocks
+                    for(int i=ii; i<iBound; i++) {
+                        int src1Index = i*cols1 + kk;
+                        int src2Index = kk;
+                        CNumber sum = dest[i];
 
-                    while(src2Index<kBound) {
-                        dest[i] = dest[i].add(src1[src1Index++].mult(src2[src2Index++]));
+                        while(src2Index<kBound) {
+                            sum = sum.add(src1[src1Index++].mult(src2[src2Index++]));
+                        }
+
+                        dest[i] = sum;
                     }
                 }
             }
diff --git a/src/main/java/org/flag4j/operations/dense/complex/ComplexDenseTranspose.java b/src/main/java/org/flag4j/operations/dense/complex/ComplexDenseTranspose.java
index 96e0c444b..987a9c855 100644
--- a/src/main/java/org/flag4j/operations/dense/complex/ComplexDenseTranspose.java
+++ b/src/main/java/org/flag4j/operations/dense/complex/ComplexDenseTranspose.java
@@ -83,18 +83,20 @@ public static CNumber[] standard(final CNumber[] src, final Shape shape, final i
      */
     public static CNumber[] standardConcurrent(final CNumber[] src, final Shape shape, final int axis1, final int axis2) {
         if(shape.getRank() < 2) { // Can't transpose tensor with less than 2 axes.
-            throw new IllegalArgumentException("Tensor transpose not defined for rank " + shape.getRank() +
-                    " tensor.");
+            throw new IllegalArgumentException("Tensor transpose not defined for rank "
+                    + shape.getRank() + " tensor.");
         }
 
         CNumber[] dest = new CNumber[shape.totalEntries().intValue()];
         Shape destShape = shape.swapAxes(axis1, axis2);
 
         // Compute transpose concurrently
-        ThreadManager.concurrentLoop(0, src.length, (i) -> {
-            int[] destIndices = shape.getIndices(i);
-            ArrayUtils.swap(destIndices, axis1, axis2); // Compute destination indices.
-            dest[destShape.entriesIndex(destIndices)] = src[i]; // Apply transpose for the element
+        ThreadManager.concurrentOperation(src.length, (startIdx, endIdx) -> {
+            for(int i=startIdx; i<endIdx; i++) {
+                int[] destIndices = shape.getIndices(i);
+                ArrayUtils.swap(destIndices, axis1, axis2); // Compute destination indices.
+                dest[destShape.entriesIndex(destIndices)] = src[i]; // Apply transpose for the element
+            }
         });
 
         return dest;
@@ -124,7 +126,7 @@ public static CNumber[] standard(final CNumber[] src, final Shape shape, final i
 
         for(int i=0; i<src.length; i++) {
             destIndices = shape.getIndices(i);
-            ArrayUtils.swap(destIndices, axes); // Compute destination indices.
+            ArrayUtils.swapUnsafe(destIndices, axes); // Compute destination indices.
             dest[destShape.entriesIndex(destIndices)] = src[i]; // Apply transpose for the element
         }
 
@@ -152,10 +154,13 @@ public static CNumber[] standardConcurrent(final CNumber[] src, final Shape shap
         CNumber[] dest = new CNumber[shape.totalEntries().intValue()];
         Shape destShape = shape.swapAxes(axes);
 
-        ThreadManager.concurrentLoop(0, src.length, (i) -> {
-            int[] destIndices = shape.getIndices(i);
-            ArrayUtils.swap(destIndices, axes); // Compute destination indices.
-            dest[destShape.entriesIndex(destIndices)] = src[i]; // Apply transpose for the element
+        // Compute transpose concurrently.
+        ThreadManager.concurrentOperation(src.length, (startIdx, endIdx) -> {
+            for(int i=startIdx; i<endIdx; i++) {
+                int[] destIndices = shape.getIndices(i);
+                ArrayUtils.swapUnsafe(destIndices, axes); // Compute destination indices.
+                dest[destShape.entriesIndex(destIndices)] = src[i]; // Apply transpose for the element
+            }
         });
 
         return dest;
@@ -239,14 +244,16 @@ public static CNumber[] standardMatrixConcurrent(final CNumber[] src, final int
         CNumber[] dest = new CNumber[src.length];
 
         // Compute transpose concurrently.
-        ThreadManager.concurrentLoop(0, numCols, (i) -> {
-            int srcIndex = i;
-            int destIndex = i*numRows;
-            int end = destIndex + numRows;
-
-            while (destIndex < end) {
-                dest[destIndex++] = src[srcIndex];
-                srcIndex += numCols;
+        ThreadManager.concurrentOperation(numCols, (startIdx, endIdx) -> {
+            for(int i=startIdx; i<endIdx; i++) {
+                int srcIndex = i;
+                int destIndex = i*numRows;
+                int end = destIndex + numRows;
+
+                while(destIndex < end) {
+                    dest[destIndex++] = src[srcIndex];
+                    srcIndex += numCols;
+                }
             }
         });
 
@@ -266,20 +273,22 @@ public static CNumber[] blockedMatrixConcurrent(final CNumber[] src, final int n
         final int blockSize = Configurations.getBlockSize();
 
         // Compute transpose concurrently.
-        ThreadManager.concurrentLoop(0, numCols, blockSize, (i) -> {
-            for(int j=0; j<numRows; j+=blockSize) {
-                int blockRowEnd = Math.min(j+blockSize, numRows);
-                int blockColEnd = Math.min(i+blockSize, numCols);
-
-                // Transpose the block beginning at (i, j)
-                for(int blockI=i; blockI<blockColEnd; blockI++) {
-                    int srcIndex = blockI;
-                    int destIndex = blockI*numRows;
-                    int end = destIndex + blockRowEnd;
-
-                    while (destIndex < end) {
-                        dest[destIndex++] = src[srcIndex];
-                        srcIndex += numCols;
+        ThreadManager.concurrentBlockedOperation(numCols, blockSize, (startIdx, endIdx) -> {
+            for(int i=startIdx; i<endIdx; i+=blockSize) { // Advance one whole column block per pass.
+                int blockColEnd = Math.min(i+blockSize, numCols);
+                for(int j=0; j<numRows; j+=blockSize) {
+                    int blockRowEnd = Math.min(j+blockSize, numRows);
+
+                    // Transpose only rows [j, blockRowEnd) of columns [i, blockColEnd).
+                    for(int blockI=i; blockI<blockColEnd; blockI++) {
+                        int srcIndex = j*numCols + blockI;
+                        int destIndex = blockI*numRows + j;
+                        int end = blockI*numRows + blockRowEnd;
+
+                        while(destIndex < end) {
+                            dest[destIndex++] = src[srcIndex];
+                            srcIndex += numCols;
+                        }
                     }
                 }
             }
@@ -338,10 +347,12 @@ public static CNumber[] standardConcurrentHerm(final CNumber[] src, final Shape
         Shape destShape = shape.swapAxes(axis1, axis2);
 
         // Compute transpose concurrently
-        ThreadManager.concurrentLoop(0, src.length, (i) -> {
-            int[] destIndices = shape.getIndices(i);
-            ArrayUtils.swap(destIndices, axis1, axis2); // Compute destination indices.
-            dest[destShape.entriesIndex(destIndices)] = src[i].conj(); // Apply transpose for the element
+        ThreadManager.concurrentOperation(src.length, (startIdx, endIdx) -> {
+            for(int i=startIdx; i<endIdx; i++) {
+                int[] destIndices = shape.getIndices(i);
+                ArrayUtils.swap(destIndices, axis1, axis2); // Compute destination indices.
+                dest[destShape.entriesIndex(destIndices)] = src[i].conj(); // Apply transpose for the element
+            }
         });
 
         return dest;
@@ -368,10 +379,12 @@ public static CNumber[] standardConcurrentHerm(final CNumber[] src, final Shape
         CNumber[] dest = new CNumber[shape.totalEntries().intValue()];
         Shape destShape = shape.swapAxes(axes);
 
-        ThreadManager.concurrentLoop(0, src.length, (i) -> {
-            int[] destIndices = shape.getIndices(i);
-            ArrayUtils.swap(destIndices, axes); // Compute destination indices.
-            dest[destShape.entriesIndex(destIndices)] = src[i].conj(); // Apply conjugate transpose for the element
+        ThreadManager.concurrentOperation(src.length, (startIdx, endIdx) -> {
+            for(int i=startIdx; i<endIdx; i++) {
+                int[] destIndices = shape.getIndices(i);
+                ArrayUtils.swapUnsafe(destIndices, axes); // Compute destination indices.
+                dest[destShape.entriesIndex(destIndices)] = src[i].conj(); // Apply conjugate transpose for the element
+            }
         });
 
         return dest;
@@ -396,7 +409,7 @@ public static CNumber[] standardMatrixHerm(final CNumber[] src, final int numRow
             destIndex = i*numRows;
             end = destIndex + numRows;
 
-            while (destIndex < end) {
+            while(destIndex < end) {
                 dest[destIndex++] = src[srcIndex].conj();
                 srcIndex += numCols;
             }
@@ -455,14 +468,16 @@ public static CNumber[] standardMatrixConcurrentHerm(final CNumber[] src, final
         CNumber[] dest = new CNumber[src.length];
 
         // Compute transpose concurrently.
-        ThreadManager.concurrentLoop(0, numCols, (i) -> {
-            int srcIndex = i;
-            int destIndex = i*numRows;
-            int end = destIndex + numRows;
-
-            while (destIndex < end) {
-                dest[destIndex++] = src[srcIndex].conj();
-                srcIndex += numCols;
+        ThreadManager.concurrentOperation(numCols, (startIdx, endIdx) -> {
+            for(int i=startIdx; i<endIdx; i++) {
+                int srcIndex = i;
+                int destIndex = i*numRows;
+                int end = destIndex + numRows;
+
+                while (destIndex < end) {
+                    dest[destIndex++] = src[srcIndex].conj();
+                    srcIndex += numCols;
+                }
             }
         });
 
@@ -482,20 +497,22 @@ public static CNumber[] blockedMatrixConcurrentHerm(final CNumber[] src, final i
         final int blockSize = Configurations.getBlockSize();
 
         // Compute transpose concurrently.
-        ThreadManager.concurrentLoop(0, numCols, blockSize, (i) -> {
-            for(int j=0; j<numRows; j+=blockSize) {
-                int blockRowEnd = Math.min(j+blockSize, numRows);
-                int blockColEnd = Math.min(i+blockSize, numCols);
-
-                // Transpose the block beginning at (i, j)
-                for(int blockI=i; blockI<blockColEnd; blockI++) {
-                    int srcIndex = blockI;
-                    int destIndex = blockI*numRows;
-                    int end = destIndex + blockRowEnd;
-
-                    while (destIndex < end) {
-                        dest[destIndex++] = src[srcIndex].conj();
-                        srcIndex += numCols;
+        ThreadManager.concurrentBlockedOperation(numCols, blockSize, (startIdx, endIdx) -> {
+            for(int i=startIdx; i<endIdx; i+=blockSize) { // Advance one whole column block per pass.
+                int blockColEnd = Math.min(i+blockSize, numCols);
+                for(int j=0; j<numRows; j+=blockSize) {
+                    int blockRowEnd = Math.min(j+blockSize, numRows);
+
+                    // Conjugate-transpose only rows [j, blockRowEnd) of columns [i, blockColEnd).
+                    for(int blockI=i; blockI<blockColEnd; blockI++) {
+                        int srcIndex = j*numCols + blockI;
+                        int destIndex = blockI*numRows + j;
+                        int end = blockI*numRows + blockRowEnd;
+
+                        while (destIndex < end) {
+                            dest[destIndex++] = src[srcIndex].conj();
+                            srcIndex += numCols;
+                        }
                     }
                 }
             }
diff --git a/src/main/java/org/flag4j/operations/dense/real/RealDenseElemDiv.java b/src/main/java/org/flag4j/operations/dense/real/RealDenseElemDiv.java
index 65c05b37b..de5170bdc 100644
--- a/src/main/java/org/flag4j/operations/dense/real/RealDenseElemDiv.java
+++ b/src/main/java/org/flag4j/operations/dense/real/RealDenseElemDiv.java
@@ -79,9 +79,11 @@ public static double[] elemDivConcurrent(double[] src1, Shape shape1, double[] s
         ParameterChecks.assertEqualShape(shape1, shape2);
         double[] product = new double[src1.length];
 
-        ThreadManager.concurrentLoop(0, product.length,
-                (i) -> product[i] = src1[i]/src2[i]
-        );
+        ThreadManager.concurrentOperation(product.length, (startIdx, endIdx) -> {
+            for(int i=startIdx; i<endIdx; i++) {
+                product[i] = src1[i]/src2[i];
+            }
+        });
 
         return product;
     }
diff --git a/src/main/java/org/flag4j/operations/dense/real/RealDenseElemMult.java b/src/main/java/org/flag4j/operations/dense/real/RealDenseElemMult.java
index efd82f8fc..612ea3826 100644
--- a/src/main/java/org/flag4j/operations/dense/real/RealDenseElemMult.java
+++ b/src/main/java/org/flag4j/operations/dense/real/RealDenseElemMult.java
@@ -80,9 +80,11 @@ public static double[] elemMultConcurrent(double[] src1, Shape shape1, double[]
         ParameterChecks.assertEqualShape(shape1, shape2);
         double[] product = new double[src1.length];
 
-        ThreadManager.concurrentLoop(0, product.length,
-                (i) -> product[i] = src1[i]*src2[i]
-        );
+        ThreadManager.concurrentOperation(product.length, (startIdx, endIdx) -> {
+            for(int i=startIdx; i<endIdx; i++) {
+                product[i] = src1[i]*src2[i];
+            }
+        });
 
         return product;
     }
diff --git a/src/main/java/org/flag4j/operations/dense/real/RealDenseMatrixMultTranspose.java b/src/main/java/org/flag4j/operations/dense/real/RealDenseMatrixMultTranspose.java
index 1c8d111e9..85bd042a6 100644
--- a/src/main/java/org/flag4j/operations/dense/real/RealDenseMatrixMultTranspose.java
+++ b/src/main/java/org/flag4j/operations/dense/real/RealDenseMatrixMultTranspose.java
@@ -68,10 +68,13 @@ public static double[] multTranspose(double[] src1, Shape shape1, double[] src2,
                 src1Index = src1IndexStart;
                 src2Index = j*cols2;
                 destIndex = destIndexStart + j;
+                double sum = dest[destIndex];
 
                 while(src1Index<end) {
-                    dest[destIndex] += src1[src1Index++]*src2[src2Index++];
+                    sum += src1[src1Index++]*src2[src2Index++];
                 }
+
+                dest[destIndex] = sum;
             }
         }
 
@@ -118,10 +121,13 @@ public static double[] multTransposeBlocked(double[] src1, Shape shape1, double[
                             destIndex = destStart + j;
                             src1Index = src1Start;
                             src2Index = j * cols2 + kk;
+                            double sum = dest[destIndex];
 
                             while (src1Index < end) {
-                                dest[destIndex] += src1[src1Index++] * src2[src2Index++];
+                                sum += src1[src1Index++] * src2[src2Index++];
                             }
+
+                            dest[destIndex] = sum;
                         }
                     }
                 }
@@ -150,18 +156,23 @@ public static double[] multTransposeConcurrent(double[] src1, Shape shape1, doub
 
         double[] dest = new double[rows1*rows2]; // Since second matrix is transposed, its columns will become rows.
 
-        ThreadManager.concurrentLoop(0, rows1, (i) -> {
-            int src1IndexStart = i*cols2;
-            int destIndexStart = i*rows2;
-            int end = src1IndexStart + cols2;
+        ThreadManager.concurrentOperation(rows1, (startIdx, endIdx) -> {
+            for(int i=startIdx; i<endIdx; i++) {
+                int src1IndexStart = i*cols2;
+                int destIndexStart = i*rows2;
+                int end = src1IndexStart + cols2;
 
-            for(int j=0; j<rows2; j++) {
-                int src1Index = src1IndexStart;
-                int src2Index = j*cols2;
-                int destIndex = destIndexStart + j;
+                for(int j=0; j<rows2; j++) {
+                    int src1Index = src1IndexStart;
+                    int src2Index = j*cols2;
+                    int destIndex = destIndexStart + j;
+                    double sum = dest[destIndex];
 
-                while(src1Index<end) {
-                    dest[destIndex] += src1[src1Index++]*src2[src2Index++];
+                    while(src1Index<end) {
+                        sum += src1[src1Index++]*src2[src2Index++];
+                    }
+
+                    dest[destIndex] = sum;
                 }
             }
         });
@@ -187,28 +198,33 @@ public static double[] multTransposeBlockedConcurrent(double[] src1, Shape shape
         double[] dest = new double[rows1*rows2];
         int blockSize = Configurations.getBlockSize();
 
-        ThreadManager.concurrentLoop(0, rows1, blockSize, (ii)->{
-            int iBound = Math.min(ii + blockSize, rows1);
+        ThreadManager.concurrentBlockedOperation(rows1, blockSize, (startIdx, endIdx) -> {
+            for(int ii=startIdx; ii<endIdx; ii+=blockSize) {
+                int iBound = Math.min(ii + blockSize, rows1);
 
-            for(int jj = 0; jj<rows2; jj+=blockSize) {
-                int jBound = Math.min(jj + blockSize, rows2);
+                for(int jj = 0; jj<rows2; jj+=blockSize) {
+                    int jBound = Math.min(jj + blockSize, rows2);
 
-                for(int kk = 0; kk<cols2; kk+=blockSize) {
-                    int kBound = Math.min(kk + blockSize, cols2);
+                    for(int kk = 0; kk<cols2; kk+=blockSize) {
+                        int kBound = Math.min(kk + blockSize, cols2);
 
-                    // Multiply the blocks
-                    for(int i=ii; i<iBound; i++) {
-                        int destStart = i*rows2;
-                        int src1Start = i*cols2 + kk;
-                        int end = src1Start + kBound - kk;
-
-                        for(int j=jj; j<jBound; j++) {
-                            int destIndex = destStart + j;
-                            int src1Index = src1Start;
-                            int src2Index = j*cols2 + kk;
-
-                            while(src1Index<end) {
-                                dest[destIndex] += src1[src1Index++]*src2[src2Index++];
+                        // Multiply the blocks
+                        for(int i=ii; i<iBound; i++) {
+                            int destStart = i*rows2;
+                            int src1Start = i*cols2 + kk;
+                            int end = src1Start + kBound - kk;
+
+                            for(int j=jj; j<jBound; j++) {
+                                int destIndex = destStart + j;
+                                int src1Index = src1Start;
+                                int src2Index = j*cols2 + kk;
+                                double sum = dest[destIndex];
+
+                                while(src1Index<end) {
+                                    sum += src1[src1Index++]*src2[src2Index++];
+                                }
+
+                                dest[destIndex] = sum;
                             }
                         }
                     }
diff --git a/src/main/java/org/flag4j/operations/dense/real/RealDenseMatrixMultiplication.java b/src/main/java/org/flag4j/operations/dense/real/RealDenseMatrixMultiplication.java
index 4c96a7f6e..ce5695e01 100644
--- a/src/main/java/org/flag4j/operations/dense/real/RealDenseMatrixMultiplication.java
+++ b/src/main/java/org/flag4j/operations/dense/real/RealDenseMatrixMultiplication.java
@@ -24,9 +24,11 @@
 
 package org.flag4j.operations.dense.real;
 
+import org.flag4j.arrays.dense.Matrix;
 import org.flag4j.concurrency.Configurations;
 import org.flag4j.concurrency.ThreadManager;
 import org.flag4j.core.Shape;
+import org.flag4j.rng.RandomTensor;
 import org.flag4j.util.ErrorMessages;
 
 /**
@@ -250,18 +252,23 @@ public static double[] concurrentStandard(double[] src1, Shape shape1, double[]
 
         double[] dest = new double[rows1*cols2];
 
-        ThreadManager.concurrentLoop(0, rows1, (i) -> {
-            int src1IndexStart = i*cols1;
-            int destIndexStart = i*cols2;
+        ThreadManager.concurrentOperation(rows1, (startRow, endRow) -> {
+            for (int i = startRow; i < endRow; i++) {
+                int src1IndexStart = i * cols1;
+                int destIndexStart = i * cols2;
 
-            for(int j=0; j<cols2; j++) {
-                int src2Index = j;
-                int src1Index = src1IndexStart;
-                int destIndex = destIndexStart + j;
+                for (int j = 0; j < cols2; j++) {
+                    int src2Index = j;
+                    int src1Index = src1IndexStart;
+                    int destIndex = destIndexStart + j;
+                    double sum = 0;
 
-                for(int k=0; k<cols1; k++) {
-                    dest[destIndex] += src1[src1Index++]*src2[src2Index];
-                    src2Index += cols2;
+                    for(int k = 0; k < cols1; k++) {
+                        sum += src1[src1Index++] * src2[src2Index];
+                        src2Index += cols2;
+                    }
+
+                    dest[destIndex] += sum;
                 }
             }
         });
@@ -270,7 +277,6 @@ public static double[] concurrentStandard(double[] src1, Shape shape1, double[]
     }
 
 
-
     /**
      * Computes the matrix multiplication of two real dense matrices using a concurrent implementation of the standard
      * matrix multiplication algorithm with j-k loops swapped.
@@ -287,18 +293,20 @@ public static double[] concurrentReordered(double[] src1, Shape shape1, double[]
 
         double[] dest = new double[rows1*cols2];
 
-        ThreadManager.concurrentLoop(0, rows1, (i) -> {
-            int src1IndexStart = i*rows2;
-            int destIndexStart = i*cols2;
+        ThreadManager.concurrentOperation(rows1, (startRow, endRow) -> {
+            for(int i = startRow; i<endRow; i++) {
+                int src1IndexStart = i*rows2;
+                int destIndexStart = i*cols2;
 
-            for(int k=0; k<rows2; k++) {
-                int src2Index = k*cols2;
-                int destIndex = destIndexStart;
-                int end = src2Index + cols2;
-                double src1Value = src1[src1IndexStart++];
+                for(int k=0; k<rows2; k++) {
+                    int src2Index = k*cols2;
+                    int destIndex = destIndexStart;
+                    int end = src2Index + cols2;
+                    double src1Value = src1[src1IndexStart++];
 
-                while(src2Index<end) {
-                    dest[destIndex++] += src1Value*src2[src2Index++];
+                    while(src2Index<end) {
+                        dest[destIndex++] += src1Value*src2[src2Index++];
+                    }
                 }
             }
         });
@@ -324,30 +332,31 @@ public static double[] concurrentBlocked(double[] src1, Shape shape1, double[] s
         double[] dest = new double[rows1*cols2];
         int blockSize = Configurations.getBlockSize();
 
-        ThreadManager.concurrentLoop(0, rows1, blockSize, (ii) -> {
-            int iBound = Math.min(ii + blockSize, rows1);
-
-            for(int jj = 0; jj<cols2; jj+=blockSize) {
+        ThreadManager.concurrentBlockedOperation(rows1, blockSize, (ii, iBound) -> {
+            for (int jj = 0; jj < cols2; jj += blockSize) {
                 int jBound = Math.min(jj + blockSize, cols2);
 
-                for(int kk = 0; kk<cols1; kk+=blockSize) {
+                for (int kk = 0; kk < cols1; kk += blockSize) {
                     int kBound = Math.min(kk + blockSize, cols1);
 
                     // Multiply current blocks.
-                    for(int i=ii; i<iBound; i++) {
-                        int src1Start = i*cols1 + kk;
-                        int stopIndex = src1Start+(kBound-kk);
-                        int destStart = i*cols2;
+                    for (int i = ii; i < iBound; i++) {
+                        int src1Start = i * cols1 + kk;
+                        int stopIndex = src1Start + (kBound - kk);
+                        int destStart = i * cols2;
 
-                        for (int j=jj; j<jBound; j++) {
+                        for (int j = jj; j < jBound; j++) {
                             int destIndex = destStart + j;
                             int src1Index = src1Start;
-                            int src2Index = kk*cols2 + j;
+                            int src2Index = kk * cols2 + j;
+                            double sum = 0;
 
-                            while(src1Index < stopIndex) {
-                                dest[destIndex] += src1[src1Index++] * src2[src2Index];
-                                src2Index+=cols2;
+                            while (src1Index < stopIndex) {
+                                sum += src1[src1Index++] * src2[src2Index];
+                                src2Index += cols2;
                             }
+
+                            dest[destIndex] += sum;
                         }
                     }
                 }
@@ -375,29 +384,31 @@ public static double[] concurrentBlockedReordered(double[] src1, Shape shape1, d
         double[] dest = new double[rows1*cols2];
         int blockSize = Configurations.getBlockSize();
 
-        ThreadManager.concurrentLoop(0, rows1, blockSize, (ii) -> {
-            int iBound = Math.min(ii + blockSize, rows1);
+        ThreadManager.concurrentBlockedOperation(rows1, blockSize, (blockStart, blockEnd) -> {
+            for(int ii=blockStart; ii<blockEnd; ii+=blockSize) {
+                int iBound = Math.min(ii + blockSize, rows1);
 
-            for(int kk = 0; kk<cols1; kk+=blockSize) {
-                int kBound = Math.min(kk + blockSize, cols1);
+                for(int kk = 0; kk<cols1; kk+=blockSize) {
+                    int kBound = Math.min(kk + blockSize, cols1);
 
-                for(int jj = 0; jj<cols2; jj+=blockSize) {
-                    int jBound = Math.min(jj + blockSize, cols2);
+                    for(int jj = 0; jj<cols2; jj+=blockSize) {
+                        int jBound = Math.min(jj + blockSize, cols2);
 
-                    // Multiply current blocks.
-                    for(int i=ii; i<iBound; i++) {
-                        int destStart = i*cols2;
-                        int src1Start = i*cols1;
-                        int stopIndex = destStart+jBound;
-                        destStart += jj;
+                        // Multiply current blocks.
+                        for(int i=ii; i<iBound; i++) {
+                            int destStart = i*cols2;
+                            int src1Start = i*cols1;
+                            int stopIndex = destStart+jBound;
+                            destStart += jj;
 
-                        for (int k=kk; k<kBound; k++) {
-                            int destIndex = destStart;
-                            double src1Value = src1[src1Start + k];
-                            int src2Index = k*cols2 + jj;
+                            for (int k=kk; k<kBound; k++) {
+                                int destIndex = destStart;
+                                double src1Value = src1[src1Start + k];
+                                int src2Index = k*cols2 + jj;
 
-                            while(destIndex<stopIndex) {
-                                dest[destIndex++] += src1Value*src2[src2Index++];
+                                while(destIndex<stopIndex) {
+                                    dest[destIndex++] += src1Value*src2[src2Index++];
+                                }
                             }
                         }
                     }
@@ -428,10 +439,13 @@ public static double[] standardVector(double[] src1, Shape shape1, double[] src2
         for(int i=0; i<rows1; i++) {
             src1Index = i*cols1;
             src2Index = 0;
+            double sum = 0; // Start from zero: the total is added onto dest[i] after the loop.
 
             while(src2Index<rows2) {
-                dest[i] += src1[src1Index++]*src2[src2Index++];
+                sum += src1[src1Index++]*src2[src2Index++];
             }
+
+            dest[i] += sum;
         }
 
         return dest;
@@ -467,10 +481,13 @@ public static double[] blockedVector(double[] src1, Shape shape1, double[] src2,
                 for(int i=ii; i<iBound; i++) {
                     src1Index = i*cols1 + kk;
                     src2Index = kk;
+                    double sum = 0;
 
                     while(src2Index<kBound) {
-                        dest[i] += src1[src1Index++]*src2[src2Index++];
+                        sum += src1[src1Index++]*src2[src2Index++];
                     }
+
+                    dest[i] += sum;
                 }
             }
         }
@@ -495,12 +512,17 @@ public static double[] concurrentStandardVector(double[] src1, Shape shape1, dou
 
         double[] dest = new double[rows1];
 
-        ThreadManager.concurrentLoop(0, rows1, (i) -> {
-            int src1Index = i*cols1;
-            int src2Index = 0;
+        ThreadManager.concurrentOperation(rows1, (rowStart, rowEnd) -> {
+            for(int i=rowStart; i<rowEnd; i++) {
+                int src1Index = i*cols1;
+                int src2Index = 0;
+                double sum = 0;
+
+                while(src2Index<rows2) {
+                    sum += src1[src1Index++]*src2[src2Index++];
+                }
 
-            while(src2Index<rows2) {
-                dest[i] += src1[src1Index++]*src2[src2Index++];
+                dest[i] += sum;
             }
         });
 
@@ -525,19 +547,24 @@ public static double[] concurrentBlockedVector(double[] src1, Shape shape1, doub
         double[] dest = new double[rows1];
         int blockSize = Configurations.getBlockSize();
 
-        ThreadManager.concurrentLoop(0, rows1, blockSize, (ii) -> {
-            int iBound = Math.min(ii+blockSize, rows1);
+        ThreadManager.concurrentBlockedOperation(rows1, blockSize, (rowStart, rowEnd) -> {
+            for(int ii=rowStart; ii<rowEnd; ii+=blockSize) {
+                int iBound = Math.min(ii+blockSize, rows1);
 
-            for(int kk=0; kk<rows2; kk+=blockSize) {
-                int kBound = Math.min(kk+blockSize, rows2);
+                for(int kk=0; kk<rows2; kk+=blockSize) {
+                    int kBound = Math.min(kk+blockSize, rows2);
 
-                // Multiply the current blocks
-                for(int i=ii; i<iBound; i++) {
-                    int src1Index = i*cols1 + kk;
-                    int src2Index = kk;
+                    // Multiply the current blocks
+                    for(int i=ii; i<iBound; i++) {
+                        int src1Index = i*cols1 + kk;
+                        int src2Index = kk;
+                        double sum = 0;
 
-                    while(src2Index<kBound) {
-                        dest[i] += src1[src1Index++]*src2[src2Index++];
+                        while(src2Index<kBound) {
+                            sum += src1[src1Index++]*src2[src2Index++];
+                        }
+
+                        dest[i] += sum;
                     }
                 }
             }
@@ -545,4 +572,33 @@ public static double[] concurrentBlockedVector(double[] src1, Shape shape1, doub
 
         return dest;
     }
+
+
+    /**
+     * Ad-hoc timing harness for matrix multiplication. Builds two random 8192x8192 matrices,
+     * performs one untimed warm-up multiplication, then prints the wall-clock time of a single
+     * timed multiplication in milliseconds.
+     * <br><br>
+     * NOTE(review): this is benchmark code inside a library class; consider relocating it.
+     *
+     * @param args Command line arguments (unused).
+     */
+    public static void main(String[] args) {
+        RandomTensor rtg = new RandomTensor();
+        Shape shape = new Shape(8192, 8192);
+        Matrix A = rtg.randomMatrix(shape, -100, 100);
+        Matrix B = rtg.randomMatrix(shape, -100, 100);
+
+        // Untimed warm-up run(s) before the measurement.
+        int warmup = 1;
+        for(int i=0; i<warmup; i++) {
+            A.mult(B);
+        }
+
+        long s = System.nanoTime();
+        A.mult(B);
+        double t = (System.nanoTime() - s)*1.0e-6; // Nanoseconds to milliseconds.
+
+        System.out.printf("Time: %.5f ms%n", t);
+    }
 }
diff --git a/src/main/java/org/flag4j/operations/dense/real/RealDenseTranspose.java b/src/main/java/org/flag4j/operations/dense/real/RealDenseTranspose.java
index 7d62958be..0984199e5 100644
--- a/src/main/java/org/flag4j/operations/dense/real/RealDenseTranspose.java
+++ b/src/main/java/org/flag4j/operations/dense/real/RealDenseTranspose.java
@@ -27,7 +27,6 @@
 import org.flag4j.concurrency.Configurations;
 import org.flag4j.concurrency.ThreadManager;
 import org.flag4j.core.Shape;
-import org.flag4j.rng.RandomArray;
 import org.flag4j.util.ArrayUtils;
 import org.flag4j.util.ErrorMessages;
 import org.flag4j.util.ParameterChecks;
@@ -128,10 +127,12 @@ public static double[] standardConcurrent(final double[] src, final Shape shape,
         double[] dest = new double[shape.totalEntries().intValue()];
         Shape destShape = shape.swapAxes(axes);
 
-        ThreadManager.concurrentLoop(0, src.length, (i) -> {
-            int[] destIndices = shape.getIndices(i);
-            ArrayUtils.swap(destIndices, axes); // Compute destination indices.
-            dest[destShape.entriesIndex(destIndices)] = src[i]; // Apply transpose for the element
+        ThreadManager.concurrentOperation(src.length, (startIdx, endIdx) -> {
+            for(int i=startIdx; i<endIdx; i++) {
+                int[] destIndices = shape.getIndices(i);
+                ArrayUtils.swapUnsafe(destIndices, axes); // Compute destination indices.
+                dest[destShape.entriesIndex(destIndices)] = src[i]; // Apply transpose for the element
+            }
         });
 
         return dest;
@@ -157,10 +158,12 @@ public static double[] standardConcurrent(final double[] src, final Shape shape,
         Shape destShape = shape.swapAxes(axis1, axis2);
 
         // Compute transpose concurrently
-        ThreadManager.concurrentLoop(0, src.length, (i) -> {
-            int[] destIndices = shape.getIndices(i);
-            ArrayUtils.swap(destIndices, axis1, axis2); // Compute destination indices.
-            dest[destShape.entriesIndex(destIndices)] = src[i]; // Apply transpose for the element
+        ThreadManager.concurrentOperation(src.length, (startIdx, endIdx) -> {
+            for(int i=startIdx; i<endIdx; i++) {
+                int[] destIndices = shape.getIndices(i);
+                ArrayUtils.swap(destIndices, axis1, axis2); // Compute destination indices.
+                dest[destShape.entriesIndex(destIndices)] = src[i]; // Apply transpose for the element
+            }
         });
 
         return dest;
@@ -248,14 +251,16 @@ public static double[] standardMatrixConcurrent(final double[] src, final int nu
         double[] dest = new double[src.length];
 
         // Compute transpose concurrently.
-        ThreadManager.concurrentLoop(0, numCols, (i) -> {
-            int srcIndex = i;
-            int destIndex = i*numRows;
-            int end = destIndex + numRows;
-
-            while (destIndex < end) {
-                dest[destIndex++] = src[srcIndex];
-                srcIndex += numCols;
+        ThreadManager.concurrentOperation(numCols, (startIdx, endIdx) -> {
+            for(int i=startIdx; i<endIdx; i++) {
+                int srcIndex = i;
+                int destIndex = i*numRows;
+                int end = destIndex + numRows;
+
+                while (destIndex < end) {
+                    dest[destIndex++] = src[srcIndex];
+                    srcIndex += numCols;
+                }
             }
         });
 
@@ -275,26 +280,28 @@ public static double[] blockedMatrixConcurrent(final double[] src, final int num
         final int blockSize = Configurations.getBlockSize();
 
         // Compute transpose concurrently.
-        ThreadManager.concurrentLoop(0, numRows, blockSize, (ii)->{
-            int blockHeight = Math.min(ii+blockSize, numRows) - ii;
-            int srcIndexStart = ii*numCols;
-            int destIndexStart = ii;
-
-            for(int jj=0; jj<numCols; jj+=blockSize) {
-                int srcIndexEnd = srcIndexStart + Math.min(numCols-jj, blockSize);
-
-                while(srcIndexStart<srcIndexEnd) {
-                    int srcIndex = srcIndexStart;
-                    int destIndex = destIndexStart;
-                    int destIndexEnd = destIndex + blockHeight;
-
-                    while(destIndex<destIndexEnd) {
-                        dest[destIndex++] = src[srcIndex];
-                        srcIndex+=numCols;
+        ThreadManager.concurrentBlockedOperation(numRows, blockSize, (startIdx, endIdx) -> {
+            for(int ii=startIdx; ii<endIdx; ii+=blockSize) {
+                int blockHeight = Math.min(ii+blockSize, numRows) - ii;
+                int srcIndexStart = ii*numCols;
+                int destIndexStart = ii;
+
+                for(int jj=0; jj<numCols; jj+=blockSize) {
+                    int srcIndexEnd = srcIndexStart + Math.min(numCols-jj, blockSize);
+
+                    while(srcIndexStart<srcIndexEnd) {
+                        int srcIndex = srcIndexStart;
+                        int destIndex = destIndexStart;
+                        int destIndexEnd = destIndex + blockHeight;
+
+                        while(destIndex<destIndexEnd) {
+                            dest[destIndex++] = src[srcIndex];
+                            srcIndex+=numCols;
+                        }
+
+                        destIndexStart += numRows;
+                        srcIndexStart++;
                     }
-
-                    destIndexStart += numRows;
-                    srcIndexStart++;
                 }
             }
         });
@@ -310,7 +317,6 @@ public static double[] blockedMatrixConcurrent(final double[] src, final int num
      * @return The transpose of the matrix.
      */
     public static int[][] standardIntMatrix(final int[][] src) {
-
         int rows = src.length;
         int cols = src[0].length;
         int[][] dest = new int[cols][rows];
@@ -353,44 +359,4 @@ public static int[][] blockedIntMatrix(final int[][] src) {
 
         return dest;
     }
-
-
-    public static void main(String[] args) {
-        RandomArray rag = new RandomArray();
-
-        int warmupRuns = 5;
-        int numRuns = 10;
-
-        int rows = 500;
-        int cols = 500;
-
-        int[][] arr = new int[rows][cols];
-
-        double bTime = 0;
-        double sTime = 0;
-
-        for(int i=0; i<numRuns+warmupRuns; i++) {
-            // Generate random array to transpose.
-            for(int k=0; k<rows; k++) {
-                arr[k] = rag.genUniformRealIntArray(cols, -100, 100);
-            }
-
-            long sStart = System.nanoTime();
-            standardIntMatrix(arr);
-            long sEnd = System.nanoTime();
-
-            long bStart = System.nanoTime();
-            blockedIntMatrix(arr);
-            long bEnd = System.nanoTime();
-
-            if(i >= warmupRuns) {
-                bTime += (bEnd-bStart)*10e-6;
-                sTime += (sEnd-sStart)*10e-6;
-            }
-        }
-
-        System.out.printf("Shape: (%d, %d)\n\n", rows, cols);
-        System.out.printf("Standard Time: %.5f ms\n", sTime/numRuns);
-        System.out.printf("Blocked Time: %.5f ms\n", bTime/numRuns);
-    }
 }
diff --git a/src/main/java/org/flag4j/operations/dense/real/RealDenseVectorOperations.java b/src/main/java/org/flag4j/operations/dense/real/RealDenseVectorOperations.java
index 81865c709..2dc0de633 100644
--- a/src/main/java/org/flag4j/operations/dense/real/RealDenseVectorOperations.java
+++ b/src/main/java/org/flag4j/operations/dense/real/RealDenseVectorOperations.java
@@ -97,12 +97,14 @@ public static double[] outerProduct(double[] src1, double[] src2) {
     public static double[] outerProductConcurrent(double[] src1, double[] src2) {
         double[] dest = new double[src1.length*src2.length];
 
-        ThreadManager.concurrentLoop(0, src1.length, (int i)->{
-            int destIndex = i*src2.length;
-            double v1 = src1[i];
-
-            for(double v2 : src2) {
-                dest[destIndex++] = v1*v2;
+        ThreadManager.concurrentOperation(src1.length, (startIdx, endIdx) -> {
+            for(int i=startIdx; i<endIdx; i++) {
+                int destIndex = i*src2.length;
+                double v1 = src1[i];
+
+                for(double v2 : src2) {
+                    dest[destIndex++] = v1*v2;
+                }
             }
         });
 
diff --git a/src/main/java/org/flag4j/operations/dense/real_complex/RealComplexDenseElemDiv.java b/src/main/java/org/flag4j/operations/dense/real_complex/RealComplexDenseElemDiv.java
index e52146d86..c36526b94 100644
--- a/src/main/java/org/flag4j/operations/dense/real_complex/RealComplexDenseElemDiv.java
+++ b/src/main/java/org/flag4j/operations/dense/real_complex/RealComplexDenseElemDiv.java
@@ -81,9 +81,11 @@ public static CNumber[] elemDivConcurrent(CNumber[] src1, Shape shape1, double[]
         ParameterChecks.assertEqualShape(shape1, shape2);
         CNumber[] product = new CNumber[src1.length];
 
-        ThreadManager.concurrentLoop(0, product.length,
-                (i)->product[i] = src1[i].div(src2[i])
-        );
+        ThreadManager.concurrentOperation(product.length, (startIdx, endIdx) -> {
+            for(int i=startIdx; i<endIdx; i++) {
+                product[i] = src1[i].div(src2[i]);
+            }
+        });
 
         return product;
     }
@@ -125,9 +127,11 @@ public static CNumber[] elemDivConcurrent(double[] src1, Shape shape1, CNumber[]
         ParameterChecks.assertEqualShape(shape1, shape2);
         CNumber[] quotient = new CNumber[src1.length];
 
-        ThreadManager.concurrentLoop(0, quotient.length, (i)-> {
-            double divisor = src2[i].re*src2[i].re + src2[i].im*src2[i].im;
-            quotient[i] = new CNumber(src1[i]*src2[i].re / divisor, -src1[i]*src2[i].im / divisor);
+        ThreadManager.concurrentOperation(quotient.length, (startIdx, endIdx) -> {
+            for(int i=startIdx; i<endIdx; i++) {
+                double divisor = src2[i].re*src2[i].re + src2[i].im*src2[i].im;
+                quotient[i] = new CNumber(src1[i]*src2[i].re / divisor, -src1[i]*src2[i].im / divisor);
+            }
         });
 
         return quotient;
diff --git a/src/main/java/org/flag4j/operations/dense/real_complex/RealComplexDenseElemMult.java b/src/main/java/org/flag4j/operations/dense/real_complex/RealComplexDenseElemMult.java
index 2f41dff99..3d8a25393 100644
--- a/src/main/java/org/flag4j/operations/dense/real_complex/RealComplexDenseElemMult.java
+++ b/src/main/java/org/flag4j/operations/dense/real_complex/RealComplexDenseElemMult.java
@@ -82,9 +82,11 @@ public static CNumber[] elemMultConcurrent(CNumber[] src1, Shape shape1, double[
         ParameterChecks.assertEqualShape(shape1, shape2);
         CNumber[] product = new CNumber[src1.length];
 
-        ThreadManager.concurrentLoop(0, product.length,
-                (i)->product[i] = src1[i].mult(src2[i])
-        );
+        ThreadManager.concurrentOperation(product.length, (startIdx, endIdx) -> {
+            for(int i=startIdx; i<endIdx; i++) {
+                product[i] = src1[i].mult(src2[i]);
+            }
+        });
 
         return product;
     }
diff --git a/src/main/java/org/flag4j/operations/dense/real_complex/RealComplexDenseMatrixMultTranspose.java b/src/main/java/org/flag4j/operations/dense/real_complex/RealComplexDenseMatrixMultTranspose.java
index 623e5d6fc..b779b5077 100644
--- a/src/main/java/org/flag4j/operations/dense/real_complex/RealComplexDenseMatrixMultTranspose.java
+++ b/src/main/java/org/flag4j/operations/dense/real_complex/RealComplexDenseMatrixMultTranspose.java
@@ -73,10 +73,13 @@ public static CNumber[] multTranspose(CNumber[] src1, Shape shape1, double[] src
                 src1Index = src1IndexStart;
                 src2Index = j*cols2;
                 destIndex = destIndexStart + j;
+                CNumber sum = dest[destIndex];
 
                 while(src1Index<end) {
-                    dest[destIndex] = dest[destIndex].add(src1[src1Index++].mult(src2[src2Index++]));
+                    sum = sum.add(src1[src1Index++].mult(src2[src2Index++]));
                 }
+
+                dest[destIndex] = sum;
             }
         }
 
@@ -101,7 +104,7 @@ public static CNumber[] multTransposeBlocked(CNumber[] src1, Shape shape1, doubl
         CNumber[] dest = new CNumber[rows1*rows2];
         Arrays.fill(dest, CNumber.ZERO);
 
-        int blockSize = Configurations.getBlockSize();
+        final int blockSize = Configurations.getBlockSize();
         int iBound, jBound, kBound;
         int src1Start, destStart, end;
         int destIndex, src1Index, src2Index;
@@ -125,10 +128,13 @@ public static CNumber[] multTransposeBlocked(CNumber[] src1, Shape shape1, doubl
                             destIndex = destStart + j;
                             src1Index = src1Start;
                             src2Index = j*cols2 + kk;
+                            CNumber sum = dest[destIndex];
 
                             while(src1Index<end) {
-                                dest[destIndex] = dest[destIndex].add(src1[src1Index++].mult(src2[src2Index++]));
+                                sum = sum.add(src1[src1Index++].mult(src2[src2Index++]));
                             }
+
+                            dest[destIndex] = sum;
                         }
                     }
                 }
@@ -156,19 +162,24 @@ public static CNumber[] multTransposeConcurrent(CNumber[] src1, Shape shape1, do
 
         CNumber[] dest = new CNumber[rows1*rows2]; // Since second matrix is transposed, its columns will become rows.
         Arrays.fill(dest, CNumber.ZERO);
+        
+        ThreadManager.concurrentOperation(rows1, (startIdx, endIdx) -> {
+            for(int i=startIdx; i<endIdx; i++) {
+                int src1IndexStart = i*cols2;
+                int destIndexStart = i*rows2;
+                int end = src1IndexStart + cols2;
+
+                for(int j=0; j<rows2; j++) {
+                    int src1Index = src1IndexStart;
+                    int src2Index = j*cols2;
+                    int destIndex = destIndexStart + j;
+                    CNumber sum = dest[destIndex];
+
+                    while(src1Index<end) {
+                        sum = sum.add(src1[src1Index++].mult(src2[src2Index++]));
+                    }
 
-        ThreadManager.concurrentLoop(0, rows1, (i) -> {
-            int src1IndexStart = i*cols2;
-            int destIndexStart = i*rows2;
-            int end = src1IndexStart + cols2;
-
-            for(int j=0; j<rows2; j++) {
-                int src1Index = src1IndexStart;
-                int src2Index = j*cols2;
-                int destIndex = destIndexStart + j;
-
-                while(src1Index<end) {
-                    dest[destIndex] = dest[destIndex].add(src1[src1Index++].mult(src2[src2Index++]));
+                    dest[destIndex] = sum;
                 }
             }
         });
@@ -194,30 +205,35 @@ public static CNumber[] multTransposeBlockedConcurrent(CNumber[] src1, Shape sha
         CNumber[] dest = new CNumber[rows1*rows2];
         Arrays.fill(dest, CNumber.ZERO);
 
-        int blockSize = Configurations.getBlockSize();
+        final int blockSize = Configurations.getBlockSize();
 
-        ThreadManager.concurrentLoop(0, rows1, blockSize, (ii)->{
-            int iBound = Math.min(ii + blockSize, rows1);
+        ThreadManager.concurrentBlockedOperation(rows1, blockSize, (startIdx, endIdx) -> {
+            for(int ii=startIdx; ii<endIdx; ii+=blockSize) {
+                int iBound = Math.min(ii + blockSize, rows1);
 
-            for(int jj = 0; jj<rows2; jj+=blockSize) {
-                int jBound = Math.min(jj + blockSize, rows2);
+                for(int jj = 0; jj<rows2; jj+=blockSize) {
+                    int jBound = Math.min(jj + blockSize, rows2);
 
-                for(int kk = 0; kk<cols2; kk+=blockSize) {
-                    int kBound = Math.min(kk + blockSize, cols2);
+                    for(int kk = 0; kk<cols2; kk+=blockSize) {
+                        int kBound = Math.min(kk + blockSize, cols2);
 
-                    // Multiply the blocks
-                    for(int i=ii; i<iBound; i++) {
-                        int destStart = i*rows2;
-                        int src1Start = i*cols2 + kk;
-                        int end = src1Start + kBound - kk;
+                        // Multiply the blocks
+                        for(int i=ii; i<iBound; i++) {
+                            int destStart = i*rows2;
+                            int src1Start = i*cols2 + kk;
+                            int end = src1Start + kBound - kk;
 
-                        for(int j=jj; j<jBound; j++) {
-                            int destIndex = destStart + j;
-                            int src1Index = src1Start;
-                            int src2Index = j*cols2 + kk;
+                            for(int j=jj; j<jBound; j++) {
+                                int destIndex = destStart + j;
+                                int src1Index = src1Start;
+                                int src2Index = j*cols2 + kk;
+                                CNumber sum = dest[destIndex];
 
-                            while(src1Index<end) {
-                                dest[destIndex] = dest[destIndex].add(src1[src1Index++].mult(src2[src2Index++]));
+                                while(src1Index<end) {
+                                    sum = sum.add(src1[src1Index++].mult(src2[src2Index++]));
+                                }
+
+                                dest[destIndex] = sum;
                             }
                         }
                     }
@@ -257,10 +273,13 @@ public static CNumber[] multTranspose(double[] src1, Shape shape1, CNumber[] src
                 src1Index = src1IndexStart;
                 src2Index = j*cols2;
                 destIndex = destIndexStart + j;
+                CNumber sum = dest[destIndex];
 
                 while(src1Index<end) {
-                    dest[destIndex] = dest[destIndex].add(src2[src2Index++].mult(src1[src1Index++]));
+                    sum = sum.add(src2[src2Index++].mult(src1[src1Index++]));
                 }
+
+                dest[destIndex] = sum;
             }
         }
 
@@ -285,7 +304,7 @@ public static CNumber[] multTransposeBlocked(double[] src1, Shape shape1, CNumbe
         CNumber[] dest = new CNumber[rows1*rows2];
         Arrays.fill(dest, CNumber.ZERO);
 
-        int blockSize = Configurations.getBlockSize();
+        final int blockSize = Configurations.getBlockSize();
         int iBound, jBound, kBound;
         int src1Start, destStart, end;
         int destIndex, src1Index, src2Index;
@@ -309,10 +328,13 @@ public static CNumber[] multTransposeBlocked(double[] src1, Shape shape1, CNumbe
                             destIndex = destStart + j;
                             src1Index = src1Start;
                             src2Index = j*cols2 + kk;
+                            CNumber sum = dest[destIndex];
 
                             while(src1Index<end) {
-                                dest[destIndex] = dest[destIndex].add(src2[src2Index++].mult(src1[src1Index++]));
+                                sum = sum.add(src2[src2Index++].mult(src1[src1Index++]));
                             }
+
+                            dest[destIndex] = sum;
                         }
                     }
                 }
@@ -341,18 +363,23 @@ public static CNumber[] multTransposeConcurrent(double[] src1, Shape shape1, CNu
         CNumber[] dest = new CNumber[rows1*rows2]; // Since second matrix is transposed, its columns will become rows.
         Arrays.fill(dest, CNumber.ZERO);
 
-        ThreadManager.concurrentLoop(0, rows1, (i) -> {
-            int src1IndexStart = i*cols2;
-            int destIndexStart = i*rows2;
-            int end = src1IndexStart + cols2;
+        ThreadManager.concurrentOperation(rows1, (startIdx, endIdx) -> {
+            for(int i=startIdx; i<endIdx; i++) {
+                int src1IndexStart = i*cols2;
+                int destIndexStart = i*rows2;
+                int end = src1IndexStart + cols2;
 
-            for(int j=0; j<rows2; j++) {
-                int src1Index = src1IndexStart;
-                int src2Index = j*cols2;
-                int destIndex = destIndexStart + j;
+                for(int j=0; j<rows2; j++) {
+                    int src1Index = src1IndexStart;
+                    int src2Index = j*cols2;
+                    int destIndex = destIndexStart + j;
+                    CNumber sum = dest[destIndex];
 
-                while(src1Index<end) {
-                    dest[destIndex] = dest[destIndex].add(src2[src2Index++].mult(src1[src1Index++]));
+                    while(src1Index<end) {
+                        sum = sum.add(src2[src2Index++].mult(src1[src1Index++]));
+                    }
+
+                    dest[destIndex] = sum;
                 }
             }
         });
@@ -377,31 +404,35 @@ public static CNumber[] multTransposeBlockedConcurrent(double[] src1, Shape shap
 
         CNumber[] dest = new CNumber[rows1*rows2];
         Arrays.fill(dest, CNumber.ZERO);
+        final int blockSize = Configurations.getBlockSize();
 
-        int blockSize = Configurations.getBlockSize();
+        ThreadManager.concurrentBlockedOperation(rows1, blockSize, (startIdx, endIdx) -> {
+            for(int ii=startIdx; ii<endIdx; ii+=blockSize) {
+                int iBound = Math.min(ii + blockSize, rows1);
 
-        ThreadManager.concurrentLoop(0, rows1, blockSize, (ii)->{
-            int iBound = Math.min(ii + blockSize, rows1);
+                for(int jj = 0; jj<rows2; jj+=blockSize) {
+                    int jBound = Math.min(jj + blockSize, rows2);
 
-            for(int jj = 0; jj<rows2; jj+=blockSize) {
-                int jBound = Math.min(jj + blockSize, rows2);
+                    for(int kk = 0; kk<cols2; kk+=blockSize) {
+                        int kBound = Math.min(kk + blockSize, cols2);
 
-                for(int kk = 0; kk<cols2; kk+=blockSize) {
-                    int kBound = Math.min(kk + blockSize, cols2);
+                        // Multiply the blocks
+                        for(int i=ii; i<iBound; i++) {
+                            int destStart = i*rows2;
+                            int src1Start = i*cols2 + kk;
+                            int end = src1Start + kBound - kk;
 
-                    // Multiply the blocks
-                    for(int i=ii; i<iBound; i++) {
-                        int destStart = i*rows2;
-                        int src1Start = i*cols2 + kk;
-                        int end = src1Start + kBound - kk;
+                            for(int j=jj; j<jBound; j++) {
+                                int destIndex = destStart + j;
+                                int src1Index = src1Start;
+                                int src2Index = j*cols2 + kk;
+                                CNumber sum = dest[destIndex];
 
-                        for(int j=jj; j<jBound; j++) {
-                            int destIndex = destStart + j;
-                            int src1Index = src1Start;
-                            int src2Index = j*cols2 + kk;
+                                while(src1Index<end) {
+                                    sum = sum.add(src2[src2Index++].mult(src1[src1Index++]));
+                                }
 
-                            while(src1Index<end) {
-                                dest[destIndex] = dest[destIndex].add(src2[src2Index++].mult(src1[src1Index++]));
+                                dest[destIndex] = sum;
                             }
                         }
                     }
diff --git a/src/main/java/org/flag4j/operations/dense/real_complex/RealComplexDenseMatrixMultiplication.java b/src/main/java/org/flag4j/operations/dense/real_complex/RealComplexDenseMatrixMultiplication.java
index c34c82c72..8f92f5521 100644
--- a/src/main/java/org/flag4j/operations/dense/real_complex/RealComplexDenseMatrixMultiplication.java
+++ b/src/main/java/org/flag4j/operations/dense/real_complex/RealComplexDenseMatrixMultiplication.java
@@ -60,7 +60,7 @@ public static CNumber[] standard(double[] src1, Shape shape1, CNumber[] src2, Sh
         int rows2 = shape2.get(0);
         int cols2 = shape2.get(1);
 
-        CNumber[] dest = new CNumber[rows1*cols2];
+        final CNumber[] dest = new CNumber[rows1*cols2];
         ArrayUtils.fill(dest, 0);
 
         int src1Index, src2Index, destIndex, src1IndexStart, destIndexStart, end;
@@ -74,11 +74,14 @@ public static CNumber[] standard(double[] src1, Shape shape1, CNumber[] src2, Sh
                 src1Index = src1IndexStart;
                 destIndex = destIndexStart + j;
                 end = src1Index + rows2;
+                CNumber sum = dest[destIndex];
 
                 while(src1Index<end) {
-                    dest[destIndex] = dest[destIndex].add(src2[src2Index].mult(src1[src1Index++]));
+                    sum = sum.add(src2[src2Index].mult(src1[src1Index++]));
                     src2Index += cols2;
                 }
+
+                dest[destIndex] = sum;
             }
         }
 
@@ -100,7 +103,7 @@ public static CNumber[] reordered(double[] src1, Shape shape1, CNumber[] src2, S
         int cols1 = shape1.get(1);
         int cols2 = shape2.get(1);
 
-        CNumber[] dest = new CNumber[rows1*cols2];
+        final CNumber[] dest = new CNumber[rows1*cols2];
         ArrayUtils.fill(dest, 0);
 
         int src2Index, destIndex, src1Start, destIndexStart, end;
@@ -140,10 +143,10 @@ public static CNumber[] blocked(double[] src1, Shape shape1, CNumber[] src2, Sha
         int cols2 = shape2.get(1);
         int cols1 = shape1.get(1);
 
-        CNumber[] dest = new CNumber[rows1 * cols2];
+        final CNumber[] dest = new CNumber[rows1 * cols2];
         ArrayUtils.fill(dest, 0);
 
-        int blockSize = Configurations.getBlockSize();
+        final int blockSize = Configurations.getBlockSize();
         int iBound, jBound, kBound;
         int src1Start, destStart, stopIndex;
         int destIndex, src1Index, src2Index;
@@ -165,11 +168,14 @@ public static CNumber[] blocked(double[] src1, Shape shape1, CNumber[] src2, Sha
                             destIndex = destStart + j;
                             src1Index = src1Start;
                             src2Index = kk*cols2 + j;
+                            CNumber sum = dest[destIndex];
 
                             while(src1Index < stopIndex) {
-                                dest[destIndex] = dest[destIndex].add(src2[src2Index].mult(src1[src1Index++]));
+                                sum = sum.add(src2[src2Index].mult(src1[src1Index++]));
                                 src2Index += cols2;
                             }
+
+                            dest[destIndex] = sum;
                         }
                     }
                 }
@@ -194,10 +200,10 @@ public static CNumber[] blockedReordered(double[] src1, Shape shape1, CNumber[]
         int cols2 = shape2.get(1);
         int cols1 = shape1.get(1);
 
-        CNumber[] dest = new CNumber[rows1*cols2];
+        final CNumber[] dest = new CNumber[rows1*cols2];
         ArrayUtils.fill(dest, 0);
 
-        int blockSize = Configurations.getBlockSize();
+        final int blockSize = Configurations.getBlockSize();
         int iBound, jBound, kBound;
         int destStart, src1Start, stopIndex;
         int destIndex, src1Index, src2Index;
@@ -251,21 +257,26 @@ public static CNumber[] concurrentStandard(double[] src1, Shape shape1, CNumber[
         int cols1 = shape1.get(1);
         int cols2 = shape2.get(1);
 
-        CNumber[] dest = new CNumber[rows1*cols2];
+        final CNumber[] dest = new CNumber[rows1*cols2];
         ArrayUtils.fill(dest, 0);
 
-        ThreadManager.concurrentLoop(0, rows1, (i) -> {
-            int src1IndexStart = i*cols1;
-            int destIndexStart = i*cols2;
+        ThreadManager.concurrentOperation(rows1, (startIdx, endIdx) -> {
+            for(int i=startIdx; i<endIdx; i++) {
+                int src1IndexStart = i*cols1;
+                int destIndexStart = i*cols2;
 
-            for(int j=0; j<cols2; j++) {
-                int src2Index = j;
-                int src1Index = src1IndexStart;
-                int destIndex = destIndexStart + j;
+                for(int j=0; j<cols2; j++) {
+                    int src2Index = j;
+                    int src1Index = src1IndexStart;
+                    int destIndex = destIndexStart + j;
+                    CNumber sum = dest[destIndex];
 
-                for(int k=0; k<cols1; k++) {
-                    dest[destIndex] = dest[destIndex].add(src2[src2Index].mult(src1[src1Index++]));
-                    src2Index += cols2;
+                    for(int k=0; k<cols1; k++) {
+                        sum = sum.add(src2[src2Index].mult(src1[src1Index++]));
+                        src2Index += cols2;
+                    }
+
+                    dest[destIndex] = sum;
                 }
             }
         });
@@ -288,21 +299,23 @@ public static CNumber[] concurrentReordered(double[] src1, Shape shape1, CNumber
         int rows2 = shape2.get(0);
         int cols2 = shape2.get(1);
 
-        CNumber[] dest = new CNumber[rows1*cols2];
+        final CNumber[] dest = new CNumber[rows1*cols2];
         ArrayUtils.fill(dest, 0);
 
-        ThreadManager.concurrentLoop(0, rows1, (i) -> {
-            int src1IndexStart = i*rows2;
-            int destIndexStart = i*cols2;
+        ThreadManager.concurrentOperation(rows1, (startIdx, endIdx) -> {
+            for(int i=startIdx; i<endIdx; i++) {
+                int src1IndexStart = i*rows2;
+                int destIndexStart = i*cols2;
 
-            for(int k=0; k<rows2; k++) {
-                int src2Index = k*cols2;
-                int destIndex = destIndexStart;
-                int end = src2Index + cols2;
+                for(int k=0; k<rows2; k++) {
+                    int src2Index = k*cols2;
+                    int destIndex = destIndexStart;
+                    int end = src2Index + cols2;
 
-                while(src2Index<end) {
-                    dest[destIndex] = dest[destIndex].add(src2[src2Index++].mult(src1[src1IndexStart + k]));
-                    destIndex++;
+                    while(src2Index<end) {
+                        dest[destIndex] = dest[destIndex].add(src2[src2Index++].mult(src1[src1IndexStart + k]));
+                        destIndex++;
+                    }
                 }
             }
         });
@@ -325,33 +338,38 @@ public static CNumber[] concurrentBlocked(double[] src1, Shape shape1, CNumber[]
         int cols1 = shape1.get(1);
         int cols2 = shape2.get(1);
 
-        CNumber[] dest = new CNumber[rows1*cols2];
+        final CNumber[] dest = new CNumber[rows1*cols2];
         ArrayUtils.fill(dest, 0);
-        int blockSize = Configurations.getBlockSize();
+        final int blockSize = Configurations.getBlockSize();
 
-        ThreadManager.concurrentLoop(0, rows1, blockSize, (ii) -> {
-            int iBound = Math.min(ii + blockSize, rows1);
+        ThreadManager.concurrentBlockedOperation(rows1, blockSize, (startIdx, endIdx) -> {
+            for(int ii=startIdx; ii<endIdx; ii+=blockSize) {
+                int iBound = Math.min(ii + blockSize, rows1);
 
-            for(int jj = 0; jj<cols2; jj+=blockSize) {
-                int jBound = Math.min(jj + blockSize, cols2);
+                for(int jj = 0; jj<cols2; jj+=blockSize) {
+                    int jBound = Math.min(jj + blockSize, cols2);
 
-                for(int kk = 0; kk<cols1; kk+=blockSize) {
-                    int kBound = Math.min(kk + blockSize, cols1);
+                    for(int kk = 0; kk<cols1; kk+=blockSize) {
+                        int kBound = Math.min(kk + blockSize, cols1);
 
-                    // Multiply current blocks.
-                    for(int i=ii; i<iBound; i++) {
-                        int src1Start = i*cols1 + kk;
-                        int stopIndex = src1Start+(kBound-kk);
-                        int destStart = i*cols2;
+                        // Multiply current blocks.
+                        for(int i=ii; i<iBound; i++) {
+                            int src1Start = i*cols1 + kk;
+                            int stopIndex = src1Start+(kBound-kk);
+                            int destStart = i*cols2;
 
-                        for (int j=jj; j<jBound; j++) {
-                            int destIndex = destStart + j;
-                            int src1Index = src1Start;
-                            int src2Index = kk*cols2 + j;
+                            for (int j=jj; j<jBound; j++) {
+                                int destIndex = destStart + j;
+                                int src1Index = src1Start;
+                                int src2Index = kk*cols2 + j;
+                                CNumber sum = dest[destIndex];
 
-                            while(src1Index < stopIndex) {
-                                dest[destIndex] = dest[destIndex].add(src2[src2Index].mult(src1[src1Index++]));
-                                src2Index+=cols2;
+                                while(src1Index < stopIndex) {
+                                    sum = sum.add(src2[src2Index].mult(src1[src1Index++]));
+                                    src2Index+=cols2;
+                                }
+
+                                dest[destIndex] = sum;
                             }
                         }
                     }
@@ -377,34 +395,36 @@ public static CNumber[] concurrentBlockedReordered(double[] src1, Shape shape1,
         int cols1 = shape1.get(1);
         int cols2 = shape2.get(1);
 
-        CNumber[] dest = new CNumber[rows1*cols2];
+        final CNumber[] dest = new CNumber[rows1*cols2];
         ArrayUtils.fill(dest, 0);
-        int blockSize = Configurations.getBlockSize();
-
-        ThreadManager.concurrentLoop(0, rows1, blockSize, (ii) -> {
-            int iBound = Math.min(ii + blockSize, rows1);
+        final int blockSize = Configurations.getBlockSize();
 
-            for(int kk = 0; kk<cols1; kk+=blockSize) {
-                int kBound = Math.min(kk + blockSize, cols1);
+        ThreadManager.concurrentBlockedOperation(rows1, blockSize, (startIdx, endIdx) -> {
+            for(int ii=startIdx; ii<endIdx; ii+=blockSize) {
+                int iBound = Math.min(ii + blockSize, rows1);
 
-                for(int jj = 0; jj<cols2; jj+=blockSize) {
-                    int jBound = Math.min(jj + blockSize, cols2);
-
-                    // Multiply current blocks.
-                    for(int i=ii; i<iBound; i++) {
-                        int destStart = i*cols2;
-                        int src1Start = i*cols1;
-                        int stopIndex = destStart+jBound;
-
-                        for (int k=kk; k<kBound; k++) {
-                            int destIndex = destStart + jj;
-                            int src1Index = src1Start + k;
-                            int src2Index = k*cols2 + jj;
+                for(int kk = 0; kk<cols1; kk+=blockSize) {
+                    int kBound = Math.min(kk + blockSize, cols1);
 
-                            while(destIndex<stopIndex) {
-                                dest[destIndex] = dest[destIndex].add(src2[src2Index].mult(src1[src1Index]));
-                                destIndex++;
-                                src2Index++;
+                    for(int jj = 0; jj<cols2; jj+=blockSize) {
+                        int jBound = Math.min(jj + blockSize, cols2);
+
+                        // Multiply current blocks.
+                        for(int i=ii; i<iBound; i++) {
+                            int destStart = i*cols2;
+                            int src1Start = i*cols1;
+                            int stopIndex = destStart+jBound;
+
+                            for (int k=kk; k<kBound; k++) {
+                                int destIndex = destStart + jj;
+                                int src1Index = src1Start + k;
+                                int src2Index = k*cols2 + jj;
+
+                                while(destIndex<stopIndex) {
+                                    dest[destIndex] = dest[destIndex].add(src2[src2Index].mult(src1[src1Index]));
+                                    destIndex++;
+                                    src2Index++;
+                                }
                             }
                         }
                     }
@@ -429,17 +449,20 @@ public static CNumber[] standardVector(double[] src1, Shape shape1, CNumber[] sr
         int cols1 = shape1.get(1);
         int rows2 = shape2.get(0);
 
-        CNumber[] dest = new CNumber[rows1];
+        final CNumber[] dest = new CNumber[rows1];
         ArrayUtils.fill(dest, 0);
         int src1Index, src2Index;
 
         for(int i=0; i<rows1; i++) {
             src1Index = i*cols1;
             src2Index = 0;
+            CNumber sum = dest[i];
 
             while(src2Index<rows2) {
-                dest[i] = dest[i].add(src2[src2Index++].mult(src1[src1Index++]));
+                sum = sum.add(src2[src2Index++].mult(src1[src1Index++]));
             }
+
+            dest[i] = sum;
         }
 
         return dest;
@@ -459,9 +482,9 @@ public static CNumber[] blockedVector(double[] src1, Shape shape1, CNumber[] src
         int cols1 = shape1.get(1);
         int rows2 = shape2.get(0);
 
-        CNumber[] dest = new CNumber[rows1];
+        final CNumber[] dest = new CNumber[rows1];
         ArrayUtils.fill(dest, 0);
-        int blockSize = Configurations.getBlockSize();
+        final int blockSize = Configurations.getBlockSize();
         int iBound, kBound;
         int src1Index, src2Index;
 
@@ -476,10 +499,13 @@ public static CNumber[] blockedVector(double[] src1, Shape shape1, CNumber[] src
                 for(int i=ii; i<iBound; i++) {
                     src1Index = i*cols1 + kk;
                     src2Index = kk;
+                    CNumber sum = dest[i];
 
                     while(src2Index<kBound) {
-                        dest[i] = dest[i].add(src2[src2Index++].mult(src1[src1Index++]));
+                        sum = sum.add(src2[src2Index++].mult(src1[src1Index++]));
                     }
+
+                    dest[i] = sum;
                 }
             }
         }
@@ -502,15 +528,20 @@ public static CNumber[] concurrentStandardVector(double[] src1, Shape shape1, CN
         int cols1 = shape1.get(1);
         int rows2 = shape2.get(0);
 
-        CNumber[] dest = new CNumber[rows1];
+        final CNumber[] dest = new CNumber[rows1];
         ArrayUtils.fill(dest, 0);
 
-        ThreadManager.concurrentLoop(0, rows1, (i) -> {
-            int src1Index = i*cols1;
-            int src2Index = 0;
+        ThreadManager.concurrentOperation(rows1, (startIdx, endIdx) -> {
+            for(int i=startIdx; i<endIdx; i++) {
+                int src1Index = i*cols1;
+                int src2Index = 0;
+                CNumber sum = dest[i];
 
-            while(src2Index<rows2) {
-                dest[i] = dest[i].add(src2[src2Index++].mult(src1[src1Index++]));
+                while(src2Index<rows2) {
+                    sum = sum.add(src2[src2Index++].mult(src1[src1Index++]));
+                }
+
+                dest[i] = sum;
             }
         });
 
@@ -532,23 +563,28 @@ public static CNumber[] concurrentBlockedVector(double[] src1, Shape shape1, CNu
         int cols1 = shape1.get(1);
         int rows2 = shape2.get(0);
 
-        CNumber[] dest = new CNumber[rows1];
+        final CNumber[] dest = new CNumber[rows1];
         ArrayUtils.fill(dest, 0);
-        int blockSize = Configurations.getBlockSize();
+        final int blockSize = Configurations.getBlockSize();
 
-        ThreadManager.concurrentLoop(0, rows1, blockSize, (ii) -> {
-            int iBound = Math.min(ii+blockSize, rows1);
+        ThreadManager.concurrentBlockedOperation(rows1, blockSize, (startIdx, endIdx) -> {
+            for(int ii=startIdx; ii<endIdx; ii+=blockSize) {
+                int iBound = Math.min(ii+blockSize, rows1);
 
-            for(int kk=0; kk<rows2; kk+=blockSize) {
-                int kBound = Math.min(kk+blockSize, rows2);
+                for(int kk=0; kk<rows2; kk+=blockSize) {
+                    int kBound = Math.min(kk+blockSize, rows2);
 
-                // Multiply the current blocks
-                for(int i=ii; i<iBound; i++) {
-                    int src1Index = i*cols1 + kk;
-                    int src2Index = kk;
+                    // Multiply the current blocks
+                    for(int i=ii; i<iBound; i++) {
+                        int src1Index = i*cols1 + kk;
+                        int src2Index = kk;
+                        CNumber sum = dest[i];
 
-                    while(src2Index<kBound) {
-                        dest[i] = dest[i].add(src2[src2Index++].mult(src1[src1Index++]));
+                        while(src2Index<kBound) {
+                            sum = sum.add(src2[src2Index++].mult(src1[src1Index++]));
+                        }
+
+                        dest[i] = sum;
                     }
                 }
             }
@@ -571,7 +607,7 @@ public static CNumber[] standard(CNumber[] src1, Shape shape1, double[] src2, Sh
         int rows2 = shape2.get(0);
         int cols2 = shape2.get(1);
 
-        CNumber[] dest = new CNumber[rows1*cols2];
+        final CNumber[] dest = new CNumber[rows1*cols2];
         ArrayUtils.fill(dest, 0);
 
         int src1Index, src2Index, destIndex, src1IndexStart, destIndexStart, end;
@@ -585,11 +621,14 @@ public static CNumber[] standard(CNumber[] src1, Shape shape1, double[] src2, Sh
                 src1Index = src1IndexStart;
                 destIndex = destIndexStart + j;
                 end = src1Index + rows2;
+                CNumber sum = dest[destIndex];
 
                 while(src1Index<end) {
-                    dest[destIndex] = dest[destIndex].add(src1[src1Index++].mult(src2[src2Index]));
+                    sum = sum.add(src1[src1Index++].mult(src2[src2Index]));
                     src2Index += cols2;
                 }
+
+                dest[destIndex] = sum;
             }
         }
 
@@ -611,7 +650,7 @@ public static CNumber[] reordered(CNumber[] src1, Shape shape1, double[] src2, S
         int cols1 = shape1.get(1);
         int cols2 = shape2.get(1);
 
-        CNumber[] dest = new CNumber[rows1*cols2];
+        final CNumber[] dest = new CNumber[rows1*cols2];
         ArrayUtils.fill(dest, 0);
 
         int src2Index, destIndex, src1Start, destIndexStart, end;
@@ -651,10 +690,10 @@ public static CNumber[] blocked(CNumber[] src1, Shape shape1, double[] src2, Sha
         int cols2 = shape2.get(1);
         int cols1 = shape1.get(1);
 
-        CNumber[] dest = new CNumber[rows1 * cols2];
+        final CNumber[] dest = new CNumber[rows1 * cols2];
         ArrayUtils.fill(dest, 0);
 
-        int blockSize = Configurations.getBlockSize();
+        final int blockSize = Configurations.getBlockSize();
         int iBound, jBound, kBound;
         int src1Start, destStart, stopIndex;
         int destIndex, src1Index, src2Index;
@@ -676,11 +715,14 @@ public static CNumber[] blocked(CNumber[] src1, Shape shape1, double[] src2, Sha
                             destIndex = destStart + j;
                             src1Index = src1Start;
                             src2Index = kk*cols2 + j;
+                            CNumber sum = dest[destIndex];
 
                             while(src1Index < stopIndex) {
-                                dest[destIndex] = dest[destIndex].add(src1[src1Index++].mult(src2[src2Index]));
+                                sum = sum.add(src1[src1Index++].mult(src2[src2Index]));
                                 src2Index+=cols2;
                             }
+
+                            dest[destIndex] = sum;
                         }
                     }
                 }
@@ -705,10 +747,10 @@ public static CNumber[] blockedReordered(CNumber[] src1, Shape shape1, double[]
         int cols2 = shape2.get(1);
         int cols1 = shape1.get(1);
 
-        CNumber[] dest = new CNumber[rows1*cols2];
+        final CNumber[] dest = new CNumber[rows1*cols2];
         ArrayUtils.fill(dest, 0);
 
-        int blockSize = Configurations.getBlockSize();
+        final int blockSize = Configurations.getBlockSize();
         int iBound, jBound, kBound;
         int destStart, src1Start, stopIndex;
         int destIndex, src1Index, src2Index;
@@ -762,21 +804,26 @@ public static CNumber[] concurrentStandard(CNumber[] src1, Shape shape1, double[
         int cols1 = shape1.get(1);
         int cols2 = shape2.get(1);
 
-        CNumber[] dest = new CNumber[rows1*cols2];
+        final CNumber[] dest = new CNumber[rows1*cols2];
         ArrayUtils.fill(dest, 0);
 
-        ThreadManager.concurrentLoop(0, rows1, (i) -> {
-            int src1IndexStart = i*cols1;
-            int destIndexStart = i*cols2;
+        ThreadManager.concurrentOperation(rows1, (startIdx, endIdx) -> {
+            for(int i=startIdx; i<endIdx; i++) {
+                int src1IndexStart = i*cols1;
+                int destIndexStart = i*cols2;
 
-            for(int j=0; j<cols2; j++) {
-                int src2Index = j;
-                int src1Index = src1IndexStart;
-                int destIndex = destIndexStart + j;
+                for(int j=0; j<cols2; j++) {
+                    int src2Index = j;
+                    int src1Index = src1IndexStart;
+                    int destIndex = destIndexStart + j;
+                    CNumber sum = dest[destIndex];
 
-                for(int k=0; k<cols1; k++) {
-                    dest[destIndex] = dest[destIndex].add(src1[src1Index++].mult(src2[src2Index]));
-                    src2Index += cols2;
+                    for(int k=0; k<cols1; k++) {
+                        sum = sum.add(src1[src1Index++].mult(src2[src2Index]));
+                        src2Index += cols2;
+                    }
+
+                    dest[destIndex] = sum;
                 }
             }
         });
@@ -799,21 +846,23 @@ public static CNumber[] concurrentReordered(CNumber[] src1, Shape shape1, double
         int rows2 = shape2.get(0);
         int cols2 = shape2.get(1);
 
-        CNumber[] dest = new CNumber[rows1*cols2];
+        final CNumber[] dest = new CNumber[rows1*cols2];
         ArrayUtils.fill(dest, 0);
 
-        ThreadManager.concurrentLoop(0, rows1, (i) -> {
-            int src1IndexStart = i*rows2;
-            int destIndexStart = i*cols2;
+        ThreadManager.concurrentOperation(rows1, (startIdx, endIdx) -> {
+            for(int i=startIdx; i<endIdx; i++) {
+                int src1IndexStart = i*rows2;
+                int destIndexStart = i*cols2;
 
-            for(int k=0; k<rows2; k++) {
-                int src2Index = k*cols2;
-                int destIndex = destIndexStart;
-                int end = src2Index + cols2;
+                for(int k=0; k<rows2; k++) {
+                    int src2Index = k*cols2;
+                    int destIndex = destIndexStart;
+                    int end = src2Index + cols2;
 
-                while(src2Index<end) {
-                    dest[destIndex] = dest[destIndex].add(src1[src1IndexStart + k].mult(src2[src2Index++]));
-                    destIndex++;
+                    while(src2Index<end) {
+                        dest[destIndex] = dest[destIndex].add(src1[src1IndexStart + k].mult(src2[src2Index++]));
+                        destIndex++;
+                    }
                 }
             }
         });
@@ -836,33 +885,38 @@ public static CNumber[] concurrentBlocked(CNumber[] src1, Shape shape1, double[]
         int cols1 = shape1.get(1);
         int cols2 = shape2.get(1);
 
-        CNumber[] dest = new CNumber[rows1*cols2];
+        final CNumber[] dest = new CNumber[rows1*cols2];
         ArrayUtils.fill(dest, 0);
-        int blockSize = Configurations.getBlockSize();
+        final int blockSize = Configurations.getBlockSize();
 
-        ThreadManager.concurrentLoop(0, rows1, blockSize, (ii) -> {
-            int iBound = Math.min(ii + blockSize, rows1);
+        ThreadManager.concurrentBlockedOperation(rows1, blockSize, (startIdx, endIdx) -> {
+            for(int ii=startIdx; ii<endIdx; ii+=blockSize) {
+                int iBound = Math.min(ii + blockSize, rows1);
 
-            for(int jj = 0; jj<cols2; jj+=blockSize) {
-                int jBound = Math.min(jj + blockSize, cols2);
+                for(int jj = 0; jj<cols2; jj+=blockSize) {
+                    int jBound = Math.min(jj + blockSize, cols2);
 
-                for(int kk = 0; kk<cols1; kk+=blockSize) {
-                    int kBound = Math.min(kk + blockSize, cols1);
+                    for(int kk = 0; kk<cols1; kk+=blockSize) {
+                        int kBound = Math.min(kk + blockSize, cols1);
 
-                    // Multiply current blocks.
-                    for(int i=ii; i<iBound; i++) {
-                        int src1Start = i*cols1 + kk;
-                        int stopIndex = src1Start+(kBound-kk);
-                        int destStart = i*cols2;
+                        // Multiply current blocks.
+                        for(int i=ii; i<iBound; i++) {
+                            int src1Start = i*cols1 + kk;
+                            int stopIndex = src1Start+(kBound-kk);
+                            int destStart = i*cols2;
 
-                        for (int j=jj; j<jBound; j++) {
-                            int destIndex = destStart + j;
-                            int src1Index = src1Start;
-                            int src2Index = kk*cols2 + j;
+                            for (int j=jj; j<jBound; j++) {
+                                int destIndex = destStart + j;
+                                int src1Index = src1Start;
+                                int src2Index = kk*cols2 + j;
+                                CNumber sum = dest[destIndex];
 
-                            while(src1Index < stopIndex) {
-                                dest[destIndex] = dest[destIndex].add(src1[src1Index++].mult(src2[src2Index]));
-                                src2Index+=cols2;
+                                while(src1Index < stopIndex) {
+                                    sum = sum.add(src1[src1Index++].mult(src2[src2Index]));
+                                    src2Index+=cols2;
+                                }
+
+                                dest[destIndex] = sum;
                             }
                         }
                     }
@@ -888,34 +942,36 @@ public static CNumber[] concurrentBlockedReordered(CNumber[] src1, Shape shape1,
         int cols1 = shape1.get(1);
         int cols2 = shape2.get(1);
 
-        CNumber[] dest = new CNumber[rows1*cols2];
+        final CNumber[] dest = new CNumber[rows1*cols2];
         ArrayUtils.fill(dest, 0);
-        int blockSize = Configurations.getBlockSize();
-
-        ThreadManager.concurrentLoop(0, rows1, blockSize, (ii) -> {
-            int iBound = Math.min(ii + blockSize, rows1);
+        final int blockSize = Configurations.getBlockSize();
 
-            for(int kk = 0; kk<cols1; kk+=blockSize) {
-                int kBound = Math.min(kk + blockSize, cols1);
+        ThreadManager.concurrentBlockedOperation(rows1, blockSize, (startIdx, endIdx) -> {
+            for(int ii=startIdx; ii<endIdx; ii+=blockSize) {
+                int iBound = Math.min(ii + blockSize, rows1);
 
-                for(int jj = 0; jj<cols2; jj+=blockSize) {
-                    int jBound = Math.min(jj + blockSize, cols2);
-
-                    // Multiply current blocks.
-                    for(int i=ii; i<iBound; i++) {
-                        int destStart = i*cols2;
-                        int src1Start = i*cols1;
-                        int stopIndex = destStart+jBound;
-
-                        for (int k=kk; k<kBound; k++) {
-                            int destIndex = destStart + jj;
-                            int src1Index = src1Start + k;
-                            int src2Index = k*cols2 + jj;
+                for(int kk = 0; kk<cols1; kk+=blockSize) {
+                    int kBound = Math.min(kk + blockSize, cols1);
 
-                            while(destIndex<stopIndex) {
-                                dest[destIndex] = dest[destIndex].add(src1[src1Index].mult(src2[src2Index]));
-                                destIndex++;
-                                src2Index++;
+                    for(int jj = 0; jj<cols2; jj+=blockSize) {
+                        int jBound = Math.min(jj + blockSize, cols2);
+
+                        // Multiply current blocks.
+                        for(int i=ii; i<iBound; i++) {
+                            int destStart = i*cols2;
+                            int src1Start = i*cols1;
+                            int stopIndex = destStart+jBound;
+
+                            for (int k=kk; k<kBound; k++) {
+                                int destIndex = destStart + jj;
+                                int src1Index = src1Start + k;
+                                int src2Index = k*cols2 + jj;
+
+                                while(destIndex<stopIndex) {
+                                    dest[destIndex] = dest[destIndex].add(src1[src1Index].mult(src2[src2Index]));
+                                    destIndex++;
+                                    src2Index++;
+                                }
                             }
                         }
                     }
@@ -940,17 +996,20 @@ public static CNumber[] standardVector(CNumber[] src1, Shape shape1, double[] sr
         int cols1 = shape1.get(1);
         int rows2 = shape2.get(0);
 
-        CNumber[] dest = new CNumber[rows1];
+        final CNumber[] dest = new CNumber[rows1];
         ArrayUtils.fill(dest, 0);
         int src1Index, src2Index;
 
         for(int i=0; i<rows1; i++) {
             src1Index = i*cols1;
             src2Index = 0;
+            CNumber sum = dest[i];
 
             while(src2Index<rows2) {
-                dest[i] = dest[i].add(src1[src1Index++].mult(src2[src2Index++]));
+                sum = sum.add(src1[src1Index++].mult(src2[src2Index++]));
             }
+
+            dest[i] = sum;
         }
 
         return dest;
@@ -970,9 +1029,9 @@ public static CNumber[] blockedVector(CNumber[] src1, Shape shape1, double[] src
         int cols1 = shape1.get(1);
         int rows2 = shape2.get(0);
 
-        CNumber[] dest = new CNumber[rows1];
+        final CNumber[] dest = new CNumber[rows1];
         ArrayUtils.fill(dest, 0);
-        int blockSize = Configurations.getBlockSize();
+        final int blockSize = Configurations.getBlockSize();
         int iBound, kBound;
         int src1Index, src2Index;
 
@@ -987,10 +1046,13 @@ public static CNumber[] blockedVector(CNumber[] src1, Shape shape1, double[] src
                 for(int i=ii; i<iBound; i++) {
                     src1Index = i*cols1 + kk;
                     src2Index = kk;
+                    CNumber sum = dest[i];
 
                     while(src2Index<kBound) {
-                        dest[i] = dest[i].add(src1[src1Index++].mult(src2[src2Index++]));
+                        sum = sum.add(src1[src1Index++].mult(src2[src2Index++]));
                     }
+
+                    dest[i] = sum;
                 }
             }
         }
@@ -1013,15 +1075,20 @@ public static CNumber[] concurrentStandardVector(CNumber[] src1, Shape shape1, d
         int cols1 = shape1.get(1);
         int rows2 = shape2.get(0);
 
-        CNumber[] dest = new CNumber[rows1];
+        final CNumber[] dest = new CNumber[rows1];
         ArrayUtils.fill(dest, 0);
 
-        ThreadManager.concurrentLoop(0, rows1, (i) -> {
-            int src1Index = i*cols1;
-            int src2Index = 0;
+        ThreadManager.concurrentOperation(rows1, (startIdx, endIdx) -> {
+            for(int i=startIdx; i<endIdx; i++) {
+                int src1Index = i*cols1;
+                int src2Index = 0;
+                CNumber sum = dest[i];
 
-            while(src2Index<rows2) {
-                dest[i] = dest[i].add(src1[src1Index++].mult(src2[src2Index++]));
+                while(src2Index<rows2) {
+                    sum = sum.add(src1[src1Index++].mult(src2[src2Index++]));
+                }
+
+                dest[i] = sum;
             }
         });
 
@@ -1043,23 +1110,28 @@ public static CNumber[] concurrentBlockedVector(CNumber[] src1, Shape shape1, do
         int cols1 = shape1.get(1);
         int rows2 = shape2.get(0);
 
-        CNumber[] dest = new CNumber[rows1];
+        final CNumber[] dest = new CNumber[rows1];
         ArrayUtils.fill(dest, 0);
-        int blockSize = Configurations.getBlockSize();
+        final int blockSize = Configurations.getBlockSize();
 
-        ThreadManager.concurrentLoop(0, rows1, blockSize, (ii) -> {
-            int iBound = Math.min(ii+blockSize, rows1);
+        ThreadManager.concurrentBlockedOperation(rows1, blockSize, (startIdx, endIdx) -> {
+            for(int ii=startIdx; ii<endIdx; ii+=blockSize) {
+                int iBound = Math.min(ii+blockSize, rows1);
 
-            for(int kk=0; kk<rows2; kk+=blockSize) {
-                int kBound = Math.min(kk+blockSize, rows2);
+                for(int kk=0; kk<rows2; kk+=blockSize) {
+                    int kBound = Math.min(kk+blockSize, rows2);
 
-                // Multiply the current blocks
-                for(int i=ii; i<iBound; i++) {
-                    int src1Index = i*cols1 + kk;
-                    int src2Index = kk;
+                    // Multiply the current blocks
+                    for(int i=ii; i<iBound; i++) {
+                        int src1Index = i*cols1 + kk;
+                        int src2Index = kk;
+                        CNumber sum = dest[i];
 
-                    while(src2Index<kBound) {
-                        dest[i] = dest[i].add(src1[src1Index++].mult(src2[src2Index++]));
+                        while(src2Index<kBound) {
+                            sum = sum.add(src1[src1Index++].mult(src2[src2Index++]));
+                        }
+
+                        dest[i] = sum;
                     }
                 }
             }
diff --git a/src/main/java/org/flag4j/operations/dense_sparse/coo/complex/ComplexDenseSparseMatrixMultiplication.java b/src/main/java/org/flag4j/operations/dense_sparse/coo/complex/ComplexDenseSparseMatrixMultiplication.java
index 79b347ba4..b6ce7e1e6 100644
--- a/src/main/java/org/flag4j/operations/dense_sparse/coo/complex/ComplexDenseSparseMatrixMultiplication.java
+++ b/src/main/java/org/flag4j/operations/dense_sparse/coo/complex/ComplexDenseSparseMatrixMultiplication.java
@@ -135,15 +135,19 @@ public static CNumber[] concurrentStandard(CNumber[] src1, Shape shape1, CNumber
         CNumber[] dest = new CNumber[rows1*cols2];
         Arrays.fill(dest, CNumber.ZERO); // Initialize to zeros
 
-        ThreadManager.concurrentLoop(0, rows1, (i) -> {
-            // Loop over non-zero entries of sparse matrix.
-            for(int j=0; j<src2.length; j++) {
-                int row = rowIndices[j];
-                int col = colIndices[j];
-                CNumber product = src1[i*cols1 + row].mult(src2[j]);
-
-                synchronized (dest) {
-                    dest[i*cols2 + col] = dest[i*cols2 + col].add(product);
+        ThreadManager.concurrentOperation(rows1, (startIdx, endIdx) -> {
+            for(int i=startIdx; i<endIdx; i++) {
+                int rowOffset = i*cols2;
+
+                // Loop over non-zero entries of sparse matrix.
+                for(int j=0; j<src2.length; j++) {
+                    int row = rowIndices[j];
+                    int col = colIndices[j];
+                    CNumber product = src1[i*cols1 + row].mult(src2[j]);
+
+                    synchronized (dest) {
+                        dest[rowOffset + col] = dest[rowOffset + col].add(product);
+                    }
                 }
             }
         });
@@ -172,15 +176,18 @@ public static CNumber[] concurrentStandard(CNumber[] src1, int[] rowIndices, int
         CNumber[] dest = new CNumber[rows1*cols2];
         Arrays.fill(dest, CNumber.ZERO); // Initialize to zeros
 
-        ThreadManager.concurrentLoop(0, src1.length, (i) -> {
-            int row = rowIndices[i];
-            int col = colIndices[i];
+        ThreadManager.concurrentOperation(src1.length, (startIdx, endIdx) -> {
+            for(int i=startIdx; i<endIdx; i++) {
+                int row = rowIndices[i];
+                int col = colIndices[i];
+                int rowOffset = row*cols2;
 
-            for(int j=0; j<cols2; j++) {
-                CNumber product = src1[i].mult(src2[col*cols2 + j]);
+                for(int j=0; j<cols2; j++) {
+                    CNumber product = src1[i].mult(src2[col*cols2 + j]);
 
-                synchronized (dest) {
-                    dest[row*cols2 + j] = dest[row*cols2 + j].add(product);
+                    synchronized (dest) {
+                        dest[rowOffset + j] = dest[rowOffset + j].add(product);
+                    }
                 }
             }
         });
@@ -306,15 +313,17 @@ public static CNumber[] concurrentStandardVector(CNumber[] src1, Shape shape1, C
         CNumber[] dest = new CNumber[rows1];
         Arrays.fill(dest, CNumber.ZERO); // Initialize to zeros
 
-        ThreadManager.concurrentLoop(0, rows1, (i) -> {
-            CNumber val = dest[i];
+        ThreadManager.concurrentOperation(rows1, (startIdx, endIdx) -> {
+            for(int i=startIdx; i<endIdx; i++) {
+                CNumber sum = dest[i];
 
-            for(int j=0; j<rows2; j++) {
-                int k = indices[j];
-                val = val.add(src1[i*cols1 + k].mult(src2[j]));
-            }
+                for(int j=0; j<rows2; j++) {
+                    int k = indices[j];
+                    sum = sum.add(src1[i*cols1 + k].mult(src2[j]));
+                }
 
-            dest[i] = val; // Update destination entry.
+                dest[i] = sum; // Update destination entry.
+            }
         });
 
         return dest;
@@ -337,14 +346,16 @@ public static CNumber[] concurrentStandardVector(CNumber[] src1, int[] rowIndice
         CNumber[] dest = new CNumber[rows1];
         Arrays.fill(dest, CNumber.ZERO); // Initialize to zeros
 
-        ThreadManager.concurrentLoop(0, src1.length, (i) -> {
-            int row = rowIndices[i];
-            int col = colIndices[i];
+        ThreadManager.concurrentOperation(src1.length, (startIdx, endIdx) -> {
+            for(int i=startIdx; i<endIdx; i++) {
+                int row = rowIndices[i];
+                int col = colIndices[i];
 
-            CNumber product = src1[i].mult(src2[col]);
+                CNumber product = src1[i].mult(src2[col]);
 
-            synchronized (dest) {
-                dest[row] = dest[row].add(product);
+                synchronized (dest) {
+                    dest[row] = dest[row].add(product);
+                }
             }
         });
 
@@ -368,22 +379,24 @@ public static CNumber[] concurrentBlockedVector(CNumber[] src1, Shape shape1, CN
         final int bsize = Configurations.getBlockSize(); // Get the block size to use.
 
         CNumber[] dest = new CNumber[rows1];
-        Arrays.fill(dest, CNumber.ZERO); // Initialize to zeros
-
-        // Blocked matrix-vector multiply
-        ThreadManager.concurrentLoop(0, rows1, bsize, (ii) -> {
-            for(int jj=0; jj<rows2; jj += bsize) {
-                // Multiply the current blocks
-                for(int i=ii; i<ii+bsize && i<rows1; i++) {
-                    CNumber val = dest[i];
-                    int src1RowOffset = i*cols1;
-
-                    for(int j=jj; j<jj+bsize && j<rows2; j++) {
-                        int k = indices[j];
-                        val = val.add(src1[src1RowOffset + k].mult(src2[j]));
+        Arrays.fill(dest, CNumber.ZERO); // Initialize to zeros.
+
+        // Blocked matrix-vector multiply.
+        ThreadManager.concurrentBlockedOperation(rows1, bsize, (startIdx, endIdx) -> {
+            for(int ii=startIdx; ii<endIdx; ii += bsize) {
+                for(int jj=0; jj<rows2; jj += bsize) {
+                    // Multiply the current blocks
+                    for(int i=ii; i<ii+bsize && i<rows1; i++) {
+                        CNumber val = dest[i];
+                        int src1RowOffset = i*cols1;
+
+                        for(int j=jj; j<jj+bsize && j<rows2; j++) {
+                            int k = indices[j];
+                            val = val.add(src1[src1RowOffset + k].mult(src2[j]));
+                        }
+
+                        dest[i] = val; // Update destination entry.
                     }
-
-                    dest[i] = val; // Update desitination entry.
                 }
             }
         });
diff --git a/src/main/java/org/flag4j/operations/dense_sparse/coo/real/RealDenseSparseMatrixMultiplication.java b/src/main/java/org/flag4j/operations/dense_sparse/coo/real/RealDenseSparseMatrixMultiplication.java
index 7fa5aa903..112c8b76f 100644
--- a/src/main/java/org/flag4j/operations/dense_sparse/coo/real/RealDenseSparseMatrixMultiplication.java
+++ b/src/main/java/org/flag4j/operations/dense_sparse/coo/real/RealDenseSparseMatrixMultiplication.java
@@ -29,8 +29,6 @@
 import org.flag4j.core.Shape;
 import org.flag4j.util.ErrorMessages;
 
-import java.util.concurrent.atomic.AtomicReferenceArray;
-
 /**
  * This class contains low level methods for computing the matrix multiplication (and matrix vector multiplication) between
  * a real dense/sparse matrix and a real sparse/dense matrix or vector.
@@ -42,7 +40,6 @@ private RealDenseSparseMatrixMultiplication() {
         throw new IllegalStateException(ErrorMessages.getUtilityClassErrMsg());
     }
 
-
     // TODO: Investigate if blocked algorithms provide any speedup for multiplying a sparse/dense matrix to a dense/sparse matrix.
 
     /**
@@ -136,23 +133,25 @@ public static double[] concurrentStandard(double[] src1, Shape shape1, double[]
 
         double[] dest = new double[rows1*cols2];
 
-        ThreadManager.concurrentLoop(0, rows1, (i) -> {
-            double[] localResult = new double[cols2]; // Store the result for the local thread.
-            int destRow = i*cols2;
-            int src1Row = i*cols1;
+        ThreadManager.concurrentOperation(rows1, (startIdx, endIdx) -> {
+            for(int i=startIdx; i<endIdx; i++) {
+                double[] localResult = new double[cols2]; // Store the result for the local thread.
+                int destRow = i*cols2;
+                int src1Row = i*cols1;
 
-            // Loop over non-zero entries of sparse matrix.
-            for(int j=0; j<src2.length; j++) {
-                int row = rowIndices[j];
-                int col = colIndices[j];
+                // Loop over non-zero entries of sparse matrix.
+                for(int j=0; j<src2.length; j++) {
+                    int row = rowIndices[j];
+                    int col = colIndices[j];
 
-                localResult[col] += src1[src1Row + row]*src2[j];
-            }
+                    localResult[col] += src1[src1Row + row]*src2[j];
+                }
 
-            // Update the shared destination array by accumulating the local result.
-            synchronized(dest) {
-                for (int j=0; j<cols2; j++) {
-                    dest[destRow + j] += localResult[j];
+                // Update the shared destination array by accumulating the local result.
+                synchronized(dest) {
+                    for (int j=0; j<cols2; j++) {
+                        dest[destRow + j] += localResult[j];
+                    }
                 }
             }
         });
@@ -180,58 +179,27 @@ public static double[] concurrentStandard(double[] src1, int[] rowIndices, int[]
 
         double[] dest = new double[rows1*cols2];
 
-        ThreadManager.concurrentLoop(0, src1.length, (i) -> {
-            int r1 = rowIndices[i];
-            int c1 = colIndices[i];
-
-            int destRowStart = r1 * cols2;
-            int src2RowStart = c1 * cols2;
+        ThreadManager.concurrentOperation(src1.length, (startIdx, endIdx) -> {
+            for(int i=startIdx; i<endIdx; i++) {
+                int r1 = rowIndices[i];
+                int c1 = colIndices[i];
 
-            double[] localResult = new double[cols2];
-            for (int j = 0; j < cols2; j++) {
-                localResult[j] = src1[i]*src2[src2RowStart + j];
-            }
+                int destRowStart = r1 * cols2;
+                int src2RowStart = c1 * cols2;
 
-            synchronized (dest) {
+                double[] localResult = new double[cols2];
                 for (int j = 0; j < cols2; j++) {
-                    dest[destRowStart + j] += localResult[j];
+                    localResult[j] = src1[i]*src2[src2RowStart + j];
                 }
-            }
-        });
-
-
-        return dest;
-    }
-
-
-    public static double[] concurrentAtomicArray(double[] src1, int[] rowIndices1, int[] colIndices1, Shape shape1,
-                                                 double[] src2, Shape shape2) {
-        int rows1 = shape1.get(0);
-        int cols2 = shape2.get(1);
-
-        double[] dest = new double[rows1 * cols2];
-        AtomicReferenceArray<Double> destAtomic = new AtomicReferenceArray<>(rows1 * cols2);
-        for(int i=0; i<destAtomic.length(); i++) {
-            destAtomic.set(i, 0d);
-        }
-
-        ThreadManager.concurrentLoop(0, src1.length, (i) -> {
-            int row = rowIndices1[i];
-            int col = colIndices1[i];
-            int destRow = row*cols2;
-            int src2Row = col*cols2;
 
-            for(int j=0; j<cols2; j++) {
-                double prev = destAtomic.get(destRow + j);
-                double update = prev += src1[i]*src2[src2Row + j];
-                destAtomic.compareAndSet(i, prev, update);
+                synchronized (dest) {
+                    for (int j = 0; j < cols2; j++) {
+                        dest[destRowStart + j] += localResult[j];
+                    }
+                }
             }
         });
 
-        // Convert AtomicDoubleArray back to a normal double array for the result.
-        for (int i = 0; i < dest.length; i++) {
-            dest[i] = destAtomic.get(i);
-        }
 
         return dest;
     }
@@ -256,10 +224,14 @@ public static double[] standardVector(double[] src1, Shape shape1, double[] src2
         int k;
 
         for(int i=0; i<denseRows; i++) {
+            double sum = dest[i];
+
             for(int j=0; j<nonZeros; j++) {
                 k = indices[j];
-                dest[i] += src1[i*denseCols + k]*src2[j];
+                sum += src1[i*denseCols + k]*src2[j];
             }
+
+            dest[i] = sum;
         }
 
         return dest;
@@ -316,10 +288,14 @@ public static double[] blockedVector(double[] src1, Shape shape1, double[] src2,
             for(int jj=0; jj<rows2; jj += bsize) {
                 // Multiply the current blocks
                 for(int i=ii; i<ii+bsize && i<rows1; i++) {
+                    double sum = dest[i];
+
                     for(int j=jj; j<jj+bsize && j<rows2; j++) {
                         k = indices[j];
-                        dest[i] += src1[i*cols1 + k]*src2[j];
+                        sum += src1[i*cols1 + k]*src2[j];
                     }
+
+                    dest[i] = sum;
                 }
             }
         }
@@ -343,10 +319,17 @@ public static double[] concurrentStandardVector(double[] src1, Shape shape1, dou
 
         double[] dest = new double[rows1];
 
-        ThreadManager.concurrentLoop(0, rows1, (i) -> {
-            for(int j=0; j<rows2; j++) {
-                int k = indices[j];
-                dest[i] += src1[i*cols1 + k]*src2[j];
+        ThreadManager.concurrentOperation(rows1, (startIdx, endIdx) -> {
+            for(int i=startIdx; i<endIdx; i++) {
+                int rowOffset = i*cols1;
+                double sum = dest[i];
+
+                for(int j=0; j<rows2; j++) {
+                    int k = indices[j];
+                    sum += src1[rowOffset + k]*src2[j];
+                }
+
+                dest[i] = sum;
             }
         });
 
@@ -369,14 +352,16 @@ public static double[] concurrentStandardVector(double[] src1, int[] rowIndices,
         int rows1 = shape1.get(0);
         double[] dest = new double[rows1];
 
-        ThreadManager.concurrentLoop(0, src1.length, (i) -> {
-            int row = rowIndices[i];
-            int col = colIndices[i];
+        ThreadManager.concurrentOperation(src1.length, (startIdx, endIdx) -> {
+            for(int i=startIdx; i<endIdx; i++) {
+                int row = rowIndices[i];
+                int col = colIndices[i];
 
-            double product = src1[i]*src2[col];
+                double product = src1[i]*src2[col];
 
-            synchronized (dest) {
-                dest[row] += product;
+                synchronized (dest) {
+                    dest[row] += product;
+                }
             }
         });
 
@@ -398,17 +383,22 @@ public static double[] concurrentBlockedVector(double[] src1, Shape shape1, doub
         int rows2 = src2.length;
 
         final int bsize = Configurations.getBlockSize(); // Get the block size to use.
-
         double[] dest = new double[rows1];
 
-        // Blocked matrix-vector multiply
-        ThreadManager.concurrentLoop(0, rows1, bsize, (ii) -> {
-            for(int jj=0; jj<rows2; jj += bsize) {
-                // Multiply the current blocks
-                for(int i=ii; i<ii+bsize && i<rows1; i++) {
-                    for(int j=jj; j<jj+bsize && j<rows2; j++) {
-                        int k = indices[j];
-                        dest[i] += src1[i*cols1 + k]*src2[j];
+        // Blocked matrix-vector multiply.
+        ThreadManager.concurrentBlockedOperation(rows1, bsize, (startIdx, endIdx) -> {
+            for(int ii=startIdx; ii<endIdx; ii+=bsize) {
+                for(int jj=0; jj<rows2; jj += bsize) {
+                    // Multiply the current blocks
+                    for(int i=ii; i<ii+bsize && i<rows1; i++) {
+                        double sum = dest[i];
+
+                        for(int j=jj; j<jj+bsize && j<rows2; j++) {
+                            int k = indices[j];
+                            sum += src1[i*cols1 + k]*src2[j];
+                        }
+
+                        dest[i] = sum;
                     }
                 }
             }
diff --git a/src/main/java/org/flag4j/operations/dense_sparse/coo/real_complex/RealComplexDenseSparseMatrixMultiplication.java b/src/main/java/org/flag4j/operations/dense_sparse/coo/real_complex/RealComplexDenseSparseMatrixMultiplication.java
index 0cea7e45a..b6f71fa27 100644
--- a/src/main/java/org/flag4j/operations/dense_sparse/coo/real_complex/RealComplexDenseSparseMatrixMultiplication.java
+++ b/src/main/java/org/flag4j/operations/dense_sparse/coo/real_complex/RealComplexDenseSparseMatrixMultiplication.java
@@ -141,15 +141,19 @@ public static CNumber[] concurrentStandard(double[] src1, Shape shape1, CNumber[
         CNumber[] dest = new CNumber[rows1*cols2];
         ArrayUtils.fill(dest, 0);
 
-        ThreadManager.concurrentLoop(0, rows1, i -> {
-            // Loop over non-zero entries of sparse matrix.
-            for(int j=0; j<src2.length; j++) {
-                int row = rowIndices[j];
-                int col = colIndices[j];
-                CNumber product = src2[j].mult(src1[i*cols1 + row]);
-
-                synchronized (dest) {
-                    dest[i*cols2 + col] = dest[i*cols2 + col].add(product);
+        ThreadManager.concurrentOperation(rows1, (startIdx, endIdx) -> {
+            for(int i=startIdx; i<endIdx; i++) {
+                int rowOffset = i*cols2;
+
+                // Loop over non-zero entries of sparse matrix.
+                for(int j=0; j<src2.length; j++) {
+                    int row = rowIndices[j];
+                    int col = colIndices[j];
+                    CNumber product = src2[j].mult(src1[i*cols1 + row]);
+
+                    synchronized (dest) {
+                        dest[rowOffset + col] = dest[rowOffset + col].add(product);
+                    }
                 }
             }
         });
@@ -178,15 +182,18 @@ public static CNumber[] concurrentStandard(double[] src1, int[] rowIndices, int[
         CNumber[] dest = new CNumber[rows1*cols2];
         ArrayUtils.fill(dest, 0);
 
-        ThreadManager.concurrentLoop(0, src1.length, i -> {
-            int row = rowIndices[i];
-            int col = colIndices[i];
+        ThreadManager.concurrentOperation(src1.length, (startIdx, endIdx) -> {
+            for(var i=startIdx; i<endIdx; i++) {
+                int row = rowIndices[i];
+                int col = colIndices[i];
+                int rowOffset = row*cols2;
 
-            for(int j=0; j<cols2; j++) {
-                CNumber product = src2[col*cols2 + j].mult(src1[i]);
+                for(int j=0; j<cols2; j++) {
+                    CNumber product = src2[col*cols2 + j].mult(src1[i]);
 
-                synchronized (dest) {
-                    dest[row*cols2 + j] = dest[row*cols2 + j].add(product);
+                    synchronized (dest) {
+                        dest[rowOffset + j] = dest[rowOffset + j].add(product);
+                    }
                 }
             }
         });
@@ -215,15 +222,17 @@ public static CNumber[] standard(CNumber[] src1, Shape shape1, double[] src2,
         ArrayUtils.fill(dest, 0);
 
         int row;
-int col;
+        int col;
 
         for(int i=0; i<rows1; i++) {
+            int destRowOffset = i*cols2;
+
             // Loop over non-zero entries of sparse matrix.
             for(int j=0; j<src2.length; j++) {
                 row = rowIndices[j];
                 col = colIndices[j];
 
-                dest[i*cols2 + col] = dest[i*cols2 + col].add(src1[i*cols1 + row].mult(src2[j]));
+                dest[destRowOffset + col] = dest[destRowOffset + col].add(src1[i*cols1 + row].mult(src2[j]));
             }
         }
 
@@ -287,15 +296,20 @@ public static CNumber[] concurrentStandard(CNumber[] src1, Shape shape1, double[
         CNumber[] dest = new CNumber[rows1*cols2];
         ArrayUtils.fill(dest, 0);
 
-        ThreadManager.concurrentLoop(0, rows1, i -> {
-            // Loop over non-zero entries of sparse matrix.
-            for(int j=0; j<src2.length; j++) {
-                int row = rowIndices[j];
-                int col = colIndices[j];
-                CNumber product = src1[i*cols1 + row].mult(src2[j]);
+        ThreadManager.concurrentOperation(rows1, (startIdx, endIdx) -> {
+            for(int i=startIdx; i<endIdx; i++) {
+                int destRowOffset = i*cols2;
+                int productOffset = i*cols1;
 
-                synchronized (dest) {
-                    dest[i*cols2 + col] = dest[i*cols2 + col].add(product);
+                // Loop over non-zero entries of sparse matrix.
+                for(int j=0; j<src2.length; j++) {
+                    int row = rowIndices[j];
+                    int col = colIndices[j];
+                    CNumber product = src1[productOffset + row].mult(src2[j]);
+
+                    synchronized (dest) {
+                        dest[destRowOffset + col] = dest[destRowOffset + col].add(product);
+                    }
                 }
             }
         });
@@ -324,15 +338,18 @@ public static CNumber[] concurrentStandard(CNumber[] src1, int[] rowIndices, int
         CNumber[] dest = new CNumber[rows1*cols2];
         ArrayUtils.fill(dest, 0);
 
-        ThreadManager.concurrentLoop(0, src1.length, i -> {
-            int row = rowIndices[i];
-            int col = colIndices[i];
+        ThreadManager.concurrentOperation(src1.length, (startIdx, endIdx) -> {
+            for(int i=startIdx; i<endIdx; i++) {
+                int row = rowIndices[i];
+                int col = colIndices[i];
+                int rowOffset = row*cols2;
 
-            for(int j=0; j<cols2; j++) {
-                CNumber product = src1[i].mult(src2[col*cols2 + j]);
+                for(int j=0; j<cols2; j++) {
+                    CNumber product = src1[i].mult(src2[col*cols2 + j]);
 
-                synchronized (dest) {
-                    dest[row*cols2 + j] = dest[row*cols2 + j].add(product);
+                    synchronized (dest) {
+                        dest[rowOffset + j] = dest[rowOffset + j].add(product);
+                    }
                 }
             }
         });
@@ -362,11 +379,14 @@ public static CNumber[] standardVector(double[] src1, Shape shape1, CNumber[] sr
 
         for(int i=0; i<denseRows; i++) {
             int src1RowOffset = i*denseCols;
+            CNumber sum = dest[i];
 
             for(int j=0; j<nonZeros; j++) {
                 k = indices[j];
-                dest[i] = dest[i].add(src2[j].mult(src1[src1RowOffset + k]));
+                sum = sum.add(src2[j].mult(src1[src1RowOffset + k]));
             }
+
+            dest[i] = sum;
         }
 
         return dest;
@@ -394,7 +414,6 @@ public static CNumber[] standardVector(double[] src1, int[] rowIndices, int[] co
         for(int i=0; i<src1.length; i++) {
             row = rowIndices[i];
             col = colIndices[i];
-
             dest[row] = dest[row].add(src2[col].mult(src1[i]));
         }
 
@@ -420,17 +439,19 @@ public static CNumber[] blockedVector(double[] src1, Shape shape1, CNumber[] src
         CNumber[] dest = new CNumber[rows1];
         ArrayUtils.fill(dest, 0);
 
-
         // Blocked matrix-vector multiply
         for(int ii=0; ii<rows1; ii += bsize) {
             for(int jj=0; jj<rows2; jj += bsize) {
                 // Multiply the current blocks
                 for(int i=ii; i<ii+bsize && i<rows1; i++) {
                     int src1RowOffset = i*cols1;
+                    CNumber sum = dest[i];
 
                     for(int j=jj; j<jj+bsize && j<rows2; j++) {
-                        dest[i] = dest[i].add(src2[j].mult(src1[src1RowOffset + indices[j]]));
+                        sum = sum.add(src2[j].mult(src1[src1RowOffset + indices[j]]));
                     }
+
+                    dest[i] = sum;
                 }
             }
         }
@@ -455,10 +476,16 @@ public static CNumber[] concurrentStandardVector(double[] src1, Shape shape1, CN
         CNumber[] dest = new CNumber[rows1];
         ArrayUtils.fill(dest, 0);
 
-        ThreadManager.concurrentLoop(0, rows1, i -> {
-            for(int j=0; j<rows2; j++) {
-                int k = indices[j];
-                dest[i] = dest[i].add(src2[j].mult(src1[i*cols1 + k]));
+        ThreadManager.concurrentOperation(rows1, (startIdx, endIdx) -> {
+            for(int i=startIdx; i<endIdx; i++) {
+                CNumber sum = dest[i];
+
+                for(int j=0; j<rows2; j++) {
+                    int k = indices[j];
+                    sum = sum.add(src2[j].mult(src1[i*cols1 + k]));
+                }
+
+                dest[i] = sum;
             }
         });
 
@@ -482,14 +509,15 @@ public static CNumber[] concurrentStandardVector(double[] src1, int[] rowIndices
         CNumber[] dest = new CNumber[rows1];
         ArrayUtils.fill(dest, 0);
 
+        ThreadManager.concurrentOperation(src1.length, (startIdx, endIdx) -> {
+            for(int i=startIdx; i<endIdx; i++) {
+                int row = rowIndices[i];
+                int col = colIndices[i];
+                CNumber product = src2[col].mult(src1[i]);
 
-        ThreadManager.concurrentLoop(0, src1.length, i -> {
-            int row = rowIndices[i];
-            int col = colIndices[i];
-            CNumber product = src2[col].mult(src1[i]);
-
-            synchronized (dest) {
-                dest[row] = dest[row].add(product);
+                synchronized (dest) {
+                    dest[row] = dest[row].add(product);
+                }
             }
         });
 
@@ -516,13 +544,19 @@ public static CNumber[] concurrentBlockedVector(double[] src1, Shape shape1, CNu
         ArrayUtils.fill(dest, 0);
 
         // Blocked matrix-vector multiply
-        ThreadManager.concurrentLoop(0, rows1, bsize, ii -> {
-            for(int jj=0; jj<rows2; jj += bsize) {
-                // Multiply the current blocks
-                for(int i=ii; i<ii+bsize && i<rows1; i++) {
-                    for(int j=jj; j<jj+bsize && j<rows2; j++) {
-                        int k = indices[j];
-                        dest[i] = dest[i].add(src2[j].mult(src1[i*cols1 + k]));
+        ThreadManager.concurrentBlockedOperation(rows1, bsize, (startIdx, endIdx) -> {
+            for(int ii=startIdx; ii<endIdx; ii += bsize) {
+                for(int jj=0; jj<rows2; jj += bsize) {
+                    // Multiply the current blocks
+                    for(int i=ii; i<ii+bsize && i<rows1; i++) {
+                        CNumber sum = dest[i];
+
+                        for(int j=jj; j<jj+bsize && j<rows2; j++) {
+                            int k = indices[j];
+                            sum = sum.add(src2[j].mult(src1[i*cols1 + k]));
+                        }
+
+                        dest[i] = sum;
                     }
                 }
             }
@@ -550,10 +584,14 @@ public static CNumber[] standardVector(CNumber[] src1, Shape shape1, double[] sr
         int k;
 
         for(int i=0; i<denseRows; i++) {
+            CNumber sum = dest[i];
+
             for(int j=0; j<nonZeros; j++) {
                 k = indices[j];
-                dest[i] = dest[i].add(src1[i*denseCols + k].mult(src2[j]));
+                sum = sum.add(src1[i*denseCols + k].mult(src2[j]));
             }
+
+            dest[i] = sum;
         }
 
         return dest;
@@ -581,7 +619,6 @@ public static CNumber[] standardVector(CNumber[] src1, int[] rowIndices, int[] c
         for(int i=0; i<src1.length; i++) {
             row = rowIndices[i];
             col = colIndices[i];
-
             dest[row] = dest[row].add(src1[i].mult(src2[col]));
         }
 
@@ -613,10 +650,14 @@ public static CNumber[] blockedVector(CNumber[] src1, Shape shape1, double[] src
             for(int jj=0; jj<rows2; jj += bsize) {
                 // Multiply the current blocks
                 for(int i=ii; i<ii+bsize && i<rows1; i++) {
+                    CNumber sum = dest[i];
+
                     for(int j=jj; j<jj+bsize && j<rows2; j++) {
                         k = indices[j];
-                        dest[i] = dest[i].add(src1[i*cols1 + k].mult(src2[j]));
+                        sum = sum.add(src1[i*cols1 + k].mult(src2[j]));
                     }
+
+                    dest[i] = sum;
                 }
             }
         }
@@ -641,10 +682,16 @@ public static CNumber[] concurrentStandardVector(CNumber[] src1, Shape shape1, d
         CNumber[] dest = new CNumber[rows1];
         ArrayUtils.fill(dest, 0);
 
-        ThreadManager.concurrentLoop(0, rows1, i -> {
-            for(int j=0; j<rows2; j++) {
-                int k = indices[j];
-                dest[i] = dest[i].add(src1[i*cols1 + k].mult(src2[j]));
+        ThreadManager.concurrentOperation(rows1, (startIdx, endIdx) -> {
+            for(int i=startIdx; i<endIdx; i++) {
+                CNumber sum = dest[i];
+
+                for(int j=0; j<rows2; j++) {
+                    int k = indices[j];
+                    sum = sum.add(src1[i*cols1 + k].mult(src2[j]));
+                }
+
+                dest[i] = sum;
             }
         });
 
@@ -668,13 +715,15 @@ public static CNumber[] concurrentStandardVector(CNumber[] src1, int[] rowIndice
         CNumber[] dest = new CNumber[rows1];
         ArrayUtils.fill(dest, 0);
 
-        ThreadManager.concurrentLoop(0, src1.length, i -> {
-            int row = rowIndices[i];
-            int col = colIndices[i];
-            CNumber product = src1[i].mult(src2[col]);
+        ThreadManager.concurrentOperation(src1.length, (startIdx, endIdx) -> {
+            for(int i=startIdx; i<endIdx; i++) {
+                int row = rowIndices[i];
+                int col = colIndices[i];
+                CNumber product = src1[i].mult(src2[col]);
 
-            synchronized (dest) {
-                dest[row] = dest[row].add(product);
+                synchronized (dest) {
+                    dest[row] = dest[row].add(product);
+                }
             }
         });
 
@@ -701,13 +750,19 @@ public static CNumber[] concurrentBlockedVector(CNumber[] src1, Shape shape1, do
         ArrayUtils.fill(dest, 0);
 
         // Blocked matrix-vector multiply
-        ThreadManager.concurrentLoop(0, rows1, bsize, ii -> {
-            for(int jj=0; jj<rows2; jj += bsize) {
-                // Multiply the current blocks
-                for(int i=ii; i<ii+bsize && i<rows1; i++) {
-                    for(int j=jj; j<jj+bsize && j<rows2; j++) {
-                        int k = indices[j];
-                        dest[i] = dest[i].add(src1[i*cols1 + k].mult(src2[j]));
+        ThreadManager.concurrentBlockedOperation(rows1, bsize, (startIdx, endIdx) -> {
+            for(int ii=startIdx; ii<endIdx; ii += bsize) {
+                for(int jj=0; jj<rows2; jj += bsize) {
+                    // Multiply the current blocks
+                    for(int i=ii; i<ii+bsize && i<rows1; i++) {
+                        CNumber sum = dest[i];
+
+                        for(int j=jj; j<jj+bsize && j<rows2; j++) {
+                            int k = indices[j];
+                            sum = sum.add(src1[i*cols1 + k].mult(src2[j]));
+                        }
+
+                        dest[i] = sum;
                     }
                 }
             }
diff --git a/src/main/java/org/flag4j/operations/sparse/coo/complex/ComplexSparseMatrixMultiplication.java b/src/main/java/org/flag4j/operations/sparse/coo/complex/ComplexSparseMatrixMultiplication.java
index 28d72a578..7a6aafec5 100644
--- a/src/main/java/org/flag4j/operations/sparse/coo/complex/ComplexSparseMatrixMultiplication.java
+++ b/src/main/java/org/flag4j/operations/sparse/coo/complex/ComplexSparseMatrixMultiplication.java
@@ -120,17 +120,19 @@ public static CNumber[] concurrentStandard(CNumber[] src1, int[] rowIndices1, in
         // and value is a list of indices in src2 where this row appears.
         Map<Integer, List<Integer>> map = SparseUtils.createMap(src2.length, rowIndices2);
 
-        ThreadManager.concurrentLoop(0, src1.length, (i)->{
-            int c1 = colIndices1[i]; // = k
-
-            // Check if any values in src2 have the same row index as the column index of the value in src1.
-            if(map.containsKey(c1)) {
-                int r1 = rowIndices1[i]; // = i
-                int rowIdx = r1*cols2;
-
-                for(int j : map.get(c1)) { // Iterate over all entries in src2 where rowIndices[j] == colIndices[j]
-                    int idx = rowIdx + colIndices2[j];
-                    destMap.put(idx, destMap.getOrDefault(idx, CNumber.ZERO).add(src1[i].mult(src2[j])));
+        ThreadManager.concurrentOperation(src1.length, (startIdx, endIdx) -> {
+            for(int i=startIdx; i<endIdx; i++) {
+                int c1 = colIndices1[i]; // = k
+
+                // Check if any values in src2 have the same row index as the column index of the value in src1.
+                if(map.containsKey(c1)) {
+                    int r1 = rowIndices1[i]; // = i
+                    int rowIdx = r1*cols2;
+
+                    for(int j : map.get(c1)) { // Iterate over all entries in src2 where rowIndices2[j] == colIndices1[i]
+                        int idx = rowIdx + colIndices2[j];
+                        destMap.put(idx, destMap.getOrDefault(idx, CNumber.ZERO).add(src1[i].mult(src2[j])));
+                    }
                 }
             }
         });
@@ -200,18 +202,20 @@ public static CNumber[] concurrentStandardVector(CNumber[] src1, int[] rowIndice
         CNumber[] dest = new CNumber[rows1];
         ArrayUtils.fill(dest, 0);
 
-        ThreadManager.concurrentLoop(0, src1.length, (i) -> {
-            int r1 = rowIndices1[i]; // = i
-            int c1 = colIndices1[i]; // = k
+        ThreadManager.concurrentOperation(src1.length, (startIdx, endIdx) -> {
+            for(int i=startIdx; i<endIdx; i++) {
+                int r1 = rowIndices1[i]; // = i
+                int c1 = colIndices1[i]; // = k
 
-            for(int j=0; j<src2.length; j++) {
-                int r2 = indices[j]; // = k
+                for(int j=0; j<src2.length; j++) {
+                    int r2 = indices[j]; // = k
 
-                if(c1==r2) { // Then we multiply and add to sum.
-                    CNumber product = src1[i].mult(src2[j]);
+                    if(c1==r2) { // Then we multiply and add to sum.
+                        CNumber product = src1[i].mult(src2[j]);
 
-                    synchronized (dest) {
-                        dest[r1] = dest[r1].add(product);
+                        synchronized (dest) {
+                            dest[r1] = dest[r1].add(product);
+                        }
                     }
                 }
             }
diff --git a/src/main/java/org/flag4j/operations/sparse/coo/real/RealSparseMatrixMultiplication.java b/src/main/java/org/flag4j/operations/sparse/coo/real/RealSparseMatrixMultiplication.java
index 8bcccf3db..ff4e05ba5 100644
--- a/src/main/java/org/flag4j/operations/sparse/coo/real/RealSparseMatrixMultiplication.java
+++ b/src/main/java/org/flag4j/operations/sparse/coo/real/RealSparseMatrixMultiplication.java
@@ -72,6 +72,7 @@ public static double[] standard(double[] src1, int[] rowIndices1, int[] colIndic
 
         for(int i=0; i<src1.length; i++) {
             int c1 = colIndices1[i]; // = k
+            double src1Val = src1[i];
 
             // Check if any values in src2 have the same row index as the column index of the value in src1.
             if(map.containsKey(c1)) {
@@ -80,7 +81,7 @@ public static double[] standard(double[] src1, int[] rowIndices1, int[] colIndic
 
                 for(int j : map.get(c1)) { // Iterate over all entries in src2 where rowIndices[j] == colIndices[j]
                     int c2 = colIndices2[j]; // = j
-                    dest[rowIdx + c2] += src1[i]*src2[j];
+                    dest[rowIdx + c2] += src1Val*src2[j];
                 }
             }
         }
@@ -117,17 +118,20 @@ public static double[] concurrentStandard(double[] src1, int[] rowIndices1, int[
         // and value is a list of indices in src2 where this row appears.
         Map<Integer, List<Integer>> map = SparseUtils.createMap(src2.length, rowIndices2);
 
-        ThreadManager.concurrentLoop(0, src1.length, (i)->{
-            int c1 = colIndices1[i]; // = k
+        ThreadManager.concurrentOperation(src1.length, (startIdx, endIdx) -> {
+            for(int i=startIdx; i<endIdx; i++) {
+                int c1 = colIndices1[i]; // = k
+                double src1Val = src1[i];
 
-            // Check if any values in src2 have the same row index as the column index of the value in src1.
-            if(map.containsKey(c1)) {
-                int r1 = rowIndices1[i]; // = i
-                int rowIdx = r1*cols2;
+                // Check if any values in src2 have the same row index as the column index of the value in src1.
+                if(map.containsKey(c1)) {
+                    int r1 = rowIndices1[i]; // = i
+                    int rowIdx = r1*cols2;
 
-                for(int j : map.get(c1)) { // Iterate over all entries in src2 where rowIndices[j] == colIndices[j]
-                    int idx = rowIdx + colIndices2[j];
-                    destMap.put(idx, destMap.getOrDefault(idx, 0d) + src1[i]*src2[j]);
+                    for(int j : map.get(c1)) { // Iterate over all entries in src2 where rowIndices[j] == colIndices[j]
+                        int idx = rowIdx + colIndices2[j];
+                        destMap.merge(idx, src1Val*src2[j], Double::sum); // One-call accumulate; atomic if destMap is a ConcurrentHashMap.
+                    }
                 }
             }
         });
@@ -195,20 +199,23 @@ public static double[] concurrentStandardVector(double[] src1, int[] rowIndices1
         int rows1 = shape1.get(0);
         double[] dest = new double[rows1];
 
-        ThreadManager.concurrentLoop(0, src1.length, (i) -> {
-            int r1 = rowIndices1[i]; // = i
-            int c1 = colIndices1[i]; // = k
-
-            for(int j=0; j<src2.length; j++) {
-                int r2 = indices[j]; // = k
+        ThreadManager.concurrentOperation(src1.length, (startIdx, endIdx) -> {
+            for(int i=startIdx; i<endIdx; i++) {
+                int r1 = rowIndices1[i]; // = i
+                int c1 = colIndices1[i]; // = k
+                double src1Val = src1[i];
+                double sum = 0; // Local partial sum for this entry; dest is only read/written under the lock below.
 
-                if(c1==r2) { // Then we multiply and add to sum.
-                    double product = src1[i]*src2[j];
+                for(int j=0; j<src2.length; j++) {
+                    int r2 = indices[j]; // = k
 
-                    synchronized (dest) {
-                        dest[r1] += product;
+                    if(c1==r2) { // Then we multiply and add to sum.
+                        double product = src1Val*src2[j];
+                        sum += product;
                     }
                 }
+
+                synchronized (dest) { dest[r1] += sum; } // Locked read-modify-write: other threads may update the same row.
             }
         });
 
diff --git a/src/main/java/org/flag4j/operations/sparse/coo/real_complex/RealComplexSparseMatrixMultiplication.java b/src/main/java/org/flag4j/operations/sparse/coo/real_complex/RealComplexSparseMatrixMultiplication.java
index 1657e4d23..032fafd47 100644
--- a/src/main/java/org/flag4j/operations/sparse/coo/real_complex/RealComplexSparseMatrixMultiplication.java
+++ b/src/main/java/org/flag4j/operations/sparse/coo/real_complex/RealComplexSparseMatrixMultiplication.java
@@ -76,6 +76,7 @@ public static CNumber[] standard(CNumber[] src1, int[] rowIndices1, int[] colInd
 
         for(int i=0; i<src1.length; i++) {
             int c1 = colIndices1[i]; // = k
+            var src1Value = src1[i];
 
             // Check if any values in src2 have the same row index as the column index of the value in src1.
             if(map.containsKey(c1)) {
@@ -84,7 +85,7 @@ public static CNumber[] standard(CNumber[] src1, int[] rowIndices1, int[] colInd
 
                 for(int j : map.get(c1)) { // Iterate over all entries in src2 where rowIndices[j] == colIndices[j]
                     int c2 = colIndices2[j]; // = j
-                    dest[rowIdx + c2] = dest[rowIdx + c2].add(src1[i].mult(src2[j]));
+                    dest[rowIdx + c2] = dest[rowIdx + c2].add(src1Value.mult(src2[j]));
                 }
             }
         }
@@ -119,17 +120,20 @@ public static CNumber[] concurrentStandard(CNumber[] src1, int[] rowIndices1, in
         // and value is a list of indices in src2 where this row appears.
         Map<Integer, List<Integer>> map = SparseUtils.createMap(src2.length, rowIndices2);
 
-        ThreadManager.concurrentLoop(0, src1.length, (i)->{
-            int c1 = colIndices1[i]; // = k
+        ThreadManager.concurrentOperation(src1.length, (startIdx, endIdx) -> {
+            for(int i=startIdx; i<endIdx; i++) {
+                int c1 = colIndices1[i]; // = k
+                var src1Value = src1[i];
 
-            // Check if any values in src2 have the same row index as the column index of the value in src1.
-            if(map.containsKey(c1)) {
-                int r1 = rowIndices1[i]; // = i
-                int rowIdx = r1*cols2;
+                // Check if any values in src2 have the same row index as the column index of the value in src1.
+                if(map.containsKey(c1)) {
+                    int r1 = rowIndices1[i]; // = i
+                    int rowIdx = r1*cols2;
 
-                for(int j : map.get(c1)) { // Iterate over all entries in src2 where rowIndices[j] == colIndices[j]
-                    int idx = rowIdx + colIndices2[j];
-                    destMap.put(idx, destMap.getOrDefault(idx, CNumber.ZERO).add(src1[i].mult(src2[j])));
+                    for(int j : map.get(c1)) { // Iterate over all entries in src2 where rowIndices[j] == colIndices[j]
+                        int idx = rowIdx + colIndices2[j];
+                        destMap.merge(idx, src1Value.mult(src2[j]), (acc, term) -> acc.add(term)); // One-call accumulate; atomic if destMap is a ConcurrentHashMap.
+                    }
                 }
             }
         });
@@ -169,14 +173,18 @@ public static CNumber[] standardVector(CNumber[] src1, int[] rowIndices1, int[]
         for(int i=0; i<src1.length; i++) {
             r1 = rowIndices1[i]; // = i
             c1 = colIndices1[i]; // = k
+            var sum = dest[r1*cols2];
+            var src1Value = src1[i];
 
             for(int j=0; j<src2.length; j++) {
                 r2 = indices[j]; // = k
 
                 if(c1==r2) { // Then we multiply and add to sum.
-                    dest[r1*cols2] = dest[r1*cols2].add(src1[i].mult(src2[j]));
+                    sum = sum.add(src1Value.mult(src2[j]));
                 }
             }
+
+            dest[r1*cols2] = sum;
         }
 
         return dest;
@@ -204,20 +212,25 @@ public static CNumber[] concurrentStandardVector(CNumber[] src1, int[] rowIndice
         CNumber[] dest = new CNumber[rows1*cols2];
         Arrays.fill(dest, CNumber.ZERO);
 
-        ThreadManager.concurrentLoop(0, src1.length, (i) -> {
-            int r1 = rowIndices1[i]; // = i
-            int c1 = colIndices1[i]; // = k
-
-            for(int j=0; j<src2.length; j++) {
-                int r2 = indices[j]; // = k
+        ThreadManager.concurrentOperation(src1.length, (startIdx, endIdx) -> {
+            for(int i=startIdx; i<endIdx; i++) {
+                int r1 = rowIndices1[i]; // = i
+                int c1 = colIndices1[i]; // = k
+                CNumber sum = CNumber.ZERO; // Local partial sum for this entry; dest is only read/written under the lock below.
+                CNumber src1Value = src1[i];
 
-                if(c1==r2) { // Then we multiply and add to sum.
-                    CNumber product = src1[i].mult(src2[j]);
+                for(int j=0; j<src2.length; j++) {
+                    int r2 = indices[j]; // = k
 
-                    synchronized (dest) {
-                        dest[r1*cols2] = dest[r1*cols2].add(product);
+                    if(c1==r2) { // Then we multiply and add to sum.
+                        CNumber product = src1Value.mult(src2[j]);
+                        sum = sum.add(product);
                     }
                 }
+
+                synchronized (dest) {
+                    dest[r1*cols2] = dest[r1*cols2].add(sum);
+                }
             }
         });
 
@@ -251,6 +264,7 @@ public static CNumber[] standard(double[] src1, int[] rowIndices1, int[] colIndi
 
         for(int i=0; i<src1.length; i++) {
             int c1 = colIndices1[i]; // = k
+            var src1Value = src1[i];
 
             // Check if any values in src2 have the same row index as the column index of the value in src1.
             if(map.containsKey(c1)) {
@@ -259,7 +273,7 @@ public static CNumber[] standard(double[] src1, int[] rowIndices1, int[] colIndi
 
                 for(int j : map.get(c1)) { // Iterate over all entries in src2 where rowIndices[j] == colIndices[j]
                     int c2 = colIndices2[j]; // = j
-                    dest[rowIdx + c2] = dest[rowIdx + c2].add(src2[j].mult(src1[i]));
+                    dest[rowIdx + c2] = dest[rowIdx + c2].add(src2[j].mult(src1Value));
                 }
             }
         }
@@ -294,17 +308,20 @@ public static CNumber[] concurrentStandard(double[] src1, int[] rowIndices1, int
         // and value is a list of indices in src2 where this row appears.
         Map<Integer, List<Integer>> map = SparseUtils.createMap(src2.length, rowIndices2);
 
-        ThreadManager.concurrentLoop(0, src1.length, (i)->{
-            int c1 = colIndices1[i]; // = k
+        ThreadManager.concurrentOperation(src1.length, (startIdx, endIdx) -> {
+            for(int i=startIdx; i<endIdx; i++) {
+                int c1 = colIndices1[i]; // = k
+                var src1Value = src1[i];
 
-            // Check if any values in src2 have the same row index as the column index of the value in src1.
-            if(map.containsKey(c1)) {
-                int r1 = rowIndices1[i]; // = i
-                int rowIdx = r1*cols2;
+                // Check if any values in src2 have the same row index as the column index of the value in src1.
+                if(map.containsKey(c1)) {
+                    int r1 = rowIndices1[i]; // = i
+                    int rowIdx = r1*cols2;
 
-                for(int j : map.get(c1)) { // Iterate over all entries in src2 where rowIndices[j] == colIndices[j]
-                    int idx = rowIdx + colIndices2[j];
-                    destMap.put(idx, destMap.getOrDefault(idx, CNumber.ZERO).add(src2[j].mult(src1[i])));
+                    for(int j : map.get(c1)) { // Iterate over all entries in src2 where rowIndices[j] == colIndices[j]
+                        int idx = rowIdx + colIndices2[j];
+                        destMap.merge(idx, src2[j].mult(src1Value), (acc, term) -> acc.add(term)); // One-call accumulate; atomic if destMap is a ConcurrentHashMap.
+                    }
                 }
             }
         });
@@ -333,9 +350,8 @@ public static CNumber[] standardVector(double[] src1, int[] rowIndices1, int[] c
                                            CNumber[] src2, int[] indices, Shape shape2) {
 
         int rows1 = shape1.get(0);
-        int cols2 = shape2.get(1);
 
-        CNumber[] dest = new CNumber[rows1*cols2];
+        CNumber[] dest = new CNumber[rows1];
         Arrays.fill(dest, CNumber.ZERO);
 
         // r1, c1, r2, and store the indices for non-zero values in src1 and src2.
@@ -344,14 +360,18 @@ public static CNumber[] standardVector(double[] src1, int[] rowIndices1, int[] c
         for(int i=0; i<src1.length; i++) {
             r1 = rowIndices1[i]; // = i
             c1 = colIndices1[i]; // = k
+            var src1Value = src1[i];
+            var sum = dest[r1];
 
             for(int j=0; j<src2.length; j++) {
                 r2 = indices[j]; // = k
 
                 if(c1==r2) { // Then we multiply and add to sum.
-                    dest[r1*cols2] = dest[r1*cols2].add(src2[j].mult(src1[i]));
+                    sum = sum.add(src2[j].mult(src1Value));
                 }
             }
+
+            dest[r1] = sum;
         }
 
         return dest;
@@ -374,25 +394,29 @@ public static CNumber[] concurrentStandardVector(double[] src1, int[] rowIndices
                                                      CNumber[] src2, int[] indices, Shape shape2) {
 
         int rows1 = shape1.get(0);
-        int cols2 = shape2.get(1);
 
-        CNumber[] dest = new CNumber[rows1*cols2];
+        CNumber[] dest = new CNumber[rows1];
         Arrays.fill(dest, CNumber.ZERO);
 
-        ThreadManager.concurrentLoop(0, src1.length, (i) -> {
-            int r1 = rowIndices1[i]; // = i
-            int c1 = colIndices1[i]; // = k
-
-            for(int j=0; j<src2.length; j++) {
-                int r2 = indices[j]; // = k
+        ThreadManager.concurrentOperation(src1.length, (startIdx, endIdx) -> {
+            for(int i=startIdx; i<endIdx; i++) {
+                int r1 = rowIndices1[i]; // = i
+                int c1 = colIndices1[i]; // = k
+                CNumber sum = CNumber.ZERO; // Local partial sum for this entry; dest is only read/written under the lock below.
+                var src1Value = src1[i];
 
-                if(c1==r2) { // Then we multiply and add to sum.
-                    CNumber product = src2[j].mult(src1[i]);
+                for(int j=0; j<src2.length; j++) {
+                    int r2 = indices[j]; // = k
 
-                    synchronized (dest) {
-                        dest[r1*cols2] = dest[r1*cols2].add(product);
+                    if(c1==r2) { // Then we multiply and add to sum.
+                        CNumber product = src2[j].mult(src1Value);
+                        sum = sum.add(product);
                     }
                 }
+
+                synchronized (dest) {
+                    dest[r1] = dest[r1].add(sum);
+                }
             }
         });
 
diff --git a/src/main/java/org/flag4j/util/ArrayUtils.java b/src/main/java/org/flag4j/util/ArrayUtils.java
index bee513f06..5b1f2d32c 100644
--- a/src/main/java/org/flag4j/util/ArrayUtils.java
+++ b/src/main/java/org/flag4j/util/ArrayUtils.java
@@ -337,9 +337,8 @@ public static void fill(final CNumber[][] dest, final CNumber fillValue) {
      */
     public static void fill(final CNumber[] dest, final double fillValue, final int from, final int to) {
         ParameterChecks.assertLessEq(to, from + 1);
-
-        for (int i = from; i < to; i++)
-            dest[i] = new CNumber(fillValue);
+        CNumber complexFillValue = new CNumber(fillValue);
+        Arrays.fill(dest, from, to, complexFillValue);
     }
 
 
@@ -355,8 +354,7 @@ public static void fill(final CNumber[] dest, final double fillValue, final int
      * @throws ArrayIndexOutOfBoundsException If {@code start} or {@code end} is not within the destination array.
      */
     public static void fill(final CNumber[] dest, final int start, final int end, final CNumber fillValue) {
-        for (int i = start; i < end; i++)
-            dest[i] = fillValue;
+        Arrays.fill(dest, start, end, fillValue);
     }
 
 
@@ -535,6 +533,25 @@ public static void swap(final int[] src, final int[] indices) {
     }
 
 
+    /**
+     * Swaps elements in an array according to a specified permutation. This method should be used with extreme caution as unlike
+     * {@link #swap(int[], int[])}, this method does <b>not</b> verify that {@code indices} is a permutation.
+     *
+     * @param src     Array to swap elements within.
+     * @param indices Array containing indices of the permutation. If the {@code src} array has length {@code N}, then
+     *                the array must be a permutation of {@code {0, 1, 2, ..., N-1}}.
+     */
+    public static void swapUnsafe(final int[] src, final int[] indices) {
+        int[] swapped = new int[src.length];
+        int i = 0;
+
+        for(int value : indices)
+            swapped[i++] = src[value];
+
+        System.arraycopy(swapped, 0, src, 0, swapped.length);
+    }
+
+
     /**
      * Swaps to elements in an array. This is done in place.
      *
diff --git a/target/flag4j-v0.1.0-beta.jar b/target/flag4j-v0.1.0-beta.jar
index c4c2f7ad1..af96840b8 100644
Binary files a/target/flag4j-v0.1.0-beta.jar and b/target/flag4j-v0.1.0-beta.jar differ