- * WARNING: These methods do not perform any sanity checks.
+ * This class contains several low-level methods for computing complex matrix-matrix multiplications. This includes transpose
+ * multiplications.
+ *
+ * WARNING: These methods do not perform any sanity checks.
*/
public final class ComplexDenseMatrixMultiplication {
@@ -72,11 +73,14 @@ public static CNumber[] standard(CNumber[] src1, Shape shape1, CNumber[] src2, S
src1Index = src1IndexStart;
destIndex = destIndexStart + j;
end = src1Index + rows2;
+ CNumber sum = dest[destIndex];
while(src1Index {
- int src1IndexStart = i*cols1;
- int destIndexStart = i*cols2;
+ ThreadManager.concurrentOperation(rows1, (startIdx, endIdx) -> {
+ for(int i=startIdx; i {
- int src1IndexStart = i*rows2;
- int destIndexStart = i*cols2;
+ ThreadManager.concurrentOperation(rows1, (startIdx, endIdx) -> {
+ for(int i=startIdx; i {
- int iBound = Math.min(ii + blockSize, rows1);
+ ThreadManager.concurrentBlockedOperation(rows1, blockSize, (startIdx, endIdx) -> {
+ for(int ii=startIdx; ii {
- int iBound = Math.min(ii + blockSize, rows1);
-
- for(int kk = 0; kk {
+ for(int ii=startIdx; ii {
- int src1Index = i*cols1;
- int src2Index = 0;
+ ThreadManager.concurrentOperation(rows1, (startIdx, endIdx) -> {
+ for(int i=startIdx; i {
- int iBound = Math.min(ii+blockSize, rows1);
+ ThreadManager.concurrentBlockedOperation(rows1, blockSize, (startIdx, endIdx) -> {
+ for(int ii=startIdx; ii {
- int[] destIndices = shape.getIndices(i);
- ArrayUtils.swap(destIndices, axis1, axis2); // Compute destination indices.
- dest[destShape.entriesIndex(destIndices)] = src[i]; // Apply transpose for the element
+ ThreadManager.concurrentOperation(src.length, (startIdx, endIdx) -> {
+ for(int i=startIdx; i {
- int[] destIndices = shape.getIndices(i);
- ArrayUtils.swap(destIndices, axes); // Compute destination indices.
- dest[destShape.entriesIndex(destIndices)] = src[i]; // Apply transpose for the element
+ // Compute transpose concurrently.
+ ThreadManager.concurrentOperation(src.length, (startIdx, endIdx) -> {
+ for(int i=startIdx; i {
- int srcIndex = i;
- int destIndex = i*numRows;
- int end = destIndex + numRows;
-
- while (destIndex < end) {
- dest[destIndex++] = src[srcIndex];
- srcIndex += numCols;
+ ThreadManager.concurrentOperation(numCols, (startIdx, endIdx) -> {
+ for(int i=startIdx; i {
- for(int j=0; j {
+ for(int i=startIdx; i {
- int[] destIndices = shape.getIndices(i);
- ArrayUtils.swap(destIndices, axis1, axis2); // Compute destination indices.
- dest[destShape.entriesIndex(destIndices)] = src[i].conj(); // Apply transpose for the element
+ ThreadManager.concurrentOperation(src.length, (startIdx, endIdx) -> {
+ for(int i=startIdx; i {
- int[] destIndices = shape.getIndices(i);
- ArrayUtils.swap(destIndices, axes); // Compute destination indices.
- dest[destShape.entriesIndex(destIndices)] = src[i].conj(); // Apply conjugate transpose for the element
+ ThreadManager.concurrentOperation(src.length, (startIdx, endIdx) -> {
+ for(int i=startIdx; i {
- int srcIndex = i;
- int destIndex = i*numRows;
- int end = destIndex + numRows;
-
- while (destIndex < end) {
- dest[destIndex++] = src[srcIndex].conj();
- srcIndex += numCols;
+ ThreadManager.concurrentOperation(numCols, (startIdx, endIdx) -> {
+ for(int i=startIdx; i {
- for(int j=0; j {
+ for(int i=startIdx; i product[i] = src1[i]/src2[i]
- );
+ ThreadManager.concurrentOperation(product.length, (startIdx, endIdx) -> {
+ for(int i=startIdx; i product[i] = src1[i]*src2[i]
- );
+ ThreadManager.concurrentOperation(product.length, (startIdx, endIdx) -> {
+ for(int i=startIdx; i {
- int src1IndexStart = i*cols2;
- int destIndexStart = i*rows2;
- int end = src1IndexStart + cols2;
+ ThreadManager.concurrentOperation(rows1, (startIdx, endIdx) -> {
+ for(int i=startIdx; i{
- int iBound = Math.min(ii + blockSize, rows1);
+ ThreadManager.concurrentBlockedOperation(rows1, blockSize, (startIdx, endIdx) -> {
+ for(int ii=startIdx; ii {
- int src1IndexStart = i*cols1;
- int destIndexStart = i*cols2;
+ ThreadManager.concurrentOperation(rows1, (startRow, endRow) -> {
+ for (int i = startRow; i < endRow; i++) {
+ int src1IndexStart = i * cols1;
+ int destIndexStart = i * cols2;
- for(int j=0; j {
- int src1IndexStart = i*rows2;
- int destIndexStart = i*cols2;
+ ThreadManager.concurrentOperation(rows1, (startRow, endRow) -> {
+ for(int i = startRow; i {
- int iBound = Math.min(ii + blockSize, rows1);
-
- for(int jj = 0; jj {
+ for (int jj = 0; jj < cols2; jj += blockSize) {
int jBound = Math.min(jj + blockSize, cols2);
- for(int kk = 0; kk {
- int iBound = Math.min(ii + blockSize, rows1);
+ ThreadManager.concurrentBlockedOperation(rows1, blockSize, (blockStart, blockEnd) -> {
+ for(int ii=blockStart; ii {
- int src1Index = i*cols1;
- int src2Index = 0;
+ ThreadManager.concurrentOperation(rows1, (rowStart, rowEnd) -> {
+ for(int i=rowStart; i {
- int iBound = Math.min(ii+blockSize, rows1);
+ ThreadManager.concurrentBlockedOperation(rows1, blockSize, (rowStart, rowEnd) -> {
+ for(int ii=rowStart; ii {
- int[] destIndices = shape.getIndices(i);
- ArrayUtils.swap(destIndices, axes); // Compute destination indices.
- dest[destShape.entriesIndex(destIndices)] = src[i]; // Apply transpose for the element
+ ThreadManager.concurrentOperation(src.length, (startIdx, endIdx) -> {
+ for(int i=startIdx; i {
- int[] destIndices = shape.getIndices(i);
- ArrayUtils.swap(destIndices, axis1, axis2); // Compute destination indices.
- dest[destShape.entriesIndex(destIndices)] = src[i]; // Apply transpose for the element
+ ThreadManager.concurrentOperation(src.length, (startIdx, endIdx) -> {
+ for(int i=startIdx; i {
- int srcIndex = i;
- int destIndex = i*numRows;
- int end = destIndex + numRows;
-
- while (destIndex < end) {
- dest[destIndex++] = src[srcIndex];
- srcIndex += numCols;
+ ThreadManager.concurrentOperation(numCols, (startIdx, endIdx) -> {
+ for(int i=startIdx; i{
- int blockHeight = Math.min(ii+blockSize, numRows) - ii;
- int srcIndexStart = ii*numCols;
- int destIndexStart = ii;
-
- for(int jj=0; jj {
+ for(int ii=startIdx; ii= warmupRuns) {
- bTime += (bEnd-bStart)*10e-6;
- sTime += (sEnd-sStart)*10e-6;
- }
- }
-
- System.out.printf("Shape: (%d, %d)\n\n", rows, cols);
- System.out.printf("Standard Time: %.5f ms\n", sTime/numRuns);
- System.out.printf("Blocked Time: %.5f ms\n", bTime/numRuns);
- }
}
diff --git a/src/main/java/org/flag4j/operations/dense/real/RealDenseVectorOperations.java b/src/main/java/org/flag4j/operations/dense/real/RealDenseVectorOperations.java
index 81865c709..2dc0de633 100644
--- a/src/main/java/org/flag4j/operations/dense/real/RealDenseVectorOperations.java
+++ b/src/main/java/org/flag4j/operations/dense/real/RealDenseVectorOperations.java
@@ -97,12 +97,14 @@ public static double[] outerProduct(double[] src1, double[] src2) {
public static double[] outerProductConcurrent(double[] src1, double[] src2) {
double[] dest = new double[src1.length*src2.length];
- ThreadManager.concurrentLoop(0, src1.length, (int i)->{
- int destIndex = i*src2.length;
- double v1 = src1[i];
-
- for(double v2 : src2) {
- dest[destIndex++] = v1*v2;
+ ThreadManager.concurrentOperation(src1.length, (startIdx, endIdx) -> {
+ for(int i=startIdx; iproduct[i] = src1[i].div(src2[i])
- );
+ ThreadManager.concurrentOperation(product.length, (startIdx, endIdx) -> {
+ for(int i=startIdx; i {
- double divisor = src2[i].re*src2[i].re + src2[i].im*src2[i].im;
- quotient[i] = new CNumber(src1[i]*src2[i].re / divisor, -src1[i]*src2[i].im / divisor);
+ ThreadManager.concurrentOperation(quotient.length, (startIdx, endIdx) -> {
+ for(int i=startIdx; iproduct[i] = src1[i].mult(src2[i])
- );
+ ThreadManager.concurrentOperation(product.length, (startIdx, endIdx) -> {
+ for(int i=startIdx; i {
+ for(int i=startIdx; i {
- int src1IndexStart = i*cols2;
- int destIndexStart = i*rows2;
- int end = src1IndexStart + cols2;
-
- for(int j=0; j{
- int iBound = Math.min(ii + blockSize, rows1);
+ ThreadManager.concurrentBlockedOperation(rows1, blockSize, (startIdx, endIdx) -> {
+ for(int ii=startIdx; ii {
- int src1IndexStart = i*cols2;
- int destIndexStart = i*rows2;
- int end = src1IndexStart + cols2;
+ ThreadManager.concurrentOperation(rows1, (startIdx, endIdx) -> {
+ for(int i=startIdx; i {
+ for(int ii=startIdx; ii{
- int iBound = Math.min(ii + blockSize, rows1);
+ for(int jj = 0; jj {
- int src1IndexStart = i*cols1;
- int destIndexStart = i*cols2;
+ ThreadManager.concurrentOperation(rows1, (startIdx, endIdx) -> {
+ for(int i=startIdx; i {
- int src1IndexStart = i*rows2;
- int destIndexStart = i*cols2;
+ ThreadManager.concurrentOperation(rows1, (startIdx, endIdx) -> {
+ for(int i=startIdx; i {
- int iBound = Math.min(ii + blockSize, rows1);
+ ThreadManager.concurrentBlockedOperation(rows1, blockSize, (startIdx, endIdx) -> {
+ for(int ii=startIdx; ii {
- int iBound = Math.min(ii + blockSize, rows1);
+ final int blockSize = Configurations.getBlockSize();
- for(int kk = 0; kk {
+ for(int ii=startIdx; ii {
- int src1Index = i*cols1;
- int src2Index = 0;
+ ThreadManager.concurrentOperation(rows1, (startIdx, endIdx) -> {
+ for(int i=startIdx; i {
- int iBound = Math.min(ii+blockSize, rows1);
+ ThreadManager.concurrentBlockedOperation(rows1, blockSize, (startIdx, endIdx) -> {
+ for(int ii=startIdx; ii {
- int src1IndexStart = i*cols1;
- int destIndexStart = i*cols2;
+ ThreadManager.concurrentOperation(rows1, (startIdx, endIdx) -> {
+ for(int i=startIdx; i {
- int src1IndexStart = i*rows2;
- int destIndexStart = i*cols2;
+ ThreadManager.concurrentOperation(rows1, (startIdx, endIdx) -> {
+ for(int i=startIdx; i {
- int iBound = Math.min(ii + blockSize, rows1);
+ ThreadManager.concurrentBlockedOperation(rows1, blockSize, (startIdx, endIdx) -> {
+ for(int ii=startIdx; ii {
- int iBound = Math.min(ii + blockSize, rows1);
+ final int blockSize = Configurations.getBlockSize();
- for(int kk = 0; kk {
+ for(int ii=startIdx; ii {
- int src1Index = i*cols1;
- int src2Index = 0;
+ ThreadManager.concurrentOperation(rows1, (startIdx, endIdx) -> {
+ for(int i=startIdx; i {
- int iBound = Math.min(ii+blockSize, rows1);
+ ThreadManager.concurrentBlockedOperation(rows1, blockSize, (startIdx, endIdx) -> {
+ for(int ii=startIdx; ii {
- // Loop over non-zero entries of sparse matrix.
- for(int j=0; j {
+ for(int i=startIdx; i {
- int row = rowIndices[i];
- int col = colIndices[i];
+ ThreadManager.concurrentOperation(src1.length, (startIdx, endIdx) -> {
+ for(int i=startIdx; i {
- CNumber val = dest[i];
+ ThreadManager.concurrentOperation(rows1, (startIdx, endIdx) -> {
+ for(int i=startIdx; i {
- int row = rowIndices[i];
- int col = colIndices[i];
+ ThreadManager.concurrentOperation(src1.length, (startIdx, endIdx) -> {
+ for(int i=startIdx; i {
- for(int jj=0; jj {
+ for(int ii=startIdx; ii {
- double[] localResult = new double[cols2]; // Store the result for the local thread.
- int destRow = i*cols2;
- int src1Row = i*cols1;
+ ThreadManager.concurrentOperation(rows1, (startIdx, endIdx) -> {
+ for(int i=startIdx; i {
- int r1 = rowIndices[i];
- int c1 = colIndices[i];
-
- int destRowStart = r1 * cols2;
- int src2RowStart = c1 * cols2;
+ ThreadManager.concurrentOperation(src1.length, (startIdx, endIdx) -> {
+ for(int i=startIdx; i destAtomic = new AtomicReferenceArray<>(rows1 * cols2);
- for(int i=0; i {
- int row = rowIndices1[i];
- int col = colIndices1[i];
- int destRow = row*cols2;
- int src2Row = col*cols2;
- for(int j=0; j {
- for(int j=0; j {
+ for(int i=startIdx; i {
- int row = rowIndices[i];
- int col = colIndices[i];
+ ThreadManager.concurrentOperation(src1.length, (startIdx, endIdx) -> {
+ for(int i=startIdx; i {
- for(int jj=0; jj {
+ for(int ii=startIdx; ii {
- // Loop over non-zero entries of sparse matrix.
- for(int j=0; j {
+ for(int i=startIdx; i {
- int row = rowIndices[i];
- int col = colIndices[i];
+ ThreadManager.concurrentOperation(src1.length, (startIdx, endIdx) -> {
+ for(var i=startIdx; i {
- // Loop over non-zero entries of sparse matrix.
- for(int j=0; j {
+ for(int i=startIdx; i {
- int row = rowIndices[i];
- int col = colIndices[i];
+ ThreadManager.concurrentOperation(src1.length, (startIdx, endIdx) -> {
+ for(int i=startIdx; i {
- for(int j=0; j {
+ for(int i=startIdx; i {
+ for(int i=startIdx; i {
- int row = rowIndices[i];
- int col = colIndices[i];
- CNumber product = src2[col].mult(src1[i]);
-
- synchronized (dest) {
- dest[row] = dest[row].add(product);
+ synchronized (dest) {
+ dest[row] = dest[row].add(product);
+ }
}
});
@@ -516,13 +544,19 @@ public static CNumber[] concurrentBlockedVector(double[] src1, Shape shape1, CNu
ArrayUtils.fill(dest, 0);
// Blocked matrix-vector multiply
- ThreadManager.concurrentLoop(0, rows1, bsize, ii -> {
- for(int jj=0; jj {
+ for(int ii=startIdx; ii {
- for(int j=0; j {
+ for(int i=startIdx; i {
- int row = rowIndices[i];
- int col = colIndices[i];
- CNumber product = src1[i].mult(src2[col]);
+ ThreadManager.concurrentOperation(src1.length, (startIdx, endIdx) -> {
+ for(int i=startIdx; i {
- for(int jj=0; jj {
+ for(int ii=startIdx; ii> map = SparseUtils.createMap(src2.length, rowIndices2);
- ThreadManager.concurrentLoop(0, src1.length, (i)->{
- int c1 = colIndices1[i]; // = k
-
- // Check if any values in src2 have the same row index as the column index of the value in src1.
- if(map.containsKey(c1)) {
- int r1 = rowIndices1[i]; // = i
- int rowIdx = r1*cols2;
-
- for(int j : map.get(c1)) { // Iterate over all entries in src2 where rowIndices[j] == colIndices[j]
- int idx = rowIdx + colIndices2[j];
- destMap.put(idx, destMap.getOrDefault(idx, CNumber.ZERO).add(src1[i].mult(src2[j])));
+ ThreadManager.concurrentOperation(src1.length, (startIdx, endIdx) -> {
+ for(int i=startIdx; i {
- int r1 = rowIndices1[i]; // = i
- int c1 = colIndices1[i]; // = k
+ ThreadManager.concurrentOperation(src1.length, (startIdx, endIdx) -> {
+ for(int i=startIdx; i> map = SparseUtils.createMap(src2.length, rowIndices2);
- ThreadManager.concurrentLoop(0, src1.length, (i)->{
- int c1 = colIndices1[i]; // = k
+ ThreadManager.concurrentOperation(src1.length, (startIdx, endIdx) -> {
+ for(int i=startIdx; i {
- int r1 = rowIndices1[i]; // = i
- int c1 = colIndices1[i]; // = k
-
- for(int j=0; j {
+ for(int i=startIdx; i> map = SparseUtils.createMap(src2.length, rowIndices2);
- ThreadManager.concurrentLoop(0, src1.length, (i)->{
- int c1 = colIndices1[i]; // = k
+ ThreadManager.concurrentOperation(src1.length, (startIdx, endIdx) -> {
+ for(int i=startIdx; i {
- int r1 = rowIndices1[i]; // = i
- int c1 = colIndices1[i]; // = k
-
- for(int j=0; j {
+ for(int i=startIdx; i> map = SparseUtils.createMap(src2.length, rowIndices2);
- ThreadManager.concurrentLoop(0, src1.length, (i)->{
- int c1 = colIndices1[i]; // = k
+ ThreadManager.concurrentOperation(src1.length, (startIdx, endIdx) -> {
+ for(int i=startIdx; i {
- int r1 = rowIndices1[i]; // = i
- int c1 = colIndices1[i]; // = k
-
- for(int j=0; j {
+ for(int i=startIdx; inot verify that {@code indices} is a permutation.
+ *
+ * @param src Array to swap elements within.
+ * @param indices Array containing indices of the permutation. If the {@code src} array has length {@code N}, then
+ * the array must be a permutation of {@code {0, 1, 2, ..., N-1}}.
+ */
+ public static void swapUnsafe(final int[] src, final int[] indices) { // No validation of indices is performed in this method.
+ int[] swapped = new int[src.length]; // Scratch buffer for the permuted values; same length as src.
+ int i = 0; // Next position to fill in swapped.
+
+ for(int value : indices) // Each value names the position in src to read from.
+ swapped[i++] = src[value]; // Position i of the result receives src[indices[i]].
+
+ System.arraycopy(swapped, 0, src, 0, swapped.length); // Copy back so that src itself ends up holding the permuted values.
+ }
+
+
/**
* Swaps to elements in an array. This is done in place.
*
diff --git a/target/flag4j-v0.1.0-beta.jar b/target/flag4j-v0.1.0-beta.jar
index c4c2f7ad1..af96840b8 100644
Binary files a/target/flag4j-v0.1.0-beta.jar and b/target/flag4j-v0.1.0-beta.jar differ