-
Notifications
You must be signed in to change notification settings - Fork 354
Optimization1
Jianyu Huang edited this page Aug 11, 2016
·
7 revisions
Copy the contents of file MMult0.c
into a file named MMult1.c
and change the contents:
from
/* Create macros so that the matrices are stored in column-major order */
#define A(i,j) a[ (j)*lda + (i) ]
#define B(i,j) b[ (j)*ldb + (i) ]
#define C(i,j) c[ (j)*ldc + (i) ]
/* Routine for computing C = A * B + C */
void MY_MMult( int m, int n, int k, double *a, int lda,
double *b, int ldb,
double *c, int ldc )
{
int i, j, p;
for ( i=0; i<m; i++ ){ /* Loop over the rows of C */
for ( j=0; j<n; j++ ){ /* Loop over the columns of C */
for ( p=0; p<k; p++ ){ /* Update C( i,j ) with the inner
product of the ith row of A and
the jth column of B */
C( i,j ) = C( i,j ) + A( i,p ) * B( p,j );
}
}
}
}
to
/* Create macros so that the matrices are stored in column-major order */
#define A(i,j) a[ (j)*lda + (i) ]
#define B(i,j) b[ (j)*ldb + (i) ]
#define C(i,j) c[ (j)*ldc + (i) ]
/* Routine for computing C = A * B + C */
void AddDot( int, double *, int, double *, double * );
void MY_MMult( int m, int n, int k, double *a, int lda,
double *b, int ldb,
double *c, int ldc )
{
int i, j;
for ( j=0; j<n; j+=1 ){ /* Loop over the columns of C */
for ( i=0; i<m; i+=1 ){ /* Loop over the rows of C */
/* Update the C( i,j ) with the inner product of the ith row of A
and the jth column of B */
AddDot( k, &A( i,0 ), lda, &B( 0,j ), &C( i,j ) );
}
}
}
/* Create macro to let X( i ) equal the ith element of x */
#define X(i) x[ (i)*incx ]
void AddDot( int k, double *x, int incx, double *y, double *gamma )
{
/* compute gamma := x' * y + gamma with vectors x and y of length n.
Here x starts at location x with increment (stride) incx and y starts at location y and has (implicit) stride of 1.
*/
int p;
for ( p=0; p<k; p++ ){
*gamma += X( p ) * y[ p ];
}
}
Change the first lines in the makefile
to
OLD := MMult0
NEW := MMult1
make run
octave:3> PlotAll % this will create the plot
This time the performance graph will look something like
We see no performance benefit. What we are doing is slowly changing the code into one where there will be a performance benefit.