diff --git a/src/hamc/MatrixAdd.cu b/src/hamc/MatrixAdd.cu
index b6dcb7d..d560c69 100644
--- a/src/hamc/MatrixAdd.cu
+++ b/src/hamc/MatrixAdd.cu
@@ -3,13 +3,16 @@
 #define ushort unsigned short
 
 __global__ void MatrixAdd(ushort *A, ushort *B, ushort *C,
-                                     int height, int width,) {
-        int ROW = blockIdx.y*blockDim.y + threadIdx.y;
+                                     int height, int width,)
+    int ROW = blockIdx.y*blockDim.y + threadIdx.y;
 	
-	int COL = blockIdx.x*blockDim.x + threadIdx.x;
+    int COL = blockIdx.x*blockDim.x + threadIdx.x;
 	
-	if((ROW < height) && (COL < width)){
-		int address = ROW*width+COL;
-		C[address] = A[i] ^ B[i];
-	}
+    int index = row * N + col;
+ 
+    if( (ROW < numARows) && (COL < numAColumns) )
+    {
+  	C[index] = A[index] ^ B[index];
+  	
+    }
 }
diff --git a/src/tests/MatrixAdd/MatrixAdd_cpu.c b/src/tests/MatrixAdd/MatrixAdd_cpu.c
index fb34c1a..884ad10 100644
--- a/src/tests/MatrixAdd/MatrixAdd_cpu.c
+++ b/src/tests/MatrixAdd/MatrixAdd_cpu.c
@@ -7,23 +7,8 @@
 #define mat_element(mat, cols, row_idx, col_idx) \
   mat[(row_idx * cols) + col_idx]
   
-//initialize the matrix
-ushort mat_init(int rows, int cols)
-{
-  if(rows <= 0 || cols <= 0)
-  {
-    return NULL;
-  }
-  ushort A;
-  A = (ushort)safe_malloc(sizeof(struct matrix));
-  A->cols = cols;
-  A->rows = rows; 
-  A->data = (ushort *)safe_malloc(rows*cols*sizeof(ushort)); 
-  return A;
-}
-
 //Set the value of matix element at position given by the indices to "val"
-void set_matrix_element(ushort A, int row_idx, int col_idx, ushort val)
+void set_matrix_element(bin_matrix A, int row_idx, int col_idx, bin_matrix val)
 {
   if(row_idx < 0 || row_idx >= A->rows || col_idx < 0 || col_idx >= A->cols)
   {
@@ -33,14 +18,14 @@ void set_matrix_element(ushort A, int row_idx, int col_idx, ushort val)
   mat_element(A, row_idx, col_idx) = val;
 }
 
-ushort matrix_add(ushort *A, ushort *B)
+bin_matrix MatrixAdd_cpu(bin_matrix *A, bin_matrix *B)
 {
-    if(A->rows != B->rows || A->cols != B->cols)
-    {
-        printf("Incompatible dimensions for matrix addition. \n");
-        exit(0);
-    }
-    ushort temp mat_init(A->rows, A->cols);
+  //  if(A->rows != B->rows || A->cols != B->cols)
+  //   {
+  //       printf("Incompatible dimensions for matrix addition. \n");
+  //      exit(0);
+  //  }
+  // bin_matrix temp mat_init(A->rows, A->cols);
     for(int i = 0; i < A->rows; i++)
     {
         for(int j = 0; j < A->cols; j++)
diff --git a/src/tests/MatrixAdd/MatrixAdd_cpu.h b/src/tests/MatrixAdd/MatrixAdd_cpu.h
index c52e819..db3cbf3 100644
--- a/src/tests/MatrixAdd/MatrixAdd_cpu.h
+++ b/src/tests/MatrixAdd/MatrixAdd_cpu.h
@@ -8,9 +8,8 @@ extern "C" {
 #define ushort unsigned short
 
 //FUNCTIONS go in here
-ushort mat_init(int rows, int cols);
-void set_matrix_element(ushort A, int row_idx, int col_idx, ushort val);
-ushort add_matrix(ushort *A, ushort *B);
+void set_matrix_element(bin_matrix A, int row_idx, int col_idx, ushort val);  /* val is one element, not a matrix */
+bin_matrix MatrixAdd_cpu(bin_matrix A, bin_matrix B);  /* bin_matrix is already a pointer to struct matrix */
 
 
 #ifdef __cplusplus
diff --git a/src/tests/MatrixAdd/main.cu b/src/tests/MatrixAdd/main.cu
index f2b636f..6ffec5d 100644
--- a/src/tests/MatrixAdd/main.cu
+++ b/src/tests/MatrixAdd/main.cu
@@ -10,6 +10,7 @@
 #include "../../hamc/MatrixAdd.cu"
 
 #define TILE_WIDTH 16
+#define ushort unsigned short
 
 #define CUDA_CHECK(ans)                                                   \
   { gpuAssert((ans), __FILE__, __LINE__); }
@@ -23,6 +24,28 @@ inline void gpuAssert(cudaError_t code, const char *file, int line,
   }
 }
 
+/* Row-major lvalue access to one element; arguments are parenthesised so expressions such as i + 1 expand correctly. */
+#define mat_element(mat, row_idx, col_idx) \
+    ((mat)->data[(row_idx) * (mat)->cols + (col_idx)])
+typedef struct matrix
+{
+    int rows;      /* number of rows */
+    int cols;      /* number of columns; also the row stride used by mat_element */
+    ushort *data;  /* element storage, addressed as data[row * cols + col] */
+
+}*bin_matrix;      /* note: bin_matrix is a POINTER to struct matrix */
+
+/* malloc() wrapper: returns n bytes, or prints to stderr and exits with EXIT_FAILURE. */
+void* safe_malloc(size_t n)
+{
+    void* p = malloc(n);
+    if (!p) {
+        fprintf(stderr, "Out of memory(%zu bytes)\n", n);  /* %zu matches size_t; %lu did not */
+        exit(EXIT_FAILURE);
+    }
+    return p;
+}
+
 void printHelp()
 {
     printf("run this executable with the following flags\n");
@@ -32,30 +55,85 @@ void printHelp()
     printf("\t-s <solution file name>\n");
 }
 
+/* CPU reference path: checks that A and B have equal dimensions, then returns MatrixAdd_cpu(A, B). */
+bin_matrix run_cpu(bin_matrix A, bin_matrix B)
+{
+    if (A->rows != B->rows || A->cols != B->cols) {
+        fprintf(stderr, "Matrices are incompatible, check dimensions...\n");
+        exit(EXIT_FAILURE);  /* a failed precondition must not report success */
+    }
+    return MatrixAdd_cpu(A, B);
+}
 
-void run_cpu(const char *in, const char*sol)
+/* GPU path: copies A and B to the device, launches MatrixAdd (element-wise XOR)
+ * and returns a newly allocated host matrix with the result. The caller owns it. */
+bin_matrix run_kernel(bin_matrix A, bin_matrix B)
 {
-    int numARows, numAColumns;
-    ushort *hostA = (ushort *)wbImport(in, &numARows, &numAColumns);
-    ushort *hostC = (ushort *)malloc(numARows*numAColumns * sizeof(ushort));
+    if (A->rows != B->rows || A->cols != B->cols){
+        fprintf(stderr, "Matrices are incompatible, check dimensions...\n");
+        exit(EXIT_FAILURE);  /* a failed precondition must not report success */
+    }
+    size_t bytes = (size_t)A->rows * A->cols * sizeof(ushort);  /* same size for A, B and C */
+    ushort *deviceA, *deviceB, *deviceC;
+    /* Result matrix on the host; C was previously used without ever being declared. */
+    bin_matrix C = (bin_matrix)safe_malloc(sizeof(struct matrix));
+    C->rows = A->rows;
+    C->cols = A->cols;
+    C->data = (ushort *)safe_malloc(bytes);
+
+    wbTime_start(GPU, "Allocating GPU memory.");
+    CUDA_CHECK(cudaMalloc((void **) &deviceA, bytes));
+    CUDA_CHECK(cudaMalloc((void **) &deviceB, bytes));
+    CUDA_CHECK(cudaMalloc((void **) &deviceC, bytes));
+    wbTime_stop(GPU, "Allocating GPU memory.");
+    CUDA_CHECK(cudaMemcpy(deviceA, A->data, bytes, cudaMemcpyHostToDevice));
+    CUDA_CHECK(cudaMemcpy(deviceB, B->data, bytes, cudaMemcpyHostToDevice));
+
+    /* One thread per element; ceil-divide so partial tiles at the edges are covered. */
+    dim3 DimBlock(TILE_WIDTH, TILE_WIDTH, 1);
+    dim3 DimGrid((A->cols - 1)/TILE_WIDTH + 1, (A->rows - 1)/TILE_WIDTH + 1, 1);
+    MatrixAdd<<<DimGrid, DimBlock>>>(deviceA, deviceB, deviceC, A->rows, A->cols);  /* kernel is MatrixAdd, not matrixAdd */
+    CUDA_CHECK(cudaGetLastError());       /* launch-configuration errors */
+    CUDA_CHECK(cudaDeviceSynchronize());  /* errors raised while the kernel ran */
+
+    wbTime_start(Copy, "Copying output memory to the CPU");
+    CUDA_CHECK(cudaMemcpy(C->data, deviceC, bytes, cudaMemcpyDeviceToHost));
+    wbTime_stop(Copy, "Copying output memory to the CPU");
+
+    wbTime_start(GPU, "Freeing GPU Memory");
+    CUDA_CHECK(cudaFree(deviceA));
+    CUDA_CHECK(cudaFree(deviceB));
+    CUDA_CHECK(cudaFree(deviceC));
+    wbTime_stop(GPU, "Freeing GPU Memory");
 
-    matrix_add(hostA, hostC, numARows, numAColumns);
+    return C;
 }
 
 
 int main(int argc, char *argv[])
 {
     printf("MatrixAdd test:\n");
+    
+    // Variables - arguments and matrix descriptors (host-side structs)
     wbArg_t args;
-
-    ushort *hostA; // The A matrix
-    ushort *hostC; // The output C matrix
-    ushort *deviceA; // A matrix on device
-    ushort *deviceB; // B matrix on device (copy of A)
-    ushort *deviceC; // C matrix on device
+    bin_matrix A;
+    bin_matrix B;
+    bin_matrix C;
+    
+    // Variable - Matrices (Host)
+    ushort *hostA;
+    ushort *hostB;
+    ushort *hostC;
+    
+    // Variables - Rows & Cols
     int numARows;    // number of rows in the matrix A
     int numAColumns; // number of columns in the matrix A
-
+    int numBRows;    // number of rows in the matrix B
+    int numBColumns; // number of columns in the matrix B
+    int numCRows;    // number of rows in the matrix C
+    int numCColumns; // number of columns in the matrix C
+    
+    //Inputs
     char *inputFileName;
     char *solutionFileName;
 
@@ -97,49 +175,20 @@ int main(int argc, char *argv[])
 
     /* allocate host data for matrix */
     wbTime_start(Generic, "Importing data and creating memory on host");
+    
     hostA = (ushort *)wbImport(inputFileName, &numARows, &numAColumns);
-    int numBRows = numARows;    // number of rows in the matrix B
-    int numBColumns = numAColumns; // number of columns in the matrix B
-    int numCRows = numARows;    // number of rows in the matrix C
-    int numCColumns = numAColumns; // number of columns in the matrix C
+    /* Build the descriptors directly: this patch removes mat_init, and numACols was never declared. */
+    A = (bin_matrix)safe_malloc(sizeof(struct matrix));
+    A->rows = numARows; A->cols = numAColumns; A->data = hostA;
+    hostB = (ushort *)wbImport(inputFileName, &numBRows, &numBColumns);
+    B = (bin_matrix)safe_malloc(sizeof(struct matrix));
+    B->rows = numBRows; B->cols = numBColumns; B->data = hostB;
+    numCRows = numARows; numCColumns = numAColumns;  /* C has A's shape; both were read uninitialised below */
     hostC = (ushort *)malloc(numCRows*numCColumns * sizeof(ushort));
+    
     wbTime_stop(Generic, "Importing data and creating memory on host");
 
 
-    wbLog(TRACE, "The dimensions of A are ", numARows, " x ", numAColumns);
-
-
-    /* allocate the memory space on GPU */
-    wbTime_start(GPU, "Allocating GPU memory.");
-    cudaMalloc((void**) &deviceA, numARows * numAColumns * sizeof(ushort));
-    cudaMalloc((void**) &deviceB, numBRows * numBColumns * sizeof(ushort));
-    cudaMalloc((void**) &deviceC, numCRows * numCColumns * sizeof(ushort));
-    wbTime_stop(GPU, "Allocating GPU memory.");
-
-
-    dim3 dimGrid((numCColumns - 1) / 16 + 1, (numCRows - 1) / TILE_WIDTH + 1, 1);
-    dim3 dimBlock(TILE_WIDTH, TILE_WIDTH, 1);
-
-    /* call CUDA kernel to perform computations */
-    wbTime_start(Compute, "Performing CUDA computation for RREF");
-    MatrixAdd_kernel<<<dimGrid, dimBlock>>>(deviceA, deviceB, deviceC, numARows);
-    cudaDeviceSynchronize();
-    wbTime_stop(Compute, "Performing CUDA computation");
-
-
-
-    wbTime_start(Copy, "Copying output memory to the CPU");
-    cudaMemcpy(hostC, deviceC, numCRows * numCColumns * sizeof(ushort), cudaMemcpyDeviceToHost);
-    wbTime_stop(Copy, "Copying output memory to the CPU");
-
-    /* Free GPU Memory */
-    wbTime_start(GPU, "Freeing GPU Memory");
-    cudaFree(deviceA);
-    cudaFree(deviceC);
-    wbTime_stop(GPU, "Freeing GPU Memory");
-
-    wbSolution(args, hostC, numCRows, numCColumns);
-
     free(hostA);
     free(hostB);
     free(hostC);
diff --git a/src/tests/MatrixAdd/sources.cmake b/src/tests/MatrixAdd/sources.cmake
index bd51ac4..918c38b 100644
--- a/src/tests/MatrixAdd/sources.cmake
+++ b/src/tests/MatrixAdd/sources.cmake
@@ -1,3 +1,3 @@
 add_lab("ADD_MATRIX_test")
-add_lab_solution("ADD_MATRIX_test", ${CMAKE_CURRENT_LIST_DIR}/main.cu)
-add_generator("ADD_MATRIX_test" ${CMAKE_CURRENT_LIST_DIR}/dataset_generator.cpp)
+add_lab_solution("ADD_MATRIX_test" ${CMAKE_CURRENT_LIST_DIR}/main.cu)
+#add_generator("ADD_MATRIX_test" ${CMAKE_CURRENT_LIST_DIR}/dataset_generator.cpp)
diff --git a/src/tests/sources.cmake b/src/tests/sources.cmake
index baac856..ccc0763 100644
--- a/src/tests/sources.cmake
+++ b/src/tests/sources.cmake
@@ -1 +1,3 @@
 include(${CMAKE_CURRENT_LIST_DIR}/RREFMatrix/sources.cmake)
+include(${CMAKE_CURRENT_LIST_DIR}/MatrixAdd/sources.cmake)
+include(${CMAKE_CURRENT_LIST_DIR}/InverseMatrix/sources.cmake)