From bb767a280efa62b9e7b6a8bf3765194e1c93cd25 Mon Sep 17 00:00:00 2001
From: Rafael Sachetto <rsachetto@gmail.com>
Date: Thu, 6 Jun 2024 11:54:54 -0300
Subject: [PATCH] Simple eikonal solver implementation based on
 https://github.com/SCIInstitute/StructuredEikonal

---
 bsbash/find_functions.sh                      |  12 +-
 build.sh                                      |   4 +-
 .../plain_mesh_no_fibrosis_example.ini        |  10 +-
 src/common_types/common_types.h               |  42 +-
 src/eikonal/build.sh                          |  10 +
 src/eikonal/common_def.h                      |  60 +++
 src/eikonal/cuda_fim.cu                       | 226 +++++++++
 src/eikonal/cuda_fim.h                        |  21 +
 src/eikonal/cuda_fim_kernel.cu                | 424 +++++++++++++++++
 src/eikonal/cuda_fim_kernel.h                 |  41 ++
 src/eikonal/eikonal_solver.c                  | 427 ++++++++++++++++++
 src/eikonal/eikonal_solver.h                  |  26 ++
 src/logger/logger.h                           |   2 +
 src/main_eikonal.c                            | 333 ++------------
 src/utils/build.sh                            |   4 +-
 src/utils/file_utils.h                        |   2 -
 src/utils/heap.c                              |  90 ----
 src/utils/heap.h                              |  18 -
 18 files changed, 1340 insertions(+), 412 deletions(-)
 create mode 100644 src/eikonal/build.sh
 create mode 100644 src/eikonal/common_def.h
 create mode 100644 src/eikonal/cuda_fim.cu
 create mode 100644 src/eikonal/cuda_fim.h
 create mode 100644 src/eikonal/cuda_fim_kernel.cu
 create mode 100644 src/eikonal/cuda_fim_kernel.h
 create mode 100644 src/eikonal/eikonal_solver.c
 create mode 100644 src/eikonal/eikonal_solver.h
 delete mode 100644 src/utils/heap.c
 delete mode 100644 src/utils/heap.h

diff --git a/bsbash/find_functions.sh b/bsbash/find_functions.sh
index 1d0b892d..3a0d17bb 100644
--- a/bsbash/find_functions.sh
+++ b/bsbash/find_functions.sh
@@ -8,8 +8,14 @@ FIND_CUDA () {
 	NVCC=""
 	CUDA_FOUND=""
 
+	LD_CONFIG=ldconfig
+
+	if [ "$OS" == "openSUSE Tumbleweed" ]; then
+		LD_CONFIG=/sbin/ldconfig
+	fi
+
 	if [ -z "$CUDA_LIBRARY_PATH" ]; then
-		CUDA_LIBRARY_PATH=$(dirname "$(ldconfig -p | grep libcudart | awk '{print $4}' | head -n 1 | head -c -5)" 2> /dev/null)
+		CUDA_LIBRARY_PATH=$(dirname "$($LD_CONFIG -p | grep libcudart | awk '{print $4}' | head -n 1 | head -c -5)" 2> /dev/null)
 	fi
 
 	if [ -z "$CUDA_INCLUDE_PATH" ]; then
@@ -19,6 +25,8 @@ FIND_CUDA () {
 		elif [ "$OS" == "Fedora" ]; then
 			CUDA_INCLUDE_PATH="/usr/local/cuda/include"
 			CUDA_LIBRARY_PATH="/usr/local/cuda/lib64"
+		elif [ "$OS" == "openSUSE Tumbleweed" ]; then
+			CUDA_INCLUDE_PATH="/usr/local/cuda/include"
 		else
             if [ "$CI" = true ]; then
 			    CUDA_INCLUDE_PATH='/usr/local/cuda/include'
@@ -120,7 +128,7 @@ FIND_MPI () {
 }
 
 FIND_AMGX() {
-  AMGX_LIBRARIES=""
+  AMGX_LIBRARIES="amgxsh"
   AMGX_LIBRARY_PATH=""
   AMGX_INCLUDE_PATH=""
   AMGX_FOUND=""
diff --git a/build.sh b/build.sh
index b2cbfa95..ae1d54a9 100755
--- a/build.sh
+++ b/build.sh
@@ -185,6 +185,7 @@ ADD_SUBDIRECTORY "src/3dparty/tinyexpr"
 ADD_SUBDIRECTORY "src/3dparty/miniz"
 ADD_SUBDIRECTORY "src/vtk_utils"
 ADD_SUBDIRECTORY "src/ensight_utils"
+ADD_SUBDIRECTORY "src/eikonal/"
 
 
 #DINAMIC DEPS
@@ -265,7 +266,6 @@ if [ -n "$COMPILE_MPI" ]; then
 	 COMPILE_EXECUTABLE "MonoAlg3D_batch" "$SRC_FILES" "$HDR_FILES" "$STATIC_DEPS" "$DYNAMIC_DEPS" "$EXECUTABLES_LIBRARY_PATH $EXTRA_LIB_PATH" "$INCLUDE_P -I$MPI_INCLUDE_PATH"
       fi
 
-
   fi
 
 fi
@@ -296,7 +296,7 @@ if [ -n "$COMPILE_CLIP" ]; then
 fi
 
 if [ -n "$COMPILE_EIKONAL" ]; then
-    COMPILE_EXECUTABLE "MonoAlg3D_eikonal_solver" "src/main_eikonal.c" "" "$STATIC_DEPS" "$DYNAMIC_DEPS" "$EXECUTABLES_LIBRARY_PATH $EXTRA_LIB_PATH"
+    COMPILE_EXECUTABLE "MonoAlg3D_eikonal_solver" "src/main_eikonal.c" "" "$STATIC_DEPS" "$DYNAMIC_DEPS eikonal_solver" "$EXECUTABLES_LIBRARY_PATH $EXTRA_LIB_PATH"
 fi
 
 
diff --git a/example_configs/plain_mesh_no_fibrosis_example.ini b/example_configs/plain_mesh_no_fibrosis_example.ini
index 1a4d7472..58098472 100644
--- a/example_configs/plain_mesh_no_fibrosis_example.ini
+++ b/example_configs/plain_mesh_no_fibrosis_example.ini
@@ -23,7 +23,7 @@ file_prefix=V
 [assembly_matrix]
 init_function=set_initial_conditions_fvm
 sigma_x=0.0000176
-sigma_y=0.0001334
+sigma_y=0.0000176
 sigma_z=0.0000176
 library_file=shared_libs/libdefault_matrix_assembly.so
 main_function=homogeneous_sigma_assembly_matrix
@@ -65,7 +65,9 @@ start = 0.0
 duration = 2.0
 period = 250.0
 current = -50.0
-x_limit = 100.0
-main_function=stim_if_x_less_than
+min_x = 0.0
+max_x = 400.0
+min_y = 0.0
+max_y = 400.0
+main_function=stim_x_y_limits
 ;////////////////////////////////////////////////
-
diff --git a/src/common_types/common_types.h b/src/common_types/common_types.h
index 09f6049f..d55fb782 100644
--- a/src/common_types/common_types.h
+++ b/src/common_types/common_types.h
@@ -1,9 +1,9 @@
 #ifndef MONOALG3D_COMMON_TYPES_H
 #define MONOALG3D_COMMON_TYPES_H
 
+#include "../3dparty/sds/sds.h"
 #include <stdbool.h>
 #include <stdint.h>
-#include "../3dparty/sds/sds.h"
 
 #define Pragma(x) _Pragma(#x)
 #define OMP(directive) Pragma(omp directive)
@@ -17,6 +17,42 @@ typedef double real;
 typedef float real;
 #endif
 
+#define EPS (real)1e-16
+
+#define ALLOCATE_3D_ARRAY(array, type, size_x, size_y, size_z)                                                                                                 \
+    do {                                                                                                                                                       \
+        array = (type ***)malloc(size_x * sizeof(type **));                                                                                                    \
+                                                                                                                                                               \
+        if(array == NULL) {                                                                                                                                    \
+            log_error_and_exit("Memory allocation failed for first dimension\n");                                                                              \
+        }                                                                                                                                                      \
+                                                                                                                                                               \
+        for(int i = 0; i < size_x; ++i) {                                                                                                                      \
+            array[i] = (type **)malloc(size_y * sizeof(type *));                                                                                               \
+            if(array[i] == NULL) {                                                                                                                             \
+                log_error_and_exit("Memory allocation failed for second dimension\n");                                                                         \
+            }                                                                                                                                                  \
+                                                                                                                                                               \
+            for(int j = 0; j < size_y; ++j) {                                                                                                                  \
+                array[i][j] = (type *)calloc(size_z, sizeof(type));                                                                                            \
+                if(array[i][j] == NULL) {                                                                                                                      \
+                    log_error_and_exit("Memory allocation failed for third dimension\n");                                                                      \
+                }                                                                                                                                              \
+            }                                                                                                                                                  \
+        }                                                                                                                                                      \
+    } while(0)
+
+#define FREE_3D_ARRAY(array, size_x, size_y)                                                                                                                   \
+    do {                                                                                                                                                       \
+        for(int i = 0; i < size_x; ++i) {                                                                                                                      \
+            for(int j = 0; j < size_y; ++j) {                                                                                                                  \
+                free(array[i][j]);                                                                                                                             \
+            }                                                                                                                                                  \
+            free(array[i]);                                                                                                                                    \
+        }                                                                                                                                                      \
+        free(array);                                                                                                                                           \
+    } while(0)
+
 #define MALLOC_BYTES(type, bytes) (type *)malloc(bytes)
 #define MALLOC_ONE_TYPE(type) (type *)malloc(sizeof(type))
 #define MALLOC_ARRAY_OF_TYPE(type, n) (type *)malloc(sizeof(type) * (n))
@@ -142,8 +178,8 @@ struct simulation_files {
 #define STRING_HASH_PRINT_KEY_VALUE_LOG(tag, d)                                                                                                                \
     do {                                                                                                                                                       \
         for(int64_t __i = 0; __i < shlen(d); __i++) {                                                                                                          \
-            struct string_hash_entry __e = (d)[__i];                                                                                                             \
-            log_info("%s %s = %s\n", tag, __e.key, __e.value);                                                                                                     \
+            struct string_hash_entry __e = (d)[__i];                                                                                                           \
+            log_info("%s %s = %s\n", tag, __e.key, __e.value);                                                                                                 \
         }                                                                                                                                                      \
     } while(0)
 
diff --git a/src/eikonal/build.sh b/src/eikonal/build.sh
new file mode 100644
index 00000000..f3162177
--- /dev/null
+++ b/src/eikonal/build.sh
@@ -0,0 +1,10 @@
+if [ -n "$CUDA_FOUND" ]; then
+
+    EIKONAL_SOURCE_FILES="eikonal_solver.c cuda_fim.cu cuda_fim_kernel.cu"
+    EIKONAL_HEADER_FILES="eikonal_solver.h common_def.h cuda_fim.h cuda_fim_kernel.h"
+    EIKONAL_EXTRA_LIB_PATH=$CUDA_LIBRARY_PATH
+    EIKONAL_DYNAMIC_LIBS="c cudart"
+
+    COMPILE_SHARED_LIB "eikonal_solver" "$EIKONAL_SOURCE_FILES" "$EIKONAL_HEADER_FILES" "" "$EIKONAL_DYNAMIC_LIBS" "$EIKONAL_EXTRA_LIB_PATH" "" "$CUDA_FOUND"
+
+fi
diff --git a/src/eikonal/common_def.h b/src/eikonal/common_def.h
new file mode 100644
index 00000000..326a16c4
--- /dev/null
+++ b/src/eikonal/common_def.h
@@ -0,0 +1,60 @@
+//
+// CUDA implementation of FIM (Fast Iterative Method) for Eikonal equations
+//
+// Copyright (c) Won-Ki Jeong (wkjeong@unist.ac.kr)
+//
+// 2016. 2. 4
+//
+
+//
+// Common to entire project
+//
+
+#ifndef __COMMON_DEF_H__
+#define __COMMON_DEF_H__
+
+#include <stdbool.h>
+#include "../common_types/common_types.h"
+
+//
+// common definition for Eikonal solvers
+//
+#ifndef INF
+#define INF 1e20//FLT_MAX //
+#endif
+
+#define BLOCK_LENGTH 4
+
+//
+// itk image volume definition for 3D anisotropic eikonal solvers
+//
+typedef unsigned int uint;
+typedef unsigned char uchar;
+
+struct CUDA_MEM_STRUCTURE {
+  // volsize/blksize : # of pixel in volume/block
+  // blknum : # of block
+  // blklength : # of pixel in one dimemsion of block
+  uint nActiveBlock, blknum, volsize, blksize;
+  int xdim, ydim, zdim, nIter, blklength; // new new x,y,z dim to aligh power of 4
+
+  // host memory
+  uint *h_list;
+  bool *h_listVol, *h_listed;
+
+  // device memory
+  uint *d_list;
+  real *d_spd;
+  bool *d_mask, *d_listVol, *d_con;
+  
+  real *h_sol;//h_speedtable[256];
+  real *d_sol, *t_sol; 
+
+  // GroupOrder
+  int* blockOrder;
+  int K;
+};
+
+typedef struct CUDA_MEM_STRUCTURE CUDAMEMSTRUCT;
+
+#endif
diff --git a/src/eikonal/cuda_fim.cu b/src/eikonal/cuda_fim.cu
new file mode 100644
index 00000000..901c088c
--- /dev/null
+++ b/src/eikonal/cuda_fim.cu
@@ -0,0 +1,226 @@
+//
+// CUDA implementation of FIM (Fast Iterative Method) for Eikonal equations
+//
+// Copyright (c) Won-Ki Jeong (wkjeong@unist.ac.kr)
+//
+// 2016. 2. 4
+//
+
+#include <cstdio>
+#include <assert.h>
+#include "cuda_fim_kernel.h"
+#include "cuda_fim.h"
+#include "../gpu_utils/gpu_utils.h"
+#include <math.h>
+
+void run_eikonal_solver_simple(CUDAMEMSTRUCT *cmem, bool verbose) {
+    
+    int deviceID;
+    cudaGetDevice(&deviceID);
+    cudaDeviceProp deviceProp;
+    cudaGetDeviceProperties(&deviceProp, deviceID);
+    if (verbose) {
+        printf("Current device id : %d, name : %s\n", deviceID, deviceProp.name);
+    }
+
+    int xdim, ydim, zdim;
+    xdim = cmem->xdim;
+    ydim = cmem->ydim;
+    zdim = cmem->zdim;
+
+    // create volumes
+    uint volSize = cmem->volsize;
+    uint blockNum = cmem->blknum;
+
+    if (verbose) {
+        printf("# of total voxels : %d\n", volSize);
+        printf("# of total blocks : %d\n", blockNum);
+    }
+
+    // h_ : host memory, d_ : device memory
+    int nIter = cmem->nIter;
+    uint nActiveBlock = cmem->nActiveBlock; // active list
+
+    real*d_spd = cmem->d_spd;
+    real *d_sol = cmem->d_sol;
+    real *t_sol = cmem->t_sol;
+
+    uint *d_list = cmem->d_list;
+    bool *d_listVol = cmem->d_listVol;
+
+    bool *d_con = cmem->d_con;
+    bool *d_mask = cmem->d_mask;
+
+    // copy so that original value should not be modified
+    uint *h_list = (uint*) malloc(blockNum*sizeof(uint));
+    bool *h_listed = (bool*) malloc(blockNum*sizeof(bool));
+    bool *h_listVol = (bool*) malloc(blockNum*sizeof(bool));
+
+    // initialization
+    memcpy(h_list, cmem->h_list, blockNum*sizeof(uint));
+    memcpy(h_listed, cmem->h_listed, blockNum*sizeof(bool));
+    memcpy(h_listVol, cmem->h_listVol, blockNum*sizeof(bool));
+
+    check_cuda_error( cudaMemcpy(cmem->d_list, cmem->h_list, nActiveBlock*sizeof(uint), cudaMemcpyHostToDevice) );
+    check_cuda_error( cudaMemcpy(cmem->d_listVol, cmem->h_listVol, blockNum*sizeof(bool), cudaMemcpyHostToDevice) );
+    check_cuda_error( cudaMemcpy(cmem->d_sol, cmem->h_sol, volSize*sizeof(real), cudaMemcpyHostToDevice) );
+    check_cuda_error( cudaMemcpy(cmem->t_sol, cmem->h_sol, volSize*sizeof(real), cudaMemcpyHostToDevice) );
+    check_cuda_error( cudaMemset(cmem->d_con, 1, volSize*sizeof(bool)) );
+
+    // set dimension of block and entire grid size
+    dim3 dimBlock(BLOCK_LENGTH,BLOCK_LENGTH,BLOCK_LENGTH);
+    dim3 dimEntireGrid(blockNum);
+    dim3 dimGrid(nActiveBlock);
+
+    int nTotalIter = 0;
+
+    uint nTotalBlockProcessed = 0;
+
+    // start solver
+    while(nActiveBlock > 0)
+    {
+        assert(nActiveBlock < 4294967295);
+
+        nTotalBlockProcessed += nActiveBlock;
+
+        nTotalIter++;
+
+        //
+        // solve current blocks in the active lists
+        //
+
+        //            printf("# of active tiles : %u\n", nActiveBlock);
+        if (verbose) {
+            printf("# of active tiles : %u\n", nActiveBlock);
+        }
+        //////////////////////////////////////////////////////////////////
+        // 1. run solver on current active tiles
+
+        dimGrid.y = (unsigned int)floor(((double)nActiveBlock-1)/65535)+1;
+        dimGrid.x = (unsigned int)ceil ((double)nActiveBlock/(double)dimGrid.y);
+
+        if (verbose) {
+            printf("Grid size : %d x %d\n", dimGrid.x, dimGrid.y);
+        }
+
+        check_cuda_error( cudaMemcpy(d_list, h_list, nActiveBlock*sizeof(uint), cudaMemcpyHostToDevice) );
+
+        run_solver<<< dimGrid, dimBlock >>>(d_spd, d_mask, d_sol, t_sol, d_con, d_list, xdim, ydim, zdim, nIter, nActiveBlock);
+
+        check_cuda_error(cudaGetLastError());
+
+        cudaDeviceSynchronize();
+
+
+        //////////////////////////////////////////////////////////////////
+        // 2. reduction (only active tiles)
+
+        run_reduction<<< dimGrid, dim3(BLOCK_LENGTH,BLOCK_LENGTH,BLOCK_LENGTH/2) >>>(d_con, d_listVol, d_list, nActiveBlock);
+
+        check_cuda_error(cudaGetLastError());
+
+        cudaDeviceSynchronize();
+
+        //////////////////////////////////////////////////////////////////
+        // 3. check neighbor tiles of converged tile
+        // Add any active block of neighbor of converged block is inserted
+        // to the list
+
+        check_cuda_error( cudaMemcpy(h_listVol, d_listVol, blockNum*sizeof(bool), cudaMemcpyDeviceToHost) );
+
+        uint nOldActiveBlock = nActiveBlock;
+        uint nBlkX = xdim/BLOCK_LENGTH;
+        uint nBlkY = ydim/BLOCK_LENGTH;
+
+        for(uint i=0; i<nOldActiveBlock; i++)
+        {
+            // check 6-neighbor of current active tile
+            uint currBlkIdx = h_list[i];
+
+            if(!h_listVol[currBlkIdx]) // not active : converged
+            {
+                uint nb[6];
+                nb[0] = (currBlkIdx < nBlkX*nBlkY) ? currBlkIdx : (currBlkIdx - nBlkX*nBlkY);    //tp
+                nb[1] = ((currBlkIdx + nBlkX*nBlkY) >= blockNum) ? currBlkIdx : (currBlkIdx + nBlkX*nBlkY); //bt
+                nb[2] = (currBlkIdx < nBlkX) ? currBlkIdx : (currBlkIdx - nBlkX); //up
+                nb[3] = ((currBlkIdx + nBlkX) >= blockNum) ? currBlkIdx : (currBlkIdx + nBlkX); //dn
+                nb[4] = (currBlkIdx%nBlkX == 0) ? currBlkIdx : currBlkIdx-1; //lf
+                nb[5] = ((currBlkIdx+1)%nBlkX == 0) ? currBlkIdx : currBlkIdx+1; //rt
+
+                for(int nbIdx = 0; nbIdx < 6; nbIdx++)
+                {
+                    uint currIdx = nb[nbIdx];
+
+                    if(!h_listed[currIdx])
+                    {
+                        h_listed[currIdx] = true;
+                        h_list[nActiveBlock++] = currIdx;
+                    }
+                }
+            }
+        }
+        cudaDeviceSynchronize();
+
+        //////////////////////////////////////////////////////////////////
+        // 4. run solver only once for neighbor blocks of converged block
+        // current active list contains active blocks and neighbor blocks of
+        // any converged blocks.
+        //
+
+        // update grid dimension because nActiveBlock is changed
+        dimGrid.y = (unsigned int)floor(((double)nActiveBlock-1)/65535)+1;
+        dimGrid.x = (unsigned int)ceil((double)nActiveBlock/(double)dimGrid.y);
+
+        if (verbose) {
+            printf("Grid size : %d x %d\n", dimGrid.x, dimGrid.y);
+        }
+
+        check_cuda_error(cudaMemcpy(d_list, h_list, nActiveBlock*sizeof(uint), cudaMemcpyHostToDevice) );
+        run_check_neighbor<<< dimGrid, dimBlock >>>(d_spd, d_mask, t_sol, d_sol, d_con, d_list, xdim, ydim, zdim, nOldActiveBlock, nActiveBlock);
+        check_cuda_error(cudaGetLastError());
+        cudaDeviceSynchronize();
+
+
+        //////////////////////////////////////////////////////////////////
+        // 5. reduction
+
+        run_reduction<<< dimGrid, dim3(BLOCK_LENGTH,BLOCK_LENGTH,BLOCK_LENGTH/2) >>>(d_con, d_listVol, d_list, nActiveBlock);
+        check_cuda_error(cudaGetLastError());
+        cudaDeviceSynchronize();
+
+
+        //////////////////////////////////////////////////////////////////
+        // 6. update active list
+        // read back active volume from the device and add
+        // active block to active list on the host memory
+
+
+        nActiveBlock = 0;
+        check_cuda_error( cudaMemcpy(h_listVol, d_listVol, blockNum*sizeof(bool), cudaMemcpyDeviceToHost) );
+
+        for(uint i=0; i<blockNum; i++)
+        {
+            if(h_listVol[i]) // true : active block (not converged)
+            {
+                h_listed[i] = true;
+                h_list[nActiveBlock++] = i;
+            }
+            else h_listed[i] = false;
+        }
+        cudaDeviceSynchronize();
+
+        if (verbose) {
+            printf("Iteration : %d\n", nTotalIter);
+        }
+    }
+
+    if (verbose) {
+        printf("Eikonal solver converged after %d iterations\n", nTotalIter);
+        printf("Total # of blocks processed : %d\n", nTotalBlockProcessed);
+    }
+
+    // delete dynamically allocated host memory
+    free(h_list);
+    free(h_listed);
+    free(h_listVol);
+}
diff --git a/src/eikonal/cuda_fim.h b/src/eikonal/cuda_fim.h
new file mode 100644
index 00000000..418e375b
--- /dev/null
+++ b/src/eikonal/cuda_fim.h
@@ -0,0 +1,21 @@
+//
+// CUDA implementation of FIM (Fast Iterative Method) for Eikonal equations
+//
+// Copyright (c) Won-Ki Jeong (wkjeong@unist.ac.kr)
+//
+// 2016. 2. 4
+//
+#ifndef __CUDA_FIM_H__
+#define __CUDA_FIM_H__
+
+#include "common_def.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+void run_eikonal_solver_simple(CUDAMEMSTRUCT *cmem, bool verbose);
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/src/eikonal/cuda_fim_kernel.cu b/src/eikonal/cuda_fim_kernel.cu
new file mode 100644
index 00000000..27a1e881
--- /dev/null
+++ b/src/eikonal/cuda_fim_kernel.cu
@@ -0,0 +1,424 @@
+//
+// CUDA implementation of FIM (Fast Iterative Method) for Eikonal equations
+//
+// Copyright (c) Won-Ki Jeong (wkjeong@unist.ac.kr)
+//
+// 2016. 2. 4
+//
+
+#include "cuda_fim_kernel.h"
+
+__device__ real get_time_eikonal(real a, real b, real c, real s)
+{
+	real ret, tmp;
+
+	// a > b > c
+	if(a < b) { tmp = a; a = b; b = tmp; }
+	if(b < c) { tmp = b; b = c; c = tmp; }
+	if(a < b) { tmp = a; a = b; b = tmp; }
+
+	ret = INF;
+
+	if(c < INF)
+	{
+		ret = c + s;
+		
+		if(ret > b) 
+		{	
+			tmp = ((b+c) + sqrtf(2.0f*s*s-(b-c)*(b-c)))*0.5f;
+		
+			if(tmp > b) ret = tmp; 
+
+			if(ret > a)	{			
+				tmp = (a+b+c)/3.0f + sqrtf(2.0f*(a*(b-a)+b*(c-b)+c*(a-c))+3.0f*s*s)/3.0f; 
+
+				if(tmp > a) ret = tmp;
+			}
+		}
+	}
+
+	return ret;
+}
+
+__global__ void run_solver(real* spd, bool* mask, const real *sol_in, real *sol_out, bool *con, uint* list, int xdim, int ydim, int zdim, int nIter, uint nActiveBlock)
+{
+	uint list_idx = blockIdx.y*gridDim.x + blockIdx.x;
+
+	if(list_idx < nActiveBlock)
+	{
+		// retrieve actual block index from the active list
+		uint block_idx = list[list_idx];
+
+		real F;
+		bool isValid;
+		uint blocksize = BLOCK_LENGTH*BLOCK_LENGTH*BLOCK_LENGTH;
+		uint base_addr = block_idx*blocksize;
+
+		uint xgridlength = xdim/BLOCK_LENGTH;
+		uint ygridlength = ydim/BLOCK_LENGTH;
+		uint zgridlength = zdim/BLOCK_LENGTH;
+
+		// compute block index
+		uint bx = block_idx%xgridlength;
+		uint tmpIdx = (block_idx - bx)/xgridlength;
+		uint by = tmpIdx%ygridlength;
+		uint bz = (tmpIdx-by)/ygridlength;
+
+		uint tx = threadIdx.x;
+		uint ty = threadIdx.y;
+		uint tz = threadIdx.z;
+		uint tIdx = tz*BLOCK_LENGTH*BLOCK_LENGTH + ty*BLOCK_LENGTH + tx;
+
+		__shared__ real _sol[BLOCK_LENGTH+2][BLOCK_LENGTH+2][BLOCK_LENGTH+2];
+
+		// copy global to shared memory
+		dim3 idx(tx+1,ty+1,tz+1);
+
+		SOL(idx.x,idx.y,idx.z) = sol_in[base_addr + tIdx];
+		F = spd[base_addr + tIdx];
+		if(F > 0) F = 1.0/F; // F = 1/f
+		isValid = mask[base_addr + tIdx];
+
+		uint new_base_addr, new_tIdx;
+
+		// 1-neighborhood values
+		if(tx == 0) 
+		{
+			if(bx == 0) // end of the grid
+			{	
+				new_tIdx = tIdx;
+				new_base_addr = base_addr;
+			}
+			else
+			{
+				new_tIdx = tIdx + BLOCK_LENGTH-1;
+				new_base_addr = (block_idx - 1)*blocksize;	
+			}
+
+			SOL(tx,idx.y,idx.z) = sol_in[new_base_addr + new_tIdx];	
+		}
+
+		if(tx == BLOCK_LENGTH-1)
+		{
+			if(bx == xgridlength-1) // end of the grid
+			{
+				new_tIdx = tIdx;
+				new_base_addr = base_addr;
+			}
+			else
+			{
+				new_tIdx = tIdx - (BLOCK_LENGTH-1);
+				new_base_addr = (block_idx + 1)*blocksize;	
+			}
+			SOL(tx+2,idx.y,idx.z) = sol_in[new_base_addr + new_tIdx];	
+		}
+
+		if(ty == 0)
+		{
+			if(by == 0)
+			{
+				new_tIdx = tIdx;
+				new_base_addr = base_addr;
+			}
+			else
+			{
+				new_tIdx = tIdx + (BLOCK_LENGTH-1)*BLOCK_LENGTH;
+				new_base_addr = (block_idx - xgridlength)*blocksize;
+			}
+
+			SOL(idx.x,ty,idx.z) = sol_in[new_base_addr + new_tIdx];
+		}
+
+		if(ty == BLOCK_LENGTH-1)
+		{
+			if(by == ygridlength-1) 
+			{
+				new_tIdx = tIdx;
+				new_base_addr = base_addr;
+			}
+			else
+			{
+				new_tIdx = tIdx - (BLOCK_LENGTH-1)*BLOCK_LENGTH;
+				new_base_addr = (block_idx + xgridlength)*blocksize;
+			}
+
+			SOL(idx.x,ty+2,idx.z) = sol_in[new_base_addr + new_tIdx];
+		}
+
+		if(tz == 0)
+		{
+			if(bz == 0)
+			{
+				new_tIdx = tIdx;
+				new_base_addr = base_addr;
+			}
+			else
+			{
+				new_tIdx = tIdx + (BLOCK_LENGTH-1)*BLOCK_LENGTH*BLOCK_LENGTH;
+				new_base_addr = (block_idx - xgridlength*ygridlength)*blocksize;
+			}
+
+			SOL(idx.x,idx.y,tz) = sol_in[new_base_addr + new_tIdx];
+		}
+
+		if(tz == BLOCK_LENGTH-1)
+		{
+			if(bz == zgridlength-1) 
+			{
+				new_tIdx = tIdx;
+				new_base_addr = base_addr;
+			}
+			else
+			{
+				new_tIdx = tIdx - (BLOCK_LENGTH-1)*BLOCK_LENGTH*BLOCK_LENGTH;
+				new_base_addr = (block_idx + xgridlength*ygridlength)*blocksize;
+			}
+
+			SOL(idx.x,idx.y,tz+2) = sol_in[new_base_addr + new_tIdx];
+		}
+
+		__syncthreads();
+
+		real a,b,c,oldT,newT;
+
+		for(int iter=0; iter<nIter; iter++)	
+		{
+			//
+			// compute new value
+			//
+			oldT = newT = SOL(idx.x,idx.y,idx.z);
+
+			if(isValid)
+			{
+				a = min(SOL(tx,idx.y,idx.z),SOL(tx+2,idx.y,idx.z));
+				b = min(SOL(idx.x,ty,idx.z),SOL(idx.x,ty+2,idx.z));
+				c = min(SOL(idx.x,idx.y,tz),SOL(idx.x,idx.y,tz+2));
+
+				real tmp = (real) get_time_eikonal(a, b, c, F);
+
+				newT = min(tmp,oldT);
+			}
+			__syncthreads();	
+
+			if(isValid) SOL(idx.x,idx.y,idx.z) = newT;
+
+			__syncthreads(); // this may not required		
+		}
+
+
+		real residue = oldT - newT;
+
+		// write back to global memory
+		con[base_addr + tIdx] = (residue < EPS) ? true : false;
+		sol_out[base_addr + tIdx] = newT;		
+	}
+}
+
+__global__ void run_reduction(bool *con, bool *listVol, uint *list, uint nActiveBlock)
+{
+	uint list_idx = blockIdx.y*gridDim.x + blockIdx.x;
+
+	if(list_idx < nActiveBlock)
+	{
+		uint block_idx = list[list_idx];
+
+		__shared__ bool conv[BLOCK_LENGTH*BLOCK_LENGTH*BLOCK_LENGTH];
+
+		uint blocksize = BLOCK_LENGTH*BLOCK_LENGTH*BLOCK_LENGTH/2;
+		uint base_addr = block_idx*blocksize*2;
+		uint tx = threadIdx.x;
+		uint ty = threadIdx.y;
+		uint tz = threadIdx.z;
+		uint tIdx = tz*BLOCK_LENGTH*BLOCK_LENGTH + ty*BLOCK_LENGTH + tx;
+
+		conv[tIdx] = con[base_addr + tIdx];
+		conv[tIdx + blocksize] = con[base_addr + tIdx + blocksize];
+
+		__syncthreads();
+
+		for(uint i=blocksize; i>0; i/=2)
+		{
+			if(tIdx < i)
+			{
+				bool b1, b2;
+				b1 = conv[tIdx];
+				b2 = conv[tIdx+i];
+				conv[tIdx] = (b1 && b2) ? true : false ;
+			}
+			__syncthreads();
+		}
+
+		if(tIdx == 0) 
+		{		
+			listVol[block_idx] = !conv[0]; // active list is negation of tile convergence (active = not converged)
+		}
+	}
+}
+
+__global__ void run_check_neighbor(real* spd, bool* mask, const real *sol_in, real *sol_out, 
+																	 bool *con, uint* list, int xdim, int ydim, int zdim,
+																	 uint nActiveBlock, uint nTotalBlock)
+{
+
+	uint list_idx = blockIdx.y*gridDim.x + blockIdx.x;
+
+	if(list_idx < nTotalBlock)
+	{
+		real F;
+		bool isValid;
+		__shared__ real _sol[BLOCK_LENGTH+2][BLOCK_LENGTH+2][BLOCK_LENGTH+2];
+
+		uint block_idx = list[list_idx];
+		uint blocksize = BLOCK_LENGTH*BLOCK_LENGTH*BLOCK_LENGTH;
+		uint base_addr = block_idx*blocksize;
+
+		uint tx = threadIdx.x;
+		uint ty = threadIdx.y;
+		uint tz = threadIdx.z;
+		uint tIdx = tz*BLOCK_LENGTH*BLOCK_LENGTH + ty*BLOCK_LENGTH + tx;
+
+		if(list_idx < nActiveBlock) // copy value
+		{
+			sol_out[base_addr + tIdx] = sol_in[base_addr + tIdx];
+		} 
+		else
+		{
+			uint xgridlength = xdim/BLOCK_LENGTH;
+			uint ygridlength = ydim/BLOCK_LENGTH;
+			uint zgridlength = zdim/BLOCK_LENGTH;
+
+			// compute block index
+			uint bx = block_idx%xgridlength;
+			uint tmpIdx = (block_idx - bx)/xgridlength;
+			uint by = tmpIdx%ygridlength;
+			uint bz = (tmpIdx-by)/ygridlength;
+
+			// copy global to shared memory
+			dim3 idx(tx+1,ty+1,tz+1);
+			_sol[idx.x][idx.y][idx.z] = sol_in[base_addr + tIdx];
+			F = spd[base_addr + tIdx];
+			if(F > 0) F = 1.0/F;
+			isValid = mask[base_addr + tIdx];
+
+			uint new_base_addr, new_tIdx;
+
+			// 1-neighborhood values
+			if(tx == 0) 
+			{
+				if(bx == 0) // end of the grid
+				{	
+					new_tIdx = tIdx;
+					new_base_addr = base_addr;
+				}
+				else
+				{
+					new_tIdx = tIdx + BLOCK_LENGTH-1;
+					new_base_addr = (block_idx - 1)*blocksize;	
+				}
+				_sol[tx][idx.y][idx.z] = sol_in[new_base_addr + new_tIdx];	
+			}
+
+			if(tx == BLOCK_LENGTH-1)
+			{
+				if(bx == xgridlength-1) // end of the grid
+				{
+					new_tIdx = tIdx;
+					new_base_addr = base_addr;
+				}
+				else
+				{
+					new_tIdx = tIdx - (BLOCK_LENGTH-1);
+					new_base_addr = (block_idx + 1)*blocksize;	
+				}
+				_sol[tx+2][idx.y][idx.z] = sol_in[new_base_addr + new_tIdx];	
+			}
+
+			if(ty == 0)
+			{
+				if(by == 0)
+				{
+					new_tIdx = tIdx;
+					new_base_addr = base_addr;
+				}
+				else
+				{
+					new_tIdx = tIdx + (BLOCK_LENGTH-1)*BLOCK_LENGTH;
+					new_base_addr = (block_idx - xgridlength)*blocksize;
+				}
+				_sol[idx.x][ty][idx.z] = sol_in[new_base_addr + new_tIdx];
+			}
+
+			if(ty == BLOCK_LENGTH-1)
+			{
+				if(by == ygridlength-1) 
+				{
+					new_tIdx = tIdx;
+					new_base_addr = base_addr;
+				}
+				else
+				{
+					new_tIdx = tIdx - (BLOCK_LENGTH-1)*BLOCK_LENGTH;
+					new_base_addr = (block_idx + xgridlength)*blocksize;
+				}
+				_sol[idx.x][ty+2][idx.z] = sol_in[new_base_addr + new_tIdx];
+			}
+
+			if(tz == 0)
+			{
+				if(bz == 0)
+				{
+					new_tIdx = tIdx;
+					new_base_addr = base_addr;
+				}
+				else
+				{
+					new_tIdx = tIdx + (BLOCK_LENGTH-1)*BLOCK_LENGTH*BLOCK_LENGTH;
+					new_base_addr = (block_idx - xgridlength*ygridlength)*blocksize;
+				}
+				_sol[idx.x][idx.y][tz] = sol_in[new_base_addr + new_tIdx];
+			}
+
+			if(tz == BLOCK_LENGTH-1)
+			{
+				if(bz == zgridlength-1) 
+				{
+					new_tIdx = tIdx;
+					new_base_addr = base_addr;
+				}
+				else
+				{
+					new_tIdx = tIdx - (BLOCK_LENGTH-1)*BLOCK_LENGTH*BLOCK_LENGTH;
+					new_base_addr = (block_idx + xgridlength*ygridlength)*blocksize;
+				}
+				_sol[idx.x][idx.y][tz+2] = sol_in[new_base_addr + new_tIdx];
+			}
+
+			__syncthreads();
+
+
+			real a,b,c,oldT,newT;
+
+			//
+			// compute new value
+			//
+			oldT = newT = _sol[idx.x][idx.y][idx.z];
+
+			if(isValid)
+			{
+				a = min(_sol[tx][idx.y][idx.z],_sol[tx+2][idx.y][idx.z]);
+				b = min(_sol[idx.x][ty][idx.z],_sol[idx.x][ty+2][idx.z]);
+				c = min(_sol[idx.x][idx.y][tz],_sol[idx.x][idx.y][tz+2]);
+
+				real tmp = (real) get_time_eikonal(a, b, c, F);
+				newT = min(tmp,oldT);
+
+				sol_out[base_addr + tIdx] = newT;
+			}
+			// write back to global memory
+			real residue = oldT - newT;
+			con[base_addr + tIdx] = (residue < EPS) ? true : false;	
+		}
+	}
+}
+
+
diff --git a/src/eikonal/cuda_fim_kernel.h b/src/eikonal/cuda_fim_kernel.h
new file mode 100644
index 00000000..874b49a2
--- /dev/null
+++ b/src/eikonal/cuda_fim_kernel.h
@@ -0,0 +1,41 @@
+//
+// CUDA implementation of FIM (Fast Iterative Method) for Eikonal equations
+//
+// Copyright (c) Won-Ki Jeong (wkjeong@unist.ac.kr)
+//
+// 2016. 2. 4
+//
+
+#ifndef _cuda_fim_KERNEL_H_
+#define _cuda_fim_KERNEL_H_
+
+#include "common_def.h"
+
+#define MEM(index) _mem[index]
+#define SOL(i,j,k) _sol[i][j][k]
+#define SPD(i,j,k) _spd[i][j][k]
+
+__device__ real get_time_eikonal(real a, real b, real c, real s);
+//
+// F : Input speed (positive)
+// if F =< 0, skip that pixel (masking out)
+//
+__global__ void run_solver(real* spd, bool* mask, const real *sol_in, 
+  real *sol_out, bool *con, uint* list, int xdim, int ydim, int zdim,
+  int nIter, uint nActiveBlock);
+//
+// run_reduction
+//
+// con is pixelwise convergence. Do reduction on active tiles and write tile-wise
+// convergence to listVol. The implementation assumes that the block size is 4x4x4.
+//
+__global__ void run_reduction(bool *con, bool *listVol, uint *list, uint nActiveBlock);
+//
+// if block is active block, copy values
+// if block is neighbor, run solver once
+//
+__global__ void run_check_neighbor(real* spd, bool* mask, const real *sol_in, real *sol_out,
+  bool *con, uint* list, int xdim, int ydim, int zdim,
+  uint nActiveBlock, uint nTotalBlock);
+#endif // #ifndef _cuda_fim_KERNEL_H_
+
diff --git a/src/eikonal/eikonal_solver.c b/src/eikonal/eikonal_solver.c
new file mode 100644
index 00000000..e4eed189
--- /dev/null
+++ b/src/eikonal/eikonal_solver.c
@@ -0,0 +1,427 @@
+#include <assert.h>
+#include <cuda_runtime.h>
+#include "eikonal_solver.h"
+#include "../gpu_utils/gpu_utils.h"
+#include "../logger/logger.h"
+#include <math.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include "../3dparty/stb_ds.h"
+#include "cuda_fim.h"
+
+struct eikonal_solver * new_eikonal_solver(bool verbose) {
+
+    struct eikonal_solver *solver = MALLOC_ONE_TYPE(struct eikonal_solver);
+    solver->verbose = verbose;
+    solver->cuda_mem_created = false;
+    solver->width = 256;
+    solver->height = 256;
+    solver->depth = 256;
+    solver->iters_per_block = 10;
+    solver->solver_type = 0;
+    solver->speeds = NULL;
+    solver->seeds = NULL;
+    solver->mask = NULL;
+
+    return solver;
+}
+
+void free_eikonal_solver(struct eikonal_solver *solver) {
+
+    //TODO: free 3D arrays
+    FREE_3D_ARRAY(solver->speeds, solver->width, solver->height);
+    FREE_3D_ARRAY(solver->answer, solver->width, solver->height);
+    FREE_3D_ARRAY(solver->mask, solver->width, solver->height);
+
+    if(solver->cuda_mem_created) {
+        free(solver->memory_struct.h_sol);
+        free(solver->memory_struct.h_list);
+        free(solver->memory_struct.h_listed);
+        free(solver->memory_struct.h_listVol);
+        free(solver->memory_struct.blockOrder);
+        check_cuda_error( cudaFree(solver->memory_struct.d_spd) );
+        check_cuda_error( cudaFree(solver->memory_struct.d_sol) );
+        check_cuda_error( cudaFree(solver->memory_struct.t_sol) );  // temp solution for ping-pong
+        check_cuda_error( cudaFree(solver->memory_struct.d_con) );  // convergence volume
+        check_cuda_error( cudaFree(solver->memory_struct.d_list) );
+        check_cuda_error( cudaFree(solver->memory_struct.d_listVol) );
+        check_cuda_error( cudaFree(solver->memory_struct.d_mask) );
+    }
+
+    free(solver);
+}
+
+void check_cuda_memory(struct eikonal_solver *solver) {
+    if (solver->verbose) {
+        size_t free_mem, total_mem;
+        cudaMemGetInfo(&free_mem, &total_mem);
+
+        printf("Total Memory : %lld MB\n", total_mem / (1024LL * 1024LL));
+        printf("Free Memory  : %lld MB\n", free_mem / (1024LL * 1024LL));
+        printf("--\n");
+    }
+}
+
+void init_cuda_mem(struct eikonal_solver *solver) {
+
+    if(solver->width <= 0 || solver->height <= 0 || solver->depth <= 0) {
+        log_error_and_exit("Volume dimension cannot be zero");
+    }
+
+    check_cuda_memory(solver);
+
+    // 1. Create /initialize GPU memory
+    size_t nx, ny, nz;
+
+    nx = solver->width + (BLOCK_LENGTH-solver->width%BLOCK_LENGTH)%BLOCK_LENGTH;
+    ny = solver->height + (BLOCK_LENGTH-solver->height%BLOCK_LENGTH)%BLOCK_LENGTH;
+    nz = solver->depth + (BLOCK_LENGTH-solver->depth%BLOCK_LENGTH)%BLOCK_LENGTH;
+
+    if (solver->verbose) {
+        printf("%ld %ld %ld \n",nx,ny,nz);
+    }
+
+    size_t volSize = nx*ny*nz;
+    size_t blkSize = BLOCK_LENGTH*BLOCK_LENGTH*BLOCK_LENGTH;
+
+    size_t nBlkX = nx / BLOCK_LENGTH;
+    size_t nBlkY = ny / BLOCK_LENGTH;
+    size_t nBlkZ = nz / BLOCK_LENGTH;
+    size_t blockNum = nBlkX*nBlkY*nBlkZ;
+
+    solver->memory_struct.xdim = (int) nx;
+    solver->memory_struct.ydim = (int) ny;
+    solver->memory_struct.zdim = (int) nz;
+    solver->memory_struct.volsize = (uint) volSize;
+    solver->memory_struct.blksize = (uint) blkSize;
+    solver->memory_struct.blklength = BLOCK_LENGTH;
+    solver->memory_struct.blknum = (uint) blockNum;
+    solver->memory_struct.nIter = (int) solver->iters_per_block; // iter per block
+
+    if(solver->cuda_mem_created) // delete previous memory
+    {
+        free(solver->memory_struct.h_sol);
+        free(solver->memory_struct.h_list);
+        free(solver->memory_struct.h_listed);
+        free(solver->memory_struct.h_listVol);
+        free(solver->memory_struct.blockOrder);
+        check_cuda_error( cudaFree(solver->memory_struct.d_spd) );
+        check_cuda_error( cudaFree(solver->memory_struct.d_sol) );
+        check_cuda_error( cudaFree(solver->memory_struct.t_sol) );  // temp solution for ping-pong
+        check_cuda_error( cudaFree(solver->memory_struct.d_con) );  // convergence volume
+        check_cuda_error( cudaFree(solver->memory_struct.d_list) );
+        check_cuda_error( cudaFree(solver->memory_struct.d_listVol) );
+        check_cuda_error( cudaFree(solver->memory_struct.d_mask) );
+    }
+    solver->cuda_mem_created = true;
+
+    solver->memory_struct.h_sol = (real*) malloc(volSize*sizeof(real)); // initial solution
+    solver->memory_struct.h_list = (uint*) malloc(blockNum*sizeof(uint)); // linear list contains active block indices
+    solver->memory_struct.h_listed = (bool*) malloc(blockNum*sizeof(bool));  // whether block is added to the list
+    solver->memory_struct.h_listVol = (bool*) malloc(blockNum*sizeof(bool)); // volume list shows active/nonactive of corresponding block
+    solver->memory_struct.blockOrder = (int*) malloc(blockNum*sizeof(int));
+
+    check_cuda_memory(solver);
+
+    //
+    // create host/device memory using CUDA mem functions
+    //
+    check_cuda_error( cudaMalloc((void**)&(solver->memory_struct.d_spd), volSize*sizeof(real)) );
+    check_cuda_memory(solver);
+
+    check_cuda_error( cudaMalloc((void**)&(solver->memory_struct.d_sol), volSize*sizeof(real)) );
+    check_cuda_memory(solver);
+
+    check_cuda_error( cudaMalloc((void**)&(solver->memory_struct.t_sol), volSize*sizeof(real)) );  // temp solution for ping-pong
+    check_cuda_memory(solver);
+
+    check_cuda_error( cudaMalloc((void**)&(solver->memory_struct.d_con), volSize*sizeof(bool))  );  // convergence volume
+    check_cuda_memory(solver);
+
+    check_cuda_error( cudaMalloc((void**)&(solver->memory_struct.d_list), blockNum*sizeof(uint)) );
+    check_cuda_memory(solver);
+
+    check_cuda_error( cudaMalloc((void**)&(solver->memory_struct.d_listVol), blockNum*sizeof(bool)) );
+    check_cuda_memory(solver);
+
+    check_cuda_error( cudaMalloc((void**)&(solver->memory_struct.d_mask), volSize*sizeof(bool)) );
+    check_cuda_memory(solver);
+}
+
+
+void set_attribute_mask(struct eikonal_solver *solver) {
+
+    assert(solver);
+    assert(solver->speeds);
+    assert(solver->mask);
+
+    uint volSize = solver->memory_struct.volsize;
+
+    int nx, ny, nz, blklength;
+
+    nx = solver->memory_struct.xdim;
+    ny = solver->memory_struct.ydim;
+    nz = solver->memory_struct.zdim;
+    blklength = solver->memory_struct.blklength;
+
+    // create host memory
+    real *h_spd  = MALLOC_ARRAY_OF_TYPE(real, volSize); // byte speed, host
+    bool  *h_mask = MALLOC_ARRAY_OF_TYPE(bool, volSize);
+
+    // copy input volume to host memory
+    // make each block to be stored contiguously in 1D memory space
+    uint idx = 0;
+    for(int zStr = 0; zStr < nz; zStr += blklength) {
+        for(int yStr = 0; yStr < ny; yStr += blklength) {
+            for(int xStr = 0; xStr < nx; xStr += blklength) {
+                // for each block
+                for(int z = zStr; z < zStr + blklength; z++) {
+                    for(int y = yStr; y < yStr + blklength; y++) {
+                        for(int x = xStr; x < xStr+blklength; x++) {
+                            h_spd[idx] = solver->speeds[x][y][z];
+                            h_mask[idx] = solver->mask[x][y][z];
+                            idx++;
+                        }
+                    }
+                }
+            }
+        }
+    }
+
+    // initialize GPU memory with host memory
+    check_cuda_error( cudaMemcpy(solver->memory_struct.d_spd, h_spd, volSize*sizeof(real), cudaMemcpyHostToDevice) );
+    check_cuda_error( cudaMemcpy(solver->memory_struct.d_mask, h_mask, volSize*sizeof(bool), cudaMemcpyHostToDevice) );
+
+    free(h_spd);
+    free(h_mask);
+}
+
+static void init_attribute_mask(struct eikonal_solver *solver) {
+
+    size_t size_x = solver->width;
+    size_t size_y = solver->height;
+    size_t size_z = solver->depth;
+
+    ALLOCATE_3D_ARRAY(solver->mask, bool, size_x, size_y, size_z);
+
+    size_t n_active = solver->num_active_cells;
+    struct cell_node **active_cells = solver->active_cells;
+
+    for(size_t i = 0; i < n_active; i++) {
+        int x = (int) active_cells[i]->center.x;
+        int y = (int) active_cells[i]->center.y;
+        int z = (int) active_cells[i]->center.z;
+        solver->mask[x][y][z] = true;
+    }
+
+}
+
+static void initialization(struct eikonal_solver *solver) {
+    assert(solver);
+    check_cuda_memory(solver);
+    init_cuda_mem(solver);
+    init_attribute_mask(solver);
+    set_attribute_mask(solver);
+    check_cuda_memory(solver);
+}
+
+static void get_solution(struct eikonal_solver *solver) {
+    // copy solution from GPU
+    check_cuda_error( cudaMemcpy(solver->memory_struct.h_sol,
+                                 solver->memory_struct.d_sol,
+                                 solver->memory_struct.volsize*sizeof(real),
+                                 cudaMemcpyDeviceToHost) );
+
+    size_t size_x = solver->width;
+    size_t size_y = solver->height;
+    size_t size_z = solver->depth;
+
+    ALLOCATE_3D_ARRAY(solver->answer, real, size_x, size_y, size_z);
+
+    for(size_t blockID = 0; blockID < solver->memory_struct.blknum; blockID++) {
+        size_t baseAddr = blockID * solver->memory_struct.blksize;
+        size_t xgridlength = solver->memory_struct.xdim/BLOCK_LENGTH;
+        size_t ygridlength = solver->memory_struct.ydim/BLOCK_LENGTH;
+        // compute block index
+        size_t bx = blockID%xgridlength;
+        size_t tmpIdx = (blockID - bx)/xgridlength;
+        size_t by = tmpIdx%ygridlength;
+        size_t bz = (tmpIdx-by)/ygridlength;
+        //translate back to real space
+        for(int k = 0; k < BLOCK_LENGTH; k++) {
+            for(int j = 0; j < BLOCK_LENGTH; j++) {
+                for(int i = 0; i < BLOCK_LENGTH; i++) {
+                    double d = solver->memory_struct.h_sol[baseAddr +
+                                                           k * BLOCK_LENGTH * BLOCK_LENGTH +
+                                                           j * BLOCK_LENGTH + i];
+                    if ((i + bx * BLOCK_LENGTH) < solver->width &&
+                        (j + by * BLOCK_LENGTH) < solver->height &&
+                        (k + bz * BLOCK_LENGTH) < solver->depth) {
+                        solver->answer[(i + bx * BLOCK_LENGTH)][(j +
+                                                                 by * BLOCK_LENGTH)][k + bz * BLOCK_LENGTH] = d;
+                    }
+                }
+            }
+        }
+    }
+}
+
+void map_generator(struct eikonal_solver *solver) {
+
+    double pi = 3.141592653589793238462643383;
+
+    size_t size_x = solver->width;
+    size_t size_y = solver->height;
+    size_t size_z = solver->depth;
+
+    ALLOCATE_3D_ARRAY(solver->speeds, real, size_x, size_y, size_z);
+
+    switch(solver->solver_type) {
+    case 0 :
+        //Constant Speed Map
+        for (int k = 0 ; k < solver->depth ; ++k) {
+            for (int j = 0 ; j < solver->height; ++j) {
+                for ( int i = 0 ; i < solver->width ; ++i) {
+                    solver->speeds[i][j][k] = 1.0;
+                }
+            }
+        }
+        break;
+    case 1 :
+        //Sinusoid Speed Map
+        for (int k = 0 ; k < solver->depth ; ++k) {
+            for (int j = 0 ; j < solver->height; ++j) {
+                for ( int i = 0 ; i < solver->width ; ++i) {
+                    solver->speeds[i][j][k] =
+                        (6.0 + 5.0 *(sin((i*pi)/(double)solver->width * 2.0))*
+                             sin((j*pi)/(double)solver->height*2.0)*
+                             sin((k*pi)/(double)solver->depth*2.0));
+                }
+            }
+        }
+
+        break;
+    }
+}
+
+void use_seeds(struct eikonal_solver *solver) {
+
+    if (solver->verbose) {
+        printf("Loading seed volume...\n");
+    }
+    uint volSize, blockNum;
+    int nx, ny, nz, blklength;
+
+    nx = solver->memory_struct.xdim;
+    ny = solver->memory_struct.ydim;
+    nz = solver->memory_struct.zdim;
+    volSize = solver->memory_struct.volsize;
+    blklength = solver->memory_struct.blklength;
+    blockNum = solver->memory_struct.blknum;
+
+    // copy input volume to host memory
+    // make each block to be stored contiguously in 1D memory space
+    uint idx = 0;
+    uint blk_idx = 0;
+    uint list_idx = 0;
+    uint nActiveBlock = 0;
+
+    for(int zStr = 0; zStr < nz; zStr += blklength) {
+        for(int yStr = 0; yStr < ny; yStr += blklength) {
+            for(int xStr = 0; xStr < nx; xStr += blklength) {
+                // for each block
+                bool isSeedBlock = false;
+
+                for(int z=zStr; z<zStr+blklength; z++) {
+                    for(int y=yStr; y<yStr+blklength; y++) {
+                        for(int x=xStr; x<xStr+blklength; x++) {
+                            solver->memory_struct.h_sol[idx] = INF;
+                            if (arrlen(solver->seeds) == 0) {
+                                if (x == nx/2 && y == ny/2 && z == nz/2) {
+                                    solver->memory_struct.h_sol[idx] = 0;
+                                    isSeedBlock = true;
+                                    if (solver->verbose) {
+                                        printf("%d is Selected bt source \n",idx);
+                                    }
+                                }
+                            } else {
+                                for(size_t i = 0; i < arrlen(solver->seeds); i++) {
+                                    if (solver->seeds[i][0] == x &&
+                                        solver->seeds[i][1] == y &&
+                                        solver->seeds[i][2] == z) {
+                                        solver->memory_struct.h_sol[idx] = 0;
+                                        isSeedBlock = true;
+                                        if (solver->verbose) {
+                                            printf("%d is Selected bt source \n",idx);
+                                        }
+                                    }
+                                }
+                            }
+                            idx++;
+                        }
+                    }
+                }
+                ///////////////////////////////////////////////
+                if(isSeedBlock) {
+                    if (solver->verbose) {
+                        printf("%d,%d,%d is Seed Block \n",zStr,yStr,xStr);
+                    }
+                    solver->memory_struct.h_listVol[blk_idx] = true;
+                    solver->memory_struct.h_listed[blk_idx] = true;
+                    solver->memory_struct.h_list[list_idx] = blk_idx;
+                    list_idx++;
+                    nActiveBlock++;
+                } else {
+                    solver->memory_struct.h_listVol[blk_idx] = false;
+                    solver->memory_struct.h_listed[blk_idx] = false;
+                }
+                blk_idx++;
+            }
+        }
+    }
+    solver->memory_struct.nActiveBlock = nActiveBlock;
+    // initialize GPU memory with host memory
+    check_cuda_error( cudaMemcpy(solver->memory_struct.d_sol, solver->memory_struct.h_sol, volSize*sizeof(real), cudaMemcpyHostToDevice) );
+    check_cuda_error( cudaMemcpy(solver->memory_struct.t_sol, solver->memory_struct.h_sol, volSize*sizeof(real), cudaMemcpyHostToDevice) );
+    check_cuda_error( cudaMemcpy(solver->memory_struct.d_list, solver->memory_struct.h_list, nActiveBlock*sizeof(uint), cudaMemcpyHostToDevice) );
+    check_cuda_error( cudaMemcpy(solver->memory_struct.d_listVol, solver->memory_struct.h_listVol, blockNum*sizeof(bool), cudaMemcpyHostToDevice) );
+    // initialize GPU memory with constant value
+    check_cuda_error( cudaMemset(solver->memory_struct.d_con, 1, volSize*sizeof(bool)) );
+}
+
+void write_alg(struct eikonal_solver *solver, char *filename) {
+
+    FILE *out = fopen(filename, "w");
+
+    if (out == NULL) {
+        log_error_and_exit("Error opening file: %s\n", filename);
+    }
+
+    for (int k = 0; k < solver->depth; k++) {
+        for (int j = 0; j < solver->height; j++) {
+            for (int i = 0; i < solver->width; i++) {
+                if(solver->mask[i][j][k] == true) {
+                    real d = solver->answer[i][j][k];
+                    fprintf(out, "%lf, %lf, %lf, 0.5, 0.5, 0.5, %lf\n", i + 0.5, j + 0.5, k + 0.5, d);
+                }
+            }
+        }
+    }
+
+    fclose(out);
+
+}
+
+void solve_eikonal(struct eikonal_solver *solver) {
+
+    if (solver->speeds == NULL) {
+        map_generator(solver);
+    }
+
+    solver->cuda_mem_created = false;
+    initialization(solver);
+    use_seeds(solver);
+    run_eikonal_solver_simple(&solver->memory_struct, solver->verbose);
+    get_solution(solver);
+
+}
diff --git a/src/eikonal/eikonal_solver.h b/src/eikonal/eikonal_solver.h
new file mode 100644
index 00000000..bede96fd
--- /dev/null
+++ b/src/eikonal/eikonal_solver.h
@@ -0,0 +1,26 @@
+#ifndef MONOALG3D_EIKONAL_SOLVER_H
+#define MONOALG3D_EIKONAL_SOLVER_H
+
+#include "../common_types/common_types.h"
+#include "common_def.h"
+#include "../alg/cell/cell.h"
+
+struct eikonal_solver {
+	bool verbose;
+	bool cuda_mem_created;
+	size_t width, height, depth, num_active_cells;
+	size_t iters_per_block, solver_type;
+	real ***speeds;
+	real ***answer;
+	size_t **seeds;
+	bool *** mask;
+    struct cell_node **active_cells;
+	CUDAMEMSTRUCT memory_struct;
+};
+
+struct eikonal_solver * new_eikonal_solver(bool verbose);
+void free_eikonal_solver(struct eikonal_solver *solver);
+void solve_eikonal(struct eikonal_solver *solver);
+void write_alg(struct eikonal_solver *solver, char *filename);
+
+#endif //MONOALG3D_EIKONAL_SOLVER_H
diff --git a/src/logger/logger.h b/src/logger/logger.h
index 58bf9da7..d56ca2f5 100644
--- a/src/logger/logger.h
+++ b/src/logger/logger.h
@@ -8,6 +8,8 @@
 #include <stdbool.h>
 #include <stdio.h>
 
+#define LOG_LINE_SEPARATOR "======================================================================\n"
+
 struct logt {
     FILE *fp;
     bool quiet;
diff --git a/src/main_eikonal.c b/src/main_eikonal.c
index 9d10e676..76dc7f7f 100644
--- a/src/main_eikonal.c
+++ b/src/main_eikonal.c
@@ -1,188 +1,43 @@
 #include <stdlib.h>
-#include <math.h>
+#include "alg/cell/cell.h"
 #include "alg/grid/grid.h"
+#include "3dparty/ini_parser/ini.h"
 #include "config/domain_config.h"
-#include "config/save_mesh_config.h"
-#include "config/stim_config.h"
-#include "utils/heap.h"
+#include "eikonal/eikonal_solver.h"
+#include "3dparty/stb_ds.h"
 #include "logger/logger.h"
-#include "3dparty/ini_parser/ini.h"
-#include "utils/file_utils.h"
+#include "config/config_parser.h"
 
-#define WAVE_SPEED 100.0 // um/ms //TODO: this should be a parameter
-#define APD 400.0 //TODO: make this a parameter
-#define REFRACTORY_PERIOD 200.0 //TODO: make this a parameter
 
-struct heap_point_hash_entry {
-    struct point_3d key;
-    struct heap_point value;
-};
+static int compare_coordinates(const void *a, const void *b) {
 
-static struct point_3d compute_wave_speed(struct point_3d conductivity_tensor) {
-    //Compute wave speed based on conductivity tensor (e.g., using eigenvalues)
-    //Example: For isotropic conductivity, return sqrt(1 / conductivity_tensor)
-    //For anisotropic conductivity, compute wave speed based on eigenvalues
-    //return POINT3D(sqrt(conductivity_tensor.x), sqrt(conductivity_tensor.y), sqrt(conductivity_tensor.z));
-    //return POINT3D(500.0, 500.0, 500.0); // um/ms
-    return SAME_POINT3D(WAVE_SPEED);
-}
+    struct cell_node *coord1 = *(struct  cell_node **) a;
+    struct cell_node *coord2 = *(struct  cell_node **) b;
 
-//TODO: add an hash map in the heap to make the search faster
-static int in_heap(struct point_distance_heap *heap, struct heap_point x, int n) {
-    for(int i = 0; i < n; i++) {
-        struct cell_node *cell = x.grid_cell;
-        if(heap->arr[i].grid_cell->center.x == cell->center.x && heap->arr[i].grid_cell->center.y == cell->center.y && 
-            heap->arr[i].grid_cell->center.z == cell->center.z) {
-            return i;
-        }
+    if (coord1->center.y != coord2->center.y) {
+        return coord1->center.y - coord2->center.y;
     }
-    return -1;
-}
-
-static real euclidean_distance(struct cell_node *cell1, struct cell_node *cell2) {
-
-    real dx = cell1->center.x - cell2->center.x;
-    real dy = cell1->center.y - cell2->center.y;
-    real dz = cell1->center.z - cell2->center.z;
-
-    return sqrt(dx * dx + dy * dy + dz * dz);
-}
-
-void compute_t(struct heap_point x, void *neighbour, struct heap_point_hash_entry *known, struct point_3d condutivity_tensor, struct point_distance_heap *heap, real *elapsed_time, real integrated_time, real dt) {    
-
-    void *neighbour_grid_cell = neighbour;
-    struct cell_node *grid_cell = x.grid_cell;
-    bool has_found;
-
-    struct transition_node *white_neighbor_cell;
 
-    uint16_t neighbour_grid_cell_level = ((struct basic_cell_data *)(neighbour_grid_cell))->level;
-    enum cell_type neighbour_grid_cell_type = ((struct basic_cell_data *)(neighbour_grid_cell))->type;
-
-    if(neighbour_grid_cell_level > grid_cell->cell_data.level) {
-        if(neighbour_grid_cell_type == TRANSITION_NODE) {
-            has_found = false;
-            while(!has_found) {
-                if(neighbour_grid_cell_type == TRANSITION_NODE) {
-                    white_neighbor_cell = (struct transition_node *)neighbour_grid_cell;
-                    if(white_neighbor_cell->single_connector == NULL) {
-                        has_found = true;
-                    } else {
-                        neighbour_grid_cell = white_neighbor_cell->quadruple_connector1;
-                        neighbour_grid_cell_type = ((struct basic_cell_data *)(neighbour_grid_cell))->type;
-                    }
-                } else {
-                    break;
-                }
-            }
-        }
-    } else {
-        if(neighbour_grid_cell_level <= grid_cell->cell_data.level && (neighbour_grid_cell_type == TRANSITION_NODE)) {
-            has_found = false;
-            while(!has_found) {
-                if(neighbour_grid_cell_type == TRANSITION_NODE) {
-                    white_neighbor_cell = (struct transition_node *)(neighbour_grid_cell);
-                    if(white_neighbor_cell->single_connector == 0) {
-                        has_found = true;
-                    } else {
-                        neighbour_grid_cell = white_neighbor_cell->single_connector;
-                        neighbour_grid_cell_type = ((struct basic_cell_data *)(neighbour_grid_cell))->type;
-                    }
-                } else {
-                    break;
-                }
-            }
-        }
+    if (coord1->center.x != coord2->center.x) {
+          return coord1->center.x - coord2->center.x;
     }
 
-    if(neighbour_grid_cell_type == CELL_NODE) {
 
-        struct cell_node *neighbour_cell = (struct cell_node *)(neighbour_grid_cell);                
-        
-        struct heap_point tmp = hmget(known, neighbour_cell->center);
+    return coord1->center.z - coord2->center.z;
 
-        if(!neighbour_cell->active || tmp.grid_cell != NULL) {
-            return;
-        }
-
-        struct heap_point xi;
-        xi.grid_cell = neighbour_cell;
-
-        int index = in_heap(heap, xi, heap->size);        
-
-        real neighbour_time = INFINITY;
-
-        real wave_speed = compute_wave_speed(condutivity_tensor).x;
-
-        //TODO: we have to know the neighbour's direction to calculate the wave speed
-        real tentative_time = x.time + euclidean_distance(grid_cell, neighbour_grid_cell)/wave_speed;
-
-        if(elapsed_time != NULL) {
-            *elapsed_time = tentative_time - integrated_time;        
-            if(*elapsed_time > dt) {
-                heap_push(heap, x); 
-                return;
-            }
-        }
-
-        if(index != -1) {            
-            neighbour_time = heap->arr[index].time;
-
-            if(tentative_time < neighbour_time) {       
-                heap->arr[index].time = tentative_time;
-                heap->arr[index].grid_cell->v = tentative_time;
-            }
-
-        } else {            
-            xi.time = tentative_time;   
-            xi.grid_cell->v = tentative_time;            
-            heap_push(heap, xi);
-        }       
-    }
 }
 
-
-int main(int argc, char **argv) {
+int main(int argc, char *argv[]) {
 
     struct eikonal_options *eikonal_options = new_eikonal_options();
     parse_eikonal_options(argc, argv, eikonal_options);
-    struct grid *grid = new_grid();  
-    
-    if (ini_parse(eikonal_options->config_file, parse_eikonal_config_file, eikonal_options) < 0) {
-        log_error_and_exit("Error: Can't load the config file %s\n", eikonal_options->config_file);
-    }
-
-    if(eikonal_options->dt_was_set == false) {
-        log_error_and_exit("The time step was not set! Exiting!\n");
-    }
+    struct grid *grid = new_grid();
 
-    if(eikonal_options->final_time_was_set == false) {
-        log_error_and_exit("The simulation final time was not set! Exiting!\n");
+    if(ini_parse(eikonal_options->config_file, parse_eikonal_config_file, eikonal_options) < 0) {
+        log_error_and_exit("Error: Can't load the config file %s\n", eikonal_options->config_file);
     }
 
     struct config *domain_config = eikonal_options->domain_config;
-    struct config *save_mesh_config = eikonal_options->save_mesh_config;
-    struct string_voidp_hash_entry *stimuli_configs = eikonal_options->stim_configs;
-
-    bool save_to_file = (save_mesh_config != NULL);
-    char *out_dir_name = NULL;
-    
-    if(save_to_file) {
-        init_config_functions(save_mesh_config, "./shared_libs/libdefault_save_mesh.so", "save_result");
-        GET_PARAMETER_STRING_VALUE_OR_USE_DEFAULT(out_dir_name, save_mesh_config, "output_dir");
-
-        if(out_dir_name == NULL) {
-            log_error("No output directory provided to save the results! Exiting!\n");         
-            exit(EXIT_FAILURE);
-        }
-        
-        create_dir(out_dir_name);
-
-    } else {
-        log_error("No configuration provided to save the results! Exiting!\n");
-        free(out_dir_name);
-        exit(EXIT_FAILURE);
-    }
 
     // Configure the functions and set the mesh domain
     if(domain_config) {
@@ -191,151 +46,51 @@ int main(int argc, char **argv) {
 
         print_domain_config_values(domain_config);
         log_msg(LOG_LINE_SEPARATOR);
-        
+
         bool success = ((set_spatial_domain_fn *)eikonal_options->domain_config->main_function)(domain_config, grid);
 
         if(!success) {
             log_error_and_exit("Error configuring the tissue domain!\n");
-        }     
-        
-        order_grid_cells(grid);
-    }        
-
-    real simulation_time = eikonal_options->final_time;
-    real dt = eikonal_options->dt;    
-
-    struct point_3d condutivity_tensor = SAME_POINT3D(0.00005336);       
-    
-    struct heap_point *data = (struct heap_point *) malloc(grid->num_active_cells * sizeof(struct heap_point));
-    struct point_distance_heap *trial = build_heap(data, 0, grid->num_active_cells);
-
-    struct heap_point_hash_entry *known = NULL;
-    struct heap_point default_entry = {NULL, -1};
-    hmdefault(known, default_entry);
-
-    struct heap_point_hash_entry *refractory = NULL;
-    hmdefault(refractory, default_entry);
-
-    if(stimuli_configs) {
-
-        size_t n = shlen(stimuli_configs);
-        struct time_info time_info = {0.0, 0.0, 0.0, 0};
-
-        if(n > 0) {
-            STIM_CONFIG_HASH_FOR_INIT_FUNCTIONS(stimuli_configs);
         }
 
-        for(int i = 0; i < n; i++) {
-
-            struct string_voidp_hash_entry e = stimuli_configs[i];
-            log_info("Stimulus name: %s\n", e.key);
-            struct config *tmp = (struct config *)e.value;
-            print_stim_config_values(tmp);
-            log_msg(LOG_LINE_SEPARATOR);
-        }
-
-        set_spatial_stim(&time_info, stimuli_configs, grid, false);
-
-        struct config *tmp = NULL;
-
-        uint32_t n_active = grid->num_active_cells;
-        struct cell_node **ac = grid->active_cells;        
-
-        for(size_t k = 0; k < n; k++) {
-
-            real stim_start = 0.0;
-            real stim_dur = 0.0;
-            real stim_period = 0.0;
-
-            tmp = (struct config *)stimuli_configs[k].value;
-            GET_PARAMETER_NUMERIC_VALUE_OR_REPORT_ERROR(real, stim_start, tmp, "start");
-            GET_PARAMETER_NUMERIC_VALUE_OR_REPORT_ERROR(real, stim_dur, tmp, "duration");
-            GET_PARAMETER_NUMERIC_VALUE_OR_USE_DEFAULT(real, stim_period, tmp, "period");
-
-            for(uint32_t i = 0; i < n_active; i++) {
-                real value =  ((real *)(tmp->persistent_data))[i];
-                if(value != 0.0) {
-                    struct heap_point node;
-                    node.grid_cell = ac[i]; 
-                    node.time = 0.0;
-                    node.grid_cell->v = node.time;
-                    hmput(known, ac[i]->center, node);
-                }
-            }
-            
-            // if(stim_period > 0.0) {
-            //     if(time >= stim_start + stim_period) {
-            //         stim_start = stim_start + stim_period;
-            //         sds stim_start_char = sdscatprintf(sdsempty(), "%lf", stim_start);
-            //         shput_dup_value(tmp->config_data, "start", stim_start_char);
-            //         sdsfree(stim_start_char);
-            //     }
-            // }
+        order_grid_cells(grid);
 
-            // time = cur_time;
-        }
+        qsort(grid->active_cells, grid->num_active_cells, sizeof(grid->active_cells[0]), compare_coordinates);
 
     }
 
-    struct time_info ti = ZERO_TIME_INFO;
-    ti.dt = dt;
-    ti.final_t = simulation_time;
-    ti.iteration = 0;     
-
-    CALL_INIT_SAVE_MESH(save_mesh_config);
+    for(int i = 0 ; i < grid->num_active_cells; i++) {
+        int new_pos_x = (grid->active_cells[i]->center.x-grid->active_cells[i]->discretization.x/2)/grid->active_cells[i]->discretization.x;
+        int new_pos_y = (grid->active_cells[i]->center.y-grid->active_cells[i]->discretization.y/2)/grid->active_cells[i]->discretization.y;
+        int new_pos_z = (grid->active_cells[i]->center.z-grid->active_cells[i]->discretization.z/2)/grid->active_cells[i]->discretization.z;
 
-    ((save_mesh_fn *)save_mesh_config->main_function)(&ti, save_mesh_config, grid, NULL, NULL);
-
-    for(int i = 0; i < hmlen(known); i++) {
-        struct heap_point x = known[i].value;
-        for (int j = 0; j <= LEFT; j++) {
-            compute_t(x, x.grid_cell->neighbours[j], known, condutivity_tensor, trial, NULL, 0.0, dt); 
-        }        
+        grid->active_cells[i]->center.x = new_pos_x;
+        grid->active_cells[i]->center.y = new_pos_y;
+        grid->active_cells[i]->center.z = new_pos_z;
     }
-    
-    real integrated_time = 0;    
 
-    while(integrated_time < simulation_time) {           
-        
-        real elapsed_time = 0.0;  
-        ti.current_t += dt;
-        ti.iteration += 1;
+    size_t itersPerBlock = 10, type = 1;
 
-        while(trial->size > 0 && elapsed_time <= dt) {
+    struct eikonal_solver *solver = new_eikonal_solver(false);
+    solver->width = grid->cube_side_length.x/grid->active_cells[0]->discretization.x;
+    solver->height = grid->cube_side_length.y/grid->active_cells[0]->discretization.y;
+    solver->depth = grid->cube_side_length.z/grid->active_cells[0]->discretization.z;
 
-            struct heap_point x = heap_pop(trial);                      
+    solver->solver_type = type;
+    solver->iters_per_block = itersPerBlock;
 
-            hmput(known, x.grid_cell->center, x);
+    size_t *initial_seed = calloc(sizeof(size_t), 3);
+    initial_seed[0] = grid->active_cells[0]->center.x;
+    initial_seed[1] = grid->active_cells[0]->center.y;
+    initial_seed[2] = grid->active_cells[0]->center.z;
 
-            for (int i = 0; i <= LEFT; i++) {
-                compute_t(x, x.grid_cell->neighbours[i], known, condutivity_tensor, trial, &elapsed_time, integrated_time, dt); 
-            }
-            
-            for(int i = 0; i < hmlen(known); i++) {
-                struct heap_point x = known[i].value;
-                if(elapsed_time - x.time > APD) {
-                    hmdel(known, x.grid_cell->center);
-                    x.repolarization_time = x.time + APD;
-                    hmput(refractory, x.grid_cell->center, x);
-                } 
-            }
+    arrput(solver->seeds, initial_seed);
+    solver->active_cells = grid->active_cells;
+    solver->num_active_cells = grid->num_active_cells;
 
-            for(int i = 0; i < hmlen(refractory); i++) {
-                struct heap_point x = refractory[i].value;
-                if(elapsed_time - x.repolarization_time > REFRACTORY_PERIOD) {
-                    hmdel(refractory, x.grid_cell->center);
-                }
-            }
-            
-        }
-        
-        integrated_time += dt;          
+    solve_eikonal(solver);
+    write_alg(solver, "test.alg");
 
-        ((save_mesh_fn *)save_mesh_config->main_function)(&ti, save_mesh_config, grid, NULL, NULL);                
-    
-    }
-    
-    CALL_END_SAVE_MESH(save_mesh_config, grid);
-    free_config_data(save_mesh_config);
+    free_eikonal_solver(solver);
 
 }
diff --git a/src/utils/build.sh b/src/utils/build.sh
index 6de5bfaa..cdfea18b 100755
--- a/src/utils/build.sh
+++ b/src/utils/build.sh
@@ -1,4 +1,4 @@
-UTILS_SOURCE_FILES="search.c stop_watch.c sort.c file_utils.c batch_utils.c heap.c"
-UTILS_HEADER_FILES="utils.h stop_watch.h file_utils.h batch_utils.c heap.h"
+UTILS_SOURCE_FILES="search.c stop_watch.c sort.c file_utils.c batch_utils.c"
+UTILS_HEADER_FILES="utils.h stop_watch.h file_utils.h batch_utils.c"
 
 COMPILE_STATIC_LIB "utils" "$UTILS_SOURCE_FILES" "$UTILS_HEADER_FILES"
diff --git a/src/utils/file_utils.h b/src/utils/file_utils.h
index d9f3e986..44f1ba36 100644
--- a/src/utils/file_utils.h
+++ b/src/utils/file_utils.h
@@ -5,8 +5,6 @@
 #ifndef MONOALG3D_FILE_UTILS_H
 #define MONOALG3D_FILE_UTILS_H
 
-#define LOG_LINE_SEPARATOR "======================================================================\n"
-
 #include <stdbool.h>
 #include <stddef.h>
 #include <stdio.h>
diff --git a/src/utils/heap.c b/src/utils/heap.c
deleted file mode 100644
index 0e41df24..00000000
--- a/src/utils/heap.c
+++ /dev/null
@@ -1,90 +0,0 @@
-#include <stdio.h>
-#include <stdlib.h>
-#include "heap.h"
-
-static void heapify(struct point_distance_heap* h, int index) {
-    int left = index * 2 + 1;
-    int right = index * 2 + 2;
-    int min = index;
- 
-    if (left >= h->size || left < 0)
-        left = -1;
-    if (right >= h->size || right < 0)
-        right = -1;
- 
-    if (left != -1 && h->arr[left].time < h->arr[index].time)
-        min = left;
-    if (right != -1 && h->arr[right].time < h->arr[min].time)
-        min = right;
- 
-    if (min != index) {
-        struct heap_point temp = h->arr[min];
-        h->arr[min] = h->arr[index];
-        h->arr[index] = temp;
-        heapify(h, min);
-    }
-}
-
-struct point_distance_heap * build_heap(struct heap_point* data, int size, int capacity) {
-
-    struct point_distance_heap* h = (struct point_distance_heap *) malloc(sizeof(struct point_distance_heap));
-    if (h == NULL) {
-        fprintf(stderr, "Error allocating memory for heap\n");
-        return NULL;
-    }
-
-    h->capacity = capacity; 
-    h->size = size;
-
-    h->arr = data;
-
-    int i = (h->size - 2) / 2;
-    while (i >= 0) {
-        heapify(h, i);
-        i--;
-    }
-    return h;
-}
-
-void insert_helper(struct point_distance_heap* h, int index) {
- 
-    int parent = (index - 1) / 2;
- 
-    if (h->arr[parent].time > h->arr[index].time) {
-        struct heap_point temp = h->arr[parent];
-        h->arr[parent] = h->arr[index];
-        h->arr[index] = temp;
-        insert_helper(h, parent);
-    }
-}
-
-struct heap_point heap_pop(struct point_distance_heap* h) {
-
-    struct heap_point delete_item = {NULL, -1};
- 
-    if (h->size == 0) {
-        printf("\nHeap id empty.");
-        return delete_item;
-    }
- 
-    delete_item = h->arr[0];
- 
-    h->arr[0] = h->arr[h->size - 1];
-    h->size--;
- 
-    heapify(h, 0);
-    return delete_item;
-}
- 
-void heap_push(struct point_distance_heap* h, struct heap_point data) {
- 
-    // Checking if heap is full or not
-    if (h->size < h->capacity) {
-        // Inserting data into an array
-        h->arr[h->size] = data;
-        // Calling insertHelper function
-        insert_helper(h, h->size);
-        // Incrementing size of array
-        h->size++;
-    }
-}
diff --git a/src/utils/heap.h b/src/utils/heap.h
deleted file mode 100644
index 320b4195..00000000
--- a/src/utils/heap.h
+++ /dev/null
@@ -1,18 +0,0 @@
-
-#include "../common_types/common_types.h"
-
-struct heap_point {
-    struct cell_node *grid_cell;
-    real time;
-    real repolarization_time;
-};
-
-struct point_distance_heap {
-    struct heap_point * arr;
-    int size;
-    int capacity;
-};
-
-struct point_distance_heap * build_heap(struct heap_point* data, int size, int capacity);
-struct heap_point heap_pop(struct point_distance_heap* h); 
-void heap_push(struct point_distance_heap *h, struct heap_point data);