From 0db53ce9502aa6c4b67e6ad7de786c2ab71c2dd6 Mon Sep 17 00:00:00 2001
From: Sam Tygier <74248560+samtygier-stfc@users.noreply.github.com>
Date: Wed, 14 Sep 2022 17:03:18 +0100
Subject: [PATCH] fdiff_direct_neumann use size_t for offsets (#1339)

offset1 and offset2 are approximately the size of volume so can overflow a 32bit int for a reasonable sized dataset.

Use long long for loops where signed values are needed (for openMP on MSVC), and size_t elsewhere.

Constify values that can be used in loop conditions

* fdiff_direct_periodic protect against overflow when long is 32bit

* fdiff_adjoint_neumann protect against overflow when long is 32bit

* fdiff_adjoint_periodic protect against overflow when long is 32bit

* FiniteDifferenceLibrary update wrapper functions to use size_t for array dimensions

* FiniteDifferenceLibrary use consistent check for z_dim

Co-authored-by: Gemma Fardell <47746591+gfardell@users.noreply.github.com>
---
 CHANGELOG.md                                  |  6 +-
 .../operators/GradientOperator.py             | 18 ++--
 src/Core/FiniteDifferenceLibrary.cpp          | 93 ++++++++++---------
 src/Core/include/FiniteDifferenceLibrary.h    | 14 +--
 4 files changed, 68 insertions(+), 63 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 9fae069198..073ae972bb 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,7 @@
 * 22.x.x
- - added multiple colormaps to show2D
+  - added multiple colormaps to show2D
+  - Fix segfault in GradientOperator due to parameter overflows on windows systems
+  
 * 22.0.0
   - Strongly convex functionality in TotalVariation and FGP_TV Functions
   - Refactored KullbackLeibler function class. Fix bug on gradient method for SIRF objects
@@ -29,7 +31,7 @@
     - TXRMDataReader is deprecated in favour of ZEISSDataReader 
   - GitHub Actions:
     - Update to version 0.1.1 of lauramurgatroyd/build-sphinx-action for building the documentation - ensures docs are always built from cil master
-
+    
 * 21.4.1
  - Removed prints from unittests and cleanup of unittest code.
  - CMake: 
diff --git a/Wrappers/Python/cil/optimisation/operators/GradientOperator.py b/Wrappers/Python/cil/optimisation/operators/GradientOperator.py
index a9af125f07..dc1a275387 100644
--- a/Wrappers/Python/cil/optimisation/operators/GradientOperator.py
+++ b/Wrappers/Python/cil/optimisation/operators/GradientOperator.py
@@ -240,10 +240,10 @@ def adjoint(self, x, out=None):
                        ctypes.POINTER(ctypes.c_float),
                        ctypes.POINTER(ctypes.c_float),
                        ctypes.POINTER(ctypes.c_float),
-                       ctypes.c_long,
-                       ctypes.c_long,
-                       ctypes.c_long,
-                       ctypes.c_long,
+                       ctypes.c_size_t,
+                       ctypes.c_size_t,
+                       ctypes.c_size_t,
+                       ctypes.c_size_t,
                        ctypes.c_int32,
                        ctypes.c_int32,
                        ctypes.c_int32]
@@ -252,9 +252,9 @@ def adjoint(self, x, out=None):
                        ctypes.POINTER(ctypes.c_float),
                        ctypes.POINTER(ctypes.c_float),
                        ctypes.POINTER(ctypes.c_float),
-                       ctypes.c_long,
-                       ctypes.c_long,
-                       ctypes.c_long,
+                       ctypes.c_size_t,
+                       ctypes.c_size_t,
+                       ctypes.c_size_t,
                        ctypes.c_int32,
                        ctypes.c_int32,
                        ctypes.c_int32]
@@ -262,8 +262,8 @@ def adjoint(self, x, out=None):
 cilacc.fdiff2D.argtypes = [ctypes.POINTER(ctypes.c_float),
                        ctypes.POINTER(ctypes.c_float),
                        ctypes.POINTER(ctypes.c_float),
-                       ctypes.c_long,
-                       ctypes.c_long,
+                       ctypes.c_size_t,
+                       ctypes.c_size_t,
                        ctypes.c_int32,
                        ctypes.c_int32,
                        ctypes.c_int32]
diff --git a/src/Core/FiniteDifferenceLibrary.cpp b/src/Core/FiniteDifferenceLibrary.cpp
index 51effb7bd6..b19add331d 100644
--- a/src/Core/FiniteDifferenceLibrary.cpp
+++ b/src/Core/FiniteDifferenceLibrary.cpp
@@ -15,32 +15,33 @@ DLL_EXPORT int openMPtest(int nThreads)
 	return nThreads_running;
 }
 
-int fdiff_direct_neumann(const float *inimagefull, float *outimageXfull, float *outimageYfull, float *outimageZfull, float *outimageCfull, long nx, long ny, long nz, long nc)
+int fdiff_direct_neumann(const float *inimagefull, float *outimageXfull, float *outimageYfull, float *outimageZfull, float *outimageCfull, size_t nx, size_t ny, size_t nz, size_t nc)
 {
-	size_t volume = nx * ny * nz;
+	const size_t volume = nx * ny * nz;
 
 	const float *inimage = inimagefull;
 	float *outimageX = outimageXfull;
 	float *outimageY = outimageYfull;
 	float *outimageZ = outimageZfull;
 
-	int offset1 = (nz - 1) * nx * ny;	  //ind to beginning of last slice
-	int offset2 = offset1 + (ny - 1) * nx; //ind to beginning of last row
+	const size_t offset1 = (nz - 1) * nx * ny;	  //ind to beginning of last slice
+	const size_t offset2 = offset1 + (ny - 1) * nx; //ind to beginning of last row
 
-	long c;
+	// For openMP in MSVC loop variables must be signed
+	long long c;
 	
-	int z_dim = nz > 1 ? 1: 0;
+	const bool z_dim = nz > 1;
 
 	for (c = 0; c < nc; c++)
 	{
 #pragma omp parallel
 		{
-			long ind, k, j, i;
+			long long ind, k, j, i;
 			float pix0;
 			//run over all and then fix boundaries
 
 #pragma omp for nowait
-			for (ind = 0; ind < nx * ny * (nz - 1); ind++)
+			for (ind = 0; ind < offset1; ind++)
 			{
 				pix0 = -inimage[ind];
 				outimageX[ind] = pix0 + inimage[ind + 1];
@@ -100,7 +101,7 @@ int fdiff_direct_neumann(const float *inimagefull, float *outimageXfull, float *
 	//now the rest of the channels
 	if (nc > 1)
 	{
-		long ind;
+		long long ind;
 
 		for (c = 0; c < nc - 1; c++)
 		{
@@ -123,29 +124,31 @@ int fdiff_direct_neumann(const float *inimagefull, float *outimageXfull, float *
 
 	return 0;
 }
-int fdiff_direct_periodic(const float *inimagefull, float *outimageXfull, float *outimageYfull, float *outimageZfull, float *outimageCfull, long nx, long ny, long nz, long nc)
+int fdiff_direct_periodic(const float *inimagefull, float *outimageXfull, float *outimageYfull, float *outimageZfull, float *outimageCfull, size_t nx, size_t ny, size_t nz, size_t nc)
 {
-	size_t volume = nx * ny * nz;
+	const size_t volume = nx * ny * nz;
 
 	const float *inimage = inimagefull;
 	float *outimageX = outimageXfull;
 	float *outimageY = outimageYfull;
 	float *outimageZ = outimageZfull;
 
-	int offset1 = (nz - 1) * nx * ny;	  //ind to beginning of last slice
-	int offset2 = offset1 + (ny - 1) * nx; //ind to beginning of last row
+	const size_t offset1 = (nz - 1) * nx * ny;	  //ind to beginning of last slice
+	const size_t offset2 = offset1 + (ny - 1) * nx; //ind to beginning of last row
 
-	long c;
+	const bool z_dim = nz > 1;
+
+	long long c;
 	for (c = 0; c < nc; c++)
 	{
 
 #pragma omp parallel
 		{
-			long ind, k;
+			long long ind, k;
 			float pix0;
 			//run over all and then fix boundaries
 #pragma omp for nowait
-			for (ind = 0; ind < nx * ny * (nz - 1); ind++)
+			for (ind = 0; ind < offset1; ind++)
 			{
 				pix0 = -inimage[ind];
 
@@ -177,8 +180,8 @@ int fdiff_direct_periodic(const float *inimagefull, float *outimageXfull, float
 			{
 				for (int i = 0; i < nx; i++)
 				{
-					int ind1 = (k * ny * nx);
-					int ind2 = ind1 + (ny - 1) * nx;
+					size_t ind1 = (k * ny * nx);
+					size_t ind2 = ind1 + (ny - 1) * nx;
 
 					outimageY[ind2 + i] = -inimage[ind2 + i] + inimage[ind1 + i];
 				}
@@ -189,14 +192,14 @@ int fdiff_direct_periodic(const float *inimagefull, float *outimageXfull, float
 			{
 				for (int j = 0; j < ny; j++)
 				{
-					int ind1 = k * ny * nx + j * nx;
-					int ind2 = ind1 + nx - 1;
+					size_t ind1 = k * ny * nx + j * nx;
+					size_t ind2 = ind1 + nx - 1;
 
 					outimageX[ind2] = -inimage[ind2] + inimage[ind1];
 				}
 			}
 
-			if (nz > 1)
+			if (z_dim)
 			{
 #pragma omp for nowait
 				for (ind = 0; ind < ny * nx; ind++)
@@ -215,7 +218,7 @@ int fdiff_direct_periodic(const float *inimagefull, float *outimageXfull, float
 	//now the rest of the channels
 	if (nc > 1)
 	{
-		long ind;
+		long long ind;
 
 		for (c = 0; c < nc - 1; c++)
 		{
@@ -238,13 +241,13 @@ int fdiff_direct_periodic(const float *inimagefull, float *outimageXfull, float
 
 	return 0;
 }
-int fdiff_adjoint_neumann(float *outimagefull, const float *inimageXfull, const float *inimageYfull, const float *inimageZfull, const float *inimageCfull, long nx, long ny, long nz, long nc)
+int fdiff_adjoint_neumann(float *outimagefull, const float *inimageXfull, const float *inimageYfull, const float *inimageZfull, const float *inimageCfull, size_t nx, size_t ny, size_t nz, size_t nc)
 {
 	//runs over full data in x, y, z. then corrects elements for bounday conditions and sums
-	size_t volume = nx * ny * nz;
+	const size_t volume = nx * ny * nz;
 
 	//assumes nx and ny > 1
-	int z_dim = nz - 1;
+	const bool z_dim = nz > 1;
 
 	float *outimage = outimagefull;
 	const float *inimageX = inimageXfull;
@@ -260,20 +263,20 @@ int fdiff_adjoint_neumann(float *outimagefull, const float *inimageXfull, const
 		tempZ = (float *)malloc(volume * sizeof(float));
 	}
 
-	long c;
+	long long c;
 	for (c = 0; c < nc; c++) //just calculating x, y and z in each channel here
 	{
 #pragma omp parallel
 		{
-			long ind, k;
+			long long ind, k;
 
 #pragma omp for
-			for (ind = 1; ind < nx * ny * nz; ind++)
+			for (ind = 1; ind < volume; ind++)
 			{
 				tempX[ind] = -inimageX[ind] + inimageX[ind - 1];
 			}
 #pragma omp for
-			for (ind = nx; ind < nx * ny * nz; ind++)
+			for (ind = nx; ind < volume; ind++)
 			{
 				tempY[ind] = -inimageY[ind] + inimageY[ind - nx];
 			}
@@ -282,7 +285,7 @@ int fdiff_adjoint_neumann(float *outimagefull, const float *inimageXfull, const
 #pragma omp for
 			for (k = 0; k < nz; k++)
 			{
-				for (int j = 0; j < ny; j++)
+				for (long long j = 0; j < ny; j++)
 				{
 					tempX[k * ny * nx + j * nx] = -inimageX[k * ny * nx + j * nx];
 					tempX[k * ny * nx + j * nx + nx - 1] = inimageX[k * ny * nx + j * nx + nx - 2];
@@ -291,7 +294,7 @@ int fdiff_adjoint_neumann(float *outimagefull, const float *inimageXfull, const
 #pragma omp for
 			for (k = 0; k < nz; k++)
 			{
-				for (int i = 0; i < nx; i++)
+				for (long long i = 0; i < nx; i++)
 				{
 					tempY[(k * ny * nx) + i] = -inimageY[(k * ny * nx) + i];
 					tempY[(k * ny * nx) + nx * (ny - 1) + i] = inimageY[(k * ny * nx) + nx * (ny - 2) + i];
@@ -301,7 +304,7 @@ int fdiff_adjoint_neumann(float *outimagefull, const float *inimageXfull, const
 			if (z_dim)
 			{
 #pragma omp for
-				for (ind = nx * ny; ind < nx * ny * nz; ind++)
+				for (ind = nx * ny; ind < volume; ind++)
 				{
 					tempZ[ind] = -inimageZ[ind] + inimageZ[ind - nx * ny];
 				}
@@ -341,7 +344,7 @@ int fdiff_adjoint_neumann(float *outimagefull, const float *inimageXfull, const
 	//	//now the rest of the channels
 	if (nc > 1)
 	{
-		long ind;
+		long long ind;
 
 		for (c = 1; c < nc - 1; c++)
 		{
@@ -363,13 +366,13 @@ int fdiff_adjoint_neumann(float *outimagefull, const float *inimageXfull, const
 
 	return 0;
 }
-int fdiff_adjoint_periodic(float *outimagefull, const float *inimageXfull, const float *inimageYfull, const float *inimageZfull, const float *inimageCfull, long nx, long ny, long nz, long nc)
+int fdiff_adjoint_periodic(float *outimagefull, const float *inimageXfull, const float *inimageYfull, const float *inimageZfull, const float *inimageCfull, size_t nx, size_t ny, size_t nz, size_t nc)
 {
 	//runs over full data in x, y, z. then correctects elements for bounday conditions and sums
-	size_t volume = nx * ny * nz;
+	const size_t volume = nx * ny * nz;
 
 	//assumes nx and ny > 1
-	int z_dim = nz - 1;
+	const bool z_dim = nz > 1;
 
 	float *outimage = outimagefull;
 	const float *inimageX = inimageXfull;
@@ -385,12 +388,12 @@ int fdiff_adjoint_periodic(float *outimagefull, const float *inimageXfull, const
 		tempZ = (float *)malloc(volume * sizeof(float));
 	}
 
-	long c;
+	long long c;
 	for (c = 0; c < nc; c++) //just calculating x, y and z in each channel here
 	{
 #pragma omp parallel
 		{
-			long ind, k;
+			long long ind, k;
 
 			//run over all and then fix boundaries
 #pragma omp for
@@ -408,7 +411,7 @@ int fdiff_adjoint_periodic(float *outimagefull, const float *inimageXfull, const
 #pragma omp for
 			for (k = 0; k < nz; k++)
 			{
-				for (int i = 0; i < nx; i++)
+				for (long long i = 0; i < nx; i++)
 				{
 					tempY[(k * ny * nx) + i] = -inimageY[(k * ny * nx) + i] + inimageY[(k * ny * nx) + nx * (ny - 1) + i];
 				}
@@ -416,7 +419,7 @@ int fdiff_adjoint_periodic(float *outimagefull, const float *inimageXfull, const
 #pragma omp for
 			for (k = 0; k < nz; k++)
 			{
-				for (int j = 0; j < ny; j++)
+				for (long long j = 0; j < ny; j++)
 				{
 					tempX[k * ny * nx + j * nx] = -inimageX[k * ny * nx + j * nx] + inimageX[k * ny * nx + j * nx + nx - 1];
 				}
@@ -426,7 +429,7 @@ int fdiff_adjoint_periodic(float *outimagefull, const float *inimageXfull, const
 			{
 
 #pragma omp for
-				for (ind = nx * ny; ind < nx * ny * nz; ind++)
+				for (ind = nx * ny; ind < volume; ind++)
 				{
 					tempZ[ind] = -inimageZ[ind] + inimageZ[ind - nx * ny];
 				}
@@ -466,7 +469,7 @@ int fdiff_adjoint_periodic(float *outimagefull, const float *inimageXfull, const
 	//now the rest of the channels
 	if (nc > 1)
 	{
-		long ind;
+		long long ind;
 
 		for (c = 1; c < nc; c++)
 		{
@@ -488,7 +491,7 @@ int fdiff_adjoint_periodic(float *outimagefull, const float *inimageXfull, const
 	return 0;
 }
 
-DLL_EXPORT int fdiff4D(float *imagefull, float *gradCfull, float *gradZfull, float *gradYfull, float *gradXfull, long nc, long nz, long ny, long nx, int boundary, int direction, int nThreads)
+DLL_EXPORT int fdiff4D(float *imagefull, float *gradCfull, float *gradZfull, float *gradYfull, float *gradXfull, size_t nc, size_t nz, size_t ny, size_t nx, int boundary, int direction, int nThreads)
 {
 	int nThreads_initial;
 	threads_setup(nThreads, &nThreads_initial);
@@ -511,7 +514,7 @@ DLL_EXPORT int fdiff4D(float *imagefull, float *gradCfull, float *gradZfull, flo
 	omp_set_num_threads(nThreads_initial);
 	return 0;
 }
-DLL_EXPORT int fdiff3D(float *imagefull, float *gradZfull, float *gradYfull, float *gradXfull, long nz, long ny, long nx, int boundary, int direction, int nThreads)
+DLL_EXPORT int fdiff3D(float *imagefull, float *gradZfull, float *gradYfull, float *gradXfull, size_t nz, size_t ny, size_t nx, int boundary, int direction, int nThreads)
 {
 	int nThreads_initial;
 	threads_setup(nThreads, &nThreads_initial);
@@ -534,7 +537,7 @@ DLL_EXPORT int fdiff3D(float *imagefull, float *gradZfull, float *gradYfull, flo
 	omp_set_num_threads(nThreads_initial);
 	return 0;
 }
-DLL_EXPORT int fdiff2D(float *imagefull, float *gradYfull, float *gradXfull, long ny, long nx, int boundary, int direction, int nThreads)
+DLL_EXPORT int fdiff2D(float *imagefull, float *gradYfull, float *gradXfull, size_t ny, size_t nx, int boundary, int direction, int nThreads)
 {
 	int nThreads_initial;
 	threads_setup(nThreads, &nThreads_initial);
diff --git a/src/Core/include/FiniteDifferenceLibrary.h b/src/Core/include/FiniteDifferenceLibrary.h
index f85fb1538c..e61a135857 100644
--- a/src/Core/include/FiniteDifferenceLibrary.h
+++ b/src/Core/include/FiniteDifferenceLibrary.h
@@ -5,19 +5,19 @@
 #include "dll_export.h"
 #include "utilities.h"
 
-int fdiff_direct_neumann(const float *inimagefull, float *outimageXfull, float *outimageYfull, float *outimageZfull, float *outimageCfull, long nx, long ny, long nz, long nc);
-int fdiff_direct_periodic(const float *inimagefull, float *outimageXfull, float *outimageYfull, float *outimageZfull, float *outimageCfull, long nx, long ny, long nz, long nc);
-int fdiff_adjoint_neumann(float *outimagefull, const float *inimageXfull, const float *inimageYfull, const float *inimageZfull, const float *inimageCfull, long nx, long ny, long nz, long nc);
-int fdiff_adjoint_periodic(float *outimagefull, const float *inimageXfull, const float *inimageYfull, const float *inimageZfull, const float *inimageCfull, long nx, long ny, long nz, long nc);
+int fdiff_direct_neumann(const float *inimagefull, float *outimageXfull, float *outimageYfull, float *outimageZfull, float *outimageCfull, size_t nx, size_t ny, size_t nz, size_t nc);
+int fdiff_direct_periodic(const float *inimagefull, float *outimageXfull, float *outimageYfull, float *outimageZfull, float *outimageCfull, size_t nx, size_t ny, size_t nz, size_t nc);
+int fdiff_adjoint_neumann(float *outimagefull, const float *inimageXfull, const float *inimageYfull, const float *inimageZfull, const float *inimageCfull, size_t nx, size_t ny, size_t nz, size_t nc);
+int fdiff_adjoint_periodic(float *outimagefull, const float *inimageXfull, const float *inimageYfull, const float *inimageZfull, const float *inimageCfull, size_t nx, size_t ny, size_t nz, size_t nc);
 
 #ifdef __cplusplus
 extern "C" {
 #endif
 
 DLL_EXPORT int openMPtest(int nThreads);
-DLL_EXPORT int fdiff4D(float *imagefull, float *gradCfull, float *gradZfull, float *gradYfull, float *gradXfull, long nc, long nz, long ny, long nx, int boundary, int direction, int nThreads);
-DLL_EXPORT int fdiff3D(float *imagefull, float *gradZfull, float *gradYfull, float *gradXfull, long nz, long ny, long nx, int boundary, int direction, int nThreads);
-DLL_EXPORT int fdiff2D(float *imagefull, float *gradYfull, float *gradXfull, long ny, long nx, int boundary, int direction, int nThreads);
+DLL_EXPORT int fdiff4D(float *imagefull, float *gradCfull, float *gradZfull, float *gradYfull, float *gradXfull, size_t nc, size_t nz, size_t ny, size_t nx, int boundary, int direction, int nThreads);
+DLL_EXPORT int fdiff3D(float *imagefull, float *gradZfull, float *gradYfull, float *gradXfull, size_t nz, size_t ny, size_t nx, int boundary, int direction, int nThreads);
+DLL_EXPORT int fdiff2D(float *imagefull, float *gradYfull, float *gradXfull, size_t ny, size_t nx, int boundary, int direction, int nThreads);
 
 #ifdef __cplusplus
 }