diff --git a/psydac/core/bsplines.py b/psydac/core/bsplines.py index 39e714524..3f19be95e 100644 --- a/psydac/core/bsplines.py +++ b/psydac/core/bsplines.py @@ -14,7 +14,8 @@ - [2] SELALIB, Semi-Lagrangian Library. http://selalib.gforge.inria.fr """ -import numpy as np +import cunumpy as xp +from cunumpy.xp import array_backend from psydac.core.bsplines_kernels import (find_span_p, find_spans_p, @@ -81,7 +82,7 @@ def find_span(knots, degree, x): Knot span index. """ x = float(x) - knots = np.ascontiguousarray(knots, dtype=float) + knots = xp.ascontiguousarray(knots, dtype=float) return find_span_p(knots, degree, x) #============================================================================== @@ -113,12 +114,12 @@ def find_spans(knots, degree, x, out=None): spans : array of ints Knots span indexes. """ - knots = np.ascontiguousarray(knots, dtype=float) - x = np.ascontiguousarray(x, dtype=float) + knots = xp.ascontiguousarray(knots, dtype=float) + x = xp.ascontiguousarray(x, dtype=float) if out is None: - out = np.zeros_like(x, dtype=int) + out = xp.zeros_like(x, dtype=int) else: - assert out.shape == x.shape and out.dtype == np.dtype('int') + assert out.shape == x.shape and out.dtype == xp.dtype('int') find_spans_p(knots, degree, x, out) return out @@ -152,13 +153,13 @@ def basis_funs(knots, degree, x, span, out=None): 1D array containing the values of ``degree + 1`` non-zero Bsplines at location ``x``. 
""" - knots = np.ascontiguousarray(knots, dtype=float) + knots = xp.ascontiguousarray(knots, dtype=float) # Get native float x = float(x) if out is None: - out = np.zeros(degree + 1, dtype=float) + out = xp.zeros(degree + 1, dtype=float) else: - assert out.shape == (degree + 1,) and out.dtype == np.dtype('float') + assert out.shape == (degree + 1,) and out.dtype == xp.dtype('float') basis_funs_p(knots, degree, x, span, out) return out @@ -190,12 +191,12 @@ def basis_funs_array(knots, degree, span, x, out=None): 2D array of shape ``(len(x), degree + 1)`` containing the values of ``degree + 1`` non-zero Bsplines at each location in ``x``. """ - knots = np.ascontiguousarray(knots, dtype=float) - x = np.ascontiguousarray(x, dtype=float) + knots = xp.ascontiguousarray(knots, dtype=float) + x = xp.ascontiguousarray(x, dtype=float) if out is None: - out = np.zeros(x.shape + (degree + 1,), dtype=float) + out = xp.zeros(x.shape + (degree + 1,), dtype=float) else: - assert out.shape == x.shape + (degree + 1,) and out.dtype == np.dtype('float') + assert out.shape == x.shape + (degree + 1,) and out.dtype == xp.dtype('float') basis_funs_array_p(knots, degree, x, span, out) return out @@ -237,13 +238,13 @@ def basis_funs_1st_der(knots, degree, x, span, out=None): ---------- .. [2] SELALIB, Semi-Lagrangian Library. 
http://selalib.gforge.inria.fr """ - knots = np.ascontiguousarray(knots, dtype=float) + knots = xp.ascontiguousarray(knots, dtype=float) # Get native float to work on windows x = float(x) if out is None: - out = np.zeros(degree + 1, dtype=float) + out = xp.zeros(degree + 1, dtype=float) else: - assert out.shape == (degree + 1,) and out.dtype == np.dtype('float') + assert out.shape == (degree + 1,) and out.dtype == xp.dtype('float') basis_funs_1st_der_p(knots, degree, x, span, out) return out @@ -288,13 +289,13 @@ def basis_funs_all_ders(knots, degree, x, span, n, normalization='B', out=None): ders[i,j] = (d/dx)^i B_k(x) with k=(span-degree+j), for 0 <= i <= n and 0 <= j <= degree+1. """ - knots = np.ascontiguousarray(knots, dtype=float) + knots = xp.ascontiguousarray(knots, dtype=float) # Get native float to work on windows x = float(x) if out is None: - out = np.zeros((n + 1, degree + 1), dtype=float) + out = xp.zeros((n + 1, degree + 1), dtype=float) else: - assert out.shape == (n + 1, degree + 1) and out.dtype == np.dtype('float') + assert out.shape == (n + 1, degree + 1) and out.dtype == xp.dtype('float') basis_funs_all_ders_p(knots, degree, x, span, n, normalization == 'M', out) return out @@ -341,18 +342,18 @@ def collocation_matrix(knots, degree, periodic, normalization, xgrid, out=None, values of each B-spline basis function :math:`B_j` at all locations :math:`x_i`. 
""" if xgrid.size == 1: - return np.ones((1, 1), dtype=float) + return xp.ones((1, 1), dtype=float) - knots = np.ascontiguousarray(knots, dtype=float) - xgrid = np.ascontiguousarray(xgrid, dtype=float) + knots = xp.ascontiguousarray(knots, dtype=float) + xgrid = xp.ascontiguousarray(xgrid, dtype=float) if out is None: nb = len(knots) - degree - 1 if periodic: nb -= degree + 1 - multiplicity - out = np.zeros((xgrid.shape[0], nb), dtype=float) + out = xp.zeros((int(xgrid.shape[0]), int(nb)), dtype=float) else: - assert out.shape == ((xgrid.shape[0], nb)) and out.dtype == np.dtype('float') + assert out.shape == ((int(xgrid.shape[0]), int(nb))) and out.dtype == xp.dtype('float') bool_normalization = normalization == "M" multiplicity = int(multiplicity) @@ -406,11 +407,11 @@ def histopolation_matrix(knots, degree, periodic, normalization, xgrid, multipli two successive grid points. """ # Check that knots are ordered (but allow repeated knots) - if not np.all(np.diff(knots) >= 0): + if not xp.all(xp.diff(knots) >= 0): raise ValueError("Cannot accept knot sequence: {}".format(knots)) # Check that spline degree is non-negative integer - if not isinstance(degree, (int, np.integer)): + if not isinstance(degree, (int, xp.integer)): raise TypeError("Degree {} must be integer, got type {} instead".format(degree, type(degree))) if degree < 0: raise ValueError("Cannot accept negative degree: {}".format(degree)) @@ -424,26 +425,26 @@ def histopolation_matrix(knots, degree, periodic, normalization, xgrid, multipli raise ValueError("Cannot accept 'normalization' parameter: {}".format(normalization)) # Check that grid points are ordered, and do not allow repetitions - if not np.all(np.diff(xgrid) > 0): + if not xp.all(xp.diff(xgrid) > 0): raise ValueError("Grid points must be ordered, with no repetitions: {}".format(xgrid)) - knots = np.ascontiguousarray(knots, dtype=float) - xgrid = np.ascontiguousarray(xgrid, dtype=float) + knots = xp.ascontiguousarray(knots, dtype=float) + xgrid = 
xp.ascontiguousarray(xgrid, dtype=float) elevated_knots = elevate_knots(knots, degree, periodic, multiplicity=multiplicity) normalization = normalization == "M" if out is None: if periodic: - out = np.zeros((len(xgrid), len(knots) - 2 * degree - 2 + multiplicity), dtype=float) + out = xp.zeros((len(xgrid), len(knots) - 2 * int(degree) - 2 + int(multiplicity)), dtype=float) else: - out = np.zeros((len(xgrid) - 1, len(elevated_knots) - (degree + 1) - 1 - 1), dtype=float) + out = xp.zeros((len(xgrid) - 1, len(elevated_knots) - (int(degree) + 1) - 1 - 1), dtype=float) else: if periodic: assert out.shape == (len(xgrid), len(knots) - 2 * degree - 2 + multiplicity) else: assert out.shape == (len(xgrid) - 1, len(elevated_knots) - (degree + 1) - 1 - 1) - assert out.dtype == np.dtype('float') + assert out.dtype == xp.dtype('float') multiplicity = int(multiplicity) histopolation_matrix_p(knots, degree, periodic, normalization, xgrid, check_boundary, elevated_knots, out, multiplicity = multiplicity) return out @@ -474,11 +475,11 @@ def breakpoints(knots, degree, tol=1e-15, out=None): breaks : numpy.ndarray (1D) Abscissas of all breakpoints. """ - knots = np.ascontiguousarray(knots, dtype=float) + knots = xp.ascontiguousarray(knots, dtype=float) if out is None: - out = np.zeros(len(knots), dtype=float) + out = xp.zeros(len(knots), dtype=float) else: - assert out.shape == knots.shape and out.dtype == np.dtype('float') + assert out.shape == knots.shape and out.dtype == xp.dtype('float') i_final = breakpoints_p(knots, degree, out, tol) return out[:i_final] @@ -512,10 +513,10 @@ def greville(knots, degree, periodic, out=None, multiplicity=1): Abscissas of all Greville points. 
""" - knots = np.ascontiguousarray(knots, dtype=float) + knots = xp.ascontiguousarray(knots, dtype=float) if out is None: n = len(knots) - 2 * degree - 2 + multiplicity if periodic else len(knots) - degree - 1 - out = np.zeros(n) + out = xp.zeros(int(n)) multiplicity = int(multiplicity) greville_p(knots, degree, periodic, out, multiplicity) return out @@ -545,11 +546,11 @@ def elements_spans(knots, degree, out=None): Examples -------- - >>> import numpy as np + >>> import cunumpy as xp >>> from psydac.core.bsplines import make_knots, elements_spans >>> p = 3 ; n = 8 - >>> grid = np.arange( n-p+1 ) + >>> grid = xp.arange( n-p+1 ) >>> knots = make_knots( breaks=grid, degree=p, periodic=False ) >>> spans = elements_spans( knots=knots, degree=p ) >>> spans @@ -561,14 +562,14 @@ def elements_spans(knots, degree, out=None): 2) This function could be written in two lines: breaks = breakpoints( knots, degree ) - spans = np.searchsorted( knots, breaks[:-1], side='right' ) - 1 + spans = xp.searchsorted( knots, breaks[:-1], side='right' ) - 1 """ - knots = np.ascontiguousarray(knots, dtype=float) + knots = xp.ascontiguousarray(knots, dtype=float) if out is None: - out = np.zeros(len(knots), dtype=np.int64) + out = xp.zeros(len(knots), dtype=xp.int64) else: - assert out.shape == knots.shape and out.dtype == np.dtype('int64') + assert out.shape == knots.shape and out.dtype == xp.dtype('int64') i_final = elements_spans_p(knots, degree, out) return out[:i_final] @@ -615,7 +616,7 @@ def make_knots(breaks, degree, periodic, multiplicity=1, out=None): # Consistency checks assert len(breaks) > 1 - assert all( np.diff(breaks) > 0 ) + assert all( xp.diff(breaks) > 0 ) assert degree >= 0 assert 1 <= multiplicity and multiplicity <= degree + 1 # Cast potential numpy.int64 into python native int @@ -624,12 +625,12 @@ def make_knots(breaks, degree, periodic, multiplicity=1, out=None): if periodic: assert len(breaks) > degree - breaks = np.ascontiguousarray(breaks, dtype=float) + breaks = 
xp.ascontiguousarray(breaks, dtype=float) if out is None: - out = np.zeros(multiplicity * len(breaks[1:-1]) + 2 + 2 * degree) + out = xp.zeros(multiplicity * len(breaks[1:-1]) + 2 + 2 * degree) else: assert out.shape == (multiplicity * len(breaks[1:-1]) + 2 + 2 * degree,) \ - and out.dtype == np.dtype('float') + and out.dtype == xp.dtype('float') make_knots_p(breaks, degree, periodic, out, multiplicity) return out @@ -674,25 +675,25 @@ def elevate_knots(knots, degree, periodic, multiplicity=1, tol=1e-15, out=None): Knots sequence of spline space of degree p+1. """ multiplicity = int(multiplicity) - knots = np.ascontiguousarray(knots, dtype=float) + knots = xp.ascontiguousarray(knots, dtype=float) if out is None: if periodic: - out = np.zeros(knots.shape[0] + 2, dtype=float) + out = xp.zeros(knots.shape[0] + 2, dtype=float) else: shape = 2*(degree + 2) if len(knots) - 2 * (degree + 1) > 0: - uniques = (np.diff(knots[degree + 1:-degree - 1]) > tol).nonzero() + uniques = (xp.diff(knots[degree + 1:-degree - 1]) > tol).nonzero() shape += multiplicity * (1 + uniques[0].shape[0]) - out = np.zeros(shape, dtype=float) + out = xp.zeros(shape, dtype=float) else: if periodic: - assert out.shape == (knots.shape[0] + 2,) and out.dtype == np.dtype('float') + assert out.shape == (knots.shape[0] + 2,) and out.dtype == xp.dtype('float') else: shape = 2*(degree + 2) if len(knots) - 2 * (degree + 1) > 0: - uniques = (np.diff(knots[degree + 1:-degree - 1]) > tol).nonzero() + uniques = (xp.diff(knots[degree + 1:-degree - 1]) > tol).nonzero() shape += multiplicity * (1 + uniques[0].shape[0]) - assert out.shape == shape and out.dtype == np.dtype('float') + assert out.shape == shape and out.dtype == xp.dtype('float') elevate_knots_p(knots, degree, periodic, out, multiplicity, tol) return out @@ -749,13 +750,18 @@ def quadrature_grid(breaks, quad_rule_x, quad_rule_w): assert min(quad_rule_x) >= -1 assert max(quad_rule_x) <= +1 - breaks = np.ascontiguousarray(breaks, dtype=float) + breaks = 
xp.ascontiguousarray(breaks, dtype=float) - quad_rule_x = np.ascontiguousarray( quad_rule_x, dtype=float ) - quad_rule_w = np.ascontiguousarray( quad_rule_w, dtype=float ) + if array_backend.backend == "cupy": + quad_rule_x = xp.ascontiguousarray(xp.array(quad_rule_x), dtype=float) + quad_rule_w = xp.ascontiguousarray( xp.array(quad_rule_w), dtype=float ) + else: + quad_rule_x = xp.ascontiguousarray(quad_rule_x, dtype=float) + quad_rule_w = xp.ascontiguousarray( quad_rule_w, dtype=float ) + - out1 = np.zeros((len(breaks) - 1, len(quad_rule_x))) - out2 = np.zeros_like(out1) + out1 = xp.zeros((len(breaks) - 1, len(quad_rule_x))) + out2 = xp.zeros_like(out1) quadrature_grid_p(breaks, quad_rule_x, quad_rule_w, out1, out2) @@ -804,10 +810,10 @@ def basis_ders_on_quad_grid(knots, degree, quad_grid, nders, normalization, offs Examples -------- - >>> knots = np.array([0.0, 0.0, 0.25, 0.5, 0.75, 1., 1.]) + >>> knots = xp.array([0.0, 0.0, 0.25, 0.5, 0.75, 1., 1.]) >>> degree = 2 >>> bk = breakpoints(knots, degree) - >>> grid = np.array([np.linspace(bk[i], bk[i+1], 4, endpoint=False) for i in range(len(bk) - 1)]) + >>> grid = xp.array([xp.linspace(bk[i], bk[i+1], 4, endpoint=False) for i in range(len(bk) - 1)]) >>> basis_ders_on_quad_grid(knots, degree, grid, 0, "B") array([[[[0.5, 0.28125, 0.125, 0.03125]], [[0.5, 0.6875 , 0.75 , 0.6875 ]], @@ -818,12 +824,12 @@ def basis_ders_on_quad_grid(knots, degree, quad_grid, nders, normalization, offs """ offset = int(offset) ne, nq = quad_grid.shape - knots = np.ascontiguousarray(knots, dtype=float) - quad_grid = np.ascontiguousarray(quad_grid, dtype=float) + knots = xp.ascontiguousarray(knots, dtype=float) + quad_grid = xp.ascontiguousarray(quad_grid, dtype=float) if out is None: - out = np.zeros((ne, degree + 1, nders + 1, nq), dtype=float) + out = xp.zeros((ne, degree + 1, nders + 1, nq), dtype=float) else: - assert out.shape == (ne, degree + 1, nders + 1, nq) and out.dtype == np.dtype('float') + assert out.shape == (ne, degree + 
1, nders + 1, nq) and out.dtype == xp.dtype('float') basis_ders_on_quad_grid_p(knots, degree, quad_grid, nders, normalization == 'M', offset, out) return out @@ -862,11 +868,11 @@ def basis_integrals(knots, degree, out=None): to (len(knots)-degree-1). In the periodic case the last (degree) values in the array are redundant, as they are a copy of the first (degree) values. """ - knots = np.ascontiguousarray(knots, dtype=float) + knots = xp.ascontiguousarray(knots, dtype=float) if out is None: - out = np.zeros(len(knots) - degree - 1, dtype=float) + out = xp.zeros(len(knots) - degree - 1, dtype=float) else: - assert out.shape is (len(knots) - degree - 1,) and out.dtype == np.dtype('float') + assert out.shape is (len(knots) - degree - 1,) and out.dtype == xp.dtype('float') basis_integrals_p(knots, degree, out) return out @@ -904,12 +910,12 @@ def cell_index(breaks, i_grid, tol=1e-15, out=None): ``cell_index[i]`` is the index of the cell in which ``i_grid[i]`` belong. """ - breaks = np.ascontiguousarray(breaks, dtype=float) - i_grid = np.ascontiguousarray(i_grid, dtype=float) + breaks = xp.ascontiguousarray(breaks, dtype=float) + i_grid = xp.ascontiguousarray(i_grid, dtype=float) if out is None: - out = np.zeros_like(i_grid, dtype=np.int64) + out = xp.zeros_like(i_grid, dtype=xp.int64) else: - assert out.shape == i_grid.shape and out.dtype == np.dtype('int64') + assert out.shape == i_grid.shape and out.dtype == xp.dtype('int64') status = cell_index_p(breaks, i_grid, tol, out) if status == -1: raise ValueError("Encountered a point that was outside of the domain") @@ -960,13 +966,13 @@ def basis_ders_on_irregular_grid(knots, degree, i_grid, cell_index, nders, norma . il: local basis function (0 <= il <= degree) . 
id: derivative (0 <= id <= nders ) """ - knots = np.ascontiguousarray(knots, dtype=float) - i_grid = np.ascontiguousarray(i_grid, dtype=float) + knots = xp.ascontiguousarray(knots, dtype=float) + i_grid = xp.ascontiguousarray(i_grid, dtype=float) if out is None: nx = i_grid.shape[0] - out = np.zeros((nx, degree + 1, nders + 1), dtype=float) + out = xp.zeros((nx, degree + 1, nders + 1), dtype=float) else: - assert out.shape == (nx, degree + 1, nders + 1) and out.dtype == np.dtype('float') + assert out.shape == (nx, degree + 1, nders + 1) and out.dtype == xp.dtype('float') basis_ders_on_irregular_grid_p(knots, degree, i_grid, cell_index, nders, normalization == 'M', out) return out @@ -993,7 +999,7 @@ def _refinement_matrix_one_stage(t, p, knots): Returns ------- - mat : np.array[:,:] + mat : xp.array[:,:] h-refinement matrix. new_knots : array_like @@ -1016,7 +1022,7 @@ def alpha_function(i, k, t, n, p, knots): n = len(knots) - p - 1 - mat = np.zeros((n+1,n)) + mat = xp.zeros((n+1,n)) left = find_span( knots, p, t ) @@ -1036,7 +1042,7 @@ def alpha_function(i, k, t, n, p, knots): # ... # ... - new_knots = np.zeros(n+1+p+1) + new_knots = xp.zeros(n+1+p+1) new_knots[:left+1] = knots[:left+1] new_knots[left+1] = t @@ -1057,7 +1063,7 @@ def hrefinement_matrix(ts, p, knots): Parameters ---------- - ts: np.array + ts: xp.array array containing the knots to be inserted p: int @@ -1068,18 +1074,18 @@ def hrefinement_matrix(ts, p, knots): Returns ------- - mat : np.array[:,:] + mat : xp.array[:,:] h-refinement matrix Examples -------- - >>> import numpy as np + >>> import cunumpy as xp >>> from psydac.core.bsplines import make_knots >>> from psydac.core.bsplines import hrefinement_matrix - >>> grid = np.linspace(0.,1.,5) + >>> grid = xp.linspace(0.,1.,5) >>> degree = 2 >>> knots = make_knots(grid, degree, periodic=False) - >>> ts = np.array([0.1, 0.2, 0.4, 0.5, 0.7, 0.8]) + >>> ts = xp.array([0.1, 0.2, 0.4, 0.5, 0.7, 0.8]) >>> hrefinement_matrix(ts, p, knots) array([[1. , 0. 
, 0. , 0. , 0. , 0. ], [0.6 , 0.4 , 0. , 0. , 0. , 0. ], @@ -1097,11 +1103,11 @@ def hrefinement_matrix(ts, p, knots): m = len(ts) n = len(knots) - p - 1 - out = np.eye(n) + out = xp.eye(n) for i in range(m): t = ts[i] mat, knots = _refinement_matrix_one_stage(t, p, knots) - out = np.matmul(mat, out) + out = xp.matmul(mat, out) return out diff --git a/psydac/core/bsplines_kernels.py b/psydac/core/bsplines_kernels.py index c8ba08e92..ca8ef739d 100644 --- a/psydac/core/bsplines_kernels.py +++ b/psydac/core/bsplines_kernels.py @@ -4,7 +4,7 @@ from pyccel.decorators import pure from numpy import shape, abs -import numpy as np +import cunumpy as xp from typing import Final # Auxiliary functions needed for the bsplines kernels. @@ -238,8 +238,8 @@ def basis_funs_p(knots: 'float[:]', degree: int, x: float, span: int, out: 'floa out[0] = 1.0 if degree == 0: return - left = np.zeros(degree, dtype=float) - right = np.zeros(degree, dtype=float) + left = xp.zeros(degree, dtype=float) + right = xp.zeros(degree, dtype=float) for j in range(degree): left[j] = x - knots[span - j] @@ -325,7 +325,7 @@ def basis_funs_1st_der_p(knots: 'float[:]', degree: int, x: float, span: int, ou # Compute nonzero basis functions and knot differences for splines # up to degree deg-1 - values = np.zeros(degree) + values = xp.zeros(degree) basis_funs_p(knots, degree-1, x, span, values) # Compute derivatives at x using formula based on difference of splines of @@ -395,13 +395,13 @@ def basis_funs_all_ders_p(knots: 'float[:]', degree: int, x: float, span: int, n .. [1] L. Piegl and W. Tiller. The NURBS Book, 2nd ed., Springer-Verlag Berlin Heidelberg GmbH, 1997. 
""" - sh_a = np.empty(2) - sh_b = np.empty(2) - left = np.empty(degree) - right = np.empty(degree) - ndu = np.empty((degree+1, degree+1)) - a = np.empty((2, degree+1)) - temp_d = np.empty((1, 1)) + sh_a = xp.empty(2) + sh_b = xp.empty(2) + left = xp.empty(degree) + right = xp.empty(degree) + ndu = xp.empty((degree+1, degree+1)) + a = xp.empty((2, degree+1)) + temp_d = xp.empty((1, 1)) # Number of derivatives that need to be effectively computed # Derivatives higher than degree are = 0. ne = min(n, degree) @@ -444,10 +444,14 @@ def basis_funs_all_ders_p(knots: 'float[:]', degree: int, x: float, span: int, n j2 = k-1 if (r-1 <= pk) else degree-r a[s2, j1:j2 + 1] = (a[s1, j1:j2 + 1] - a[s1, j1 - 1:j2]) * ndu[pk + 1, rk + j1:rk + j2 + 1] - # temp_d[:, :] = np.matmul(a[s2:s2 + 1, j1:j2 + 1], ndu[rk + j1:rk + j2 + 1, pk: pk + 1]) + # temp_d[:, :] = xp.matmul(a[s2:s2 + 1, j1:j2 + 1], ndu[rk + j1:rk + j2 + 1, pk: pk + 1]) + + # sh_a[:] = shape(a[s2:s2 + 1, j1:j2 + 1]) + sh_a[:] = xp.array(a[s2:s2 + 1, j1:j2 + 1].shape, dtype=sh_a.dtype) + + # sh_b[:] = shape(ndu[rk + j1:rk + j2 + 1, pk: pk + 1]) + sh_b[:] = xp.array(ndu[rk + j1:rk + j2 + 1, pk: pk + 1].shape, dtype=sh_b.dtype) - sh_a[:] = shape(a[s2:s2 + 1, j1:j2 + 1]) - sh_b[:] = shape(ndu[rk + j1:rk + j2 + 1, pk: pk + 1]) if sh_a[0] == 0 or sh_a[1] == 0 or sh_b[0] == 0 or sh_b[1] == 0: temp_d[:, :] = 0. 
@@ -554,8 +558,8 @@ def collocation_matrix_p(knots: 'float[:]', degree: int, periodic: bool, normali # Number of evaluation points nx = len(xgrid) - basis = np.zeros((nx, degree + 1)) - spans = np.zeros(nx, dtype=int) + basis = xp.zeros((nx, degree + 1)) + spans = xp.zeros(nx, dtype=int) find_spans_p(knots, degree, xgrid, spans) basis_funs_array_p(knots, degree, xgrid, spans, basis) @@ -572,7 +576,7 @@ def collocation_matrix_p(knots: 'float[:]', degree: int, periodic: bool, normali for i in range(nx): out[i, spans[i] - degree:spans[i] + 1] = basis[i, :] else: - integrals = np.zeros(knots.shape[0] - degree - 1) + integrals = xp.zeros(knots.shape[0] - degree - 1) basis_integrals_p(knots, degree, integrals) scaling = 1.0 / integrals if periodic: @@ -642,7 +646,7 @@ def histopolation_matrix_p(knots: 'float[:]', degree: int, periodic: bool, norma nx = len(xgrid) # In periodic case, make sure that evaluation points include domain boundaries - xgrid_new = np.zeros(len(xgrid) + 2) + xgrid_new = xp.zeros(len(xgrid) + 2) actual_len = len(xgrid) if periodic: if check_boundary: @@ -678,7 +682,7 @@ def histopolation_matrix_p(knots: 'float[:]', degree: int, periodic: bool, norma # . cannot use M-splines in analytical formula for histopolation matrix # . 
always use non-periodic splines to avoid circulant matrix structure nb_elevated = len(elevated_knots) - (degree + 1) - 1 - colloc = np.zeros((actual_len, nb_elevated)) + colloc = xp.zeros((actual_len, nb_elevated)) collocation_matrix_p(elevated_knots, degree + 1, False, @@ -690,7 +694,7 @@ def histopolation_matrix_p(knots: 'float[:]', degree: int, periodic: bool, norma m = colloc.shape[0] - 1 n = colloc.shape[1] - 1 - spans = np.zeros(colloc.shape[0], dtype=int) + spans = xp.zeros(colloc.shape[0], dtype=int) for i in range(colloc.shape[0]): local_span = 0 for j in range(colloc.shape[1]): @@ -701,7 +705,7 @@ def histopolation_matrix_p(knots: 'float[:]', degree: int, periodic: bool, norma # Compute histopolation matrix from collocation matrix of higher degree if periodic: - temp_array = np.zeros((m, n)) + temp_array = xp.zeros((m, n)) H = temp_array[:, :] else: H = out[:, :] @@ -709,24 +713,24 @@ def histopolation_matrix_p(knots: 'float[:]', degree: int, periodic: bool, norma if normalization: for i in range(m): # Indices of first/last non-zero elements in row of collocation matrix - jstart = spans[i] - (degree + 1) - jend = min(spans[i + 1], n) + jstart = int(spans[i] - (degree + 1)) + jend = int(min(spans[i + 1], n)) # Compute non-zero values of histopolation matrix for j in range(1 + jstart, jend + 1): - # s = np.sum(colloc[i, 0:j]) - np.sum(colloc[i + 1, 0:j]) + # s = xp.sum(colloc[i, 0:j]) - xp.sum(colloc[i + 1, 0:j]) s = sum_vec(colloc[i, 0:j]) - sum_vec(colloc[i + 1, 0:j]) H[i, j - 1] = s else: - integrals = np.zeros(knots.shape[0] - degree - 1) + integrals = xp.zeros(knots.shape[0] - degree - 1) basis_integrals_p(knots, degree, integrals) for i in range(m): # Indices of first/last non-zero elements in row of collocation matrix - jstart = spans[i] - (degree + 1) - jend = min(spans[i + 1], n) + jstart = int(spans[i] - (degree + 1)) + jend = int(min(spans[i + 1], n)) # Compute non-zero values of histopolation matrix for j in range(1 + jstart, jend + 1): - # s = 
np.sum(colloc[i, 0:j]) - np.sum(colloc[i + 1, 0:j]) + # s = xp.sum(colloc[i, 0:j]) - xp.sum(colloc[i + 1, 0:j]) s = sum_vec(colloc[i, 0:j]) - sum_vec(colloc[i + 1, 0:j]) H[i, j - 1] = s * integrals[j - 1] @@ -758,9 +762,9 @@ def merge_sort(a: 'float[:]'): if len(a) != 1 and len(a) != 0: n = len(a) - a1 = np.zeros(n // 2) + a1 = xp.zeros(n // 2) a1[:] = a[:n // 2] - a2 = np.zeros(n - n // 2) + a2 = xp.zeros(n - n // 2) a2[:] = a[n // 2:] merge_sort(a1) @@ -815,8 +819,8 @@ def breakpoints_p(knots: 'float[:]', degree: int, out: 'float[:]', tol: float = Last meaningful index + 1, e.g. the actual interesting result is ``out[:last_index]``. """ - # knots = np.array(knots) - # diff = np.append(True, abs(np.diff(knots[degree:-degree]))>tol) + # knots = xp.array(knots) + # diff = xp.append(True, abs(xp.diff(knots[degree:-degree]))>tol) # return knots[degree:-degree][diff] out[0] = knots[degree] @@ -914,9 +918,9 @@ def elements_spans_p(knots: 'float[:]', degree: int, out: 'int[:]'): 2) This function could be written in two lines: breaks = breakpoints( knots, degree ) - spans = np.searchsorted( knots, breaks[:-1], side='right' ) - 1 + spans = xp.searchsorted( knots, breaks[:-1], side='right' ) - 1 """ - temp_array = np.zeros(len(knots)) + temp_array = xp.zeros(len(knots)) actual_len = breakpoints_p(knots, degree, temp_array) @@ -1158,15 +1162,15 @@ def basis_ders_on_quad_grid_p(knots: 'float[:]', degree: int, quad_grid: 'float[ ne = quad_grid.shape[0] nq = quad_grid.shape[1] if normalization: - integrals = np.zeros(knots.shape[0] - degree - 1) + integrals = xp.zeros(knots.shape[0] - degree - 1) basis_integrals_p(knots, degree, integrals) scaling = 1.0 /integrals - temp_spans = np.zeros(len(knots), dtype=int) + temp_spans = xp.zeros(len(knots), dtype=int) actual_index = elements_spans_p(knots, degree, temp_spans) spans = temp_spans[:actual_index] - ders = np.zeros((nders + 1, degree + 1)) + ders = xp.zeros((nders + 1, degree + 1)) for ie in range(ne): xx = quad_grid[ie, :] @@ 
-1216,8 +1220,8 @@ def cell_index_p(breaks: 'float[:]', i_grid: 'float[:]', tol: float, out: 'int[: nbk = len(breaks) # Check if there are points outside the domain - if np.min(i_grid) < breaks[0] - tol/2: return -1 - if np.max(i_grid) > breaks[nbk - 1] + tol/2: return -1 + if xp.min(i_grid) < breaks[0] - tol/2: return -1 + if xp.max(i_grid) > breaks[nbk - 1] + tol/2: return -1 current_index = 0 while current_index < nx: @@ -1329,13 +1333,13 @@ def basis_ders_on_irregular_grid_p(knots: 'float[:]', degree: int, """ nx = i_grid.shape[0] if normalization: - scaling = np.zeros(knots.shape[0] - degree - 1) + scaling = xp.zeros(knots.shape[0] - degree - 1) basis_integrals_p(knots, degree, scaling) scaling = 1.0 / scaling - ders = np.zeros((nders + 1, degree + 1)) + ders = xp.zeros((nders + 1, degree + 1)) - temp_spans = np.zeros(len(knots), dtype=int) + temp_spans = xp.zeros(len(knots), dtype=int) actual_index = elements_spans_p(knots, degree, temp_spans) spans = temp_spans[:actual_index] diff --git a/psydac/core/field_evaluation_kernels.py b/psydac/core/field_evaluation_kernels.py index 242f7ff7c..07d6d5dfd 100644 --- a/psydac/core/field_evaluation_kernels.py +++ b/psydac/core/field_evaluation_kernels.py @@ -1,4 +1,4 @@ -import numpy as np +import cunumpy as xp from typing import TypeVar T = TypeVar('T', float, complex) @@ -57,7 +57,7 @@ def eval_fields_3d_no_weights(nc1: int, nc2: int, nc3: int, f_p1: int, f_p2: int out_fields: ndarray of floats Evaluated fields, filled with the correct values by the function """ - arr_coeff_fields = np.zeros_like(glob_arr_coeff, shape=(1 + f_p1, 1 + f_p2, 1 + f_p3, out_fields.shape[3])) + arr_coeff_fields = xp.zeros_like(glob_arr_coeff, shape=(1 + f_p1, 1 + f_p2, 1 + f_p3, out_fields.shape[3])) for i_cell_1 in range(nc1): span_1 = global_spans_1[i_cell_1] @@ -133,7 +133,7 @@ def eval_fields_2d_no_weights(nc1: int, nc2: int, f_p1: int, f_p2: int, k1: int, out_fields: ndarray of floats Evaluated fields, filled with the correct values by 
the function """ - arr_coeff_fields = np.zeros_like(glob_arr_coeff, shape=(1 + f_p1, 1 + f_p2, out_fields.shape[2])) + arr_coeff_fields = xp.zeros_like(glob_arr_coeff, shape=(1 + f_p1, 1 + f_p2, out_fields.shape[2])) for i_cell_1 in range(nc1): span_1 = global_spans_1[i_cell_1] @@ -191,7 +191,7 @@ def eval_fields_1d_no_weights(nc1: int, f_p1: int, k1: int, out_fields: ndarray of floats Evaluated fields, filled with the correct values by the function """ - arr_coeff_fields = np.zeros_like(glob_arr_coeff, shape=(1 + f_p1, out_fields.shape[1])) + arr_coeff_fields = xp.zeros_like(glob_arr_coeff, shape=(1 + f_p1, out_fields.shape[1])) for i_cell_1 in range(nc1): span_1 = global_spans_1[i_cell_1] @@ -254,7 +254,7 @@ def eval_fields_3d_irregular_no_weights(np1: int, np2: int, np3: int, f_p1: int, out_fields : ndarray of floats Evaluated fields, filled with the correct values by the function """ - arr_coeff_fields = np.zeros_like(glob_arr_coeff, shape=(1 + f_p1, 1 + f_p2, 1 + f_p3, out_fields.shape[3])) + arr_coeff_fields = xp.zeros_like(glob_arr_coeff, shape=(1 + f_p1, 1 + f_p2, 1 + f_p3, out_fields.shape[3])) for i_p_1 in range(np1): i_cell_1 = cell_index_1[i_p_1] @@ -328,7 +328,7 @@ def eval_fields_2d_irregular_no_weights(np1: int, np2: int, f_p1: int, f_p2: int out_fields: ndarray of floats Evaluated fields, filled with the correct values by the function """ - arr_coeff_fields = np.zeros_like(glob_arr_coeff, shape=(1 + f_p1, 1 + f_p2, out_fields.shape[2])) + arr_coeff_fields = xp.zeros_like(glob_arr_coeff, shape=(1 + f_p1, 1 + f_p2, out_fields.shape[2])) for i_p_1 in range(np1): i_cell_1 = cell_index_1[i_p_1] @@ -387,7 +387,7 @@ def eval_fields_1d_irregular_no_weights(np1: int, f_p1: int, out_fields: ndarray of floats Evaluated fields, filled with the correct values by the function """ - arr_coeff_fields = np.zeros_like(glob_arr_coeff, shape=(1 + f_p1, out_fields.shape[1])) + arr_coeff_fields = xp.zeros_like(glob_arr_coeff, shape=(1 + f_p1, out_fields.shape[1])) for 
i_p_1 in range(np1): i_cell_1 = cell_index_1[i_p_1] @@ -457,11 +457,11 @@ def eval_fields_3d_weighted(nc1: int, nc2: int, nc3: int, f_p1: int, f_p2: int, out_fields: ndarray of floats Evaluated fields, filled with the correct values by the function """ - arr_coeff_fields = np.zeros_like(glob_arr_coeff, shape=(1 + f_p1, 1 + f_p2, 1 + f_p3, out_fields.shape[3])) - arr_coeff_weights = np.zeros((1 + f_p1, 1 + f_p2, 1 + f_p3)) + arr_coeff_fields = xp.zeros_like(glob_arr_coeff, shape=(1 + f_p1, 1 + f_p2, 1 + f_p3, out_fields.shape[3])) + arr_coeff_weights = xp.zeros((1 + f_p1, 1 + f_p2, 1 + f_p3)) - arr_fields = np.zeros_like(glob_arr_coeff, shape=(k1, k2, k3, out_fields.shape[3])) - arr_weights = np.zeros((k1, k2, k3)) + arr_fields = xp.zeros_like(glob_arr_coeff, shape=(k1, k2, k3, out_fields.shape[3])) + arr_weights = xp.zeros((k1, k2, k3)) for i_cell_1 in range(nc1): span_1 = global_spans_1[i_cell_1] @@ -556,11 +556,11 @@ def eval_fields_2d_weighted(nc1: int, nc2: int, f_p1: int, f_p2: int, k1: int, k out_fields: ndarray of float Evaluated fields, filled with the correct values by the function """ - arr_coeff_fields = np.zeros_like(global_arr_coeff, shape=(1 + f_p1, 1 + f_p2, out_fields.shape[2])) - arr_coeff_weights = np.zeros((1 + f_p1, 1 + f_p2)) + arr_coeff_fields = xp.zeros_like(global_arr_coeff, shape=(1 + f_p1, 1 + f_p2, out_fields.shape[2])) + arr_coeff_weights = xp.zeros((1 + f_p1, 1 + f_p2)) - arr_fields = np.zeros_like(global_arr_coeff, shape=(k1, k2, out_fields.shape[2])) - arr_weights = np.zeros((k1, k2)) + arr_fields = xp.zeros_like(global_arr_coeff, shape=(k1, k2, out_fields.shape[2])) + arr_weights = xp.zeros((k1, k2)) for i_cell_1 in range(nc1): span_1 = global_spans_1[i_cell_1] @@ -636,11 +636,11 @@ def eval_fields_1d_weighted(nc1: int, f_p1: int, k1: int, out_fields: ndarray of float Evaluated fields, filled with the correct values by the function """ - arr_coeff_fields = np.zeros_like(global_arr_coeff, shape=(1 + f_p1, out_fields.shape[1])) - 
arr_coeff_weights = np.zeros((1 + f_p1)) + arr_coeff_fields = xp.zeros_like(global_arr_coeff, shape=(1 + f_p1, out_fields.shape[1])) + arr_coeff_weights = xp.zeros((1 + f_p1)) - arr_fields = np.zeros_like(global_arr_coeff, shape=(k1, out_fields.shape[1])) - arr_weights = np.zeros((k1)) + arr_fields = xp.zeros_like(global_arr_coeff, shape=(k1, out_fields.shape[1])) + arr_weights = xp.zeros((k1)) for i_cell_1 in range(nc1): span_1 = global_spans_1[i_cell_1] @@ -725,10 +725,10 @@ def eval_fields_3d_irregular_weighted(np1: int, np2: int, np3: int, f_p1: int, f out_fields : ndarray of floats Evaluated fields, filled with the correct values by the function """ - arr_coeff_fields = np.zeros_like(glob_arr_coeff, shape=(1 + f_p1, 1 + f_p2, 1 + f_p3, out_fields.shape[3])) - arr_coeff_weights = np.zeros((1 + f_p1, 1 + f_p2, 1 + f_p3)) + arr_coeff_fields = xp.zeros_like(glob_arr_coeff, shape=(1 + f_p1, 1 + f_p2, 1 + f_p3, out_fields.shape[3])) + arr_coeff_weights = xp.zeros((1 + f_p1, 1 + f_p2, 1 + f_p3)) - temp_fields = np.zeros_like(glob_arr_coeff, shape=out_fields.shape[3]) + temp_fields = xp.zeros_like(glob_arr_coeff, shape=out_fields.shape[3]) for i_p_1 in range(np1): i_cell_1 = cell_index_1[i_p_1] @@ -818,10 +818,10 @@ def eval_fields_2d_irregular_weighted(np1: int, np2: int, f_p1: int, f_p2: int, out_fields : ndarray of floats Evaluated fields, filled with the correct values by the function """ - arr_coeff_fields = np.zeros_like(global_arr_coeff, shape=(1 + f_p1, 1 + f_p2, out_fields.shape[2])) - arr_coeff_weights = np.zeros((1 + f_p1, 1 + f_p2)) + arr_coeff_fields = xp.zeros_like(global_arr_coeff, shape=(1 + f_p1, 1 + f_p2, out_fields.shape[2])) + arr_coeff_weights = xp.zeros((1 + f_p1, 1 + f_p2)) - temp_fields = np.zeros_like(global_arr_coeff, shape=out_fields.shape[2]) + temp_fields = xp.zeros_like(global_arr_coeff, shape=out_fields.shape[2]) for i_p_1 in range(np1): i_cell_1 = cell_index_1[i_p_1] @@ -895,7 +895,7 @@ def eval_fields_1d_irregular_weighted(np1: int, 
f_p1: int, out_fields : ndarray of floats Evaluated fields, filled with the correct values by the function """ - temp_fields = np.zeros_like(global_arr_coeff, shape=out_fields.shape[1]) + temp_fields = xp.zeros_like(global_arr_coeff, shape=out_fields.shape[1]) for i_p_1 in range(np1): i_cell_1 = cell_index_1[i_p_1] @@ -979,21 +979,21 @@ def eval_jac_det_3d(nc1: int, nc2: int, nc3: int, f_p1: int, f_p2: int, f_p3: in Jacobian determinant on the grid. """ - arr_coeffs_x = np.zeros_like(global_arr_coeff_x, shape=(1 + f_p1, 1 + f_p2, 1 + f_p3)) - arr_coeffs_y = np.zeros_like(global_arr_coeff_y, shape=(1 + f_p1, 1 + f_p2, 1 + f_p3)) - arr_coeffs_z = np.zeros_like(global_arr_coeff_z, shape=(1 + f_p1, 1 + f_p2, 1 + f_p3)) + arr_coeffs_x = xp.zeros_like(global_arr_coeff_x, shape=(1 + f_p1, 1 + f_p2, 1 + f_p3)) + arr_coeffs_y = xp.zeros_like(global_arr_coeff_y, shape=(1 + f_p1, 1 + f_p2, 1 + f_p3)) + arr_coeffs_z = xp.zeros_like(global_arr_coeff_z, shape=(1 + f_p1, 1 + f_p2, 1 + f_p3)) - arr_x_x1 = np.zeros_like(global_arr_coeff_x, shape=(k1, k2, k3)) - arr_x_x2 = np.zeros_like(global_arr_coeff_x, shape=(k1, k2, k3)) - arr_x_x3 = np.zeros_like(global_arr_coeff_x, shape=(k1, k2, k3)) + arr_x_x1 = xp.zeros_like(global_arr_coeff_x, shape=(k1, k2, k3)) + arr_x_x2 = xp.zeros_like(global_arr_coeff_x, shape=(k1, k2, k3)) + arr_x_x3 = xp.zeros_like(global_arr_coeff_x, shape=(k1, k2, k3)) - arr_y_x1 = np.zeros_like(global_arr_coeff_y, shape=(k1, k2, k3)) - arr_y_x2 = np.zeros_like(global_arr_coeff_y, shape=(k1, k2, k3)) - arr_y_x3 = np.zeros_like(global_arr_coeff_y, shape=(k1, k2, k3)) + arr_y_x1 = xp.zeros_like(global_arr_coeff_y, shape=(k1, k2, k3)) + arr_y_x2 = xp.zeros_like(global_arr_coeff_y, shape=(k1, k2, k3)) + arr_y_x3 = xp.zeros_like(global_arr_coeff_y, shape=(k1, k2, k3)) - arr_z_x1 = np.zeros_like(global_arr_coeff_z, shape=(k1, k2, k3)) - arr_z_x2 = np.zeros_like(global_arr_coeff_z, shape=(k1, k2, k3)) - arr_z_x3 = np.zeros_like(global_arr_coeff_z, shape=(k1, k2, k3)) + 
arr_z_x1 = xp.zeros_like(global_arr_coeff_z, shape=(k1, k2, k3)) + arr_z_x2 = xp.zeros_like(global_arr_coeff_z, shape=(k1, k2, k3)) + arr_z_x3 = xp.zeros_like(global_arr_coeff_z, shape=(k1, k2, k3)) for i_cell_1 in range(nc1): span_1 = global_spans_1[i_cell_1] @@ -1125,14 +1125,14 @@ def eval_jac_det_2d(nc1: int, nc2: int, f_p1: int, f_p2: int, k1: int, k2: int, Jacobian determinant on the grid. """ - arr_coeffs_x = np.zeros_like(global_arr_coeff_x, shape=(1 + f_p1, 1 + f_p2)) - arr_coeffs_y = np.zeros_like(global_arr_coeff_y, shape=(1 + f_p1, 1 + f_p2)) + arr_coeffs_x = xp.zeros_like(global_arr_coeff_x, shape=(1 + f_p1, 1 + f_p2)) + arr_coeffs_y = xp.zeros_like(global_arr_coeff_y, shape=(1 + f_p1, 1 + f_p2)) - arr_x_x1 = np.zeros_like(global_arr_coeff_x, shape=(k1, k2)) - arr_x_x2 = np.zeros_like(global_arr_coeff_x, shape=(k1, k2)) + arr_x_x1 = xp.zeros_like(global_arr_coeff_x, shape=(k1, k2)) + arr_x_x2 = xp.zeros_like(global_arr_coeff_x, shape=(k1, k2)) - arr_y_x1 = np.zeros_like(global_arr_coeff_y, shape=(k1, k2)) - arr_y_x2 = np.zeros_like(global_arr_coeff_y, shape=(k1, k2)) + arr_y_x1 = xp.zeros_like(global_arr_coeff_y, shape=(k1, k2)) + arr_y_x2 = xp.zeros_like(global_arr_coeff_y, shape=(k1, k2)) for i_cell_1 in range(nc1): span_1 = global_spans_1[i_cell_1] @@ -1244,9 +1244,9 @@ def eval_jac_det_irregular_3d(np1: int, np2: int, np3: int, f_p1: int, f_p2: int """ - arr_coeffs_x = np.zeros_like(global_arr_coeff_x, shape=(1 + f_p1, 1 + f_p2, 1 + f_p3)) - arr_coeffs_y = np.zeros_like(global_arr_coeff_y, shape=(1 + f_p1, 1 + f_p2, 1 + f_p3)) - arr_coeffs_z = np.zeros_like(global_arr_coeff_z, shape=(1 + f_p1, 1 + f_p2, 1 + f_p3)) + arr_coeffs_x = xp.zeros_like(global_arr_coeff_x, shape=(1 + f_p1, 1 + f_p2, 1 + f_p3)) + arr_coeffs_y = xp.zeros_like(global_arr_coeff_y, shape=(1 + f_p1, 1 + f_p2, 1 + f_p3)) + arr_coeffs_z = xp.zeros_like(global_arr_coeff_z, shape=(1 + f_p1, 1 + f_p2, 1 + f_p3)) temp_x_x1 = arr_coeffs_x[0,0,0]-arr_coeffs_x[0,0,0] temp_x_x2 = 
arr_coeffs_y[0,0,0]-arr_coeffs_y[0,0,0] @@ -1374,8 +1374,8 @@ def eval_jac_det_irregular_2d(np1: int, np2: int, f_p1: int, f_p2: int, cell_ind Coefficients of the X2 field """ - arr_coeffs_x = np.zeros_like(global_arr_coeff_x, shape=(1 + f_p1, 1 + f_p2)) - arr_coeffs_y = np.zeros_like(global_arr_coeff_y, shape=(1 + f_p1, 1 + f_p2)) + arr_coeffs_x = xp.zeros_like(global_arr_coeff_x, shape=(1 + f_p1, 1 + f_p2)) + arr_coeffs_y = xp.zeros_like(global_arr_coeff_y, shape=(1 + f_p1, 1 + f_p2)) temp_x_x1 = arr_coeffs_x[0,0]-arr_coeffs_x[0,0] temp_x_x2 = arr_coeffs_y[0,0]-arr_coeffs_y[0,0] @@ -1490,32 +1490,32 @@ def eval_jac_det_3d_weights(nc1: int, nc2: int, nc3: int, f_p1: int, f_p2: int, Jacobian determinant on the grid """ - arr_coeffs_x = np.zeros_like(global_arr_coeff_x, shape=(1 + f_p1, 1 + f_p2, 1 + f_p3)) - arr_coeffs_y = np.zeros_like(global_arr_coeff_y, shape=(1 + f_p1, 1 + f_p2, 1 + f_p3)) - arr_coeffs_z = np.zeros_like(global_arr_coeff_z, shape=(1 + f_p1, 1 + f_p2, 1 + f_p3)) + arr_coeffs_x = xp.zeros_like(global_arr_coeff_x, shape=(1 + f_p1, 1 + f_p2, 1 + f_p3)) + arr_coeffs_y = xp.zeros_like(global_arr_coeff_y, shape=(1 + f_p1, 1 + f_p2, 1 + f_p3)) + arr_coeffs_z = xp.zeros_like(global_arr_coeff_z, shape=(1 + f_p1, 1 + f_p2, 1 + f_p3)) - arr_x = np.zeros_like(global_arr_coeff_x, shape=(k1, k2, k3)) - arr_y = np.zeros_like(global_arr_coeff_y, shape=(k1, k2, k3)) - arr_z = np.zeros_like(global_arr_coeff_z, shape=(k1, k2, k3)) + arr_x = xp.zeros_like(global_arr_coeff_x, shape=(k1, k2, k3)) + arr_y = xp.zeros_like(global_arr_coeff_y, shape=(k1, k2, k3)) + arr_z = xp.zeros_like(global_arr_coeff_z, shape=(k1, k2, k3)) - arr_x_x1 = np.zeros_like(global_arr_coeff_x, shape=(k1, k2, k3)) - arr_x_x2 = np.zeros_like(global_arr_coeff_x, shape=(k1, k2, k3)) - arr_x_x3 = np.zeros_like(global_arr_coeff_x, shape=(k1, k2, k3)) + arr_x_x1 = xp.zeros_like(global_arr_coeff_x, shape=(k1, k2, k3)) + arr_x_x2 = xp.zeros_like(global_arr_coeff_x, shape=(k1, k2, k3)) + arr_x_x3 = 
xp.zeros_like(global_arr_coeff_x, shape=(k1, k2, k3)) - arr_y_x1 = np.zeros_like(global_arr_coeff_y, shape=(k1, k2, k3)) - arr_y_x2 = np.zeros_like(global_arr_coeff_y, shape=(k1, k2, k3)) - arr_y_x3 = np.zeros_like(global_arr_coeff_y, shape=(k1, k2, k3)) + arr_y_x1 = xp.zeros_like(global_arr_coeff_y, shape=(k1, k2, k3)) + arr_y_x2 = xp.zeros_like(global_arr_coeff_y, shape=(k1, k2, k3)) + arr_y_x3 = xp.zeros_like(global_arr_coeff_y, shape=(k1, k2, k3)) - arr_z_x1 = np.zeros_like(global_arr_coeff_z, shape=(k1, k2, k3)) - arr_z_x2 = np.zeros_like(global_arr_coeff_z, shape=(k1, k2, k3)) - arr_z_x3 = np.zeros_like(global_arr_coeff_z, shape=(k1, k2, k3)) + arr_z_x1 = xp.zeros_like(global_arr_coeff_z, shape=(k1, k2, k3)) + arr_z_x2 = xp.zeros_like(global_arr_coeff_z, shape=(k1, k2, k3)) + arr_z_x3 = xp.zeros_like(global_arr_coeff_z, shape=(k1, k2, k3)) - arr_coeff_weights = np.zeros((1 + f_p1, 1 + f_p2, 1 + f_p3)) - arr_weights = np.zeros((k1, k2, k3)) + arr_coeff_weights = xp.zeros((1 + f_p1, 1 + f_p2, 1 + f_p3)) + arr_weights = xp.zeros((k1, k2, k3)) - arr_weights_x1 = np.zeros((k1, k2, k3)) - arr_weights_x2 = np.zeros((k1, k2, k3)) - arr_weights_x3 = np.zeros((k1, k2, k3)) + arr_weights_x1 = xp.zeros((k1, k2, k3)) + arr_weights_x2 = xp.zeros((k1, k2, k3)) + arr_weights_x3 = xp.zeros((k1, k2, k3)) for i_cell_1 in range(nc1): span_1 = global_spans_1[i_cell_1] @@ -1703,24 +1703,24 @@ def eval_jac_det_2d_weights(nc1: int, nc2: int, f_p1: int, f_p2: int, k1: int, k Jacobian determinant on the grid """ - arr_coeffs_x = np.zeros_like(global_arr_coeff_x, shape=(1 + f_p1, 1 + f_p2)) - arr_coeffs_y = np.zeros_like(global_arr_coeff_y, shape=(1 + f_p1, 1 + f_p2)) + arr_coeffs_x = xp.zeros_like(global_arr_coeff_x, shape=(1 + f_p1, 1 + f_p2)) + arr_coeffs_y = xp.zeros_like(global_arr_coeff_y, shape=(1 + f_p1, 1 + f_p2)) - arr_x = np.zeros_like(global_arr_coeff_x, shape=(k1, k2)) - arr_y = np.zeros_like(global_arr_coeff_y, shape=(k1, k2)) + arr_x = xp.zeros_like(global_arr_coeff_x, 
shape=(k1, k2)) + arr_y = xp.zeros_like(global_arr_coeff_y, shape=(k1, k2)) - arr_x_x1 = np.zeros_like(global_arr_coeff_x, shape=(k1, k2)) - arr_x_x2 = np.zeros_like(global_arr_coeff_x, shape=(k1, k2)) + arr_x_x1 = xp.zeros_like(global_arr_coeff_x, shape=(k1, k2)) + arr_x_x2 = xp.zeros_like(global_arr_coeff_x, shape=(k1, k2)) - arr_y_x1 = np.zeros_like(global_arr_coeff_y, shape=(k1, k2)) - arr_y_x2 = np.zeros_like(global_arr_coeff_y, shape=(k1, k2)) + arr_y_x1 = xp.zeros_like(global_arr_coeff_y, shape=(k1, k2)) + arr_y_x2 = xp.zeros_like(global_arr_coeff_y, shape=(k1, k2)) - arr_coeff_weights = np.zeros((1 + f_p1, 1 + f_p2)) + arr_coeff_weights = xp.zeros((1 + f_p1, 1 + f_p2)) - arr_weights = np.zeros((k1, k2)) + arr_weights = xp.zeros((k1, k2)) - arr_weights_x1 = np.zeros((k1, k2)) - arr_weights_x2 = np.zeros((k1, k2)) + arr_weights_x1 = xp.zeros((k1, k2)) + arr_weights_x2 = xp.zeros((k1, k2)) for i_cell_1 in range(nc1): span_1 = global_spans_1[i_cell_1] @@ -1872,11 +1872,11 @@ def eval_jac_det_irregular_3d_weights(np1: int, np2: int, np3: int, f_p1: int, f Jacobian determinant on the grid """ - arr_coeffs_x = np.zeros_like(global_arr_coeff_x, shape=(1 + f_p1, 1 + f_p2, 1 + f_p3)) - arr_coeffs_y = np.zeros_like(global_arr_coeff_y, shape=(1 + f_p1, 1 + f_p2, 1 + f_p3)) - arr_coeffs_z = np.zeros_like(global_arr_coeff_z, shape=(1 + f_p1, 1 + f_p2, 1 + f_p3)) + arr_coeffs_x = xp.zeros_like(global_arr_coeff_x, shape=(1 + f_p1, 1 + f_p2, 1 + f_p3)) + arr_coeffs_y = xp.zeros_like(global_arr_coeff_y, shape=(1 + f_p1, 1 + f_p2, 1 + f_p3)) + arr_coeffs_z = xp.zeros_like(global_arr_coeff_z, shape=(1 + f_p1, 1 + f_p2, 1 + f_p3)) - arr_coeffs_weights = np.zeros((1 + f_p1, 1 + f_p2, 1 + f_p3)) + arr_coeffs_weights = xp.zeros((1 + f_p1, 1 + f_p2, 1 + f_p3)) temp_x = arr_coeffs_x[0,0,0]-arr_coeffs_x[0,0,0] temp_y = arr_coeffs_y[0,0,0]-arr_coeffs_y[0,0,0] @@ -2059,10 +2059,10 @@ def eval_jac_det_irregular_2d_weights(np1: int, np2: int, f_p1: int, f_p2: int, Jacobian determinant on 
the grid """ - arr_coeffs_x = np.zeros_like(global_arr_coeff_x, shape=(1 + f_p1, 1 + f_p2)) - arr_coeffs_y = np.zeros_like(global_arr_coeff_y, shape=(1 + f_p1, 1 + f_p2)) + arr_coeffs_x = xp.zeros_like(global_arr_coeff_x, shape=(1 + f_p1, 1 + f_p2)) + arr_coeffs_y = xp.zeros_like(global_arr_coeff_y, shape=(1 + f_p1, 1 + f_p2)) - arr_coeffs_weights = np.zeros((1 + f_p1, 1 + f_p2)) + arr_coeffs_weights = xp.zeros((1 + f_p1, 1 + f_p2)) temp_x = arr_coeffs_x[0,0]-arr_coeffs_x[0,0] temp_y = arr_coeffs_y[0,0]-arr_coeffs_y[0,0] @@ -2205,23 +2205,23 @@ def eval_jacobians_3d(nc1: int, nc2: int, nc3: int, f_p1: int, f_p2: int, f_p3: Jacobian matrix on the grid """ - jmat = np.empty((3, 3)) + jmat = xp.empty((3, 3)) - arr_coeffs_x = np.zeros_like(global_arr_coeff_x, shape=(1 + f_p1, 1 + f_p2, 1 + f_p3)) - arr_coeffs_y = np.zeros_like(global_arr_coeff_y, shape=(1 + f_p1, 1 + f_p2, 1 + f_p3)) - arr_coeffs_z = np.zeros_like(global_arr_coeff_z, shape=(1 + f_p1, 1 + f_p2, 1 + f_p3)) + arr_coeffs_x = xp.zeros_like(global_arr_coeff_x, shape=(1 + f_p1, 1 + f_p2, 1 + f_p3)) + arr_coeffs_y = xp.zeros_like(global_arr_coeff_y, shape=(1 + f_p1, 1 + f_p2, 1 + f_p3)) + arr_coeffs_z = xp.zeros_like(global_arr_coeff_z, shape=(1 + f_p1, 1 + f_p2, 1 + f_p3)) - arr_x_x1 = np.zeros_like(global_arr_coeff_x, shape=(k1, k2, k3)) - arr_x_x2 = np.zeros_like(global_arr_coeff_x, shape=(k1, k2, k3)) - arr_x_x3 = np.zeros_like(global_arr_coeff_x, shape=(k1, k2, k3)) + arr_x_x1 = xp.zeros_like(global_arr_coeff_x, shape=(k1, k2, k3)) + arr_x_x2 = xp.zeros_like(global_arr_coeff_x, shape=(k1, k2, k3)) + arr_x_x3 = xp.zeros_like(global_arr_coeff_x, shape=(k1, k2, k3)) - arr_y_x1 = np.zeros_like(global_arr_coeff_y, shape=(k1, k2, k3)) - arr_y_x2 = np.zeros_like(global_arr_coeff_y, shape=(k1, k2, k3)) - arr_y_x3 = np.zeros_like(global_arr_coeff_y, shape=(k1, k2, k3)) + arr_y_x1 = xp.zeros_like(global_arr_coeff_y, shape=(k1, k2, k3)) + arr_y_x2 = xp.zeros_like(global_arr_coeff_y, shape=(k1, k2, k3)) + arr_y_x3 = 
xp.zeros_like(global_arr_coeff_y, shape=(k1, k2, k3)) - arr_z_x1 = np.zeros_like(global_arr_coeff_z, shape=(k1, k2, k3)) - arr_z_x2 = np.zeros_like(global_arr_coeff_z, shape=(k1, k2, k3)) - arr_z_x3 = np.zeros_like(global_arr_coeff_z, shape=(k1, k2, k3)) + arr_z_x1 = xp.zeros_like(global_arr_coeff_z, shape=(k1, k2, k3)) + arr_z_x2 = xp.zeros_like(global_arr_coeff_z, shape=(k1, k2, k3)) + arr_z_x3 = xp.zeros_like(global_arr_coeff_z, shape=(k1, k2, k3)) for i_cell_1 in range(nc1): span_1 = global_spans_1[i_cell_1] @@ -2363,16 +2363,16 @@ def eval_jacobians_2d(nc1: int, nc2: int, f_p1: int, f_p2: int, k1: int, k2: int Jacobian matrix at every point of the grid """ - jmat = np.empty((2, 2)) + jmat = xp.empty((2, 2)) - arr_coeffs_x = np.zeros_like(global_arr_coeff_x, shape=(1 + f_p1, 1 + f_p2)) - arr_coeffs_y = np.zeros_like(global_arr_coeff_y, shape=(1 + f_p1, 1 + f_p2)) + arr_coeffs_x = xp.zeros_like(global_arr_coeff_x, shape=(1 + f_p1, 1 + f_p2)) + arr_coeffs_y = xp.zeros_like(global_arr_coeff_y, shape=(1 + f_p1, 1 + f_p2)) - arr_x_x1 = np.zeros_like(global_arr_coeff_x, shape=(k1, k2)) - arr_x_x2 = np.zeros_like(global_arr_coeff_x, shape=(k1, k2)) + arr_x_x1 = xp.zeros_like(global_arr_coeff_x, shape=(k1, k2)) + arr_x_x2 = xp.zeros_like(global_arr_coeff_x, shape=(k1, k2)) - arr_y_x1 = np.zeros_like(global_arr_coeff_y, shape=(k1, k2)) - arr_y_x2 = np.zeros_like(global_arr_coeff_y, shape=(k1, k2)) + arr_y_x1 = xp.zeros_like(global_arr_coeff_y, shape=(k1, k2)) + arr_y_x2 = xp.zeros_like(global_arr_coeff_y, shape=(k1, k2)) for i_cell_1 in range(nc1): span_1 = global_spans_1[i_cell_1] @@ -2490,11 +2490,11 @@ def eval_jacobians_irregular_3d(np1: int, np2: int, np3: int, f_p1: int, f_p2: i Jacobian matrix on the grid """ - jmat = np.empty((3, 3)) + jmat = xp.empty((3, 3)) - arr_coeffs_x = np.zeros_like(global_arr_coeff_x, shape=(1 + f_p1, 1 + f_p2, 1 + f_p3)) - arr_coeffs_y = np.zeros_like(global_arr_coeff_y, shape=(1 + f_p1, 1 + f_p2, 1 + f_p3)) - arr_coeffs_z = 
np.zeros_like(global_arr_coeff_z, shape=(1 + f_p1, 1 + f_p2, 1 + f_p3)) + arr_coeffs_x = xp.zeros_like(global_arr_coeff_x, shape=(1 + f_p1, 1 + f_p2, 1 + f_p3)) + arr_coeffs_y = xp.zeros_like(global_arr_coeff_y, shape=(1 + f_p1, 1 + f_p2, 1 + f_p3)) + arr_coeffs_z = xp.zeros_like(global_arr_coeff_z, shape=(1 + f_p1, 1 + f_p2, 1 + f_p3)) temp_x_x1 = arr_coeffs_x[0,0,0]-arr_coeffs_x[0,0,0] temp_x_x2 = arr_coeffs_y[0,0,0]-arr_coeffs_y[0,0,0] @@ -2631,10 +2631,10 @@ def eval_jacobians_irregular_2d(np1: int, np2: int, f_p1: int, f_p2: int, cell_i Jacobian matrix on the grid """ - jmat = np.empty((2, 2)) + jmat = xp.empty((2, 2)) - arr_coeffs_x = np.zeros_like(global_arr_coeff_x, shape=(1 + f_p1, 1 + f_p2)) - arr_coeffs_y = np.zeros_like(global_arr_coeff_y, shape=(1 + f_p1, 1 + f_p2)) + arr_coeffs_x = xp.zeros_like(global_arr_coeff_x, shape=(1 + f_p1, 1 + f_p2)) + arr_coeffs_y = xp.zeros_like(global_arr_coeff_y, shape=(1 + f_p1, 1 + f_p2)) temp_x_x1 = arr_coeffs_x[0,0]-arr_coeffs_x[0,0] temp_x_x2 = arr_coeffs_y[0,0]-arr_coeffs_y[0,0] @@ -2754,34 +2754,34 @@ def eval_jacobians_3d_weights(nc1: int, nc2: int, nc3: int, f_p1: int, f_p2: in Jacobian matrix on the grid """ - jmat = np.empty((3, 3)) + jmat = xp.empty((3, 3)) - arr_coeffs_x = np.zeros_like(global_arr_coeff_x, shape=(1 + f_p1, 1 + f_p2, 1 + f_p3)) - arr_coeffs_y = np.zeros_like(global_arr_coeff_y, shape=(1 + f_p1, 1 + f_p2, 1 + f_p3)) - arr_coeffs_z = np.zeros_like(global_arr_coeff_z, shape=(1 + f_p1, 1 + f_p2, 1 + f_p3)) + arr_coeffs_x = xp.zeros_like(global_arr_coeff_x, shape=(1 + f_p1, 1 + f_p2, 1 + f_p3)) + arr_coeffs_y = xp.zeros_like(global_arr_coeff_y, shape=(1 + f_p1, 1 + f_p2, 1 + f_p3)) + arr_coeffs_z = xp.zeros_like(global_arr_coeff_z, shape=(1 + f_p1, 1 + f_p2, 1 + f_p3)) - arr_x = np.zeros_like(global_arr_coeff_x, shape=(k1, k2, k3)) - arr_y = np.zeros_like(global_arr_coeff_y, shape=(k1, k2, k3)) - arr_z = np.zeros_like(global_arr_coeff_z, shape=(k1, k2, k3)) + arr_x = 
xp.zeros_like(global_arr_coeff_x, shape=(k1, k2, k3)) + arr_y = xp.zeros_like(global_arr_coeff_y, shape=(k1, k2, k3)) + arr_z = xp.zeros_like(global_arr_coeff_z, shape=(k1, k2, k3)) - arr_x_x1 = np.zeros_like(global_arr_coeff_x, shape=(k1, k2, k3)) - arr_x_x2 = np.zeros_like(global_arr_coeff_x, shape=(k1, k2, k3)) - arr_x_x3 = np.zeros_like(global_arr_coeff_x, shape=(k1, k2, k3)) + arr_x_x1 = xp.zeros_like(global_arr_coeff_x, shape=(k1, k2, k3)) + arr_x_x2 = xp.zeros_like(global_arr_coeff_x, shape=(k1, k2, k3)) + arr_x_x3 = xp.zeros_like(global_arr_coeff_x, shape=(k1, k2, k3)) - arr_y_x1 = np.zeros_like(global_arr_coeff_y, shape=(k1, k2, k3)) - arr_y_x2 = np.zeros_like(global_arr_coeff_y, shape=(k1, k2, k3)) - arr_y_x3 = np.zeros_like(global_arr_coeff_y, shape=(k1, k2, k3)) + arr_y_x1 = xp.zeros_like(global_arr_coeff_y, shape=(k1, k2, k3)) + arr_y_x2 = xp.zeros_like(global_arr_coeff_y, shape=(k1, k2, k3)) + arr_y_x3 = xp.zeros_like(global_arr_coeff_y, shape=(k1, k2, k3)) - arr_z_x1 = np.zeros_like(global_arr_coeff_z, shape=(k1, k2, k3)) - arr_z_x2 = np.zeros_like(global_arr_coeff_z, shape=(k1, k2, k3)) - arr_z_x3 = np.zeros_like(global_arr_coeff_z, shape=(k1, k2, k3)) + arr_z_x1 = xp.zeros_like(global_arr_coeff_z, shape=(k1, k2, k3)) + arr_z_x2 = xp.zeros_like(global_arr_coeff_z, shape=(k1, k2, k3)) + arr_z_x3 = xp.zeros_like(global_arr_coeff_z, shape=(k1, k2, k3)) - arr_coeff_weights = np.zeros((1 + f_p1, 1 + f_p2, 1 + f_p3)) - arr_weights = np.zeros((k1, k2, k3)) + arr_coeff_weights = xp.zeros((1 + f_p1, 1 + f_p2, 1 + f_p3)) + arr_weights = xp.zeros((k1, k2, k3)) - arr_weights_x1 = np.zeros((k1, k2, k3)) - arr_weights_x2 = np.zeros((k1, k2, k3)) - arr_weights_x3 = np.zeros((k1, k2, k3)) + arr_weights_x1 = xp.zeros((k1, k2, k3)) + arr_weights_x2 = xp.zeros((k1, k2, k3)) + arr_weights_x3 = xp.zeros((k1, k2, k3)) for i_cell_1 in range(nc1): span_1 = global_spans_1[i_cell_1] @@ -2977,25 +2977,25 @@ def eval_jacobians_2d_weights(nc1: int, nc2: int, f_p1: int, f_p2: 
int, k1: int Jacobian matrix at every point of the grid """ - jmat = np.empty((2, 2)) + jmat = xp.empty((2, 2)) - arr_coeffs_x = np.zeros_like(global_arr_coeff_x, shape=(1 + f_p1, 1 + f_p2)) - arr_coeffs_y = np.zeros_like(global_arr_coeff_y, shape=(1 + f_p1, 1 + f_p2)) + arr_coeffs_x = xp.zeros_like(global_arr_coeff_x, shape=(1 + f_p1, 1 + f_p2)) + arr_coeffs_y = xp.zeros_like(global_arr_coeff_y, shape=(1 + f_p1, 1 + f_p2)) - arr_x = np.zeros_like(global_arr_coeff_x, shape=(k1, k2)) - arr_y = np.zeros_like(global_arr_coeff_y, shape=(k1, k2)) + arr_x = xp.zeros_like(global_arr_coeff_x, shape=(k1, k2)) + arr_y = xp.zeros_like(global_arr_coeff_y, shape=(k1, k2)) - arr_x_x1 = np.zeros_like(global_arr_coeff_x, shape=(k1, k2)) - arr_x_x2 = np.zeros_like(global_arr_coeff_x, shape=(k1, k2)) + arr_x_x1 = xp.zeros_like(global_arr_coeff_x, shape=(k1, k2)) + arr_x_x2 = xp.zeros_like(global_arr_coeff_x, shape=(k1, k2)) - arr_y_x1 = np.zeros_like(global_arr_coeff_y, shape=(k1, k2)) - arr_y_x2 = np.zeros_like(global_arr_coeff_y, shape=(k1, k2)) + arr_y_x1 = xp.zeros_like(global_arr_coeff_y, shape=(k1, k2)) + arr_y_x2 = xp.zeros_like(global_arr_coeff_y, shape=(k1, k2)) - arr_coeff_weights = np.zeros((1 + f_p1, 1 + f_p2)) - arr_weights = np.zeros((k1, k2)) + arr_coeff_weights = xp.zeros((1 + f_p1, 1 + f_p2)) + arr_weights = xp.zeros((k1, k2)) - arr_weights_x1 = np.zeros((k1, k2)) - arr_weights_x2 = np.zeros((k1, k2)) + arr_weights_x1 = xp.zeros((k1, k2)) + arr_weights_x2 = xp.zeros((k1, k2)) for i_cell_1 in range(nc1): span_1 = global_spans_1[i_cell_1] @@ -3154,13 +3154,13 @@ def eval_jacobians_irregular_3d_weights(np1: int, np2: int, np3: int, f_p1: int, Jacobian matrix on the grid """ - jmat = np.empty((3, 3)) + jmat = xp.empty((3, 3)) - arr_coeffs_x = np.zeros_like(global_arr_coeff_x, shape=(1 + f_p1, 1 + f_p2, 1 + f_p3)) - arr_coeffs_y = np.zeros_like(global_arr_coeff_y, shape=(1 + f_p1, 1 + f_p2, 1 + f_p3)) - arr_coeffs_z = np.zeros_like(global_arr_coeff_z, shape=(1 + f_p1, 1 
+ f_p2, 1 + f_p3)) + arr_coeffs_x = xp.zeros_like(global_arr_coeff_x, shape=(1 + f_p1, 1 + f_p2, 1 + f_p3)) + arr_coeffs_y = xp.zeros_like(global_arr_coeff_y, shape=(1 + f_p1, 1 + f_p2, 1 + f_p3)) + arr_coeffs_z = xp.zeros_like(global_arr_coeff_z, shape=(1 + f_p1, 1 + f_p2, 1 + f_p3)) - arr_coeffs_weights = np.zeros((1 + f_p1, 1 + f_p2, 1 + f_p3)) + arr_coeffs_weights = xp.zeros((1 + f_p1, 1 + f_p2, 1 + f_p3)) temp_x = arr_coeffs_x[0,0,0]-arr_coeffs_x[0,0,0] temp_y = arr_coeffs_y[0,0,0]-arr_coeffs_y[0,0,0] @@ -3350,12 +3350,12 @@ def eval_jacobians_irregular_2d_weights(np1: int, np2: int, f_p1: int, f_p2: int Jacobian matrix on the grid """ - jmat = np.empty((2, 2)) + jmat = xp.empty((2, 2)) - arr_coeffs_x = np.zeros_like(global_arr_coeff_x, shape=(1 + f_p1, 1 + f_p2)) - arr_coeffs_y = np.zeros_like(global_arr_coeff_y, shape=(1 + f_p1, 1 + f_p2)) + arr_coeffs_x = xp.zeros_like(global_arr_coeff_x, shape=(1 + f_p1, 1 + f_p2)) + arr_coeffs_y = xp.zeros_like(global_arr_coeff_y, shape=(1 + f_p1, 1 + f_p2)) - arr_coeffs_weights = np.zeros((1 + f_p1, 1 + f_p2)) + arr_coeffs_weights = xp.zeros((1 + f_p1, 1 + f_p2)) temp_x = arr_coeffs_x[0,0]-arr_coeffs_x[0,0] @@ -3506,23 +3506,23 @@ def eval_jacobians_inv_3d(nc1: int, nc2: int, nc3: int, f_p1: int, f_p2: int, Inverse of the Jacobian matrix on the grid """ - jmat = np.empty((3, 3)) + jmat = xp.empty((3, 3)) - arr_coeffs_x = np.zeros_like(global_arr_coeff_x, shape=(1 + f_p1, 1 + f_p2, 1 + f_p3)) - arr_coeffs_y = np.zeros_like(global_arr_coeff_y, shape=(1 + f_p1, 1 + f_p2, 1 + f_p3)) - arr_coeffs_z = np.zeros_like(global_arr_coeff_z, shape=(1 + f_p1, 1 + f_p2, 1 + f_p3)) + arr_coeffs_x = xp.zeros_like(global_arr_coeff_x, shape=(1 + f_p1, 1 + f_p2, 1 + f_p3)) + arr_coeffs_y = xp.zeros_like(global_arr_coeff_y, shape=(1 + f_p1, 1 + f_p2, 1 + f_p3)) + arr_coeffs_z = xp.zeros_like(global_arr_coeff_z, shape=(1 + f_p1, 1 + f_p2, 1 + f_p3)) - arr_x_x1 = np.zeros_like(global_arr_coeff_x, shape=(k1, k2, k3)) - arr_x_x2 = 
np.zeros_like(global_arr_coeff_x, shape=(k1, k2, k3)) - arr_x_x3 = np.zeros_like(global_arr_coeff_x, shape=(k1, k2, k3)) + arr_x_x1 = xp.zeros_like(global_arr_coeff_x, shape=(k1, k2, k3)) + arr_x_x2 = xp.zeros_like(global_arr_coeff_x, shape=(k1, k2, k3)) + arr_x_x3 = xp.zeros_like(global_arr_coeff_x, shape=(k1, k2, k3)) - arr_y_x1 = np.zeros_like(global_arr_coeff_y, shape=(k1, k2, k3)) - arr_y_x2 = np.zeros_like(global_arr_coeff_y, shape=(k1, k2, k3)) - arr_y_x3 = np.zeros_like(global_arr_coeff_y, shape=(k1, k2, k3)) + arr_y_x1 = xp.zeros_like(global_arr_coeff_y, shape=(k1, k2, k3)) + arr_y_x2 = xp.zeros_like(global_arr_coeff_y, shape=(k1, k2, k3)) + arr_y_x3 = xp.zeros_like(global_arr_coeff_y, shape=(k1, k2, k3)) - arr_z_x1 = np.zeros_like(global_arr_coeff_z, shape=(k1, k2, k3)) - arr_z_x2 = np.zeros_like(global_arr_coeff_z, shape=(k1, k2, k3)) - arr_z_x3 = np.zeros_like(global_arr_coeff_z, shape=(k1, k2, k3)) + arr_z_x1 = xp.zeros_like(global_arr_coeff_z, shape=(k1, k2, k3)) + arr_z_x2 = xp.zeros_like(global_arr_coeff_z, shape=(k1, k2, k3)) + arr_z_x3 = xp.zeros_like(global_arr_coeff_z, shape=(k1, k2, k3)) for i_cell_1 in range(nc1): span_1 = global_spans_1[i_cell_1] @@ -3677,16 +3677,16 @@ def eval_jacobians_inv_2d(nc1: int, nc2: int, f_p1: int, f_p2: int, k1: int, k2 Inverse of the Jacobian matrix at every point of the grid """ - jmat = np.empty((2, 2)) + jmat = xp.empty((2, 2)) - arr_coeffs_x = np.zeros_like(global_arr_coeff_x, shape=(1 + f_p1, 1 + f_p2)) - arr_coeffs_y = np.zeros_like(global_arr_coeff_y, shape=(1 + f_p1, 1 + f_p2)) + arr_coeffs_x = xp.zeros_like(global_arr_coeff_x, shape=(1 + f_p1, 1 + f_p2)) + arr_coeffs_y = xp.zeros_like(global_arr_coeff_y, shape=(1 + f_p1, 1 + f_p2)) - arr_x_x1 = np.zeros_like(global_arr_coeff_x, shape=(k1, k2)) - arr_x_x2 = np.zeros_like(global_arr_coeff_x, shape=(k1, k2)) + arr_x_x1 = xp.zeros_like(global_arr_coeff_x, shape=(k1, k2)) + arr_x_x2 = xp.zeros_like(global_arr_coeff_x, shape=(k1, k2)) - arr_y_x1 = 
np.zeros_like(global_arr_coeff_y, shape=(k1, k2)) - arr_y_x2 = np.zeros_like(global_arr_coeff_y, shape=(k1, k2)) + arr_y_x1 = xp.zeros_like(global_arr_coeff_y, shape=(k1, k2)) + arr_y_x2 = xp.zeros_like(global_arr_coeff_y, shape=(k1, k2)) for i_cell_1 in range(nc1): span_1 = global_spans_1[i_cell_1] @@ -3806,11 +3806,11 @@ def eval_jacobians_inv_irregular_3d(np1: int, np2: int, np3: int, f_p1: int, f_p Inverse of the Jacobian matrix at every point of the grid """ - jmat = np.empty((3, 3)) + jmat = xp.empty((3, 3)) - arr_coeffs_x = np.zeros_like(global_arr_coeff_x, shape=(1 + f_p1, 1 + f_p2, 1 + f_p3)) - arr_coeffs_y = np.zeros_like(global_arr_coeff_y, shape=(1 + f_p1, 1 + f_p2, 1 + f_p3)) - arr_coeffs_z = np.zeros_like(global_arr_coeff_z, shape=(1 + f_p1, 1 + f_p2, 1 + f_p3)) + arr_coeffs_x = xp.zeros_like(global_arr_coeff_x, shape=(1 + f_p1, 1 + f_p2, 1 + f_p3)) + arr_coeffs_y = xp.zeros_like(global_arr_coeff_y, shape=(1 + f_p1, 1 + f_p2, 1 + f_p3)) + arr_coeffs_z = xp.zeros_like(global_arr_coeff_z, shape=(1 + f_p1, 1 + f_p2, 1 + f_p3)) temp_x_x1 = arr_coeffs_x[0,0,0]-arr_coeffs_x[0,0,0] temp_x_x2 = arr_coeffs_y[0,0,0]-arr_coeffs_y[0,0,0] @@ -3970,10 +3970,10 @@ def eval_jacobians_inv_irregular_2d(np1: int, np2: int, f_p1: int, f_p2: int, ce Inverse of the Jacobian matrix at every point of the grid """ - jmat = np.empty((2, 2)) + jmat = xp.empty((2, 2)) - arr_coeffs_x = np.zeros_like(global_arr_coeff_x, shape=(1 + f_p1, 1 + f_p2)) - arr_coeffs_y = np.zeros_like(global_arr_coeff_y, shape=(1 + f_p1, 1 + f_p2)) + arr_coeffs_x = xp.zeros_like(global_arr_coeff_x, shape=(1 + f_p1, 1 + f_p2)) + arr_coeffs_y = xp.zeros_like(global_arr_coeff_y, shape=(1 + f_p1, 1 + f_p2)) temp_x_x1 = arr_coeffs_x[0,0]-arr_coeffs_x[0,0] temp_x_x2 = arr_coeffs_y[0,0]-arr_coeffs_y[0,0] @@ -4095,34 +4095,34 @@ def eval_jacobians_inv_3d_weights(nc1: int, nc2: int, nc3: int, f_p1: int, f_p2 Inverse of the Jacobian matrix on the grid """ - jmat = np.empty((3, 3)) + jmat = xp.empty((3, 3)) - 
arr_coeffs_x = np.zeros_like(global_arr_coeff_x, shape=(1 + f_p1, 1 + f_p2, 1 + f_p3)) - arr_coeffs_y = np.zeros_like(global_arr_coeff_y, shape=(1 + f_p1, 1 + f_p2, 1 + f_p3)) - arr_coeffs_z = np.zeros_like(global_arr_coeff_z, shape=(1 + f_p1, 1 + f_p2, 1 + f_p3)) + arr_coeffs_x = xp.zeros_like(global_arr_coeff_x, shape=(1 + f_p1, 1 + f_p2, 1 + f_p3)) + arr_coeffs_y = xp.zeros_like(global_arr_coeff_y, shape=(1 + f_p1, 1 + f_p2, 1 + f_p3)) + arr_coeffs_z = xp.zeros_like(global_arr_coeff_z, shape=(1 + f_p1, 1 + f_p2, 1 + f_p3)) - arr_x = np.zeros_like(global_arr_coeff_x, shape=(k1, k2, k3)) - arr_y = np.zeros_like(global_arr_coeff_y, shape=(k1, k2, k3)) - arr_z = np.zeros_like(global_arr_coeff_z, shape=(k1, k2, k3)) + arr_x = xp.zeros_like(global_arr_coeff_x, shape=(k1, k2, k3)) + arr_y = xp.zeros_like(global_arr_coeff_y, shape=(k1, k2, k3)) + arr_z = xp.zeros_like(global_arr_coeff_z, shape=(k1, k2, k3)) - arr_x_x1 = np.zeros_like(global_arr_coeff_x, shape=(k1, k2, k3)) - arr_x_x2 = np.zeros_like(global_arr_coeff_x, shape=(k1, k2, k3)) - arr_x_x3 = np.zeros_like(global_arr_coeff_x, shape=(k1, k2, k3)) + arr_x_x1 = xp.zeros_like(global_arr_coeff_x, shape=(k1, k2, k3)) + arr_x_x2 = xp.zeros_like(global_arr_coeff_x, shape=(k1, k2, k3)) + arr_x_x3 = xp.zeros_like(global_arr_coeff_x, shape=(k1, k2, k3)) - arr_y_x1 = np.zeros_like(global_arr_coeff_y, shape=(k1, k2, k3)) - arr_y_x2 = np.zeros_like(global_arr_coeff_y, shape=(k1, k2, k3)) - arr_y_x3 = np.zeros_like(global_arr_coeff_y, shape=(k1, k2, k3)) + arr_y_x1 = xp.zeros_like(global_arr_coeff_y, shape=(k1, k2, k3)) + arr_y_x2 = xp.zeros_like(global_arr_coeff_y, shape=(k1, k2, k3)) + arr_y_x3 = xp.zeros_like(global_arr_coeff_y, shape=(k1, k2, k3)) - arr_z_x1 = np.zeros_like(global_arr_coeff_z, shape=(k1, k2, k3)) - arr_z_x2 = np.zeros_like(global_arr_coeff_z, shape=(k1, k2, k3)) - arr_z_x3 = np.zeros_like(global_arr_coeff_z, shape=(k1, k2, k3)) + arr_z_x1 = xp.zeros_like(global_arr_coeff_z, shape=(k1, k2, k3)) + arr_z_x2 
= xp.zeros_like(global_arr_coeff_z, shape=(k1, k2, k3)) + arr_z_x3 = xp.zeros_like(global_arr_coeff_z, shape=(k1, k2, k3)) - arr_coeff_weights = np.zeros((1 + f_p1, 1 + f_p2, 1 + f_p3)) - arr_weights = np.zeros((k1, k2, k3)) + arr_coeff_weights = xp.zeros((1 + f_p1, 1 + f_p2, 1 + f_p3)) + arr_weights = xp.zeros((k1, k2, k3)) - arr_weights_x1 = np.zeros((k1, k2, k3)) - arr_weights_x2 = np.zeros((k1, k2, k3)) - arr_weights_x3 = np.zeros((k1, k2, k3)) + arr_weights_x1 = xp.zeros((k1, k2, k3)) + arr_weights_x2 = xp.zeros((k1, k2, k3)) + arr_weights_x3 = xp.zeros((k1, k2, k3)) for i_cell_1 in range(nc1): span_1 = global_spans_1[i_cell_1] @@ -4332,25 +4332,25 @@ def eval_jacobians_inv_2d_weights(nc1: int, nc2: int, f_p1: int, f_p2: int, k1: Inverse of the Jacobian matrix at every point of the grid """ - jmat = np.empty((2, 2)) + jmat = xp.empty((2, 2)) - arr_coeffs_x = np.zeros_like(global_arr_coeff_x, shape=(1 + f_p1, 1 + f_p2)) - arr_coeffs_y = np.zeros_like(global_arr_coeff_y, shape=(1 + f_p1, 1 + f_p2)) + arr_coeffs_x = xp.zeros_like(global_arr_coeff_x, shape=(1 + f_p1, 1 + f_p2)) + arr_coeffs_y = xp.zeros_like(global_arr_coeff_y, shape=(1 + f_p1, 1 + f_p2)) - arr_x = np.zeros_like(global_arr_coeff_x, shape=(k1, k2)) - arr_y = np.zeros_like(global_arr_coeff_y, shape=(k1, k2)) + arr_x = xp.zeros_like(global_arr_coeff_x, shape=(k1, k2)) + arr_y = xp.zeros_like(global_arr_coeff_y, shape=(k1, k2)) - arr_x_x1 = np.zeros_like(global_arr_coeff_x, shape=(k1, k2)) - arr_x_x2 = np.zeros_like(global_arr_coeff_x, shape=(k1, k2)) + arr_x_x1 = xp.zeros_like(global_arr_coeff_x, shape=(k1, k2)) + arr_x_x2 = xp.zeros_like(global_arr_coeff_x, shape=(k1, k2)) - arr_y_x1 = np.zeros_like(global_arr_coeff_y, shape=(k1, k2)) - arr_y_x2 = np.zeros_like(global_arr_coeff_y, shape=(k1, k2)) + arr_y_x1 = xp.zeros_like(global_arr_coeff_y, shape=(k1, k2)) + arr_y_x2 = xp.zeros_like(global_arr_coeff_y, shape=(k1, k2)) - arr_coeff_weights = np.zeros((1 + f_p1, 1 + f_p2)) - arr_weights = 
np.zeros((k1, k2)) + arr_coeff_weights = xp.zeros((1 + f_p1, 1 + f_p2)) + arr_weights = xp.zeros((k1, k2)) - arr_weights_x1 = np.zeros((k1, k2)) - arr_weights_x2 = np.zeros((k1, k2)) + arr_weights_x1 = xp.zeros((k1, k2)) + arr_weights_x2 = xp.zeros((k1, k2)) for i_cell_1 in range(nc1): span_1 = global_spans_1[i_cell_1] @@ -4511,13 +4511,13 @@ def eval_jacobians_inv_irregular_3d_weights(np1: int, np2: int, np3: int, f_p1: Inverse of the Jacobian matrix at every point of the grid """ - jmat = np.empty((3, 3)) + jmat = xp.empty((3, 3)) - arr_coeffs_x = np.zeros_like(global_arr_coeff_x, shape=(1 + f_p1, 1 + f_p2, 1 + f_p3)) - arr_coeffs_y = np.zeros_like(global_arr_coeff_y, shape=(1 + f_p1, 1 + f_p2, 1 + f_p3)) - arr_coeffs_z = np.zeros_like(global_arr_coeff_z, shape=(1 + f_p1, 1 + f_p2, 1 + f_p3)) + arr_coeffs_x = xp.zeros_like(global_arr_coeff_x, shape=(1 + f_p1, 1 + f_p2, 1 + f_p3)) + arr_coeffs_y = xp.zeros_like(global_arr_coeff_y, shape=(1 + f_p1, 1 + f_p2, 1 + f_p3)) + arr_coeffs_z = xp.zeros_like(global_arr_coeff_z, shape=(1 + f_p1, 1 + f_p2, 1 + f_p3)) - arr_coeffs_weights = np.zeros((1 + f_p1, 1 + f_p2, 1 + f_p3)) + arr_coeffs_weights = xp.zeros((1 + f_p1, 1 + f_p2, 1 + f_p3)) temp_x = arr_coeffs_x[0,0,0]-arr_coeffs_x[0,0,0] temp_y = arr_coeffs_y[0,0,0]-arr_coeffs_y[0,0,0] @@ -4726,12 +4726,12 @@ def eval_jacobians_inv_irregular_2d_weights(np1: int, np2: int, f_p1: int, f_p2: Inverse of the Jacobian matrix at every point of the grid """ - jmat = np.empty((2, 2)) + jmat = xp.empty((2, 2)) - arr_coeffs_x = np.zeros_like(global_arr_coeff_x, shape=(1 + f_p1, 1 + f_p2)) - arr_coeffs_y = np.zeros_like(global_arr_coeff_y, shape=(1 + f_p1, 1 + f_p2)) + arr_coeffs_x = xp.zeros_like(global_arr_coeff_x, shape=(1 + f_p1, 1 + f_p2)) + arr_coeffs_y = xp.zeros_like(global_arr_coeff_y, shape=(1 + f_p1, 1 + f_p2)) - arr_coeffs_weights = np.zeros((1 + f_p1, 1 + f_p2)) + arr_coeffs_weights = xp.zeros((1 + f_p1, 1 + f_p2)) temp_x = arr_coeffs_x[0,0]-arr_coeffs_x[0,0] temp_y = 
arr_coeffs_y[0,0]-arr_coeffs_y[0,0] diff --git a/psydac/core/tests/test_bsplines.py b/psydac/core/tests/test_bsplines.py index b32edd4b8..0210e1652 100644 --- a/psydac/core/tests/test_bsplines.py +++ b/psydac/core/tests/test_bsplines.py @@ -1,7 +1,7 @@ #coding: utf-8 import pytest -import numpy as np +import cunumpy as xp from psydac.core.bsplines import ( find_span, basis_funs, @@ -28,8 +28,8 @@ def test_find_span( lims, nc, p, eps=1e-12 ): - grid = np.linspace( *lims, num=nc+1 ) - knots = np.r_[ [grid[0]]*p, grid, [grid[-1]]*p ] + grid = xp.linspace( *lims, num=nc+1 ) + knots = xp.r_[ [grid[0]]*p, grid, [grid[-1]]*p ] for i,xi in enumerate( grid ): assert find_span( knots, p, x=xi-eps ) == p + max( 0, i-1 ) @@ -43,15 +43,15 @@ def test_find_span( lims, nc, p, eps=1e-12 ): def test_basis_funs( lims, nc, p, tol=1e-14 ): - grid = np.linspace( *lims, num=nc+1 ) - knots = np.r_[ [grid[0]]*p, grid, [grid[-1]]*p ] + grid = xp.linspace( *lims, num=nc+1 ) + knots = xp.r_[ [grid[0]]*p, grid, [grid[-1]]*p ] - xx = np.linspace( *lims, num=101 ) + xx = xp.linspace( *lims, num=101 ) for x in xx: span = find_span( knots, p, x ) basis = basis_funs( knots, p, x, span ) assert len( basis ) == p+1 - assert np.all( basis >= 0 ) + assert xp.all( basis >= 0 ) assert abs( sum( basis ) - 1.0 ) < tol #============================================================================== @@ -61,10 +61,10 @@ def test_basis_funs( lims, nc, p, tol=1e-14 ): def test_basis_funs_1st_der( lims, nc, p, tol=1e-14 ): - grid = np.linspace( *lims, num=nc+1 ) - knots = np.r_[ [grid[0]]*p, grid, [grid[-1]]*p ] + grid = xp.linspace( *lims, num=nc+1 ) + knots = xp.r_[ [grid[0]]*p, grid, [grid[-1]]*p ] - xx = np.linspace( *lims, num=101 ) + xx = xp.linspace( *lims, num=101 ) for x in xx: span = find_span( knots, p, x ) ders = basis_funs_1st_der( knots, p, x, span ) @@ -81,33 +81,33 @@ def test_basis_funs_all_ders( lims, nc, p, tol=1e-14 ): # Maximum derivative required n = p+2 - grid, dx = np.linspace( *lims, 
num=nc+1, retstep=True ) - knots = np.r_[ [grid[0]]*p, grid, [grid[-1]]*p ] + grid, dx = xp.linspace( *lims, num=nc+1, retstep=True ) + knots = xp.r_[ [grid[0]]*p, grid, [grid[-1]]*p ] - xx = np.linspace( *lims, num=101 ) + xx = xp.linspace( *lims, num=101 ) for x in xx: span = find_span( knots, p, x ) ders = basis_funs_all_ders( knots, p, x, span, n ) # Test output array assert ders.shape == (1+n,1+p) - assert ders.dtype == np.dtype( float ) + assert ders.dtype == xp.dtype( float ) # Test 0th derivative der0 = basis_funs( knots, p, x, span ) - assert np.allclose( ders[0,:], der0, rtol=1e-15, atol=1e-15 ) - assert np.all( ders[0,:] >= 0.0 ) + assert xp.allclose( ders[0,:], der0, rtol=1e-15, atol=1e-15 ) + assert xp.all( ders[0,:] >= 0.0 ) # Test 1st derivative der1 = basis_funs_1st_der( knots, p, x, span ) - assert np.allclose( ders[1,:], der1, rtol=1e-15, atol=1e-15/dx ) + assert xp.allclose( ders[1,:], der1, rtol=1e-15, atol=1e-15/dx ) # Test 2nd to n-th derivatives for i in range(2,n+1): assert abs( ders[i,:].sum() ) <= tol * abs( ders[i,:] ).max() # Test that all derivatives of degree > p are zero - assert np.all( ders[p+1:,:] == 0.0 ) + assert xp.all( ders[p+1:,:] == 0.0 ) #============================================================================== @pytest.mark.parametrize( 'lims', ([0,1], [-2,3]) ) @@ -154,9 +154,9 @@ def test_histopolation_matrix(lims, nc, p, periodic, tol=1e-13): ([0.1, 0.1, 0.0, 0.4, 0.4, 0.9, 0.9], [0, 1, 0, 3, 4, 8, 9]), ([0., 0.1, 0.1, 1], [0, 0, 1, 9])]) def test_cell_index(i_grid, expected): - breaks = np.array([0. , 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.]) - out = cell_index(breaks, np.asarray(i_grid)) - assert np.array_equal(expected, out) + breaks = xp.array([0. 
, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.]) + out = cell_index(breaks, xp.asarray(i_grid)) + assert xp.array_equal(expected, out) #============================================================================== # SCRIPT FUNCTIONALITY: PLOT BASIS FUNCTIONS @@ -164,7 +164,7 @@ def test_cell_index(i_grid, expected): if __name__ == '__main__': import matplotlib.pyplot as plt - np.set_printoptions(linewidth=130) + xp.set_printoptions(linewidth=130) # Domain limits, number of cells and spline degree lims = [0, 1] @@ -176,24 +176,24 @@ def test_cell_index(i_grid, expected): m = 2 # Grid (breakpoints) and clamped knot sequence - grid = np.linspace( *lims, num=nc+1 ) - grid[1:-1] += 0.1*np.random.random_sample(nc-1) - 0.05 # Perturb internal breakpoints - knots = np.r_[ [grid[0]]*p, grid, [grid[-1]]*p ] + grid = xp.linspace( *lims, num=nc+1 ) + grid[1:-1] += 0.1*xp.random.random_sample(nc-1) - 0.05 # Perturb internal breakpoints + knots = xp.r_[ [grid[0]]*p, grid, [grid[-1]]*p ] # Insert repeated internal knot knots = list( knots ) knots = knots[:k] + [knots[k]]*m + knots[k+1:] - knots = np.array( knots ) + knots = xp.array( knots ) # Number of basis functions nb = len(knots)-p-1 # Evaluation grid - xx = np.linspace( *lims, num=501 ) + xx = xp.linspace( *lims, num=501 ) # Compute values of each basis function on evaluation grid - yy = np.zeros( (len(xx), nb) ) - zz = np.zeros( (len(xx), nb) ) + yy = xp.zeros( (len(xx), nb) ) + zz = xp.zeros( (len(xx), nb) ) for i,x in enumerate( xx ): span = find_span( knots, p, x ) yy[i,span-p:span+1] = basis_funs ( knots, p, x, span ) @@ -209,7 +209,7 @@ def test_cell_index(i_grid, expected): # # \int B(i) dx = length(support(B)) / (p + 1) = (T[i + p + 1] - T[i]) / (p + 1) # - integrals_theory = np.array([(knots[i+p+1] - knots[i]) / (p+1) for i in range(nb)]) + integrals_theory = xp.array([(knots[i+p+1] - knots[i]) / (p+1) for i in range(nb)]) # Integrals of each B-spline over domain (Gaussian quadrature) from 
psydac.utilities.quadratures import gauss_legendre @@ -219,9 +219,9 @@ def test_cell_index(i_grid, expected): u, w = gauss_legendre(p + 1) quad_x, quad_w = quadrature_grid(grid, u, w) quad_basis = basis_ders_on_quad_grid(knots, p, quad_x, nders=0, normalization='B') - integrals = np.zeros(nb) + integrals = xp.zeros(nb) for ie, span in enumerate(elements_spans(knots, p)): - integrals[span-p:span+1] += np.dot(quad_basis[ie, :, 0, :], quad_w[ie, :]) + integrals[span-p:span+1] += xp.dot(quad_basis[ie, :, 0, :], quad_w[ie, :]) # Compare theory results with computed integrals print("\nIntegrals of basis functions over domain:") @@ -245,8 +245,8 @@ def test_cell_index(i_grid, expected): axes[1].set_ylabel( 'z', rotation='horizontal' ) # Plot knot sequence and add grid - values, counts = np.unique( knots, return_counts=True ) - y = np.concatenate( [np.linspace(0,1,c,endpoint=True) for c in counts] ) + values, counts = xp.unique( knots, return_counts=True ) + y = xp.concatenate( [xp.linspace(0,1,c,endpoint=True) for c in counts] ) for ax in axes: ax.plot( knots, y, 'ko', mew=1.0, mfc='None' ) ax.grid() diff --git a/psydac/core/tests/test_bsplines_kernel.py b/psydac/core/tests/test_bsplines_kernel.py index 9cbfd22de..ec4292db7 100644 --- a/psydac/core/tests/test_bsplines_kernel.py +++ b/psydac/core/tests/test_bsplines_kernel.py @@ -1,15 +1,15 @@ #coding: utf-8 import pytest -import numpy as np +import cunumpy as xp from psydac.core.bsplines_kernels import cell_index_p def test_cell_index_p(): - breaks = np.array([0. , 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.]) - breaks = np.ascontiguousarray(breaks, dtype=float) - out = np.zeros_like(breaks, dtype=np.int64) + breaks = xp.array([0. 
, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.]) + breaks = xp.ascontiguousarray(breaks, dtype=float) + out = xp.zeros_like(breaks, dtype=xp.int64) tol = 1e-15 # limit case: code should decide wether point is in or out, not fall in infinite loop @@ -26,15 +26,15 @@ def test_cell_index_p(): assert status == expected_status # checking that the values match those of searchsorted (-1) for arbitrary grid points - i_grid = np.array([0.14320482, 0.86569833, 0.77775327, 0.00895956, 0.074629 , + i_grid = xp.array([0.14320482, 0.86569833, 0.77775327, 0.00895956, 0.074629 , 0.45682646, 0.5384352 , 0.20915311, 0.73121977, 0.01057414, 0.33756086, 0.17839759, 0.14023414, 0.09846206, 0.79970392, 0.65330406, 0.82716552, 0.24185731, 0.24054685, 0.72466651, 0.69125033, 0.3136558 , 0.64794089, 0.47975527, 0.99802844, 0.64402598, 0.41263526, 0.28178414, 0.57274384, 0.73218562]) - out = np.zeros_like(i_grid, dtype=np.int64) + out = xp.zeros_like(i_grid, dtype=xp.int64) status = cell_index_p(breaks, i_grid, tol, out) assert status == 0 - nps = np.searchsorted(breaks, i_grid)-1 - assert np.allclose(out, nps) + nps = xp.searchsorted(breaks, i_grid)-1 + assert xp.allclose(out, nps) diff --git a/psydac/core/tests/test_bsplines_pyccel.py b/psydac/core/tests/test_bsplines_pyccel.py index 2f8186da6..d41ccc710 100644 --- a/psydac/core/tests/test_bsplines_pyccel.py +++ b/psydac/core/tests/test_bsplines_pyccel.py @@ -1,4 +1,4 @@ -import numpy as np +import cunumpy as xp import pytest from psydac.utilities.quadratures import gauss_legendre @@ -20,7 +20,7 @@ # The pytest-xdist plugin requires that every worker sees the same parameters # in the unit tests. As in this module random parameters are used, here we set # the same random seed for all workers. 
-np.random.seed(0) +xp.random.seed(0) ############################################################################### # "True" Functions @@ -48,9 +48,9 @@ def find_span_true( knots, degree, x ): #============================================================================== def basis_funs_true( knots, degree, x, span ): - left = np.empty( degree , dtype=float ) - right = np.empty( degree , dtype=float ) - values = np.empty( degree+1, dtype=float ) + left = xp.empty( degree , dtype=float ) + right = xp.empty( degree , dtype=float ) + values = xp.empty( degree+1, dtype=float ) values[0] = 1.0 for j in range(0,degree): @@ -75,7 +75,7 @@ def basis_funs_1st_der_true( knots, degree, x, span ): # degree deg-1 # ------- # j = 0 - ders = np.empty( degree+1, dtype=float ) + ders = xp.empty( degree+1, dtype=float ) saved = degree * values[0] / (knots[span+1]-knots[span+1-degree]) ders[0] = -saved # j = 1,...,degree-1 @@ -122,11 +122,11 @@ def basis_funs_all_ders_true(knots, degree, x, span, n, normalization='B'): - inverse of knot differences are saved to avoid unnecessary divisions; - innermost loops are replaced with vector operations on slices. """ - left = np.empty( degree ) - right = np.empty( degree ) - ndu = np.empty( (degree+1, degree+1) ) - a = np.empty( ( 2, degree+1) ) - ders = np.zeros( ( n+1, degree+1) ) # output array + left = xp.empty( degree ) + right = xp.empty( degree ) + ndu = xp.empty( (degree+1, degree+1) ) + a = xp.empty( ( 2, degree+1) ) + ders = xp.zeros( ( n+1, degree+1) ) # output array # Number of derivatives that need to be effectively computed # Derivatives higher than degree are = 0. 
@@ -165,7 +165,7 @@ def basis_funs_all_ders_true(knots, degree, x, span, n, normalization='B'): j1 = 1 if (rk > -1 ) else -rk j2 = k-1 if (r-1 <= pk) else degree-r a[s2,j1:j2+1] = (a[s1,j1:j2+1] - a[s1,j1-1:j2]) * ndu[pk+1,rk+j1:rk+j2+1] - d += np.dot( a[s2,j1:j2+1], ndu[rk+j1:rk+j2+1,pk] ) + d += xp.dot( a[s2,j1:j2+1], ndu[rk+j1:rk+j2+1,pk] ) if r <= pk: a[s2,k] = - a[s1,k-1] * ndu[pk+1,r] d += a[s2,k] * ndu[r,pk] @@ -197,7 +197,7 @@ def collocation_matrix_true(knots, degree, periodic, normalization, xgrid): nx = len(xgrid) # Collocation matrix as 2D Numpy array (dense storage) - mat = np.zeros( (nx,nb) ) + mat = xp.zeros( (nx,nb) ) # Indexing of basis functions (periodic or not) for a given span if periodic: @@ -226,11 +226,11 @@ def collocation_matrix_true(knots, degree, periodic, normalization, xgrid): #============================================================================== def histopolation_matrix_true(knots, degree, periodic, normalization, xgrid): # Check that knots are ordered (but allow repeated knots) - if not np.all(np.diff(knots) >= 0): + if not xp.all(xp.diff(knots) >= 0): raise ValueError("Cannot accept knot sequence: {}".format(knots)) # Check that spline degree is non-negative integer - if not isinstance(degree, (int, np.integer)): + if not isinstance(degree, (int, xp.integer)): raise TypeError("Degree {} must be integer, got type {} instead".format(degree, type(degree))) if degree < 0: raise ValueError("Cannot accept negative degree: {}".format(degree)) @@ -244,7 +244,7 @@ def histopolation_matrix_true(knots, degree, periodic, normalization, xgrid): raise ValueError("Cannot accept 'normalization' parameter: {}".format(normalization)) # Check that grid points are ordered, and do not allow repetitions - if not np.all(np.diff(xgrid) > 0): + if not xp.all(xp.diff(xgrid) > 0): raise ValueError("Grid points must be ordered, with no repetitions: {}".format(xgrid)) # Number of basis functions (in periodic case remove degree repeated elements) @@ 
-293,7 +293,7 @@ def histopolation_matrix_true(knots, degree, periodic, normalization, xgrid): # Compute histopolation matrix from collocation matrix of higher degree m = C.shape[0] - 1 n = C.shape[1] - 1 - H = np.zeros((m, n)) + H = xp.zeros((m, n)) for i in range(m): # Indices of first/last non-zero elements in row of collocation matrix jstart = spans[i] - (degree+1) @@ -312,7 +312,7 @@ def histopolation_matrix_true(knots, degree, periodic, normalization, xgrid): # Periodic case: wrap around histopolation matrix # 1. identify repeated basis functions (sum columns) # 2. identify split interval (sum rows) - Hp = np.zeros((nx, nb)) + Hp = xp.zeros((nx, nb)) for i in range(m): for j in range(n): Hp[i % nx, j % nb] += H[i, j] @@ -321,8 +321,8 @@ def histopolation_matrix_true(knots, degree, periodic, normalization, xgrid): #============================================================================== def breakpoints_true( knots, degree ,tol=1e-15): - knots = np.array(knots) - diff = np.append(True, abs(np.diff(knots[degree:-degree]))>tol) + knots = xp.array(knots) + diff = xp.append(True, abs(xp.diff(knots[degree:-degree]))>tol) return knots[degree:-degree][diff] #============================================================================== @@ -332,7 +332,7 @@ def greville_true( knots, degree, periodic ): n = len(T)-2*p-1 if periodic else len(T)-p-1 # Compute greville abscissas as average of p consecutive knot values - xg = np.array([sum(T[i:i+p])/p for i in range(1,1+n)]) + xg = xp.array([sum(T[i:i+p])/p for i in range(1,1+n)]) # Domain boundaries a = T[p] @@ -341,7 +341,7 @@ def greville_true( knots, degree, periodic ): # If needed apply periodic boundary conditions, then sort array if periodic: xg = (xg-a) % (b-a) + a - xg = xg[np.argsort(xg)] + xg = xg[xp.argsort(xg)] # Make sure roundoff errors don't push Greville points outside domain xg[ 0] = max(xg[ 0], a) @@ -354,7 +354,7 @@ def elements_spans_true( knots, degree ): breaks = breakpoints_true( knots, degree ) 
nk = len(knots) ne = len(breaks)-1 - spans = np.zeros( ne, dtype=int ) + spans = xp.zeros( ne, dtype=int ) ie = 0 for ik in range( degree, nk-degree ): @@ -374,14 +374,14 @@ def make_knots_true( breaks, degree, periodic, multiplicity=1 ): # Consistency checks assert len(breaks) > 1 - assert all( np.diff(breaks) > 0 ) + assert all( xp.diff(breaks) > 0 ) assert degree > 0 assert 1 <= multiplicity and multiplicity <= degree + 1 if periodic: assert len(breaks) > degree - T = np.zeros(multiplicity * len(breaks[1:-1]) + 2 + 2 * degree) + T = xp.zeros(multiplicity * len(breaks[1:-1]) + 2 + 2 * degree) ncells = len(breaks) - 1 for i in range(0, ncells+1): @@ -403,7 +403,7 @@ def make_knots_true( breaks, degree, periodic, multiplicity=1 ): #============================================================================== def elevate_knots_true(knots, degree, periodic, multiplicity=1, tol=1e-15): - knots = np.array(knots) + knots = xp.array(knots) if periodic: T, p = knots, degree @@ -414,14 +414,14 @@ def elevate_knots_true(knots, degree, periodic, multiplicity=1, tol=1e-15): left = [knots[0],*knots[:degree+1]] right = [knots[-1],*knots[-degree-1:]] - diff = np.append(True, np.diff(knots[degree+1:-degree-1])>tol) + diff = xp.append(True, xp.diff(knots[degree+1:-degree-1])>tol) if len(knots[degree+1:-degree-1])>0: unique = knots[degree+1:-degree-1][diff] - knots = np.repeat(unique, multiplicity) + knots = xp.repeat(unique, multiplicity) else: knots = knots[degree+1:-degree-1] - return np.array([*left, *knots, *right]) + return xp.array([*left, *knots, *right]) #============================================================================== def quadrature_grid_true(breaks, quad_rule_x, quad_rule_w): @@ -433,13 +433,13 @@ def quadrature_grid_true(breaks, quad_rule_x, quad_rule_w): assert min(quad_rule_x) >= -1 assert max(quad_rule_x) <= +1 - quad_rule_x = np.asarray(quad_rule_x) - quad_rule_w = np.asarray(quad_rule_w) + quad_rule_x = xp.asarray(quad_rule_x) + quad_rule_w = 
xp.asarray(quad_rule_w) ne = len(breaks) - 1 nq = len(quad_rule_x) - quad_x = np.zeros((ne, nq)) - quad_w = np.zeros((ne, nq)) + quad_x = xp.zeros((ne, nq)) + quad_w = xp.zeros((ne, nq)) # Compute location and weight of quadrature points from basic rule for ie, (a, b) in enumerate(zip(breaks[:-1], breaks[1:])): @@ -453,7 +453,7 @@ def quadrature_grid_true(breaks, quad_rule_x, quad_rule_w): #============================================================================== def basis_ders_on_quad_grid_true(knots, degree, quad_grid, nders, normalization): ne,nq = quad_grid.shape - basis = np.zeros((ne, degree+1, nders+1, nq)) + basis = xp.zeros((ne, degree+1, nders+1, nq)) if normalization == 'M': scaling = 1. / basis_integrals_true(knots, degree) @@ -474,7 +474,7 @@ def basis_integrals_true(knots, degree): T = knots p = degree n = len(T)-p-1 - K = np.array([(T[i+p+1] - T[i]) / (p + 1) for i in range(n)]) + K = xp.array([(T[i+p+1] - T[i]) / (p + 1) for i in range(n)]) return K @@ -488,77 +488,77 @@ def basis_integrals_true(knots, degree): @pytest.mark.parametrize(('knots', 'degree'), - [(np.sort(np.random.random(15)), 2), - (np.sort(np.random.random(15)), 3), - (np.sort(np.random.random(15)), 4), - (np.sort(np.random.random(15)), 5), - (np.array([0.0, 0.0, 0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.0, 1.0]), 2), - (np.array([0.0, 0.0, 0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.0, 1.0]), 3), - (np.array([0.0, 0.0, 0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.0, 1.0]), 4), - (np.array([0.0, 0.0, 0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.0, 1.0]), 5), - (np.array([0.0, 0.0, 0.0, 1.0, 1.0, 1.0]), 2), - (np.array([0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0]), 3)]) -@pytest.mark.parametrize('x', (np.random.random(), np.random.random(), np.random.random())) + [(xp.sort(xp.random.random(15)), 2), + (xp.sort(xp.random.random(15)), 3), + (xp.sort(xp.random.random(15)), 4), + (xp.sort(xp.random.random(15)), 5), + (xp.array([0.0, 
0.0, 0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.0, 1.0]), 2), + (xp.array([0.0, 0.0, 0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.0, 1.0]), 3), + (xp.array([0.0, 0.0, 0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.0, 1.0]), 4), + (xp.array([0.0, 0.0, 0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.0, 1.0]), 5), + (xp.array([0.0, 0.0, 0.0, 1.0, 1.0, 1.0]), 2), + (xp.array([0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0]), 3)]) +@pytest.mark.parametrize('x', (xp.random.random(), xp.random.random(), xp.random.random())) def test_find_span(knots, degree, x): expected = find_span_true(knots, degree, x) out = find_span(knots, degree, x) - assert np.allclose(expected, out, atol=ATOL, rtol=RTOL) + assert xp.allclose(expected, out, atol=ATOL, rtol=RTOL) @pytest.mark.parametrize(('knots', 'degree'), - [(np.sort(np.random.random(15)), 2), - (np.sort(np.random.random(15)), 3), - (np.sort(np.random.random(15)), 4), - (np.sort(np.random.random(15)), 5), - (np.array([0.0, 0.0, 0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.0, 1.0]), 2), - (np.array([0.0, 0.0, 0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.0, 1.0]), 3), - (np.array([0.0, 0.0, 0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.0, 1.0]), 4), - (np.array([0.0, 0.0, 0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.0, 1.0]), 5), - (np.array([0.0, 0.0, 0.0, 1.0, 1.0, 1.0]), 2), - (np.array([0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0]), 3)]) -@pytest.mark.parametrize('x', (np.random.random(), np.random.random(), np.random.random())) + [(xp.sort(xp.random.random(15)), 2), + (xp.sort(xp.random.random(15)), 3), + (xp.sort(xp.random.random(15)), 4), + (xp.sort(xp.random.random(15)), 5), + (xp.array([0.0, 0.0, 0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.0, 1.0]), 2), + (xp.array([0.0, 0.0, 0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.0, 1.0]), 3), + (xp.array([0.0, 0.0, 0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.0, 1.0]), 
4), + (xp.array([0.0, 0.0, 0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.0, 1.0]), 5), + (xp.array([0.0, 0.0, 0.0, 1.0, 1.0, 1.0]), 2), + (xp.array([0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0]), 3)]) +@pytest.mark.parametrize('x', (xp.random.random(), xp.random.random(), xp.random.random())) def test_basis_funs(knots, degree, x): span = find_span(knots, degree, x) expected = basis_funs_true(knots, degree, x, span) out = basis_funs(knots, degree, x, span) - assert np.allclose(expected, out, atol=ATOL, rtol=RTOL) + assert xp.allclose(expected, out, atol=ATOL, rtol=RTOL) @pytest.mark.parametrize(('knots', 'degree'), - [(np.sort(np.random.random(15)), 2), - (np.sort(np.random.random(15)), 3), - (np.sort(np.random.random(15)), 4), - (np.sort(np.random.random(15)), 5), - (np.array([0.0, 0.0, 0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.0, 1.0]), 2), - (np.array([0.0, 0.0, 0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.0, 1.0]), 3), - (np.array([0.0, 0.0, 0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.0, 1.0]), 4), - (np.array([0.0, 0.0, 0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.0, 1.0]), 5), - (np.array([0.0, 0.0, 0.0, 1.0, 1.0, 1.0]), 2), - (np.array([0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0]), 3)]) -@pytest.mark.parametrize('x', (np.random.random(), np.random.random(), np.random.random())) + [(xp.sort(xp.random.random(15)), 2), + (xp.sort(xp.random.random(15)), 3), + (xp.sort(xp.random.random(15)), 4), + (xp.sort(xp.random.random(15)), 5), + (xp.array([0.0, 0.0, 0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.0, 1.0]), 2), + (xp.array([0.0, 0.0, 0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.0, 1.0]), 3), + (xp.array([0.0, 0.0, 0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.0, 1.0]), 4), + (xp.array([0.0, 0.0, 0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.0, 1.0]), 5), + (xp.array([0.0, 0.0, 0.0, 1.0, 1.0, 1.0]), 2), + (xp.array([0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0]), 3)]) 
+@pytest.mark.parametrize('x', (xp.random.random(), xp.random.random(), xp.random.random())) def test_basis_funs_1st_der(knots, degree, x): span = find_span(knots, degree, x) expected = basis_funs_1st_der_true(knots, degree, x, span) out = basis_funs_1st_der(knots, degree, x, span) - assert np.allclose(expected, out, atol=ATOL, rtol=RTOL) + assert xp.allclose(expected, out, atol=ATOL, rtol=RTOL) @pytest.mark.parametrize(('knots', 'degree'), - [(np.sort(np.random.random(15)), 2), - (np.sort(np.random.random(15)), 3), - (np.sort(np.random.random(15)), 4), - (np.sort(np.random.random(15)), 5), - (np.array([0.0, 0.0, 0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.0, 1.0]), 2), - (np.array([0.0, 0.0, 0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.0, 1.0]), 3), - (np.array([0.0, 0.0, 0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.0, 1.0]), 4), - (np.array([0.0, 0.0, 0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.0, 1.0]), 5), - (np.array([0.0, 0.0, 0.0, 1.0, 1.0, 1.0]), 2), - (np.array([0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0]), 3)]) -@pytest.mark.parametrize('x', (np.random.random(), np.random.random(), np.random.random())) + [(xp.sort(xp.random.random(15)), 2), + (xp.sort(xp.random.random(15)), 3), + (xp.sort(xp.random.random(15)), 4), + (xp.sort(xp.random.random(15)), 5), + (xp.array([0.0, 0.0, 0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.0, 1.0]), 2), + (xp.array([0.0, 0.0, 0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.0, 1.0]), 3), + (xp.array([0.0, 0.0, 0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.0, 1.0]), 4), + (xp.array([0.0, 0.0, 0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.0, 1.0]), 5), + (xp.array([0.0, 0.0, 0.0, 1.0, 1.0, 1.0]), 2), + (xp.array([0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0]), 3)]) +@pytest.mark.parametrize('x', (xp.random.random(), xp.random.random(), xp.random.random())) @pytest.mark.parametrize('n', (2, 3, 4, 5)) @pytest.mark.parametrize('normalization', ('B', 
'M')) def test_basis_funs_all_ders(knots, degree, x, n, normalization): @@ -566,106 +566,106 @@ def test_basis_funs_all_ders(knots, degree, x, n, normalization): expected = basis_funs_all_ders_true(knots, degree, x, span, n, normalization) out = basis_funs_all_ders(knots, degree, x, span, n, normalization) - assert np.allclose(expected, out, atol=ATOL, rtol=RTOL) + assert xp.allclose(expected, out, atol=ATOL, rtol=RTOL) @pytest.mark.parametrize(('knots', 'degree'), - [(np.sort(np.random.random(15)), 2), - (np.sort(np.random.random(15)), 3), - (np.sort(np.random.random(15)), 4), - (np.sort(np.random.random(15)), 5), - (np.array([0.0, 0.0, 0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.0, 1.0]), 2), - (np.array([0.0, 0.0, 0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.0, 1.0]), 3), - (np.array([0.0, 0.0, 0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.0, 1.0]), 4), - (np.array([0.0, 0.0, 0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.0, 1.0]), 5), - (np.array([0.0, 0.0, 0.0, 1.0, 1.0, 1.0]), 2), - (np.array([0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0]), 3)]) + [(xp.sort(xp.random.random(15)), 2), + (xp.sort(xp.random.random(15)), 3), + (xp.sort(xp.random.random(15)), 4), + (xp.sort(xp.random.random(15)), 5), + (xp.array([0.0, 0.0, 0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.0, 1.0]), 2), + (xp.array([0.0, 0.0, 0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.0, 1.0]), 3), + (xp.array([0.0, 0.0, 0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.0, 1.0]), 4), + (xp.array([0.0, 0.0, 0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.0, 1.0]), 5), + (xp.array([0.0, 0.0, 0.0, 1.0, 1.0, 1.0]), 2), + (xp.array([0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0]), 3)]) @pytest.mark.parametrize('periodic', (True, False)) @pytest.mark.parametrize('normalization', ('B', 'M')) -@pytest.mark.parametrize('xgrid', (np.random.random(10), np.random.random(15))) +@pytest.mark.parametrize('xgrid', (xp.random.random(10), 
xp.random.random(15))) def test_collocation_matrix(knots, degree, periodic, normalization, xgrid): expected = collocation_matrix_true(knots, degree, periodic, normalization, xgrid) out = collocation_matrix(knots, degree, periodic, normalization, xgrid) - assert np.allclose(expected, out, atol=ATOL, rtol=RTOL) + assert xp.allclose(expected, out, atol=ATOL, rtol=RTOL) @pytest.mark.parametrize(('knots', 'degree'), - [(np.sort(np.random.random(15)), 2), - (np.sort(np.random.random(15)), 3), - (np.sort(np.random.random(15)), 4), - (np.sort(np.random.random(15)), 5), - (np.array([0.0, 0.0, 0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.0, 1.0]), 2), - (np.array([0.0, 0.0, 0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.0, 1.0]), 3), - (np.array([0.0, 0.0, 0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.0, 1.0]), 4), - (np.array([0.0, 0.0, 0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.0, 1.0]), 5), - (np.array([0.0, 0.0, 0.0, 1.0, 1.0, 1.0]), 2), - (np.array([0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0]), 3)]) + [(xp.sort(xp.random.random(15)), 2), + (xp.sort(xp.random.random(15)), 3), + (xp.sort(xp.random.random(15)), 4), + (xp.sort(xp.random.random(15)), 5), + (xp.array([0.0, 0.0, 0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.0, 1.0]), 2), + (xp.array([0.0, 0.0, 0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.0, 1.0]), 3), + (xp.array([0.0, 0.0, 0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.0, 1.0]), 4), + (xp.array([0.0, 0.0, 0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.0, 1.0]), 5), + (xp.array([0.0, 0.0, 0.0, 1.0, 1.0, 1.0]), 2), + (xp.array([0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0]), 3)]) @pytest.mark.parametrize('periodic', [True, False]) @pytest.mark.parametrize('normalization', ('B', 'M')) -@pytest.mark.parametrize('xgrid', (np.random.random(10), np.random.random(15))) +@pytest.mark.parametrize('xgrid', (xp.random.random(10), xp.random.random(15))) def test_histopolation_matrix(knots, 
degree, periodic, normalization, xgrid): - xgrid = np.sort(np.unique(xgrid)) + xgrid = xp.sort(xp.unique(xgrid)) expected = histopolation_matrix_true(knots, degree, periodic, normalization, xgrid) out = histopolation_matrix(knots, degree, periodic, normalization, xgrid) - assert np.allclose(expected, out, atol=ATOL, rtol=RTOL) + assert xp.allclose(expected, out, atol=ATOL, rtol=RTOL) @pytest.mark.parametrize(('knots', 'degree'), - [(np.sort(np.random.random(15)), 2), - (np.sort(np.random.random(15)), 3), - (np.sort(np.random.random(15)), 4), - (np.sort(np.random.random(15)), 5), - (np.array([0.0, 0.0, 0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.0, 1.0]), 2), - (np.array([0.0, 0.0, 0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.0, 1.0]), 3), - (np.array([0.0, 0.0, 0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.0, 1.0]), 4), - (np.array([0.0, 0.0, 0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.0, 1.0]), 5), - (np.array([0.0, 0.0, 0.0, 1.0, 1.0, 1.0]), 2), - (np.array([0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0]), 3)]) + [(xp.sort(xp.random.random(15)), 2), + (xp.sort(xp.random.random(15)), 3), + (xp.sort(xp.random.random(15)), 4), + (xp.sort(xp.random.random(15)), 5), + (xp.array([0.0, 0.0, 0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.0, 1.0]), 2), + (xp.array([0.0, 0.0, 0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.0, 1.0]), 3), + (xp.array([0.0, 0.0, 0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.0, 1.0]), 4), + (xp.array([0.0, 0.0, 0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.0, 1.0]), 5), + (xp.array([0.0, 0.0, 0.0, 1.0, 1.0, 1.0]), 2), + (xp.array([0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0]), 3)]) def test_breakpoints(knots, degree): expected = breakpoints_true(knots, degree) out = breakpoints(knots, degree) - assert np.allclose(expected, out, atol=ATOL, rtol=RTOL) + assert xp.allclose(expected, out, atol=ATOL, rtol=RTOL) @pytest.mark.parametrize(('knots', 'degree'), - 
[(np.sort(np.random.random(15)), 2), - (np.sort(np.random.random(15)), 3), - (np.sort(np.random.random(15)), 4), - (np.sort(np.random.random(15)), 5), - (np.array([0.0, 0.0, 0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.0, 1.0]), 2), - (np.array([0.0, 0.0, 0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.0, 1.0]), 3), - (np.array([0.0, 0.0, 0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.0, 1.0]), 4), - (np.array([0.0, 0.0, 0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.0, 1.0]), 5), - (np.array([0.0, 0.0, 0.0, 1.0, 1.0, 1.0]), 2), - (np.array([0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0]), 3)]) + [(xp.sort(xp.random.random(15)), 2), + (xp.sort(xp.random.random(15)), 3), + (xp.sort(xp.random.random(15)), 4), + (xp.sort(xp.random.random(15)), 5), + (xp.array([0.0, 0.0, 0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.0, 1.0]), 2), + (xp.array([0.0, 0.0, 0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.0, 1.0]), 3), + (xp.array([0.0, 0.0, 0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.0, 1.0]), 4), + (xp.array([0.0, 0.0, 0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.0, 1.0]), 5), + (xp.array([0.0, 0.0, 0.0, 1.0, 1.0, 1.0]), 2), + (xp.array([0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0]), 3)]) @pytest.mark.parametrize('periodic', [True, False]) def test_greville(knots, degree, periodic): expected = greville_true(knots, degree, periodic) out = greville(knots, degree, periodic) - assert np.allclose(expected, out, atol=ATOL, rtol=RTOL) + assert xp.allclose(expected, out, atol=ATOL, rtol=RTOL) @pytest.mark.parametrize(('knots', 'degree'), - [(np.sort(np.random.random(15)), 2), - (np.sort(np.random.random(15)), 3), - (np.sort(np.random.random(15)), 4), - (np.sort(np.random.random(15)), 5), - (np.array([0.0, 0.0, 0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.0, 1.0]), 2), - (np.array([0.0, 0.0, 0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.0, 1.0]), 3), - (np.array([0.0, 0.0, 0.0, 0.1, 
0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.0, 1.0]), 4), - (np.array([0.0, 0.0, 0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.0, 1.0]), 5), - (np.array([0.0, 0.0, 0.0, 1.0, 1.0, 1.0]), 2), - (np.array([0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0]), 3)]) + [(xp.sort(xp.random.random(15)), 2), + (xp.sort(xp.random.random(15)), 3), + (xp.sort(xp.random.random(15)), 4), + (xp.sort(xp.random.random(15)), 5), + (xp.array([0.0, 0.0, 0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.0, 1.0]), 2), + (xp.array([0.0, 0.0, 0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.0, 1.0]), 3), + (xp.array([0.0, 0.0, 0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.0, 1.0]), 4), + (xp.array([0.0, 0.0, 0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.0, 1.0]), 5), + (xp.array([0.0, 0.0, 0.0, 1.0, 1.0, 1.0]), 2), + (xp.array([0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0]), 3)]) def test_elements_spans(knots, degree): expected = elements_spans_true(knots, degree) out = elements_spans(knots, degree) - assert np.allclose(expected, out, atol=ATOL, rtol=RTOL) + assert xp.allclose(expected, out, atol=ATOL, rtol=RTOL) -@pytest.mark.parametrize('breaks', (np.linspace(0, 1, 10, endpoint=False), - np.sort(np.random.random(15)))) +@pytest.mark.parametrize('breaks', (xp.linspace(0, 1, 10, endpoint=False), + xp.sort(xp.random.random(15)))) @pytest.mark.parametrize(('degree', 'multiplicity'), [(2, 1), (3, 1), (3, 2), (4, 1), (4, 2), (4, 3), @@ -675,51 +675,51 @@ def test_make_knots(breaks, degree, periodic, multiplicity): expected = make_knots_true(breaks, degree, periodic, multiplicity) out = make_knots(breaks, degree, periodic, multiplicity) print(out, expected) - assert np.allclose(expected, out, atol=ATOL, rtol=RTOL) + assert xp.allclose(expected, out, atol=ATOL, rtol=RTOL) @pytest.mark.parametrize(('knots', 'degree'), - [(np.sort(np.random.random(15)), 2), - (np.sort(np.random.random(15)), 3), - (np.sort(np.random.random(15)), 4), - 
(np.sort(np.random.random(15)), 5), - (np.array([0.0, 0.0, 0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.0, 1.0]), 2), - (np.array([0.0, 0.0, 0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.0, 1.0]), 3), - (np.array([0.0, 0.0, 0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.0, 1.0]), 4), - (np.array([0.0, 0.0, 0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.0, 1.0]), 5), - (np.array([0.0, 0.0, 0.0, 1.0, 1.0, 1.0]), 2), - (np.array([0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0]), 3)]) + [(xp.sort(xp.random.random(15)), 2), + (xp.sort(xp.random.random(15)), 3), + (xp.sort(xp.random.random(15)), 4), + (xp.sort(xp.random.random(15)), 5), + (xp.array([0.0, 0.0, 0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.0, 1.0]), 2), + (xp.array([0.0, 0.0, 0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.0, 1.0]), 3), + (xp.array([0.0, 0.0, 0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.0, 1.0]), 4), + (xp.array([0.0, 0.0, 0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.0, 1.0]), 5), + (xp.array([0.0, 0.0, 0.0, 1.0, 1.0, 1.0]), 2), + (xp.array([0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0]), 3)]) @pytest.mark.parametrize('periodic', (True, False)) @pytest.mark.parametrize('multiplicity', (1, 2, 3)) def test_elevate_knots(knots, degree, periodic, multiplicity): expected = elevate_knots_true(knots, degree, periodic, multiplicity) out = elevate_knots(knots, degree, periodic, multiplicity) - assert np.allclose(expected, out, atol=ATOL, rtol=RTOL) + assert xp.allclose(expected, out, atol=ATOL, rtol=RTOL) -@pytest.mark.parametrize('breaks', (np.linspace(0, 1, 10, endpoint=False), - np.sort(np.random.random(15)))) +@pytest.mark.parametrize('breaks', (xp.linspace(0, 1, 10, endpoint=False), + xp.sort(xp.random.random(15)))) @pytest.mark.parametrize('nquads', (2, 3, 4, 5)) def test_quadrature_grid(breaks, nquads): quad_x, quad_w = gauss_legendre(nquads) expected = quadrature_grid_true(breaks, quad_x, quad_w) out = 
quadrature_grid(breaks, quad_x, quad_w) - assert np.allclose(expected, out, atol=ATOL, rtol=RTOL) + assert xp.allclose(expected, out, atol=ATOL, rtol=RTOL) @pytest.mark.parametrize(('knots', 'degree'), - [(np.sort(np.random.random(15)), 2), - (np.sort(np.random.random(15)), 3), - (np.sort(np.random.random(15)), 4), - (np.sort(np.random.random(15)), 5), - (np.array([0.0, 0.0, 0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.0, 1.0]), 2), - (np.array([0.0, 0.0, 0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.0, 1.0]), 3), - (np.array([0.0, 0.0, 0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.0, 1.0]), 4), - (np.array([0.0, 0.0, 0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.0, 1.0]), 5), - (np.array([0.0, 0.0, 0.0, 1.0, 1.0, 1.0]), 2), - (np.array([0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0]), 3)]) + [(xp.sort(xp.random.random(15)), 2), + (xp.sort(xp.random.random(15)), 3), + (xp.sort(xp.random.random(15)), 4), + (xp.sort(xp.random.random(15)), 5), + (xp.array([0.0, 0.0, 0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.0, 1.0]), 2), + (xp.array([0.0, 0.0, 0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.0, 1.0]), 3), + (xp.array([0.0, 0.0, 0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.0, 1.0]), 4), + (xp.array([0.0, 0.0, 0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.0, 1.0]), 5), + (xp.array([0.0, 0.0, 0.0, 1.0, 1.0, 1.0]), 2), + (xp.array([0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0]), 3)]) @pytest.mark.parametrize('n', (2, 3, 4, 5)) @pytest.mark.parametrize('normalization', ('B', 'M')) @pytest.mark.parametrize('nquads', (2, 3, 4, 5)) @@ -731,22 +731,22 @@ def test_basis_ders_on_quad_grid(knots, degree, n, normalization, nquads): expected = basis_ders_on_quad_grid_true(knots, degree, quad_grid, n, normalization) out = basis_ders_on_quad_grid(knots, degree, quad_grid, n, normalization) - assert np.allclose(expected, out, atol=ATOL, rtol=RTOL) + assert xp.allclose(expected, out, atol=ATOL, rtol=RTOL) 
@pytest.mark.parametrize(('knots', 'degree'), - [(np.sort(np.random.random(15)), 2), - (np.sort(np.random.random(15)), 3), - (np.sort(np.random.random(15)), 4), - (np.sort(np.random.random(15)), 5), - (np.array([0.0, 0.0, 0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.0, 1.0]), 2), - (np.array([0.0, 0.0, 0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.0, 1.0]), 3), - (np.array([0.0, 0.0, 0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.0, 1.0]), 4), - (np.array([0.0, 0.0, 0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.0, 1.0]), 5), - (np.array([0.0, 0.0, 0.0, 1.0, 1.0, 1.0]), 2), - (np.array([0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0]), 3)]) + [(xp.sort(xp.random.random(15)), 2), + (xp.sort(xp.random.random(15)), 3), + (xp.sort(xp.random.random(15)), 4), + (xp.sort(xp.random.random(15)), 5), + (xp.array([0.0, 0.0, 0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.0, 1.0]), 2), + (xp.array([0.0, 0.0, 0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.0, 1.0]), 3), + (xp.array([0.0, 0.0, 0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.0, 1.0]), 4), + (xp.array([0.0, 0.0, 0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.0, 1.0]), 5), + (xp.array([0.0, 0.0, 0.0, 1.0, 1.0, 1.0]), 2), + (xp.array([0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0]), 3)]) def test_basis_integrals(knots, degree): expected = basis_integrals_true(knots, degree) out = basis_integrals(knots, degree) - assert np.allclose(expected, out, atol=ATOL, rtol=RTOL) + assert xp.allclose(expected, out, atol=ATOL, rtol=RTOL) diff --git a/psydac/ddm/blocking_data_exchanger.py b/psydac/ddm/blocking_data_exchanger.py index 0a8c4759a..9cf7dbe82 100644 --- a/psydac/ddm/blocking_data_exchanger.py +++ b/psydac/ddm/blocking_data_exchanger.py @@ -1,6 +1,6 @@ # coding: utf-8 -import numpy as np +import cunumpy as xp from psydac.ddm.mpi import mpi as MPI from .cart import CartDecomposition, find_mpi_type @@ -78,7 +78,7 @@ def prepare_communications(self, u): # ... 
def start_update_ghost_regions( self, array, requests ): - assert isinstance( array, np.ndarray ) + assert isinstance( array, xp.ndarray ) # Shortcuts cart = self._cart @@ -119,7 +119,7 @@ def end_update_ghost_regions(self, array, requests ): # ... def start_exchange_assembly_data( self, array ): - assert isinstance( array, np.ndarray ) + assert isinstance( array, xp.ndarray ) # Shortcuts cart = self._cart @@ -146,7 +146,7 @@ def start_exchange_assembly_data( self, array ): rank_source = info['rank_source'] if self._axis is not None: - rank_source = gcomm.group.Translate_ranks(np.array([rank_source]), comm.group)[0] + rank_source = gcomm.group.Translate_ranks(xp.array([rank_source]), comm.group)[0] recv_buf = (array, 1, recv_typ) recv_req = comm.Irecv( recv_buf, rank_source, tag(disp) ) @@ -156,7 +156,7 @@ def start_exchange_assembly_data( self, array ): rank_dest = info['rank_dest'] if self._axis is not None: - rank_dest = gcomm.group.Translate_ranks(np.array([rank_dest]), comm.group)[0] + rank_dest = gcomm.group.Translate_ranks(xp.array([rank_dest]), comm.group)[0] send_buf = (array, 1, send_typ) send_req = comm.Isend( send_buf, rank_dest, tag(disp) ) @@ -247,15 +247,15 @@ def _create_buffer_types( cart, dtype, *, coeff_shape=() ): recv_starts = list( info['recv_starts'] ) + coeff_start send_types[direction,disp] = mpi_type.Create_subarray( - sizes = data_shape , - subsizes = buf_shape , - starts = send_starts, + sizes = [int(x) for x in data_shape] , + subsizes = [int(x) for x in buf_shape] , + starts = [int(x) for x in send_starts], ).Commit() recv_types[direction,disp] = mpi_type.Create_subarray( - sizes = data_shape , - subsizes = buf_shape , - starts = recv_starts, + sizes = [int(x) for x in data_shape] , + subsizes = [int(x) for x in buf_shape] , + starts = [int(x) for x in recv_starts], ).Commit() return send_types, recv_types @@ -334,15 +334,15 @@ def _create_assembly_buffer_types( cart, dtype, *, coeff_shape=(), axis=None, sh recv_starts[axis] = 0 
send_types[direction,disp] = mpi_type.Create_subarray( - sizes = data_shape , - subsizes = buf_shape , - starts = send_starts, + sizes = [int(x) for x in data_shape] , + subsizes = [int(x) for x in buf_shape] , + starts = [int(x) for x in send_starts], ).Commit() recv_types[direction,disp] = mpi_type.Create_subarray( - sizes = data_shape , - subsizes = buf_shape , - starts = recv_starts, + sizes = [int(x) for x in data_shape] , + subsizes = [int(x) for x in buf_shape] , + starts = [int(x) for x in recv_starts], ).Commit() return send_types, recv_types diff --git a/psydac/ddm/cart.py b/psydac/ddm/cart.py index 229f4fb0d..cd019cda5 100644 --- a/psydac/ddm/cart.py +++ b/psydac/ddm/cart.py @@ -1,7 +1,7 @@ # coding: utf-8 import os -import numpy as np +import cunumpy as xp from itertools import product from psydac.ddm.mpi import mpi as MPI @@ -36,10 +36,10 @@ def find_mpi_type( dtype ): if isinstance( dtype, MPI.Datatype ): mpi_type = dtype else: - nt = np.dtype( dtype ) + nt = xp.dtype( dtype ) mpi_type = MPI._typedict[nt.char] else: - mpi_type = np.dtype( dtype ) + mpi_type = xp.dtype( dtype ) return mpi_type @@ -250,20 +250,20 @@ def __init__(self, ncells, periods, comm=None, global_comm=None, num_threads=Non n = ncells[axis] d = nprocs[axis] s = n//d - global_shapes[axis] = np.array([s]*d) + global_shapes[axis] = xp.array([s]*d) global_shapes[axis][:n%d] += 1 - self._global_element_ends [axis] = np.cumsum(global_shapes[axis])-1 - self._global_element_starts[axis] = np.array( [0] + [e+1 for e in self._global_element_ends[axis][:-1]] ) + self._global_element_ends [axis] = xp.cumsum(global_shapes[axis])-1 + self._global_element_starts[axis] = xp.array( [0] + [e+1 for e in self._global_element_ends[axis][:-1]] ) if self.is_comm_null:return if comm is None: # compute the coords for all processes - self._global_coords = np.array([np.unravel_index(rank, nprocs) for rank in range(self._size)]) + self._global_coords = xp.array([xp.unravel_index(xp.int64(rank), nprocs) for 
rank in range(self._size)]) self._coords = self._global_coords[self._rank] self._rank_in_topo = 0 - self._ranks_in_topo = np.array([0]) + self._ranks_in_topo = xp.array([0]) else: # Create a MPI cart self._comm_cart = comm.Create_cart( @@ -275,7 +275,7 @@ def __init__(self, ncells, periods, comm=None, global_comm=None, num_threads=Non # Know my coordinates in the topology self._rank_in_topo = self._comm_cart.Get_rank() self._coords = self._comm_cart.Get_coords( rank=self._rank_in_topo ) - self._ranks_in_topo = np.array(self._comm_cart.group.Translate_ranks(list(range(self._comm_cart.size)), comm.group)) + self._ranks_in_topo = xp.array(self._comm_cart.group.Translate_ranks(list(range(self._comm_cart.size)), comm.group)) # Start/end values of global indices (without ghost regions) self._starts = tuple( self._global_element_starts[axis][c] for axis,c in zip(range(self._ndims), self._coords) ) @@ -467,8 +467,8 @@ def __init__( self, domain_decomposition, npts, global_starts, global_ends, pads # Store input arguments self._domain_decomposition = domain_decomposition self._npts = tuple( npts ) - self._global_starts = tuple( [ np.asarray(gs) for gs in global_starts] ) - self._global_ends = tuple( [ np.asarray(ge) for ge in global_ends] ) + self._global_starts = tuple( [ xp.asarray(gs) for gs in global_starts] ) + self._global_ends = tuple( [ xp.asarray(ge) for ge in global_ends] ) self._pads = tuple( pads ) self._shifts = tuple( shifts ) self._periods = domain_decomposition.periods @@ -500,7 +500,9 @@ def __init__( self, domain_decomposition, npts, global_starts, global_ends, pads self._ends = tuple( self._global_ends [axis][c] for axis,c in zip(range(self._ndims), self._coords) ) # List of 1D global indices (without ghost regions) - self._grids = tuple( range(s,e+1) for s,e in zip( self._starts, self._ends ) ) + # self._grids = tuple( range(s,e+1) for s,e in zip( self._starts, self._ends ) ) + self._grids = tuple(range(int(s), int(e) + 1) for s, e in zip(self._starts, 
self._ends)) + # Compute shape of local arrays in topology (with ghost regions) self._shape = tuple( e-s+1+2*m*p for s,e,p,m in zip( self._starts, self._ends, self._pads, shifts ) ) @@ -688,8 +690,8 @@ def get_shared_memory_subdivision( self, shape ): self.comm.Abort(1) # compute the coords for all threads - coords_from_rank = np.array([np.unravel_index(rank, nthreads) for rank in range(self._num_threads)]) - rank_from_coords = np.zeros([n+1 for n in nthreads], dtype=int) + coords_from_rank = xp.array([xp.unravel_index(rank, nthreads) for rank in range(self._num_threads)]) + rank_from_coords = xp.zeros([n+1 for n in nthreads], dtype=int) for r in range(self._num_threads): c = coords_from_rank[r] rank_from_coords[tuple(c)] = r @@ -708,8 +710,8 @@ def get_shared_memory_subdivision( self, shape ): for axis in range( self._ndims ): n = shape[axis] d = nthreads[axis] - thread_global_starts[axis] = np.array( [( c *n)//d for c in range( d )] ) - thread_global_ends [axis] = np.array( [((c+1)*n)//d-1 for c in range( d )] ) + thread_global_starts[axis] = xp.array( [( c *n)//d for c in range( d )] ) + thread_global_ends [axis] = xp.array( [((c+1)*n)//d-1 for c in range( d )] ) return coords_from_rank, rank_from_coords, thread_global_starts, thread_global_ends, self._num_threads @@ -856,14 +858,14 @@ def _compute_shift_info( self, direction, disp ): m = self._shifts[direction] # Shape of send/recv subarrays - buf_shape = np.array( self._shape ) + buf_shape = xp.array( self._shape ) buf_shape[direction] = m*p # Start location of send/recv subarrays - send_starts = np.zeros( self._ndims, dtype=int ) - recv_starts = np.zeros( self._ndims, dtype=int ) - send_assembly_starts = np.zeros( self._ndims, dtype=int ) - recv_assembly_starts = np.zeros( self._ndims, dtype=int ) + send_starts = xp.zeros( self._ndims, dtype=int ) + recv_starts = xp.zeros( self._ndims, dtype=int ) + send_assembly_starts = xp.zeros( self._ndims, dtype=int ) + recv_assembly_starts = xp.zeros( self._ndims, 
dtype=int ) if disp > 0: recv_starts[direction] = 0 @@ -904,7 +906,9 @@ def _compute_shift_info_non_blocking( self, shift ): if len([i for i in shift if i==0]) == 2 and rank_dest != MPI.PROC_NULL: direction = [i for i,s in enumerate(shift) if s != 0][0] comm = self._subcomm[direction] - local_dest_rank = self._comm_cart.group.Translate_ranks(np.array([rank_dest]), comm.group)[0] + # local_dest_rank = self._comm_cart.group.Translate_ranks(xp.array([rank_dest]), comm.group)[0] + local_dest_rank = self._comm_cart.group.Translate_ranks([int(rank_dest)], comm.group)[0] + else: local_dest_rank = rank_dest comm = self._comm_cart @@ -917,7 +921,8 @@ def _compute_shift_info_non_blocking( self, shift ): if len([i for i in shift if i==0]) == 2 and rank_source != MPI.PROC_NULL: direction = [i for i,s in enumerate(shift) if s != 0][0] comm = self._subcomm[direction] - local_source_rank = self._comm_cart.group.Translate_ranks(np.array([rank_source]), comm.group)[0] + # local_source_rank = self._comm_cart.group.Translate_ranks(xp.array([rank_source]), comm.group)[0] + local_source_rank = self._comm_cart.group.Translate_ranks([int(rank_source)], comm.group)[0] else: local_source_rank = rank_source comm = self._comm_cart @@ -1091,11 +1096,11 @@ def __init__(self, cart_minus, cart_plus, comm, axes, exts, ranks_in_topo, local if local_comm_plus != MPI.COMM_NULL and reduce_elements == False: local_comm_plus.Bcast((ranks_in_topo_minus,ranks_in_topo_minus.size, dtype), root=0) - self._coords_from_rank_minus = np.array([np.unravel_index(rank, nprocs_minus) for rank in range(size_minus)]) - self._coords_from_rank_plus = np.array([np.unravel_index(rank, nprocs_plus) for rank in range(size_plus)]) + self._coords_from_rank_minus = xp.array([xp.unravel_index(rank, nprocs_minus) for rank in range(size_minus)]) + self._coords_from_rank_plus = xp.array([xp.unravel_index(rank, nprocs_plus) for rank in range(size_plus)]) - rank_from_coords_minus = np.zeros(nprocs_minus, dtype=int) - 
rank_from_coords_plus = np.zeros(nprocs_plus, dtype=int) + rank_from_coords_minus = xp.zeros(nprocs_minus, dtype=int) + rank_from_coords_plus = xp.zeros(nprocs_plus, dtype=int) for r in range(size_minus): rank_from_coords_minus[tuple(self._coords_from_rank_minus[r])] = r @@ -1539,10 +1544,10 @@ def _compute_interface_communication_infos( self, axis ): starts[axis] = starts[axis] if ext_plus == -1 else ends[axis]-pads_plus[axis]+diff ends[axis] = starts[axis]+pads_plus[axis]-diff if ext_plus == -1 else ends[axis] shape_k = [e-s+1 for s,e in zip(starts, ends)] - recv_counts[k] = np.prod(shape_k) + recv_counts[k] = xp.prod(shape_k) ranges = [(s+p*m, p*m+e+1) for s,e,p,m in zip(starts, ends, pads_plus, shifts_plus)] ranges[axis] = (shifts_plus[axis]*pads_plus[axis], shifts_plus[axis]*pads_plus[axis]+shape_k[axis]) - indices += [np.ravel_multi_index( ii, dims=recv_shape, order='C' ) for ii in product(*[range(*a) for a in ranges])] + indices += [xp.ravel_multi_index( ii, dims=recv_shape, order='C' ) for ii in product(*[range(*a) for a in ranges])] elif self._local_rank_plus is not None: rank_plus = self._local_rank_plus @@ -1574,12 +1579,12 @@ def _compute_interface_communication_infos( self, axis ): starts[axis] = starts[axis] if ext_minus == -1 else ends[axis]-pads_minus[axis]+diff ends[axis] = starts[axis]+pads_minus[axis]-diff if ext_minus == -1 else ends[axis] shape_k = [e-s+1 for s,e in zip(starts, ends)] - recv_counts[k] = np.prod(shape_k) + recv_counts[k] = xp.prod(shape_k) ranges = [(s+p*m, p*m+e+1) for s,e,p,m in zip(starts, ends, pads_minus, shifts_minus)] ranges[axis] = (shifts_minus[axis]*pads_minus[axis], shifts_minus[axis]*pads_minus[axis]+shape_k[axis]) - indices += [np.ravel_multi_index( ii, dims=recv_shape, order='C' ) for ii in product(*[range(*a) for a in ranges])] + indices += [xp.ravel_multi_index( ii, dims=recv_shape, order='C' ) for ii in product(*[range(*a) for a in ranges])] - displacements[1:] = np.cumsum(recv_counts) + displacements[1:] = 
xp.cumsum(recv_counts) # Store all information into dictionary info = {'send_buf_shape' : tuple( send_buf_shape ), 'send_starts' : tuple( send_starts ), @@ -1809,8 +1814,8 @@ def create_interfaces_cart(domain_decomposition, carts, interfaces, communicatio axis_i, ext_i = interfaces[i,j][0] axis_j, ext_j = interfaces[i,j][1] if interfaces_comm[i,j] != MPI.COMM_NULL: - ranks_in_topo_i = domain_decomposition.domains[i].ranks_in_topo if i in owned_groups else np.full(local_groups[i].size, -1) - ranks_in_topo_j = domain_decomposition.domains[j].ranks_in_topo if j in owned_groups else np.full(local_groups[j].size, -1) + ranks_in_topo_i = domain_decomposition.domains[i].ranks_in_topo if i in owned_groups else xp.full(local_groups[i].size, -1) + ranks_in_topo_j = domain_decomposition.domains[j].ranks_in_topo if j in owned_groups else xp.full(local_groups[j].size, -1) if interfaces_comm[i,j].rank == interfaces_root_ranks[i,j][0]: req.append(interfaces_comm[i,j].Isend((ranks_in_topo_i, ranks_in_topo_i.size, dtype), interfaces_root_ranks[i,j][1], tag=tag(i,j,1))) diff --git a/psydac/ddm/nonblocking_data_exchanger.py b/psydac/ddm/nonblocking_data_exchanger.py index ecfc15730..765b7ef5a 100644 --- a/psydac/ddm/nonblocking_data_exchanger.py +++ b/psydac/ddm/nonblocking_data_exchanger.py @@ -1,6 +1,6 @@ # coding: utf-8 -import numpy as np +import cunumpy as xp from itertools import product from psydac.ddm.mpi import mpi as MPI @@ -105,7 +105,7 @@ def end_update_ghost_regions(self, array, requests): # ... 
def start_exchange_assembly_data( self, array ): - assert isinstance( array, np.ndarray ) + assert isinstance( array, xp.ndarray ) # Shortcuts cart = self._cart @@ -132,7 +132,7 @@ def start_exchange_assembly_data( self, array ): rank_source = info['rank_source'] if self._axis is not None: - rank_source = gcomm.group.Translate_ranks(np.array([rank_source]), comm.group)[0] + rank_source = gcomm.group.Translate_ranks(xp.array([rank_source]), comm.group)[0] recv_buf = (array, 1, recv_typ) recv_req = comm.Irecv( recv_buf, rank_source, tag(disp) ) @@ -142,7 +142,7 @@ def start_exchange_assembly_data( self, array ): rank_dest = info['rank_dest'] if self._axis is not None: - rank_dest = gcomm.group.Translate_ranks(np.array([rank_dest]), comm.group)[0] + rank_dest = gcomm.group.Translate_ranks(xp.array([rank_dest]), comm.group)[0] send_buf = (array, 1, send_typ) send_req = comm.Isend( send_buf, rank_dest, tag(disp) ) @@ -234,19 +234,29 @@ def _create_buffer_types( cart, dtype, *, coeff_shape=() ): recv_starts = list( info['recv_starts'] ) + coeff_start if info['rank_dest']>=0: + # send_types[shift] = mpi_type.Create_subarray( + # sizes = data_shape, + # subsizes = buf_shape, + # starts = send_starts, + # ).Commit() send_types[shift] = mpi_type.Create_subarray( - sizes = data_shape, - subsizes = buf_shape, - starts = send_starts, + sizes = [int(x) for x in data_shape], + subsizes = [int(x) for x in buf_shape], + starts = [int(x) for x in send_starts], ).Commit() else: send_types[shift] = MPI.DATATYPE_NULL if info['rank_source']>=0: + # recv_types[shift] = mpi_type.Create_subarray( + # sizes = data_shape, + # subsizes = buf_shape, + # starts = recv_starts, + # ).Commit() recv_types[shift] = mpi_type.Create_subarray( - sizes = data_shape, - subsizes = buf_shape, - starts = recv_starts, + sizes = [int(x) for x in data_shape], + subsizes = [int(x) for x in buf_shape], + starts = [int(x) for x in recv_starts], ).Commit() else: recv_types[shift] = MPI.DATATYPE_NULL @@ -317,15 
+327,21 @@ def _create_assembly_buffer_types( cart, dtype, *, coeff_shape=(), axis=None, sh recv_starts[axis] = 0 send_types[direction,disp] = mpi_type.Create_subarray( - sizes = data_shape , - subsizes = buf_shape , - starts = send_starts, + # sizes = data_shape , + # subsizes = buf_shape , + # starts = send_starts, + sizes = [int(x) for x in data_shape], + subsizes = [int(x) for x in buf_shape], + starts = [int(x) for x in send_starts], ).Commit() recv_types[direction,disp] = mpi_type.Create_subarray( - sizes = data_shape , - subsizes = buf_shape , - starts = recv_starts, + # sizes = data_shape , + # subsizes = buf_shape , + # starts = recv_starts, + sizes = [int(x) for x in data_shape], + subsizes = [int(x) for x in buf_shape], + starts = [int(x) for x in recv_starts], ).Commit() return send_types, recv_types diff --git a/psydac/ddm/partition.py b/psydac/ddm/partition.py index fb8519430..fff985d79 100644 --- a/psydac/ddm/partition.py +++ b/psydac/ddm/partition.py @@ -1,4 +1,4 @@ -import numpy as np +import cunumpy as xp import numpy.ma as ma from sympy.ntheory import factorint @@ -30,11 +30,11 @@ def partition_procs_per_patch(npts, size): such that k1<=k2. 
""" - npts = [np.prod(nc) for nc in npts] + npts = [xp.prod(nc) for nc in npts] percentage = [nc / sum(npts) for nc in npts] - sizes = np.array([int(p*size) for p in percentage]) + sizes = xp.array([int(p*size) for p in percentage]) diff = [p * size - s for s, p in zip(sizes, percentage)] - indices = np.argsort(diff)[::-1] + indices = xp.argsort(diff)[::-1] rm = size - sum(sizes) sizes[indices[:rm]] +=1 @@ -49,7 +49,7 @@ def partition_procs_per_patch(npts, size): assert start == size - ranges = np.array(ranges) + ranges = xp.array(ranges) ranks = [i[0] for i in ranges[indices[:rm]]] if len(ranks) == 0: @@ -104,7 +104,7 @@ def compute_dims( nnodes, gridsizes, min_blocksizes=None, mpi=None, try_uniform= """ assert nnodes > 0 assert all( s > 0 for s in gridsizes ) - assert np.prod( gridsizes ) >= nnodes + assert xp.prod( xp.asarray(gridsizes) ) >= nnodes if (min_blocksizes is not None): assert len( min_blocksizes ) == len( gridsizes ) @@ -112,7 +112,7 @@ def compute_dims( nnodes, gridsizes, min_blocksizes=None, mpi=None, try_uniform= assert all( s >= m for s,m in zip( gridsizes, min_blocksizes ) ) # Determine whether uniform decomposition is possible - uniform = (np.prod( gridsizes ) % nnodes == 0) + uniform = (xp.prod( xp.asarray(gridsizes) ) % nnodes == 0) # Compute dimensions of MPI Cartesian topology with most appropriate algorithm if try_uniform and uniform: @@ -160,11 +160,11 @@ def compute_dims_general( mpi_size, npts, mpi_dims_mask=None ): for a in f: - i = np.argmax( shape ) + i = xp.argmax( shape ) max_shape = shape[i] if shape.count( max_shape ) > 1: - i = ma.array( nprocs, mask=np.not_equal( shape, max_shape ) ).argmin() + i = ma.array( nprocs, mask=xp.not_equal( shape, max_shape ) ).argmin() nprocs[i] *= a shape [i] //= a @@ -191,17 +191,17 @@ def compute_dims_uniform( mpi_size, npts ): for k in range( power ): - i = np.argmax( exponents ) + i = xp.argmax( exponents ) max_exp = exponents[i] if exponents.count( max_exp ) > 1: - i = ma.array( nprocs, 
mask=np.not_equal( exponents, max_exp ) ).argmin() + i = ma.array( nprocs, mask=xp.not_equal( exponents, max_exp ) ).argmin() nprocs [i] *= a exponents[i] -= 1 npts_factors[i][a] -= 1 - shape = [np.prod( [key**val for key,val in f.items()] ) for f in npts_factors] + shape = [xp.prod( [key**val for key,val in f.items()] ) for f in npts_factors] return nprocs, shape diff --git a/psydac/ddm/petsc.py b/psydac/ddm/petsc.py index 88a3b8abe..5b865ea20 100644 --- a/psydac/ddm/petsc.py +++ b/psydac/ddm/petsc.py @@ -1,6 +1,6 @@ # coding: utf-8 -import numpy as np +import cunumpy as xp from itertools import product from .cart import CartDecomposition @@ -28,7 +28,7 @@ def __init__(self, cart): self._local_shape = tuple( e-s+1 for s,e in zip( cart._starts, cart._ends ) ) # Compute local size of local arrays in topology (without ghost regions) - self._local_size = np.prod(self._local_shape) + self._local_size = xp.prod(self._local_shape) @property @@ -69,7 +69,7 @@ def _create_indices( self ): cart = self.cart indices = product(*cart._grids) npts = cart.npts - array = [np.ravel_multi_index(i, npts) for i in indices] + array = [xp.ravel_multi_index(i, npts) for i in indices] return array def _create_extended_indices( self ): @@ -79,7 +79,7 @@ def _create_extended_indices( self ): indices = product(*cart._extended_grids) npts = cart.npts mode = tuple('wrap' if P else 'clip' for P in cart.periods) - array = [np.ravel_multi_index(i, npts, mode=mode) for i in indices] + array = [xp.ravel_multi_index(i, npts, mode=mode) for i in indices] return array def _create_Ao( self ): diff --git a/psydac/ddm/tests/test_cart_1d.py b/psydac/ddm/tests/test_cart_1d.py index ea5446da2..55274446d 100644 --- a/psydac/ddm/tests/test_cart_1d.py +++ b/psydac/ddm/tests/test_cart_1d.py @@ -1,6 +1,6 @@ # Contents of test_cart_1d.py -import numpy as np +import cunumpy as xp from psydac.ddm.blocking_data_exchanger import BlockingCartDataExchanger from psydac.ddm.nonblocking_data_exchanger import 
NonBlockingCartDataExchanger @@ -10,7 +10,7 @@ #=============================================================================== def run_cart_1d( data_exchanger_type, verbose=False ): - import numpy as np + import cunumpy as xp from psydac.ddm.mpi import mpi as MPI from psydac.ddm.cart import DomainDecomposition, CartDecomposition @@ -45,7 +45,7 @@ def run_cart_1d( data_exchanger_type, verbose=False ): global_ends = [ee] global_ends[0][-1] = n1-1 - global_starts = [np.array([0] + (global_ends[0][:-1]+1).tolist())] + global_starts = [xp.array([0] + (global_ends[0][:-1]+1).tolist())] # Decomposition of Cartesian domain cart = CartDecomposition( @@ -58,7 +58,7 @@ def run_cart_1d( data_exchanger_type, verbose=False ): ) # Local 1D array (extended domain) - u = np.zeros( cart.shape, dtype=int ) + u = xp.zeros( cart.shape, dtype=int ) # Global indices of first and last elements of array s1, = cart.starts diff --git a/psydac/ddm/tests/test_cart_2d.py b/psydac/ddm/tests/test_cart_2d.py index 3f21af8ad..006d0ea0c 100644 --- a/psydac/ddm/tests/test_cart_2d.py +++ b/psydac/ddm/tests/test_cart_2d.py @@ -8,7 +8,7 @@ #=============================================================================== def run_cart_2d( data_exchanger_type, verbose=False , nprocs=None, reverse_axis=None): - import numpy as np + import cunumpy as xp from psydac.ddm.mpi import mpi as MPI from psydac.ddm.cart import DomainDecomposition, CartDecomposition @@ -51,7 +51,7 @@ def run_cart_2d( data_exchanger_type, verbose=False , nprocs=None, reverse_axis= global_ends [axis] = (ee+1)-1 global_ends [axis][-1] = npts[axis]-1 - global_starts[axis] = np.array([0] + (global_ends[axis][:-1]+1).tolist()) + global_starts[axis] = xp.array([0] + (global_ends[axis][:-1]+1).tolist()) # Decomposition of Cartesian domain cart = CartDecomposition( @@ -65,7 +65,7 @@ def run_cart_2d( data_exchanger_type, verbose=False , nprocs=None, reverse_axis= # Local 2D array with 2D vector data (extended domain) shape = list( cart.shape ) 
+ [2] - u = np.zeros( shape, dtype=int ) + u = xp.zeros( shape, dtype=int ) # Global indices of first and last elements of array s1,s2 = cart.starts diff --git a/psydac/ddm/tests/test_cart_3d.py b/psydac/ddm/tests/test_cart_3d.py index 7f86a32f2..de70cc396 100644 --- a/psydac/ddm/tests/test_cart_3d.py +++ b/psydac/ddm/tests/test_cart_3d.py @@ -8,7 +8,7 @@ #=============================================================================== def run_cart_3d( data_exchanger_type, verbose=False ): - import numpy as np + import cunumpy as xp from psydac.ddm.mpi import mpi as MPI from psydac.ddm.cart import DomainDecomposition, CartDecomposition @@ -56,7 +56,7 @@ def run_cart_3d( data_exchanger_type, verbose=False ): global_ends [axis] = (ee+1)-1 global_ends [axis][-1] = npts[axis]-1 - global_starts[axis] = np.array([0] + (global_ends[axis][:-1]+1).tolist()) + global_starts[axis] = xp.array([0] + (global_ends[axis][:-1]+1).tolist()) # Decomposition of Cartesian domain cart = CartDecomposition( @@ -70,7 +70,7 @@ def run_cart_3d( data_exchanger_type, verbose=False ): # Local 3D array with 3D vector data (extended domain) shape = list( cart.shape ) + [3] - u = np.zeros( shape, dtype=int ) + u = xp.zeros( shape, dtype=int ) # Global indices of first and last elements of array s1,s2,s3 = cart.starts diff --git a/psydac/ddm/tests/test_multicart_2d.py b/psydac/ddm/tests/test_multicart_2d.py index 4d8f57917..cc7a6f194 100644 --- a/psydac/ddm/tests/test_multicart_2d.py +++ b/psydac/ddm/tests/test_multicart_2d.py @@ -35,7 +35,7 @@ def get_plus_starts_ends(minus_starts, minus_ends, minus_npts, plus_npts, minus_ # TEST MultiPatchDomainDecomposition and CartDataExchanger in 2D #=============================================================================== def run_carts_2d(): - import numpy as np + import cunumpy as xp from psydac.ddm.mpi import mpi as MPI from psydac.ddm.cart import MultiPatchDomainDecomposition, CartDecomposition, create_interfaces_cart @@ -84,7 +84,7 @@ def 
run_carts_2d(): global_ends [axis] = (ee+1)-1 global_ends [axis][-1] = n[i][axis]-1 - global_starts[axis] = np.array([0] + (global_ends[axis][:-1]+1).tolist()) + global_starts[axis] = xp.array([0] + (global_ends[axis][:-1]+1).tolist()) carts.append(CartDecomposition( domain_decomposition = domain_decomposition.domains[i], @@ -109,7 +109,7 @@ def run_carts_2d(): s1,s2 = ci.starts e1,e2 = ci.ends m1,m2 = ci.shifts - us[i] = np.zeros( ci.shape, dtype=dtype ) + us[i] = xp.zeros( ci.shape, dtype=dtype ) us[i][m1*p1:-m1*p1,m2*p2:-m2*p2] = [[val(i,i1,i2)for i2 in range(s2,e2+1)] for i1 in range(s1,e1+1)] synchronizer = BlockingCartDataExchanger( ci, us[i].dtype) syn[i] = synchronizer @@ -118,11 +118,11 @@ def run_carts_2d(): if not interfaces_cart[i,j].is_comm_null: if carts[i].is_comm_null: shape = interfaces_cart[i,j].get_interface_communication_infos(interfaces_cart[i,j]._axis)['gbuf_recv_shape'][0] - us[i] = np.zeros(shape, dtype=dtype) + us[i] = xp.zeros(shape, dtype=dtype) if carts[j].is_comm_null: shape = interfaces_cart[i,j].get_interface_communication_infos(interfaces_cart[i,j]._axis)['gbuf_recv_shape'][0] - us[j] = np.zeros(shape, dtype=dtype) + us[j] = xp.zeros(shape, dtype=dtype) syn_interface[i,j] = InterfaceCartDataExchanger(interfaces_cart[i,j], dtype) @@ -153,11 +153,11 @@ def run_carts_2d(): # if not carts[minus].is_comm_null: # uex = [[val(plus,i1,i2)for i2 in range(*ranges[1])] for i1 in range(*ranges[0])] -# uex = np.pad(uex, [(m*p,m*p) for m,p in zip(carts[minus].shifts, carts[minus].pads)]) +# uex = xp.pad(uex, [(m*p,m*p) for m,p in zip(carts[minus].shifts, carts[minus].pads)]) # u_ij = us[plus] # elif not carts[plus].is_comm_null: # uex = [[val(minus,i1,i2)for i2 in range(*ranges[1])] for i1 in range(*ranges[0])] -# uex = np.pad(uex, [(m*p,m*p) for m,p in zip(carts[plus].shifts, carts[plus].pads)]) +# uex = xp.pad(uex, [(m*p,m*p) for m,p in zip(carts[plus].shifts, carts[plus].pads)]) # u_ij = us[minus] # success = (u_ij == uex).all() diff --git 
a/psydac/feec/derivatives.py b/psydac/feec/derivatives.py index f7f28ccbd..b23ca4a81 100644 --- a/psydac/feec/derivatives.py +++ b/psydac/feec/derivatives.py @@ -1,6 +1,6 @@ # -*- coding: UTF-8 -*- -import numpy as np +import cunumpy as xp import scipy.sparse as spa from psydac.linalg.stencil import StencilVector, StencilMatrix, StencilVectorSpace @@ -126,10 +126,10 @@ def __init__(self, V, W, diffdir, *, negative=False, transposed=False): # define differentiation lambda based on the parameter negative (or sign) if self._negative: - self._do_diff = lambda v,out: np.subtract(v._data[idslice], + self._do_diff = lambda v,out: xp.subtract(v._data[idslice], v._data[diffslice], out=out._data[idslice]) else: - self._do_diff = lambda v,out: np.subtract(v._data[diffslice], + self._do_diff = lambda v,out: xp.subtract(v._data[diffslice], v._data[idslice], out=out._data[idslice]) @property @@ -325,13 +325,13 @@ def tosparse(self, **kwargs): directional_matrix = spa.coo_array((codomain_local, domain_local)) else: - maindiag = np.ones(domain_local) * (-sign) - adddiag = np.ones(domain_local) * sign + maindiag = xp.ones(domain_local) * (-sign) + adddiag = xp.ones(domain_local) * sign # handle special case with not self.domain.parallel and not with_pads and periodic if self.domain.periods[d] and not self.domain.parallel and not with_pads: # then: add element to other side of the array - adddiagcirc = np.array([sign]) + adddiagcirc = xp.array([sign]) offsets = (-codomain_local+1, 0, 1) diags = (adddiagcirc, maindiag, adddiag) else: diff --git a/psydac/feec/global_projectors.py b/psydac/feec/global_projectors.py index 9cc1451a9..c8bc95bcd 100644 --- a/psydac/feec/global_projectors.py +++ b/psydac/feec/global_projectors.py @@ -1,7 +1,7 @@ # -*- coding: UTF-8 -*- -import numpy as np - +import cunumpy as xp +from cunumpy.xp import array_backend from psydac.linalg.kron import KroneckerLinearSolver, KroneckerStencilMatrix from psydac.linalg.stencil import StencilMatrix, 
StencilVectorSpace from psydac.linalg.block import BlockLinearOperator @@ -158,23 +158,32 @@ def __init__(self, space, nquads = None): local_intp_x = intp_x[j] # for the grids, make interpolation appear like quadrature - local_x = local_intp_x[:, np.newaxis] - local_w = np.ones_like(local_x) + local_x = local_intp_x[:, xp.newaxis] + local_w = xp.ones_like(local_x) solvercells += [V._interpolator] # make 1D collocation matrix in stencil format - row_indices, col_indices = np.nonzero(V.imat) + if array_backend.backend == "cupy": + V_imat = xp.asarray(V.imat) # converts to cupy array if not already + else: + V_imat = V.imat + + + row_indices, col_indices = xp.nonzero(V_imat) for row_i, col_i in zip(row_indices, col_indices): # only consider row indices on process - if row_i in range(V_cart.starts[0], V_cart.ends[0] + 1): + if row_i in range(int(V_cart.starts[0]), int(V_cart.ends[0]) + 1): row_i_loc = row_i - s - M._data[row_i_loc + m*p, (col_i + p - row_i)%V.imat.shape[1]] = V.imat[row_i, col_i] + + M._data[row_i_loc + m*p, (col_i + p - row_i)%V.imat.shape[1]] = V_imat[row_i, col_i] # check if stencil matrix was built correctly - assert np.allclose(M.toarray()[s:e + 1], V.imat[s:e + 1]) + # assert xp.allclose(M.toarray()[s:e + 1], V_imat[s:e + 1]) + assert xp.allclose(M.toarray()[int(s):int(e) + 1], V_imat[int(s):int(e) + 1]) + # TODO Fix toarray() for multiplicity m > 1 matrixcells += [M.copy()] @@ -196,18 +205,20 @@ def __init__(self, space, nquads = None): solvercells += [V._histopolator] # make 1D collocation matrix in stencil format - row_indices, col_indices = np.nonzero(V.hmat) + if array_backend.backend == "cupy": + row_indices, col_indices = xp.nonzero(xp.array(V.hmat)) + else: + row_indices, col_indices = xp.nonzero(V.hmat) for row_i, col_i in zip(row_indices, col_indices): # only consider row indices on process - if row_i in range(V_cart.starts[0], V_cart.ends[0] + 1): + if row_i in range(int(V_cart.starts[0]), int(V_cart.ends[0]) + 1): row_i_loc = row_i - 
s - - M._data[row_i_loc + m*p, (col_i + p - row_i)%V.hmat.shape[1]] = V.hmat[row_i, col_i] + M._data[row_i_loc + m*p, (col_i + p - row_i)%V.hmat.shape[1]] = V.hmat[int(row_i), int(col_i)] # check if stencil matrix was built correctly - assert np.allclose(M.toarray()[s:e + 1], V.hmat[s:e + 1]) + assert xp.allclose(M.toarray()[int(s):int(e) + 1], V.hmat[int(s):int(e) + 1]) matrixcells += [M.copy()] @@ -741,12 +752,12 @@ def evaluate_dofs_1d_0form( ): # evaluate input functions at interpolation points (make sure that points are in [0, 1]) - assert np.all(np.logical_and(intp_x1 >= 0., intp_x1 <= 1.)) + assert xp.all(xp.logical_and(intp_x1 >= 0., intp_x1 <= 1.)) - E1, = np.meshgrid(intp_x1, indexing='ij') + E1, = xp.meshgrid(intp_x1, indexing='ij') f_pts = f(E1) - F_temp = np.zeros_like(F, order='C') + F_temp = xp.zeros_like(F, order='C') dof_kernels.evaluate_dofs_1d_0form(F_temp, f_pts) @@ -761,11 +772,11 @@ def evaluate_dofs_1d_1form( ): # evaluate input functions at quadrature points (make sure that points are in [0, 1]) - E1, = np.meshgrid(quad_x1.flatten()%1., indexing='ij') + E1, = xp.meshgrid(quad_x1.flatten()%1., indexing='ij') f_pts = f(E1) # call kernel - F_temp = np.zeros_like(F, order='C') + F_temp = xp.zeros_like(F, order='C') dof_kernels.evaluate_dofs_1d_1form(quad_w1, F_temp, f_pts) @@ -782,13 +793,13 @@ def evaluate_dofs_2d_0form( ): # evaluate input functions at interpolation points (make sure that points are in [0, 1]) - assert np.all(np.logical_and(intp_x1 >= 0., intp_x1 <= 1.)) - assert np.all(np.logical_and(intp_x2 >= 0., intp_x2 <= 1.)) + assert xp.all(xp.logical_and(intp_x1 >= 0., intp_x1 <= 1.)) + assert xp.all(xp.logical_and(intp_x2 >= 0., intp_x2 <= 1.)) - E1, E2 = np.meshgrid(intp_x1, intp_x2, indexing='ij') + E1, E2 = xp.meshgrid(intp_x1, intp_x2, indexing='ij') f_pts = f(E1, E2) - F_temp = np.zeros_like(F, order='C') + F_temp = xp.zeros_like(F, order='C') dof_kernels.evaluate_dofs_2d_0form(F_temp, f_pts) @@ -804,18 +815,18 @@ def 
evaluate_dofs_2d_1form_hcurl( ): # evaluate input functions at quadrature/interpolation points (make sure that points are in [0, 1]) - assert np.all(np.logical_and(intp_x1 >= 0., intp_x1 <= 1.)) - assert np.all(np.logical_and(intp_x2 >= 0., intp_x2 <= 1.)) + assert xp.all(xp.logical_and(intp_x1 >= 0., intp_x1 <= 1.)) + assert xp.all(xp.logical_and(intp_x2 >= 0., intp_x2 <= 1.)) - E1, E2 = np.meshgrid(quad_x1.flatten()%1., intp_x2, indexing='ij') + E1, E2 = xp.meshgrid(quad_x1.flatten()%1., intp_x2, indexing='ij') f1_pts = f1(E1, E2) - E1, E2 = np.meshgrid(intp_x1, quad_x2.flatten()%1., indexing='ij') + E1, E2 = xp.meshgrid(intp_x1, quad_x2.flatten()%1., indexing='ij') f2_pts = f2(E1, E2) # call kernel - F1_temp = np.zeros_like(F1, order='C') - F2_temp = np.zeros_like(F2, order='C') + F1_temp = xp.zeros_like(F1, order='C') + F2_temp = xp.zeros_like(F2, order='C') dof_kernels.evaluate_dofs_2d_1form_hcurl(quad_w1, quad_w2, F1_temp, F2_temp, f1_pts, f2_pts) @@ -832,18 +843,18 @@ def evaluate_dofs_2d_1form_hdiv( ): # evaluate input functions at quadrature/interpolation points (make sure that points are in [0, 1]) - assert np.all(np.logical_and(intp_x1 >= 0., intp_x1 <= 1.)) - assert np.all(np.logical_and(intp_x2 >= 0., intp_x2 <= 1.)) + assert xp.all(xp.logical_and(intp_x1 >= 0., intp_x1 <= 1.)) + assert xp.all(xp.logical_and(intp_x2 >= 0., intp_x2 <= 1.)) - E1, E2 = np.meshgrid(intp_x1, quad_x2.flatten()%1., indexing='ij') + E1, E2 = xp.meshgrid(intp_x1, quad_x2.flatten()%1., indexing='ij') f1_pts = f1(E1, E2) - E1, E2 = np.meshgrid(quad_x1.flatten()%1., intp_x2, indexing='ij') + E1, E2 = xp.meshgrid(quad_x1.flatten()%1., intp_x2, indexing='ij') f2_pts = f2(E1, E2) # call kernel - F1_temp = np.zeros_like(F1, order='C') - F2_temp = np.zeros_like(F2, order='C') + F1_temp = xp.zeros_like(F1, order='C') + F2_temp = xp.zeros_like(F2, order='C') dof_kernels.evaluate_dofs_2d_1form_hdiv(quad_w1, quad_w2, F1_temp, F2_temp, f1_pts, f2_pts) @@ -859,11 +870,11 @@ def 
evaluate_dofs_2d_2form( ): # evaluate input functions at quadrature points (make sure that points are in [0, 1]) - E1, E2 = np.meshgrid(quad_x1.flatten()%1., quad_x2.flatten()%1., indexing='ij') + E1, E2 = xp.meshgrid(quad_x1.flatten()%1., quad_x2.flatten()%1., indexing='ij') f_pts = f(E1, E2) # call kernel - F_temp = np.zeros_like(F, order='C') + F_temp = xp.zeros_like(F, order='C') dof_kernels.evaluate_dofs_2d_2form(quad_w1, quad_w2, F_temp, f_pts) @@ -877,16 +888,16 @@ def evaluate_dofs_2d_vec( ): # evaluate input functions at interpolation points (make sure that points are in [0, 1]) - assert np.all(np.logical_and(intp_x1 >= 0., intp_x1 <= 1.)) - assert np.all(np.logical_and(intp_x2 >= 0., intp_x2 <= 1.)) + assert xp.all(xp.logical_and(intp_x1 >= 0., intp_x1 <= 1.)) + assert xp.all(xp.logical_and(intp_x2 >= 0., intp_x2 <= 1.)) - E1, E2 = np.meshgrid(intp_x1, intp_x2, indexing='ij') + E1, E2 = xp.meshgrid(intp_x1, intp_x2, indexing='ij') f1_pts = f1(E1, E2) f2_pts = f2(E1, E2) # call kernel - F1_temp = np.zeros_like(F1, order='C') - F2_temp = np.zeros_like(F2, order='C') + F1_temp = xp.zeros_like(F1, order='C') + F2_temp = xp.zeros_like(F2, order='C') dof_kernels.evaluate_dofs_2d_vec(F1_temp, F2_temp, f1_pts, f2_pts) @@ -904,14 +915,14 @@ def evaluate_dofs_3d_0form( ): # evaluate input functions at interpolation points (make sure that points are in [0, 1]) - assert np.all(np.logical_and(intp_x1 >= 0., intp_x1 <= 1.)) - assert np.all(np.logical_and(intp_x2 >= 0., intp_x2 <= 1.)) - assert np.all(np.logical_and(intp_x3 >= 0., intp_x3 <= 1.)) + assert xp.all(xp.logical_and(intp_x1 >= 0., intp_x1 <= 1.)) + assert xp.all(xp.logical_and(intp_x2 >= 0., intp_x2 <= 1.)) + assert xp.all(xp.logical_and(intp_x3 >= 0., intp_x3 <= 1.)) - E1, E2, E3 = np.meshgrid(intp_x1, intp_x2, intp_x3, indexing='ij') + E1, E2, E3 = xp.meshgrid(intp_x1, intp_x2, intp_x3, indexing='ij') f_pts = f(E1, E2, E3) - F_temp = np.zeros_like(F, order='C') + F_temp = xp.zeros_like(F, order='C') 
dof_kernels.evaluate_dofs_3d_0form(F_temp, f_pts) @@ -927,23 +938,23 @@ def evaluate_dofs_3d_1form( ): # evaluate input functions at quadrature/interpolation points (make sure that points are in [0, 1]) - assert np.all(np.logical_and(intp_x1 >= 0., intp_x1 <= 1.)) - assert np.all(np.logical_and(intp_x2 >= 0., intp_x2 <= 1.)) - assert np.all(np.logical_and(intp_x3 >= 0., intp_x3 <= 1.)) + assert xp.all(xp.logical_and(intp_x1 >= 0., intp_x1 <= 1.)) + assert xp.all(xp.logical_and(intp_x2 >= 0., intp_x2 <= 1.)) + assert xp.all(xp.logical_and(intp_x3 >= 0., intp_x3 <= 1.)) - E1, E2, E3 = np.meshgrid(quad_x1.flatten()%1., intp_x2, intp_x3, indexing='ij') + E1, E2, E3 = xp.meshgrid(quad_x1.flatten()%1., intp_x2, intp_x3, indexing='ij') f1_pts = f1(E1, E2, E3) - E1, E2, E3 = np.meshgrid(intp_x1, quad_x2.flatten()%1., intp_x3, indexing='ij') + E1, E2, E3 = xp.meshgrid(intp_x1, quad_x2.flatten()%1., intp_x3, indexing='ij') f2_pts = f2(E1, E2, E3) - E1, E2, E3 = np.meshgrid(intp_x1, intp_x2, quad_x3.flatten()%1., indexing='ij') + E1, E2, E3 = xp.meshgrid(intp_x1, intp_x2, quad_x3.flatten()%1., indexing='ij') f3_pts = f3(E1, E2, E3) # call kernel - F1_temp = np.zeros_like(F1, order='C') - F2_temp = np.zeros_like(F2, order='C') - F3_temp = np.zeros_like(F3, order='C') + F1_temp = xp.zeros_like(F1, order='C') + F2_temp = xp.zeros_like(F2, order='C') + F3_temp = xp.zeros_like(F3, order='C') dof_kernels.evaluate_dofs_3d_1form(quad_w1, quad_w2, quad_w3, F1_temp, F2_temp, F3_temp, f1_pts, f2_pts, f3_pts) @@ -961,23 +972,23 @@ def evaluate_dofs_3d_2form( ): # evaluate input functions at quadrature/interpolation points (make sure that points are in [0, 1]) - assert np.all(np.logical_and(intp_x1 >= 0., intp_x1 <= 1.)) - assert np.all(np.logical_and(intp_x2 >= 0., intp_x2 <= 1.)) - assert np.all(np.logical_and(intp_x3 >= 0., intp_x3 <= 1.)) + assert xp.all(xp.logical_and(intp_x1 >= 0., intp_x1 <= 1.)) + assert xp.all(xp.logical_and(intp_x2 >= 0., intp_x2 <= 1.)) + assert 
xp.all(xp.logical_and(intp_x3 >= 0., intp_x3 <= 1.)) - E1, E2, E3 = np.meshgrid(intp_x1, quad_x2.flatten()%1., quad_x3.flatten()%1., indexing='ij') + E1, E2, E3 = xp.meshgrid(intp_x1, quad_x2.flatten()%1., quad_x3.flatten()%1., indexing='ij') f1_pts = f1(E1, E2, E3) - E1, E2, E3 = np.meshgrid(quad_x1.flatten()%1., intp_x2, quad_x3.flatten()%1., indexing='ij') + E1, E2, E3 = xp.meshgrid(quad_x1.flatten()%1., intp_x2, quad_x3.flatten()%1., indexing='ij') f2_pts = f2(E1, E2, E3) - E1, E2, E3 = np.meshgrid(quad_x1.flatten()%1., quad_x2.flatten()%1., intp_x3, indexing='ij') + E1, E2, E3 = xp.meshgrid(quad_x1.flatten()%1., quad_x2.flatten()%1., intp_x3, indexing='ij') f3_pts = f3(E1, E2, E3) # call kernel - F1_temp = np.zeros_like(F1, order='C') - F2_temp = np.zeros_like(F2, order='C') - F3_temp = np.zeros_like(F3, order='C') + F1_temp = xp.zeros_like(F1, order='C') + F2_temp = xp.zeros_like(F2, order='C') + F3_temp = xp.zeros_like(F3, order='C') dof_kernels.evaluate_dofs_3d_2form(quad_w1, quad_w2, quad_w3, F1_temp, F2_temp, F3_temp, f1_pts, f2_pts, f3_pts) @@ -994,11 +1005,11 @@ def evaluate_dofs_3d_3form( ): # evaluate input functions at quadrature points (make sure that points are in [0, 1]) - E1, E2, E3 = np.meshgrid(quad_x1.flatten()%1., quad_x2.flatten()%1., quad_x3.flatten()%1., indexing='ij') + E1, E2, E3 = xp.meshgrid(quad_x1.flatten()%1., quad_x2.flatten()%1., quad_x3.flatten()%1., indexing='ij') f_pts = f(E1, E2, E3) # call kernel - F_temp = np.zeros_like(F, order='C') + F_temp = xp.zeros_like(F, order='C') dof_kernels.evaluate_dofs_3d_3form(quad_w1, quad_w2, quad_w3, F_temp, f_pts) @@ -1012,19 +1023,19 @@ def evaluate_dofs_3d_vec( ): # evaluate input functions at interpolation points (make sure that points are in [0, 1]) - assert np.all(np.logical_and(intp_x1 >= 0., intp_x1 <= 1.)) - assert np.all(np.logical_and(intp_x2 >= 0., intp_x2 <= 1.)) - assert np.all(np.logical_and(intp_x3 >= 0., intp_x3 <= 1.)) + assert xp.all(xp.logical_and(intp_x1 >= 0., intp_x1 <= 
1.)) + assert xp.all(xp.logical_and(intp_x2 >= 0., intp_x2 <= 1.)) + assert xp.all(xp.logical_and(intp_x3 >= 0., intp_x3 <= 1.)) - E1, E2, E3 = np.meshgrid(intp_x1, intp_x2, intp_x3, indexing='ij') + E1, E2, E3 = xp.meshgrid(intp_x1, intp_x2, intp_x3, indexing='ij') f1_pts = f1(E1, E2, E3) f2_pts = f2(E1, E2, E3) f3_pts = f3(E1, E2, E3) # call kernel - F1_temp = np.zeros_like(F1, order='C') - F2_temp = np.zeros_like(F2, order='C') - F3_temp = np.zeros_like(F3, order='C') + F1_temp = xp.zeros_like(F1, order='C') + F2_temp = xp.zeros_like(F2, order='C') + F3_temp = xp.zeros_like(F3, order='C') dof_kernels.evaluate_dofs_3d_vec(F1_temp, F2_temp, F3_temp, f1_pts, f2_pts, f3_pts) diff --git a/psydac/fem/grid.py b/psydac/fem/grid.py index 4039b95b7..66228641c 100644 --- a/psydac/fem/grid.py +++ b/psydac/fem/grid.py @@ -2,7 +2,7 @@ # # Copyright 2018 Yaman Güçlü -import numpy as np +import cunumpy as xp from psydac.core.bsplines import elements_spans from psydac.core.bsplines import quadrature_grid diff --git a/psydac/fem/partitioning.py b/psydac/fem/partitioning.py index de3a3acaf..426cb8aae 100644 --- a/psydac/fem/partitioning.py +++ b/psydac/fem/partitioning.py @@ -1,7 +1,7 @@ # -*- coding: UTF-8 -*- import os -import numpy as np +import cunumpy as xp from psydac.ddm.cart import CartDecomposition, InterfaceCartDecomposition, create_interfaces_cart from psydac.core.bsplines import elements_spans @@ -58,7 +58,7 @@ def partition_coefficients(domain_decomposition, spaces, min_blocks=None): global_ends [axis] = m*(ee+1)-1 global_ends [axis][-1] = npts[axis]-1 - global_starts[axis] = np.array([0] + (global_ends[axis][:-1]+1).tolist()) + global_starts[axis] = xp.array([0] + (global_ends[axis][:-1]+1).tolist()) if min_blocks is None: min_blocks = [None] * ndims diff --git a/psydac/fem/projectors.py b/psydac/fem/projectors.py index d40db6a8c..25c187580 100644 --- a/psydac/fem/projectors.py +++ b/psydac/fem/projectors.py @@ -1,4 +1,4 @@ -import numpy as np +import cunumpy as xp 
from psydac.linalg.kron import KroneckerDenseMatrix from psydac.core.bsplines import hrefinement_matrix @@ -9,8 +9,8 @@ def knots_to_insert(coarse_grid, fine_grid, tol=1e-14): """ Compute the point difference between the fine grid and coarse grid.""" # assert len(coarse_grid)*2-2 == len(fine_grid)-1 - indices1 = (np.abs(fine_grid [:,None] - coarse_grid) < tol).any(0) - indices2 = ~(np.abs(coarse_grid[:,None] - fine_grid ) < tol).any(0) + indices1 = (xp.abs(fine_grid [:,None] - coarse_grid) < tol).any(0) + indices2 = ~(xp.abs(coarse_grid[:,None] - fine_grid ) < tol).any(0) intersection = coarse_grid[indices1] T = fine_grid[indices2] @@ -82,7 +82,7 @@ def knot_insertion_projection_operator(domain, codomain): if d.basis == 'M': assert c.basis == 'M' - P = np.diag(1 / d._scaling_array) @ P @ np.diag(c._scaling_array) + P = xp.diag(1 / d._scaling_array) @ P @ xp.diag(c._scaling_array) ops.append(P.T) @@ -92,11 +92,11 @@ def knot_insertion_projection_operator(domain, codomain): if d.basis == 'M': assert c.basis == 'M' - P = np.diag(1 / c._scaling_array) @ P @ np.diag(d._scaling_array) + P = xp.diag(1 / c._scaling_array) @ P @ xp.diag(d._scaling_array) ops.append(P) else: - ops.append(np.eye(d.nbasis)) + ops.append(xp.eye(d.nbasis)) return KroneckerDenseMatrix(domain.coeff_space, codomain.coeff_space, *ops) diff --git a/psydac/fem/splines.py b/psydac/fem/splines.py index 08ceaa417..5eaaedc54 100644 --- a/psydac/fem/splines.py +++ b/psydac/fem/splines.py @@ -1,7 +1,9 @@ # coding: utf-8 # Copyright 2018 Ahmed Ratnani, Yaman Güçlü -import numpy as np +import numpy as _np +import cunumpy as xp +from cunumpy.xp import array_backend from scipy.sparse import csc_matrix, csr_matrix, dia_matrix from psydac.linalg.stencil import StencilVectorSpace @@ -88,10 +90,10 @@ def __init__(self, degree, knots=None, grid=None, multiplicity=None, parent_mult if grid is None: grid = breakpoints(knots, degree) - indices = np.where(np.diff(knots[degree:len(knots)-degree])>1e-15)[0] + indices = 
xp.where(xp.diff(knots[degree:len(knots)-degree])>1e-15)[0] if len(indices)>0: - multiplicity = np.diff(indices).max(initial=1) + multiplicity = max(xp.diff(indices), default=1) else: multiplicity = max(1,len(knots[degree+1:-degree-1])) @@ -136,7 +138,7 @@ def __init__(self, degree, knots=None, grid=None, multiplicity=None, parent_mult # Create space of spline coefficients domain_decomposition = DomainDecomposition([self._ncells], [periodic]) - cart = CartDecomposition(domain_decomposition, [nbasis], [np.array([0])],[np.array([nbasis-1])], [self._pads], [multiplicity]) + cart = CartDecomposition(domain_decomposition, [nbasis], [xp.array([0])],[xp.array([nbasis-1])], [self._pads], [multiplicity]) self._coeff_space = StencilVectorSpace(cart) # Store flag: object NOT YET prepared for interpolation / histopolation @@ -168,9 +170,9 @@ def init_interpolation( self, dtype=float ): for the calculation of a spline interpolant given the values at the Greville points. - """ + """ if self.greville.size == 1: - imat = np.ones((1, 1), dtype=float) + imat = xp.ones((1, 1), dtype=float) else: imat = collocation_matrix( knots = self.knots, @@ -183,14 +185,26 @@ def init_interpolation( self, dtype=float ): if self.periodic: # Convert to CSC format and compute sparse LU decomposition + + # Convert to LAPACK banded format (see DGBTRF function) + if array_backend.backend == "cupy": + imat = imat.get() + else: + imat = _np.asanyarray(imat) + self._interpolator = SparseSolver( csc_matrix( imat ) ) else: + # Convert to LAPACK banded format (see DGBTRF function) + if array_backend.backend == "cupy": + imat = imat.get() + else: + imat = _np.asanyarray(imat) dmat = dia_matrix( imat ) l = abs( dmat.offsets.min() ) u = dmat.offsets.max() cmat = csr_matrix( dmat ) - bmat = np.zeros( (1+u+2*l, cmat.shape[1]), dtype=dtype ) + bmat = xp.zeros( (1+u+2*l, cmat.shape[1]), dtype=dtype ) for i,j in zip( *cmat.nonzero() ): bmat[u+l+i-j,j] = cmat[i,j] self._interpolator = BandedSolver( u, l, bmat ) @@ 
-215,7 +229,11 @@ def init_histopolation( self, dtype=float): xgrid = self.ext_greville, multiplicity = self._multiplicity ) - + if array_backend.backend == "cupy": + imat = imat.get() + else: + imat = _np.asanyarray(imat) + self.hmat= imat if self.periodic: # Convert to CSC format and compute sparse LU decomposition @@ -226,7 +244,7 @@ def init_histopolation( self, dtype=float): l = abs( dmat.offsets.min() ) u = dmat.offsets.max() cmat = csr_matrix( dmat ) - bmat = np.zeros( (1+u+2*l, cmat.shape[1]), dtype=dtype) + bmat = xp.zeros( (1+u+2*l, cmat.shape[1]), dtype=dtype) for i,j in zip( *cmat.nonzero() ): bmat[u+l+i-j,j] = cmat[i,j] self._histopolator = BandedSolver( u, l, bmat ) @@ -323,7 +341,7 @@ def eval_field(self, field, *eta , weights=None): if weights: coeffs *= weights[index] - return np.dot(coeffs,basis_array) + return xp.dot(coeffs,basis_array) # ... def eval_field_gradient( self, field, *eta , weights=None): @@ -548,7 +566,7 @@ def draw(self): n = self.nbasis + d*self.periodic knots = self.knots fig, ax = plt.subplots() - xx = np.linspace(knots[0], knots[-1], 200) + xx = xp.linspace(knots[0], knots[-1], 200) for i in range(n): c = [0]*n c[i] = 1 diff --git a/psydac/fem/tensor.py b/psydac/fem/tensor.py index 70f849c69..fecfd6441 100644 --- a/psydac/fem/tensor.py +++ b/psydac/fem/tensor.py @@ -7,7 +7,7 @@ """ from psydac.ddm.mpi import mpi as MPI -import numpy as np +import cunumpy as xp import itertools import h5py import os @@ -237,14 +237,14 @@ def eval_field( self, field, *eta, weights=None): field.coeffs.update_ghost_regions() # Check if `x` is iterable and loop over elements - if isinstance(eta[0], (list, np.ndarray)) and np.ndim(eta[0]) > 0: + if isinstance(eta[0], (list, xp.ndarray)) and xp.ndim(eta[0]) > 0: for dim in range(1, self.ldim): assert len(eta[0]) == len(eta[dim]) res_list = [] for i in range(len(eta[0])): x = [eta[j][i] for j in range(self.ldim)] res_list.append(self.eval_field(field, *x, weights=weights)) - return np.array(res_list) + 
return xp.array(res_list) for (x, xlim, space) in zip( eta, self.eta_lims, self.spaces ): @@ -288,15 +288,15 @@ def eval_field( self, field, *eta, weights=None): # res = coeffs for basis in bases[::-1]: - res = np.dot( res, basis ) + res = xp.dot( res, basis ) # # Option 2: cycle over each element of 'coeffs' (touched only once) # # - Pros: no temporary objects are created # # - Cons: large number of Python iterations = number of elements in 'coeffs' # # # res = 0.0 -# for idx,c in np.ndenumerate( coeffs ): -# ndbasis = np.prod( [b[i] for i,b in zip( idx, bases )] ) +# for idx,c in xp.ndenumerate( coeffs ): +# ndbasis = xp.prod( [b[i] for i,b in zip( idx, bases )] ) # res += c * ndbasis return res @@ -425,8 +425,8 @@ def preprocess_irregular_tensor_grid(self, grid, der=0, overlap=0): # Get the cell indexes cell_index_i = cell_index(self.breaks[i], grid_i) - min_idx = np.searchsorted(cell_index_i, starts[i], side='left') - max_idx = np.searchsorted(cell_index_i, ends[i], side='right') + min_idx = xp.searchsorted(cell_index_i, starts[i], side='left') + max_idx = xp.searchsorted(cell_index_i, ends[i], side='right') # We only care about the local cells. 
cell_index_i = cell_index_i[min_idx:max_idx] grid_local_i = grid_i[min_idx:max_idx] @@ -484,7 +484,7 @@ def eval_fields(self, grid, *fields, weights=None, npts_per_cell=None, overlap=0 weights.coeffs.update_ghost_regions() assert len(grid) == self.ldim - grid = [np.asarray(grid[i]) for i in range(self.ldim)] + grid = [xp.asarray(grid[i]) for i in range(self.ldim)] assert all(grid[i].ndim == grid[i + 1].ndim for i in range(self.ldim - 1)) # -------------------------- @@ -499,7 +499,7 @@ def eval_fields(self, grid, *fields, weights=None, npts_per_cell=None, overlap=0 # -> grid is tensor-product, but npts_per_cell is not the same in each cell elif grid[0].ndim == 1 and npts_per_cell is None: out_fields = self.eval_fields_irregular_tensor_grid(grid, *fields, weights=weights, overlap=overlap) - return [np.ascontiguousarray(out_fields[..., i]) for i in range(len(fields))] + return [xp.ascontiguousarray(out_fields[..., i]) for i in range(len(fields))] # Case 3. 1D arrays of coordinates and npts_per_cell is a tuple or an integer # -> grid is tensor-product, and each cell has the same number of evaluation points @@ -508,10 +508,10 @@ def eval_fields(self, grid, *fields, weights=None, npts_per_cell=None, overlap=0 npts_per_cell = (npts_per_cell,) * self.ldim for i in range(self.ldim): ncells_i = len(self.breaks[i]) - 1 - grid[i] = np.reshape(grid[i], (ncells_i, npts_per_cell[i])) + grid[i] = xp.reshape(grid[i], (ncells_i, npts_per_cell[i])) out_fields = self.eval_fields_regular_tensor_grid(grid, *fields, weights=weights, overlap=overlap) # return a list - return [np.ascontiguousarray(out_fields[..., i]) for i in range(len(fields))] + return [xp.ascontiguousarray(out_fields[..., i]) for i in range(len(fields))] # Case 4. 
(self.ldim)D arrays of coordinates and no npts_per_cell # -> unstructured grid @@ -555,9 +555,9 @@ def eval_fields_regular_tensor_grid(self, grid, *fields, weights=None, overlap=0 degree, global_basis, global_spans, local_shape = self.preprocess_regular_tensor_grid(grid, der=0, overlap=overlap) ncells = [local_shape[i][0] for i in range(self.ldim)] n_eval_points = [local_shape[i][1] for i in range(self.ldim)] - out_fields = np.zeros((*(tuple(ncells[i] * n_eval_points[i] for i in range(self.ldim))), len(fields)), dtype=self.dtype) + out_fields = xp.zeros((*(tuple(ncells[i] * n_eval_points[i] for i in range(self.ldim))), len(fields)), dtype=self.dtype) - global_arr_coeffs = np.zeros(shape=(*fields[0].coeffs._data.shape, len(fields)), dtype=self.dtype) + global_arr_coeffs = xp.zeros(shape=(*fields[0].coeffs._data.shape, len(fields)), dtype=self.dtype) for i in range(len(fields)): global_arr_coeffs[..., i] = fields[i].coeffs._data @@ -608,9 +608,9 @@ def eval_fields_irregular_tensor_grid(self, grid, *fields, weights=None, overlap """ degree, global_basis, global_spans, cell_indexes, local_shape = \ self.preprocess_irregular_tensor_grid(grid, overlap=overlap) - out_fields = np.zeros(tuple(local_shape) + (len(fields),), dtype=self.dtype) + out_fields = xp.zeros(tuple(local_shape) + (len(fields),), dtype=self.dtype) - global_arr_coeffs = np.zeros(shape=(*fields[0].coeffs._data.shape, len(fields)), dtype=self.dtype) + global_arr_coeffs = xp.zeros(shape=(*fields[0].coeffs._data.shape, len(fields)), dtype=self.dtype) npoints = local_shape @@ -647,14 +647,14 @@ def eval_field_gradient(self, field, *eta, weights=None): index = [] # Check if `x` is iterable and loop over elements - if isinstance(eta[0], (list, np.ndarray)) and np.ndim(eta[0]) > 0: + if isinstance(eta[0], (list, xp.ndarray)) and xp.ndim(eta[0]) > 0: for dim in range(1, self.ldim): assert len(eta[0]) == len(eta[dim]) res_list = [] for i in range(len(eta[0])): x = [eta[j][i] for j in range(self.ldim)] 
res_list.append(self.eval_field_gradient(field, *x, weights=weights)) - return np.array(res_list) + return xp.array(res_list) for (x, xlim, space) in zip( eta, self.eta_lims, self.spaces ): @@ -699,7 +699,7 @@ def eval_field_gradient(self, field, *eta, weights=None): bases = [(bases_1[d] if i==d else bases_0[i]) for i in range( self.ldim )] res = coeffs for basis in bases[::-1]: - res = np.dot( res, basis ) + res = xp.dot( res, basis ) grad.append( res ) return grad @@ -734,7 +734,7 @@ def integral(self, f, *, nquads=None): itertools.product(*[range(s, e+1) for s, e in zip(starts, ends)]) # Shortcut: Numpy product of all elements in a list - np_prod = np.prod + np_prod = xp.prod # Perform Gaussian quadrature in multiple dimensions c = 0.0 @@ -743,7 +743,7 @@ def integral(self, f, *, nquads=None): x = [ points_i[k_i, :] for points_i, k_i in zip( points, k)] w = [weights_i[k_i, :] for weights_i, k_i in zip(weights, k)] - for q in np.ndindex(*nq): + for q in xp.ndindex(*nq): y = [x_i[q_i] for x_i, q_i in zip(x, q)] v = [w_i[q_i] for w_i, q_i in zip(w, q)] @@ -756,7 +756,7 @@ def integral(self, f, *, nquads=None): c = mpi_comm.allreduce(c) # convert to native python type if numpy to avoid errors with sympify - if isinstance(c, np.generic): + if isinstance(c, xp.generic): c = c.item() return c @@ -989,8 +989,8 @@ def reduce_grid(self, axes=(), knots=()): dirichlet=space.dirichlet, basis=space.basis) spaces[axis] = new_space breaks = new_space.breaks.tolist() - elements_ends = np.array([breaks.index(bd) for bd in boundaries])-1 - elements_starts = np.array([0] + (elements_ends[:-1]+1).tolist()) + elements_ends = xp.array([breaks.index(bd) for bd in boundaries])-1 + elements_starts = xp.array([0] + (elements_ends[:-1]+1).tolist()) if periodic: global_starts[axis] = elements_starts @@ -1165,8 +1165,8 @@ def add_refined_space(self, ncells): new_global_starts[-1].append(s) new_global_ends [-1].append(e-1) - new_global_starts[-1] = np.array(new_global_starts[-1]) - 
new_global_ends [-1] = np.array(new_global_ends [-1]) + new_global_starts[-1] = xp.array(new_global_starts[-1]) + new_global_ends [-1] = xp.array(new_global_ends [-1]) new_domain = domain.refine(ncells, new_global_starts, new_global_ends) new_space = TensorFemSpace(new_domain, *spaces, dtype=self._coeff_space.dtype) @@ -1240,14 +1240,14 @@ def plot_2d_decomposition(self, mapping=None, refine=10): [sk1, sk2], [ek1, ek2] = self.local_domain eta1 = refine_array_1d(V1.breaks[sk1:ek1+2], N) eta2 = refine_array_1d(V2.breaks[sk2:ek2+2], N) - pcoords = np.array([[mapping(e1, e2) for e2 in eta2] for e1 in eta1]) + pcoords = xp.array([[mapping(e1, e2) for e2 in eta2] for e1 in eta1]) # Local domain as Matplotlib polygonal patch AB = pcoords[ :, 0, :] # eta2 = min BC = pcoords[ -1, :, :] # eta1 = max CD = pcoords[::-1, -1, :] # eta2 = max (points must be reversed) DA = pcoords[ 0, ::-1, :] # eta1 = min (points must be reversed) - xy = np.concatenate([AB, BC, CD, DA], axis=0) + xy = xp.concatenate([AB, BC, CD, DA], axis=0) poly = Polygon(xy, edgecolor='None') # Gather polygons on master process @@ -1262,7 +1262,7 @@ def plot_2d_decomposition(self, mapping=None, refine=10): # Global grid, refined eta1 = refine_array_1d(V1.breaks, N) eta2 = refine_array_1d(V2.breaks, N) - pcoords = np.array([[mapping(e1, e2) for e2 in eta2] for e1 in eta1]) + pcoords = xp.array([[mapping(e1, e2) for e2 in eta2] for e1 in eta1]) xx = pcoords[:, :, 0] yy = pcoords[:, :, 1] diff --git a/psydac/fem/tests/analytical_profiles_1d.py b/psydac/fem/tests/analytical_profiles_1d.py index 8166d1f53..51e58069d 100644 --- a/psydac/fem/tests/analytical_profiles_1d.py +++ b/psydac/fem/tests/analytical_profiles_1d.py @@ -2,7 +2,7 @@ # Copyright 2018 Yaman Güçlü import math -import numpy as np +import cunumpy as xp from psydac.fem.tests.analytical_profiles_base import AnalyticalProfile from psydac.fem.tests.utilities import horner, falling_factorial @@ -30,7 +30,7 @@ def poly_order( self ): return -1 def eval( 
self, x, diff=0 ): - return self._k**diff * np.cos( 0.5*math.pi*diff + self._k*x + self._phi ) + return self._k**diff * xp.cos( 0.5*math.pi*diff + self._k*x + self._phi ) def max_norm( self, diff=0 ): return self._k**diff @@ -56,7 +56,7 @@ def poly_order( self ): return -1 def eval( self, x, diff=0 ): - return self._k**diff * np.sin( 0.5*math.pi*diff + self._k*x + self._phi ) + return self._k**diff * xp.sin( 0.5*math.pi*diff + self._k*x + self._phi ) def max_norm( self, diff=0 ): return self._k**diff @@ -65,7 +65,7 @@ class AnalyticalProfile1D_Poly( AnalyticalProfile ): def __init__( self, deg ): - coeffs = np.random.random_sample( 1+deg ) # 0 <= c < 1 + coeffs = xp.random.random_sample( 1+deg ) # 0 <= c < 1 coeffs = 1.0 - coeffs # 0 < c <= 1 self._deg = deg diff --git a/psydac/fem/tests/test_spline_histopolation.py b/psydac/fem/tests/test_spline_histopolation.py index 7dc169cda..954d9efef 100644 --- a/psydac/fem/tests/test_spline_histopolation.py +++ b/psydac/fem/tests/test_spline_histopolation.py @@ -1,5 +1,5 @@ import pytest -import numpy as np +import cunumpy as xp import matplotlib.pyplot as plt from scipy.integrate import quad @@ -18,7 +18,7 @@ def histopolate_polynomial(basis, ncells, degree): periodic = False # Polynomial to be approximated - poly_coeffs = np.random.random_sample( degree+1 ) # 0 <= c < 1 + poly_coeffs = xp.random.random_sample( degree+1 ) # 0 <= c < 1 poly_coeffs = 1.0 - poly_coeffs # 0 < c <= 1 f = lambda x : horner( x, *poly_coeffs ) @@ -29,7 +29,7 @@ def histopolate_polynomial(basis, ncells, degree): # Compute histopolant xg = Vh.ext_greville - Ig = np.array([quad(f, xg[i], xg[i+1])[0] for i in range(len(xg)-1)]) + Ig = xp.array([quad(f, xg[i], xg[i+1])[0] for i in range(len(xg)-1)]) Vh.compute_histopolant(Ig, fh) return domain, f, fh @@ -43,11 +43,11 @@ def test_histopolation_exact(basis, ncells, degree, num_pts=100, tol=1e-11): domain, f, fh = histopolate_polynomial(basis, ncells, degree) # Compare to exact solution - x = 
np.linspace(*domain, num=num_pts) + x = xp.linspace(*domain, num=num_pts) y = f(x) - yh = np.array([fh(xi) for xi in x]) + yh = xp.array([fh(xi) for xi in x]) - assert np.allclose(yh, y, rtol=tol, atol=tol) + assert xp.allclose(yh, y, rtol=tol, atol=tol) #============================================================================== @pytest.mark.parametrize('basis', ['B', 'M']) @@ -61,21 +61,21 @@ def test_histopolation_cosine(basis, ncells, degree, periodic, num_pts=100): f = AnalyticalProfile1D_Cos() # Define spline space and field - grid, dx = np.linspace(*f.domain, num=ncells+1, retstep=True) + grid, dx = xp.linspace(*f.domain, num=ncells+1, retstep=True) Vh = SplineSpace(degree=degree, grid=grid, periodic=periodic) fh = FemField(Vh) # Compute histopolant xg = Vh.histopolation_grid - Ig = np.array([quad(f.eval, xl, xr)[0] for xl, xr in zip(xg[:-1], xg[1:])]) + Ig = xp.array([quad(f.eval, xl, xr)[0] for xl, xr in zip(xg[:-1], xg[1:])]) Vh.compute_histopolant(Ig, fh) # Compare to exact solution - x = np.linspace(*f.domain, num=num_pts) + x = xp.linspace(*f.domain, num=num_pts) y = f.eval(x) - yh = np.array([fh(xi) for xi in x]) + yh = xp.array([fh(xi) for xi in x]) - max_norm_err = np.max(abs(y - yh)) + max_norm_err = xp.max(abs(y - yh)) err_bound = spline_1d_error_bound(f, dx, degree) assert max_norm_err < err_bound @@ -85,11 +85,11 @@ def test_histopolation_cosine(basis, ncells, degree, periodic, num_pts=100): #============================================================================== def compare_and_plot(domain, f, fh, num_pts=100): - x = np.linspace(*domain, num=num_pts) + x = xp.linspace(*domain, num=num_pts) y = f(x) - yh = np.array([fh(xi) for xi in x]) + yh = xp.array([fh(xi) for xi in x]) - max_norm_err = np.max(abs(yh - y)) + max_norm_err = xp.max(abs(yh - y)) print("Maximum error on evaluation grid: {}".format(max_norm_err)) fig, ax = plt.subplots(1, 1) diff --git a/psydac/fem/tests/test_spline_interpolation.py 
b/psydac/fem/tests/test_spline_interpolation.py index b572e45b8..52738a4f4 100644 --- a/psydac/fem/tests/test_spline_interpolation.py +++ b/psydac/fem/tests/test_spline_interpolation.py @@ -3,7 +3,7 @@ from psydac.ddm.mpi import mpi as MPI -import numpy as np +import cunumpy as xp import pytest import time @@ -26,7 +26,7 @@ def test_SplineInterpolation1D_exact( ncells, degree ): domain = [-1.0, 1.0] periodic = False - poly_coeffs = np.random.random_sample( degree+1 ) # 0 <= c < 1 + poly_coeffs = xp.random.random_sample( degree+1 ) # 0 <= c < 1 poly_coeffs = 1.0 - poly_coeffs # 0 < c <= 1 f = lambda x : horner( x, *poly_coeffs ) @@ -39,10 +39,10 @@ def test_SplineInterpolation1D_exact( ncells, degree ): space.compute_interpolant( ug, field ) - xt = np.linspace( *domain, num=100 ) - err = np.array( [field( x ) - f( x ) for x in xt] ) + xt = xp.linspace( *domain, num=100 ) + err = xp.array( [field( x ) - f( x ) for x in xt] ) - max_norm_err = np.max( abs( err ) ) + max_norm_err = xp.max( abs( err ) ) assert max_norm_err < 1.0e-13 #=============================================================================== @@ -61,7 +61,7 @@ def test_SplineInterpolation1D_cosine( ncells, degree, periodic ): f = AnalyticalProfile1D_Cos() - grid, dx = np.linspace( *f.domain, num=ncells+1, retstep=True ) + grid, dx = xp.linspace( *f.domain, num=ncells+1, retstep=True ) space = SplineSpace( degree=degree, grid=grid, periodic=periodic ) field = FemField( space ) @@ -69,10 +69,10 @@ def test_SplineInterpolation1D_cosine( ncells, degree, periodic ): ug = f.eval( xg ) space.compute_interpolant( ug, field ) - xt = np.linspace( *f.domain, num=100 ) - err = np.array( [field( x ) - f.eval( x ) for x in xt] ) + xt = xp.linspace( *f.domain, num=100 ) + err = xp.array( [field( x ) - f.eval( x ) for x in xt] ) - max_norm_err = np.max( abs( err ) ) + max_norm_err = xp.max( abs( err ) ) err_bound = spline_1d_error_bound( f, dx, degree ) assert max_norm_err < err_bound @@ -98,7 +98,7 @@ def 
test_SplineInterpolation2D_parallel_exact( nc1, nc2, deg1, deg2 ): periodic2 = False # Random coefficients of 1D polynomial (identical on all processes!) - poly_coeffs = np.random.random_sample( min(deg1,deg2)+1 ) # 0 <= c < 1 + poly_coeffs = xp.random.random_sample( min(deg1,deg2)+1 ) # 0 <= c < 1 poly_coeffs = 1.0 - poly_coeffs # 0 < c <= 1 mpi_comm.Bcast( poly_coeffs, root=0 ) @@ -152,7 +152,7 @@ def test_SplineInterpolation2D_parallel_exact( nc1, nc2, deg1, deg2 ): # Compute L2 norm of error integrand = lambda x1,x2: (f(x1,x2)-tensor_field(x1,x2))**2 - l2_error = np.sqrt( tensor_space.integral( integrand ) ) + l2_error = xp.sqrt( tensor_space.integral( integrand ) ) # Print some information to terminal for i in range( mpi_size ): diff --git a/psydac/fem/tests/utilities.py b/psydac/fem/tests/utilities.py index 838212942..fa1cd0d26 100644 --- a/psydac/fem/tests/utilities.py +++ b/psydac/fem/tests/utilities.py @@ -1,7 +1,7 @@ # coding: utf-8 # Copyright 2018 Yaman Güçlü -import numpy as np +import cunumpy as xp #=============================================================================== def horner( x, *poly_coeffs ): @@ -18,10 +18,10 @@ def random_grid( domain, ncells, random_fraction ): """ Create random grid over 1D domain with given number of cells. 
""" # Create uniform grid on [0,1] - x = np.linspace( 0.0, 1.0, ncells+1 ) + x = xp.linspace( 0.0, 1.0, ncells+1 ) # Apply random displacement to all points, then sort grid - x += (np.random.random_sample( ncells+1 )-0.5) * (random_fraction/ncells) + x += (xp.random.random_sample( ncells+1 )-0.5) * (random_fraction/ncells) x.sort() # Apply linear transformation y=m*x+q to match domain limits diff --git a/psydac/fem/vector.py b/psydac/fem/vector.py index 8af1f0f5b..7013383de 100644 --- a/psydac/fem/vector.py +++ b/psydac/fem/vector.py @@ -1,7 +1,7 @@ # coding: utf-8 # TODO: - have a block version for VectorSpace when all component spaces are the same -import numpy as np +import cunumpy as xp from functools import reduce from typing import Optional @@ -430,7 +430,7 @@ def __init__(self, *spaces, connectivity=None): # ... make sure that all spaces have the same parametric dimension ldims = [V.ldim for V in self.spaces] - assert len(np.unique(ldims)) == 1 + assert len(xp.unique(ldims)) == 1 self._ldim = ldims[0] # ... diff --git a/psydac/linalg/basic.py b/psydac/linalg/basic.py index c298d687b..ce751d149 100644 --- a/psydac/linalg/basic.py +++ b/psydac/linalg/basic.py @@ -11,7 +11,7 @@ from types import LambdaType from inspect import signature -import numpy as np +import cunumpy as xp from scipy.sparse import coo_matrix from psydac.utilities.utils import is_real @@ -189,8 +189,8 @@ def conjugate(self, out=None): Please note that x.conjugate(out=x) modifies x in place and returns x. - If the field is real (i.e. `self.dtype in (np.float32, np.float64)`) this method is equivalent to `copy`. - If the field is complex (i.e. `self.dtype in (np.complex64, np.complex128)`) this method returns + If the field is real (i.e. `self.dtype in (xp.float32, xp.float64)`) this method is equivalent to `copy`. + If the field is complex (i.e. `self.dtype in (xp.complex64, xp.complex128)`) this method returns the complex conjugate of `self`, element-wise. 
The behavior of this function is similar to `numpy.conjugate(self, out=None)`. @@ -240,8 +240,8 @@ def __itruediv__(self, a): def conj(self, out=None): """Compute the complex conjugate vector. - If the field is real (i.e. `self.dtype in (np.float32, np.float64)`) this method is equivalent to `copy`. - If the field is complex (i.e. `self.dtype in (np.complex64, np.complex128)`) this method returns + If the field is real (i.e. `self.dtype in (xp.float32, xp.float64)`) this method is equivalent to `copy`. + If the field is complex (i.e. `self.dtype in (xp.complex64, xp.complex128)`) this method returns the complex conjugate of `self`, element-wise. The behavior of this function is similar to `numpy.conj(self, out=None)`. @@ -340,7 +340,7 @@ def __mul__(self, c): unless c = 0 or c = 1, in which case either a ZeroOperator or self is returned. """ - assert np.isscalar(c) + assert xp.isscalar(c) if c==0: return ZeroOperator(self.domain, self.codomain) elif c == 1: @@ -532,7 +532,7 @@ def copy(self): return ZeroOperator(self.domain, self.codomain) def toarray(self): - return np.zeros(self.shape, dtype=self.dtype) + return xp.zeros(self.shape, dtype=self.dtype) def tosparse(self): from scipy.sparse import csr_matrix @@ -568,7 +568,7 @@ def __sub__(self, B): return -B def __mul__(self, c): - assert np.isscalar(c) + assert xp.isscalar(c) return self def __matmul__(self, B): @@ -614,7 +614,7 @@ def copy(self): return IdentityOperator(self.domain, self.codomain) def toarray(self): - return np.diag(np.ones(self.domain.dimension , dtype=self.dtype)) + return xp.diag(xp.ones(self.domain.dimension , dtype=self.dtype)) def tosparse(self): from scipy.sparse import identity @@ -655,8 +655,8 @@ def __init__(self, domain, codomain, c, A): assert isinstance(domain, VectorSpace) assert isinstance(codomain, VectorSpace) - assert np.isscalar(c) - assert np.iscomplexobj(c) == (codomain._dtype == complex) + assert xp.isscalar(c) + assert xp.iscomplexobj(c) == (codomain._dtype == complex) 
assert isinstance(A, LinearOperator) assert domain == A.domain assert codomain == A.codomain @@ -703,7 +703,7 @@ def tosparse(self): return self._scalar*csr_matrix(self._operator.toarray()) def transpose(self, conjugate=False): - return ScaledLinearOperator(domain=self.codomain, codomain=self.domain, c=self._scalar if not conjugate else np.conjugate(self._scalar), A=self._operator.transpose(conjugate=conjugate)) + return ScaledLinearOperator(domain=self.codomain, codomain=self.domain, c=self._scalar if not conjugate else xp.conjugate(self._scalar), A=self._operator.transpose(conjugate=conjugate)) def __neg__(self): return ScaledLinearOperator(domain=self.domain, codomain=self.codomain, c=-1*self._scalar, A=self._operator) @@ -779,7 +779,7 @@ def dtype(self): return None def toarray(self): - out = np.zeros(self.shape, dtype=self.dtype) + out = xp.zeros(self.shape, dtype=self.dtype) for a in self._addends: out += a.toarray() return out diff --git a/psydac/linalg/block.py b/psydac/linalg/block.py index 7db84a9f4..e292cc473 100644 --- a/psydac/linalg/block.py +++ b/psydac/linalg/block.py @@ -2,7 +2,7 @@ # # Copyright 2018 Jalal Lakhlili, Yaman Güçlü -import numpy as np +import cunumpy as xp from types import MappingProxyType from scipy.sparse import bmat, lil_matrix @@ -48,7 +48,7 @@ def __init__(self, *spaces, connectivity=None): # Store spaces in a Tuple, because they will not be changed self._spaces = tuple(spaces) - if all(np.dtype(s.dtype)==np.dtype(spaces[0].dtype) for s in spaces): + if all(xp.dtype(s.dtype)==xp.dtype(spaces[0].dtype) for s in spaces): self._dtype = spaces[0].dtype else: raise NotImplementedError("The matrices domains don't have the same data type.") @@ -278,7 +278,7 @@ def space(self): # ... def toarray(self, order='C'): """ Convert to Numpy 1D array. """ - return np.concatenate([bi.toarray(order=order) for bi in self._blocks]) + return xp.concatenate([bi.toarray(order=order) for bi in self._blocks]) #... 
def copy(self, out=None): @@ -478,7 +478,7 @@ def toarray_local(self, order='C'): """ blocks = [v.toarray_local(order=order) for v in self._blocks] - return np.block([blocks])[0] + return xp.block([blocks])[0] # ... def topetsc(self): @@ -537,8 +537,12 @@ def __init__(self, V1, V2, blocks=None): self[i, j] = Lij elif isinstance(blocks, (list, tuple)): - blocks = np.array(blocks, dtype=object) - for (i, j), Lij in np.ndenumerate(blocks): + # blocks = xp.array(blocks, dtype=object) + # for (i, j), Lij in xp.ndenumerate(blocks): + # self[i, j] = Lij + import numpy as _np # ensures CPU-side object array creation + blocks = _np.array(blocks, dtype=object) + for (i, j), Lij in _np.ndenumerate(blocks): self[i, j] = Lij else: @@ -616,7 +620,7 @@ def conj(self, out=None): # B_ij = B[i, j] # if not ( A_ij is B_ij ): # if not (((A_ij is None) or (isinstance(A_ij, ZeroOperator))) & ((B_ij is None) or (isinstance(B_ij, ZeroOperator)))): -# if not ( np.array_equal(A_ij.toarray(), B_ij.toarray()) ): +# if not ( xp.array_equal(A_ij.toarray(), B_ij.toarray()) ): # return False # return True @@ -1360,20 +1364,20 @@ def set_backend(self, backend, precompiled=False): key_str = ''.join(str(i) for i in key) starts_k = starts[k] for i in range(len(starts_k)): - self._args['s{}_{}'.format(key_str, i+1)] = np.int64(starts_k[i]) + self._args['s{}_{}'.format(key_str, i+1)] = xp.int64(starts_k[i]) for k,key in enumerate(keys): key_str = ''.join(str(i) for i in key) nrows_k = nrows[k] for i in range(len(nrows_k)): - self._args['n{}_{}'.format(key_str, i+1)] = np.int64(nrows_k[i]) + self._args['n{}_{}'.format(key_str, i+1)] = xp.int64(nrows_k[i]) for k,key in enumerate(keys): key_str = ''.join(str(i) for i in key) nrows_extra_k = nrows_extra[k] for i in range(len(nrows_extra_k)): - self._args['ne{}_{}'.format(key_str, i+1)] = np.int64(nrows_extra_k[i]) + self._args['ne{}_{}'.format(key_str, i+1)] = xp.int64(nrows_extra_k[i]) else: dot = LinearOperatorDot(ndim, @@ -1398,19 +1402,19 @@ def 
set_backend(self, backend, precompiled=False): key_str = ''.join(str(i) for i in key) starts_k = starts[k] for i in range(len(starts_k)): - self._args['s{}_{}'.format(key_str, i+1)] = np.int64(starts_k[i]) + self._args['s{}_{}'.format(key_str, i+1)] = xp.int64(starts_k[i]) for k,key in enumerate(keys): key_str = ''.join(str(i) for i in key) nrows_k = nrows[k] for i in range(len(nrows_k)): - self._args['n{}_{}'.format(key_str, i+1)] = np.int64(nrows_k[i]) + self._args['n{}_{}'.format(key_str, i+1)] = xp.int64(nrows_k[i]) for k,key in enumerate(keys): key_str = ''.join(str(i) for i in key) nrows_extra_k = nrows_extra[k] for i in range(len(nrows_extra_k)): - self._args['ne{}_{}'.format(key_str, i+1)] = np.int64(nrows_extra_k[i]) + self._args['ne{}_{}'.format(key_str, i+1)] = xp.int64(nrows_extra_k[i]) else: dot = LinearOperatorDot(ndim, diff --git a/psydac/linalg/direct_solvers.py b/psydac/linalg/direct_solvers.py index 2d57c0f06..bbd2f24fb 100644 --- a/psydac/linalg/direct_solvers.py +++ b/psydac/linalg/direct_solvers.py @@ -2,7 +2,8 @@ # Copyright 2018 Jalal Lakhlili, Yaman Güçlü from abc import abstractmethod -import numpy as np +import cunumpy as xp +from cunumpy.xp import array_backend from scipy.linalg.lapack import dgbtrf, dgbtrs, sgbtrf, sgbtrs, cgbtrf, cgbtrs, zgbtrf, zgbtrs from scipy.sparse import spmatrix from scipy.sparse.linalg import splu @@ -35,27 +36,31 @@ def __init__(self, u, l, bmat, transposed=False): self._transposed = transposed # ... 
LU factorization - if bmat.dtype == np.float32: + if bmat.dtype == xp.float32: self._factor_function = sgbtrf self._solver_function = sgbtrs - elif bmat.dtype == np.float64: + elif bmat.dtype == xp.float64: self._factor_function = dgbtrf self._solver_function = dgbtrs - elif bmat.dtype == np.complex64: + elif bmat.dtype == xp.complex64: self._factor_function = cgbtrf self._solver_function = cgbtrs - elif bmat.dtype == np.complex128: + elif bmat.dtype == xp.complex128: self._factor_function = zgbtrf self._solver_function = zgbtrs else: msg = f'Cannot create a BandedSolver for bmat.dtype = {bmat.dtype}' raise NotImplementedError(msg) - + # print(f"{bmat = } {type(bmat) = }") + if hasattr(bmat, "get"): # CuPy array + bmat = bmat.get() + else: + bmat = xp.asanyarray(bmat) self._bmat, self._ipiv, self._finfo = self._factor_function(bmat, l, u) self._sinfo = None - self._space = np.ndarray + self._space = xp.ndarray self._dtype = bmat.dtype @property @@ -127,8 +132,16 @@ def solve(self, rhs, out=None): # TODO: handle non-contiguous views? 
# we want FORTRAN-contiguous data (default is assumed to be C contiguous) - _, self._sinfo = self._solver_function(self._bmat, self._l, self._u, out.T, self._ipiv, overwrite_b=True, + from cunumpy.xp import array_backend + if array_backend.backend == "numpy": + _, self._sinfo = self._solver_function(self._bmat, self._l, self._u, out.T, self._ipiv, overwrite_b=True, + trans=transposed) + else: + # GPU + out_cpu = out.get() + _, self._sinfo = self._solver_function(self._bmat, self._l, self._u, out_cpu.T, self._ipiv, overwrite_b=True, trans=transposed) + out.set(out_cpu) return out @@ -147,7 +160,7 @@ def __init__(self, spmat, transposed=False): assert isinstance(spmat, spmatrix) - self._space = np.ndarray + self._space = xp.ndarray self._splu = splu(spmat.tocsc()) self._transposed = transposed @@ -196,6 +209,11 @@ def solve(self, rhs, out=None): assert out.dtype == rhs.dtype # currently no in-place solve exposed - out[:] = self._splu.solve(rhs.T, trans='T' if transposed else 'N').T + if array_backend.backend == "numpy": + out[:] = self._splu.solve(rhs.T, trans='T' if transposed else 'N').T + else: + rhs_cpu = rhs.get() + result_cpu = self._splu.solve(rhs_cpu.T, trans='T' if transposed else 'N').T + out[:] = xp.asarray(result_cpu) return out diff --git a/psydac/linalg/fft.py b/psydac/linalg/fft.py index 5d67d178b..180d3744f 100644 --- a/psydac/linalg/fft.py +++ b/psydac/linalg/fft.py @@ -2,7 +2,7 @@ from psydac.linalg.stencil import StencilVectorSpace from psydac.linalg.kron import KroneckerLinearSolver -import numpy as np +import cunumpy as xp import scipy.fft as scifft import os @@ -47,14 +47,14 @@ def __init__(self, function): @property def space(self): - return np.ndarray + return xp.ndarray def transpose(self): raise NotImplementedError('transpose() is not implemented for OneDimSolvers') def solve(self, rhs, out=None): if out is None: - out = np.empty_like(rhs) + out = xp.empty_like(rhs) if out is not rhs: out[:] = rhs @@ -119,7 +119,7 @@ class 
DistributedFFT(DistributedFFTBase): def __init__(self, space, norm=None, workers=os.environ.get('OMP_NUM_THREADS', None)): # only allow complex data types assert isinstance(space, StencilVectorSpace) - assert np.dtype(space.dtype).kind == 'c' + assert xp.dtype(space.dtype).kind == 'c' workers = int(workers) if workers is not None else None super().__init__(space, lambda out: scifft.fft( @@ -147,7 +147,7 @@ class DistributedIFFT(DistributedFFTBase): def __init__(self, space, norm=None, workers=os.environ.get('OMP_NUM_THREADS', None)): # only allow complex data types assert isinstance(space, StencilVectorSpace) - assert np.dtype(space.dtype).kind == 'c' + assert xp.dtype(space.dtype).kind == 'c' workers = int(workers) if workers is not None else None super().__init__(space, lambda out: scifft.ifft( diff --git a/psydac/linalg/kron.py b/psydac/linalg/kron.py index f0a9151d8..a21ec743f 100644 --- a/psydac/linalg/kron.py +++ b/psydac/linalg/kron.py @@ -1,7 +1,7 @@ #coding = utf-8 from functools import reduce -import numpy as np +import cunumpy as xp from scipy.sparse import kron from scipy.sparse import coo_matrix @@ -76,7 +76,7 @@ def mats( self ): # ... def dot(self, x, out=None): - dot = np.dot + dot = xp.dot assert isinstance(x, StencilVector) assert x.space is self.domain @@ -100,14 +100,14 @@ def dot(self, x, out=None): nrows = tuple(e-s+1 for s,e in zip(starts, ends)) pnrows = tuple(2*p+1 for p in pads) - for ii in np.ndindex(*nrows): + for ii in xp.ndindex(*nrows): v = 0. 
xx = tuple(i+p*s for i,p,s in zip(ii, pads, shifts)) - for jj in np.ndindex(*pnrows): + for jj in xp.ndindex(*pnrows): i_mats = [mat._data[s, j] for s,j,mat in zip(xx, jj, mats)] ii_jj = tuple(i+j+(s-1)*p for i,j,p,s in zip(ii, jj, pads, shifts)) - v += x._data[ii_jj] * np.prod(i_mats) + v += x._data[ii_jj] * xp.prod(i_mats) out._data[xx] = v @@ -145,7 +145,7 @@ def __getitem__(self, key): cols = key[self.ndim:] mats = self.mats elements = [A[i,j] for A,i,j in zip(mats, rows, cols)] - return np.prod(elements) + return xp.prod(elements) def tostencil(self): @@ -176,14 +176,14 @@ def _tostencil(M, mats, nrows, nrows_extra, pads, xpads): diff = [xp-p for xp,p in zip(xpads, pads)] ndim = len(nrows) - for xx in np.ndindex( *nrows ): + for xx in xp.ndindex( *nrows ): ii = tuple(xp + x for xp, x in zip(xpads, xx) ) - for kk in np.ndindex( *ndiags ): + for kk in xp.ndindex( *ndiags ): values = [mat[i,k] for mat,i,k in zip(mats, ii, kk)] - M[(*ii, *kk)] = np.prod(values) + M[(*ii, *kk)] = xp.prod(values) # handle partly-multiplied rows new_nrows = nrows.copy() @@ -193,7 +193,7 @@ def _tostencil(M, mats, nrows, nrows_extra, pads, xpads): del rows[d] for n in range(er): - for xx in np.ndindex(*rows): + for xx in xp.ndindex(*rows): xx = list(xx) xx.insert(d, nrows[d]+n) @@ -204,9 +204,9 @@ def _tostencil(M, mats, nrows, nrows_extra, pads, xpads): kk = [slice(None,diag) for diag in ndiags] ii_kk = tuple( list(ii) + kk ) - for kk in np.ndindex( *ndiags ): + for kk in xp.ndindex( *ndiags ): values = [mat[i,k] for mat,i,k in zip(mats, ii, kk)] - M[(*ii, *kk)] = np.prod(values) + M[(*ii, *kk)] = xp.prod(values) new_nrows[d] += er def tosparse(self): @@ -244,14 +244,14 @@ def __init__(self, V, W, *args , with_pads=False): assert V.pads == W.pads for i,A in enumerate(args): - assert isinstance(A, np.ndarray) + assert isinstance(A, xp.ndarray) if with_pads: assert A.shape[1] == V.npts[i] + 2*V.pads[i] else: assert A.shape[1] == V.npts[i] if not with_pads: - args = [np.pad(a,p) for a,p 
in zip(args, W.pads)] + args = [xp.pad(a,p) for a,p in zip(args, W.pads)] self._domain = V self._codomain = W @@ -288,7 +288,7 @@ def mats(self): # ... def dot(self, x, out=None): - dot = np.dot + dot = xp.dot assert isinstance(x, StencilVector) assert x.space is self.domain @@ -316,10 +316,10 @@ def dot(self, x, out=None): x_data = x._data.ravel() out_data = out._data - for xx in np.ndindex(*nrows): + for xx in xp.ndindex(*nrows): ii = tuple(x+p for x,p in zip(xx,pads)) i_mats = [mat[i+s, k] for i,s,k,mat in zip(ii, c_starts, kk, mats)] - out_data[ii] = np.dot(x_data, np.outer(*i_mats).ravel()) + out_data[ii] = xp.dot(x_data, xp.outer(*i_mats).ravel()) # IMPORTANT: flag that ghost regions are not up-to-date out.ghost_regions_in_sync = False @@ -435,14 +435,14 @@ def _setup_solvers(self): (which potentially utilize MPI). """ # slice sizes - starts = np.array(self._domain.starts) - ends = np.array(self._domain.ends) + 1 + starts = xp.array(self._domain.starts) + ends = xp.array(self._domain.ends) + 1 self._slice = tuple([slice(s, e) for s,e in zip(starts, ends)]) # local and global sizes nglobals = self._domain.npts nlocals = ends - starts - self._localsize = np.prod(nlocals) + self._localsize = xp.prod(nlocals) mglobals = self._localsize // nlocals self._nlocals = nlocals @@ -485,7 +485,7 @@ def _setup_permutations(self): # we use a single permutation for all steps # it is: (n, 1, 2, ..., n-1) - self._perm = np.arange(self._ndim) + self._perm = xp.arange(self._ndim) self._perm[1:] = self._perm[:-1] self._perm[0] = self._ndim - 1 @@ -502,13 +502,13 @@ def _allocate_temps(self): """ Allocates all temporary data needed for the solve operation. 
""" - temp1 = np.empty((self._tempsize,), dtype=self._dtype) + temp1 = xp.empty((int(self._tempsize),), dtype=self._dtype) if self._ndim <= 1 and self._allserial: # if ndim==1 and we have no parallelism, # we can avoid allocating a second temp array temp2 = None else: - temp2 = np.empty((self._tempsize,), dtype=self._dtype) + temp2 = xp.empty((int(self._tempsize),), dtype=self._dtype) return temp1, temp2 @property @@ -610,12 +610,17 @@ def _reorder_temp_to_temp(self, source, target, i): Does not allocate any new array. """ sourceview = source[:self._localsize] + self._shapes[i] = tuple(int(x) for x in self._shapes[i]) + self._shapes[i+1] = tuple(int(x) for x in self._shapes[i+1]) sourceview.shape = self._shapes[i] targetview = target[:self._localsize] targetview.shape = self._shapes[i+1] + + # targetview[:] = sourceview.transpose(self._perm) + perm = tuple(int(p) for p in self._perm) - targetview[:] = sourceview.transpose(self._perm) + targetview[:] = sourceview.transpose(perm) def _reorder_temp_to_outslice(self, source, outslice): """ @@ -625,7 +630,9 @@ def _reorder_temp_to_outslice(self, source, outslice): sourceview = source[:self._localsize] sourceview.shape = self._shapes[-1] - outslice[:] = sourceview.transpose(self._perm) + # outslice[:] = sourceview.transpose(self._perm) + perm = tuple(int(p) for p in self._perm) + outslice[:] = sourceview.transpose(perm) class KroneckerSolverSerialPass: """ @@ -675,7 +682,7 @@ def solve_pass(self, workmem, tempmem): """ # reshape necessary memory in column-major view = workmem[:self._datasize] - view.shape = (self._numrhs,self._dimrhs) + view.shape = (int(self._numrhs), int(self._dimrhs)) # call solver in in-place mode self._solver.solve(view, out=view) @@ -786,14 +793,14 @@ def __init__(self, solver, mpi_type, i, cart, mglobal, nglobal, nlocal, localsiz # where N = floor(mglobaldata / comm.size) mlocal_pre = mglobal // comm.size mlocal_add = mglobal % comm.size - sourcesizes = np.full((comm.size,), mlocal_pre, dtype=int) 
+ sourcesizes = xp.full((comm.size,), mlocal_pre, dtype=int) sourcesizes[:mlocal_add] += 1 mlocal = sourcesizes[comm.rank] sourcesizes *= nlocal # disps, created from the sizes - sourcedisps = np.zeros((comm.size+1,), dtype=int) - np.cumsum(sourcesizes, out=sourcedisps[1:]) + sourcedisps = xp.zeros((comm.size+1,), dtype=int) + xp.cumsum(sourcesizes, out=sourcedisps[1:]) sourcedisps = sourcedisps[:-1] # target MPI sizes and disps diff --git a/psydac/linalg/solvers.py b/psydac/linalg/solvers.py index 8c6505ca7..ece44b71c 100644 --- a/psydac/linalg/solvers.py +++ b/psydac/linalg/solvers.py @@ -3,7 +3,7 @@ This module provides iterative solvers and preconditioners. """ -import numpy as np +import cunumpy as xp from math import sqrt from psydac.utilities.utils import is_real @@ -1160,7 +1160,7 @@ def solve(self, b, out=None): itn = 0 rnorm = 0 - eps = np.finfo(b.dtype).eps + eps = xp.finfo(b.dtype).eps A.dot(x, out=y) y -= b @@ -1178,7 +1178,7 @@ def solve(self, b, out=None): rhs2 = 0 tnorm2 = 0 gmax = 0 - gmin = np.finfo(b.dtype).max + gmin = xp.finfo(b.dtype).max cs = -1 sn = 0 w_new *= 0.0 @@ -1645,7 +1645,7 @@ def solve(self, b, out=None): test1 = normr / normb if (normA * normr) != 0:test2 = normar / (normA * normr) - else:test2 = np.infty + else:test2 = xp.inf test3 = 1 / condA t1 = test1 / (1 + normA * normx / normb) rtol = btol + atol * normA * normx / normb @@ -1736,7 +1736,7 @@ def __init__(self, A, *, x0=None, tol=1e-6, maxiter=100, verbose=False, recycle= self._tmps = {key: self.domain.zeros() for key in ("r", "p")} # Initialize upper Hessenberg matrix - self._H = np.zeros((self._options["maxiter"] + 1, self._options["maxiter"]), dtype=A.domain.dtype) + self._H = xp.zeros((self._options["maxiter"] + 1, self._options["maxiter"]), dtype=A.domain.dtype) self._Q = [] self._info = None @@ -1862,7 +1862,7 @@ def solve(self, b, out=None): def solve_triangular(self, T, d): # Backwards substitution.
Assumes T is upper triangular k = T.shape[0] - y = np.zeros((k,), dtype=self._A.domain.dtype) + y = xp.zeros((k,), dtype=self._A.domain.dtype) for k1 in range(k): temp = 0. diff --git a/psydac/linalg/stencil.py b/psydac/linalg/stencil.py index 85b03c4e4..6cedbc48b 100644 --- a/psydac/linalg/stencil.py +++ b/psydac/linalg/stencil.py @@ -5,8 +5,8 @@ import os import warnings -import numpy as np - +import cunumpy as xp +from cunumpy.xp import array_backend from types import MappingProxyType from scipy.sparse import coo_matrix, diags as sp_diags @@ -72,8 +72,8 @@ def compute_diag_len(pads, shifts_domain, shifts_codomain, return_padding=False) ep : (int) Padding that constitutes the starting index of the non zero elements. """ - n = ((np.ceil((pads+1)/shifts_codomain)-1)*shifts_domain).astype('int') - ep = -np.minimum(0, n-pads) + n = ((xp.ceil((pads+1)/shifts_codomain)-1)*shifts_domain).astype('int') + ep = -xp.minimum(0, n-pads) n = n + ep + pads + 1 if return_padding: return n.astype('int'), ep.astype('int') @@ -168,7 +168,9 @@ def __init__(self, cart, dtype=float): self._inner_func = self._inner_python # Constant arguments for inner product: total number of ghost cells - self._inner_consts = tuple(np.int64(p * s) for p, s in zip(self._pads, self._shifts)) + # self._inner_consts = tuple(xp.int64(p) * xp.int64(s) for p, s in zip(self._pads, self._shifts)) + self._inner_consts = tuple(int(p * s) for p, s in zip(self._pads, self._shifts)) + # TODO [YG, 06.09.2023]: print warning if pure Python functions are used @@ -184,7 +186,7 @@ def _axpy_python(self, a, x, y): @staticmethod def _inner_python(v1, v2, nghost): index = tuple(slice(ng, -ng) for ng in nghost) - return np.vdot(v1[index].flat, v2[index].flat) + return xp.vdot(v1[index].flat, v2[index].flat) #-------------------------------------- # Abstract interface @@ -194,7 +196,7 @@ def dimension(self): """ The dimension of a vector space V is the cardinality (i.e. 
the number of vectors) of a basis of V over its base field. """ - return np.prod(self._npts) + return xp.prod(self._npts) # ... @property @@ -443,15 +445,16 @@ def __init__(self, V): self._space = V self._sizes = V.shape self._ndim = len(V.npts) - self._data = np.zeros(V.shape, dtype=V.dtype) - self._dot_send_data = np.zeros((1,), dtype=V.dtype) - self._dot_recv_data = np.zeros((1,), dtype=V.dtype) + # self._data = xp.zeros(V.shape, dtype=V.dtype) + self._data = xp.zeros(tuple(int(s) for s in V.shape), dtype=V.dtype) + self._dot_send_data = xp.zeros((1,), dtype=V.dtype) + self._dot_recv_data = xp.zeros((1,), dtype=V.dtype) self._interface_data = {} self._requests = None # allocate data for the boundary that shares an interface for axis, ext in V.interfaces: - self._interface_data[axis, ext] = np.zeros(V.interfaces[axis, ext].shape, dtype=V.dtype) + self._interface_data[axis, ext] = xp.zeros(V.interfaces[axis, ext].shape, dtype=V.dtype) #prepare communications if V.cart.is_parallel and not V.cart.is_comm_null and isinstance(V.cart, CartDecomposition): @@ -510,9 +513,9 @@ def copy(self, out=None): if self is out: return self w = out or StencilVector( self._space ) - np.copyto(w._data, self._data, casting='no') + xp.copyto(w._data, self._data, casting='no') for axis, ext in self._space.interfaces: - np.copyto(w._interface_data[axis, ext], self._interface_data[axis, ext], casting='no') + xp.copyto(w._interface_data[axis, ext], self._interface_data[axis, ext], casting='no') w._sync = self._sync return w @@ -523,27 +526,27 @@ def conjugate(self, out=None): assert out.space is self.space else: out = StencilVector(self.space) - np.conjugate(self._data, out=out._data, casting='no') + xp.conjugate(self._data, out=out._data, casting='no') for axis, ext in self._space.interfaces: - np.conjugate(self._interface_data[axis, ext], out=out._interface_data[axis, ext], casting='no') + xp.conjugate(self._interface_data[axis, ext], out=out._interface_data[axis, ext], casting='no') 
out._sync = self._sync return out #... def __neg__(self): w = StencilVector( self._space ) - np.negative(self._data, out=w._data) + xp.negative(self._data, out=w._data) for axis, ext in self._space.interfaces: - np.negative(self._interface_data[axis, ext], out=w._interface_data[axis, ext]) + xp.negative(self._interface_data[axis, ext], out=w._interface_data[axis, ext]) w._sync = self._sync return w #... def __mul__(self, a): w = StencilVector( self._space ) - np.multiply(self._data, a, out=w._data) + xp.multiply(self._data, a, out=w._data) for axis, ext in self._space.interfaces: - np.multiply(self._interface_data[axis, ext], a, out=w._interface_data[axis, ext]) + xp.multiply(self._interface_data[axis, ext], a, out=w._interface_data[axis, ext]) w._sync = self._sync return w @@ -552,9 +555,9 @@ def __add__(self, v): assert isinstance( v, StencilVector ) assert v._space is self._space w = StencilVector( self._space ) - np.add(self._data, v._data, out=w._data) + xp.add(self._data, v._data, out=w._data) for axis, ext in self._space.interfaces: - np.add(self._interface_data[axis, ext], v._interface_data[axis, ext], out=w._interface_data[axis, ext]) + xp.add(self._interface_data[axis, ext], v._interface_data[axis, ext], out=w._interface_data[axis, ext]) w._sync = self._sync and v._sync return w @@ -563,9 +566,9 @@ def __sub__(self, v): assert isinstance( v, StencilVector ) assert v._space is self._space w = StencilVector( self._space ) - np.subtract(self._data, v._data, out=w._data) + xp.subtract(self._data, v._data, out=w._data) for axis, ext in self._space.interfaces: - np.subtract(self._interface_data[axis, ext], v._interface_data[axis, ext], out=w._interface_data[axis, ext]) + xp.subtract(self._interface_data[axis, ext], v._interface_data[axis, ext], out=w._interface_data[axis, ext]) w._sync = self._sync and v._sync return w @@ -631,7 +634,7 @@ def toarray_local(self , *, order='C'): # ... 
def _toarray_parallel_no_pads(self, order='C'): - a = np.zeros( self.space.npts, self.dtype ) + a = xp.zeros( self.space.npts, self.dtype ) idx_from = tuple( slice(m*p,-m*p) if p != 0 else slice(0, None) for p,m in zip(self.pads, self.space.shifts) ) idx_to = tuple( slice(s,e+1) for s,e in zip(self.starts,self.ends) ) a[idx_to] = self._data[idx_from] @@ -643,7 +646,7 @@ def _toarray_parallel_with_pads(self, order='C'): pads = [m*p for m,p in zip(self.space.shifts, self.pads)] # Step 0: create extended n-dimensional array with zero values shape = tuple( n+2*p for n,p in zip( self.space.npts, pads ) ) - a = np.zeros( shape, self.dtype ) + a = xp.zeros( shape, self.dtype ) # Step 1: write extended data chunk (local to process) onto array idx = tuple( slice(s,e+2*p+1) for s,e,p in @@ -915,7 +918,7 @@ def __init__( self, V, W, pads=None , backend=None, precompiled=True): self._pads = pads or tuple(V.pads) dims = list(W.shape) diags = [compute_diag_len(p, md, mc) for p,md,mc in zip(self._pads, V.shifts, W.shifts)] - self._data = np.zeros(dims+diags, dtype=W.dtype) + self._data = xp.zeros(tuple(int(d) for d in (dims + diags)), dtype=W.dtype) self._domain = V self._codomain = W self._ndim = len(dims) @@ -1059,8 +1062,8 @@ def vdot( self, v, out=None): v.update_ghost_regions() # Instead of computing A_*x, this function computes (A*x_)_ - self._func(self._data, np.conjugate(v._data), out._data, **self._args) - np.conjugate(out._data, out=out._data) + self._func(self._data, xp.conjugate(v._data), out._data, **self._args) + xp.conjugate(out._data, out=out._data) # IMPORTANT: flag that ghost regions are not up-to-date out.ghost_regions_in_sync = False @@ -1097,7 +1100,7 @@ def transpose(self, conjugate=False, out=None): # Call low-level '_transpose' function (works on Numpy arrays directly) if conjugate: - self._transpose_func(np.conjugate(M._data), out._data, **self._transpose_args) + self._transpose_func(xp.conjugate(M._data), out._data, **self._transpose_args) else: 
self._transpose_func(M._data, out._data, **self._transpose_args) return out @@ -1201,7 +1204,7 @@ def conjugate(self, out=None): out = StencilMatrix(self.domain, self.codomain, pads=self.pads, backend=self._backend, precompiled=self._precompiled) out._func = self._func out._args = self._args - np.conjugate(self._data, out=out._data, casting='no') + xp.conjugate(self._data, out=out._data, casting='no') return out # ... @@ -1454,14 +1457,14 @@ def diagonal(self, *, inverse = False, sqrt = False, out = None): # Calculate entries of StencilDiagonalMatrix if inverse: - data = np.divide(1, diag, out=data) + data = xp.divide(1, diag, out=data) elif out: - np.copyto(data, diag) + xp.copyto(data, diag) else: data = diag.copy() if sqrt: - np.sqrt(data, out=data) + xp.sqrt(data, out=data) # If needed create a new StencilDiagonalMatrix object if out is None: @@ -1524,7 +1527,7 @@ def tocoo_local(self, order='C'): nr = [e-s+1 +2*p for s,e,p in zip(sc, ec, pc)] nc = [e-s+1 +2*p for s,e,p in zip(sd, ed, pd)] - ravel_multi_index = np.ravel_multi_index + ravel_multi_index = xp.ravel_multi_index # COO storage rows = [] @@ -1535,7 +1538,7 @@ def tocoo_local(self, order='C'): dd = [pdi-ppi for pdi,ppi in zip(pd, self._pads)] - for (index, value) in np.ndenumerate( self._data[local] ): + for (index, value) in xp.ndenumerate( self._data[local] ): # index = [i1-s1, i2-s2, ..., p1+j1-i1, p2+j2-i2, ...] 
@@ -1554,7 +1557,7 @@ def tocoo_local(self, order='C'): M = coo_matrix( (data,(rows,cols)), - shape = [np.prod(nr),np.prod(nc)], + shape = [xp.prod(nr),xp.prod(nc)], dtype = self._domain.dtype ) @@ -1574,32 +1577,41 @@ def _tocoo_no_pads(self , order='C'): dm = self._domain.shifts cm = self._codomain.shifts - pp = [np.int64(compute_diag_len(p,mj,mi)-(p+1)) for p,mi,mj in zip(self._pads, cm, dm)] + pp = [int(compute_diag_len(p,mj,mi)-(p+1)) for p,mi,mj in zip(self._pads, cm, dm)] # Range of data owned by local process (no ghost regions) local = tuple( [slice(mi*p,-mi*p) if p != 0 else slice(p, None) for p,mi in zip(cpads, cm)] + [slice(None)] * nd ) size = self._data[local].size # COO storage - rows = np.zeros(size, dtype='int64') - cols = np.zeros(size, dtype='int64') - data = np.zeros(size, dtype=self.dtype) - nrl = [np.int64(e-s+1) for s,e in zip(self.codomain.starts, self.codomain.ends)] - ncl = [np.int64(i) for i in self._data.shape[nd:]] - ss = [np.int64(i) for i in ss] - nr = [np.int64(i) for i in nr] - nc = [np.int64(i) for i in nc] - dm = [np.int64(i) for i in dm] - cm = [np.int64(i) for i in cm] - cpads = [np.int64(i) for i in cpads] - pp = [np.int64(i) for i in pp] + rows = xp.zeros(size, dtype='int64') + cols = xp.zeros(size, dtype='int64') + data = xp.zeros(size, dtype=self.dtype) + nrl = [int(e-s+1) for s,e in zip(self.codomain.starts, self.codomain.ends)] + ncl = [int(i) for i in self._data.shape[nd:]] + ss = [int(i) for i in ss] + nr = [int(i) for i in nr] + nc = [int(i) for i in nc] + dm = [int(i) for i in dm] + cm = [int(i) for i in cm] + cpads = [int(i) for i in cpads] + pp = [int(i) for i in pp] stencil2coo = kernels['stencil2coo'][order][nd] - + import numpy as _np ind = stencil2coo(self._data, data, rows, cols, *nrl, *ncl, *ss, *nr, *nc, *dm, *cm, *cpads, *pp) - M = coo_matrix( + + + if array_backend.backend == "cupy": + M = coo_matrix( + (data[:ind].get(), (rows[:ind].get(), cols[:ind].get())), + shape=[int(_np.prod(nr)), int(_np.prod(nc))], + 
dtype=self.dtype + ) + else: + M = coo_matrix( (data[:ind],(rows[:ind],cols[:ind])), - shape = [np.prod(nr),np.prod(nc)], + shape = [_np.prod(nr),_np.prod(nc)], dtype = self.dtype) return M @@ -1622,7 +1634,7 @@ def _tocoo_parallel_with_pads(self , order='C'): pd = self._domain.pads cc = self._codomain.periods - ravel_multi_index = np.ravel_multi_index + ravel_multi_index = xp.ravel_multi_index # COO storage rows = [] @@ -1637,7 +1649,7 @@ def _tocoo_parallel_with_pads(self , order='C'): ll_dims = self._data.shape[nd:] # Cycle over rows (x = p + i - s) - for xx in np.ndindex( *xx_dims ): + for xx in xp.ndindex( *xx_dims ): # Compute row multi-index with simple shift ii = [s + x - p for (s, x, p) in zip(ss, xx, pc)] @@ -1662,7 +1674,7 @@ def _tocoo_parallel_with_pads(self , order='C'): continue # Cycle over diagonals (l = p + k) - for ll in np.ndindex( *ll_dims ): + for ll in xp.ndindex( *ll_dims ): # Compute column multi-index (k = j - i) jj = [(i+l-p) % n for (i,l,n,p) in zip(ii,ll,nc,pp)] @@ -1681,7 +1693,7 @@ def _tocoo_parallel_with_pads(self , order='C'): # Create Scipy COO matrix M = coo_matrix( (data,(rows,cols)), - shape = [np.prod(nr), np.prod(nc)], + shape = [xp.prod(nr), xp.prod(nc)], dtype = self._domain.dtype ) @@ -1767,7 +1779,7 @@ def _prepare_transpose_args(self): sl = [(s if mi > mj else 0) + (s % mi + mi//mj if mi < mj else 0)+(s if mi == mj else 0)\ for s, p, mi, mj in zip(starts, pp, cm, dm)] - si = [(mi * p - mi * (int(np.ceil((p + 1)/mj)) - 1) if mi > mj else 0) + \ + si = [(mi * p - mi * (int(xp.ceil((p + 1)/mj)) - 1) if mi > mj else 0) + \ (mi * p - mi * (p//mi) + d * (mi - 1) if mi < mj else 0) + \ (mj * p - mj * (p//mi) + d * (mi - 1) if mi == mj else 0)\ for mi, mj, p, d in zip(cm, dm, pp, diff)] @@ -1779,17 +1791,29 @@ def _prepare_transpose_args(self): for mi, mj, n, p in zip(cm, dm, ndiagsT, pp)] args={} - args['n'] = np.int64(nrows) - args['nc'] = np.int64(ncols) - args['gp'] = np.int64(gpads) - args['p'] = np.int64(pp) - args['dm'] = 
np.int64(dm) - args['cm'] = np.int64(cm) - args['nd'] = np.int64(ndiags) - args['ndT'] = np.int64(ndiagsT) - args['si'] = np.int64(si) - args['sk'] = np.int64(sk) - args['sl'] = np.int64(sl) + # args['n'] = xp.int64(nrows) + # args['nc'] = xp.int64(ncols) + # args['gp'] = xp.int64(gpads) + # args['p'] = xp.int64(pp) + # args['dm'] = xp.int64(dm) + # args['cm'] = xp.int64(cm) + # args['nd'] = xp.int64(ndiags) + # args['ndT'] = xp.int64(ndiagsT) + # args['si'] = xp.int64(si) + # args['sk'] = xp.int64(sk) + # args['sl'] = xp.int64(sl) + args['n'] = [int(x) for x in nrows] + args['nc'] = [int(x) for x in ncols] + args['gp'] = [int(x) for x in gpads] + args['p'] = [int(x) for x in pp] + args['dm'] = [int(x) for x in dm] + args['cm'] = [int(x) for x in cm] + args['nd'] = [int(x) for x in ndiags] + args['ndT'] = [int(x) for x in ndiagsT] + args['si'] = [int(x) for x in si] + args['sk'] = [int(x) for x in sk] + args['sl'] = [int(x) for x in sl] + return args @@ -1811,7 +1835,7 @@ def set_backend(self, backend, precompiled): if self._backend is None: for key, arg in self._args.items(): - self._args[key] = np.int64(arg) + self._args[key] = xp.int64(arg) self._func = self._dot self._args.pop('pads') elif precompiled: @@ -1837,12 +1861,12 @@ def set_backend(self, backend, precompiled): self._args['e_out'] = int(self.codomain.ends[0]) self._args['p_out'] = int(self.codomain.pads[0]) else: - self._args['s_in'] = np.array(self.domain.starts) - self._args['p_in'] = np.array(self.domain.pads) - self._args['add'] = np.array(add) - self._args['s_out'] = np.array(self.codomain.starts) - self._args['e_out'] = np.array(self.codomain.ends) - self._args['p_out'] = np.array(self.codomain.pads) + self._args['s_in'] = xp.array(self.domain.starts) + self._args['p_in'] = xp.array(self.domain.pads) + self._args['add'] = xp.array(add) + self._args['s_out'] = xp.array(self.codomain.starts) + self._args['e_out'] = xp.array(self.codomain.ends) + self._args['p_out'] = xp.array(self.codomain.pads) # 
transpose kernel transp_func_name = 'transpose_' + str(self._ndim) + 'd_kernel' @@ -1861,12 +1885,12 @@ def set_backend(self, backend, precompiled): self._transpose_args['e_out'] = int(self.domain.ends[0]) self._transpose_args['p_out'] = int(self.domain.pads[0]) else: - self._transpose_args['s_in'] = np.array(self.codomain.starts) - self._transpose_args['p_in'] = np.array(self.codomain.pads) - self._transpose_args['add'] = np.array(add) - self._transpose_args['s_out'] = np.array(self.domain.starts) - self._transpose_args['e_out'] = np.array(self.domain.ends) - self._transpose_args['p_out'] = np.array(self.domain.pads) + self._transpose_args['s_in'] = xp.array(self.codomain.starts) + self._transpose_args['p_in'] = xp.array(self.codomain.pads) + self._transpose_args['add'] = xp.array(add) + self._transpose_args['s_out'] = xp.array(self.domain.starts) + self._transpose_args['e_out'] = xp.array(self.domain.ends) + self._transpose_args['p_out'] = xp.array(self.domain.pads) else: raise AttributeError(f'This is the tiny-psydac version - must use precompiled kernels (but {precompiled = })!') from psydac.api.ast.linalg import LinearOperatorDot @@ -1896,10 +1920,10 @@ def set_backend(self, backend, precompiled): self._args.pop('cm') for i in range(len(nrows)): - self._args['s00_{i}'.format(i=i+1)] = np.int64(starts[i]) + self._args['s00_{i}'.format(i=i+1)] = xp.int64(starts[i]) for i in range(len(nrows)): - self._args['n00_{i}'.format(i=i+1)] = np.int64(nrows[i]) + self._args['n00_{i}'.format(i=i+1)] = xp.int64(nrows[i]) else: dot = LinearOperatorDot(self._ndim, @@ -1923,13 +1947,13 @@ def set_backend(self, backend, precompiled): self._args.pop('cm') for i in range(len(nrows)): - self._args['s00_{i}'.format(i=i+1)] = np.int64(starts[i]) + self._args['s00_{i}'.format(i=i+1)] = xp.int64(starts[i]) for i in range(len(nrows)): - self._args['n00_{i}'.format(i=i+1)] = np.int64(nrows[i]) + self._args['n00_{i}'.format(i=i+1)] = xp.int64(nrows[i]) for i in range(len(nrows)): - 
self._args['ne00_{i}'.format(i=i+1)] = np.int64(nrows_extra[i]) + self._args['ne00_{i}'.format(i=i+1)] = xp.int64(nrows_extra[i]) else: dot = LinearOperatorDot(self._ndim, @@ -1982,9 +2006,9 @@ def _get_diagonal_indices(self): nrows = [e - s + 1 for s, e in zip(self.codomain.starts, self.codomain.ends)] ndim = self.domain.ndim - indices = [np.zeros(np.prod(nrows), dtype=int) for _ in range(2 * ndim)] + indices = [xp.zeros(xp.prod(nrows), dtype=int) for _ in range(2 * ndim)] - for l, xx in enumerate(np.ndindex(*nrows)): + for l, xx in enumerate(xp.ndindex(*nrows)): ii = [m * p + x for m, p, x in zip(dm, dp, xx)] jj = [p + x + s - ((x+s) // mi) * mj for x, mi, mj, p, s in zip(xx, cm, dm, pp, ss)] for k in range(ndim): @@ -2022,7 +2046,7 @@ def __init__(self, V, W, data): assert V.starts == W.starts assert V.ends == W.ends - data = np.asarray(data) + data = xp.asarray(data) # Check shape of provided data shape = tuple(e - s + 1 for s, e in zip(V.starts, V.ends)) @@ -2067,7 +2091,7 @@ def dot(self, v, out=None): V = self.domain i = tuple(slice(s, e + 1) for s, e in zip(V.starts, V.ends)) - np.multiply(self._data, v[i], out=out[i]) + xp.multiply(self._data, v[i], out=out[i]) out.ghost_regions_in_sync = False @@ -2091,14 +2115,14 @@ def transpose(self, *, conjugate=False, out=None): if out is None: data = self._data.copy() else: - np.copyto(out._data, self._data, casting='no') + xp.copyto(out._data, self._data, casting='no') else: if out is None: - data = np.conjugate(self._data, casting='no') + data = xp.conjugate(self._data, casting='no') else: - np.conjugate(self._data, out=out._data, casting='no') + xp.conjugate(self._data, out=out._data, casting='no') if out is None: out = StencilDiagonalMatrix(self.codomain, self.domain, data) @@ -2120,7 +2144,7 @@ def copy(self, *, out=None): assert isinstance(out, StencilDiagonalMatrix) assert out.domain is self.domain assert out.codomain is self.codomain - np.copyto(out._data, self._data, casting='no') + xp.copyto(out._data, 
self._data, casting='no') return out @@ -2163,9 +2187,9 @@ def diagonal(self, *, inverse = False, out = None): # Calculate entries, or set `out=self` in default case if inverse: - data = np.divide(1, diag, out=data) + data = xp.divide(1, diag, out=data) elif out: - np.copyto(data, diag) + xp.copyto(data, diag) else: out = self @@ -2248,7 +2272,7 @@ def __init__(self, V, W, s_d, s_c, d_axis, c_axis, d_ext, c_ext, *, flip=None, p dims[c_axis] = W.pads[c_axis] + 1-diff + 2*W.shifts[c_axis]*W.pads[c_axis] diags = [compute_diag_len(p, md, mc) for p,md,mc in zip(self._pads, Vin.shifts, W.shifts)] - self._data = np.zeros(dims + diags, dtype=W.dtype) + self._data = xp.zeros(dims + diags, dtype=W.dtype) # Parallel attributes if W.parallel and not isinstance(W.cart, InterfaceCartDecomposition): @@ -2374,7 +2398,7 @@ def _dot(mat, v, out, starts, nrows, nrows_extra, gpads, pads, dm, cm, c_axis, d bb = [p*m+p+1-n-s%m for p,m,n,s in zip(gpads, dm, ndiags, starts)] nn = v.shape - for xx in np.ndindex( *nrows ): + for xx in xp.ndindex( *nrows ): ii = [ mi*pi + x for mi,pi,x in zip(cm, gpads, xx) ] jj = tuple( slice(b-d+(x+s%mj)//mi*mj,b-d+(x+s%mj)//mi*mj+n) for x,mi,mj,b,s,n,d in zip(xx,cm,dm,bb,starts,ndiags,diff) ) jj = [flip_axis(i,n) if f==-1 else i for i,f,n in zip(jj,flip,nn)] @@ -2382,7 +2406,7 @@ def _dot(mat, v, out, starts, nrows, nrows_extra, gpads, pads, dm, cm, c_axis, d ii_kk = tuple( ii + kk ) ii[c_axis] += c_start - out[tuple(ii)] = np.dot( mat[ii_kk].flat, v[jj].flat ) + out[tuple(ii)] = xp.dot( mat[ii_kk].flat, v[jj].flat ) new_nrows = nrows.copy() @@ -2392,7 +2416,7 @@ def _dot(mat, v, out, starts, nrows, nrows_extra, gpads, pads, dm, cm, c_axis, d del rows[d] for n in range(er): - for xx in np.ndindex(*rows): + for xx in xp.ndindex(*rows): xx = list(xx) xx.insert(d, nrows[d]+n) @@ -2404,7 +2428,7 @@ def _dot(mat, v, out, starts, nrows, nrows_extra, gpads, pads, dm, cm, c_axis, d kk = [slice(None,n-e) for n,e in zip(ndiags, ee)] ii_kk = tuple( ii + kk ) 
ii[c_axis] += c_start - out[tuple(ii)] = np.dot( mat[ii_kk].flat, v[jj].flat ) + out[tuple(ii)] = xp.dot( mat[ii_kk].flat, v[jj].flat ) new_nrows[d] += er @@ -2425,7 +2449,7 @@ def transpose( self, conjugate=False, out=None): # Call low-level '_transpose' function (works on Numpy arrays directly) if conjugate: - M._transpose_func(np.conjugate(M._data), out._data, **M._transpose_args) + M._transpose_func(xp.conjugate(M._data), out._data, **M._transpose_args) else: M._transpose_func(M._data, out._data, **M._transpose_args) return out @@ -2458,7 +2482,7 @@ def _prepare_transpose_args(self): sl = [(s if mi > mj else 0) + (s % mi + mi//mj if mi < mj else 0)+(s if mi == mj else 0)\ for s, p, mi, mj in zip(starts, pp, cm, dm)] - si = [(mi * p - mi * (int(np.ceil((p + 1)/mj)) - 1) if mi > mj else 0) + \ + si = [(mi * p - mi * (int(xp.ceil((p + 1)/mj)) - 1) if mi > mj else 0) + \ (mi * p - mi * (p//mi) + d * (mi - 1) if mi < mj else 0) + \ (mj * p - mj * (p//mi) + d * (mi - 1) if mi == mj else 0)\ for mi, mj, p, d in zip(cm, dm, pp, diff)] @@ -2484,17 +2508,17 @@ def _prepare_transpose_args(self): ncols[dim] = pads[dim] + 1 - diff_c + 2*cm[dim]*pads[dim] args = {} - args['n'] = np.int64(nrows) - args['nc'] = np.int64(ncols) - args['gp'] = np.int64(gpads) - args['p'] = np.int64(pp) - args['dm'] = np.int64(dm) - args['cm'] = np.int64(cm) - args['nd'] = np.int64(ndiags) - args['ndT'] = np.int64(ndiagsT) - args['si'] = np.int64(si) - args['sk'] = np.int64(sk) - args['sl'] = np.int64(sl) + args['n'] = xp.int64(nrows) + args['nc'] = xp.int64(ncols) + args['gp'] = xp.int64(gpads) + args['p'] = xp.int64(pp) + args['dm'] = xp.int64(dm) + args['cm'] = xp.int64(cm) + args['nd'] = xp.int64(ndiags) + args['ndT'] = xp.int64(ndiagsT) + args['si'] = xp.int64(si) + args['sk'] = xp.int64(sk) + args['sl'] = xp.int64(sl) return args @@ -2688,7 +2712,7 @@ def _tocoo_no_pads(self): dm = self.domain.shifts cm = self.codomain.shifts - ravel_multi_index = np.ravel_multi_index + ravel_multi_index = 
xp.ravel_multi_index # COO storage rows = [] @@ -2698,7 +2722,7 @@ def _tocoo_no_pads(self): local = tuple( [slice(m*p,-m*p) if p != 0 else slice(0, None) for m,p in zip(cm, pp)] + [slice(None)] * nd ) pp = [compute_diag_len(p,mj,mi)-(p+1) for p,mi,mj in zip(self._pads, cm, dm)] - for (index,value) in np.ndenumerate( self._data[local] ): + for (index,value) in xp.ndenumerate( self._data[local] ): if value: # index = [i1, i2, ..., p1+j1-i1, p2+j2-i2, ...] @@ -2726,7 +2750,7 @@ def _tocoo_no_pads(self): M = coo_matrix( (data,(rows,cols)), - shape = [np.prod(nr),np.prod(nc)], + shape = [xp.prod(nr),xp.prod(nc)], dtype = self.domain.dtype) return M @@ -2858,10 +2882,10 @@ def set_backend(self, backend, precompiled=False): self._args = {} for i in range(len(nrows)): - self._args['s00_{i}'.format(i=i+1)] = np.int64(starts[i]) + self._args['s00_{i}'.format(i=i+1)] = xp.int64(starts[i]) for i in range(len(nrows)): - self._args['n00_{i}'.format(i=i+1)] = np.int64(nrows[i]) + self._args['n00_{i}'.format(i=i+1)] = xp.int64(nrows[i]) else: dot = LinearOperatorDot(self._ndim, @@ -2887,13 +2911,13 @@ def set_backend(self, backend, precompiled=False): self._args = {} for i in range(len(nrows)): - self._args['s00_{i}'.format(i=i+1)] = np.int64(starts[i]) + self._args['s00_{i}'.format(i=i+1)] = xp.int64(starts[i]) for i in range(len(nrows)): - self._args['n00_{i}'.format(i=i+1)] = np.int64(nrows[i]) + self._args['n00_{i}'.format(i=i+1)] = xp.int64(nrows[i]) for i in range(len(nrows)): - self._args['ne00_{i}'.format(i=i+1)] = np.int64(nrows_extra[i]) + self._args['ne00_{i}'.format(i=i+1)] = xp.int64(nrows_extra[i]) else: dot = LinearOperatorDot(self._ndim, diff --git a/psydac/linalg/tests/test_block.py b/psydac/linalg/tests/test_block.py index 00badb580..75a109fa2 100644 --- a/psydac/linalg/tests/test_block.py +++ b/psydac/linalg/tests/test_block.py @@ -1,7 +1,7 @@ # -*- coding: UTF-8 -*- # import pytest -import numpy as np +import cunumpy as xp from scipy.sparse import csr_matrix 
from random import random, seed @@ -25,7 +25,7 @@ def compute_global_starts_ends(domain_decomposition, npts): global_ends [axis] = ee.copy() global_ends [axis][-1] = npts[axis]-1 - global_starts[axis] = np.array([0] + (global_ends[axis][:-1]+1).tolist()) + global_starts[axis] = xp.array([0] + (global_ends[axis][:-1]+1).tolist()) return global_starts, global_ends @@ -195,17 +195,17 @@ def test_2D_block_linear_operator_serial_init( dtype, n1, n2, p1, p2, P1, P2 ): coo4 = L4.tosparse().tocoo() # Check if the data are in the same place - assert np.array_equal( coo1.col , coo2.col ) - assert np.array_equal( coo1.row , coo2.row ) - assert np.array_equal( coo1.data, coo2.data ) + assert xp.array_equal( coo1.col , coo2.col ) + assert xp.array_equal( coo1.row , coo2.row ) + assert xp.array_equal( coo1.data, coo2.data ) - assert np.array_equal( coo1.col , coo3.col ) - assert np.array_equal( coo1.row , coo3.row ) - assert np.array_equal( coo1.data, coo3.data ) + assert xp.array_equal( coo1.col , coo3.col ) + assert xp.array_equal( coo1.row , coo3.row ) + assert xp.array_equal( coo1.data, coo3.data ) - assert np.array_equal( coo1.col , coo4.col ) - assert np.array_equal( coo1.row , coo4.row ) - assert np.array_equal( coo1.data, coo4.data ) + assert xp.array_equal( coo1.col , coo4.col ) + assert xp.array_equal( coo1.row , coo4.row ) + assert xp.array_equal( coo1.data, coo4.data ) dict_blocks = {(0,0):M1, (0,1):M2} @@ -293,22 +293,22 @@ def test_block_serial_dimension( ndim, p, P1, P2, P3, dtype ): # Fill in vector with random values, then update ghost regions if ndim==1: - x1[:] = cst*2.0*np.random.random((npts[0]+2*p)) - x2[:] = cst*5.0*np.random.random((npts[0]+2*p)) - y1[:] = cst*2.0*np.random.random((npts[0]+2*p)) - y2[:] = cst*3.0*np.random.random((npts[0]+2*p)) + x1[:] = cst*2.0*xp.random.random((npts[0]+2*p)) + x2[:] = cst*5.0*xp.random.random((npts[0]+2*p)) + y1[:] = cst*2.0*xp.random.random((npts[0]+2*p)) + y2[:] = cst*3.0*xp.random.random((npts[0]+2*p)) elif ndim==2: 
- x1[:,:] = cst*2.0*np.random.random((npts[0]+2*p,npts[1]+2*p)) - x2[:,:] = cst*5.0*np.random.random((npts[0]+2*p,npts[1]+2*p)) - y1[:,:] = cst*2.0*np.random.random((npts[0]+2*p,npts[1]+2*p)) - y2[:,:] = cst*3.0*np.random.random((npts[0]+2*p,npts[1]+2*p)) + x1[:,:] = cst*2.0*xp.random.random((npts[0]+2*p,npts[1]+2*p)) + x2[:,:] = cst*5.0*xp.random.random((npts[0]+2*p,npts[1]+2*p)) + y1[:,:] = cst*2.0*xp.random.random((npts[0]+2*p,npts[1]+2*p)) + y2[:,:] = cst*3.0*xp.random.random((npts[0]+2*p,npts[1]+2*p)) else: - x1[:,:,:] = cst*2.0*np.random.random((npts[0]+2*p,npts[1]+2*p,npts[2]+2*p)) - x2[:,:,:] = cst*5.0*np.random.random((npts[0]+2*p,npts[1]+2*p,npts[2]+2*p)) - y1[:,:,:] = cst*2.0*np.random.random((npts[0]+2*p,npts[1]+2*p,npts[2]+2*p)) - y2[:,:,:] = cst*3.0*np.random.random((npts[0]+2*p,npts[1]+2*p,npts[2]+2*p)) + x1[:,:,:] = cst*2.0*xp.random.random((npts[0]+2*p,npts[1]+2*p,npts[2]+2*p)) + x2[:,:,:] = cst*5.0*xp.random.random((npts[0]+2*p,npts[1]+2*p,npts[2]+2*p)) + y1[:,:,:] = cst*2.0*xp.random.random((npts[0]+2*p,npts[1]+2*p,npts[2]+2*p)) + y2[:,:,:] = cst*3.0*xp.random.random((npts[0]+2*p,npts[1]+2*p,npts[2]+2*p)) x1.update_ghost_regions() x2.update_ghost_regions() @@ -332,13 +332,13 @@ def test_block_serial_dimension( ndim, p, P1, P2, P3, dtype ): exact_inner = V.inner(x1, y1) + V.inner(x2, y2) assert X.dtype == dtype - assert np.allclose(W.inner(X, Y), exact_inner, rtol=1e-14, atol=1e-14 ) + assert xp.allclose(W.inner(X, Y), exact_inner, rtol=1e-14, atol=1e-14 ) # Test axpy product - axpy_exact = X + np.pi * cst * Y - X.mul_iadd(np.pi * cst, Y) - assert np.allclose(X[0]._data, axpy_exact[0]._data, rtol=1e-10, atol=1e-10 ) - assert np.allclose(X[1]._data, axpy_exact[1]._data, rtol=1e-10, atol=1e-10 ) + axpy_exact = X + xp.pi * cst * Y + X.mul_iadd(xp.pi * cst, Y) + assert xp.allclose(X[0]._data, axpy_exact[0]._data, rtol=1e-10, atol=1e-10 ) + assert xp.allclose(X[1]._data, axpy_exact[1]._data, rtol=1e-10, atol=1e-10 ) M1 = StencilMatrix(V, V) M2 = 
StencilMatrix(V, V) @@ -377,7 +377,7 @@ def test_block_serial_dimension( ndim, p, P1, P2, P3, dtype ): Y[1] = M3.dot(x1) assert M.dtype == dtype - assert np.allclose((M.dot(X)).toarray(), Y.toarray(), rtol=1e-14, atol=1e-14 ) + assert xp.allclose((M.dot(X)).toarray(), Y.toarray(), rtol=1e-14, atol=1e-14 ) #=============================================================================== @pytest.mark.parametrize( 'dtype', [float] ) @@ -406,11 +406,11 @@ def test_3D_block_serial_basic_operator( dtype, npts, p, P1, P2, P3 ): W = BlockVectorSpace(V, V) if dtype==complex: - x1[:,:,:] = 2.0*np.random.random((npts[0]+2*p[0],npts[1]+2*p[1],npts[2]+2*p[2]))+1j*np.random.random((npts[0]+2*p[0],npts[1]+2*p[1],npts[2]+2*p[2])) - x2[:,:,:] = 5.0*np.random.random((npts[0]+2*p[0],npts[1]+2*p[1],npts[2]+2*p[2]))+2j*np.random.random((npts[0]+2*p[0],npts[1]+2*p[1],npts[2]+2*p[2])) + x1[:,:,:] = 2.0*xp.random.random((npts[0]+2*p[0],npts[1]+2*p[1],npts[2]+2*p[2]))+1j*xp.random.random((npts[0]+2*p[0],npts[1]+2*p[1],npts[2]+2*p[2])) + x2[:,:,:] = 5.0*xp.random.random((npts[0]+2*p[0],npts[1]+2*p[1],npts[2]+2*p[2]))+2j*xp.random.random((npts[0]+2*p[0],npts[1]+2*p[1],npts[2]+2*p[2])) else: - x1[:,:,:] = 2.0*np.random.random((npts[0]+2*p[0],npts[1]+2*p[1],npts[2]+2*p[2])) - x2[:,:,:] = 5.0*np.random.random((npts[0]+2*p[0],npts[1]+2*p[1],npts[2]+2*p[2])) + x1[:,:,:] = 2.0*xp.random.random((npts[0]+2*p[0],npts[1]+2*p[1],npts[2]+2*p[2])) + x2[:,:,:] = 5.0*xp.random.random((npts[0]+2*p[0],npts[1]+2*p[1],npts[2]+2*p[2])) x1.update_ghost_regions() @@ -431,43 +431,43 @@ def test_3D_block_serial_basic_operator( dtype, npts, p, P1, P2, P3 ): Y +=X assert Y.dtype == dtype - assert np.allclose(Y.blocks[0]._data, (x1)._data, rtol=1e-14, atol=1e-14 ) - assert np.allclose(Y.blocks[1]._data, (x2)._data, rtol=1e-14, atol=1e-14 ) + assert xp.allclose(Y.blocks[0]._data, (x1)._data, rtol=1e-14, atol=1e-14 ) + assert xp.allclose(Y.blocks[1]._data, (x2)._data, rtol=1e-14, atol=1e-14 ) Y -=2*X - assert 
np.allclose(Y.blocks[0]._data, -(x1)._data, rtol=1e-14, atol=1e-14 ) - assert np.allclose(Y.blocks[1]._data, -(x2)._data, rtol=1e-14, atol=1e-14 ) + assert xp.allclose(Y.blocks[0]._data, -(x1)._data, rtol=1e-14, atol=1e-14 ) + assert xp.allclose(Y.blocks[1]._data, -(x2)._data, rtol=1e-14, atol=1e-14 ) Y *=6 - assert np.allclose(Y.blocks[0]._data, -6*(x1)._data, rtol=1e-14, atol=1e-14 ) - assert np.allclose(Y.blocks[1]._data, -6*(x2)._data, rtol=1e-14, atol=1e-14 ) + assert xp.allclose(Y.blocks[0]._data, -6*(x1)._data, rtol=1e-14, atol=1e-14 ) + assert xp.allclose(Y.blocks[1]._data, -6*(x2)._data, rtol=1e-14, atol=1e-14 ) Y /=-6 - assert np.allclose(Y.blocks[0]._data, (x1)._data, rtol=1e-14, atol=1e-14 ) - assert np.allclose(Y.blocks[1]._data, (x2)._data, rtol=1e-14, atol=1e-14 ) + assert xp.allclose(Y.blocks[0]._data, (x1)._data, rtol=1e-14, atol=1e-14 ) + assert xp.allclose(Y.blocks[1]._data, (x2)._data, rtol=1e-14, atol=1e-14 ) Y[0]=x2 Y[1]=-x1 Z1=Y+X assert isinstance(Z1,BlockVector) - assert np.allclose(Z1.blocks[0]._data, (x1+x2)._data, rtol=1e-14, atol=1e-14 ) - assert np.allclose(Z1.blocks[1]._data, (x2-x1)._data, rtol=1e-14, atol=1e-14 ) + assert xp.allclose(Z1.blocks[0]._data, (x1+x2)._data, rtol=1e-14, atol=1e-14 ) + assert xp.allclose(Z1.blocks[1]._data, (x2-x1)._data, rtol=1e-14, atol=1e-14 ) Z2=Y-X assert isinstance(Z2,BlockVector) - assert np.allclose(Z2.blocks[0]._data, (x2-x1)._data, rtol=1e-14, atol=1e-14 ) - assert np.allclose(Z2.blocks[1]._data, (-x2-x1)._data, rtol=1e-14, atol=1e-14 ) + assert xp.allclose(Z2.blocks[0]._data, (x2-x1)._data, rtol=1e-14, atol=1e-14 ) + assert xp.allclose(Z2.blocks[1]._data, (-x2-x1)._data, rtol=1e-14, atol=1e-14 ) Z3=3*Y assert isinstance(Z3,BlockVector) - assert np.allclose(Z3.blocks[0]._data, 3*(x2)._data, rtol=1e-14, atol=1e-14 ) - assert np.allclose(Z3.blocks[1]._data, 3*(-x1)._data, rtol=1e-14, atol=1e-14 ) + assert xp.allclose(Z3.blocks[0]._data, 3*(x2)._data, rtol=1e-14, atol=1e-14 ) + assert 
xp.allclose(Z3.blocks[1]._data, 3*(-x1)._data, rtol=1e-14, atol=1e-14 ) Z4=Y/4 assert isinstance(Z4,BlockVector) - assert np.allclose(Z4.blocks[0]._data, (x2)._data/4, rtol=1e-14, atol=1e-14 ) - assert np.allclose(Z4.blocks[1]._data, (-x1)._data/4, rtol=1e-14, atol=1e-14 ) + assert xp.allclose(Z4.blocks[0]._data, (x2)._data/4, rtol=1e-14, atol=1e-14 ) + assert xp.allclose(Z4.blocks[1]._data, (-x1)._data/4, rtol=1e-14, atol=1e-14 ) M1 = StencilMatrix(V, V) @@ -495,56 +495,56 @@ def test_3D_block_serial_basic_operator( dtype, npts, p, P1, P2, P3 ): A +=M assert A.dtype == dtype - assert np.allclose(A.blocks[0][0]._data, (M1)._data, rtol=1e-14, atol=1e-14 ) - assert np.allclose(A.blocks[0][1]._data, (M2)._data, rtol=1e-14, atol=1e-14 ) - assert np.allclose(A.blocks[1][0]._data, (M3)._data, rtol=1e-14, atol=1e-14 ) + assert xp.allclose(A.blocks[0][0]._data, (M1)._data, rtol=1e-14, atol=1e-14 ) + assert xp.allclose(A.blocks[0][1]._data, (M2)._data, rtol=1e-14, atol=1e-14 ) + assert xp.allclose(A.blocks[1][0]._data, (M3)._data, rtol=1e-14, atol=1e-14 ) assert A.blocks[1][1]==None A -= 2*M - assert np.allclose(A.blocks[0][0]._data, -(M1)._data, rtol=1e-14, atol=1e-14 ) - assert np.allclose(A.blocks[0][1]._data, -(M2)._data, rtol=1e-14, atol=1e-14 ) - assert np.allclose(A.blocks[1][0]._data, -(M3)._data, rtol=1e-14, atol=1e-14 ) + assert xp.allclose(A.blocks[0][0]._data, -(M1)._data, rtol=1e-14, atol=1e-14 ) + assert xp.allclose(A.blocks[0][1]._data, -(M2)._data, rtol=1e-14, atol=1e-14 ) + assert xp.allclose(A.blocks[1][0]._data, -(M3)._data, rtol=1e-14, atol=1e-14 ) assert A.blocks[1][1]==None A *= 5 - assert np.allclose(A.blocks[0][0]._data, -5*(M1)._data, rtol=1e-14, atol=1e-14 ) - assert np.allclose(A.blocks[0][1]._data, -5*(M2)._data, rtol=1e-14, atol=1e-14 ) - assert np.allclose(A.blocks[1][0]._data, -5*(M3)._data, rtol=1e-14, atol=1e-14 ) + assert xp.allclose(A.blocks[0][0]._data, -5*(M1)._data, rtol=1e-14, atol=1e-14 ) + assert xp.allclose(A.blocks[0][1]._data, 
-5*(M2)._data, rtol=1e-14, atol=1e-14 ) + assert xp.allclose(A.blocks[1][0]._data, -5*(M3)._data, rtol=1e-14, atol=1e-14 ) assert A.blocks[1][1]==None A /= -5 - assert np.allclose(A.blocks[0][0]._data, (M1)._data, rtol=1e-14, atol=1e-14 ) - assert np.allclose(A.blocks[0][1]._data, (M2)._data, rtol=1e-14, atol=1e-14 ) - assert np.allclose(A.blocks[1][0]._data, (M3)._data, rtol=1e-14, atol=1e-14 ) + assert xp.allclose(A.blocks[0][0]._data, (M1)._data, rtol=1e-14, atol=1e-14 ) + assert xp.allclose(A.blocks[0][1]._data, (M2)._data, rtol=1e-14, atol=1e-14 ) + assert xp.allclose(A.blocks[1][0]._data, (M3)._data, rtol=1e-14, atol=1e-14 ) assert A.blocks[1][1]==None A= BlockLinearOperator(W, W, blocks=[[None, M3], [M2, M1]]) A1=A+M assert isinstance(A1,BlockLinearOperator) - assert np.allclose(A1.blocks[0][0]._data, (M1)._data, rtol=1e-14, atol=1e-14 ) - assert np.allclose(A1.blocks[0][1]._data, (M2+M3)._data, rtol=1e-14, atol=1e-14 ) - assert np.allclose(A1.blocks[1][0]._data, (M3+M2)._data, rtol=1e-14, atol=1e-14 ) - assert np.allclose(A1.blocks[1][1]._data, (M1)._data, rtol=1e-14, atol=1e-14 ) + assert xp.allclose(A1.blocks[0][0]._data, (M1)._data, rtol=1e-14, atol=1e-14 ) + assert xp.allclose(A1.blocks[0][1]._data, (M2+M3)._data, rtol=1e-14, atol=1e-14 ) + assert xp.allclose(A1.blocks[1][0]._data, (M3+M2)._data, rtol=1e-14, atol=1e-14 ) + assert xp.allclose(A1.blocks[1][1]._data, (M1)._data, rtol=1e-14, atol=1e-14 ) A2=A-M assert isinstance(A2,BlockLinearOperator) - assert np.allclose(A2.blocks[0][0]._data, (-M1)._data, rtol=1e-14, atol=1e-14 ) - assert np.allclose(A2.blocks[0][1]._data, (M3-M2)._data, rtol=1e-14, atol=1e-14 ) - assert np.allclose(A2.blocks[1][0]._data, (M2-M3)._data, rtol=1e-14, atol=1e-14 ) - assert np.allclose(A2.blocks[1][1]._data, (M1)._data, rtol=1e-14, atol=1e-14 ) + assert xp.allclose(A2.blocks[0][0]._data, (-M1)._data, rtol=1e-14, atol=1e-14 ) + assert xp.allclose(A2.blocks[0][1]._data, (M3-M2)._data, rtol=1e-14, atol=1e-14 ) + assert 
xp.allclose(A2.blocks[1][0]._data, (M2-M3)._data, rtol=1e-14, atol=1e-14 ) + assert xp.allclose(A2.blocks[1][1]._data, (M1)._data, rtol=1e-14, atol=1e-14 ) A3=6*A assert isinstance(A3,BlockLinearOperator) - assert np.allclose(A3.blocks[0][1]._data, 6*(M3)._data, rtol=1e-14, atol=1e-14 ) - assert np.allclose(A3.blocks[1][0]._data, 6*(M2)._data, rtol=1e-14, atol=1e-14 ) - assert np.allclose(A3.blocks[1][1]._data, 6*(M1)._data, rtol=1e-14, atol=1e-14 ) + assert xp.allclose(A3.blocks[0][1]._data, 6*(M3)._data, rtol=1e-14, atol=1e-14 ) + assert xp.allclose(A3.blocks[1][0]._data, 6*(M2)._data, rtol=1e-14, atol=1e-14 ) + assert xp.allclose(A3.blocks[1][1]._data, 6*(M1)._data, rtol=1e-14, atol=1e-14 ) A4=A/5 assert isinstance(A4,BlockLinearOperator) - assert np.allclose(A4.blocks[0][1]._data, (M3)._data/5, rtol=1e-14, atol=1e-14 ) - assert np.allclose(A4.blocks[1][0]._data, (M2)._data/5, rtol=1e-14, atol=1e-14 ) - assert np.allclose(A4.blocks[1][1]._data, (M1)._data/5, rtol=1e-14, atol=1e-14 ) + assert xp.allclose(A4.blocks[0][1]._data, (M3)._data/5, rtol=1e-14, atol=1e-14 ) + assert xp.allclose(A4.blocks[1][0]._data, (M2)._data/5, rtol=1e-14, atol=1e-14 ) + assert xp.allclose(A4.blocks[1][1]._data, (M1)._data/5, rtol=1e-14, atol=1e-14 ) #=============================================================================== @pytest.mark.parametrize( 'dtype', [float] ) @@ -570,11 +570,11 @@ def test_2D_block_serial_math( dtype, npts, p, P1, P2 ): W = BlockVectorSpace(V, V) if dtype==complex: - x1[:,:,:] = 2.0*np.random.random((npts[0]+2*p[0],npts[1]+2*p[1]))+1j*np.random.random((npts[0]+2*p[0],npts[1]+2*p[1])) - x2[:,:,:] = 5.0*np.random.random((npts[0]+2*p[0],npts[1]+2*p[1]))+2j*np.random.random((npts[0]+2*p[0],npts[1]+2*p[1])) + x1[:,:,:] = 2.0*xp.random.random((npts[0]+2*p[0],npts[1]+2*p[1]))+1j*xp.random.random((npts[0]+2*p[0],npts[1]+2*p[1])) + x2[:,:,:] = 5.0*xp.random.random((npts[0]+2*p[0],npts[1]+2*p[1]))+2j*xp.random.random((npts[0]+2*p[0],npts[1]+2*p[1])) else: - 
x1[:,:,:] = 2.0*np.random.random((npts[0]+2*p[0],npts[1]+2*p[1])) - x2[:,:,:] = 5.0*np.random.random((npts[0]+2*p[0],npts[1]+2*p[1])) + x1[:,:,:] = 2.0*xp.random.random((npts[0]+2*p[0],npts[1]+2*p[1])) + x2[:,:,:] = 5.0*xp.random.random((npts[0]+2*p[0],npts[1]+2*p[1])) x1.update_ghost_regions() @@ -593,12 +593,12 @@ def test_2D_block_serial_math( dtype, npts, p, P1, P2 ): X[1] = x2 Xc=X.conjugate() - assert np.allclose(Xc.blocks[0].toarray(), x1a, rtol=1e-14, atol=1e-14 ) - assert np.allclose(Xc.blocks[1].toarray(), x2a, rtol=1e-14, atol=1e-14 ) + assert xp.allclose(Xc.blocks[0].toarray(), x1a, rtol=1e-14, atol=1e-14 ) + assert xp.allclose(Xc.blocks[1].toarray(), x2a, rtol=1e-14, atol=1e-14 ) Xc=X.conj() - assert np.allclose(Xc.blocks[0].toarray(), x1a, rtol=1e-14, atol=1e-14 ) - assert np.allclose(Xc.blocks[1].toarray(), x2a, rtol=1e-14, atol=1e-14 ) + assert xp.allclose(Xc.blocks[0].toarray(), x1a, rtol=1e-14, atol=1e-14 ) + assert xp.allclose(Xc.blocks[1].toarray(), x2a, rtol=1e-14, atol=1e-14 ) M1 = StencilMatrix(V, V) @@ -632,14 +632,14 @@ def test_2D_block_serial_math( dtype, npts, p, P1, P2 ): M = BlockLinearOperator(W, W, blocks=[[M1, M2], [M3, None]]) Mc = M.conjugate() - assert np.allclose(Mc.blocks[0][0].toarray(), M1a, rtol=1e-14, atol=1e-14 ) - assert np.allclose(Mc.blocks[0][1].toarray(), M2a, rtol=1e-14, atol=1e-14 ) - assert np.allclose(Mc.blocks[1][0].toarray(), M3a, rtol=1e-14, atol=1e-14 ) + assert xp.allclose(Mc.blocks[0][0].toarray(), M1a, rtol=1e-14, atol=1e-14 ) + assert xp.allclose(Mc.blocks[0][1].toarray(), M2a, rtol=1e-14, atol=1e-14 ) + assert xp.allclose(Mc.blocks[1][0].toarray(), M3a, rtol=1e-14, atol=1e-14 ) Mc = M.conj() - assert np.allclose(Mc.blocks[0][0].toarray(), M1a, rtol=1e-14, atol=1e-14 ) - assert np.allclose(Mc.blocks[0][1].toarray(), M2a, rtol=1e-14, atol=1e-14 ) - assert np.allclose(Mc.blocks[1][0].toarray(), M3a, rtol=1e-14, atol=1e-14 ) + assert xp.allclose(Mc.blocks[0][0].toarray(), M1a, rtol=1e-14, atol=1e-14 ) + 
assert xp.allclose(Mc.blocks[0][1].toarray(), M2a, rtol=1e-14, atol=1e-14 ) + assert xp.allclose(Mc.blocks[1][0].toarray(), M3a, rtol=1e-14, atol=1e-14 ) #=============================================================================== @pytest.mark.parametrize( 'dtype', [float] ) @@ -722,8 +722,8 @@ def test_block_linear_operator_serial_dot( dtype, n1, n2, p1, p2, P1, P2 ): y2 = M3.dot(x1) # Check data in 1D array - assert np.allclose( Y.blocks[0].toarray(), y1.toarray(), rtol=1e-14, atol=1e-14 ) - assert np.allclose( Y.blocks[1].toarray(), y2.toarray(), rtol=1e-14, atol=1e-14 ) + assert xp.allclose( Y.blocks[0].toarray(), y1.toarray(), rtol=1e-14, atol=1e-14 ) + assert xp.allclose( Y.blocks[1].toarray(), y2.toarray(), rtol=1e-14, atol=1e-14 ) #=============================================================================== @pytest.mark.parametrize( 'dtype', [float] ) @pytest.mark.parametrize( 'n1', [8, 16] ) @@ -777,8 +777,8 @@ def test_block_2d_serial_array_to_psydac( dtype, n1, n2, p1, p2, P1, P2 ): v = array_to_psydac(xa, W) v2 = array_to_psydac(x2a, W2) - assert np.allclose( xa , v.toarray() ) - assert np.allclose( x2a , v2.toarray() ) + assert xp.allclose( xa , v.toarray() ) + assert xp.allclose( x2a , v2.toarray() ) #=============================================================================== @pytest.mark.parametrize( 'dtype', [float] ) @@ -827,7 +827,7 @@ def test_block_vector_2d_serial_topetsc( dtype, n1, n2, p1, p2, P1, P2 ): v = petsc_to_psydac(v, W) # The vectors can only be compared in the serial case - assert np.allclose( x.toarray() , v.toarray() ) + assert xp.allclose( x.toarray() , v.toarray() ) #=============================================================================== @pytest.mark.parametrize( 'dtype', [float] ) @@ -976,8 +976,8 @@ def test_block_linear_operator_dot_backend( dtype, n1, n2, p1, p2, P1, P2, backe # y2 = M3.dot(x1) + M4.dot(x2) # # Check data in 1D array - # assert np.allclose( Y.blocks[0].toarray(), y1.toarray(), rtol=1e-13, 
atol=1e-13 ) - # assert np.allclose( Y.blocks[1].toarray(), y2.toarray(), rtol=1e-13, atol=1e-13 ) + # assert xp.allclose( Y.blocks[0].toarray(), y1.toarray(), rtol=1e-13, atol=1e-13 ) + # assert xp.allclose( Y.blocks[1].toarray(), y2.toarray(), rtol=1e-13, atol=1e-13 ) #=============================================================================== # PARALLEL TESTS #=============================================================================== @@ -1063,8 +1063,8 @@ def test_block_linear_operator_parallel_dot( dtype, n1, n2, p1, p2, P1, P2 ): y2 = M3.dot(x1) + M4.dot(x2) # Check data in 1D array - assert np.allclose( Y.blocks[0].toarray(), y1.toarray(), rtol=1e-14, atol=1e-14 ) - assert np.allclose( Y.blocks[1].toarray(), y2.toarray(), rtol=1e-14, atol=1e-14 ) + assert xp.allclose( Y.blocks[0].toarray(), y1.toarray(), rtol=1e-14, atol=1e-14 ) + assert xp.allclose( Y.blocks[1].toarray(), y2.toarray(), rtol=1e-14, atol=1e-14 ) # Test copy with an out # Create random matrix @@ -1097,8 +1097,8 @@ def test_block_linear_operator_parallel_dot( dtype, n1, n2, p1, p2, P1, P2 ): K.dot(X, out= Y) # Check data in 1D array - assert np.allclose( Y.blocks[0].toarray(), y1.toarray(), rtol=1e-14, atol=1e-14 ) - assert np.allclose( Y.blocks[1].toarray(), y2.toarray(), rtol=1e-14, atol=1e-14 ) + assert xp.allclose( Y.blocks[0].toarray(), y1.toarray(), rtol=1e-14, atol=1e-14 ) + assert xp.allclose( Y.blocks[1].toarray(), y2.toarray(), rtol=1e-14, atol=1e-14 ) # Test transpose with an out, check that we overwrite the random entries L.transpose(out = N) @@ -1111,8 +1111,8 @@ def test_block_linear_operator_parallel_dot( dtype, n1, n2, p1, p2, P1, P2 ): y2 = M2.T.dot(x1) + M4.T.dot(x2) # Check data in 1D array - assert np.allclose( Z.blocks[0].toarray(), y1.toarray(), rtol=1e-14, atol=1e-14 ) - assert np.allclose( Z.blocks[1].toarray(), y2.toarray(), rtol=1e-14, atol=1e-14 ) + assert xp.allclose( Z.blocks[0].toarray(), y1.toarray(), rtol=1e-14, atol=1e-14 ) + assert xp.allclose( 
Z.blocks[1].toarray(), y2.toarray(), rtol=1e-14, atol=1e-14 ) # =============================================================================== @pytest.mark.parametrize('dtype', [float]) @@ -1171,12 +1171,12 @@ def test_block_vector_2d_parallel_array_to_psydac(dtype, n1, n2, p1, p2, s1, s2, # Apply array_to_psydac first, and toarray next - xa_r_inv = np.array(np.random.rand(xa.size), dtype=dtype)*xa # the vector must be distributed as xa + xa_r_inv = xp.array(xp.random.rand(xa.size), dtype=dtype)*xa # the vector must be distributed as xa x_r_inv = array_to_psydac(xa_r_inv, W) x_r_inv.update_ghost_regions() va_r_inv = x_r_inv.toarray() - x2a_r_inv = np.array(np.random.rand(x2a.size), dtype=dtype)*x2a # the vector must be distributed as xa + x2a_r_inv = xp.array(xp.random.rand(x2a.size), dtype=dtype)*x2a # the vector must be distributed as xa x2_r_inv = array_to_psydac(x2a_r_inv, W2) x2_r_inv.update_ghost_regions() v2a_r_inv = x2_r_inv.toarray() @@ -1188,16 +1188,16 @@ def test_block_vector_2d_parallel_array_to_psydac(dtype, n1, n2, p1, p2, s1, s2, assert isinstance(w2, BlockVector) assert w2.space is W2 for i in range(2): - assert np.array_equal(x[i]._data, w[i]._data) + assert xp.array_equal(x[i]._data, w[i]._data) for j in range(2): - assert np.array_equal(x2[i][j]._data, w2[i][j]._data) - assert np.array_equal(x2[i][j]._data, w2[i][j]._data) + assert xp.array_equal(x2[i][j]._data, w2[i][j]._data) + assert xp.array_equal(x2[i][j]._data, w2[i][j]._data) - assert np.array_equal(x2[2]._data, w2[2]._data) + assert xp.array_equal(x2[2]._data, w2[2]._data) # right inverse: - assert np.array_equal(xa_r_inv, va_r_inv) - assert np.array_equal(x2a_r_inv, v2a_r_inv) + assert xp.array_equal(xa_r_inv, va_r_inv) + assert xp.array_equal(x2a_r_inv, v2a_r_inv) #=============================================================================== @pytest.mark.parametrize( 'dtype', [float] ) @@ -1253,7 +1253,7 @@ def test_block_vector_2d_parallel_topetsc( dtype, n1, n2, p1, p2, P1, P2 ): 
v = petsc_to_psydac(x.topetsc(), W) - assert np.allclose( x.toarray() , v.toarray(), rtol=1e-12, atol=1e-12 ) + assert xp.allclose( x.toarray() , v.toarray(), rtol=1e-12, atol=1e-12 ) #=============================================================================== @pytest.mark.parametrize( 'dtype', [float] ) @@ -1323,7 +1323,7 @@ def test_block_linear_operator_1d_parallel_topetsc( dtype, n1, p1, P1): # Cast result back to Psydac BlockVector format y_p = petsc_to_psydac(y_petsc, V) - assert np.allclose(y_p.toarray(), y.toarray(), rtol=1e-12, atol=1e-12) + assert xp.allclose(y_p.toarray(), y.toarray(), rtol=1e-12, atol=1e-12) #=============================================================================== @pytest.mark.parametrize( 'dtype', [float] ) @@ -1402,7 +1402,7 @@ def test_block_linear_operator_2d_parallel_topetsc( dtype, n1, n2, p1, p2, P1, P # Cast result back to Psydac BlockVector format y_p = petsc_to_psydac(y_petsc, L.codomain) - assert np.allclose(y_p.toarray(), y.toarray(), rtol=1e-12, atol=1e-12) + assert xp.allclose(y_p.toarray(), y.toarray(), rtol=1e-12, atol=1e-12) #=============================================================================== @@ -1492,11 +1492,11 @@ def test_block_matrix_operator_parallel_dot_backend( dtype, n1, n2, p1, p2, P1, # X.mul_iadd(5 * factor, Y) # # Test exact value and symetry of the scalar product - # assert np.allclose(X[0]._data, z3[0]._data) + # assert xp.allclose(X[0]._data, z3[0]._data) # # Check data in 1D array - # assert np.allclose( Y.blocks[0].toarray(), y1.toarray(), rtol=1e-13, atol=1e-13 ) - # assert np.allclose( Y.blocks[1].toarray(), y2.toarray(), rtol=1e-13, atol=1e-13 ) + # assert xp.allclose( Y.blocks[0].toarray(), y1.toarray(), rtol=1e-13, atol=1e-13 ) + # assert xp.allclose( Y.blocks[1].toarray(), y2.toarray(), rtol=1e-13, atol=1e-13 ) #=============================================================================== # SCRIPT FUNCTIONALITY diff --git a/psydac/linalg/tests/test_fft.py 
b/psydac/linalg/tests/test_fft.py index 014ce95eb..6060547b7 100644 --- a/psydac/linalg/tests/test_fft.py +++ b/psydac/linalg/tests/test_fft.py @@ -1,6 +1,6 @@ import pytest import scipy.fft as scifft -import numpy as np +import cunumpy as xp from psydac.ddm.mpi import mpi as MPI from psydac.linalg.fft import * @@ -18,7 +18,7 @@ def compute_global_starts_ends(domain_decomposition, npts): global_ends [axis] = ee.copy() global_ends [axis][-1] = npts[axis]-1 - global_starts[axis] = np.array([0] + (global_ends[axis][:-1]+1).tolist()) + global_starts[axis] = xp.array([0] + (global_ends[axis][:-1]+1).tolist()) return global_starts, global_ends #=============================================================================== @@ -39,7 +39,7 @@ def decode_fft_type(ffttype): raise NotImplementedError() def method_test(seed, comm, config, dtype, classtype, comparison, verbose=False): - np.random.seed(seed) + xp.random.seed(seed) if comm is None: rank = -1 @@ -66,10 +66,10 @@ def method_test(seed, comm, config, dtype, classtype, comparison, verbose=False) if verbose: print(f'[{rank}] Vector spaces built', flush=True) - if np.dtype(dtype).kind == 'c': - Y_glob = np.random.random(V.npts) + np.random.random(V.npts) * 1j + if xp.dtype(dtype).kind == 'c': + Y_glob = xp.random.random(V.npts) + xp.random.random(V.npts) * 1j else: - Y_glob = np.random.random(V.npts) + Y_glob = xp.random.random(V.npts) # vector to solve for (Y) Y = StencilVector(V) @@ -87,7 +87,7 @@ def method_test(seed, comm, config, dtype, classtype, comparison, verbose=False) if verbose: print(f'[{rank}] Functions have been run', flush=True) - assert np.allclose(X_glob[localslice], X[localslice], 1e-10, 1e-10) + assert xp.allclose(X_glob[localslice], X[localslice], 1e-10, 1e-10) @pytest.mark.parametrize( 'seed', [0, 2] ) @pytest.mark.parametrize( 'params', [([8], [2], [False]), ([8,9], [2,3], [False,True]), ([8,9,17], [2,3,7], [False,True,False])] ) diff --git a/psydac/linalg/tests/test_kron_stencil_matrix.py 
b/psydac/linalg/tests/test_kron_stencil_matrix.py index f7d3a4ac7..bc7e5f48a 100644 --- a/psydac/linalg/tests/test_kron_stencil_matrix.py +++ b/psydac/linalg/tests/test_kron_stencil_matrix.py @@ -1,7 +1,7 @@ from functools import reduce import pytest -import numpy as np +import cunumpy as xp from scipy.sparse import kron from psydac.ddm.cart import DomainDecomposition, CartDecomposition @@ -20,7 +20,7 @@ def compute_global_starts_ends(domain_decomposition, npts): global_ends [axis] = ee.copy() global_ends [axis][-1] = npts[axis]-1 - global_starts[axis] = np.array([0] + (global_ends[axis][:-1]+1).tolist()) + global_starts[axis] = xp.array([0] + (global_ends[axis][:-1]+1).tolist()) return tuple(global_starts), tuple(global_ends) @@ -111,4 +111,4 @@ def test_KroneckerStencilMatrix(dtype, npts, pads, periodic): assert (M_sp.T - M.T.tosparse().tocsr()).count_nonzero() == 0 # Test dot product - assert np.array_equal(M_sp.dot(w.toarray()), M.dot(w).toarray()) + assert xp.array_equal(M_sp.dot(w.toarray()), M.dot(w).toarray()) diff --git a/psydac/linalg/tests/test_linalg.py b/psydac/linalg/tests/test_linalg.py index 8ce093094..c0c497af4 100644 --- a/psydac/linalg/tests/test_linalg.py +++ b/psydac/linalg/tests/test_linalg.py @@ -1,5 +1,5 @@ import pytest -import numpy as np +import cunumpy as xp from psydac.linalg.block import BlockLinearOperator, BlockVector, BlockVectorSpace from psydac.linalg.basic import LinearOperator, ZeroOperator, IdentityOperator, ComposedLinearOperator, SumLinearOperator, PowerLinearOperator, ScaledLinearOperator @@ -15,7 +15,7 @@ p2array = [1, 3] def array_equal(a, b): - return np.array_equal(a.toarray(), b.toarray()) + return xp.array_equal(a.toarray(), b.toarray()) def sparse_equal(a, b): return (a.tosparse() != b.tosparse()).nnz == 0 @@ -23,7 +23,7 @@ def sparse_equal(a, b): def assert_pos_def(A): assert isinstance(A, LinearOperator) A_array = A.toarray() - assert np.all(np.linalg.eigvals(A_array) > 0) + assert xp.all(xp.linalg.eigvals(A_array) 
> 0) def compute_global_starts_ends(domain_decomposition, npts): ndims = len(npts) @@ -36,7 +36,7 @@ def compute_global_starts_ends(domain_decomposition, npts): global_ends [axis] = ee.copy() global_ends [axis][-1] = npts[axis]-1 - global_starts[axis] = np.array([0] + (global_ends[axis][:-1]+1).tolist()) + global_starts[axis] = xp.array([0] + (global_ends[axis][:-1]+1).tolist()) return global_starts, global_ends @@ -51,7 +51,7 @@ def get_StencilVectorSpace(npts, pads, periods): def get_positive_definite_StencilMatrix(V): - np.random.seed(2) + xp.random.seed(2) assert isinstance(V, StencilVectorSpace) [n1, n2] = V._npts [p1, p2] = V._pads @@ -63,12 +63,12 @@ def get_positive_definite_StencilMatrix(V): for i in range(0, p1+1): if i != 0: for j in range(-p2, p2+1): - S[:, :, i, j] = 2*np.random.random()-1 + S[:, :, i, j] = 2*xp.random.random()-1 else: for j in range(1, p2+1): - S[:, :, i, j] = 2*np.random.random()-1 + S[:, :, i, j] = 2*xp.random.random()-1 S += S.T - S[:, :, 0, 0] = ((n1 * n2) - 1) / np.random.random() + S[:, :, 0, 0] = ((n1 * n2) - 1) / xp.random.random() S /= S[0, 0, 0, 0] S.remove_spurious_entries() @@ -161,7 +161,7 @@ def test_square_stencil_basic(n1, n2, p1, p2, P1=False, P2=False): S2a = S2.toarray() # Construct exact matrices by hand - A1 = np.zeros( S.shape ) + A1 = xp.zeros( S.shape ) for i1 in range(n1): for i2 in range(n2): for k1 in range(-p1,p1+1): @@ -173,7 +173,7 @@ def test_square_stencil_basic(n1, n2, p1, p2, P1=False, P2=False): if (P1 or 0 <= i1+k1 < n1) and (P2 or 0 <= i2+k2 < n2): A1[i,j] = nonzero_values[k1,k2] - A2 = np.zeros( S.shape ) + A2 = xp.zeros( S.shape ) for i1 in range(n1): for i2 in range(n2): for k1 in range(-p1,p1+1): @@ -186,12 +186,12 @@ def test_square_stencil_basic(n1, n2, p1, p2, P1=False, P2=False): A2[i,j] = nonzero_values1[k1,k2] # Check shape and data in 2D array - assert np.array_equal(v.toarray(), np.ones(n1 * n2)) + assert xp.array_equal(v.toarray(), xp.ones(n1 * n2)) assert Sa.shape == S.shape - assert 
np.array_equal( Sa, A1 ) + assert xp.array_equal( Sa, A1 ) assert S1a.shape == S1.shape - assert np.array_equal( S1a, A2 ) + assert xp.array_equal( S1a, A2 ) ### ### 2. Test general basic operations @@ -217,9 +217,9 @@ def test_square_stencil_basic(n1, n2, p1, p2, P1=False, P2=False): ## ___Multiplication, Composition, Raising to a Power___ # Multiplying and Dividing a StencilMatrix by a scalar returns a StencilMatrix - assert isinstance(np.pi * S, StencilMatrix) - assert isinstance(S * np.pi, StencilMatrix) - assert isinstance(S / np.pi, StencilMatrix) + assert isinstance(xp.pi * S, StencilMatrix) + assert isinstance(S * xp.pi, StencilMatrix) + assert isinstance(S / xp.pi, StencilMatrix) # Composing StencilMatrices works assert isinstance(S @ S1, ComposedLinearOperator) @@ -229,10 +229,10 @@ def test_square_stencil_basic(n1, n2, p1, p2, P1=False, P2=False): ## ___Transposing___ - assert not np.array_equal(S2a, S2a.T) # using a nonsymmetric matrix throughout + assert not xp.array_equal(S2a, S2a.T) # using a nonsymmetric matrix throughout assert isinstance(S2.T, StencilMatrix) - assert np.array_equal(S2.T.toarray(), S2a.T) - assert np.array_equal(S2.T.T.toarray(), S2a) + assert xp.array_equal(S2.T.toarray(), S2a.T) + assert xp.array_equal(S2.T.T.toarray(), S2a) ### ### 3. 
Test special cases @@ -379,9 +379,9 @@ def test_square_block_basic(n1, n2, p1, p2, P1=False, P2=False): ## ___Multiplication, Composition, Raising to a Power___ # Multiplying and Dividing a BlockLO by a scalar returns a BlockLO - assert isinstance(np.pi * B, BlockLinearOperator) - assert isinstance(B * np.pi, BlockLinearOperator) - assert isinstance(B / np.pi, BlockLinearOperator) + assert isinstance(xp.pi * B, BlockLinearOperator) + assert isinstance(B * xp.pi, BlockLinearOperator) + assert isinstance(B / xp.pi, BlockLinearOperator) # Composing BlockLOs works assert isinstance(B @ B1, ComposedLinearOperator) @@ -390,10 +390,10 @@ def test_square_block_basic(n1, n2, p1, p2, P1=False, P2=False): assert isinstance(B**3, PowerLinearOperator) ## ___Transposing___ - assert not np.array_equal(B2.toarray(), B2.toarray().T) # using a nonsymmetric matrix throughout + assert not xp.array_equal(B2.toarray(), B2.toarray().T) # using a nonsymmetric matrix throughout assert isinstance(B2.T, BlockLinearOperator) - assert np.array_equal(B2.T.toarray(), B2.toarray().T) - assert np.array_equal(B2.T.T.toarray(), B2.toarray()) + assert xp.array_equal(B2.T.toarray(), B2.toarray().T) + assert xp.array_equal(B2.T.T.toarray(), B2.toarray()) ### ### 3. 
Test special cases @@ -453,7 +453,7 @@ def test_in_place_operations(n1, n2, p1, p2, P1=False, P2=False): Vc._dtype = complex v = StencilVector(V) vc = StencilVector(Vc) - v_array = np.zeros(n1*n2) + v_array = xp.zeros(n1*n2) for i in range(n1): for j in range(n2): @@ -473,13 +473,13 @@ def test_in_place_operations(n1, n2, p1, p2, P1=False, P2=False): I4 *= 3j v4 = I4.dot(vc) - assert np.array_equal(v.toarray(), v_array) + assert xp.array_equal(v.toarray(), v_array) assert isinstance(I1, ZeroOperator) assert isinstance(I2, IdentityOperator) assert isinstance(I3, ScaledLinearOperator) - assert np.array_equal(v3.toarray(), np.dot(v_array, 3)) + assert xp.array_equal(v3.toarray(), xp.dot(v_array, 3)) assert isinstance(I4, ScaledLinearOperator) - assert np.array_equal(v4.toarray(), np.dot(v_array, 3j)) + assert xp.array_equal(v4.toarray(), xp.dot(v_array, 3j)) # testing __iadd__ and __isub__ although not explicitly implemented (in the LinearOperator class) @@ -518,7 +518,7 @@ def test_in_place_operations(n1, n2, p1, p2, P1=False, P2=False): w = S.dot(v) assert isinstance(S, StencilMatrix) - assert np.array_equal(w.toarray(), np.dot(np.dot(2, Sa), v_array)) + assert xp.array_equal(w.toarray(), xp.dot(xp.dot(2, Sa), v_array)) Z3 -= T T -= Z2 @@ -528,7 +528,7 @@ def test_in_place_operations(n1, n2, p1, p2, P1=False, P2=False): assert isinstance(Z3, StencilMatrix) assert isinstance(T, StencilMatrix) - assert np.array_equal(w2.toarray(), np.dot(np.dot(2, Sa), v_array)) + assert xp.array_equal(w2.toarray(), xp.dot(xp.dot(2, Sa), v_array)) #=============================================================================== @pytest.mark.parametrize('n1', n1array) @@ -626,10 +626,10 @@ def test_inverse_transpose_interaction(n1, n2, p1, p2, P1=False, P2=False): ### # Square root test - scaled_matrix = B * np.random.random() # Ensure the diagonal elements != 1 + scaled_matrix = B * xp.random.random() # Ensure the diagonal elements != 1 diagonal_values = 
scaled_matrix.diagonal(sqrt=False).toarray() sqrt_diagonal_values = scaled_matrix.diagonal(sqrt=True).toarray() - assert np.array_equal(sqrt_diagonal_values, np.sqrt(diagonal_values)) + assert xp.array_equal(sqrt_diagonal_values, xp.sqrt(diagonal_values)) tol = 1e-5 C = inverse(B, 'cg', tol=tol) @@ -784,29 +784,29 @@ def test_operator_evaluation(n1, n2, p1, p2): b0 = ( B**0 @ u ).toarray() b1 = ( B**1 @ u ).toarray() b2 = ( B**2 @ u ).toarray() - assert np.array_equal(uarr, b0) - assert np.linalg.norm( np.dot(Bmat, uarr) - b1 ) < 1e-10 - assert np.linalg.norm( np.dot(Bmat, np.dot(Bmat, uarr)) - b2 ) < 1e-10 + assert xp.array_equal(uarr, b0) + assert xp.linalg.norm( xp.dot(Bmat, uarr) - b1 ) < 1e-10 + assert xp.linalg.norm( xp.dot(Bmat, xp.dot(Bmat, uarr)) - b2 ) < 1e-10 bi0 = ( B_ILO**0 @ u ).toarray() bi1 = ( B_ILO**1 @ u ).toarray() bi2 = ( B_ILO**2 @ u ).toarray() - B_inv_mat = np.linalg.inv(Bmat) - b_inv_arr = np.matrix.flatten(B_inv_mat) - error_est = 2 + n1 * n2 * np.max( [ np.abs(b_inv_arr[i]) for i in range(len(b_inv_arr)) ] ) - assert np.array_equal(uarr, bi0) - bi12 = np.linalg.solve(Bmat, uarr) - bi22 = np.linalg.solve(Bmat, bi12) - assert np.linalg.norm( (Bmat @ bi12) - uarr ) < tol - assert np.linalg.norm( (Bmat @ bi22) - bi12 ) < error_est * tol + B_inv_mat = xp.linalg.inv(Bmat) + b_inv_arr = xp.matrix.flatten(B_inv_mat) + error_est = 2 + n1 * n2 * xp.max( [ xp.abs(b_inv_arr[i]) for i in range(len(b_inv_arr)) ] ) + assert xp.array_equal(uarr, bi0) + bi12 = xp.linalg.solve(Bmat, uarr) + bi22 = xp.linalg.solve(Bmat, bi12) + assert xp.linalg.norm( (Bmat @ bi12) - uarr ) < tol + assert xp.linalg.norm( (Bmat @ bi22) - bi12 ) < error_est * tol zeros = U.zeros().toarray() z0 = ( Z**0 @ u ).toarray() z1 = ( Z**1 @ u ).toarray() z2 = ( Z**2 @ u ).toarray() - assert np.array_equal(uarr, z0) - assert np.array_equal(zeros, z1) - assert np.array_equal(zeros, z2) + assert xp.array_equal(uarr, z0) + assert xp.array_equal(zeros, z1) + assert xp.array_equal(zeros, z2) 
Smat = S.toarray() assert_pos_def(S) @@ -814,28 +814,28 @@ def test_operator_evaluation(n1, n2, p1, p2): s0 = ( S**0 @ v ).toarray() s1 = ( S**1 @ v ).toarray() s2 = ( S**2 @ v ).toarray() - assert np.array_equal(varr, s0) - assert np.linalg.norm( np.dot(Smat, varr) - s1 ) < 1e-10 - assert np.linalg.norm( np.dot(Smat, np.dot(Smat, varr)) - s2 ) < 1e-10 + assert xp.array_equal(varr, s0) + assert xp.linalg.norm( xp.dot(Smat, varr) - s1 ) < 1e-10 + assert xp.linalg.norm( xp.dot(Smat, xp.dot(Smat, varr)) - s2 ) < 1e-10 si0 = ( S_ILO**0 @ v ).toarray() si1 = ( S_ILO**1 @ v ).toarray() si2 = ( S_ILO**2 @ v ).toarray() - S_inv_mat = np.linalg.inv(Smat) - s_inv_arr = np.matrix.flatten(S_inv_mat) - error_est = 2 + n1 * n2 * np.max( [ np.abs(s_inv_arr[i]) for i in range(len(s_inv_arr)) ] ) - assert np.array_equal(varr, si0) - si12 = np.linalg.solve(Smat, varr) - si22 = np.linalg.solve(Smat, si12) - assert np.linalg.norm( (Smat @ si12) - varr ) < tol - assert np.linalg.norm( (Smat @ si22) - si12 ) < error_est * tol + S_inv_mat = xp.linalg.inv(Smat) + s_inv_arr = xp.matrix.flatten(S_inv_mat) + error_est = 2 + n1 * n2 * xp.max( [ xp.abs(s_inv_arr[i]) for i in range(len(s_inv_arr)) ] ) + assert xp.array_equal(varr, si0) + si12 = xp.linalg.solve(Smat, varr) + si22 = xp.linalg.solve(Smat, si12) + assert xp.linalg.norm( (Smat @ si12) - varr ) < tol + assert xp.linalg.norm( (Smat @ si22) - si12 ) < error_est * tol i0 = ( I**0 @ v ).toarray() i1 = ( I**1 @ v ).toarray() i2 = ( I**2 @ v ).toarray() - assert np.array_equal(varr, i0) - assert np.array_equal(varr, i1) - assert np.array_equal(varr, i2) + assert xp.array_equal(varr, i0) + assert xp.array_equal(varr, i1) + assert xp.array_equal(varr, i2) ### 2.2 SumLO tests Sum1 = B + B_ILO + B + B_ILO @@ -844,16 +844,16 @@ def test_operator_evaluation(n1, n2, p1, p2): sum2 = Sum2 @ v u_approx = B @ (0.5*(sum1 - 2*B@u)) v_approx = S @ (0.5*(sum2 - 2*S@v)) - assert np.linalg.norm( (u_approx - u).toarray() ) < tol - assert np.linalg.norm( 
(v_approx - v).toarray() ) < tol + assert xp.linalg.norm( (u_approx - u).toarray() ) < tol + assert xp.linalg.norm( (v_approx - v).toarray() ) < tol ### 2.3 CompLO tests C1 = B @ (-B) C2 = S @ (-S) c1 = ( C1 @ u ).toarray() c2 = ( C2 @ v ).toarray() - assert np.array_equal(-c1, b2) - assert np.array_equal(-c2, s2) + assert xp.array_equal(-c1, b2) + assert xp.array_equal(-c2, s2) ### 2.4 Huge composition ZV = ZeroOperator(V, V) @@ -863,7 +863,7 @@ def test_operator_evaluation(n1, n2, p1, p2): H4 = 2 * (S**1 @ S**0) H5 = ZV @ I H = H1 @ ( H2 + H3 - H4 + H5 ).T - assert np.linalg.norm( (H @ v).toarray() - v.toarray() ) < 10 * tol + assert xp.linalg.norm( (H @ v).toarray() - v.toarray() ) < 10 * tol ### 2.5 InverseLO test @@ -893,17 +893,17 @@ def test_operator_evaluation(n1, n2, p1, p2): # Several break-criteria in the LSMR algorithm require different way to determine success # than asserting rnorm < tol, as that is not required. Even though it should? - assert np.linalg.norm( (S @ xs_cg - v).toarray() ) < tol - assert np.linalg.norm( (S @ xs_pcg - v).toarray() ) < tol - assert np.linalg.norm( (S @ xs_bicg - v).toarray() ) < tol + assert xp.linalg.norm( (S @ xs_cg - v).toarray() ) < tol + assert xp.linalg.norm( (S @ xs_pcg - v).toarray() ) < tol + assert xp.linalg.norm( (S @ xs_bicg - v).toarray() ) < tol assert S_lsmr.get_success() == True - assert np.linalg.norm( (S @ xs_mr - v).toarray() ) < tol + assert xp.linalg.norm( (S @ xs_mr - v).toarray() ) < tol - assert np.linalg.norm( (B @ xb_cg - u).toarray() ) < tol - assert np.linalg.norm( (B @ xb_pcg - u).toarray() ) < tol - assert np.linalg.norm( (B @ xb_bicg - u).toarray() ) < tol + assert xp.linalg.norm( (B @ xb_cg - u).toarray() ) < tol + assert xp.linalg.norm( (B @ xb_pcg - u).toarray() ) < tol + assert xp.linalg.norm( (B @ xb_bicg - u).toarray() ) < tol assert B_lsmr.get_success() == True - assert np.linalg.norm( (B @ xb_mr - u).toarray() ) < tol + assert xp.linalg.norm( (B @ xb_mr - u).toarray() ) < tol 
#=============================================================================== @@ -956,8 +956,8 @@ def test_internal_storage(): assert len(Z2_1.tmp_vectors) == 3 assert len(Z2_2.tmp_vectors) == 3 assert len(Z2_3.tmp_vectors) == 3 - assert np.array_equal( y1_1.toarray(), y1_2.toarray() ) & np.array_equal( y1_2.toarray(), y1_3.toarray() ) - assert np.array_equal( y2_1.toarray(), y2_2.toarray() ) & np.array_equal( y2_2.toarray(), y2_3.toarray() ) + assert xp.array_equal( y1_1.toarray(), y1_2.toarray() ) & xp.array_equal( y1_2.toarray(), y1_3.toarray() ) + assert xp.array_equal( y2_1.toarray(), y2_2.toarray() ) & xp.array_equal( y2_2.toarray(), y2_3.toarray() ) #=============================================================================== @pytest.mark.parametrize('solver', ['cg', 'pcg', 'bicg', 'minres', 'lsmr']) @@ -975,7 +975,7 @@ def test_x0update(solver): b = StencilVector(V) for n in range(n1): b[n, :] = 1. - assert np.array_equal(b.toarray(), np.ones(n1*n2, dtype=float)) + assert xp.array_equal(b.toarray(), xp.ones(n1*n2, dtype=float)) # Create Inverse tol = 1e-6 @@ -1026,7 +1026,7 @@ def test_dot_inner(): # Set the values of b and c randomly from a uniform distribution over the # interval [0, 1) - rng = np.random.default_rng(seed=42) + rng = xp.random.default_rng(seed=42) for bj in b: Vj = bj.space rng.random(size=Vj.shape, dtype=Vj.dtype, out=bj._data) diff --git a/psydac/linalg/tests/test_matrix_free.py b/psydac/linalg/tests/test_matrix_free.py index 0defc658e..b847adc15 100644 --- a/psydac/linalg/tests/test_matrix_free.py +++ b/psydac/linalg/tests/test_matrix_free.py @@ -1,5 +1,5 @@ import pytest -import numpy as np +import cunumpy as xp from psydac.linalg.block import BlockLinearOperator, BlockVector, BlockVectorSpace from psydac.linalg.basic import LinearOperator, ZeroOperator, IdentityOperator, ComposedLinearOperator, SumLinearOperator, PowerLinearOperator, ScaledLinearOperator @@ -12,7 +12,7 @@ def get_random_StencilMatrix(domain, codomain): - 
np.random.seed(2) + xp.random.seed(2) V = domain W = codomain assert isinstance(V, StencilVectorSpace) @@ -32,22 +32,22 @@ def get_random_StencilMatrix(domain, codomain): for i in range(0, q1+1): if i != 0: for j in range(-q2, q2+1): - S[:, :, i, j] = 2*np.random.random()-1 + S[:, :, i, j] = 2*xp.random.random()-1 else: for j in range(1, q2+1): - S[:, :, i, j] = 2*np.random.random()-1 + S[:, :, i, j] = 2*xp.random.random()-1 S.remove_spurious_entries() return S def get_random_StencilVector(V): - np.random.seed(3) + xp.random.seed(3) assert isinstance(V, StencilVectorSpace) [n1, n2] = V._npts v = StencilVector(V) for i in range(n1): for j in range(n2): - v[i,j] = np.random.random() + v[i,j] = xp.random.random() return v #=============================================================================== @@ -75,11 +75,11 @@ def test_fake_matrix_free(n1, n2, p1, p2): tol = 1e-10 y = S.dot(v) x = O.dot(v) - print(f'error = {np.linalg.norm( (x - y).toarray() )}') - assert np.linalg.norm( (x - y).toarray() ) < tol + print(f'error = {xp.linalg.norm( (x - y).toarray() )}') + assert xp.linalg.norm( (x - y).toarray() ) < tol O.dot(v, out=x) - print(f'error = {np.linalg.norm( (x - y).toarray() )}') - assert np.linalg.norm( (x - y).toarray() ) < tol + print(f'error = {xp.linalg.norm( (x - y).toarray() )}') + assert xp.linalg.norm( (x - y).toarray() ) < tol @pytest.mark.parametrize('solver', ['cg', 'pcg', 'bicg', 'minres', 'lsmr']) @@ -111,13 +111,13 @@ def test_solvers_matrix_free(solver): AA = A_inv._A xx = AA.dot(b) - print(f'norm(xx) = {np.linalg.norm( xx.toarray() )}') - print(f'norm(x) = {np.linalg.norm( x.toarray() )}') + print(f'norm(xx) = {xp.linalg.norm( xx.toarray() )}') + print(f'norm(x) = {xp.linalg.norm( x.toarray() )}') # Apply inverse and check y = A_inv @ x - error = np.linalg.norm( (b - y).toarray()) - assert np.linalg.norm( (b - y).toarray() ) < tol + error = xp.linalg.norm( (b - y).toarray()) + assert xp.linalg.norm( (b - y).toarray() ) < tol 
#=============================================================================== # SCRIPT FUNCTIONALITY diff --git a/psydac/linalg/tests/test_solvers.py b/psydac/linalg/tests/test_solvers.py index 5a4a47d35..2d0710e38 100644 --- a/psydac/linalg/tests/test_solvers.py +++ b/psydac/linalg/tests/test_solvers.py @@ -1,5 +1,5 @@ -import numpy as np +import cunumpy as xp import pytest from psydac.linalg.solvers import inverse from psydac.linalg.stencil import StencilVectorSpace, StencilMatrix, StencilVector @@ -9,7 +9,7 @@ def define_data_hermitian(n, p, dtype=float): domain_decomposition = DomainDecomposition([n - p], [False]) - cart = CartDecomposition(domain_decomposition, [n], [np.array([0])], [np.array([n - 1])], [p], [1]) + cart = CartDecomposition(domain_decomposition, [n], [xp.array([0])], [xp.array([n - 1])], [p], [1]) # ... Vector Spaces V = StencilVectorSpace(cart,dtype=dtype) e = V.ends[0] @@ -29,12 +29,12 @@ def define_data_hermitian(n, p, dtype=float): # Build exact solution xe = StencilVector(V) - xe[s:e + 1] = factor*np.random.random(e + 1 - s) + xe[s:e + 1] = factor*xp.random.random(e + 1 - s) return(V, A, xe) def define_data(n, p, matrix_data, dtype=float): domain_decomposition = DomainDecomposition([n - p], [False]) - cart = CartDecomposition(domain_decomposition, [n], [np.array([0])], [np.array([n - 1])], [p], [1]) + cart = CartDecomposition(domain_decomposition, [n], [xp.array([0])], [xp.array([n - 1])], [p], [1]) # ... 
Vector Spaces V = StencilVectorSpace(cart, dtype=dtype) e = V.ends[0] @@ -51,7 +51,7 @@ def define_data(n, p, matrix_data, dtype=float): # Build exact solution xe = StencilVector(V) - xe[s:e + 1] = np.random.random(e + 1 - s) + xe[s:e + 1] = xp.random.random(e + 1 - s) return(V, A, xe) @@ -126,29 +126,29 @@ def test_solver_tridiagonal(n, p, dtype, solver, verbose=False): x = solv @ be info = solv.get_info() solv_x0 = solv._options["x0"] - assert np.array_equal(x.toarray(), solv_x0.toarray()) + assert xp.array_equal(x.toarray(), solv_x0.toarray()) assert x is not solv_x0 x2 = solv @ be2 solv_x0 = solv._options["x0"] - assert np.array_equal(x2.toarray(), solv_x0.toarray()) + assert xp.array_equal(x2.toarray(), solv_x0.toarray()) assert x2 is not solv_x0 xt = solvt.solve(bet) solvt_x0 = solvt._options["x0"] - assert np.array_equal(xt.toarray(), solvt_x0.toarray()) + assert xp.array_equal(xt.toarray(), solvt_x0.toarray()) assert xt is not solvt_x0 xh = solvh.dot(beh) solvh_x0 = solvh._options["x0"] - assert np.array_equal(xh.toarray(), solvh_x0.toarray()) + assert xp.array_equal(xh.toarray(), solvh_x0.toarray()) assert xh is not solvh_x0 if solver != 'pcg': # PCG only works with operators with diagonal xc = solv2 @ be2 solv2_x0 = solv2._options["x0"] - assert np.array_equal(xc.toarray(), solv2_x0.toarray()) + assert xp.array_equal(xc.toarray(), solv2_x0.toarray()) assert xc is not solv2_x0 @@ -161,17 +161,17 @@ def test_solver_tridiagonal(n, p, dtype, solver, verbose=False): bc = A @ A @ xc err = b - be - err_norm = np.linalg.norm( err.toarray() ) + err_norm = xp.linalg.norm( err.toarray() ) err2 = b2 - be2 - err2_norm = np.linalg.norm( err2.toarray() ) + err2_norm = xp.linalg.norm( err2.toarray() ) errt = bt - bet - errt_norm = np.linalg.norm( errt.toarray() ) + errt_norm = xp.linalg.norm( errt.toarray() ) errh = bh - beh - errh_norm = np.linalg.norm( errh.toarray() ) + errh_norm = xp.linalg.norm( errh.toarray() ) if solver != 'pcg': errc = bc - be2 - errc_norm = 
np.linalg.norm( errc.toarray() ) + errc_norm = xp.linalg.norm( errc.toarray() ) #--------------------------------------------------------------------------- # TERMINAL OUTPUT diff --git a/psydac/linalg/tests/test_stencil_interface_matrix.py b/psydac/linalg/tests/test_stencil_interface_matrix.py index b0fa74f08..3d347c72b 100644 --- a/psydac/linalg/tests/test_stencil_interface_matrix.py +++ b/psydac/linalg/tests/test_stencil_interface_matrix.py @@ -1,7 +1,7 @@ # -*- coding: UTF-8 -*- import pytest -import numpy as np +import cunumpy as xp from random import random from psydac.linalg.stencil import StencilVectorSpace, StencilVector, StencilMatrix, StencilInterfaceMatrix @@ -19,7 +19,7 @@ def compute_global_starts_ends(domain_decomposition, npts, pads): global_ends [axis] = ee.copy() global_ends [axis][-1] = npts[axis]-1 - global_starts[axis] = np.array([0] + (global_ends[axis][:-1]+1).tolist()) + global_starts[axis] = xp.array([0] + (global_ends[axis][:-1]+1).tolist()) for s, e, p in zip(global_starts, global_ends, pads): assert all(e - s + 1 >= p) @@ -91,7 +91,7 @@ def test_stencil_interface_matrix_1d_serial_init(dtype, n1, p1, s1, axis, ext, P assert M.domain_start == (0,) * M.dim assert M.codomain_start == (0,) * M.dim assert M.flip == (1,) * M.dim - assert np.array_equal(M.permutation, [0]) + assert xp.array_equal(M.permutation, [0]) assert M.pads == (p1,) assert M.backend == None assert M._data.shape == (p1 + 1 + 2 * p1 * s1, 1 + 2 * p1) @@ -146,9 +146,9 @@ def test_stencil_interface_matrix_2d_serial_init(dtype, n1, n2, p1, p2, s1, s2, elif axis2 == 1: assert M._data.shape == (n1 + 2 * p1 * s1, p2 + 1 + 2 * p2 * s2, 1 + 2 * p1, 1 + 2 * p2) if axis1 == axis2: - assert np.array_equal(M.permutation, [0, 1]) + assert xp.array_equal(M.permutation, [0, 1]) else: - assert np.array_equal(M.permutation, [1, 0]) + assert xp.array_equal(M.permutation, [1, 0]) assert M.shape == (n1 * n2, n1 * n2) # 
=============================================================================== @@ -208,11 +208,11 @@ def test_stencil_interface_matrix_3d_serial_init(dtype, n1, n2, n3, p1, p2, p3, assert M._data.shape == ( n1 + 2 * p1 * s1, n2 + 2 * p2 * s2, p3 + 1 + 2 * p3 * s3, 1 + 2 * p1, 1 + 2 * p2, 1 + 2 * p3) if axis1 == axis2: - assert np.array_equal(M.permutation, [0, 1, 2]) + assert xp.array_equal(M.permutation, [0, 1, 2]) else: permutation = [0, 1, 2] permutation[axis1], permutation[axis2] = permutation[axis2], permutation[axis1] - assert np.array_equal(M.permutation, permutation) + assert xp.array_equal(M.permutation, permutation) assert M.shape == (n1 * n2 * n3, n1 * n2 * n3) #=============================================================================== # Parallel TESTS @@ -262,7 +262,7 @@ def test_stencil_interface_matrix_2d_parallel_dot(n1, n2, p1, p2, expected): global_ends [j] = ee.copy() global_ends [j][-1] = n[i][j]-1 - global_starts[j] = np.array([0] + (global_ends[j][:-1]+1).tolist()) + global_starts[j] = xp.array([0] + (global_ends[j][:-1]+1).tolist()) carts.append(CartDecomposition( domain_decomposition = domain_decomposition.domains[i], diff --git a/psydac/linalg/tests/test_stencil_vector.py b/psydac/linalg/tests/test_stencil_vector.py index 3b95419bd..bcb5a8df2 100644 --- a/psydac/linalg/tests/test_stencil_vector.py +++ b/psydac/linalg/tests/test_stencil_vector.py @@ -1,7 +1,7 @@ # coding: utf-8 import pytest -import numpy as np +import cunumpy as xp from psydac.ddm.mpi import mpi as MPI from psydac.linalg.stencil import StencilVectorSpace, StencilVector, StencilMatrix @@ -23,7 +23,7 @@ def compute_global_starts_ends(domain_decomposition, npts): global_ends[axis] = ee.copy() global_ends[axis][-1] = npts[axis] - 1 - global_starts[axis] = np.array([0] + (global_ends[axis][:-1] + 1).tolist()) + global_starts[axis] = xp.array([0] + (global_ends[axis][:-1] + 1).tolist()) return global_starts, global_ends @@ -82,11 +82,11 @@ def 
test_stencil_vector_2d_serial_copy(dtype, n1, n2, p1, p2, s1, s2, P1=True, P x = StencilVector(V) # Take random data, but determinize it - np.random.seed(2) + xp.random.seed(2) if dtype == complex: - x._data[:] = np.random.random(x._data.shape) + 1j * np.random.random(x._data.shape) + x._data[:] = xp.random.random(x._data.shape) + 1j * xp.random.random(x._data.shape) else: - x._data[:] = np.random.random(x._data.shape) + x._data[:] = xp.random.random(x._data.shape) # Compute the copy z = x.copy() @@ -96,7 +96,7 @@ def test_stencil_vector_2d_serial_copy(dtype, n1, n2, p1, p2, s1, s2, P1=True, P assert z.space is V assert z._data is not x._data assert z.dtype == dtype - assert np.array_equal(x._data, z._data) + assert xp.array_equal(x._data, z._data) # =============================================================================== @pytest.mark.parametrize('dtype', [float, complex]) @@ -120,21 +120,21 @@ def test_stencil_vector_2d_basic_ops(dtype, n1, n2, p1, p2, s1, s2, P1=True, P2= M = StencilVector(V) # take random data, but determinize it - np.random.seed(2) + xp.random.seed(2) if dtype == complex: - M._data[:] = np.random.random(M._data.shape) + 1j * np.random.random(M._data.shape) + M._data[:] = xp.random.random(M._data.shape) + 1j * xp.random.random(M._data.shape) else: - M._data[:] = np.random.random(M._data.shape) + M._data[:] = xp.random.random(M._data.shape) # Test classical basic operation assert (M * 2).dtype == dtype - assert np.array_equal((M * 2)._data, M._data * 2) + assert xp.array_equal((M * 2)._data, M._data * 2) assert (M / 2).dtype == dtype - assert np.array_equal((M / 2)._data, M._data / 2) + assert xp.array_equal((M / 2)._data, M._data / 2) assert (M + M).dtype == dtype - assert np.array_equal((M + M)._data, M._data + M._data) + assert xp.array_equal((M + M)._data, M._data + M._data) assert (M - M).dtype == dtype - assert np.array_equal((M - M)._data, M._data - M._data) + assert xp.array_equal((M - M)._data, M._data - M._data) M1 = M.copy() M1 
*= 2 @@ -150,7 +150,7 @@ def test_stencil_vector_2d_basic_ops(dtype, n1, n2, p1, p2, s1, s2, P1=True, P2= assert isinstance(m, StencilVector) assert m.dtype == dtype assert m.space is V - assert np.array_equal(m._data, mex) + assert xp.array_equal(m._data, mex) # =============================================================================== @pytest.mark.parametrize('dtype', [float, complex]) @@ -188,8 +188,8 @@ def test_stencil_vector_2d_serial_toarray(dtype, n1, n2, p1, p2, s1, s2, P1=True xf = x.toarray(order='F') # Create our exact arrays - zc = np.zeros((n1 * n2),dtype=dtype) - zf = np.zeros((n1 * n2),dtype=dtype) + zc = xp.zeros((n1 * n2),dtype=dtype) + zf = xp.zeros((n1 * n2),dtype=dtype) for i1 in range(n1): for i2 in range(n2): zc[i1 * n2 + i2] = f(i1,i2) @@ -199,7 +199,7 @@ def test_stencil_vector_2d_serial_toarray(dtype, n1, n2, p1, p2, s1, s2, P1=True for (x, z) in zip([xc, xf], [zc, zf]): assert x.shape == (n1*n2,) assert x.dtype == dtype - assert np.array_equal(xc, zc) + assert xp.array_equal(xc, zc) # =============================================================================== @pytest.mark.parametrize('dtype', [float, complex]) @@ -224,11 +224,11 @@ def test_stencil_vector_2d_serial_math(dtype, n1, n2, p1, p2, s1, s2, P1=True, P y = StencilVector(V) # take random data, but determinize it - np.random.seed(2) + xp.random.seed(2) if dtype == complex: - x._data[:] = np.random.random(x._data.shape) + 1j * np.random.random(x._data.shape) + x._data[:] = xp.random.random(x._data.shape) + 1j * xp.random.random(x._data.shape) else: - x._data[:] = np.random.random(x._data.shape) + x._data[:] = xp.random.random(x._data.shape) y[:, :] = 42.0 @@ -251,7 +251,7 @@ def test_stencil_vector_2d_serial_math(dtype, n1, n2, p1, p2, s1, s2, P1=True, P assert isinstance(r, StencilVector) assert r.space is V assert r.dtype == dtype - assert np.array_equal(r.toarray(), rex) + assert xp.array_equal(r.toarray(), rex) # 
=============================================================================== @pytest.mark.parametrize('dtype', [float, complex]) @@ -294,9 +294,9 @@ def test_stencil_vector_2d_serial_dot(dtype, n1, n2, p1, p2, s1, s2, P1=True, P2 # Exact value by Numpy dot and vdot if dtype==complex: - z_exact = np.vdot(x.toarray(), y.toarray()) + z_exact = xp.vdot(x.toarray(), y.toarray()) else: - z_exact = np.dot(x.toarray(), y.toarray()) + z_exact = xp.dot(x.toarray(), y.toarray()) # Compute axpy exact sol if dtype == complex: @@ -312,7 +312,7 @@ def test_stencil_vector_2d_serial_dot(dtype, n1, n2, p1, p2, s1, s2, P1=True, P2 assert z2.dtype == dtype assert z1 == z_exact assert z2 == z_exact.conjugate() - assert np.allclose(x._data, z3._data) + assert xp.allclose(x._data, z3._data) # =============================================================================== @pytest.mark.parametrize('dtype', [float, complex]) @@ -355,8 +355,8 @@ def test_stencil_vector_2d_serial_conjugate(dtype, n1, n2, p1, p2, s1, s2, P1=Tr # Test the exact value assert z1.dtype == dtype - assert np.array_equal(z1._data, z_exact) - assert np.array_equal(z2._data, z_exact) + assert xp.array_equal(z1._data, z_exact) + assert xp.array_equal(z2._data, z_exact) # =============================================================================== @pytest.mark.parametrize('dtype', [float, complex]) @@ -405,7 +405,7 @@ def test_stencil_vector_2d_serial_array_to_psydac(dtype, n1, n2, p1, p2, s1, s2, assert v.pads == (p1, p2) assert v._data.shape == (n1 + 2 * p1 * s1, n2 + 2 * p2 * s2) assert v._data.dtype == dtype - assert np.array_equal(xa, v.toarray()) + assert xp.array_equal(xa, v.toarray()) # =============================================================================== @pytest.mark.parametrize('dtype', [float, complex]) @@ -454,7 +454,7 @@ def test_stencil_vector_2d_serial_topetsc(dtype, n1, n2, p1, p2, s1, s2, P1, P2) assert v.pads == (p1, p2) assert v._data.shape == (n1 + 2 * p1 * s1, n2 + 2 * p2 * s2) assert 
v._data.dtype == dtype - assert np.array_equal(x.toarray(), v.toarray()) + assert xp.array_equal(x.toarray(), v.toarray()) # =============================================================================== @pytest.mark.parametrize('dtype', [float, complex]) @@ -498,26 +498,26 @@ def test_stencil_vector_2d_serial_update_ghost_region_interior(dtype, n1, n2, p1 # Compare vectors ghost region to the exact value if P1: # Left region with corner - assert np.array_equal(data[0:p1 * s1, :], data[n1:n1 + p1 * s1, :]) + assert xp.array_equal(data[0:p1 * s1, :], data[n1:n1 + p1 * s1, :]) # Right region with corner - assert np.array_equal(data[n1 + p1 * s1:n1 + 2 * p1 * s1, :], data[p1 * s1:2 * p1 * s1, :]) + assert xp.array_equal(data[n1 + p1 * s1:n1 + 2 * p1 * s1, :], data[p1 * s1:2 * p1 * s1, :]) else: # Left region with corner - assert np.array_equal(data[0:p1 * s1, :], np.zeros((p1 * s1, n2 + 2 * p2 * s2), dtype=dtype)) + assert xp.array_equal(data[0:p1 * s1, :], xp.zeros((p1 * s1, n2 + 2 * p2 * s2), dtype=dtype)) # Right region with corner - assert np.array_equal(data[n1 + p1 * s1:n1 + 2 * p1 * s1, :], - np.zeros((p1 * s1, n2 + 2 * p2 * s2), dtype=dtype)) + assert xp.array_equal(data[n1 + p1 * s1:n1 + 2 * p1 * s1, :], + xp.zeros((p1 * s1, n2 + 2 * p2 * s2), dtype=dtype)) if P2: # Left region with corner - assert np.array_equal(data[:, 0:p2 * s2], data[:, n2:n2 + p2 * s2]) + assert xp.array_equal(data[:, 0:p2 * s2], data[:, n2:n2 + p2 * s2]) # Right region with corner - assert np.array_equal(data[:, n2 + p2 * s2:n2 + 2 * p2 * s2], data[:, p2 * s2:2 * p2 * s2]) + assert xp.array_equal(data[:, n2 + p2 * s2:n2 + 2 * p2 * s2], data[:, p2 * s2:2 * p2 * s2]) else: # Left region - assert np.array_equal(data[:, 0:p2 * s2], np.zeros((n1 + 2 * p1 * s1, p2 * s2), dtype=dtype)) + assert xp.array_equal(data[:, 0:p2 * s2], xp.zeros((n1 + 2 * p1 * s1, p2 * s2), dtype=dtype)) # Right region with corner - assert np.array_equal(data[:, n2 + p2 * s2:n2 + 2 * p2 * s2], - np.zeros((n1 + 2 * p1 
* s1, p2 * s2), dtype=dtype)) + assert xp.array_equal(data[:, n2 + p2 * s2:n2 + 2 * p2 * s2], + xp.zeros((n1 + 2 * p1 * s1, p2 * s2), dtype=dtype)) # =============================================================================== # PARALLEL TESTS @@ -633,7 +633,7 @@ def test_stencil_vector_2d_parallel_topetsc(dtype, n1, n2, p1, p2, s1, s2, P1, P # Convert PETSc.Vec to StencilVector of V v = petsc_to_psydac(v, V) - assert np.array_equal(x.toarray(), v.toarray()) + assert xp.array_equal(x.toarray(), v.toarray()) # =============================================================================== @pytest.mark.parametrize('dtype', [float, complex]) @@ -675,7 +675,7 @@ def test_stencil_vector_1d_parallel_topetsc(dtype, n1, p1, s1, P1): # Convert PETSc.Vec to StencilVector of V v = petsc_to_psydac(v, V) - assert np.array_equal(x.toarray(), v.toarray()) + assert xp.array_equal(x.toarray(), v.toarray()) # =============================================================================== @pytest.mark.parametrize('dtype', [float, complex]) @@ -729,7 +729,7 @@ def test_stencil_vector_3d_parallel_topetsc(dtype, n1, n2, n3, p1, p2, p3, s1, s # Convert PETSc.Vec to StencilVector of V v = petsc_to_psydac(v, V) - assert np.array_equal(x.toarray(), v.toarray()) + assert xp.array_equal(x.toarray(), v.toarray()) # =============================================================================== @pytest.mark.parametrize('dtype', [float, complex]) @@ -809,8 +809,8 @@ def test_stencil_vector_2d_parallel_toarray(dtype, n1, n2, p1, p2, s1, s2, P1=Tr assert x.dtype == dtype # Construct local 2D array manually - z1 = np.zeros((n1, n2), dtype=dtype) - z2 = np.zeros((n2, n1), dtype=dtype) + z1 = xp.zeros((n1, n2), dtype=dtype) + z2 = xp.zeros((n2, n1), dtype=dtype) for i1 in range(cart.starts[0], cart.ends[0] + 1): for i2 in range(cart.starts[1], cart.ends[1] + 1): z1[i1, i2] = f(i1, i2) @@ -824,8 +824,8 @@ def test_stencil_vector_2d_parallel_toarray(dtype, n1, n2, p1, p2, s1, s2, P1=Tr assert 
xa1.dtype == dtype assert xa1.shape == (n1 * n2,) - assert np.array_equal(xa1, za1) - assert np.array_equal(xa2, za2) + assert xp.array_equal(xa1, za1) + assert xp.array_equal(xa2, za2) # # Verify toarray() with padding: internal region should not change # xe = x.toarray(with_pads=True) @@ -833,7 +833,7 @@ def test_stencil_vector_2d_parallel_toarray(dtype, n1, n2, p1, p2, s1, s2, P1=Tr # # assert xe.dtype == dtype # assert xe.shape == (n1, n2) - # assert np.array_equal(xe, z1) + # assert xp.array_equal(xe, z1) # =============================================================================== @pytest.mark.parametrize('dtype', [float, complex]) @@ -880,7 +880,7 @@ def test_stencil_vector_2d_parallel_array_to_psydac(dtype, n1, n2, p1, p2, s1, s v_l_inv = array_to_psydac(xa, V) # Apply array_to_psydac first, and toarray next - xa_r_inv = np.array(np.random.rand(xa.size), dtype=dtype)*xa # the vector must be distributed as xa + xa_r_inv = xp.array(xp.random.rand(xa.size), dtype=dtype)*xa # the vector must be distributed as xa x_r_inv = array_to_psydac(xa_r_inv, V) x_r_inv.update_ghost_regions() va_r_inv = x_r_inv.toarray() @@ -889,9 +889,9 @@ def test_stencil_vector_2d_parallel_array_to_psydac(dtype, n1, n2, p1, p2, s1, s # left inverse: assert isinstance(v_l_inv, StencilVector) assert v_l_inv.space is V - assert np.array_equal(x._data, v_l_inv._data) + assert xp.array_equal(x._data, v_l_inv._data) # right inverse: - assert np.array_equal(xa_r_inv, va_r_inv) + assert xp.array_equal(xa_r_inv, va_r_inv) # TODO: test that ghost regions have been properly copied to 'xe' array # =============================================================================== @@ -938,10 +938,10 @@ def test_stencil_vector_2d_parallel_dot(dtype, n1, n2, p1, p2, s1, s2, P1=True, # Compute exact value with Numpy dot if dtype==complex: - res_ex1 = comm.allreduce(np.vdot(x.toarray(), y.toarray())) - res_ex2 = comm.allreduce(np.vdot(y.toarray(), x.toarray())) + res_ex1 = 
comm.allreduce(xp.vdot(x.toarray(), y.toarray())) + res_ex2 = comm.allreduce(xp.vdot(y.toarray(), x.toarray())) else: - res_ex1 = comm.allreduce(np.dot(x.toarray(), y.toarray())) + res_ex1 = comm.allreduce(xp.dot(x.toarray(), y.toarray())) res_ex2 = res_ex1 # Compute axpy exact sol @@ -954,7 +954,7 @@ def test_stencil_vector_2d_parallel_dot(dtype, n1, n2, p1, p2, s1, s2, P1=True, x.mul_iadd(cst, y) # Test exact value and symmetry of the scalar product - assert np.allclose(x._data, z3._data) + assert xp.allclose(x._data, z3._data) assert res1 == res_ex1 assert res2 == res_ex2 @@ -1006,10 +1006,10 @@ def test_stencil_vector_3d_parallel_dot(dtype, n1, n2, n3, p1, p2, p3, s1, s2, s # Compute exact value with Numpy dot if dtype == complex: - res_ex1 = comm.allreduce(np.vdot(x.toarray(), y.toarray())) - res_ex2 = comm.allreduce(np.vdot(y.toarray(), x.toarray())) + res_ex1 = comm.allreduce(xp.vdot(x.toarray(), y.toarray())) + res_ex2 = comm.allreduce(xp.vdot(y.toarray(), x.toarray())) else: - res_ex1 = comm.allreduce(np.dot(x.toarray(), y.toarray())) + res_ex1 = comm.allreduce(xp.dot(x.toarray(), y.toarray())) res_ex2 = res_ex1 # Compute axpy exact sol @@ -1022,7 +1022,7 @@ def test_stencil_vector_3d_parallel_dot(dtype, n1, n2, n3, p1, p2, p3, s1, s2, s x.mul_iadd(cst, y) # Test exact value and symmetry of the scalar product - assert np.allclose(x._data, z3._data) + assert xp.allclose(x._data, z3._data) assert res1 == res_ex1 assert res2 == res_ex2 diff --git a/psydac/linalg/tests/test_stencil_vector_space.py b/psydac/linalg/tests/test_stencil_vector_space.py index 7ada7ca50..78392efd1 100644 --- a/psydac/linalg/tests/test_stencil_vector_space.py +++ b/psydac/linalg/tests/test_stencil_vector_space.py @@ -1,5 +1,5 @@ import pytest -import numpy as np +import cunumpy as xp from psydac.ddm.mpi import mpi as MPI from psydac.linalg.stencil import StencilVectorSpace, StencilVector @@ -17,7 +17,7 @@ def compute_global_starts_ends(domain_decomposition, npts): global_ends[axis] = 
ee.copy() global_ends[axis][-1] = npts[axis] - 1 - global_starts[axis] = np.array([0] + (global_ends[axis][:-1] + 1).tolist()) + global_starts[axis] = xp.array([0] + (global_ends[axis][:-1] + 1).tolist()) return global_starts, global_ends @@ -206,7 +206,7 @@ def test_stencil_vector_space_2D_serial_zeros(dtype, n1, n2, p1, p2, s1, s2, P1= assert x._data.shape == (n1+2*p1*s1, n2+2*p2*s2) assert x.pads == (p1, p2) assert x._data.dtype == dtype - assert np.array_equal(x._data, np.zeros((n1+2*p1*s1, n2+2*p2*s2), dtype=dtype)) + assert xp.array_equal(x._data, xp.zeros((n1+2*p1*s1, n2+2*p2*s2), dtype=dtype)) # =============================================================================== @pytest.mark.parametrize('dtype', [float, complex]) diff --git a/psydac/linalg/topetsc.py b/psydac/linalg/topetsc.py index d31cb5ca8..28734968e 100644 --- a/psydac/linalg/topetsc.py +++ b/psydac/linalg/topetsc.py @@ -1,6 +1,6 @@ from itertools import product as cartesian_prod -import numpy as np +import cunumpy as xp from psydac.linalg.basic import VectorSpace from psydac.linalg.block import BlockVectorSpace, BlockVector, BlockLinearOperator @@ -24,13 +24,13 @@ def get_index_shift_per_block_per_process(V): - npts_local_per_block_per_process = np.array(get_npts_per_block(V)) #indexed [b,k,d] for block b and process k and dimension d - local_sizes_per_block_per_process = np.prod(npts_local_per_block_per_process, axis=-1) #indexed [b,k] for block b and process k + npts_local_per_block_per_process = xp.array(get_npts_per_block(V)) #indexed [b,k,d] for block b and process k and dimension d + local_sizes_per_block_per_process = xp.prod(npts_local_per_block_per_process, axis=-1) #indexed [b,k] for block b and process k n_blocks = npts_local_per_block_per_process.shape[0] n_procs = npts_local_per_block_per_process.shape[1] - index_shift_per_block_per_process = [[0 + np.sum(local_sizes_per_block_per_process[:,:k]) + np.sum(local_sizes_per_block_per_process[:b,k]) for k in range(n_procs)] for b in 
range(n_blocks)] + index_shift_per_block_per_process = [[0 + xp.sum(local_sizes_per_block_per_process[:,:k]) + xp.sum(local_sizes_per_block_per_process[:b,k]) for k in range(n_procs)] for b in range(n_blocks)] return index_shift_per_block_per_process #Global variable indexed as [b][k] fo block b, process k @@ -41,33 +41,33 @@ def toIJVrowmap(mat_block, bd, bc, I, J, V, rowmap, dspace, cspace, dnpts_block, cspace_block = cspace if isinstance(cspace, StencilVectorSpace) else cspace.spaces[bc] # Shortcuts - cnl = [np.int64(n) for n in get_npts_local(cspace_block)[0]] - dng = [np.int64(n) for n in dspace_block.cart.npts] - cs = [np.int64(s) for s in cspace_block.cart.starts] - cp = [np.int64(p) for p in cspace_block.cart.pads] - cm = [np.int64(m) for m in cspace_block.cart.shifts] - dsh = np.array(dshift_block, dtype='int64') - csh = np.array(cshift_block, dtype='int64') - - dgs = [np.array(gs, dtype='int64') for gs in dspace_block.cart.global_starts] # Global variable - dge = [np.array(ge, dtype='int64') for ge in dspace_block.cart.global_ends] # Global variable - cgs = [np.array(gs, dtype='int64') for gs in cspace_block.cart.global_starts] # Global variable - cge = [np.array(ge, dtype='int64') for ge in cspace_block.cart.global_ends] # Global variable - - dnlb = [np.array([n[d] for n in dnpts_block], dtype='int64') for d in range(dspace_block.cart.ndim)] - cnlb = [np.array([n[d] for n in cnpts_block] , dtype='int64') for d in range(cspace_block.cart.ndim)] + cnl = [xp.int64(n) for n in get_npts_local(cspace_block)[0]] + dng = [xp.int64(n) for n in dspace_block.cart.npts] + cs = [xp.int64(s) for s in cspace_block.cart.starts] + cp = [xp.int64(p) for p in cspace_block.cart.pads] + cm = [xp.int64(m) for m in cspace_block.cart.shifts] + dsh = xp.array(dshift_block, dtype='int64') + csh = xp.array(cshift_block, dtype='int64') + + dgs = [xp.array(gs, dtype='int64') for gs in dspace_block.cart.global_starts] # Global variable + dge = [xp.array(ge, dtype='int64') for ge in 
dspace_block.cart.global_ends] # Global variable + cgs = [xp.array(gs, dtype='int64') for gs in cspace_block.cart.global_starts] # Global variable + cge = [xp.array(ge, dtype='int64') for ge in cspace_block.cart.global_ends] # Global variable + + dnlb = [xp.array([n[d] for n in dnpts_block], dtype='int64') for d in range(dspace_block.cart.ndim)] + cnlb = [xp.array([n[d] for n in cnpts_block] , dtype='int64') for d in range(cspace_block.cart.ndim)] # Range of data owned by local process (no ghost regions) local = tuple( [slice(m*p,-m*p) for p,m in zip(cp, cm)] + [slice(None)] * dspace_block.cart.ndim ) shape = mat_block._data[local].shape - nrows = np.prod(shape[0:dspace_block.cart.ndim]) - nentries = np.prod(shape) + nrows = xp.prod(shape[0:dspace_block.cart.ndim]) + nentries = xp.prod(shape) # locally block I, J, V, rowmap storage - Ib = np.zeros(nrows + 1, dtype='int64') - Jb = np.zeros(nentries, dtype='int64') - rowmapb = np.zeros(nrows, dtype='int64') - Vb = np.zeros(nentries, dtype=mat_block._data.dtype) + Ib = xp.zeros(nrows + 1, dtype='int64') + Jb = xp.zeros(nentries, dtype='int64') + rowmapb = xp.zeros(nrows, dtype='int64') + Vb = xp.zeros(nentries, dtype=mat_block._data.dtype) Ib[0] += I[-1] @@ -111,17 +111,17 @@ def petsc_local_to_psydac( """ # Get the number of points for each block and each dimension local to the current process: - npts_local_per_block = np.array(get_npts_local(V)) # indexed [b,d] for block b and dimension d + npts_local_per_block = xp.array(get_npts_local(V)) # indexed [b,d] for block b and dimension d # Get the local size of the current process for each block: - local_sizes_per_block = np.prod(npts_local_per_block, axis=-1) # indexed [b] for block b + local_sizes_per_block = xp.prod(npts_local_per_block, axis=-1) # indexed [b] for block b # Compute the accumulated local size of the current process for each block: - accumulated_local_sizes_per_block = np.concatenate((np.zeros((1,), dtype=int), np.cumsum(local_sizes_per_block, 
axis=0))) #indexed [b+1] for block b + accumulated_local_sizes_per_block = xp.concatenate((xp.zeros((1,), dtype=int), xp.cumsum(local_sizes_per_block, axis=0))) #indexed [b+1] for block b n_blocks = local_sizes_per_block.size # Find the block where the index belongs to: - bb = np.nonzero( - np.array( + bb = xp.nonzero( + xp.array( [petsc_index in range(accumulated_local_sizes_per_block[b], accumulated_local_sizes_per_block[b+1]) for b in range(n_blocks)] ))[0][0] @@ -139,7 +139,7 @@ def petsc_local_to_psydac( # Get the PETSc index local within the block: petsc_index -= accumulated_local_sizes_per_block[bb] - ii = np.zeros((ndim,), dtype=int) + ii = xp.zeros((ndim,), dtype=int) if ndim == 1: ii[0] = petsc_index + p[0]*m[0] @@ -188,9 +188,9 @@ def psydac_to_petsc_global( bb = block_indices[0] # Get the number of points per block, per process and per dimension: - npts_local_per_block_per_process = np.array(get_npts_per_block(V)) #indexed [b,k,d] for block b and process k and dimension d + npts_local_per_block_per_process = xp.array(get_npts_per_block(V)) #indexed [b,k,d] for block b and process k and dimension d # Get the local sizes per block and per process: - local_sizes_per_block_per_process = np.prod(npts_local_per_block_per_process, axis=-1) #indexed [b,k] for block b and process k + local_sizes_per_block_per_process = xp.prod(npts_local_per_block_per_process, axis=-1) #indexed [b,k] for block b and process k # Extract Cartesian decomposition of the Block where the node is: if isinstance(V, BlockVectorSpace): @@ -210,12 +210,12 @@ def psydac_to_petsc_global( if ndim == 1: if cart.comm: # Find to which process the node belongs to: - proc_index = np.nonzero(np.array([jj[0] in range(gs[0][k],ge[0][k]+1) for k in range(gs[0].size)]))[0][0] + proc_index = xp.nonzero(xp.array([jj[0] in range(gs[0][k],ge[0][k]+1) for k in range(gs[0].size)]))[0][0] else: proc_index = 0 # Find the index shift corresponding to the block and the owner process: - index_shift = 0 + 
np.sum(local_sizes_per_block_per_process[:,:proc_index]) + np.sum(local_sizes_per_block_per_process[:bb,proc_index]) + index_shift = 0 + xp.sum(local_sizes_per_block_per_process[:,:proc_index]) + xp.sum(local_sizes_per_block_per_process[:bb,proc_index]) # Compute the global PETSc index: global_index = index_shift + jj[0] - gs[0][proc_index] @@ -223,15 +223,15 @@ def psydac_to_petsc_global( elif ndim == 2: if cart.comm: # Find to which process the node belongs to: - proc_x = np.nonzero(np.array([jj[0] in range(gs[0][k],ge[0][k]+1) for k in range(gs[0].size)]))[0][0] - proc_y = np.nonzero(np.array([jj[1] in range(gs[1][k],ge[1][k]+1) for k in range(gs[1].size)]))[0][0] + proc_x = xp.nonzero(xp.array([jj[0] in range(gs[0][k],ge[0][k]+1) for k in range(gs[0].size)]))[0][0] + proc_y = xp.nonzero(xp.array([jj[1] in range(gs[1][k],ge[1][k]+1) for k in range(gs[1].size)]))[0][0] else: proc_x = 0 proc_y = 0 proc_index = proc_y + proc_x*nprocs[1] # Find the index shift corresponding to the block and the owner process: - index_shift = 0 + np.sum(local_sizes_per_block_per_process[:,:proc_index]) + np.sum(local_sizes_per_block_per_process[:bb,proc_index]) + index_shift = 0 + xp.sum(local_sizes_per_block_per_process[:,:proc_index]) + xp.sum(local_sizes_per_block_per_process[:bb,proc_index]) # Compute the global PETSc index: global_index = index_shift + jj[1] - gs[1][proc_y] + (jj[0] - gs[0][proc_x]) * npts_local_per_block_per_process[bb,proc_index,1] @@ -239,9 +239,9 @@ def psydac_to_petsc_global( elif ndim == 3: if cart.comm: # Find to which process the node belongs to: - proc_x = np.nonzero(np.array([jj[0] in range(gs[0][k],ge[0][k]+1) for k in range(gs[0].size)]))[0][0] - proc_y = np.nonzero(np.array([jj[1] in range(gs[1][k],ge[1][k]+1) for k in range(gs[1].size)]))[0][0] - proc_z = np.nonzero(np.array([jj[2] in range(gs[2][k],ge[2][k]+1) for k in range(gs[2].size)]))[0][0] + proc_x = xp.nonzero(xp.array([jj[0] in range(gs[0][k],ge[0][k]+1) for k in range(gs[0].size)]))[0][0] 
+ proc_y = xp.nonzero(xp.array([jj[1] in range(gs[1][k],ge[1][k]+1) for k in range(gs[1].size)]))[0][0] + proc_z = xp.nonzero(xp.array([jj[2] in range(gs[2][k],ge[2][k]+1) for k in range(gs[2].size)]))[0][0] else: proc_x = 0 proc_y = 0 @@ -250,7 +250,7 @@ def psydac_to_petsc_global( proc_index = proc_z + proc_y*nprocs[2] + proc_x*nprocs[1]*nprocs[2] # Find the index shift corresponding to the block and the owner process: - index_shift = 0 + np.sum(local_sizes_per_block_per_process[:,:proc_index]) + np.sum(local_sizes_per_block_per_process[:bb,proc_index]) + index_shift = 0 + xp.sum(local_sizes_per_block_per_process[:,:proc_index]) + xp.sum(local_sizes_per_block_per_process[:bb,proc_index]) # Compute the global PETSc index: global_index = index_shift \ @@ -368,7 +368,7 @@ def vec_topetsc(vec): globalsize = vec.space.dimension # Sum over the blocks to get the total local size - localsize = np.sum(np.prod(npts_local, axis=1)) + localsize = xp.sum(xp.prod(npts_local, axis=1)) gvec = PETSc.Vec().create(comm=carts[0].global_comm) @@ -411,9 +411,9 @@ def vec_topetsc(vec): petsc_data.append(value) elif ndims[b] == 3: - for i1 in np.arange(npts_local[b][0]): - for i2 in np.arange(npts_local[b][1]): - for i3 in np.arange(npts_local[b][2]): + for i1 in xp.arange(npts_local[b][0]): + for i2 in xp.arange(npts_local[b][1]): + for i3 in xp.arange(npts_local[b][2]): value = vec_block._data[i1 + ghost_size[0], i2 + ghost_size[1], i3 + ghost_size[2]] if value != 0: i1_n = s[0] + i1 @@ -469,8 +469,8 @@ def mat_topetsc(mat): cnpts_local = get_npts_local(mat.codomain) # indexed [block, dimension]. Different for each process. 
# Get the number of points per block, per process and per dimension: - dnpts_per_block_per_process = np.array(get_npts_per_block(mat.domain)) # global variable, indexed as [block, process, dimension] - cnpts_per_block_per_process = np.array(get_npts_per_block(mat.codomain)) # global variable, indexed as [block, process, dimension] + dnpts_per_block_per_process = xp.array(get_npts_per_block(mat.domain)) # global variable, indexed as [block, process, dimension] + cnpts_per_block_per_process = xp.array(get_npts_per_block(mat.codomain)) # global variable, indexed as [block, process, dimension] # Get the index shift for each block and each process: dindex_shift = get_index_shift_per_block_per_process(mat.domain) # global variable, indexed as [block, process, dimension] @@ -479,7 +479,7 @@ def mat_topetsc(mat): globalsize = mat.shape # Sum over the blocks to get the total local size - localsize = (np.sum(np.prod(cnpts_local, axis=1)), np.sum(np.prod(dnpts_local, axis=1))) + localsize = (xp.sum(xp.prod(cnpts_local, axis=1)), xp.sum(xp.prod(dnpts_local, axis=1))) gmat = PETSc.Mat().create(comm=comm) diff --git a/psydac/linalg/utilities.py b/psydac/linalg/utilities.py index 57a9a6b86..6aa3ccf4d 100644 --- a/psydac/linalg/utilities.py +++ b/psydac/linalg/utilities.py @@ -1,6 +1,6 @@ # coding: utf-8 -import numpy as np +import cunumpy as xp from math import sqrt from psydac.linalg.basic import Vector @@ -114,11 +114,11 @@ def petsc_to_psydac(x, Xh, out=None): # Find shift for process k: # ..get number of points for each block, each process and each dimension: - npts_local_per_block_per_process = np.array(get_npts_per_block(Xh)) #indexed [b,k,d] for block b and process k and dimension d + npts_local_per_block_per_process = xp.array(get_npts_per_block(Xh)) #indexed [b,k,d] for block b and process k and dimension d # ..get local sizes for each block and each process: - local_sizes_per_block_per_process = np.prod(npts_local_per_block_per_process, axis=-1) #indexed [b,k] for block 
b and process k + local_sizes_per_block_per_process = xp.prod(npts_local_per_block_per_process, axis=-1) #indexed [b,k] for block b and process k # ..sum the sizes over all the blocks and the previous processes: - index_shift = 0 + np.sum(local_sizes_per_block_per_process[:,:comm.Get_rank()], dtype=int) #global variable + index_shift = 0 + xp.sum(local_sizes_per_block_per_process[:,:comm.Get_rank()], dtype=int) #global variable for local_petsc_index in range(localsize): block_index, psydac_index = petsc_local_to_psydac(Xh, local_petsc_index) @@ -143,11 +143,11 @@ def petsc_to_psydac(x, Xh, out=None): # Find shift for process k: # ..get number of points for each process and each dimension: - npts_local_per_block_per_process = np.array(get_npts_per_block(Xh))[0] #indexed [k,d] for process k and dimension d + npts_local_per_block_per_process = xp.array(get_npts_per_block(Xh))[0] #indexed [k,d] for process k and dimension d # ..get local sizes for each process: - local_sizes_per_block_per_process = np.prod(npts_local_per_block_per_process, axis=-1) #indexed [k] for process k + local_sizes_per_block_per_process = xp.prod(npts_local_per_block_per_process, axis=-1) #indexed [k] for process k # ..sum the sizes over all the previous processes: - index_shift = 0 + np.sum(local_sizes_per_block_per_process[:comm.Get_rank()], dtype=int) #global variable + index_shift = 0 + xp.sum(local_sizes_per_block_per_process[:comm.Get_rank()], dtype=int) #global variable for local_petsc_index in range(localsize): block_index, psydac_index = petsc_local_to_psydac(Xh, local_petsc_index) @@ -184,17 +184,17 @@ def _sym_ortho(a, b): http://www.stanford.edu/group/SOL/dissertations/sou-cheng-choi-thesis.pdf """ if b == 0: - return np.sign(a), 0, abs(a) + return xp.sign(a), 0, abs(a) elif a == 0: - return 0, np.sign(b), abs(b) + return 0, xp.sign(b), abs(b) elif abs(b) > abs(a): tau = a / b - s = np.sign(b) / sqrt(1 + tau * tau) + s = xp.sign(b) / sqrt(1 + tau * tau) c = s * tau r = b / s else: 
tau = b / a - c = np.sign(a) / sqrt(1+tau*tau) + c = xp.sign(a) / sqrt(1+tau*tau) s = c * tau r = a / c return c, s, r diff --git a/psydac/utilities/quadratures.py b/psydac/utilities/quadratures.py index fa99a273f..7e65a76af 100644 --- a/psydac/utilities/quadratures.py +++ b/psydac/utilities/quadratures.py @@ -7,7 +7,7 @@ with weights equal to 1 """ -import numpy as np +import cunumpy as xp from math import cos, pi from numpy import zeros @@ -78,11 +78,11 @@ def gauss_lobatto(k): Returns nodal abscissas {x} and weights {A} of Gauss-Legendre m-point quadrature. """ - beta = .5 / np.sqrt(1-(2 * np.arange(1., k + 1)) ** (-2)) #3-term recurrence coeffs - beta[-1] = np.sqrt((k / (2 * k-1.))) - T = np.diag(beta, 1) + np.diag(beta, -1) # jacobi matrix - D, V = np.linalg.eig(T) # eigenvalue decomposition - xg = np.real(D); i = xg.argsort(); xg.sort() # nodes (= Legendres points) + beta = .5 / xp.sqrt(1-(2 * xp.arange(1., k + 1)) ** (-2)) #3-term recurrence coeffs + beta[-1] = xp.sqrt((k / (2 * k-1.))) + T = xp.diag(beta, 1) + xp.diag(beta, -1) # jacobi matrix + D, V = xp.linalg.eig(T) # eigenvalue decomposition + xg = xp.real(D); i = xg.argsort(); xg.sort() # nodes (= Legendres points) w = 2 * (V[0, :]) ** 2; # weights return xg, w[i] @@ -102,8 +102,8 @@ def quadrature(a, k, method="legendre"): grid = a N = len(a) - xgl = np.zeros((N-1, k + 1)) - wgl = np.zeros((N-1, k + 1)) + xgl = xp.zeros((N-1, k + 1)) + wgl = xp.zeros((N-1, k + 1)) for i in range (0, N-1): xmin = grid[i];xmax = grid[i + 1];dx = 0.5 * (xmax-xmin) tab = dx * x + dx + xmin diff --git a/psydac/utilities/utils.py b/psydac/utilities/utils.py index c056c8788..0fa9e6b40 100644 --- a/psydac/utilities/utils.py +++ b/psydac/utilities/utils.py @@ -2,7 +2,7 @@ # # Copyright 2018 Yaman Güçlü -import numpy as np +import cunumpy as xp from numbers import Number __all__ = ( @@ -27,7 +27,7 @@ def is_real(x): True if x is real, False otherwise. 
""" - return isinstance(x, Number) and np.isrealobj(x) and not isinstance(x, bool) + return isinstance(x, Number) and xp.isrealobj(x) and not isinstance(x, bool) #=============================================================================== def refine_array_1d(x, n, remove_duplicates=True): @@ -57,10 +57,10 @@ def refine_array_1d(x, n, remove_duplicates=True): if not remove_duplicates: n += 1 for (a, b) in zip(x[:-1], x[1:]): - xr.extend(np.linspace(a, b, n, endpoint=not remove_duplicates)) + xr.extend(xp.linspace(a, b, n, endpoint=not remove_duplicates)) if remove_duplicates: xr.append(x[-1]) - return np.array(xr) + return xp.array(xr) #=============================================================================== def unroll_edges(domain, xgrid): @@ -69,7 +69,7 @@ def unroll_edges(domain, xgrid): xA, xB = domain - assert all(np.diff(xgrid) >= 0) + assert all(xp.diff(xgrid) >= 0) assert xA < xB assert xA <= xgrid[0] assert xgrid[-1] <= xB @@ -78,10 +78,10 @@ def unroll_edges(domain, xgrid): return xgrid elif xgrid[0] != xA: - return np.array([xgrid[-1] - (xB-xA), *xgrid]) + return xp.array([xgrid[-1] - (xB-xA), *xgrid]) elif xgrid[-1] != xB: - return np.array([*xgrid, xgrid[0] + (xB-xA)]) + return xp.array([*xgrid, xgrid[0] + (xB-xA)]) #=============================================================================== def roll_edges(domain, points): @@ -139,14 +139,14 @@ def animate_field(fields, domain, mapping, res=(150,150), vrange=None, cmap=None ax.set_aspect('equal') etas = [refine_array_1d( bounds, r ) for r,bounds in zip(res, zip(domain.min_coords, domain.max_coords))] - pcoords = np.array( [[mapping( e1,e2 ) for e2 in etas[1]] for e1 in etas[0]] ) + pcoords = xp.array( [[mapping( e1,e2 ) for e2 in etas[1]] for e1 in etas[0]] ) xx = pcoords[:,:,0] yy = pcoords[:,:,1] # determine range of values from first field - num1 = np.array( [[fields[0].fields[0]( e1,e2 ) for e2 in etas[1]] for e1 in etas[0]] ) - num2 = np.array( [[fields[0].fields[1]( e1,e2 ) for e2 
in etas[1]] for e1 in etas[0]] ) - num = np.hypot(num1, num2) + num1 = xp.array( [[fields[0].fields[0]( e1,e2 ) for e2 in etas[1]] for e1 in etas[0]] ) + num2 = xp.array( [[fields[0].fields[1]( e1,e2 ) for e2 in etas[1]] for e1 in etas[0]] ) + num = xp.hypot(num1, num2) vrange = (num.min(), num.max()) quadmesh = plt.pcolormesh(xx, yy, num, shading='gouraud', cmap=cmap, @@ -155,9 +155,9 @@ def animate_field(fields, domain, mapping, res=(150,150), vrange=None, cmap=None pbar = tqdm.tqdm(total=len(fields)) def anim_func(i): - num1 = np.array( [[fields[i].fields[0]( e1,e2 ) for e2 in etas[1]] for e1 in etas[0]] ) - num2 = np.array( [[fields[i].fields[1]( e1,e2 ) for e2 in etas[1]] for e1 in etas[0]] ) - C = np.hypot(num1, num2) + num1 = xp.array( [[fields[i].fields[0]( e1,e2 ) for e2 in etas[1]] for e1 in etas[0]] ) + num2 = xp.array( [[fields[i].fields[1]( e1,e2 ) for e2 in etas[1]] for e1 in etas[0]] ) + C = xp.hypot(num1, num2) quadmesh.set_array(C) pbar.update() if i == len(fields) - 1: diff --git a/pyproject.toml b/pyproject.toml index e7717ce09..f31c47ea5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -28,6 +28,7 @@ dependencies = [ 'pyyaml >= 5.1', 'packaging', 'pyevtk', + 'cunumpy', # Our packages from PyPi 'pyccel >= 2.0.1',