diff --git a/src/struphy/bsplines/bsplines.py b/src/struphy/bsplines/bsplines.py
index 9974a9ff2..bdcf23f87 100644
--- a/src/struphy/bsplines/bsplines.py
+++ b/src/struphy/bsplines/bsplines.py
@@ -611,8 +611,8 @@ def make_knots(breaks, degree, periodic):
 
     if periodic:
         period = breaks[-1] - breaks[0]
-        T[0:p] = [xi - period for xi in breaks[-p - 1 : -1]]
-        T[-p:] = [xi + period for xi in breaks[1 : p + 1]]
+        T[0:p] = xp.asarray([xi - period for xi in breaks[-p - 1 : -1]])
+        T[-p:] = xp.asarray([xi + period for xi in breaks[1 : p + 1]])
     else:
         T[0:p] = breaks[0]
         T[-p:] = breaks[-1]
diff --git a/src/struphy/feec/basis_projection_ops.py b/src/struphy/feec/basis_projection_ops.py
index ab0925828..97ded4aec 100644
--- a/src/struphy/feec/basis_projection_ops.py
+++ b/src/struphy/feec/basis_projection_ops.py
@@ -51,7 +51,7 @@ def __init__(self, derham, domain, verbose=True, **weights):
 
         self._rank = derham.comm.Get_rank() if derham.comm is not None else 0
 
-        if xp.any([p == 1 and Nel > 1 for p, Nel in zip(derham.p, derham.Nel)]):
+        if xp.any(xp.array([p == 1 and Nel > 1 for p, Nel in zip(derham.p, derham.Nel)])):
             if MPI.COMM_WORLD.Get_rank() == 0:
                 print(
                     f'\nWARNING: Class "BasisProjectionOperators" called with p={derham.p} (interpolation of piece-wise constants should be avoided).',
diff --git a/src/struphy/feec/psydac_derham.py b/src/struphy/feec/psydac_derham.py
index e5a8cde32..0eaaa0f25 100644
--- a/src/struphy/feec/psydac_derham.py
+++ b/src/struphy/feec/psydac_derham.py
@@ -2,6 +2,7 @@
 import importlib.metadata
 
 import cunumpy as xp
+import numpy as np
 import psydac.core.bsplines as bsp
 from psydac.ddm.cart import DomainDecomposition
 from psydac.ddm.mpi import MockComm, MockMPI
@@ -117,7 +118,7 @@ def __init__(
         if dirichlet_bc is not None:
             assert len(dirichlet_bc) == 3
             # make sure that boundary conditions are compatible with spline space
-            assert xp.all([bc == (False, False) for i, bc in enumerate(dirichlet_bc) if spl_kind[i]])
+            assert xp.all(xp.array([bc == (False, False) for i, bc in enumerate(dirichlet_bc) if spl_kind[i]]))
 
         self._dirichlet_bc = dirichlet_bc
 
@@ -2789,12 +2790,17 @@ def get_pts_and_wts(space_1d, start, end, n_quad=None, polar_shift=False):
             union_breaks = space_1d.breaks[:-1]
 
         # Make union of Greville and break points
-        tmp = set(xp.round(space_1d.histopolation_grid, decimals=14)).union(
-            xp.round(union_breaks, decimals=14),
+        # tmp = set(xp.round(space_1d.histopolation_grid, decimals=14)).union(
+        #     xp.round(union_breaks, decimals=14),
+        # )
+        # tmp = list(tmp)
+        # tmp.sort()
+        # tmp_a = xp.array(tmp)
+
+        tmp = set(xp.round(space_1d.histopolation_grid, decimals=14).tolist()).union(
+            xp.round(union_breaks, decimals=14).tolist()
         )
-
-        tmp = list(tmp)
-        tmp.sort()
+        tmp = sorted(tmp)
         tmp_a = xp.array(tmp)
 
         x_grid = tmp_a[
@@ -2822,7 +2828,13 @@ def get_pts_and_wts(space_1d, start, end, n_quad=None, polar_shift=False):
             # products of basis functions are integrated exactly
             n_quad = space_1d.degree + 1
 
-        pts_loc, wts_loc = xp.polynomial.legendre.leggauss(n_quad)
+        pts_loc, wts_loc = np.polynomial.legendre.leggauss(n_quad)
+
+        if "cupy" in xp.__name__:
+            import cupy as cp
+
+            pts_loc = cp.array(pts_loc)
+            wts_loc = cp.array(wts_loc)
 
         x, wts = bsp.quadrature_grid(x_grid, pts_loc, wts_loc)
 
diff --git a/src/struphy/geometry/base.py b/src/struphy/geometry/base.py
index d2b21688e..96a374ef3 100644
--- a/src/struphy/geometry/base.py
+++ b/src/struphy/geometry/base.py
@@ -12,6 +12,7 @@
 from struphy.geometry import evaluation_kernels, transform_kernels
 from struphy.kernel_arguments.pusher_args_kernels import DomainArguments
 from struphy.linear_algebra import linalg_kron
+from struphy.utils.pyccel import Pyccelkernel
 
 
 class Domain(metaclass=ABCMeta):
@@ -769,8 +770,8 @@ def _evaluate_metric_coefficient(self, *etas, which=0, **kwargs):
 
             # to keep C-ordering the (3, 3)-part is in the last indices
             out = xp.empty((markers.shape[0], 3, 3), dtype=float)
-
-            n_inside = evaluation_kernels.kernel_evaluate_pic(
+            kernel = Pyccelkernel(evaluation_kernels.kernel_evaluate_pic)
+            n_inside = kernel(
                 markers,
                 which,
                 self.args_domain,
@@ -813,7 +814,8 @@ def _evaluate_metric_coefficient(self, *etas, which=0, **kwargs):
                 (E1.shape[0], E2.shape[1], E3.shape[2], 3, 3),
                 dtype=float,
             )
-            evaluation_kernels.kernel_evaluate(
+            kernel = Pyccelkernel(evaluation_kernels.kernel_evaluate)
+            kernel(
                 E1,
                 E2,
                 E3,
diff --git a/src/struphy/geometry/evaluation_kernels.py b/src/struphy/geometry/evaluation_kernels.py
index 4f97b9ce9..6b7995aec 100644
--- a/src/struphy/geometry/evaluation_kernels.py
+++ b/src/struphy/geometry/evaluation_kernels.py
@@ -3,7 +3,7 @@
 corresponding to mappings (x, y, z) = F(eta_1, eta_2, eta_3).
 """
 
-from numpy import empty, shape, zeros
+from cupy import empty, shape, zeros
 from pyccel.decorators import stack_array
 
 import struphy.geometry.mappings_kernels as mappings_kernels
diff --git a/src/struphy/geometry/transform_kernels.py b/src/struphy/geometry/transform_kernels.py
index f9e6d8077..667c2528a 100644
--- a/src/struphy/geometry/transform_kernels.py
+++ b/src/struphy/geometry/transform_kernels.py
@@ -43,7 +43,14 @@
 - 2-form --> vector : (a_1, a_2, a_3) =          (a^2_1, a^2_2, a^2_3) / |det(DF)|
 """
 
-from numpy import empty, shape, sqrt, zeros
+import cunumpy as np
+from cunumpy.xp import array_backend
+
+if array_backend.backend == "cupy":
+    from cupy import empty, shape, sqrt, zeros
+else:
+    from numpy import empty, shape, sqrt, zeros
+
 from pyccel.decorators import stack_array
 
 import struphy.geometry.evaluation_kernels as evaluation_kernels
diff --git a/src/struphy/io/output_handling.py b/src/struphy/io/output_handling.py
index 808c2bcf3..4df9fa5f5 100644
--- a/src/struphy/io/output_handling.py
+++ b/src/struphy/io/output_handling.py
@@ -1,8 +1,8 @@
 import ctypes
 import os
 
-import cunumpy as xp
 import h5py
+import numpy as np
 
 
 class DataContainer:
@@ -82,11 +82,11 @@ def add_data(self, data_dict):
         Parameters
         ----------
         data_dict : dict
-            Name-object pairs to save during time stepping, e.g. {key : val}. key must be a string and val must be a xp.array of fixed shape. Scalar values (floats) must therefore be passed as 1d arrays of size 1.
+            Name-object pairs to save during time stepping, e.g. {key : val}. key must be a string and val must be a np.array of fixed shape. Scalar values (floats) must therefore be passed as 1d arrays of size 1.
         """
 
         for key, val in data_dict.items():
-            assert isinstance(val, xp.ndarray)
+            assert isinstance(val, np.ndarray)
 
             # if dataset already exists, check for compatibility with given array
             if key in self._dset_dict:
diff --git a/src/struphy/kernel_arguments/pusher_args_kernels.py b/src/struphy/kernel_arguments/pusher_args_kernels.py
index d80a5058b..03d9e5bff 100644
--- a/src/struphy/kernel_arguments/pusher_args_kernels.py
+++ b/src/struphy/kernel_arguments/pusher_args_kernels.py
@@ -1,5 +1,5 @@
 # from numpy import copy
-from numpy import empty
+import cunumpy as xp
 
 
 class MarkerArguments:
@@ -99,12 +99,12 @@ def __init__(
         self.tn3 = tn3
         self.starts = starts
 
-        self.bn1 = empty(pn[0] + 1, dtype=float)
-        self.bn2 = empty(pn[1] + 1, dtype=float)
-        self.bn3 = empty(pn[2] + 1, dtype=float)
-        self.bd1 = empty(pn[0], dtype=float)
-        self.bd2 = empty(pn[1], dtype=float)
-        self.bd3 = empty(pn[2], dtype=float)
+        self.bn1 = xp.empty(int(pn[0] + 1), dtype=float)
+        self.bn2 = xp.empty(int(pn[1] + 1), dtype=float)
+        self.bn3 = xp.empty(int(pn[2] + 1), dtype=float)
+        self.bd1 = xp.empty(int(pn[0]), dtype=float)
+        self.bd2 = xp.empty(int(pn[1]), dtype=float)
+        self.bd3 = xp.empty(int(pn[2]), dtype=float)
 
 
 class DomainArguments:
diff --git a/src/struphy/main.py b/src/struphy/main.py
index 047abea95..b513f81ac 100644
--- a/src/struphy/main.py
+++ b/src/struphy/main.py
@@ -254,12 +254,25 @@ def run(
                 absB0 = model.equil.absB0(*grids_log)
                 pointData["absB0"] = absB0
 
-        gridToVTK(os.path.join(path_out, "geometry"), *grids_phy, pointData=pointData)
+        from cunumpy.xp import array_backend
+
+        print("calling gridToVTK")
+        if array_backend.backend == "numpy":
+            gridToVTK(os.path.join(path_out, "geometry"), *grids_phy, pointData=pointData)
+        else:
+            # cupy
+            grids_phy_cpu = [g.get() if isinstance(g, xp.ndarray) else g for g in grids_phy]
+
+            # Convert pointData values to NumPy
+            pointData_cpu = {k: (v.get() if isinstance(v, xp.ndarray) else v) for k, v in pointData.items()}
+
+            # Now call gridToVTK safely
+            gridToVTK(os.path.join(path_out, "geometry"), *grids_phy_cpu, pointData=pointData_cpu)
 
     # data object for saving (will either create new hdf5 files if restart==False or open existing files if restart==True)
     # use MPI.COMM_WORLD as communicator when storing the outputs
     data = DataContainer(path_out, comm=comm)
-
+    print("ccc")
     # time quantities (current time value, value in seconds and index)
     time_state = {}
     time_state["value"] = xp.zeros(1, dtype=float)
@@ -270,8 +283,17 @@ def run(
     for key, val in time_state.items():
         key_time = "time/" + key
         key_time_restart = "restart/time/" + key
-        data.add_data({key_time: val})
-        data.add_data({key_time_restart: val})
+        if array_backend.backend == "numpy":
+            data.add_data({key_time: val})
+            data.add_data({key_time_restart: val})
+        else:
+            val_cpu = val.get()  #  if isinstance(val, xp.ndarray) else val
+
+            # Then assign
+            print(f"{val_cpu = } {type(val_cpu) = }")
+            data.add_data({key_time: val_cpu})
+            # self._file[key][0] = val_cpu[0]
+            data.add_data({key_time_restart: val_cpu})
 
     # retrieve time parameters
     dt = time_opts.dt
diff --git a/src/struphy/models/base.py b/src/struphy/models/base.py
index eb133cb45..8058a5d0a 100644
--- a/src/struphy/models/base.py
+++ b/src/struphy/models/base.py
@@ -5,6 +5,7 @@
 from functools import reduce
 from textwrap import indent
 
+import numpy as np
 import cunumpy as xp
 import yaml
 from line_profiler import profile
@@ -521,11 +522,12 @@ def update_scalar(self, name, value=None):
 
         if summands is None:
             # Ensure the value is a float if there are no summands
-            assert isinstance(value, float)
-
-            # Create a numpy array to hold the scalar value
-            value_array = xp.array([value], dtype=xp.float64)
-
+            if isinstance(value, float):
+                # Create a numpy array to hold the scalar value
+                value_array = np.array([value])
+            else:
+                value_array = np.asarray(value)
+            value_array = np.array(value_array)
             # Perform MPI operations based on the compute flags
             if "sum_world" in compute_operations and not isinstance(MPI, MockMPI):
                 MPI.COMM_WORLD.Allreduce(
@@ -562,11 +564,14 @@ def update_scalar(self, name, value=None):
 
             if "divide_n_mks" in compute_operations:
                 # Initialize the total number of markers
-                n_mks_tot = xp.array([variable.particles.Np])
+                n_mks_tot = np.array([variable.particles.Np])
                 value_array /= n_mks_tot
 
             # Update the scalar value
-            self._scalar_quantities[name]["value"][0] = value_array[0]
+            if value_array.ndim == 0:
+                self._scalar_quantities[name]["value"][0] = value_array.item()
+            else:
+                self._scalar_quantities[name]["value"][0] = value_array[0]
 
         else:
             # Sum the values of the summands
@@ -776,7 +781,7 @@ def update_distr_functions(self):
                     h2 = 1 / obj.boxes_per_dim[1]
                     h3 = 1 / obj.boxes_per_dim[2]
 
-                    ndim = xp.count_nonzero([d > 1 for d in obj.boxes_per_dim])
+                    ndim = xp.count_nonzero(xp.array([d > 1 for d in obj.boxes_per_dim]))
                     if ndim == 0:
                         kernel_type = "gaussian_3d"
                     else:
diff --git a/src/struphy/models/toy.py b/src/struphy/models/toy.py
index fd36b5d5f..5673c07a8 100644
--- a/src/struphy/models/toy.py
+++ b/src/struphy/models/toy.py
@@ -1,3 +1,4 @@
+import numpy as np
 import cunumpy as xp
 from psydac.ddm.mpi import mpi as MPI
 
@@ -157,7 +158,7 @@ def velocity_scale(self):
         return "cyclotron"
 
     def allocate_helpers(self):
-        self._tmp = xp.empty(1, dtype=float)
+        self._tmp = np.empty(1, dtype=float)
 
     def update_scalar_quantities(self):
         particles = self.kinetic_ions.var.particles
diff --git a/src/struphy/ode/utils.py b/src/struphy/ode/utils.py
index 6748d07f1..f650041d6 100644
--- a/src/struphy/ode/utils.py
+++ b/src/struphy/ode/utils.py
@@ -77,7 +77,8 @@ def __post_init__(self):
         self._a = xp.tri(self.n_stages, k=-1)
         for l, st in enumerate(a):
             assert len(st) == l + 1
-            self._a[l + 1, : l + 1] = st
+
+            self._a[l + 1, : l + 1] = xp.array(st)
 
         self._conv_rate = conv_rate
 
diff --git a/src/struphy/pic/base.py b/src/struphy/pic/base.py
index 84900418d..9a88ac318 100644
--- a/src/struphy/pic/base.py
+++ b/src/struphy/pic/base.py
@@ -235,7 +235,7 @@ def __init__(
             assert all([nboxes % nproc == 0 for nboxes, nproc in zip(self.boxes_per_dim, self.nprocs)]), (
                 f"Number of boxes {self.boxes_per_dim =} must be divisible by number of processes {self.nprocs =} in each direction."
             )
-            n_boxes = xp.prod(self.boxes_per_dim, dtype=int) * self.num_clones
+            n_boxes = xp.prod(xp.array(self.boxes_per_dim), dtype=int) * self.num_clones
 
         # if verbose:
         #     print(f"\n{self.mpi_rank = }, {n_boxes = }")
@@ -1095,7 +1095,7 @@ def _allocate_marker_array(self):
         bufsize = self.bufsize + 1.0 / xp.sqrt(n_mks_load_loc)
 
         # allocate markers array (3 x positions, vdim x velocities, weight, s0, w0, ..., ID) with buffer
-        self._n_rows = round(n_mks_load_loc * (1 + bufsize))
+        self._n_rows = round(float(n_mks_load_loc * (1 + bufsize)))
         self._markers = xp.zeros((self.n_rows, self.n_cols), dtype=float)
 
         # allocate auxiliary arrays
@@ -1917,7 +1917,7 @@ def binning(
             The reconstructed delta-f distribution function.
         """
 
-        assert xp.count_nonzero(components) == len(bin_edges)
+        assert xp.count_nonzero(xp.array(components)) == len(bin_edges)
 
         # volume of a bin
         bin_vol = 1.0
@@ -2424,7 +2424,7 @@ def _set_boxes(self):
             n_particles = self._markers_shape[0]
             n_mkr = int(n_particles / n_box_in) + 1
             n_cols = round(
-                n_mkr * (1 + 1 / xp.sqrt(n_mkr) + self._box_bufsize),
+                float(n_mkr) * (1 + 1 / float(xp.sqrt(n_mkr)) + self._box_bufsize),
             )
 
             # cartesian boxes
@@ -2643,7 +2643,17 @@ def check_and_assign_particles_to_boxes(self):
         """Check whether the box array has enough columns (detect load imbalance wrt to sorting boxes),
         and then assigne the particles to boxes."""
 
-        bcount = xp.bincount(xp.int64(self.markers_wo_holes[:, -2]))
+        from cunumpy.xp import array_backend
+
+        if array_backend.backend == "numpy":
+            bcount = xp.bincount(xp.int64(self.markers_wo_holes[:, -2]))
+        else:
+            import cupy as cp
+
+            indices = self.markers_wo_holes[:, -2]
+            indices = indices.astype(cp.int64)
+            bcount = cp.bincount(indices)
+
         max_in_box = xp.max(bcount)
         if max_in_box > self._sorting_boxes.boxes.shape[1]:
             warnings.warn(
@@ -4049,6 +4059,7 @@ def _gather_scalar_in_subcomm_array(self, scalar: int, out: xp.ndarray = None):
         _tmp[self.mpi_rank] = scalar
 
         if self.mpi_comm is not None:
+            print(f"{self.mpi_comm = }")
             self.mpi_comm.Allgather(
                 _tmp[self.mpi_rank],
                 _tmp,
diff --git a/src/struphy/propagators/propagators_markers.py b/src/struphy/propagators/propagators_markers.py
index f1dbbe5f6..6227dd385 100644
--- a/src/struphy/propagators/propagators_markers.py
+++ b/src/struphy/propagators/propagators_markers.py
@@ -104,7 +104,7 @@ def allocate(self):
         import cunumpy as xp
 
         butcher._a = xp.diag(butcher.a, k=-1)
-        butcher._a = xp.array(list(butcher.a) + [0.0])
+        butcher._a = xp.concatenate([xp.asarray(butcher.a), xp.array([0.0])])
 
         args_kernel = (
             butcher.a,
@@ -867,7 +867,7 @@ def allocate(self):
             import cunumpy as xp
 
             butcher._a = xp.diag(butcher.a, k=-1)
-            butcher._a = xp.array(list(butcher.a) + [0.0])
+            butcher._a = xp.concatenate([xp.asarray(butcher.a), xp.array([0.0])])
 
             kernel = Pyccelkernel(pusher_kernels_gc.push_gc_bxEstar_explicit_multistage)
 
@@ -1315,7 +1315,7 @@ def allocate(self):
             import cunumpy as xp
 
             butcher._a = xp.diag(butcher.a, k=-1)
-            butcher._a = xp.array(list(butcher.a) + [0.0])
+            butcher._a = xp.concatenate([xp.asarray(butcher.a), xp.array([0.0])])
 
             kernel = Pyccelkernel(pusher_kernels_gc.push_gc_Bstar_explicit_multistage)
 
diff --git a/src/struphy/utils/pyccel.py b/src/struphy/utils/pyccel.py
index 5e62426a3..defa214c4 100644
--- a/src/struphy/utils/pyccel.py
+++ b/src/struphy/utils/pyccel.py
@@ -1,14 +1,22 @@
 from typing import Any, Callable
 
+import cunumpy as xp
+
 
 class Pyccelkernel:
     def __init__(self, kernel: Callable[..., Any], use_cupy: bool = False) -> None:
         self._kernel = kernel
         self._use_cupy = use_cupy
+        if "cupy" in xp.__name__:
+            self._use_cupy = True
 
     def __call__(self, *args: Any, **kwargs: Any) -> Any:
         if self.use_cupy:
-            raise NotImplementedError
+            # Convert all args from CuPy to NumPy
+            args_np = [x.get() if isinstance(x, xp.ndarray) else x for x in args]
+            # Convert all kwargs from CuPy to NumPy
+            kwargs_np = {k: v.get() if isinstance(v, xp.ndarray) else v for k, v in kwargs.items()}
+            return self._kernel(*args_np, **kwargs_np)
         else:
             return self._kernel(*args, **kwargs)