diff --git a/gyrokinetic/apps/gk_species_damping.c b/gyrokinetic/apps/gk_species_damping.c
index cbc907ddfd..1eab86fd1d 100644
--- a/gyrokinetic/apps/gk_species_damping.c
+++ b/gyrokinetic/apps/gk_species_damping.c
@@ -1,16 +1,24 @@
 #include <assert.h>
-#include <gkyl_gyrokinetic_priv.h>
-#include <gkyl_loss_cone_mask_gyrokinetic.h>
 #include <gkyl_alloc.h>
 #include <gkyl_dg_basis_ops.h>
+#include <gkyl_gyrokinetic_priv.h>
+
+static void
+proj_on_basis_c2p_position_func(const double *xcomp, double *xphys, void *ctx)
+{
+  struct gk_proj_on_basis_c2p_func_ctx *map_ctx = ctx; // Only the pos_map member is read.
+  gkyl_position_map_eval_mc2nu(map_ctx->pos_map, xcomp, xphys);
+}
 
 void
-gk_species_damping_write_disabled(gkyl_gyrokinetic_app* app, struct gk_species *gks, double tm, int frame)
+gk_species_damping_write_disabled(gkyl_gyrokinetic_app *app, struct gk_species *gks, double tm,
+  int frame)
 {
 }
 
 void
-gk_species_damping_write_enabled(gkyl_gyrokinetic_app* app, struct gk_species *gks, double tm, int frame)
+gk_species_damping_write_enabled(gkyl_gyrokinetic_app *app, struct gk_species *gks, double tm,
+  int frame)
 {
   struct timespec wst = gkyl_wall_clock();
   // DG metadata for damping rate.
@@ -18,19 +26,21 @@ gk_species_damping_write_enabled(gkyl_gyrokinetic_app* app, struct gk_species *g
     { .key = "poly_order", .elem_type = GKYL_MP_UNSIGNED_INT, .uval = 0 },
     { .key = "basis_type", .elem_type = GKYL_MP_STRING, .cval = "serendipity" },
   };
-  int mpe_drate_len = sizeof(mpe_drate)/sizeof(mpe_drate[0]);
+  int mpe_drate_len = sizeof(mpe_drate) / sizeof(mpe_drate[0]);
   // Update app basic metada with time/frame.
   gkyl_msgpack_map_elem_set_double(app->io_meta_basic_len, app->io_meta_basic, "time", tm);
   gkyl_msgpack_map_elem_set_uint(app->io_meta_basic_len, app->io_meta_basic, "frame", frame);
   // Package metadata.
-  int io_meta_len[] = {app->io_meta_basic_len, mpe_drate_len, app->gk_geom->io_meta_len};
-  const struct gkyl_msgpack_map_elem* io_meta[] = {app->io_meta_basic, mpe_drate, app->gk_geom->io_meta};
-  struct gkyl_msgpack_data *mt = gkyl_msgpack_create_union(sizeof(io_meta_len)/sizeof(int), io_meta_len, io_meta);
+  int io_meta_len[] = { app->io_meta_basic_len, mpe_drate_len, app->gk_geom->io_meta_len };
+  const struct gkyl_msgpack_map_elem *io_meta[] = { app->io_meta_basic, mpe_drate,
+                                                    app->gk_geom->io_meta };
+  struct gkyl_msgpack_data *mt = gkyl_msgpack_create_union(sizeof(io_meta_len) / sizeof(int),
+    io_meta_len, io_meta);
 
   // Write out the damping rate.
   const char *fmt = "%s-%s_damping_rate_%d.gkyl";
   int sz = gkyl_calc_strlen(fmt, app->name, gks->info.name, frame);
-  char fileNm[sz+1]; // ensures no buffer overflow
+  char fileNm[sz + 1]; // ensures no buffer overflow
   snprintf(fileNm, sizeof fileNm, fmt, app->name, gks->info.name, frame);
 
   // Copy data from device to host before writing it out.
@@ -40,12 +50,13 @@ gk_species_damping_write_enabled(gkyl_gyrokinetic_app* app, struct gk_species *g
   gkyl_comm_array_write(gks->comm, &gks->grid, &gks->local, mt, gks->damping.rate_host, fileNm);
   app->stat.n_io += 1;
 
-  gkyl_msgpack_data_release(mt); 
+  gkyl_msgpack_data_release(mt);
   app->stat.species_diag_io_tm += gkyl_time_diff_now_sec(wst);
 }
 
 void
-gk_species_damping_write_init_only(gkyl_gyrokinetic_app* app, struct gk_species *gks, double tm, int frame)
+gk_species_damping_write_init_only(gkyl_gyrokinetic_app *app, struct gk_species *gks, double tm,
+  int frame)
 {
   gk_species_damping_write_enabled(app, gks, tm, frame);
   gks->damping.write_func = gk_species_damping_write_disabled;
@@ -72,127 +83,40 @@ gk_species_damping_init(struct gkyl_gyrokinetic_app *app, struct gk_species *gks
   // Default function pointers.
   damp->write_func = gk_species_damping_write_disabled;
 
+  damp->proj_on_basis_c2p_ctx.cdim = app->cdim;
+  damp->proj_on_basis_c2p_ctx.vdim = gks->local_vel.ndim;
+  damp->proj_on_basis_c2p_ctx.vel_map = gks->vel_map;
+  damp->proj_on_basis_c2p_ctx.pos_map = app->position_map;
+
   if (damp->type) {
     // Allocate rate array.
-    damp->rate = mkarr(app->use_gpu, num_quad==1? 1 : gks->basis.num_basis, gks->local_ext.volume);
+    damp->rate = mkarr(app->use_gpu, num_quad == 1? 1 : gks->basis.num_basis,
+      gks->local_ext.volume);
     damp->rate_host = damp->rate;
     if (app->use_gpu)
-      damp->rate_host = mkarr(false, damp->rate->ncomp, damp->rate->size); 
+      damp->rate_host = mkarr(false, damp->rate->ncomp, damp->rate->size);
 
     if (damp->type == GKYL_GK_DAMPING_USER_INPUT) {
       struct gk_proj_on_basis_c2p_func_ctx proj_on_basis_c2p_ctx; // c2p function context.
       proj_on_basis_c2p_ctx.cdim = app->cdim;
       proj_on_basis_c2p_ctx.vdim = gks->local_vel.ndim;
       proj_on_basis_c2p_ctx.vel_map = gks->vel_map;
-      gkyl_proj_on_basis *projup = gkyl_proj_on_basis_inew( &(struct gkyl_proj_on_basis_inp) {
-          .grid = &gks->grid,
-          .basis = &gks->basis,
-          .num_quad = num_quad,
-          .num_ret_vals = 1,
-          .eval = gks->info.damping.rate_profile,
-          .ctx = gks->info.damping.rate_profile_ctx,
-          .c2p_func = proj_on_basis_c2p_phase_func,
-          .c2p_func_ctx = &proj_on_basis_c2p_ctx,
-        }
-      );
+      gkyl_proj_on_basis *projup = gkyl_proj_on_basis_inew(&(struct gkyl_proj_on_basis_inp) {
+        .grid = &gks->grid,
+        .basis = &gks->basis,
+        .num_quad = num_quad,
+        .num_ret_vals = 1,
+        .eval = gks->info.damping.rate_profile,
+        .ctx = gks->info.damping.rate_profile_ctx,
+        .c2p_func = proj_on_basis_c2p_phase_func,
+        .c2p_func_ctx = &proj_on_basis_c2p_ctx,
+      });
       gkyl_proj_on_basis_advance(projup, 0.0, &gks->local, damp->rate_host);
       gkyl_proj_on_basis_release(projup);
       gkyl_array_copy(damp->rate, damp->rate_host);
 
       if (num_quad == 1)
-        gkyl_array_scale_range(damp->rate, 1.0/pow(sqrt(2.0),gks->grid.ndim), &gks->local);
-    }
-    else if (damp->type == GKYL_GK_DAMPING_LOSS_CONE) {
-      damp->evolve = true; // Since the loss cone boundary is proportional to phi(t).
-
-      // Maximum bmag and its location.
-      // NOTE: if the same max bmag occurs at multiple locations,
-      // bmag_max_coord may have different values on different MPI processes.
-      double bmag_max_coord_ho[GKYL_MAX_CDIM];
-      double bmag_max_ho = gkyl_gk_geometry_reduce_arg_bmag(app->gk_geom, GKYL_MAX, bmag_max_coord_ho);
-      double bmag_max_local = bmag_max_ho;
-      double bmag_max_global;
-      gkyl_comm_allreduce_host(app->comm, GKYL_DOUBLE, GKYL_MAX, 1, &bmag_max_local, &bmag_max_global);
-      double bmag_max_coord_local[app->cdim], bmag_max_coord_global[app->cdim];
-      if (fabs(bmag_max_ho - bmag_max_global) < 1e-16) {
-        for (int d=0; d<app->cdim; d++)
-          bmag_max_coord_local[d] = bmag_max_coord_ho[d];
-      }
-      else {
-        for (int d=0; d<app->cdim; d++)
-          bmag_max_coord_local[d] = -DBL_MAX;
-      }
-      gkyl_comm_allreduce_host(app->comm, GKYL_DOUBLE, GKYL_MAX, app->cdim, bmag_max_coord_local, bmag_max_coord_global);
-
-      if (app->use_gpu) {
-        damp->bmag_max = gkyl_cu_malloc(sizeof(double));
-        damp->bmag_max_coord = gkyl_cu_malloc(app->cdim*sizeof(double));
-	gkyl_cu_memcpy(damp->bmag_max, &bmag_max_global, sizeof(double), GKYL_CU_MEMCPY_H2D);
-	gkyl_cu_memcpy(damp->bmag_max_coord, bmag_max_coord_ho, app->cdim*sizeof(double), GKYL_CU_MEMCPY_H2D);
-      }
-      else {
-        damp->bmag_max = gkyl_malloc(sizeof(double));
-        damp->bmag_max_coord = gkyl_malloc(app->cdim*sizeof(double));
-	memcpy(damp->bmag_max, &bmag_max_global, sizeof(double));
-	memcpy(damp->bmag_max_coord, bmag_max_coord_ho, app->cdim*sizeof(double));
-      }
-
-      // Electrostatic potential at bmag_max_coord.
-      if (app->use_gpu) {
-        damp->phi_m = gkyl_cu_malloc(sizeof(double));
-        damp->phi_m_global = gkyl_cu_malloc(sizeof(double));
-      }
-      else {
-        damp->phi_m = gkyl_malloc(sizeof(double));
-        damp->phi_m_global = gkyl_malloc(sizeof(double));
-      }
-
-      // Operator that projects the loss cone mask.
-      struct gkyl_loss_cone_mask_gyrokinetic_inp inp_proj = {
-        .phase_grid = &gks->grid,
-        .conf_basis = &app->basis,
-        .phase_basis = &gks->basis,
-        .conf_range =  &app->local,
-        .conf_range_ext = &app->local_ext,
-        .vel_range = &gks->local_vel, 
-        .vel_map = gks->vel_map,
-        .bmag = app->gk_geom->geo_int.bmag,
-        .bmag_max = damp->bmag_max,
-        .bmag_max_loc = damp->bmag_max_coord,
-        .mass = gks->info.mass,
-        .charge = gks->info.charge,
-        .num_quad = num_quad,
-        .use_gpu = app->use_gpu,
-      };
-      damp->lcm_proj_op = gkyl_loss_cone_mask_gyrokinetic_inew( &inp_proj );
-
-      // Project the conf-space rate profile provided.
-      struct gkyl_array *scale_prof_high_order = mkarr(app->use_gpu, gks->basis.num_basis, gks->local_ext.volume);
-      struct gkyl_array *scale_prof_high_order_ho = app->use_gpu? mkarr(false, scale_prof_high_order->ncomp, scale_prof_high_order->size)
-                                                     : gkyl_array_acquire(scale_prof_high_order);
-      
-      gkyl_proj_on_basis *projup = gkyl_proj_on_basis_new(&gks->grid, &gks->basis, num_quad, 1, 
-        gks->info.damping.rate_profile, gks->info.damping.rate_profile_ctx);
-      gkyl_proj_on_basis_advance(projup, 0.0, &gks->local, scale_prof_high_order_ho);
-      gkyl_proj_on_basis_release(projup);
-      gkyl_array_copy(scale_prof_high_order, scale_prof_high_order_ho);
-
-      damp->scale_prof = mkarr(app->use_gpu, num_quad == 1? 1 : gks->basis.num_basis, gks->local_ext.volume);
-      gkyl_array_set_offset(damp->scale_prof, pow(sqrt(2.0),gks->grid.ndim), scale_prof_high_order, 0);
-
-      gkyl_array_release(scale_prof_high_order_ho);
-      gkyl_array_release(scale_prof_high_order);
-
-      // Compute the initial damping rate (assuming phi=0 because phi hasn't been computed).
-      // Find the potential at the mirror throat.
-      gkyl_dg_basis_ops_eval_array_at_coord_comp(app->field->phi_smooth, damp->bmag_max_coord,
-        app->basis_on_dev, &app->grid, &app->local, damp->phi_m);
-      gkyl_comm_allreduce(app->comm, GKYL_DOUBLE, GKYL_MAX, 1, damp->phi_m, damp->phi_m_global);
-      // Project the loss cone mask.
-      gkyl_loss_cone_mask_gyrokinetic_advance(damp->lcm_proj_op, &gks->local, &app->local,
-        app->field->phi_smooth, damp->phi_m_global, damp->rate);
-      // Multiply by the user's scaling profile.
-      gkyl_array_scale_by_cell(damp->rate, damp->scale_prof);
+        gkyl_array_scale_range(damp->rate, 1.0 / pow(sqrt(2.0), gks->grid.ndim), &gks->local);
     }
 
     // Set function pointers chosen at runtime.
@@ -206,7 +130,8 @@ gk_species_damping_init(struct gkyl_gyrokinetic_app *app, struct gk_species *gks
 }
 
 void
-gk_species_damping_advance(gkyl_gyrokinetic_app *app, const struct gk_species *gks, struct gk_damping *damp, 
+gk_species_damping_advance(gkyl_gyrokinetic_app *app, const struct gk_species *gks,
+  struct gk_damping *damp,
   const struct gkyl_array *phi, const struct gkyl_array *fin, struct gkyl_array *f_buffer,
   struct gkyl_array *rhs, struct gkyl_array *cflrate)
 {
@@ -217,23 +142,6 @@ gk_species_damping_advance(gkyl_gyrokinetic_app *app, const struct gk_species *g
       gkyl_array_scale_by_cell(f_buffer, damp->rate);
       gkyl_array_accumulate(rhs, -1.0, f_buffer);
     }
-    else if (damp->type == GKYL_GK_DAMPING_LOSS_CONE) {
-      // Find the potential at the mirror throat.
-      gkyl_dg_basis_ops_eval_array_at_coord_comp(phi, damp->bmag_max_coord,
-        app->basis_on_dev, &app->grid, &app->local, damp->phi_m);
-      gkyl_comm_allreduce(app->comm, GKYL_DOUBLE, GKYL_MAX, 1, damp->phi_m, damp->phi_m_global);
-
-      // Project the loss cone mask.
-      gkyl_loss_cone_mask_gyrokinetic_advance(damp->lcm_proj_op, &gks->local, &app->local,
-        phi, damp->phi_m_global, damp->rate);
-
-      // Assemble the damping term -scale_prof * mask * f.
-      gkyl_array_set(f_buffer, 1.0, fin);
-      gkyl_array_scale_by_cell(damp->rate, damp->scale_prof);
-      gkyl_array_scale_by_cell(f_buffer, damp->rate);
-      gkyl_array_accumulate(rhs, -1.0, f_buffer);
-
-    }
 
     // Add the frequency to the CFL frequency.
     gkyl_array_accumulate(cflrate, 1.0, damp->rate);
@@ -243,7 +151,7 @@ gk_species_damping_advance(gkyl_gyrokinetic_app *app, const struct gk_species *g
 }
 
 void
-gk_species_damping_write(gkyl_gyrokinetic_app* app, struct gk_species *gks, double tm, int frame)
+gk_species_damping_write(gkyl_gyrokinetic_app *app, struct gk_species *gks, double tm, int frame)
 {
   gks->damping.write_func(app, gks, tm, frame);
 }
@@ -253,27 +161,12 @@ gk_species_damping_release(const struct gkyl_gyrokinetic_app *app, const struct
 {
   if (damp->type) {
     gkyl_array_release(damp->rate);
-    if (app->use_gpu)
+    if (app->use_gpu) {
       gkyl_array_release(damp->rate_host);
+    }
 
     if (damp->type == GKYL_GK_DAMPING_USER_INPUT) {
       // Nothing to release.
     }
-    else if (damp->type == GKYL_GK_DAMPING_LOSS_CONE) {
-      if (app->use_gpu) {
-        gkyl_cu_free(damp->bmag_max);
-        gkyl_cu_free(damp->bmag_max_coord);
-        gkyl_cu_free(damp->phi_m);
-        gkyl_cu_free(damp->phi_m_global);
-      }
-      else {
-        gkyl_free(damp->bmag_max);
-        gkyl_free(damp->bmag_max_coord);
-        gkyl_free(damp->phi_m);
-        gkyl_free(damp->phi_m_global);
-      }
-      gkyl_loss_cone_mask_gyrokinetic_release(damp->lcm_proj_op);
-      gkyl_array_release(damp->scale_prof);
-    }
   }
 }
diff --git a/gyrokinetic/apps/gk_species_fdot_multiplier.c b/gyrokinetic/apps/gk_species_fdot_multiplier.c
index 1e0b2fee54..86d9da7d5a 100644
--- a/gyrokinetic/apps/gk_species_fdot_multiplier.c
+++ b/gyrokinetic/apps/gk_species_fdot_multiplier.c
@@ -1,8 +1,7 @@
 #include <assert.h>
+#include <gkyl_alloc.h>
 #include <gkyl_gyrokinetic_priv.h>
 #include <gkyl_loss_cone_mask_gyrokinetic.h>
-#include <gkyl_alloc.h>
-#include <gkyl_dg_basis_ops.h>
 
 static void
 gk_species_fdot_multiplier_write_disabled(gkyl_gyrokinetic_app *app, struct gk_species *gks,
@@ -47,7 +46,6 @@ gk_species_fdot_multiplier_write_enabled(gkyl_gyrokinetic_app *app, struct gk_sp
   app->stat.n_io += 1;
 
   gkyl_msgpack_data_release(mt);
-
   app->stat.species_diag_io_tm += gkyl_time_diff_now_sec(wst);
 }
 
@@ -126,19 +124,10 @@ gk_species_fdot_multiplier_advance_loss_cone_mult(gkyl_gyrokinetic_app *app,
   const struct gkyl_array *f, const struct gkyl_array *cflrate,
   struct gkyl_array *combined_multiplier)
 {
-  // Find the potential at the mirror throat.
-  gkyl_dg_basis_ops_eval_array_at_coord_comp(phi, fdmul->bmag_max_coord,
-    app->basis_on_dev, &app->grid, &app->local, fdmul->phi_m);
-  gkyl_comm_allreduce(app->comm, GKYL_DOUBLE, GKYL_MAX, 1, fdmul->phi_m, fdmul->phi_m_global);
-
-  // Project the loss cone mask into buffer.
-  gkyl_loss_cone_mask_gyrokinetic_advance(fdmul->lcm_proj_op, &gks->local, &app->local,
-    phi, fdmul->phi_m_global, fdmul->buffer);
-
-  // Apply constant scale factor.
+  gkyl_comm_array_allgather(app->comm, &app->local, &app->global, phi, fdmul->phi_global);
+  gkyl_loss_cone_mask_gyrokinetic_advance(fdmul->lcm_proj_op, &gks->local, &app->global,
+    fdmul->bmag_global, fdmul->phi_global, fdmul->buffer);
   gkyl_array_scale(fdmul->buffer, fdmul->time_dilation_scale_const);
-
-  // Multiply into combined.
   gkyl_array_scale_by_cell(combined_multiplier, fdmul->buffer);
 }
 
@@ -322,77 +311,24 @@ gk_species_fdot_multiplier_init_comp(gkyl_gyrokinetic_app *app, struct gk_specie
       fdmul->advance_func = gk_species_fdot_multiplier_advance_mult;
     }
     else if (fdmul->type == GKYL_GK_FDOT_MULTIPLIER_LOSS_CONE) {
-      fdmul->buffer = mkarr(app->use_gpu, basis_mult.num_basis, gks->local_ext.volume);
-
-      enum gkyl_quad_type qtype = GKYL_GAUSS_LOBATTO_QUAD;
-      int num_quad = gks->basis.poly_order + 1;
-
-      double bmag_max_coord_ho[GKYL_MAX_CDIM];
-      double bmag_max_ho = gkyl_gk_geometry_reduce_arg_bmag(app->gk_geom, GKYL_MAX,
-        bmag_max_coord_ho);
-      double bmag_max_local = bmag_max_ho;
-      double bmag_max_global;
-      gkyl_comm_allreduce_host(app->comm, GKYL_DOUBLE, GKYL_MAX, 1, &bmag_max_local,
-        &bmag_max_global);
-      double bmag_max_coord_local[app->cdim], bmag_max_coord_global[app->cdim];
-      if (fabs(bmag_max_ho - bmag_max_global) < 1e-16) {
-        for (int d = 0; d < app->cdim; d++) {
-          bmag_max_coord_local[d] = bmag_max_coord_ho[d];
-        }
-      }
-      else {
-        for (int d = 0; d < app->cdim; d++) {
-          bmag_max_coord_local[d] = -DBL_MAX;
-        }
-      }
-      gkyl_comm_allreduce_host(app->comm, GKYL_DOUBLE, GKYL_MAX, app->cdim, bmag_max_coord_local,
-        bmag_max_coord_global);
-
-      if (app->use_gpu) {
-        fdmul->bmag_max = gkyl_cu_malloc(sizeof(double));
-        fdmul->bmag_max_coord = gkyl_cu_malloc(app->cdim * sizeof(double));
-        gkyl_cu_memcpy(fdmul->bmag_max, &bmag_max_global, sizeof(double), GKYL_CU_MEMCPY_H2D);
-        gkyl_cu_memcpy(fdmul->bmag_max_coord, bmag_max_coord_ho, app->cdim * sizeof(double),
-          GKYL_CU_MEMCPY_H2D);
-      }
-      else {
-        fdmul->bmag_max = gkyl_malloc(sizeof(double));
-        fdmul->bmag_max_coord = gkyl_malloc(app->cdim * sizeof(double));
-        memcpy(fdmul->bmag_max, &bmag_max_global, sizeof(double));
-        memcpy(fdmul->bmag_max_coord, bmag_max_coord_ho, app->cdim * sizeof(double));
-      }
-
-      if (app->use_gpu) {
-        fdmul->phi_m = gkyl_cu_malloc(sizeof(double));
-        fdmul->phi_m_global = gkyl_cu_malloc(sizeof(double));
-      }
-      else {
-        fdmul->phi_m = gkyl_malloc(sizeof(double));
-        fdmul->phi_m_global = gkyl_malloc(sizeof(double));
-      }
-
+      // Operator that projects the loss cone mask.
       struct gkyl_loss_cone_mask_gyrokinetic_inp inp_proj = {
-        .phase_grid = &gks->grid,
         .conf_basis = &app->basis,
-        .phase_basis = &gks->basis,
-        .conf_range = &app->local,
-        .conf_range_ext = &app->local_ext,
-        .vel_range = &gks->local_vel,
         .vel_map = gks->vel_map,
-        .bmag = app->gk_geom->geo_int.bmag,
-        .bmag_max = fdmul->bmag_max,
-        .bmag_max_loc = fdmul->bmag_max_coord,
         .mass = gks->info.mass,
         .charge = gks->info.charge,
-        .qtype = qtype,
-        .num_quad = num_quad,
-        .cellwise_trap_loss = cellwise_const,
-        .c2p_pos_func = proj_on_basis_c2p_position_func,
-        .c2p_pos_func_ctx = &fdmul->proj_on_basis_c2p_ctx,
         .use_gpu = app->use_gpu,
       };
       fdmul->lcm_proj_op = gkyl_loss_cone_mask_gyrokinetic_inew(&inp_proj);
 
+      fdmul->buffer = mkarr(app->use_gpu, basis_mult.num_basis, gks->local_ext.volume);
+      fdmul->bmag_global = mkarr(app->use_gpu, app->gk_geom->geo_corn.bmag->ncomp,
+        app->global_ext.volume);
+      fdmul->phi_global = mkarr(app->use_gpu, app->basis.num_basis, app->global_ext.volume);
+
+      gkyl_comm_array_allgather(app->comm, &app->local, &app->global, app->gk_geom->geo_corn.bmag,
+        fdmul->bmag_global);
+
       fdmul->advance_func = gk_species_fdot_multiplier_advance_loss_cone_mult;
     }
     else if ((fdmul->type == GKYL_GK_FDOT_MULTIPLIER_FIXED_DT) ||
@@ -569,18 +505,8 @@ gk_species_fdot_multiplier_release_comp(const struct gkyl_gyrokinetic_app *app,
   }
   else if (fdmul->type == GKYL_GK_FDOT_MULTIPLIER_LOSS_CONE) {
     gkyl_array_release(fdmul->buffer);
-    if (app->use_gpu) {
-      gkyl_cu_free(fdmul->bmag_max);
-      gkyl_cu_free(fdmul->bmag_max_coord);
-      gkyl_cu_free(fdmul->phi_m);
-      gkyl_cu_free(fdmul->phi_m_global);
-    }
-    else {
-      gkyl_free(fdmul->bmag_max);
-      gkyl_free(fdmul->bmag_max_coord);
-      gkyl_free(fdmul->phi_m);
-      gkyl_free(fdmul->phi_m_global);
-    }
+    gkyl_array_release(fdmul->bmag_global);
+    gkyl_array_release(fdmul->phi_global);
     gkyl_loss_cone_mask_gyrokinetic_release(fdmul->lcm_proj_op);
   }
   else if (fdmul->type == GKYL_GK_FDOT_MULTIPLIER_CONSTANT) {
diff --git a/gyrokinetic/apps/gkyl_gyrokinetic.h b/gyrokinetic/apps/gkyl_gyrokinetic.h
index ad377d5916..4d1da19ae6 100644
--- a/gyrokinetic/apps/gkyl_gyrokinetic.h
+++ b/gyrokinetic/apps/gkyl_gyrokinetic.h
@@ -347,16 +347,13 @@ struct gkyl_gyrokinetic_positivity {
 enum gkyl_gyrokinetic_damping_type {
   GKYL_GK_DAMPING_NONE = 0,
   GKYL_GK_DAMPING_USER_INPUT,
-  GKYL_GK_DAMPING_LOSS_CONE,
 };
 
 struct gkyl_gyrokinetic_damping {
   // Add a damping term to the RHS of the gyrokinetic equation
   //   df/dt = - rate(z) * f
   // with the function rate(z) being:
-  //   - a function given by the user (type = GKYL_PROPORTIONAL_TERM_USER_INPUT).
-  //   - I_loss(z) * scale_factor * scale_profile(z), where I_loss(z) is =1 in the loss
-  //     cone and 0 in the confined region (type = GKYL_PROPORTIONAL_TERM_LOSS_CONE).
+  //   - a function given by the user (type = GKYL_GK_DAMPING_USER_INPUT).
   enum gkyl_gyrokinetic_damping_type type;
   void (*rate_profile)(double t, const double *xn, double *fout, void *ctx);
   void *rate_profile_ctx; // Context for rate_profile function.
diff --git a/gyrokinetic/apps/gkyl_gyrokinetic_priv.h b/gyrokinetic/apps/gkyl_gyrokinetic_priv.h
index 182a83f343..c251a61ac5 100644
--- a/gyrokinetic/apps/gkyl_gyrokinetic_priv.h
+++ b/gyrokinetic/apps/gkyl_gyrokinetic_priv.h
@@ -840,11 +840,7 @@ struct gk_damping {
   bool evolve; // Whether the source is time dependent.
   struct gkyl_array *rate; // Damping rate.
   struct gkyl_array *rate_host; // Host copy for use in IO and projecting.
-  struct gkyl_loss_cone_mask_gyrokinetic *lcm_proj_op; // Operator that projects the loss cone mask.
-  double *bmag_max; // Maximum magnetic field amplitude.
-  double *bmag_max_coord; // Location of bmag_max.
-  double *phi_m, *phi_m_global; // Electrostatic potential at bmag_max.
-  struct gkyl_array *scale_prof; // Conf-space scaling factor profile.
+  struct gk_proj_on_basis_c2p_func_ctx proj_on_basis_c2p_ctx; // c2p function context.
   // Functions chosen at runtime.
   void (*write_func)(gkyl_gyrokinetic_app* app, struct gk_species *gks, double tm, int frame);
 };
@@ -855,10 +851,11 @@ struct gk_fdot_multiplier_comp {
   struct gkyl_array *buffer; // Per-component working storage.
   struct gkyl_array *buffer_ho; // Host copy for use in IO and projecting.
   struct gk_proj_on_basis_c2p_func_ctx proj_on_basis_c2p_ctx; // c2p function context.
+
+  // Loss cone mask objects
   struct gkyl_loss_cone_mask_gyrokinetic *lcm_proj_op; // Operator that projects the loss cone mask.
-  double *bmag_max; // Maximum magnetic field amplitude.
-  double *bmag_max_coord; // Location of bmag_max.
-  double *phi_m, *phi_m_global; // Electrostatic potential at bmag_max.
+  struct gkyl_array *bmag_global; // Global bmag field used by the loss-cone updater.
+  struct gkyl_array *phi_global; // Global phi field used by the loss-cone updater.
 
   // Time dilation parameters (from input).
   double cfl_dt_min_value; // User-specified minimum dt value.
diff --git a/gyrokinetic/creg/rt_gk_mirror_boltz_elc_damped_1x2v_p1.c b/gyrokinetic/creg/rt_gk_mirror_boltz_elc_damped_1x2v_p1.c
index d6f94e1324..954b775f79 100644
--- a/gyrokinetic/creg/rt_gk_mirror_boltz_elc_damped_1x2v_p1.c
+++ b/gyrokinetic/creg/rt_gk_mirror_boltz_elc_damped_1x2v_p1.c
@@ -704,10 +704,7 @@ int main(int argc, char **argv)
     },
 
     .damping = {
-//      .type = GKYL_GK_DAMPING_USER_INPUT,
-//      .rate_profile = loss_cone_damping_rate_profile,
-//      .rate_profile_ctx = &ctx,
-      .type = GKYL_GK_DAMPING_LOSS_CONE,
+      .type = GKYL_GK_DAMPING_USER_INPUT,
       .rate_profile = loss_cone_damping_rate_scaling,
       .rate_profile_ctx = &ctx,
     },
diff --git a/gyrokinetic/creg/rt_gk_mirror_boltz_elc_poa_1x2v_p1.c b/gyrokinetic/creg/rt_gk_mirror_boltz_elc_poa_1x2v_p1.c
index 73f3e90e57..150c05fb17 100644
--- a/gyrokinetic/creg/rt_gk_mirror_boltz_elc_poa_1x2v_p1.c
+++ b/gyrokinetic/creg/rt_gk_mirror_boltz_elc_poa_1x2v_p1.c
@@ -30,80 +30,44 @@ struct gk_poa_phase_params {
 };
 
 // Define the context of the simulation. This is basically all the globals
-struct gk_mirror_ctx
-{
+struct gk_mirror_ctx {
   int cdim, vdim; // Dimensionality.
-
   // Plasma parameters
-  double mi; // Ion mass.
-  double me; // Electron mass.
-  double qi; // Ion charge.
-  double qe; // Electron charge.
-  double Te0; // Electron temperature.
-  double Ti0; // Ion temperature.
-  double n0; // Density.
-  double B_p; // Plasma magnetic field (mirror center).
-  double beta; // Plasma beta in the center.
-  double tau; // Temperature ratio.
-
-  // Parameters controlling initial conditions.
-  double alim;
-  double alphaIC0;
-  double alphaIC1;
-  double Ti_perp0; // Reference ion perp temperature.
-  double Ti_par0; // Reference ion par temperature.
-  double Ti_perp_m; // Ion perp temperature at the throat.
-  double Ti_par_m; // Ion par temperature at the throat.
-  double cs_m; // Ion sound speed at the throat.
-
-  double nuFrac; // Fraction multiplying collision frequency.
-  double logLambdaIon; // Ion Coulomb logarithm.
-  double nuIon; // Ion-ion collision freq.
-
-  double vti; // Ion thermal speed.
-  double vte; // Electron thermal speed.
-  double c_s; // Ion sound speed.
-  double omega_ci; // Ion gyrofrequency.
-  double rho_s; // Ion sound gyroradius.
-
+  double mi;
+  double qi;
+  double me;
+  double qe;
+  double Te0;
+  double n0;
+  double B_p;
+  double beta;
+  double tau;
+  double Ti0;
+  double nuFrac;
+  // Ion-ion collision freq.
+  double logLambdaIon;
+  double nuIon;
+  double vti;
   double RatZeq0; // Radius of the field line at Z=0.
-  double Z_min; // Minimum axial coordinate Z.
-  double Z_max; // Maximum axial coordinate Z.
-  double z_min; // Minimum value of the position along the field line.
-  double z_max; // Maximum value of the position along the field line.
-  double psi_eval; // Psi (poloidal flux) of the field line.
-  double psi_in, z_in; // Auxiliary psi and z.
-
-  // Magnetic equilibrium model.
-  double mcB;
-  double gamma;
-  double Z_m; // Axial coordinate at mirror throat.
-  double z_m; // Computational coordinate at mirror throat.
-
-  // Source parameters
-  double NSrcIon;
-  double lineLengthSrcIon;
-  double sigSrcIon;
-  double NSrcFloorIon;
-  double TSrc0Ion;
-  double TSrcFloorIon;
-
-  // Physical velocity space limits.
-  double vpar_min_ion, vpar_max_ion;
+  // Axial coordinate Z extents. Ensure that Z=0 is not on a cell boundary.
+  double z_min;
+  double z_max;
+  double psi_eval;
+  // Physical velocity space limits.
+  double vpar_max_ion;
   double mu_max_ion;
-  // Computational velocity space limits.
-  double vpar_lin_fac_inv, mu_lin_fac_inv; // Inverse factor of where linear mapping ends.
-  double vpar_pow, mu_pow; // Power of the velocity grid.
-  double vpar_min_ion_c, vpar_max_ion_c;
-  double mu_min_ion_c, mu_max_ion_c;
-
-  // Grid DOF.
+  int Npsi;
   int Nz;
   int Nvpar;
   int Nmu;
   int cells[GKYL_MAX_DIM]; // Number of cells in all directions.
   int poly_order;
 
+  // Source parameters
+  double ion_source_amplitude;
+  double ion_source_sigma;
+  double ion_source_temp;
+
   double t_end; // End time.
   int num_frames; // Number of output frames.
   int num_phases; // Number of phases.
@@ -112,6 +76,15 @@ struct gk_mirror_ctx
   double int_diag_calc_freq; // Frequency of calculating integrated diagnostics (as a factor of num_frames).
   double dt_failure_tol; // Minimum allowable fraction of initial time-step.
   int num_failures_max; // Maximum allowable number of consecutive small time-steps.
+
+  // Geometry parameters for Lorentzian mirror
+  double mcB;     // Magnetic field parameter
+  double gamma;   // Width parameter for Lorentzian profile
+  double Z_m;     // Mirror throat location
+  double Z_min;   // Minimum Z coordinate
+  double Z_max;   // Maximum Z coordinate
+  double psi_in;  // Working variable for psi integration
+  double z_in;    // Working variable for z integration
 };
 
 double
@@ -121,9 +94,10 @@ psi_RZ(double RIn, double ZIn, void *ctx)
   double mcB = app->mcB;
   double gamma = app->gamma;
   double Z_m = app->Z_m;
+
   double psi = 0.5 * pow(RIn, 2.) * mcB *
-               (1. / (M_PI * gamma * (1. + pow((ZIn - Z_m) / gamma, 2.))) +
-                1. / (M_PI * gamma * (1. + pow((ZIn + Z_m) / gamma, 2.))));
+    (1. / (M_PI * gamma * (1. + pow((ZIn - Z_m) / gamma, 2.))) +
+    1. / (M_PI * gamma * (1. + pow((ZIn + Z_m) / gamma, 2.))));
   return psi;
 }
 
@@ -131,9 +105,13 @@ double
 R_psiZ(double psiIn, double ZIn, void *ctx)
 {
   struct gk_mirror_ctx *app = ctx;
-  double Rout = sqrt(2.0 * psiIn / (app->mcB * 
-    (1.0 / (M_PI * app->gamma * (1.0 + pow((ZIn - app->Z_m) / app->gamma, 2.))) +
-     1.0 / (M_PI * app->gamma * (1.0 + pow((ZIn + app->Z_m) / app->gamma, 2.))))));
+  double mcB = app->mcB;
+  double gamma = app->gamma;
+  double Z_m = app->Z_m;
+
+  double Rout = sqrt(2. * psiIn / (mcB *
+    (1. / (M_PI * gamma * (1. + pow((ZIn - Z_m) / gamma, 2.))) +
+    1. / (M_PI * gamma * (1. + pow((ZIn + Z_m) / gamma, 2.))))));
   return Rout;
 }
 
@@ -141,17 +119,21 @@ void
 Bfield_psiZ(double psiIn, double ZIn, void *ctx, double *BRad, double *BZ, double *Bmag)
 {
   struct gk_mirror_ctx *app = ctx;
-  double Rcoord = R_psiZ(psiIn, ZIn, ctx);
   double mcB = app->mcB;
   double gamma = app->gamma;
   double Z_m = app->Z_m;
-  *BRad = -(1.0 / 2.0) * Rcoord * mcB *
-          (-2.0 * (ZIn - Z_m) / (M_PI * pow(gamma, 3.) * (pow(1.0 + pow((ZIn - Z_m) / gamma, 2.), 2.))) -
-            2.0 * (ZIn + Z_m) / (M_PI * pow(gamma, 3.) * (pow(1.0 + pow((ZIn + Z_m) / gamma, 2.), 2.))));
-  *BZ = mcB *
-        (1.0 / (M_PI * gamma * (1.0 + pow((ZIn - Z_m) / gamma, 2.))) +
-         1.0 / (M_PI * gamma * (1.0 + pow((ZIn + Z_m) / gamma, 2.))));
-  *Bmag = sqrt(pow(*BRad, 2) + pow(*BZ, 2));
+
+  double Rcoord = R_psiZ(psiIn, ZIn, ctx);
+
+  BRad[0] = -(1. / 2.) * Rcoord * mcB *
+    (-2. * (ZIn - Z_m) / (M_PI * pow(gamma, 3.) * (pow(1.0 + pow((ZIn - Z_m) / gamma, 2.), 2.))) -
+    2. * (ZIn + Z_m) / (M_PI * pow(gamma, 3.) * (pow(1.0 + pow((ZIn + Z_m) / gamma, 2.), 2.))));
+
+  BZ[0] = mcB *
+    (1. / (M_PI * gamma * (1. + pow((ZIn - Z_m) / gamma, 2.))) +
+    1. / (M_PI * gamma * (1. + pow((ZIn + Z_m) / gamma, 2.))) );
+
+  Bmag[0] = sqrt(pow(BRad[0], 2) + pow(BZ[0], 2));
 }
 
 double
@@ -168,15 +150,13 @@ double
 z_psiZ(double psiIn, double ZIn, void *ctx)
 {
   struct gk_mirror_ctx *app = ctx;
-  app->psi_in = psiIn;
   double eps = 0.0;
+  app->psi_in = psiIn;
   struct gkyl_qr_res integral;
-  if (eps <= ZIn)
-  {
+  if (eps <= ZIn) {
     integral = gkyl_dbl_exp(integrand_z_psiZ, ctx, eps, ZIn, 7, 1e-14);
   }
-  else
-  {
+  else {
     integral = gkyl_dbl_exp(integrand_z_psiZ, ctx, ZIn, eps, 7, 1e-14);
     integral.res = -integral.res;
   }
@@ -200,14 +180,12 @@ Z_psiz(double psiIn, double zIn, void *ctx)
   app->psi_in = psiIn;
   app->z_in = zIn;
   struct gkyl_qr_res Zout;
-  if (zIn >= 0.0)
-  {
+  if (0.0 <= zIn) {
     double fl = root_Z_psiz(-eps, ctx);
     double fr = root_Z_psiz(app->Z_max + eps, ctx);
     Zout = gkyl_ridders(root_Z_psiz, ctx, -eps, app->Z_max + eps, fl, fr, 1000, 1e-14);
   }
-  else
-  {
+  else {
     double fl = root_Z_psiz(app->Z_min - eps, ctx);
     double fr = root_Z_psiz(eps, ctx);
     Zout = gkyl_ridders(root_Z_psiz, ctx, app->Z_min - eps, eps, fl, fr, 1000, 1e-14);
@@ -215,235 +193,128 @@ Z_psiz(double psiIn, double zIn, void *ctx)
   return Zout.res;
 }
 
+// Geometry evaluation functions for the gk app
 void
-eval_density_ion_source(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout, void *ctx)
+mapc2p(double t, const double *xc, double *GKYL_RESTRICT xp, void *ctx)
 {
-  double z = xn[0];
+  double psi = xc[0], theta = xc[1], z = xc[2];
 
-  struct gk_mirror_ctx *app = ctx;
-  double NSrc = app->NSrcIon;
-  double zSrc = app->lineLengthSrcIon;
-  double sigSrc = app->sigSrcIon;
-  double NSrcFloor = app->NSrcFloorIon;
+  double Z = Z_psiz(psi, z, ctx);
+  double R = R_psiZ(psi, Z, ctx);
 
-  double psi = psi_RZ(app->RatZeq0, 0.0, ctx); // Magnetic flux function psi of field line.
-  double Z = Z_psiz(psi, z, ctx); // Cylindrical axial coordinate.
+  // Cartesian coordinates on plane perpendicular to Z axis.
+  double x = R * cos(theta);
+  double y = R * sin(theta);
 
-  if (fabs(Z) <= app->Z_m)
-  {
-    fout[0] = fmax(NSrcFloor, (NSrc / sqrt(2.0 * M_PI * pow(sigSrc, 2))) *
-                              exp(-pow(z - zSrc, 2) / (2.0 * pow(sigSrc, 2))));
-  }
-  else
-  {
-    fout[0] = 1e-16;
-  }
+  xp[0] = x; xp[1] = y; xp[2] = Z;
 }
 
 void
-eval_upar_ion_source(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout, void *ctx)
+bfield_func(double t, const double *xc, double *GKYL_RESTRICT fout, void *ctx)
 {
-  fout[0] = 0.0;
+  struct gk_mirror_ctx *app = ctx;
+  double z = xc[2];
+  double psi = psi_RZ(app->RatZeq0, 0.0, ctx); // Magnetic flux function psi of field line.
+  double Z = Z_psiz(psi, z, ctx);
+  double BRad, BZ, Bmag;
+  Bfield_psiZ(psi, Z, ctx, &BRad, &BZ, &Bmag);
+
+  double phi = xc[1];
+  // xc are computational coords.
+  // Set Cartesian components of magnetic field.
+  fout[0] = BRad * cos(phi);
+  fout[1] = BRad * sin(phi);
+  fout[2] = BZ;
 }
 
+// Evaluate collision frequencies
 void
-eval_temp_ion_source(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout, void *ctx)
+evalNuIon(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout, void *ctx)
 {
-  double z = xn[0];
-
   struct gk_mirror_ctx *app = ctx;
-  double sigSrc = app->sigSrcIon;
-  double TSrc0 = app->TSrc0Ion;
-  double Tfloor = app->TSrcFloorIon;
-
-  if (fabs(z) <= 2.0 * sigSrc)
-  {
-    fout[0] = TSrc0;
-  }
-  else
-  {
-    fout[0] = Tfloor;
-  }
+  fout[0] = app->nuIon;
 }
 
-// Ion initial conditions
 void
 eval_density_ion(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout, void *ctx)
 {
-  double z = xn[0];
-
   struct gk_mirror_ctx *app = ctx;
-  double z_m = app->z_m;
-  double sigma = 0.9*z_m;
-  if (fabs(z) <= sigma)
-  {
-    fout[0] = 0.5*app->n0*(1. + tanh(10. * sigma * fabs(sigma - fabs(z))));
-  }
-  else
-  {
-    fout[0] = 0.5*app->n0*exp(-5 * (fabs(sigma - fabs(z))));
-  }
+  double z = xn[0];
+  fout[0] = 1e17 * exp(-2 * pow(fabs(z), 2));
 }
 
 void
 eval_upar_ion(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout, void *ctx)
 {
-  double z = xn[0];
-
   struct gk_mirror_ctx *app = ctx;
-  double cs_m = app->cs_m;
-  double z_m = app->z_m;
-  double z_max = app->z_max;
-  if (fabs(z) <= z_m)
-  {
-    fout[0] = 0.0;
-  }
-  else
-  {
-    fout[0] = (fabs(z) / z) * cs_m * tanh(3 * (z_max - z_m) * fabs(fabs(z) - z_m));
-  }
+  fout[0] = 0.0;
 }
 
 void
-eval_temp_par_ion(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout, void *ctx)
+eval_temp_ion(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout, void *ctx)
 {
-  double z = xn[0];
-
   struct gk_mirror_ctx *app = ctx;
-  double z_m = app->z_m;
-  double Ti_par0 = app->Ti_par0;
-  double Ti_par_m = app->Ti_par_m;
-  if (fabs(z) <= z_m)
-  {
-    fout[0] = Ti_par_m+(Ti_par0-Ti_par_m)*tanh(4 * fabs(z_m - fabs(z)));
-  }
-  else
-  {
-    fout[0] = Ti_par_m;
-  }
+  fout[0] = app->Ti0;
 }
 
 void
-eval_temp_perp_ion(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout, void *ctx)
+eval_density_ion_source(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout,
+  void *ctx)
 {
-  double z = xn[0];
-
   struct gk_mirror_ctx *app = ctx;
-  double z_m = app->z_m;
-  double Ti_perp0 = app->Ti_perp0;
-  double Ti_perp_m = app->Ti_perp_m;
-  if (fabs(z) <= z_m)
-  {
-    fout[0] = Ti_perp_m - Ti_perp0*tanh(3.*fabs(z_m-fabs(z)));
+  double z = xn[0];
+  double src_amp = app->ion_source_amplitude;
+  double z_src = 0.0;
+  double src_sigma = app->ion_source_sigma;
+  double src_amp_floor = src_amp * 1e-2;
+  if (fabs(z) <= 0.98) {
+    fout[0] = src_amp * (1 - pow(fabs(z), 6) / 0.98);
   }
-  else
-  {
-    fout[0] = Ti_perp_m * GKYL_MAX2(1.e-3, exp(-5. * (fabs(z_m - fabs(z)))));
+  else {
+    fout[0] = 1e-16;
   }
 }
 
 void
-evalNuIon(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout, void *ctx)
-{
-  struct gk_mirror_ctx *app = ctx;
-  fout[0] = app->nuIon;
-}
-
-// Geometry evaluation functions for the gk app
-// mapc2p must assume a 3d input xc
-void
-mapc2p(double t, const double *xc, double *GKYL_RESTRICT xp, void *ctx)
+eval_upar_ion_source(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout,
+  void *ctx)
 {
-  double psi = xc[0];
-  double theta = xc[1];
-  double z = xc[2];
-
-  double Z = Z_psiz(psi, z, ctx);
-  double R = R_psiZ(psi, Z, ctx);
-
-  // Cartesian coordinates on plane perpendicular to Z axis.
-  double x = R * cos(theta);
-  double y = R * sin(theta);
-  xp[0] = x;
-  xp[1] = y;
-  xp[2] = Z;
-}
-
-// bmag_func must assume a 3d input xc
-void
-bmag_func(double t, const double *xc, double *GKYL_RESTRICT fout, void *ctx)
-{
-  double z = xc[2];
-
-  struct gk_mirror_ctx *app = ctx;
-  double psi = psi_RZ(app->RatZeq0, 0.0, ctx); // Magnetic flux function psi of field line.
-  double Z = Z_psiz(psi, z, ctx);
-  double BRad, BZ, Bmag;
-  Bfield_psiZ(psi, Z, ctx, &BRad, &BZ, &Bmag);
-  fout[0] = Bmag;
+  fout[0] = 0.0;
 }
 
-// bfield_func must assume a 3d input xc
 void
-bfield_func(double t, const double *xc, double *GKYL_RESTRICT fout, void *ctx)
+eval_temp_ion_source(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout,
+  void *ctx)
 {
-  double z = xc[2];
-
   struct gk_mirror_ctx *app = ctx;
-  double psi = psi_RZ(app->RatZeq0, 0.0, ctx); // Magnetic flux function psi of field line.
-  double Z = Z_psiz(psi, z, ctx);
-  double BRad, BZ, Bmag;
-  Bfield_psiZ(psi, Z, ctx, &BRad, &BZ, &Bmag);
-
-  double phi = xc[1];
-  // zc are computational coords. 
-  // Set Cartesian components of magnetic field.
-  fout[0] = BRad*cos(phi);
-  fout[1] = BRad*sin(phi);
-  fout[2] = BZ;
+  double z = xn[0];
+  double TSrc0 = app->ion_source_temp;
+  double Tfloor = TSrc0 * 1e-2;
+  if (fabs(z) <= 0.98) {
+    fout[0] = TSrc0;
+  }
+  else {
+    fout[0] = Tfloor;
+  }
 }
 
-void mapc2p_vel_ion(double t, const double *vc, double* GKYL_RESTRICT vp, void *ctx)
+void mapc2p_vel_ion(double t, const double *vc, double *GKYL_RESTRICT vp, void *ctx)
 {
   struct gk_mirror_ctx *app = ctx;
   double vpar_max_ion = app->vpar_max_ion;
   double mu_max_ion = app->mu_max_ion;
 
   double cvpar = vc[0], cmu = vc[1];
-  // Linear map up to vpar_max/lin_frac_inv, then a power grid.
-  double vpar_lin_fac_inv = app->vpar_lin_fac_inv;
-  double vpar_pow = app->vpar_pow;
-  if (fabs(cvpar) <= 1.0/vpar_lin_fac_inv)
-    vp[0] = vpar_max_ion*cvpar;
-  else if (cvpar < -1.0/vpar_lin_fac_inv)
-    vp[0] = -vpar_max_ion*pow(vpar_lin_fac_inv,vpar_pow-1)*pow(fabs(cvpar),vpar_pow);
-  else
-    vp[0] =  vpar_max_ion*pow(vpar_lin_fac_inv,vpar_pow-1)*pow(fabs(cvpar),vpar_pow);
-
-//  // Quadratic mu.
-//  vp[1] = mu_max_ion*pow(cmu,2.0);
-  // Linear map up to mu_max/lin_frac_inv, then a power grid.
-  double mu_lin_fac_inv = app->mu_lin_fac_inv;
-  double mu_pow = app->mu_pow;
-//  if (cmu <= 1.0/mu_lin_fac_inv)
-//    vp[0] = mu_max_ion*cmu;
-//  else
-//    vp[0] = mu_max_ion*pow(mu_lin_fac_inv,mu_pow-1)*pow(cmu,mu_pow);
-  double w = 0.3;
-  double f = 0.012;
-  double a = mu_max_ion*(f-1.0)/(w*w-1.0);
-  double b = mu_max_ion*(w*w-f)/(w*w-1.0);
-  if (cmu <= w)
-    vp[1] = (f*mu_max_ion/w)*cmu;
-  else
-    vp[1] = a*pow(cmu,2)+b;
-
+  double b = 1.4;
+  vp[0] = vpar_max_ion * tan(cvpar * b) / tan(b);
+  vp[1] = mu_max_ion * pow(cmu, 3);
 }
 
 struct gk_mirror_ctx
 create_ctx(void)
 {
   int cdim = 1, vdim = 2; // Dimensionality.
+  int poly_order = 1;
 
   // Universal constant parameters.
   double eps0 = GKYL_EPSILON0;
@@ -463,82 +334,39 @@ create_ctx(void)
   double tau = pow(B_p, 2.) * beta / (2.0 * mu0 * n0 * Te0) - 1.;
   double Ti0 = tau * Te0;
 
-  // Parameters controlling initial conditions.
-  double alim = 0.125;
-  double alphaIC0 = 2;
-  double alphaIC1 = 10;
-
-  double nuFrac = 1.0;
   // Ion-ion collision freq.
+  double nuFrac = 1.0;
   double logLambdaIon = 6.6 - 0.5 * log(n0 / 1e20) + 1.5 * log(Ti0 / eV);
   double nuIon = nuFrac * logLambdaIon * pow(eV, 4.) * n0 /
-                 (12 * pow(M_PI, 3. / 2.) * pow(eps0, 2.) * sqrt(mi) * pow(Ti0, 3. / 2.));
+    (12 * pow(M_PI, 3. / 2.) * pow(eps0, 2.) * sqrt(mi) * pow(Ti0, 3. / 2.));
 
   // Thermal speeds.
   double vti = sqrt(Ti0 / mi);
-  double vte = sqrt(Te0 / me);
-  double c_s = sqrt(Te0 / mi);
-
-  // Gyrofrequencies and gyroradii.
-  double omega_ci = eV * B_p / mi;
-  double rho_s = c_s / omega_ci;
-
-  // Geometry parameters.
-  double RatZeq0 = 0.10; // Radius of the field line at Z=0.
-  // Axial coordinate Z extents. Endure that Z=0 is not on
-  // the boundary of a cell (due to AD errors).
-  double Z_min = -2.5;
-  double Z_max =  2.5;
-
-  // Parameters controlling the magnetic equilibrium model.
-  double mcB = 6.51292;
-  double gamma = 0.124904;
-  double Z_m = 0.98;
-
-  // Source parameters
-  double NSrcIon = 3.1715e23 / 8.0 / 40.0 / 2.0 * 1.25;
-  double lineLengthSrcIon = 0.0;
-  double sigSrcIon = Z_m / 4.0;
-  double NSrcFloorIon = 0.05 * NSrcIon;
-  double TSrc0Ion = Ti0 * 1.25;
-  double TSrcFloorIon = TSrc0Ion / 8.0;
 
   // Grid parameters
   double vpar_max_ion = 16 * vti;
-  double vpar_min_ion = -vpar_max_ion;
   double mu_max_ion = mi * pow(3. * vti, 2.) / (2. * B_p);
+  int Nz = 64;
+  int Nvpar = 32; // 96 uniform
+  int Nmu = 16;  // 192 uniform
 
-  // Computational velocity space limits.
-  double vpar_lin_fac_inv = 4;
-  double vpar_pow = 3;
-  double vpar_min_ion_c = -1.0/pow(vpar_lin_fac_inv,(vpar_pow-1)/vpar_pow);
-  double vpar_max_ion_c =  1.0/pow(vpar_lin_fac_inv,(vpar_pow-1)/vpar_pow);
-  double mu_min_ion_c = 0.;
-  double mu_max_ion_c = 1.;
-  double mu_lin_fac_inv = 1.0/0.012;
-  double mu_pow = 2;
-//  double mu_min_ion_c = 0.0;
-//  double mu_max_ion_c = 1.0/pow(mu_lin_fac_inv,(mu_pow-1)/mu_pow);
-
-  // Grid DOF:
-  int Nz = 64; // Number of cells in z direction.
-  int Nvpar = 32; // Number of cells in parallel velocity direction.
-  int Nmu = 16;  // Number of cells in mu direction.
-  int poly_order = 1;
-
-  // Initial conditions parameter.s
-  double Ti_perp0 = 10000 * eV;
-  double Ti_par0 = 7500 * eV;
+  // Source parameters
+  double ion_source_amplitude = 1.e20;
+  double ion_source_sigma = 0.5;
+  double ion_source_temp = 5000. * eV;
 
-  // Parameters at mirror throat
-  double Ti_perp_m = 15000 * eV;
-  double Ti_par_m = 1000 * eV;
-  double z_m = 0.982544;
-  double cs_m = sqrt((Te0+3.0*Ti_par_m)/mi);
+  // Geometry parameters.
+  double RatZeq0 = 0.10; // Radius of the field line at Z=0.
+  double Z_min = -2.5;
+  double Z_max = 2.5;
+  double mcB = 3.691260;
+  double gamma = 0.226381;
+  double Z_m = 0.98;
 
-  // Factor multiplying collisionless terms.
-  double alpha_oap = 0.01;
+  // POA parameters
+  double alpha_oap = 5e-6;  // Factor multiplying collisionless terms.
   double alpha_fdp = 1.0;
+
   // Duration of each phase.
   double tau_oap = 100e-9;
   double tau_fdp = 1e-9;
@@ -553,112 +381,105 @@ create_ctx(void)
   // Whether to evolve the field.
   bool is_static_field_oap = true;
   bool is_static_field_fdp = false;
-  // Whether to enable positivity.
+
+  // Whether positivity is enabled.
   bool is_positivity_enabled_oap = false;
   bool is_positivity_enabled_fdp = true;
+
   // Type of df/dt multipler.
   enum gkyl_gyrokinetic_fdot_multiplier_type fdot_mult_type_oap = GKYL_GK_FDOT_MULTIPLIER_LOSS_CONE;
   enum gkyl_gyrokinetic_fdot_multiplier_type fdot_mult_type_fdp = GKYL_GK_FDOT_MULTIPLIER_NONE;
 
   // Calculate phase structure
-  double t_end = (tau_oap + tau_fdp)*num_cycles + tau_fdp_extra;
-  double tau_pair = tau_oap+tau_fdp; // Duration of an OAP+FDP pair.
-  int num_phases = 2*num_cycles + 1;
+  double t_end = (tau_oap + tau_fdp) * num_cycles + tau_fdp_extra;
+  double tau_pair = tau_oap + tau_fdp; // Duration of an OAP+FDP pair.
+  int num_phases = 2 * num_cycles + 1;
   int num_frames = num_cycles * (num_frames_oap + num_frames_fdp) + num_frames_fdp_extra;
 
-  struct gk_poa_phase_params *poa_phases = gkyl_malloc(num_phases * sizeof(struct gk_poa_phase_params));
-  for (int i=0; i<(num_phases-1)/2; i++) {
+  struct gk_poa_phase_params *poa_phases = gkyl_malloc(num_phases *
+    sizeof(struct gk_poa_phase_params));
+  for (int i = 0; i < (num_phases - 1) / 2; i++) {
     // OAPs.
-    poa_phases[2*i].phase = GK_POA_OAP;
-    poa_phases[2*i].num_frames = num_frames_oap;
-    poa_phases[2*i].duration = tau_oap;
-    poa_phases[2*i].alpha = alpha_oap;
-    poa_phases[2*i].is_static_field = is_static_field_oap;
-    poa_phases[2*i].fdot_mult_type = fdot_mult_type_oap;
-    poa_phases[2*i].is_positivity_enabled = is_positivity_enabled_oap;
+    poa_phases[2 * i].phase = GK_POA_OAP;
+    poa_phases[2 * i].num_frames = num_frames_oap;
+    poa_phases[2 * i].duration = tau_oap;
+    poa_phases[2 * i].alpha = alpha_oap;
+    poa_phases[2 * i].is_static_field = is_static_field_oap;
+    poa_phases[2 * i].fdot_mult_type = fdot_mult_type_oap;
+    poa_phases[2 * i].is_positivity_enabled = is_positivity_enabled_oap;
 
     // FDPs.
-    poa_phases[2*i+1].phase = GK_POA_FDP;
-    poa_phases[2*i+1].num_frames = num_frames_fdp;
-    poa_phases[2*i+1].duration = tau_fdp;
-    poa_phases[2*i+1].alpha = alpha_fdp;
-    poa_phases[2*i+1].is_static_field = is_static_field_fdp;
-    poa_phases[2*i+1].fdot_mult_type = fdot_mult_type_fdp;
-    poa_phases[2*i+1].is_positivity_enabled = is_positivity_enabled_fdp;
+    poa_phases[2 * i + 1].phase = GK_POA_FDP;
+    poa_phases[2 * i + 1].num_frames = num_frames_fdp;
+    poa_phases[2 * i + 1].duration = tau_fdp;
+    poa_phases[2 * i + 1].alpha = alpha_fdp;
+    poa_phases[2 * i + 1].is_static_field = is_static_field_fdp;
+    poa_phases[2 * i + 1].fdot_mult_type = fdot_mult_type_fdp;
+    poa_phases[2 * i + 1].is_positivity_enabled = is_positivity_enabled_fdp;
   }
-  // Add an extra, longer FDP.
-  poa_phases[num_phases-1].phase = GK_POA_FDP;
-  poa_phases[num_phases-1].num_frames = num_frames_fdp_extra;
-  poa_phases[num_phases-1].duration = tau_fdp_extra;
-  poa_phases[num_phases-1].alpha = alpha_fdp;
-  poa_phases[num_phases-1].is_static_field = is_static_field_fdp;
-  poa_phases[num_phases-1].fdot_mult_type = fdot_mult_type_fdp;
-  poa_phases[num_phases-1].is_positivity_enabled = is_positivity_enabled_fdp;
-
-  double write_phase_freq = 0.5; // Frequency of writing phase-space diagnostics (as a fraction of num_frames).
+  // The final stage is an extra, longer FDP.
+  poa_phases[num_phases - 1].phase = GK_POA_FDP;
+  poa_phases[num_phases - 1].num_frames = num_frames_fdp_extra;
+  poa_phases[num_phases - 1].duration = tau_fdp_extra;
+  poa_phases[num_phases - 1].alpha = alpha_fdp;
+  poa_phases[num_phases - 1].is_static_field = is_static_field_fdp;
+  poa_phases[num_phases - 1].fdot_mult_type = fdot_mult_type_fdp;
+  poa_phases[num_phases - 1].is_positivity_enabled = is_positivity_enabled_fdp;
+
+  double write_phase_freq = 1; // Frequency of writing phase-space diagnostics (as a fraction of num_frames).
   double int_diag_calc_freq = 5; // Frequency of calculating integrated diagnostics (as a factor of num_frames).
   double dt_failure_tol = 1.0e-4; // Minimum allowable fraction of initial time-step.
   int num_failures_max = 20; // Maximum allowable number of consecutive small time-steps.
 
   struct gk_mirror_ctx ctx = {
-    .cdim = cdim,  .vdim = vdim,
-    .mi = mi,  .qi = qi,
-    .me = me,  .qe = qe,
-    .Te0 = Te0,  .Ti0 = Ti0,  .n0 = n0,
-    .B_p = B_p,  .beta = beta,  .tau = tau,
-    .alim = alim,
-    .alphaIC0 = alphaIC0,
-    .alphaIC1 = alphaIC1,
-    .nuFrac = nuFrac,  .logLambdaIon = logLambdaIon,  .nuIon = nuIon,
-    .vti = vti,  .vte = vte,  .c_s = c_s,
-    .omega_ci = omega_ci,  .rho_s = rho_s,
+    .cdim = cdim,
+    .vdim = vdim,
+    .mi = mi,
+    .qi = qi,
+    .me = me,
+    .qe = qe,
+    .Te0 = Te0,
+    .n0 = n0,
+    .B_p = B_p,
+    .beta = beta,
+    .tau = tau,
+    .Ti0 = Ti0,
+    .nuFrac = nuFrac,
+    .logLambdaIon = logLambdaIon,
+    .nuIon = nuIon,
+    .vti = vti,
     .RatZeq0 = RatZeq0,
-    .Z_min = Z_min,  .Z_max = Z_max,
-    // Parameters controlling the magnetic equilibrium model.
-    .mcB = mcB,  .gamma = gamma,
-    .Z_m = Z_m,
-    .z_m = z_m,
-    // Initial condition parameters.
-    .Ti_perp0 = Ti_perp0,  .Ti_par0 = Ti_par0,
-    .Ti_perp_m = Ti_perp_m,  .Ti_par_m = Ti_par_m,  .cs_m = cs_m,
-    // Source parameters
-    .NSrcIon = NSrcIon,  .NSrcFloorIon = NSrcFloorIon,
-    .TSrc0Ion = TSrc0Ion,  .TSrcFloorIon = TSrcFloorIon,
-    .lineLengthSrcIon = lineLengthSrcIon,  .sigSrcIon = sigSrcIon,
-    // Physical velocity space limits.
-    .vpar_min_ion = vpar_min_ion,
     .vpar_max_ion = vpar_max_ion,
     .mu_max_ion = mu_max_ion,
-    // Computational velocity space limits.
-    .vpar_lin_fac_inv = vpar_lin_fac_inv,
-    .vpar_pow = vpar_pow,
-    .vpar_min_ion_c = vpar_min_ion_c,
-    .vpar_max_ion_c = vpar_max_ion_c,
-    .mu_lin_fac_inv = mu_lin_fac_inv,
-    .mu_pow = mu_pow,
-    .mu_min_ion_c = mu_min_ion_c,
-    .mu_max_ion_c = mu_max_ion_c,
-    // Grid DOF.
     .Nz = Nz,
     .Nvpar = Nvpar,
     .Nmu = Nmu,
-    .cells = {Nz, Nvpar, Nmu},
+    .cells = { Nz, Nvpar, Nmu },
     .poly_order = poly_order,
-    // Time integration and I/O parameters.
     .t_end = t_end,
     .num_frames = num_frames,
     .num_phases = num_phases,
     .poa_phases = poa_phases,
-    .write_phase_freq     = write_phase_freq    , 
-    .int_diag_calc_freq   = int_diag_calc_freq  , 
-    .dt_failure_tol       = dt_failure_tol      , 
-    .num_failures_max     = num_failures_max    , 
+    .write_phase_freq = write_phase_freq,
+    .int_diag_calc_freq = int_diag_calc_freq,
+    .dt_failure_tol = dt_failure_tol,
+    .num_failures_max = num_failures_max,
+
+    .ion_source_amplitude = ion_source_amplitude,
+    .ion_source_sigma = ion_source_sigma,
+    .ion_source_temp = ion_source_temp,
+
+    .mcB = mcB,
+    .gamma = gamma,
+    .Z_m = Z_m,
+    .Z_min = Z_min,
+    .Z_max = Z_max,
   };
 
   // Populate a couple more values in the context.
   ctx.psi_eval = psi_RZ(ctx.RatZeq0, 0., &ctx);
-  ctx.z_min    = z_psiZ(ctx.psi_eval, ctx.Z_min, &ctx);
-  ctx.z_max    = z_psiZ(ctx.psi_eval, ctx.Z_max, &ctx);
+  ctx.z_min = z_psiZ(ctx.psi_eval, ctx.Z_min, &ctx);
+  ctx.z_max = z_psiZ(ctx.psi_eval, ctx.Z_max, &ctx);
 
   return ctx;
 }
@@ -670,25 +491,25 @@ release_ctx(struct gk_mirror_ctx *ctx)
 }
 
 void
-calc_integrated_diagnostics(struct gkyl_tm_trigger* iot, gkyl_gyrokinetic_app* app,
+calc_integrated_diagnostics(struct gkyl_tm_trigger *iot, gkyl_gyrokinetic_app *app,
   double t_curr, bool force_calc, double dt)
 {
   if (gkyl_tm_trigger_check_and_bump(iot, t_curr) || force_calc) {
     gkyl_gyrokinetic_app_calc_field_energy(app, t_curr);
     gkyl_gyrokinetic_app_calc_integrated_mom(app, t_curr);
 
-    if ( !(dt < 0.0) )
+    if (!(dt < 0.0))
       gkyl_gyrokinetic_app_save_dt(app, t_curr, dt);
   }
 }
 
 void
-write_data(struct gkyl_tm_trigger* iot_conf, struct gkyl_tm_trigger* iot_phase,
-  gkyl_gyrokinetic_app* app, double t_curr, bool force_write)
+write_data(struct gkyl_tm_trigger *iot_conf, struct gkyl_tm_trigger *iot_phase,
+  gkyl_gyrokinetic_app *app, double t_curr, bool force_write)
 {
   bool trig_now_conf = gkyl_tm_trigger_check_and_bump(iot_conf, t_curr);
   if (trig_now_conf || force_write) {
-    int frame = (!trig_now_conf) && force_write? iot_conf->curr : iot_conf->curr-1;
+    int frame = (!trig_now_conf) && force_write? iot_conf->curr : iot_conf->curr - 1;
     gkyl_gyrokinetic_app_write_conf(app, t_curr, frame);
 
     gkyl_gyrokinetic_app_write_field_energy(app);
@@ -698,7 +519,7 @@ write_data(struct gkyl_tm_trigger* iot_conf, struct gkyl_tm_trigger* iot_phase,
 
   bool trig_now_phase = gkyl_tm_trigger_check_and_bump(iot_phase, t_curr);
   if (trig_now_phase || force_write) {
-    int frame = (!trig_now_conf) && force_write? iot_conf->curr : iot_conf->curr-1;
+    int frame = (!trig_now_conf) && force_write? iot_conf->curr : iot_conf->curr - 1;
 
     gkyl_gyrokinetic_app_write_phase(app, t_curr, frame);
   }
@@ -720,7 +541,7 @@ void reset_io_triggers(struct gk_mirror_ctx *ctx, struct time_frame_state *tfs,
   double t_end = tfs->t_end;
   int frame_curr = tfs->frame_curr;
   int num_frames = tfs->num_frames;
-  int num_int_diag_calc = ctx->int_diag_calc_freq*num_frames;
+  int num_int_diag_calc = ctx->int_diag_calc_freq * num_frames;
 
   // Prevent division by zero when frame_curr equals num_frames
   int frames_remaining = num_frames - frame_curr;
@@ -734,15 +555,16 @@ void reset_io_triggers(struct gk_mirror_ctx *ctx, struct time_frame_state *tfs,
   trig_write_phase->tcurr = t_curr;
   trig_write_phase->curr = frame_curr;
 
-  int diag_frames = GKYL_MAX2(frames_remaining, (num_int_diag_calc/num_frames) * frames_remaining);
+  int diag_frames = GKYL_MAX2(frames_remaining,
+    (num_int_diag_calc / num_frames) * frames_remaining);
   trig_calc_intdiag->dt = time_remaining / diag_frames;
   trig_calc_intdiag->tcurr = t_curr;
   trig_calc_intdiag->curr = frame_curr;
 }
 
-void run_phase(gkyl_gyrokinetic_app* app, struct gk_mirror_ctx *ctx, double num_steps,
+void run_phase(gkyl_gyrokinetic_app *app, struct gk_mirror_ctx *ctx, double num_steps,
   struct gkyl_tm_trigger *trig_write_conf, struct gkyl_tm_trigger *trig_write_phase,
-  struct gkyl_tm_trigger *trig_calc_intdiag,  struct time_frame_state *tfs,
+  struct gkyl_tm_trigger *trig_calc_intdiag, struct time_frame_state *tfs,
   struct gk_poa_phase_params *pparams)
 {
   tfs->t_end = tfs->t_curr + pparams->duration;
@@ -751,7 +573,7 @@ void run_phase(gkyl_gyrokinetic_app* app, struct gk_mirror_ctx *ctx, double num_
   // Run an OAP or FDP.
   double t_curr = tfs->t_curr;
   double t_end = tfs->t_end;
-  
+
   // Reset I/O triggers:
   reset_io_triggers(ctx, tfs, trig_write_conf, trig_write_phase, trig_calc_intdiag);
 
@@ -794,6 +616,7 @@ void run_phase(gkyl_gyrokinetic_app* app, struct gk_mirror_ctx *ctx, double num_
   int num_failures = 0, num_failures_max = ctx->num_failures_max;
 
   long step = 1;
+
   while ((t_curr < t_end) && (step <= num_steps))
   {
     if (step == 1 || step % 1 == 0)
@@ -813,8 +637,8 @@ void run_phase(gkyl_gyrokinetic_app* app, struct gk_mirror_ctx *ctx, double num_
     t_curr += status.dt_actual;
     dt = status.dt_suggested;
 
-    calc_integrated_diagnostics(trig_calc_intdiag, app, t_curr, t_curr > t_end, status.dt_actual);
-    write_data(trig_write_conf, trig_write_phase, app, t_curr, t_curr > t_end);
+    calc_integrated_diagnostics(trig_calc_intdiag, app, t_curr, t_curr >= t_end, status.dt_actual);
+    write_data(trig_write_conf, trig_write_phase, app, t_curr, t_curr >= t_end);
 
     if (dt_init < 0.0) {
       dt_init = status.dt_actual;
@@ -826,8 +650,10 @@ void run_phase(gkyl_gyrokinetic_app* app, struct gk_mirror_ctx *ctx, double num_
       gkyl_gyrokinetic_app_cout(app, stdout, " is below %g*dt_init ...", dt_failure_tol);
       gkyl_gyrokinetic_app_cout(app, stdout, " num_failures = %d\n", num_failures);
       if (num_failures >= num_failures_max) {
-        gkyl_gyrokinetic_app_cout(app, stdout, "ERROR: Time-step was below %g*dt_init ", dt_failure_tol);
-        gkyl_gyrokinetic_app_cout(app, stdout, "%d consecutive times. Aborting simulation ....\n", num_failures_max);
+        gkyl_gyrokinetic_app_cout(app, stdout, "ERROR: Time-step was below %g*dt_init ",
+          dt_failure_tol);
+        gkyl_gyrokinetic_app_cout(app, stdout, "%d consecutive times. Aborting simulation ....\n",
+          num_failures_max);
         calc_integrated_diagnostics(trig_calc_intdiag, app, t_curr, true, status.dt_actual);
         write_data(trig_write_conf, trig_write_phase, app, t_curr, true);
         break;
@@ -841,7 +667,7 @@ void run_phase(gkyl_gyrokinetic_app* app, struct gk_mirror_ctx *ctx, double num_
   }
 
   tfs->t_curr = t_curr;
-  tfs->frame_curr = tfs->frame_curr+pparams->num_frames;
+  tfs->frame_curr = tfs->frame_curr + pparams->num_frames;
 }
 
 int main(int argc, char **argv)
@@ -860,64 +686,45 @@ int main(int argc, char **argv)
   struct gk_mirror_ctx ctx = create_ctx(); // Context for init functions.
 
   int cells_x[ctx.cdim], cells_v[ctx.vdim];
-  for (int d=0; d<ctx.cdim; d++)
+  for (int d = 0; d < ctx.cdim; d++) {
     cells_x[d] = APP_ARGS_CHOOSE(app_args.xcells[d], ctx.cells[d]);
-  for (int d=0; d<ctx.vdim; d++)
-    cells_v[d] = APP_ARGS_CHOOSE(app_args.vcells[d], ctx.cells[ctx.cdim+d]);
+  }
+  for (int d = 0; d < ctx.vdim; d++) {
+    cells_v[d] = APP_ARGS_CHOOSE(app_args.vcells[d], ctx.cells[ctx.cdim + d]);
+  }
 
   // Construct communicator for use in app.
   struct gkyl_comm *comm = gkyl_gyrokinetic_comms_new(app_args.use_mpi, app_args.use_gpu, stderr);
 
   struct gkyl_gyrokinetic_species ion = {
     .name = "ion",
-    .charge = ctx.qi,  .mass = ctx.mi,
+    .charge = ctx.qi,
+    .mass = ctx.mi,
     .vdim = ctx.vdim,
-    .lower = { ctx.vpar_min_ion_c, ctx.mu_min_ion_c},
-    .upper = { ctx.vpar_max_ion_c, ctx.mu_max_ion_c},
+    .lower = { -1.0, 0.0 },
+    .upper = { 1.0, 1.0 },
     .cells = { cells_v[0], cells_v[1] },
-
     .polarization_density = ctx.n0,
 
-    .mapc2p = {
-      .mapping = mapc2p_vel_ion,
-      .ctx = &ctx,
-    },
-
     .projection = {
-      .proj_id = GKYL_PROJ_BIMAXWELLIAN,
+      .proj_id = GKYL_PROJ_MAXWELLIAN_PRIM,
       .density = eval_density_ion,
-      .upar = eval_upar_ion,
-      .temppar = eval_temp_par_ion,
-      .tempperp = eval_temp_perp_ion,
       .ctx_density = &ctx,
+      .upar = eval_upar_ion,
       .ctx_upar = &ctx,
-      .ctx_temppar = &ctx,
-      .ctx_tempperp = &ctx,
+      .temp = eval_temp_ion,
+      .ctx_temp = &ctx,
+    },
+
+    .mapc2p = {
+      .mapping = mapc2p_vel_ion,
+      .ctx = &ctx,
     },
 
     .collisionless = {
       .type = GKYL_GK_COLLISIONLESS_ES,
       .scale_factor = 1.0, // Will be replaced below.
-    },
-
-    .collisions =  {
-      .collision_id = GKYL_LBO_COLLISIONS,
-      .self_nu = evalNuIon,
-      .self_nu_ctx = &ctx,
-    },
-
-    .source = {
-      .source_id = GKYL_PROJ_SOURCE,
-      .num_sources = 1,
-      .projection[0] = {
-        .proj_id = GKYL_PROJ_MAXWELLIAN_PRIM, 
-        .density = eval_density_ion_source,
-        .upar = eval_upar_ion_source,
-        .temp = eval_temp_ion_source,
-        .ctx_density = &ctx,
-        .ctx_upar = &ctx,
-        .ctx_temp = &ctx,
-      }, 
+      .write_diagnostics = true,
     },
 
     .time_rate_multiplier = {
@@ -929,30 +736,60 @@ int main(int argc, char **argv)
       },
     },
 
-    .positivity = {
-      .type = GKYL_GK_POSITIVITY_SHIFT,
+    .collisions = {
+      .collision_id = GKYL_LBO_COLLISIONS,
+      .den_ref = ctx.n0,
+      .temp_ref = ctx.Te0,
       .write_diagnostics = true,
     },
+    .source = {
+      .source_id = GKYL_PROJ_SOURCE,
+      .num_sources = 1,
+      .projection[0] = {
+        .proj_id = GKYL_PROJ_MAXWELLIAN_PRIM,
+        .ctx_density = &ctx,
+        .density = eval_density_ion_source,
+        .ctx_upar = &ctx,
+        .upar = eval_upar_ion_source,
+        .ctx_temp = &ctx,
+        .temp = eval_temp_ion_source,
+      },
+      .diagnostics = {
+        .num_diag_moments = 6,
+        .diag_moments = { GKYL_F_MOMENT_M0, GKYL_F_MOMENT_M1, GKYL_F_MOMENT_M2, GKYL_F_MOMENT_M2PAR,
+                          GKYL_F_MOMENT_M2PERP, GKYL_F_MOMENT_BIMAXWELLIAN },
+        .num_integrated_diag_moments = 1,
+        .integrated_diag_moments = { GKYL_F_MOMENT_M0M1M2PARM2PERP },
+      },
+    },
 
     .bcs = {
       { .dir = 0, .edge = GKYL_LOWER_EDGE, .type = GKYL_BC_GK_SPECIES_SHEATH, },
       { .dir = 0, .edge = GKYL_UPPER_EDGE, .type = GKYL_BC_GK_SPECIES_SHEATH, },
     },
-
-    .num_diag_moments = 4,
-    .diag_moments = {GKYL_F_MOMENT_M1, GKYL_F_MOMENT_M2PAR, GKYL_F_MOMENT_M2PERP, GKYL_F_MOMENT_BIMAXWELLIAN},
+    .write_omega_cfl = true,
+    .num_diag_moments = 8,
+    .diag_moments = { GKYL_F_MOMENT_BIMAXWELLIAN, GKYL_F_MOMENT_M0, GKYL_F_MOMENT_M1,
+                      GKYL_F_MOMENT_M2, GKYL_F_MOMENT_M2PAR, GKYL_F_MOMENT_M2PERP,
+                      GKYL_F_MOMENT_M3PAR, GKYL_F_MOMENT_M3PERP },
+    .num_integrated_diag_moments = 1,
+    .integrated_diag_moments = { GKYL_F_MOMENT_M0M1M2PARM2PERP },
+    .time_rate_diagnostics = true,
+
+    .boundary_flux_diagnostics = {
+      .num_integrated_diag_moments = 1,
+      .integrated_diag_moments = { GKYL_F_MOMENT_M0M1M2PARM2PERP },
+    },
   };
-
   struct gkyl_gyrokinetic_field field = {
     .gkfield_id = GKYL_GK_FIELD_BOLTZMANN,
     .electron_mass = ctx.me,
     .electron_charge = ctx.qe,
     .electron_temp = ctx.Te0,
-    .is_static = false, // So solvers are allocated.
+    .is_static = false,
   };
 
-  // GK app
-  struct gkyl_gk app_inp = { 
+  struct gkyl_gk app_inp = {  // GK app
     .cdim = ctx.cdim,
     .lower = {ctx.z_min},
     .upper = {ctx.z_max},
@@ -966,14 +803,14 @@ int main(int argc, char **argv)
       .mapc2p = mapc2p, // Mapping of computational to physical space.
       .c2p_ctx = &ctx,
       .bfield_func = bfield_func, // Magnetic field.
-      .bfield_ctx = &ctx
+      .bfield_ctx = &ctx,
     },
 
     .num_periodic_dir = 0,
     .periodic_dirs = {},
 
     .num_species = 1,
-    .species = {ion},
+    .species = { ion },
 
     .field = field,
 
@@ -984,9 +821,10 @@ int main(int argc, char **argv)
     },
   };
 
-  // Create app object.
   // Set app output name from the executable name (argv[0]).
   snprintf(app_inp.name, sizeof(app_inp.name), "%s", app_args.app_name);
+
+  // Create app object.
   gkyl_gyrokinetic_app *app = gkyl_gyrokinetic_app_new(&app_inp);
 
   // Triggers for IO.
@@ -1001,10 +839,12 @@ int main(int argc, char **argv)
 
   int phase_idx_init = 0, phase_idx_end = ctx.num_phases; // Initial and final phase index.
   if (app_args.is_restart) {
-    struct gkyl_app_restart_status status = gkyl_gyrokinetic_app_read_from_frame(app, app_args.restart_frame);
+    struct gkyl_app_restart_status status = gkyl_gyrokinetic_app_read_from_frame(app,
+      app_args.restart_frame);
 
     if (status.io_status != GKYL_ARRAY_RIO_SUCCESS) {
-      gkyl_gyrokinetic_app_cout(app, stderr, "*** Failed to read restart file! (%s)\n", gkyl_array_rio_status_msg(status.io_status));
+      gkyl_gyrokinetic_app_cout(app, stderr, "*** Failed to read restart file! (%s)\n",
+        gkyl_array_rio_status_msg(status.io_status));
       goto freeresources;
     }
 
@@ -1015,14 +855,15 @@ int main(int argc, char **argv)
     double time_count = 0.0;
     int frame_count = 0;
     int pit_curr = 0;
-    for (int pit=0; pit<ctx.num_phases; pit++) {
+    for (int pit = 0; pit < ctx.num_phases; pit++) {
       time_count += ctx.poa_phases[pit].duration;
       frame_count += ctx.poa_phases[pit].num_frames;
       if ((tfs.t_curr <= time_count) && (tfs.frame_curr <= frame_count)) {
         pit_curr = pit;
         break;
       }
-    };
+    }
+    ;
     phase_idx_init = pit_curr;
 
     // Change the duration and number frames so this phase reaches the expected
@@ -1048,10 +889,11 @@ int main(int argc, char **argv)
     phase_idx_end = 1;
 
   // Loop over number of number of phases;
-  for (int pit=phase_idx_init; pit<phase_idx_end; pit++) {
+  for (int pit = phase_idx_init; pit < phase_idx_end; pit++) {
     gkyl_gyrokinetic_app_cout(app, stdout, "\nRunning phase %d @ t = %.9e ... \n", pit, tfs.t_curr);
     struct gk_poa_phase_params *phase_params = &ctx.poa_phases[pit];
-    run_phase(app, &ctx, app_args.num_steps, &trig_write_conf, &trig_write_phase, &trig_calc_intdiag, &tfs, phase_params);
+    run_phase(app, &ctx, app_args.num_steps, &trig_write_conf, &trig_write_phase,
+      &trig_calc_intdiag, &tfs, phase_params);
   }
 
   gkyl_gyrokinetic_app_stat_write(app);
@@ -1061,21 +903,22 @@ int main(int argc, char **argv)
   gkyl_gyrokinetic_app_cout(app, stdout, "Number of update calls %ld\n", stat.nup);
   gkyl_gyrokinetic_app_cout(app, stdout, "Number of forward-Euler calls %ld\n", stat.nfeuler);
   gkyl_gyrokinetic_app_cout(app, stdout, "Number of RK stage-2 failures %ld\n", stat.nstage_2_fail);
-  if (stat.nstage_2_fail > 0)
-  {
-    gkyl_gyrokinetic_app_cout(app, stdout, "Max rel dt diff for RK stage-2 failures %g\n", stat.stage_2_dt_diff[1]);
-    gkyl_gyrokinetic_app_cout(app, stdout, "Min rel dt diff for RK stage-2 failures %g\n", stat.stage_2_dt_diff[0]);
+  if (stat.nstage_2_fail > 0) {
+    gkyl_gyrokinetic_app_cout(app, stdout, "Max rel dt diff for RK stage-2 failures %g\n",
+      stat.stage_2_dt_diff[1]);
+    gkyl_gyrokinetic_app_cout(app, stdout, "Min rel dt diff for RK stage-2 failures %g\n",
+      stat.stage_2_dt_diff[0]);
   }
   gkyl_gyrokinetic_app_cout(app, stdout, "Number of RK stage-3 failures %ld\n", stat.nstage_3_fail);
   gkyl_gyrokinetic_app_cout(app, stdout, "Number of write calls %ld\n", stat.n_io);
   gkyl_gyrokinetic_app_print_timings(app, stdout);
 
-  freeresources:
+freeresources:
   // simulation complete, free app
   gkyl_gyrokinetic_app_release(app);
   gkyl_gyrokinetic_comms_release(comm);
   release_ctx(&ctx);
-  
+
 #ifdef GKYL_HAVE_MPI
   if (app_args.use_mpi)
     MPI_Finalize();
diff --git a/gyrokinetic/creg/rt_gk_mirror_boltz_elc_poa_2x2v_p1.c b/gyrokinetic/creg/rt_gk_mirror_boltz_elc_poa_2x2v_p1.c
new file mode 100644
index 0000000000..cb86762e1f
--- /dev/null
+++ b/gyrokinetic/creg/rt_gk_mirror_boltz_elc_poa_2x2v_p1.c
@@ -0,0 +1,924 @@
+#include <math.h>
+#include <stdio.h>
+#include <time.h>
+
+#include <gkyl_alloc.h>
+#include <gkyl_const.h>
+#include <gkyl_eqn_type.h>
+#include <gkyl_fem_poisson_bctype.h>
+#include <gkyl_gyrokinetic.h>
+#include <gkyl_math.h>
+
+#include <rt_arg_parse.h>
+
+// Phase tag of the pseudo orbit-averaged (POA) integrator; stored in gk_poa_phase_params.phase.
+enum gk_poa_state {
+  GK_POA_NONE = 0, // Not started yet.
+  GK_POA_OAP, // Orbit-averaged phase (OAP).
+  GK_POA_FDP, // Full-dynamics phase (FDP).
+  GK_POA_COMPLETED, // Simulation finished.
+};
+
+struct gk_poa_phase_params { // Settings for one phase of the POA cycle.
+  enum gk_poa_state phase; // Type of phase.
+  int num_frames; // Number of output frames in this phase.
+  double duration; // Duration of this phase.
+  double alpha; // Factor multiplying collisionless terms (passed as .scale_factor).
+  bool is_static_field; // Whether the field is static (passed as the field's .is_static).
+  bool is_positivity_enabled; // Whether positivity is enabled.
+  enum gkyl_gyrokinetic_fdot_multiplier_type fdot_mult_type; // Type of df/dt multiplier.
+};
+
+// Context of the simulation: holds the parameters shared by the functions in this file.
+struct gk_mirror_ctx {
+  int cdim, vdim; // Dimensionality.
+  // Plasma parameters
+  double mi;
+  double qi;
+  double me;
+  double qe;
+  double Te0;
+  double n0;
+  double B_p;
+  double beta;
+  double tau; // Ratio Ti0/Te0 (create_ctx sets Ti0 = tau * Te0).
+  double Ti0;
+  double nuFrac;
+  // Ion-ion collision freq.
+  double logLambdaIon;
+  double nuIon;
+  double vti;
+  double RatZeq0; // Radius of the field line at Z=0.
+  // Extents of the coordinate z (from z_psiZ) and of the flux psi (from psi_RZ); set in create_ctx.
+  double z_min;
+  double z_max;
+  double psi_max;
+  double psi_min;
+  // Velocity-space extents and number of cells per direction.
+  double vpar_max_ion;
+  double mu_max_ion;
+  int Npsi;
+  int Nz;
+  int Nvpar;
+  int Nmu;
+  int cells[GKYL_MAX_DIM]; // Number of cells in all directions.
+  int poly_order;
+
+  // Source parameters
+  double ion_source_amplitude;
+  double ion_source_sigma;
+  double ion_source_temp;
+
+  double t_end; // End time.
+  int num_frames; // Number of output frames.
+  int num_phases; // Number of phases.
+  struct gk_poa_phase_params *poa_phases; // Phases to run (heap array, freed by release_ctx).
+  double write_phase_freq; // Frequency of writing phase-space diagnostics (as a fraction of num_frames).
+  double int_diag_calc_freq; // Frequency of calculating integrated diagnostics (as a factor of num_frames).
+  double dt_failure_tol; // Minimum allowable fraction of initial time-step.
+  int num_failures_max; // Maximum allowable number of consecutive small time-steps.
+
+  // Geometry parameters for Lorentzian mirror
+  double mcB;     // Magnetic field parameter (overall amplitude factor in psi_RZ)
+  double gamma;   // Width parameter for Lorentzian profile
+  double Z_m;     // Mirror throat location
+  double Z_min;   // Minimum Z coordinate
+  double Z_max;   // Maximum Z coordinate
+  double psi_in;  // Scratch: psi read by integrand_z_psiZ and root_Z_psiz
+  double z_in;    // Scratch: target z read by root_Z_psiz
+};
+
+// Magnetic flux function psi(R, Z) of the two-Lorentzian mirror field; ctx is a gk_mirror_ctx.
+double psi_RZ(double RIn, double ZIn, void *ctx)
+{
+  const struct gk_mirror_ctx *params = ctx;
+  const double width = params->gamma, throat = params->Z_m;
+
+  // Lorentzian profiles centred at Z = +Z_m and Z = -Z_m.
+  double lorentz_upper = 1. / (M_PI * width * (1. + pow((ZIn - throat) / width, 2.)));
+  double lorentz_lower = 1. / (M_PI * width * (1. + pow((ZIn + throat) / width, 2.)));
+
+  double radial_factor = 0.5 * pow(RIn, 2.) * params->mcB;
+  return radial_factor * (lorentz_upper + lorentz_lower);
+}
+
+// Radius R at which psi_RZ(R, ZIn) equals psiIn (inverse of psi_RZ in R).
+double R_psiZ(double psiIn, double ZIn, void *ctx)
+{
+  const struct gk_mirror_ctx *params = ctx;
+  const double width = params->gamma, throat = params->Z_m;
+
+  // Lorentzian profiles centred at Z = +Z_m and Z = -Z_m.
+  double lorentz_upper = 1. / (M_PI * width * (1. + pow((ZIn - throat) / width, 2.)));
+  double lorentz_lower = 1. / (M_PI * width * (1. + pow((ZIn + throat) / width, 2.)));
+
+  double field_scale = params->mcB * (lorentz_upper + lorentz_lower);
+  return sqrt(2. * psiIn / field_scale);
+}
+
+// Radial and axial field components and the field magnitude at flux psiIn and height ZIn;
+// results are stored in BRad[0], BZ[0] and Bmag[0].
+void Bfield_psiZ(double psiIn, double ZIn, void *ctx, double *BRad, double *BZ, double *Bmag)
+{
+  const struct gk_mirror_ctx *params = ctx;
+  const double width = params->gamma, throat = params->Z_m, amp = params->mcB;
+  double radius = R_psiZ(psiIn, ZIn, ctx);
+
+  // Z-derivative of the upper Lorentzian and minus the Z-derivative of the lower one.
+  double dlor_upper = -2. * (ZIn - throat) /
+    (M_PI * pow(width, 3.) * (pow(1.0 + pow((ZIn - throat) / width, 2.), 2.)));
+  double neg_dlor_lower = 2. * (ZIn + throat) /
+    (M_PI * pow(width, 3.) * (pow(1.0 + pow((ZIn + throat) / width, 2.), 2.)));
+  *BRad = -(1. / 2.) * radius * amp * (dlor_upper - neg_dlor_lower);
+
+  double lor_upper = 1. / (M_PI * width * (1. + pow((ZIn - throat) / width, 2.)));
+  double lor_lower = 1. / (M_PI * width * (1. + pow((ZIn + throat) / width, 2.)));
+  *BZ = amp * (lor_upper + lor_lower);
+  *Bmag = sqrt(pow(*BRad, 2) + pow(*BZ, 2));
+}
+
+// Integrand Bmag/BZ evaluated at height ZIn for the flux value stored in ctx->psi_in.
+double integrand_z_psiZ(double ZIn, void *ctx)
+{
+  const struct gk_mirror_ctx *params = ctx;
+  double b_rad = 0.0, b_z = 0.0, b_mag = 0.0;
+
+  Bfield_psiZ(params->psi_in, ZIn, ctx, &b_rad, &b_z, &b_mag);
+  return b_mag / b_z;
+}
+
+// Coordinate z(psi, Z): signed integral of integrand_z_psiZ from 0 to ZIn at fixed psiIn.
+// Side effect: stores psiIn in ctx->psi_in, which the integrand reads.
+double z_psiZ(double psiIn, double ZIn, void *ctx)
+{
+  struct gk_mirror_ctx *params = ctx;
+  const double origin = 0.0;
+  params->psi_in = psiIn;
+
+  if (origin <= ZIn) {
+    struct gkyl_qr_res fwd = gkyl_dbl_exp(integrand_z_psiZ, ctx, origin, ZIn, 7, 1e-14);
+    return fwd.res;
+  }
+  // Otherwise integrate over [ZIn, 0] and flip the sign.
+  struct gkyl_qr_res bwd = gkyl_dbl_exp(integrand_z_psiZ, ctx, ZIn, origin, 7, 1e-14);
+  return -bwd.res;
+}
+
+// Residual z_in - z(psi_in, Z); its root in Z inverts z(Z). Reads psi_in and z_in from ctx.
+double root_Z_psiz(double Z, void *ctx)
+{
+  const struct gk_mirror_ctx *params = ctx;
+  double z_target = params->z_in;
+  return z_target - z_psiZ(params->psi_in, Z, ctx);
+}
+
+// Height Z at which z(psiIn, Z) equals zIn, found by gkyl_ridders applied to root_Z_psiz.
+// Side effect: stores psiIn and zIn in ctx (psi_in, z_in) for the residual callback.
+double Z_psiz(double psiIn, double zIn, void *ctx)
+{
+  struct gk_mirror_ctx *params = ctx;
+  // Bracket padding: Z extent divided by Nz. The original author notes that a smaller
+  // padding yields larger errors in some geometric quantities.
+  double pad = (params->Z_max - params->Z_min) / params->Nz;
+  params->psi_in = psiIn;
+  params->z_in = zIn;
+
+  // Bracket [-pad, Z_max + pad] when zIn >= 0, otherwise [Z_min - pad, pad].
+  bool upper_half = (0.0 <= zIn);
+  double lo = upper_half ? -pad : params->Z_min - pad;
+  double hi = upper_half ? params->Z_max + pad : pad;
+
+  double f_lo = root_Z_psiz(lo, ctx);
+  double f_hi = root_Z_psiz(hi, ctx);
+  struct gkyl_qr_res root = gkyl_ridders(root_Z_psiz, ctx, lo, hi, f_lo, f_hi, 1000, 1e-14);
+  return root.res;
+}
+
+// Geometry callback for the gk app: maps computational (psi, theta, z) in xc to Cartesian
+// (x, y, Z) in xp. The time argument t is unused.
+void mapc2p(double t, const double *xc, double *GKYL_RESTRICT xp, void *ctx)
+{
+  const double flux = xc[0], angle = xc[1], arc = xc[2];
+
+  // Cylindrical position (R, Z) of the point.
+  double height = Z_psiz(flux, arc, ctx);
+  double radius = R_psiZ(flux, height, ctx);
+
+  // Cartesian coordinates on the plane perpendicular to the Z axis.
+  xp[0] = radius * cos(angle);
+  xp[1] = radius * sin(angle);
+  xp[2] = height;
+}
+
+// Geometry callback for the gk app: writes the Cartesian magnetic-field components at the
+// computational point xc into fout[0..2]. The time argument t is unused.
+// NOTE(review): psi is taken on the field line through R = RatZeq0 at Z = 0 and xc[0] is not
+// read, unlike mapc2p -- confirm this is intended for this 2x setup.
+void bfield_func(double t, const double *xc, double *GKYL_RESTRICT fout, void *ctx)
+{
+  struct gk_mirror_ctx *params = ctx;
+  const double angle = xc[1], arc = xc[2];
+  double flux = psi_RZ(params->RatZeq0, 0.0, ctx);
+  double height = Z_psiz(flux, arc, ctx);
+  double b_rad = 0.0, b_z = 0.0, b_mag = 0.0;
+  Bfield_psiZ(flux, height, ctx, &b_rad, &b_z, &b_mag);
+
+  fout[0] = b_rad * cos(angle);
+  fout[1] = b_rad * sin(angle);
+  fout[2] = b_z;
+}
+
+// Collision-frequency callback: writes the constant ctx->nuIon to fout[0]; t and xn are unused.
+void evalNuIon(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout, void *ctx)
+{
+  const struct gk_mirror_ctx *params = ctx;
+
+  *fout = params->nuIon;
+}
+
+// Initial ion density: 1e17 * exp(-2 * xn[1]^2); t and ctx are unused.
+void eval_density_ion(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout,
+  void *ctx)
+{
+  double z_sq = pow(fabs(xn[1]), 2);
+  *fout = 1e17 * exp(-2 * z_sq);
+}
+
+// Initial ion parallel flow: zero everywhere; t, xn and ctx are unused.
+void eval_upar_ion(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout,
+  void *ctx)
+{
+  *fout = 0.0;
+}
+
+// Initial ion temperature: the uniform value ctx->Ti0; t and xn are unused.
+void
+eval_temp_ion(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout, void *ctx)
+{
+  *fout = ((const struct gk_mirror_ctx *) ctx)->Ti0;
+}
+
+// Ion source density profile as a function of xn[1] (called z here):
+//   |z| <= 0.98 : ion_source_amplitude * (1 - |z|^6 / 0.98)
+//   otherwise   : 1e-16
+// The time argument t is unused. Dead locals of the earlier version were removed.
+void
+eval_density_ion_source(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout,
+  void *ctx)
+{
+  const struct gk_mirror_ctx *app = ctx;
+  double abs_z = fabs(xn[1]);
+
+  if (abs_z <= 0.98) {
+    fout[0] = app->ion_source_amplitude * (1 - pow(abs_z, 6) / 0.98);
+    return;
+  }
+  fout[0] = 1e-16;
+}
+
+// Source parallel flow: zero everywhere; t, xn and ctx are unused.
+void eval_upar_ion_source(double t, const double *GKYL_RESTRICT xn,
+  double *GKYL_RESTRICT fout, void *ctx)
+{
+  *fout = 0.0;
+}
+
+// Ion source temperature: ion_source_temp where |xn[1]| <= 0.98 and 1e-2 times that value
+// elsewhere. The time argument t is unused.
+void
+eval_temp_ion_source(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout,
+  void *ctx)
+{
+  const struct gk_mirror_ctx *params = ctx;
+  const double temp_core = params->ion_source_temp;
+  const double temp_edge = temp_core * 1e-2;
+
+  // Core region is |xn[1]| <= 0.98.
+  bool in_core = fabs(xn[1]) <= 0.98;
+
+  *fout = in_core ? temp_core : temp_edge;
+}
+
+// Velocity map: computational (vc[0], vc[1]) -> physical values written to vp[0], vp[1].
+// vp[0] uses a tan() stretch scaled by vpar_max_ion; vp[1] is mu_max_ion * vc[1]^3.
+void mapc2p_vel_ion(double t, const double *vc, double *GKYL_RESTRICT vp, void *ctx)
+{
+  const struct gk_mirror_ctx *params = ctx;
+  const double stretch = 1.4; // Argument scale of the tangent map.
+  const double comp_vpar = vc[0], comp_mu = vc[1];
+
+  vp[0] = params->vpar_max_ion * tan(comp_vpar * stretch) / tan(stretch);
+  vp[1] = params->mu_max_ion * pow(comp_mu, 3);
+}
+
+struct gk_mirror_ctx // Builds the simulation context; poa_phases is freed by release_ctx().
+create_ctx(void)
+{
+  int cdim = 2, vdim = 2; // Dimensionality.
+  int poly_order = 1;
+
+  // Universal constant parameters.
+  double eps0 = GKYL_EPSILON0;
+  double mu0 = GKYL_MU0;
+  double eV = GKYL_ELEMENTARY_CHARGE;
+  double mp = GKYL_PROTON_MASS;
+  double me = GKYL_ELECTRON_MASS;
+  double qi = eV;  // ion charge
+  double qe = -eV; // electron charge
+
+  // Plasma parameters.
+  double mi = 2.014 * mp;
+  double Te0 = 940 * eV;
+  double n0 = 3e19;
+  double B_p = 0.53;
+  double beta = 0.4;
+  double tau = pow(B_p, 2.) * beta / (2.0 * mu0 * n0 * Te0) - 1.;
+  double Ti0 = tau * Te0;
+
+  // Ion-ion collision freq.
+  double nuFrac = 1.0;
+  double logLambdaIon = 6.6 - 0.5 * log(n0 / 1e20) + 1.5 * log(Ti0 / eV);
+  double nuIon = nuFrac * logLambdaIon * pow(eV, 4.) * n0 /
+    (12 * pow(M_PI, 3. / 2.) * pow(eps0, 2.) * sqrt(mi) * pow(Ti0, 3. / 2.));
+
+  // Thermal speeds.
+  double vti = sqrt(Ti0 / mi);
+
+  // Grid parameters
+  double vpar_max_ion = 16 * vti;
+  double mu_max_ion = mi * pow(3. * vti, 2.) / (2. * B_p);
+  int Nz = 64;
+  int Npsi = 4;
+  int Nvpar = 32; // 96 uniform
+  int Nmu = 16;  // 192 uniform
+
+  // Source parameters
+  double ion_source_amplitude = 1.e20;
+  double ion_source_sigma = 0.5;
+  double ion_source_temp = 5000. * eV;
+
+  // Geometry parameters.
+  double RatZeq0 = 0.10; // Radius of the field line at Z=0.
+  double Z_min = -2.5;
+  double Z_max = 2.5;
+  double mcB = 3.691260;
+  double gamma = 0.226381;
+  double Z_m = 0.98;
+
+  // POA parameters
+  double alpha_oap = 5e-6;  // Factor multiplying collisionless terms.
+  double alpha_fdp = 1.0;
+  double tau_oap = 0.001;  // Duration of each phase.
+  double tau_fdp = 7e-9;
+  double tau_fdp_extra = 2e-9;
+  int num_cycles = 2; // Number of OAP+FDP cycles to run.
+
+  // Frame counts for each phase type (specified independently)
+  int num_frames_oap = 1;        // Frames per OAP phase
+  int num_frames_fdp = 1;        // Frames per FDP phase
+  int num_frames_fdp_extra = 2;  // Frames for the extra FDP phase
+
+  // Whether the field is static (is_static_field) in each phase type.
+  bool is_static_field_oap = true;
+  bool is_static_field_fdp = false;
+
+  // Whether positivity is enabled.
+  bool is_positivity_enabled_oap = false;
+  bool is_positivity_enabled_fdp = false;
+
+  // Type of df/dt multiplier.
+  enum gkyl_gyrokinetic_fdot_multiplier_type fdot_mult_type_oap = GKYL_GK_FDOT_MULTIPLIER_LOSS_CONE;
+  enum gkyl_gyrokinetic_fdot_multiplier_type fdot_mult_type_fdp = GKYL_GK_FDOT_MULTIPLIER_NONE;
+
+  // Phase structure: num_cycles OAP+FDP pairs followed by one extra FDP.
+  double t_end = (tau_oap + tau_fdp) * num_cycles + tau_fdp_extra;
+  double tau_pair = tau_oap + tau_fdp; // Duration of an OAP+FDP pair (not used below).
+  int num_phases = 2 * num_cycles + 1;
+  int num_frames = num_cycles * (num_frames_oap + num_frames_fdp) + num_frames_fdp_extra;
+
+  struct gk_poa_phase_params *poa_phases = gkyl_malloc(num_phases *
+    sizeof(struct gk_poa_phase_params));
+  for (int i = 0; i < (num_phases - 1) / 2; i++) {
+    // OAPs occupy the even slots.
+    poa_phases[2 * i].phase = GK_POA_OAP;
+    poa_phases[2 * i].num_frames = num_frames_oap;
+    poa_phases[2 * i].duration = tau_oap;
+    poa_phases[2 * i].alpha = alpha_oap;
+    poa_phases[2 * i].is_static_field = is_static_field_oap;
+    poa_phases[2 * i].fdot_mult_type = fdot_mult_type_oap;
+    poa_phases[2 * i].is_positivity_enabled = is_positivity_enabled_oap;
+
+    // FDPs occupy the odd slots.
+    poa_phases[2 * i + 1].phase = GK_POA_FDP;
+    poa_phases[2 * i + 1].num_frames = num_frames_fdp;
+    poa_phases[2 * i + 1].duration = tau_fdp;
+    poa_phases[2 * i + 1].alpha = alpha_fdp;
+    poa_phases[2 * i + 1].is_static_field = is_static_field_fdp;
+    poa_phases[2 * i + 1].fdot_mult_type = fdot_mult_type_fdp;
+    poa_phases[2 * i + 1].is_positivity_enabled = is_positivity_enabled_fdp;
+  }
+  // The final stage is an extra FDP with its own duration and frame count.
+  poa_phases[num_phases - 1].phase = GK_POA_FDP;
+  poa_phases[num_phases - 1].num_frames = num_frames_fdp_extra;
+  poa_phases[num_phases - 1].duration = tau_fdp_extra;
+  poa_phases[num_phases - 1].alpha = alpha_fdp;
+  poa_phases[num_phases - 1].is_static_field = is_static_field_fdp;
+  poa_phases[num_phases - 1].fdot_mult_type = fdot_mult_type_fdp;
+  poa_phases[num_phases - 1].is_positivity_enabled = is_positivity_enabled_fdp;
+
+  double write_phase_freq = 1; // Frequency of writing phase-space diagnostics (as a fraction of num_frames).
+  double int_diag_calc_freq = 5; // Frequency of calculating integrated diagnostics (as a factor of num_frames).
+  double dt_failure_tol = 1.0e-4; // Minimum allowable fraction of initial time-step.
+  int num_failures_max = 20; // Maximum allowable number of consecutive small time-steps.
+
+  struct gk_mirror_ctx ctx = {
+    .cdim = cdim,
+    .vdim = vdim,
+    .mi = mi,
+    .qi = qi,
+    .me = me,
+    .qe = qe,
+    .Te0 = Te0,
+    .n0 = n0,
+    .B_p = B_p,
+    .beta = beta,
+    .tau = tau,
+    .Ti0 = Ti0,
+    .nuFrac = nuFrac,
+    .logLambdaIon = logLambdaIon,
+    .nuIon = nuIon,
+    .vti = vti,
+    .RatZeq0 = RatZeq0,
+    .vpar_max_ion = vpar_max_ion,
+    .mu_max_ion = mu_max_ion,
+    .Npsi = Npsi,
+    .Nz = Nz,
+    .Nvpar = Nvpar,
+    .Nmu = Nmu,
+    .cells = { Npsi, Nz, Nvpar, Nmu },
+    .poly_order = poly_order,
+    .t_end = t_end,
+    .num_frames = num_frames,
+    .num_phases = num_phases,
+    .poa_phases = poa_phases,
+    .write_phase_freq = write_phase_freq,
+    .int_diag_calc_freq = int_diag_calc_freq,
+    .dt_failure_tol = dt_failure_tol,
+    .num_failures_max = num_failures_max,
+
+    .ion_source_amplitude = ion_source_amplitude,
+    .ion_source_sigma = ion_source_sigma,
+    .ion_source_temp = ion_source_temp,
+
+    .mcB = mcB,
+    .gamma = gamma,
+    .Z_m = Z_m,
+    .Z_min = Z_min,
+    .Z_max = Z_max,
+  };
+
+  // These need the geometry fields of ctx, so they are filled in after the initializer.
+  ctx.psi_max = psi_RZ(ctx.RatZeq0, 0., &ctx);
+  ctx.psi_min = psi_RZ(ctx.RatZeq0 / 10, 0., &ctx);
+  ctx.z_min = z_psiZ(ctx.psi_max, ctx.Z_min, &ctx);
+  ctx.z_max = z_psiZ(ctx.psi_max, ctx.Z_max, &ctx);
+
+  return ctx;
+}
+
+// Frees the phase array held in ctx->poa_phases; the ctx struct itself is not freed.
+void release_ctx(struct gk_mirror_ctx *ctx)
+{
+  gkyl_free(ctx->poa_phases);
+}
+
+// Computes field energy and integrated moments when the trigger fires or force_calc is set;
+// also saves dt unless it is negative (main passes -1.0 when no step has been taken).
+void calc_integrated_diagnostics(struct gkyl_tm_trigger *iot, gkyl_gyrokinetic_app *app,
+  double t_curr, bool force_calc, double dt)
+{
+  bool fired = gkyl_tm_trigger_check_and_bump(iot, t_curr); // Always evaluated first.
+  if (!fired && !force_calc) return;
+
+  gkyl_gyrokinetic_app_calc_field_energy(app, t_curr);
+  gkyl_gyrokinetic_app_calc_integrated_mom(app, t_curr);
+  if (!(dt < 0.0)) gkyl_gyrokinetic_app_save_dt(app, t_curr, dt);
+}
+
+// Writes configuration-space output (plus field energy, integrated moments and dt) when the
+// conf trigger fires, and phase-space output when the phase trigger fires; force_write
+// forces both. Phase output is numbered with the conf trigger's frame counter.
+void write_data(struct gkyl_tm_trigger *iot_conf, struct gkyl_tm_trigger *iot_phase,
+  gkyl_gyrokinetic_app *app, double t_curr, bool force_write)
+{
+  // Frame is curr - 1 when the conf trigger fired (presumably because check_and_bump already
+  // advanced curr) and curr when the write is only forced.
+  const bool conf_fired = gkyl_tm_trigger_check_and_bump(iot_conf, t_curr);
+  if (force_write || conf_fired) {
+    int conf_frame = conf_fired || !force_write ? iot_conf->curr - 1 : iot_conf->curr;
+    gkyl_gyrokinetic_app_write_conf(app, t_curr, conf_frame);
+    gkyl_gyrokinetic_app_write_field_energy(app);
+    gkyl_gyrokinetic_app_write_integrated_mom(app);
+    gkyl_gyrokinetic_app_write_dt(app);
+  }
+  const bool phase_fired = gkyl_tm_trigger_check_and_bump(iot_phase, t_curr);
+  if (!(force_write || phase_fired)) return;
+  int phase_frame = conf_fired || !force_write ? iot_conf->curr - 1 : iot_conf->curr;
+  gkyl_gyrokinetic_app_write_phase(app, t_curr, phase_frame);
+}
+
+struct time_frame_state { // Time/frame bookkeeping carried from one phase to the next.
+  double t_curr; // Current simulation time.
+  double t_end; // End time of current phase.
+  int frame_curr; // Current frame.
+  int num_frames; // Cumulative number of frames at the end of current phase.
+};
+
+// Re-arms the three I/O triggers so the frames left in the current phase are spread evenly
+// over [tfs->t_curr, tfs->t_end]; frame counts are clamped to >= 1 to avoid dividing by zero.
+void reset_io_triggers(struct gk_mirror_ctx *ctx, struct time_frame_state *tfs,
+  struct gkyl_tm_trigger *trig_write_conf, struct gkyl_tm_trigger *trig_write_phase,
+  struct gkyl_tm_trigger *trig_calc_intdiag)
+{
+  double t_curr = tfs->t_curr;
+  int frame_curr = tfs->frame_curr;
+  int num_frames = GKYL_MAX2(1, tfs->num_frames); // Used as an integer divisor below.
+  int num_int_diag_calc = ctx->int_diag_calc_freq * num_frames;
+
+  // Prevent division by zero when frame_curr equals (or exceeds) num_frames.
+  int frames_remaining = GKYL_MAX2(1, tfs->num_frames - frame_curr);
+  double time_remaining = tfs->t_end - t_curr;
+
+  trig_write_conf->dt = time_remaining / frames_remaining;
+  trig_write_conf->tcurr = t_curr;
+  trig_write_conf->curr = frame_curr;
+
+  trig_write_phase->dt = time_remaining / (ctx->write_phase_freq * frames_remaining);
+  trig_write_phase->tcurr = t_curr;
+  trig_write_phase->curr = frame_curr;
+
+  int diag_frames = GKYL_MAX2(frames_remaining,
+    (num_int_diag_calc / num_frames) * frames_remaining);
+  trig_calc_intdiag->dt = time_remaining / diag_frames;
+  trig_calc_intdiag->tcurr = t_curr;
+  trig_calc_intdiag->curr = frame_curr;
+}
+
+void run_phase(gkyl_gyrokinetic_app *app, struct gk_mirror_ctx *ctx, double num_steps,
+  struct gkyl_tm_trigger *trig_write_conf, struct gkyl_tm_trigger *trig_write_phase,
+  struct gkyl_tm_trigger *trig_calc_intdiag, struct time_frame_state *tfs,
+  struct gk_poa_phase_params *pparams)
+{ // Advances the app through the single phase described by pparams and updates tfs.
+  tfs->t_end = tfs->t_curr + pparams->duration;
+  tfs->num_frames = tfs->frame_curr + pparams->num_frames;
+
+  // Local copies of the start and end time of this phase (an OAP or FDP).
+  double t_curr = tfs->t_curr;
+  double t_end = tfs->t_end;
+
+  // Reset I/O triggers:
+  reset_io_triggers(ctx, tfs, trig_write_conf, trig_write_phase, trig_calc_intdiag);
+
+  // Inputs used to reconfigure the ion species and the field for this phase.
+  struct gkyl_gyrokinetic_collisionless collisionless_inp = {
+    .type = GKYL_GK_COLLISIONLESS_ES,
+    .scale_factor = pparams->alpha,
+  };
+  struct gkyl_gyrokinetic_fdot_multiplier fdot_mult_inp = {
+    .num_multipliers = 1,
+    .multiplier[0] = {
+      .type = pparams->fdot_mult_type,
+      .cellwise_const = true,
+      .write_diagnostics = true,
+    }
+  };
+  struct gkyl_gyrokinetic_field field_inp = {
+    .gkfield_id = GKYL_GK_FIELD_BOLTZMANN,
+    .electron_mass = ctx->me,
+    .electron_charge = ctx->qe,
+    .electron_temp = ctx->Te0,
+    .polarization_bmag = ctx->B_p,
+    .is_static = pparams->is_static_field,
+  };
+  struct gkyl_gyrokinetic_positivity positivity_inp = {
+    .type = pparams->is_positivity_enabled? GKYL_GK_POSITIVITY_SHIFT : GKYL_GK_POSITIVITY_NONE,
+    .write_diagnostics = pparams->is_positivity_enabled,
+  };
+
+  gkyl_gyrokinetic_app_reset_species_collisionless(app, t_curr, "ion", collisionless_inp);
+  gkyl_gyrokinetic_app_reset_species_fdot_multiplier(app, t_curr, "ion", fdot_mult_inp);
+  gkyl_gyrokinetic_app_reset_species_positivity(app, t_curr, "ion", positivity_inp);
+  gkyl_gyrokinetic_app_reset_field(app, t_curr, field_inp);
+
+  // Initial requested time-step: the whole remaining phase.
+  double dt = t_end - t_curr;
+
+  // Initialize small time-step check.
+  double dt_init = -1.0, dt_failure_tol = ctx->dt_failure_tol;
+  int num_failures = 0, num_failures_max = ctx->num_failures_max;
+
+  long step = 1;
+  while ((t_curr < t_end) && (step <= num_steps)) {
+    gkyl_gyrokinetic_app_cout(app, stdout, "Taking time-step %ld at t = %g ...", step, t_curr);
+    dt = t_end - t_curr; // Ensure we don't step beyond t_end.
+    struct gkyl_update_status status = gkyl_gyrokinetic_update(app, dt);
+    gkyl_gyrokinetic_app_cout(app, stdout, " dt = %g\n", status.dt_actual);
+
+    if (!status.success) {
+      gkyl_gyrokinetic_app_cout(app, stdout, "** Update method failed! Aborting simulation ....\n");
+      break; // NOTE(review): only leaves this phase; the caller's phase loop is not told.
+    }
+    t_curr += status.dt_actual;
+    dt = status.dt_suggested; // NOTE(review): overwritten at the top of the next iteration.
+
+    calc_integrated_diagnostics(trig_calc_intdiag, app, t_curr, t_curr >= t_end, status.dt_actual);
+    write_data(trig_write_conf, trig_write_phase, app, t_curr, t_curr >= t_end);
+
+    if (dt_init < 0.0) {
+      dt_init = status.dt_actual; // First step of the phase sets the reference dt.
+    }
+    else if (status.dt_actual < dt_failure_tol * dt_init) {
+      num_failures += 1;
+
+      gkyl_gyrokinetic_app_cout(app, stdout, "WARNING: Time-step dt = %g", status.dt_actual);
+      gkyl_gyrokinetic_app_cout(app, stdout, " is below %g*dt_init ...", dt_failure_tol);
+      gkyl_gyrokinetic_app_cout(app, stdout, " num_failures = %d\n", num_failures);
+      if (num_failures >= num_failures_max) {
+        gkyl_gyrokinetic_app_cout(app, stdout, "ERROR: Time-step was below %g*dt_init ",
+          dt_failure_tol);
+        gkyl_gyrokinetic_app_cout(app, stdout, "%d consecutive times. Aborting simulation ....\n",
+          num_failures_max);
+        calc_integrated_diagnostics(trig_calc_intdiag, app, t_curr, true, status.dt_actual);
+        write_data(trig_write_conf, trig_write_phase, app, t_curr, true);
+        break; // NOTE(review): only leaves this phase; the caller's phase loop is not told.
+      }
+    }
+    else {
+      num_failures = 0; // A normal-sized step resets the consecutive-failure count.
+    }
+
+    step += 1;
+  }
+
+  tfs->t_curr = t_curr;
+  tfs->frame_curr = tfs->frame_curr + pparams->num_frames; // Advanced even after an early break.
+}
+
+int main(int argc, char **argv)
+{ // Builds the context and app, runs the POA phases in order, prints statistics, frees all.
+  struct gkyl_app_args app_args = parse_app_args(argc, argv);
+
+#ifdef GKYL_HAVE_MPI
+  if (app_args.use_mpi) MPI_Init(&argc, &argv);
+#endif
+
+  if (app_args.trace_mem) {
+    gkyl_cu_dev_mem_debug_set(true);
+    gkyl_mem_debug_set(true);
+  }
+
+  struct gk_mirror_ctx ctx = create_ctx(); // Context for init functions.
+
+  int cells_x[ctx.cdim], cells_v[ctx.vdim];
+  for (int d = 0; d < ctx.cdim; d++) {
+    cells_x[d] = APP_ARGS_CHOOSE(app_args.xcells[d], ctx.cells[d]);
+  }
+  for (int d = 0; d < ctx.vdim; d++) {
+    cells_v[d] = APP_ARGS_CHOOSE(app_args.vcells[d], ctx.cells[ctx.cdim + d]);
+  }
+
+  // Construct communicator for use in app.
+  struct gkyl_comm *comm = gkyl_gyrokinetic_comms_new(app_args.use_mpi, app_args.use_gpu, stderr);
+
+  struct gkyl_gyrokinetic_species ion = {
+    .name = "ion",
+    .charge = ctx.qi,
+    .mass = ctx.mi,
+    .vdim = ctx.vdim,
+    .lower = { -1.0, 0.0 },
+    .upper = { 1.0, 1.0 },
+    .cells = { cells_v[0], cells_v[1] },
+    .polarization_density = ctx.n0,
+
+    .projection = {
+      .proj_id = GKYL_PROJ_MAXWELLIAN_PRIM,
+      .density = eval_density_ion,
+      .ctx_density = &ctx,
+      .upar = eval_upar_ion,
+      .ctx_upar = &ctx,
+      .temp = eval_temp_ion,
+      .ctx_temp = &ctx,
+    },
+
+    .mapc2p = {
+      .mapping = mapc2p_vel_ion,
+      .ctx = &ctx,
+    },
+
+    .collisionless = {
+      .type = GKYL_GK_COLLISIONLESS_ES,
+      .scale_factor = 1.0, // Reset at the start of every phase by run_phase.
+      .write_diagnostics = true,
+    },
+
+    .time_rate_multiplier = {
+      .num_multipliers = 1,
+      .multiplier[0] = {
+        .type = GKYL_GK_FDOT_MULTIPLIER_LOSS_CONE,
+        .cellwise_const = true,
+        .write_diagnostics = true,
+      }
+    },
+
+    .collisions = {
+      .collision_id = GKYL_LBO_COLLISIONS,
+      .den_ref = ctx.n0,
+      .temp_ref = ctx.Te0,
+      .write_diagnostics = true,
+    },
+    .source = {
+      .source_id = GKYL_PROJ_SOURCE,
+      .num_sources = 1,
+      .projection[0] = {
+        .proj_id = GKYL_PROJ_MAXWELLIAN_PRIM,
+        .ctx_density = &ctx,
+        .density = eval_density_ion_source,
+        .ctx_upar = &ctx,
+        .upar = eval_upar_ion_source,
+        .ctx_temp = &ctx,
+        .temp = eval_temp_ion_source,
+      },
+      .diagnostics = {
+        .num_diag_moments = 6,
+        .diag_moments = { GKYL_F_MOMENT_M0, GKYL_F_MOMENT_M1, GKYL_F_MOMENT_M2, GKYL_F_MOMENT_M2PAR,
+                          GKYL_F_MOMENT_M2PERP, GKYL_F_MOMENT_BIMAXWELLIAN },
+        .num_integrated_diag_moments = 1,
+        .integrated_diag_moments = { GKYL_F_MOMENT_M0M1M2PARM2PERP },
+      },
+    },
+
+    .bcs = {
+      { .dir = 0, .edge = GKYL_LOWER_EDGE, .type = GKYL_BC_GK_SPECIES_ZERO_FLUX },
+      { .dir = 0, .edge = GKYL_UPPER_EDGE, .type = GKYL_BC_GK_SPECIES_ABSORB },
+      { .dir = 1, .edge = GKYL_LOWER_EDGE, .type = GKYL_BC_GK_SPECIES_SHEATH },
+      { .dir = 1, .edge = GKYL_UPPER_EDGE, .type = GKYL_BC_GK_SPECIES_SHEATH },
+    },
+    .write_omega_cfl = true,
+    .num_diag_moments = 8,
+    .diag_moments = { GKYL_F_MOMENT_BIMAXWELLIAN, GKYL_F_MOMENT_M0, GKYL_F_MOMENT_M1,
+                      GKYL_F_MOMENT_M2, GKYL_F_MOMENT_M2PAR, GKYL_F_MOMENT_M2PERP,
+                      GKYL_F_MOMENT_M3PAR, GKYL_F_MOMENT_M3PERP },
+    .num_integrated_diag_moments = 1,
+    .integrated_diag_moments = { GKYL_F_MOMENT_M0M1M2PARM2PERP },
+    .time_rate_diagnostics = true,
+
+    .boundary_flux_diagnostics = {
+      .num_integrated_diag_moments = 1,
+      .integrated_diag_moments = { GKYL_F_MOMENT_M0M1M2PARM2PERP },
+    },
+  };
+  struct gkyl_gyrokinetic_field field = {
+    .gkfield_id = GKYL_GK_FIELD_BOLTZMANN,
+    .electron_mass = ctx.me,
+    .electron_charge = ctx.qe,
+    .electron_temp = ctx.Te0,
+    .is_static = false,
+  };
+
+  struct gkyl_gk app_inp = {  // Top-level input for the GK app.
+    .name = "gk_mirror_boltz_elc_poa_2x2v_p1",
+    .cdim = ctx.cdim,
+    .upper = { ctx.psi_max, ctx.Z_max },
+    .lower = { ctx.psi_min, ctx.Z_min },
+    .cells = { cells_x[0], cells_x[1] },
+    .poly_order = ctx.poly_order,
+    .basis_type = app_args.basis_type,
+
+    .geometry = {
+      .geometry_id = GKYL_GEOMETRY_MAPC2P,
+      .world = { 0.0 },
+      .mapc2p = mapc2p, // Mapping of computational to physical space.
+      .c2p_ctx = &ctx,
+      .bfield_func = bfield_func, // Magnetic field.
+      .bfield_ctx = &ctx,
+    },
+
+    .num_periodic_dir = 0,
+    .periodic_dirs = {},
+
+    .num_species = 1,
+    .species = { ion },
+
+    .field = field,
+
+    .parallelism = {
+      .use_gpu = app_args.use_gpu,
+      .cuts = { app_args.cuts[0], app_args.cuts[1] },
+      .comm = comm,
+    },
+  };
+
+  // Overwrite the .name set above with app_args.app_name (the executable name, argv[0]).
+  snprintf(app_inp.name, sizeof(app_inp.name), "%s", app_args.app_name);
+  
+  // Create app object.
+  gkyl_gyrokinetic_app *app = gkyl_gyrokinetic_app_new(&app_inp);
+
+  // Triggers for IO.
+  struct gkyl_tm_trigger trig_write_conf, trig_write_phase, trig_calc_intdiag;
+
+  struct time_frame_state tfs = {
+    .t_curr = 0.0, // Initial simulation time.
+    .frame_curr = 0, // Initial frame.
+    .t_end = ctx.poa_phases[0].duration, // Final time of 1st phase.
+    .num_frames = ctx.poa_phases[0].num_frames, // Number of frames in 1st phase.
+  };
+
+  int phase_idx_init = 0, phase_idx_end = ctx.num_phases; // Initial and final phase index.
+  if (app_args.is_restart) {
+    struct gkyl_app_restart_status status = gkyl_gyrokinetic_app_read_from_frame(app,
+      app_args.restart_frame);
+
+    if (status.io_status != GKYL_ARRAY_RIO_SUCCESS) {
+      gkyl_gyrokinetic_app_cout(app, stderr, "*** Failed to read restart file! (%s)\n",
+        gkyl_array_rio_status_msg(status.io_status));
+      goto freeresources;
+    }
+
+    tfs.frame_curr = status.frame;
+    tfs.t_curr = status.stime;
+
+    // Find the first phase whose cumulative end time and frame count cover the restart point.
+    double time_count = 0.0;
+    int frame_count = 0;
+    int pit_curr = 0;
+    for (int pit = 0; pit < ctx.num_phases; pit++) {
+      time_count += ctx.poa_phases[pit].duration;
+      frame_count += ctx.poa_phases[pit].num_frames;
+      if ((tfs.t_curr <= time_count) && (tfs.frame_curr <= frame_count)) {
+        pit_curr = pit;
+        break;
+      }
+    }
+    ; // Stray empty statement; has no effect.
+    phase_idx_init = pit_curr;
+
+    // Change the duration and number frames so this phase reaches the expected
+    // time and number of frames and not beyond.
+    struct gk_poa_phase_params *pparams = &ctx.poa_phases[phase_idx_init];
+    pparams->num_frames = frame_count - tfs.frame_curr;
+    pparams->duration = time_count - tfs.t_curr;
+
+    gkyl_gyrokinetic_app_cout(app, stdout, "Restarting from frame %d", tfs.frame_curr);
+    gkyl_gyrokinetic_app_cout(app, stdout, " at time = %g\n", tfs.t_curr);
+  }
+  else {
+    gkyl_gyrokinetic_app_apply_ic(app, tfs.t_curr);
+
+    // Write out ICs.
+    reset_io_triggers(&ctx, &tfs, &trig_write_conf, &trig_write_phase, &trig_calc_intdiag);
+
+    calc_integrated_diagnostics(&trig_calc_intdiag, app, tfs.t_curr, true, -1.0);
+    write_data(&trig_write_conf, &trig_write_phase, app, tfs.t_curr, true);
+  }
+
+  if (app_args.num_steps != INT_MAX) // NOTE(review): runs no phase if phase_idx_init >= 1.
+    phase_idx_end = 1;
+
+  // Loop over the phases from phase_idx_init up to (not including) phase_idx_end.
+  for (int pit = phase_idx_init; pit < phase_idx_end; pit++) {
+    gkyl_gyrokinetic_app_cout(app, stdout, "\nRunning phase %d @ t = %.9e ... \n", pit, tfs.t_curr);
+    struct gk_poa_phase_params *phase_params = &ctx.poa_phases[pit];
+    run_phase(app, &ctx, app_args.num_steps, &trig_write_conf, &trig_write_phase,
+      &trig_calc_intdiag, &tfs, phase_params);
+  }
+
+  gkyl_gyrokinetic_app_stat_write(app);
+
+  struct gkyl_gyrokinetic_stat stat = gkyl_gyrokinetic_app_stat(app); // fetch simulation statistics
+  gkyl_gyrokinetic_app_cout(app, stdout, "\n");
+  gkyl_gyrokinetic_app_cout(app, stdout, "Number of update calls %ld\n", stat.nup);
+  gkyl_gyrokinetic_app_cout(app, stdout, "Number of forward-Euler calls %ld\n", stat.nfeuler);
+  gkyl_gyrokinetic_app_cout(app, stdout, "Number of RK stage-2 failures %ld\n", stat.nstage_2_fail);
+  if (stat.nstage_2_fail > 0) {
+    gkyl_gyrokinetic_app_cout(app, stdout, "Max rel dt diff for RK stage-2 failures %g\n",
+      stat.stage_2_dt_diff[1]);
+    gkyl_gyrokinetic_app_cout(app, stdout, "Min rel dt diff for RK stage-2 failures %g\n",
+      stat.stage_2_dt_diff[0]);
+  }
+  gkyl_gyrokinetic_app_cout(app, stdout, "Number of RK stage-3 failures %ld\n", stat.nstage_3_fail);
+  gkyl_gyrokinetic_app_cout(app, stdout, "Number of write calls %ld\n", stat.n_io);
+  gkyl_gyrokinetic_app_print_timings(app, stdout);
+
+freeresources:
+  // Reached on normal completion and on restart-read failure: release app, comm and context.
+  gkyl_gyrokinetic_app_release(app);
+  gkyl_gyrokinetic_comms_release(comm);
+  release_ctx(&ctx);
+
+#ifdef GKYL_HAVE_MPI
+  if (app_args.use_mpi)
+    MPI_Finalize();
+#endif
+  return 0;
+}
diff --git a/gyrokinetic/creg/rt_gk_mirror_kinetic_elc_poa_1x2v_p1.c b/gyrokinetic/creg/rt_gk_mirror_kinetic_elc_poa_1x2v_p1.c
new file mode 100644
index 0000000000..50a1557fb3
--- /dev/null
+++ b/gyrokinetic/creg/rt_gk_mirror_kinetic_elc_poa_1x2v_p1.c
@@ -0,0 +1,1274 @@
+#include <math.h>
+#include <stdio.h>
+#include <time.h>
+
+#include <gkyl_alloc.h>
+#include <gkyl_const.h>
+#include <gkyl_eqn_type.h>
+#include <gkyl_fem_poisson_bctype.h>
+#include <gkyl_gyrokinetic.h>
+#include <gkyl_math.h>
+
+#include <rt_arg_parse.h>
+
+// State of the pseudo orbit-averaged (POA) integrator; also used to tag the type of a phase.
+enum gk_poa_state {
+  GK_POA_NONE = 0, // Haven't started.
+  GK_POA_OAP, // Orbit-averaged phase (OAP).
+  GK_POA_FDP, // Full-dynamics phase (FDP).
+  GK_POA_COMPLETED, // Finished simulation.
+};
+
+struct gk_poa_phase_params { // Settings for one phase of the pseudo orbit-averaged scheme.
+  enum gk_poa_state phase; // Type of phase.
+  int num_frames; // Number of output frames in this phase.
+  double duration; // Duration.
+  double alpha; // Factor multiplying collisionless terms.
+  bool is_static_field; // Value given to the field's is_static flag during this phase.
+  bool is_positivity_enabled; // Whether positivity is enabled.
+  enum gkyl_gyrokinetic_fdot_multiplier_type fdot_mult_type; // Type of df/dt multiplier.
+};
+
+// Define the context of the simulation. This is basically all the globals.
+struct gk_mirror_ctx {
+  int cdim, vdim; // Dimensionality.
+
+  // Plasma parameters
+  double mi; // Ion mass.
+  double qi; // Ion charge.
+  double me; // Electron mass.
+  double qe; // Electron charge.
+  double Te0;
+  double n0;
+  double B_p;
+  double beta;
+  double tau;
+  double Ti0;
+  double kperpRhos;
+  // Parameters controlling initial conditions.
+  double alim;
+  double alphaIC0;
+  double alphaIC1;
+  double nuFrac;
+  // Electron-electron collision freq.
+  double logLambdaElc;
+  double nuElc;
+  // Ion-ion collision freq.
+  double logLambdaIon;
+  double nuIon;
+  // Thermal speeds.
+  double vti;
+  double vte;
+  double c_s;
+  // Gyrofrequencies and gyroradii.
+  double omega_ci;
+  double rho_s;
+  double kperp; // Perpendicular wavenumber in SI units.
+  double RatZeq0; // Radius of the field line at Z=0.
+  // Axial coordinate Z extents. Ensure that Z=0 is not on the boundary of a cell (due to AD errors).
+  double Z_min;
+  double Z_max;
+  double z_min; // z_psiZ(psi_eval, Z_min), set at the end of create_ctx.
+  double z_max; // z_psiZ(psi_eval, Z_max), set at the end of create_ctx.
+  double psi_eval; // psi_RZ(RatZeq0, 0), set at the end of create_ctx.
+  double psi_in; // Scratch: flux surface read by integrand_z_psiZ and root_Z_psiz.
+  double z_in; // Scratch: target z read by root_Z_psiz.
+  // Magnetic equilibrium model.
+  double mcB;
+  double gamma;
+  double Z_m;
+  // Banana tip info. Hardcoded to avoid dependency on ctx.
+  double B_bt;
+  double R_bt;
+  double Z_bt;
+  double z_bt;
+  double R_m;
+  double B_m;
+  double z_m;
+  // Physics parameters at mirror throat
+  double n_m;
+  double Te_m;
+  double Ti_m;
+  double cs_m;
+  // Source parameters
+  double NSrcIon;
+  double lineLengthSrcIon;
+  double sigSrcIon;
+  double NSrcFloorIon;
+  double TSrc0Ion;
+  double TSrcFloorIon;
+  double NSrcElc;
+  double lineLengthSrcElc;
+  double sigSrcElc;
+  double NSrcFloorElc;
+  double TSrc0Elc;
+  double TSrcFloorElc;
+  double alpha; // Multirate factor.
+  // Grid parameters
+  double vpar_max_ion;
+  double vpar_max_elc;
+  double mu_max_ion;
+  double mu_max_elc;
+  int Nz;
+  int Nvpar;
+  int Nmu;
+  int cells[GKYL_MAX_DIM]; // Number of cells in all directions.
+  int poly_order;
+
+  double t_end; // End time.
+  int num_frames; // Number of output frames.
+  int num_phases; // Number of phases.
+  struct gk_poa_phase_params *poa_phases; // Phases to run (allocated in create_ctx, freed in release_ctx).
+  double write_phase_freq; // Frequency of writing phase-space diagnostics (as a fraction of num_frames).
+  double int_diag_calc_freq; // Frequency of calculating integrated diagnostics (as a factor of num_frames).
+  double dt_failure_tol; // Minimum allowable fraction of initial time-step.
+  int num_failures_max; // Maximum allowable number of consecutive small time-steps.
+};
+
+double
+psi_RZ(double RIn, double ZIn, void *ctx)
+{
+  // Flux function of the model field: 0.5*R^2*mcB*(L(Z - Z_m) + L(Z + Z_m)).
+  const struct gk_mirror_ctx *prm = ctx;
+  const double w = prm->gamma; // Width of each Lorentzian profile L.
+  // Lorentzian profiles centered at Z = +Z_m and Z = -Z_m.
+  double lor_up = 1. / (M_PI * w * (1. + pow((ZIn - prm->Z_m) / w, 2.)));
+  double lor_dn = 1. / (M_PI * w * (1. + pow((ZIn + prm->Z_m) / w, 2.)));
+  // Product is evaluated left to right, exactly as in a single expression.
+  return 0.5 * pow(RIn, 2.) * prm->mcB * (lor_up + lor_dn);
+}
+
+double
+R_psiZ(double psiIn, double ZIn, void *ctx)
+{
+  // Radius R at which the model flux function equals psiIn at height ZIn.
+  const struct gk_mirror_ctx *prm = ctx;
+  double lor_up = 1.0 / (M_PI * prm->gamma * (1.0 + pow((ZIn - prm->Z_m) / prm->gamma, 2.)));
+  double lor_dn = 1.0 / (M_PI * prm->gamma * (1.0 + pow((ZIn + prm->Z_m) / prm->gamma, 2.)));
+  return sqrt(2.0 * psiIn / (prm->mcB * (lor_up + lor_dn)));
+}
+
+void
+Bfield_psiZ(double psiIn, double ZIn, void *ctx, double *BRad, double *BZ, double *Bmag)
+{
+  // Radial (*BRad), axial (*BZ) and total (*Bmag) model field at (psiIn, ZIn).
+  const struct gk_mirror_ctx *prm = ctx;
+  const double w = prm->gamma, zm = prm->Z_m;
+  double radius = R_psiZ(psiIn, ZIn, ctx);
+  // Z-derivative terms of the two Lorentzian profiles; the second is subtracted below.
+  double dlor_up = -2.0 * (ZIn - zm) / (M_PI * pow(w, 3.) * (pow(1.0 + pow((ZIn - zm) / w, 2.), 2.)));
+  double dlor_dn = 2.0 * (ZIn + zm) / (M_PI * pow(w, 3.) * (pow(1.0 + pow((ZIn + zm) / w, 2.), 2.)));
+  double lor_up = 1.0 / (M_PI * w * (1.0 + pow((ZIn - zm) / w, 2.)));
+  double lor_dn = 1.0 / (M_PI * w * (1.0 + pow((ZIn + zm) / w, 2.)));
+  *BRad = -(1.0 / 2.0) * radius * prm->mcB * (dlor_up - dlor_dn);
+  *BZ = prm->mcB * (lor_up + lor_dn);
+  *Bmag = sqrt(pow(*BRad, 2) + pow(*BZ, 2));
+}
+
+double
+integrand_z_psiZ(double ZIn, void *ctx)
+{
+  // Integrand Bmag/BZ evaluated on the flux surface psi_in stored in the context.
+  struct gk_mirror_ctx *prm = ctx;
+  double b_rad, b_ax, b_tot;
+  Bfield_psiZ(prm->psi_in, ZIn, ctx, &b_rad, &b_ax, &b_tot);
+  return b_tot / b_ax;
+}
+
+double
+z_psiZ(double psiIn, double ZIn, void *ctx)
+{
+  // Signed integral of Bmag/BZ from Z = 0 to Z = ZIn on the flux surface psiIn.
+  struct gk_mirror_ctx *prm = ctx;
+  const double origin = 0.0;
+  prm->psi_in = psiIn; // The integrand reads psi from the context.
+
+  if (!(origin <= ZIn)) {
+    // Integrate over [ZIn, 0] and flip the sign.
+    struct gkyl_qr_res rev = gkyl_dbl_exp(integrand_z_psiZ, ctx, ZIn, origin, 7, 1e-14);
+    return -rev.res;
+  }
+  struct gkyl_qr_res fwd = gkyl_dbl_exp(integrand_z_psiZ, ctx, origin, ZIn, 7, 1e-14);
+  return fwd.res;
+}
+
+// Residual z_in - z(psi_in, Z); its root in Z inverts z(Z).
+double
+root_Z_psiz(double Z, void *ctx)
+{
+  const struct gk_mirror_ctx *target = ctx;
+  return target->z_in - z_psiZ(target->psi_in, Z, ctx);
+}
+
+double
+Z_psiz(double psiIn, double zIn, void *ctx)
+{
+  // Find the cylindrical Z whose field-line coordinate z(psiIn, Z) equals zIn.
+  struct gk_mirror_ctx *prm = ctx;
+  // Pad the search bracket by (Z_max - Z_min)/Nz. (Interestingly, using a
+  // smaller pad yields larger errors in some geo quantities.)
+  double span = prm->Z_max - prm->Z_min;
+  double pad = span / prm->Nz;
+  // The residual function reads both targets from the context.
+  prm->psi_in = psiIn;
+  prm->z_in = zIn;
+
+  // Bracket is [-pad, Z_max + pad] when zIn >= 0, else [Z_min - pad, pad].
+  bool upper = zIn >= 0.0;
+  double lo = upper ? -pad : prm->Z_min - pad;
+  double hi = upper ? prm->Z_max + pad : pad;
+  double res_lo = root_Z_psiz(lo, ctx);
+  double res_hi = root_Z_psiz(hi, ctx);
+  return gkyl_ridders(root_Z_psiz, ctx, lo, hi, res_lo, res_hi, 1000, 1e-14).res;
+}
+
+// -- Source functions.
+void
+eval_density_elc_source(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout,
+  void *ctx)
+{
+  // Electron source density: floored Gaussian in z where |Z| <= Z_m, 1e-16 elsewhere.
+  struct gk_mirror_ctx *prm = ctx;
+  double psi_line = psi_RZ(prm->RatZeq0, 0.0, ctx); // Magnetic flux function psi of field line.
+  double z = xn[0];
+  double Z_cyl = Z_psiz(psi_line, z, ctx); // Cylindrical axial coordinate.
+
+  if (!(fabs(Z_cyl) <= prm->Z_m)) {
+    fout[0] = 1e-16;
+    return;
+  }
+  double width = prm->sigSrcElc, center = prm->lineLengthSrcElc;
+  // Gaussian of width sigSrcElc centered at lineLengthSrcElc, scaled by NSrcElc.
+  double gauss = (prm->NSrcElc / sqrt(2.0 * M_PI * pow(width, 2.))) *
+    exp(-1 * pow((z - center), 2) / (2.0 * pow(width, 2.)));
+  fout[0] = fmax(prm->NSrcFloorElc, gauss);
+}
+
+void
+eval_upar_elc_source(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout,
+  void *ctx)
+{
+  *fout = 0.0; // Source parallel flow is identically zero; t, xn and ctx are unused.
+}
+
+void
+eval_temp_elc_source(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout,
+  void *ctx)
+{
+  // Electron source temperature: TSrc0Elc where |z| <= 2*sigSrcElc and
+  // TSrcFloorElc everywhere else. t is unused.
+  struct gk_mirror_ctx *app = ctx;
+  double z = xn[0];
+  double TSrc0 = app->TSrc0Elc;
+  double Tfloor = app->TSrcFloorElc;
+  if (fabs(z) <= 2.0 * app->sigSrcElc) {
+    fout[0] = TSrc0;
+  }
+  else {
+    fout[0] = Tfloor;
+  }
+}
+
+void
+eval_density_source(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout, void *ctx)
+{
+  // Ion source density: floored Gaussian in z where |Z| <= Z_m, 1e-16 elsewhere.
+  struct gk_mirror_ctx *prm = ctx;
+  double psi_line = psi_RZ(prm->RatZeq0, 0.0, ctx); // Magnetic flux function psi of field line.
+  double z = xn[0];
+  double Z_cyl = Z_psiz(psi_line, z, ctx); // Cylindrical axial coordinate.
+
+  if (!(fabs(Z_cyl) <= prm->Z_m)) {
+    fout[0] = 1e-16;
+    return;
+  }
+  double width = prm->sigSrcIon, center = prm->lineLengthSrcIon;
+  // Gaussian of width sigSrcIon centered at lineLengthSrcIon, scaled by NSrcIon.
+  double gauss = (prm->NSrcIon / sqrt(2.0 * M_PI * pow(width, 2))) *
+    exp(-1 * pow((z - center), 2) / (2.0 * pow(width, 2)));
+  fout[0] = fmax(prm->NSrcFloorIon, gauss);
+}
+
+void
+eval_upar_source(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout, void *ctx)
+{
+  *fout = 0.0; // Source parallel flow is identically zero; t, xn and ctx are unused.
+}
+
+void
+eval_temp_ion_source(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout,
+  void *ctx)
+{
+  // Ion source temperature: TSrc0Ion where |z| <= 2*sigSrcIon and
+  // TSrcFloorIon everywhere else. t is unused.
+  struct gk_mirror_ctx *app = ctx;
+  double z = xn[0];
+  double TSrc0 = app->TSrc0Ion;
+  double Tfloor = app->TSrcFloorIon;
+  if (fabs(z) <= 2.0 * app->sigSrcIon) {
+    fout[0] = TSrc0;
+  }
+  else {
+    fout[0] = Tfloor;
+  }
+}
+
+// Electrons initial conditions
+void
+eval_density_elc(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout, void *ctx)
+{
+  // Initial electron density: n0 times a profile in R where |Z| <= Z_bt or |Z| <= Z_m,
+  // and n_m*sqrt(Bmag/B_m) otherwise.
+  struct gk_mirror_ctx *prm = ctx;
+  double psi_line = psi_RZ(prm->RatZeq0, 0.0, ctx); // Magnetic flux function psi of field line.
+  double Z_cyl = Z_psiz(psi_line, xn[0], ctx); // Cylindrical axial coordinate.
+  double R_cyl = R_psiZ(psi_line, Z_cyl, ctx); // Cylindrical radial coordinate.
+  double b_rad, b_ax, b_tot;
+  Bfield_psiZ(psi_line, Z_cyl, ctx, &b_rad, &b_ax, &b_tot);
+  bool in_core = fabs(Z_cyl) <= prm->Z_bt;
+  bool in_cell = fabs(Z_cyl) <= prm->Z_m;
+  if (!in_core && !in_cell) {
+    fout[0] = prm->n_m * sqrt(b_tot / prm->B_m);
+    return;
+  }
+  double expo = in_core ? prm->alphaIC0 / 2. : prm->alphaIC1 / 2.; // Profile exponent.
+  fout[0] = prm->n0 * pow((1.0 - pow((R_cyl - prm->R_bt) / prm->alim, 2.)), expo);
+}
+
+void
+eval_upar_elc(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout, void *ctx)
+{
+  // Initial electron parallel flow: 0 for |z| <= z_m, else cs_m*(z -/+ z_m). t is unused.
+  struct gk_mirror_ctx *app = ctx;
+  double z = xn[0];
+  if (fabs(z) <= app->z_m) {
+    fout[0] = 0.0;
+  }
+  else if (z > app->z_m) {
+    fout[0] = app->cs_m * (z - app->z_m);
+  }
+  else {
+    fout[0] = app->cs_m * (z + app->z_m);
+  }
+}
+
+void
+eval_temp_elc(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout, void *ctx)
+{
+  // Initial electron temperature: Te0 times a profile in R where |Z| <= Z_bt or |Z| <= Z_m,
+  // and Te_m*sqrt(Bmag/B_m) otherwise.
+  struct gk_mirror_ctx *prm = ctx;
+  double psi_line = psi_RZ(prm->RatZeq0, 0.0, ctx); // Magnetic flux function psi of field line.
+  double Z_cyl = Z_psiz(psi_line, xn[0], ctx); // Cylindrical axial coordinate.
+  double R_cyl = R_psiZ(psi_line, Z_cyl, ctx); // Cylindrical radial coordinate.
+  double b_rad, b_ax, b_tot;
+  Bfield_psiZ(psi_line, Z_cyl, ctx, &b_rad, &b_ax, &b_tot);
+  bool in_core = fabs(Z_cyl) <= prm->Z_bt;
+  bool in_cell = fabs(Z_cyl) <= prm->Z_m;
+  if (!in_core && !in_cell) {
+    fout[0] = prm->Te_m * sqrt(b_tot / prm->B_m);
+    return;
+  }
+  double expo = in_core ? prm->alphaIC0 / 2. : prm->alphaIC1 / 2.; // Profile exponent.
+  fout[0] = prm->Te0 * pow((1.0 - pow((R_cyl - prm->R_bt) / prm->alim, 2.)), expo);
+}
+
+// Ion initial conditions
+void
+eval_density(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout, void *ctx)
+{
+  // Initial ion density: n0 times a profile in R where |Z| <= Z_bt or |Z| <= Z_m,
+  // and n_m*sqrt(Bmag/B_m) otherwise.
+  struct gk_mirror_ctx *prm = ctx;
+  double psi_line = psi_RZ(prm->RatZeq0, 0.0, ctx); // Magnetic flux function psi of field line.
+  double Z_cyl = Z_psiz(psi_line, xn[0], ctx); // Cylindrical axial coordinate.
+  double R_cyl = R_psiZ(psi_line, Z_cyl, ctx); // Cylindrical radial coordinate.
+  double b_rad, b_ax, b_tot;
+  Bfield_psiZ(psi_line, Z_cyl, ctx, &b_rad, &b_ax, &b_tot);
+  bool in_core = fabs(Z_cyl) <= prm->Z_bt;
+  bool in_cell = fabs(Z_cyl) <= prm->Z_m;
+  if (!in_core && !in_cell) {
+    fout[0] = prm->n_m * sqrt(b_tot / prm->B_m);
+    return;
+  }
+  double expo = in_core ? prm->alphaIC0 / 2 : prm->alphaIC1 / 2; // Profile exponent.
+  fout[0] = prm->n0 * pow(1.0 - pow((R_cyl - prm->R_bt) / prm->alim, 2), expo);
+}
+
+void
+eval_upar(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout, void *ctx)
+{
+  // Initial ion parallel flow: 0 for |z| <= z_m, else cs_m*(z -/+ z_m). t is unused.
+  struct gk_mirror_ctx *app = ctx;
+  double z = xn[0];
+  if (fabs(z) <= app->z_m) {
+    fout[0] = 0.0;
+  }
+  else if (z > app->z_m) {
+    fout[0] = app->cs_m * (z - app->z_m);
+  }
+  else {
+    fout[0] = app->cs_m * (z + app->z_m);
+  }
+}
+
+void
+eval_temp_ion(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout, void *ctx)
+{
+  // Initial ion temperature: Ti0 times a profile in R where |Z| <= Z_bt or |Z| <= Z_m,
+  // and Ti_m*sqrt(Bmag/B_m) otherwise.
+  struct gk_mirror_ctx *prm = ctx;
+  double psi_line = psi_RZ(prm->RatZeq0, 0.0, ctx); // Magnetic flux function psi of field line.
+  double Z_cyl = Z_psiz(psi_line, xn[0], ctx); // Cylindrical axial coordinate.
+  double R_cyl = R_psiZ(psi_line, Z_cyl, ctx); // Cylindrical radial coordinate.
+  double b_rad, b_ax, b_tot;
+  Bfield_psiZ(psi_line, Z_cyl, ctx, &b_rad, &b_ax, &b_tot);
+  bool in_core = fabs(Z_cyl) <= prm->Z_bt;
+  bool in_cell = fabs(Z_cyl) <= prm->Z_m;
+  if (!in_core && !in_cell) {
+    fout[0] = prm->Ti_m * sqrt(b_tot / prm->B_m);
+    return;
+  }
+  double expo = in_core ? prm->alphaIC0 / 2 : prm->alphaIC1 / 2; // Profile exponent.
+  fout[0] = prm->Ti0 * pow((1.0 - pow((R_cyl - prm->R_bt) / prm->alim, 2)), expo);
+}
+
+// Potential initial condition
+void
+eval_potential(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout, void *ctx)
+{
+  // Initial potential: constant 8*Te0/qi for |z| <= plateau, then linear in |z|,
+  // reaching zero at |z| = z_max.
+  const struct gk_mirror_ctx *prm = ctx;
+  double dist = fabs(xn[0]);
+  double plateau = 0.2 * 0.98; // Half-width of the flat region (0.2 times a hard-coded 0.98).
+  double phi_peak = 8.0 * prm->Te0 / prm->qi;
+
+  if (!(dist <= plateau)) {
+    fout[0] = phi_peak * (1 - (dist - plateau) / (prm->z_max - plateau));
+    return;
+  }
+  fout[0] = phi_peak;
+}
+
+// Evaluate collision frequencies
+void
+evalNuElc(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout, void *ctx)
+{
+  // Electron collision frequency: the constant nuElc stored in the context.
+  fout[0] = ((struct gk_mirror_ctx *)ctx)->nuElc;
+}
+
+void
+evalNuIon(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout, void *ctx)
+{
+  // Ion collision frequency: the constant nuIon stored in the context.
+  fout[0] = ((struct gk_mirror_ctx *)ctx)->nuIon;
+}
+
+// Geometry evaluation functions for the gk app
+// mapc2p must assume a 3d input xc
+void
+mapc2p(double t, const double *xc, double *GKYL_RESTRICT xp, void *ctx)
+{
+  // Map computational coordinates xc = (psi, theta, z) to Cartesian xp = (x, y, Z).
+  const double psi = xc[0];
+  const double theta = xc[1];
+  const double z_line = xc[2];
+
+  // Cylindrical position: invert z -> Z, then evaluate R on that flux surface.
+  const double Z_cyl = Z_psiz(psi, z_line, ctx);
+  const double R_cyl = R_psiZ(psi, Z_cyl, ctx);
+
+  // Cartesian coordinates on plane perpendicular to Z axis.
+  xp[0] = R_cyl * cos(theta);
+  xp[1] = R_cyl * sin(theta);
+  xp[2] = Z_cyl;
+}
+
+// bmag_func must assume a 3d input xc
+void
+bmag_func(double t, const double *xc, double *GKYL_RESTRICT fout, void *ctx)
+{
+  // Field magnitude at field-line coordinate xc[2] on the field line through (RatZeq0, Z=0).
+  struct gk_mirror_ctx *prm = ctx;
+  double psi_line = psi_RZ(prm->RatZeq0, 0.0, ctx); // Magnetic flux function psi of field line.
+  double Z_cyl = Z_psiz(psi_line, xc[2], ctx);
+  double b_rad, b_ax;
+  // The magnitude is written straight into the output slot.
+  Bfield_psiZ(psi_line, Z_cyl, ctx, &b_rad, &b_ax, &fout[0]);
+}
+
+// bfield_func must assume a 3d input xc
+void
+bfield_func(double t, const double *xc, double *GKYL_RESTRICT fout, void *ctx)
+{
+  // Cartesian field components; xc[1] is used as the angle, xc[2] as the field-line coordinate.
+  struct gk_mirror_ctx *prm = ctx;
+  const double angle = xc[1], z_line = xc[2];
+  double psi_line = psi_RZ(prm->RatZeq0, 0.0, ctx); // Magnetic flux function psi of field line.
+  double Z_cyl = Z_psiz(psi_line, z_line, ctx);
+
+  double b_rad, b_ax, b_tot;
+  Bfield_psiZ(psi_line, Z_cyl, ctx, &b_rad, &b_ax, &b_tot);
+
+  // Project the radial component onto x and y; the axial component goes to slot 2.
+  fout[0] = b_rad * cos(angle);
+  fout[1] = b_rad * sin(angle);
+  fout[2] = b_ax;
+}
+
+struct gk_mirror_ctx
+create_ctx(void) // Builds the run's parameter set; poa_phases is heap-allocated (freed by release_ctx).
+{
+  int cdim = 1, vdim = 2; // Dimensionality.
+
+  // Universal constant parameters.
+  double eps0 = GKYL_EPSILON0;
+  double mu0 = GKYL_MU0; // Not sure if this is right
+  double eV = GKYL_ELEMENTARY_CHARGE;
+  double mp = GKYL_PROTON_MASS; // ion mass
+  double me = GKYL_ELECTRON_MASS;
+  double qi = eV;  // ion charge
+  double qe = -eV; // electron charge
+
+  // Plasma parameters.
+  double mi = 2.014 * mp;
+  double Te0 = 940 * eV;
+  double n0 = 3e19;
+  double B_p = 0.53;
+  double beta = 0.4;
+  double tau = pow(B_p, 2.) * beta / (2.0 * mu0 * n0 * Te0) - 1.;
+  double Ti0 = tau * Te0;
+  double kperpRhos = 0.1;
+
+  // Parameters controlling initial conditions.
+  double alim = 0.125;
+  double alphaIC0 = 2;
+  double alphaIC1 = 10;
+
+  double nuFrac = 1.0;
+  // Electron-electron collision freq.
+  double logLambdaElc = 6.6 - 0.5 * log(n0 / 1e20) + 1.5 * log(Te0 / eV);
+  double nuElc = nuFrac * logLambdaElc * pow(eV, 4.) * n0 /
+    (6. * sqrt(2.) * pow(M_PI, 3. / 2.) * pow(eps0, 2.) * sqrt(me) * pow(Te0, 3. / 2.));
+  // Ion-ion collision freq.
+  double logLambdaIon = 6.6 - 0.5 * log(n0 / 1e20) + 1.5 * log(Ti0 / eV);
+  double nuIon = nuFrac * logLambdaIon * pow(eV, 4.) * n0 /
+    (12 * pow(M_PI, 3. / 2.) * pow(eps0, 2.) * sqrt(mi) * pow(Ti0, 3. / 2.));
+
+  // Thermal speeds.
+  double vti = sqrt(Ti0 / mi);
+  double vte = sqrt(Te0 / me);
+  double c_s = sqrt(Te0 / mi);
+
+  // Gyrofrequencies and gyroradii.
+  double omega_ci = eV * B_p / mi;
+  double rho_s = c_s / omega_ci;
+
+  // Perpendicular wavenumber in SI units:
+  double kperp = kperpRhos / rho_s;
+
+  // Geometry parameters.
+  double RatZeq0 = 0.10; // Radius of the field line at Z=0.
+  // Axial coordinate Z extents. Ensure that Z=0 is not on
+  // the boundary of a cell (due to AD errors).
+  double Z_min = -2.5;
+  double Z_max = 2.5;
+
+  // Parameters controlling the magnetic equilibrium model.
+  double mcB = 6.51292;
+  double gamma = 0.124904;
+  double Z_m = 0.98;
+
+  // Source parameters
+  double NSrcIon = 3.1715e23 / 8.0;
+  double lineLengthSrcIon = 0.0;
+  double sigSrcIon = Z_m / 4.0;
+  double NSrcFloorIon = 0.05 * NSrcIon;
+  double TSrc0Ion = Ti0 * 1.25;
+  double TSrcFloorIon = TSrc0Ion / 8.0;
+  double NSrcElc = NSrcIon;
+  double lineLengthSrcElc = lineLengthSrcIon;
+  double sigSrcElc = sigSrcIon;
+  double NSrcFloorElc = NSrcFloorIon;
+  double TSrc0Elc = TSrc0Ion / tau;
+  double TSrcFloorElc = TSrcFloorIon / tau;
+
+  // Banana tip info. Hardcoded to avoid dependency on ctx.
+  double B_bt = 1.058278;
+  double R_bt = 0.071022;
+  double Z_bt = 0.467101;
+  double z_bt = 0.468243;
+  double R_m = 0.017845;
+  double B_m = 16.662396;
+  double z_m = 0.982544;
+
+  // Physics parameters at mirror throat
+  double n_m = 1.105617e19;
+  double Te_m = 346.426583 * eV;
+  double Ti_m = 3081.437703 * eV;
+  double cs_m = 4.037740e5;
+
+  double alpha = 0.01; // Multirate factor. NOTE(review): never stored in ctx below, so ctx.alpha stays 0 — confirm.
+
+  // Grid parameters
+  double vpar_max_elc = 20 * vte;
+  double mu_max_elc = me * pow(3. * vte, 2.) / (2. * B_p);
+  double vpar_max_ion = 20 * vti;
+  double mu_max_ion = mi * pow(3. * vti, 2.) / (2. * B_p);
+  int Nz = 32;
+  int Nvpar = 32; // Number of cells in the parallel velocity direction 96
+  int Nmu = 16;  // Number of cells in the mu direction 192
+  int poly_order = 1;
+
+  // Factor multiplying collisionless terms.
+  double alpha_oap = 0.01;
+  double alpha_fdp = 1.0;
+  // Duration of each phase.
+  double tau_oap = 1e-7;
+  double tau_fdp = 3e-10;
+  double tau_fdp_extra = 2 * tau_fdp;
+  int num_cycles = 2; // Number of OAP+FDP cycles to run.
+
+  // Frame counts for each phase type (specified independently)
+  int num_frames_oap = 1; // Frames per OAP phase
+  int num_frames_fdp = 1; // Frames per FDP phase
+  int num_frames_fdp_extra = 2 * num_frames_fdp;  // Frames for the extra FDP phase
+
+  // Whether to evolve the field.
+  bool is_static_field_oap = true;
+  bool is_static_field_fdp = false;
+  // Whether to enable positivity.
+  bool is_positivity_enabled_oap = false;
+  bool is_positivity_enabled_fdp = true;
+  // Type of df/dt multiplier.
+  enum gkyl_gyrokinetic_fdot_multiplier_type fdot_mult_type_oap = GKYL_GK_FDOT_MULTIPLIER_LOSS_CONE;
+  enum gkyl_gyrokinetic_fdot_multiplier_type fdot_mult_type_fdp = GKYL_GK_FDOT_MULTIPLIER_NONE;
+
+  // Calculate phase structure: num_cycles (OAP, FDP) pairs followed by one extra FDP.
+  double t_end = (tau_oap + tau_fdp) * num_cycles + tau_fdp_extra;
+  double tau_pair = tau_oap + tau_fdp; // Duration of an OAP+FDP pair. NOTE(review): not used below.
+  int num_phases = 2 * num_cycles + 1;
+  int num_frames = num_cycles * (num_frames_oap + num_frames_fdp) + num_frames_fdp_extra;
+
+  struct gk_poa_phase_params *poa_phases = gkyl_malloc(num_phases *
+    sizeof(struct gk_poa_phase_params));
+  for (int i = 0; i < (num_phases - 1) / 2; i++) { // One iteration per OAP+FDP cycle.
+    // OAPs (even slots).
+    poa_phases[2 * i].phase = GK_POA_OAP;
+    poa_phases[2 * i].num_frames = num_frames_oap;
+    poa_phases[2 * i].duration = tau_oap;
+    poa_phases[2 * i].alpha = alpha_oap;
+    poa_phases[2 * i].is_static_field = is_static_field_oap;
+    poa_phases[2 * i].fdot_mult_type = fdot_mult_type_oap;
+    poa_phases[2 * i].is_positivity_enabled = is_positivity_enabled_oap;
+
+    // FDPs (odd slots).
+    poa_phases[2 * i + 1].phase = GK_POA_FDP;
+    poa_phases[2 * i + 1].num_frames = num_frames_fdp;
+    poa_phases[2 * i + 1].duration = tau_fdp;
+    poa_phases[2 * i + 1].alpha = alpha_fdp;
+    poa_phases[2 * i + 1].is_static_field = is_static_field_fdp;
+    poa_phases[2 * i + 1].fdot_mult_type = fdot_mult_type_fdp;
+    poa_phases[2 * i + 1].is_positivity_enabled = is_positivity_enabled_fdp;
+  }
+  // Add an extra, longer FDP in the last slot.
+  poa_phases[num_phases - 1].phase = GK_POA_FDP;
+  poa_phases[num_phases - 1].num_frames = num_frames_fdp_extra;
+  poa_phases[num_phases - 1].duration = tau_fdp_extra;
+  poa_phases[num_phases - 1].alpha = alpha_fdp;
+  poa_phases[num_phases - 1].is_static_field = is_static_field_fdp;
+  poa_phases[num_phases - 1].fdot_mult_type = fdot_mult_type_fdp;
+  poa_phases[num_phases - 1].is_positivity_enabled = is_positivity_enabled_fdp;
+
+  double write_phase_freq = 0.5; // Frequency of writing phase-space diagnostics (as a fraction of num_frames).
+  double int_diag_calc_freq = 5; // Frequency of calculating integrated diagnostics (as a factor of num_frames).
+  double dt_failure_tol = 1.0e-4; // Minimum allowable fraction of initial time-step.
+  int num_failures_max = 20; // Maximum allowable number of consecutive small time-steps.
+
+  struct gk_mirror_ctx ctx = {
+    .cdim = cdim,
+    .vdim = vdim,
+    .mi = mi,
+    .qi = qi,
+    .me = me,
+    .qe = qe,
+    .Te0 = Te0,
+    .n0 = n0,
+    .B_p = B_p,
+    .beta = beta,
+    .tau = tau,
+    .Ti0 = Ti0,
+    .kperpRhos = kperpRhos,
+    .alim = alim,
+    .alphaIC0 = alphaIC0,
+    .alphaIC1 = alphaIC1,
+    .nuFrac = nuFrac,
+    .logLambdaElc = logLambdaElc,
+    .nuElc = nuElc,
+    .logLambdaIon = logLambdaIon,
+    .nuIon = nuIon,
+    .vti = vti,
+    .vte = vte,
+    .c_s = c_s,
+    .omega_ci = omega_ci,
+    .rho_s = rho_s,
+    .kperp = kperp,
+    .RatZeq0 = RatZeq0,
+    .Z_min = Z_min,
+    .Z_max = Z_max,
+    .mcB = mcB,
+    .gamma = gamma,
+    .Z_m = Z_m,
+    .B_bt = B_bt,
+    .R_bt = R_bt,
+    .Z_bt = Z_bt,
+    .z_bt = z_bt,
+    .R_m = R_m,
+    .B_m = B_m,
+    .z_m = z_m,
+    .n_m = n_m,
+    .Te_m = Te_m,
+    .Ti_m = Ti_m,
+    .cs_m = cs_m,
+    .NSrcIon = NSrcIon,
+    .lineLengthSrcIon = lineLengthSrcIon,
+    .sigSrcIon = sigSrcIon,
+    .NSrcFloorIon = NSrcFloorIon,
+    .TSrc0Ion = TSrc0Ion,
+    .TSrcFloorIon = TSrcFloorIon,
+    .NSrcElc = NSrcElc,
+    .lineLengthSrcElc = lineLengthSrcElc,
+    .sigSrcElc = sigSrcElc,
+    .NSrcFloorElc = NSrcFloorElc,
+    .TSrc0Elc = TSrc0Elc,
+    .TSrcFloorElc = TSrcFloorElc,
+    .vpar_max_ion = vpar_max_ion,
+    .vpar_max_elc = vpar_max_elc,
+    .mu_max_ion = mu_max_ion,
+    .mu_max_elc = mu_max_elc,
+    .Nz = Nz,
+    .Nvpar = Nvpar,
+    .Nmu = Nmu,
+    .cells = { Nz, Nvpar, Nmu },
+    .poly_order = poly_order,
+    .t_end = t_end,
+    .num_frames = num_frames,
+    .num_phases = num_phases,
+    .poa_phases = poa_phases,
+    .write_phase_freq = write_phase_freq,
+    .int_diag_calc_freq = int_diag_calc_freq,
+    .dt_failure_tol = dt_failure_tol,
+    .num_failures_max = num_failures_max,
+  };
+
+  // Populate a couple more values in the context (these need the geometry helpers, which read ctx).
+  ctx.psi_eval = psi_RZ(ctx.RatZeq0, 0., &ctx);
+  ctx.z_min = z_psiZ(ctx.psi_eval, ctx.Z_min, &ctx);
+  ctx.z_max = z_psiZ(ctx.psi_eval, ctx.Z_max, &ctx);
+
+  return ctx;
+}
+
+void release_ctx(struct gk_mirror_ctx *ctx)
+{
+  // Free the phase table that create_ctx allocated for this context.
+  gkyl_free(ctx->poa_phases);
+}
+
+void
+calc_integrated_diagnostics(struct gkyl_tm_trigger *iot, gkyl_gyrokinetic_app *app,
+  double t_curr, bool force_calc, double dt)
+{
+  // The trigger is always polled first, whether or not force_calc is set.
+  bool due = gkyl_tm_trigger_check_and_bump(iot, t_curr);
+  if (!due && !force_calc) return;
+
+  gkyl_gyrokinetic_app_calc_field_energy(app, t_curr);
+  gkyl_gyrokinetic_app_calc_integrated_mom(app, t_curr);
+  if (!(dt < 0.0)) gkyl_gyrokinetic_app_save_dt(app, t_curr, dt); // A negative dt skips the save.
+}
+
+void
+write_data(struct gkyl_tm_trigger *iot_conf, struct gkyl_tm_trigger *iot_phase,
+  gkyl_gyrokinetic_app *app, double t_curr, bool force_write)
+{
+  bool conf_due = gkyl_tm_trigger_check_and_bump(iot_conf, t_curr);
+  // True when output is forced without the conf trigger having fired.
+  bool forced_only = (!conf_due) && force_write;
+  if (conf_due || force_write) {
+    gkyl_gyrokinetic_app_write_conf(app, t_curr, forced_only ? iot_conf->curr : iot_conf->curr - 1);
+    gkyl_gyrokinetic_app_write_field_energy(app);
+    gkyl_gyrokinetic_app_write_integrated_mom(app);
+    gkyl_gyrokinetic_app_write_dt(app);
+  }
+
+  bool phase_due = gkyl_tm_trigger_check_and_bump(iot_phase, t_curr);
+  if (phase_due || force_write) {
+    // Phase-space frames are numbered with the conf trigger's frame counter.
+    int frame_id = forced_only ? iot_conf->curr : iot_conf->curr - 1;
+    gkyl_gyrokinetic_app_write_phase(app, t_curr, frame_id);
+  }
+}
+
+struct time_frame_state { // Time/frame bookkeeping that run_phase reads and updates for each phase.
+  double t_curr; // Current simulation time.
+  double t_end; // End time of current phase.
+  int frame_curr; // Current frame.
+  int num_frames; // Number of frames at the end of current phase.
+};
+
+void reset_io_triggers(struct gk_mirror_ctx *ctx, struct time_frame_state *tfs,
+  struct gkyl_tm_trigger *trig_write_conf, struct gkyl_tm_trigger *trig_write_phase,
+  struct gkyl_tm_trigger *trig_calc_intdiag)
+{
+  // Restart all three triggers at the current time/frame and spread the frames
+  // still to be produced over the time left until tfs->t_end.
+  const double now = tfs->t_curr;
+  const int frame_now = tfs->frame_curr;
+  const int frames_total = tfs->num_frames;
+  const int frames_left = frames_total - frame_now;
+  const double time_left = tfs->t_end - now;
+
+  // NOTE(review): nothing here guards against frames_left == 0 or frames_total == 0.
+  trig_write_conf->dt = time_left / frames_left;
+  trig_write_conf->tcurr = now;
+  trig_write_conf->curr = frame_now;
+
+  // Phase-space output: frame count scaled by write_phase_freq.
+  trig_write_phase->dt = time_left / (ctx->write_phase_freq * frames_left);
+  trig_write_phase->tcurr = now;
+  trig_write_phase->curr = frame_now;
+
+  // Integrated diagnostics: an integer multiple of frames_left, never fewer than frames_left.
+  int diag_total = ctx->int_diag_calc_freq * frames_total;
+  int diag_left = GKYL_MAX2(frames_left, (diag_total / frames_total) * frames_left);
+  trig_calc_intdiag->dt = time_left / diag_left;
+  trig_calc_intdiag->tcurr = now;
+  trig_calc_intdiag->curr = frame_now;
+}
+
+void run_phase(gkyl_gyrokinetic_app *app, struct gk_mirror_ctx *ctx, double num_steps,
+  struct gkyl_tm_trigger *trig_write_conf, struct gkyl_tm_trigger *trig_write_phase,
+  struct gkyl_tm_trigger *trig_calc_intdiag, struct time_frame_state *tfs,
+  struct gk_poa_phase_params *pparams)
+{
+  tfs->t_end = tfs->t_curr + pparams->duration; // This phase ends `duration` after the current time.
+  tfs->num_frames = tfs->frame_curr + pparams->num_frames; // Frame count to reach by the end of this phase.
+
+  // Run one phase (OAP or FDP), stepping a local copy of the time.
+  double t_curr = tfs->t_curr;
+  double t_end = tfs->t_end;
+
+  // Reset I/O triggers so their intervals match this phase's duration and frame count.
+  reset_io_triggers(ctx, tfs, trig_write_conf, trig_write_phase, trig_calc_intdiag);
+
+  // Reset simulation parameters and function pointers.
+  struct gkyl_gyrokinetic_collisionless collisionless_inp = {
+    .type = GKYL_GK_COLLISIONLESS_ES,
+    .scale_factor = pparams->alpha,
+  };
+  struct gkyl_gyrokinetic_fdot_multiplier fdot_mult_inp = {
+    .num_multipliers = 1,
+    .multiplier[0] = {
+      .type = pparams->fdot_mult_type,
+      .cellwise_const = true,
+      .write_diagnostics = true,
+    }
+  };
+  struct gkyl_gyrokinetic_field field_inp = {
+    .polarization_bmag = ctx->B_p,
+    .kperpSq = pow(ctx->kperp, 2.),
+    .is_static = pparams->is_static_field,
+  };
+  struct gkyl_gyrokinetic_positivity positivity_inp = {
+    .type = pparams->is_positivity_enabled? GKYL_GK_POSITIVITY_SHIFT : GKYL_GK_POSITIVITY_NONE,
+    .write_diagnostics = pparams->is_positivity_enabled,
+  };
+
+  gkyl_gyrokinetic_app_reset_species_collisionless(app, t_curr, "ion", collisionless_inp);
+  gkyl_gyrokinetic_app_reset_species_collisionless(app, t_curr, "elc", collisionless_inp);
+  gkyl_gyrokinetic_app_reset_species_fdot_multiplier(app, t_curr, "ion", fdot_mult_inp);
+  gkyl_gyrokinetic_app_reset_species_fdot_multiplier(app, t_curr, "elc", fdot_mult_inp);
+  gkyl_gyrokinetic_app_reset_species_positivity(app, t_curr, "ion", positivity_inp);
+  gkyl_gyrokinetic_app_reset_species_positivity(app, t_curr, "elc", positivity_inp);
+  gkyl_gyrokinetic_app_reset_field(app, t_curr, field_inp);
+
+  // Initial time-step request: the full duration of the phase.
+  double dt = t_end - t_curr;
+
+  // Initialize small time-step check (dt_init < 0 means "no reference step taken yet").
+  double dt_init = -1.0, dt_failure_tol = ctx->dt_failure_tol;
+  int num_failures = 0, num_failures_max = ctx->num_failures_max;
+
+  long step = 1;
+  while ((t_curr < t_end) && (step <= num_steps)) {
+    if (step == 1 || step % 1 == 0) // step % 1 == 0 always holds, so this prints on every step.
+      gkyl_gyrokinetic_app_cout(app, stdout, "Taking time-step at t = %g ...", t_curr);
+
+    dt = fmin(dt, t_end - t_curr); // Don't step beyond t_end.
+    struct gkyl_update_status status = gkyl_gyrokinetic_update(app, dt);
+
+    if (step == 1 || step % 1 == 0)
+      gkyl_gyrokinetic_app_cout(app, stdout, " dt = %g\n", status.dt_actual);
+
+    if (!status.success) {
+      gkyl_gyrokinetic_app_cout(app, stdout, "** Update method failed! Aborting simulation ....\n");
+      break;
+    }
+    t_curr += status.dt_actual;
+    dt = status.dt_suggested;
+    // Diagnostics and output are forced only when t_curr has gone past t_end.
+    calc_integrated_diagnostics(trig_calc_intdiag, app, t_curr, t_curr > t_end, status.dt_actual);
+    write_data(trig_write_conf, trig_write_phase, app, t_curr, t_curr > t_end);
+
+    if (dt_init < 0.0) { // First successful step of the phase sets the reference dt.
+      dt_init = status.dt_actual;
+    }
+    else if (status.dt_actual < dt_failure_tol * dt_init) {
+      num_failures += 1;
+
+      gkyl_gyrokinetic_app_cout(app, stdout, "WARNING: Time-step dt = %g", status.dt_actual);
+      gkyl_gyrokinetic_app_cout(app, stdout, " is below %g*dt_init ...", dt_failure_tol);
+      gkyl_gyrokinetic_app_cout(app, stdout, " num_failures = %d\n", num_failures);
+      if (num_failures >= num_failures_max) {
+        gkyl_gyrokinetic_app_cout(app, stdout, "ERROR: Time-step was below %g*dt_init ",
+          dt_failure_tol);
+        gkyl_gyrokinetic_app_cout(app, stdout, "%d consecutive times. Aborting simulation ....\n",
+          num_failures_max);
+        calc_integrated_diagnostics(trig_calc_intdiag, app, t_curr, true, status.dt_actual);
+        write_data(trig_write_conf, trig_write_phase, app, t_curr, true);
+        break;
+      }
+    }
+    else {
+      num_failures = 0; // Only consecutive small steps count toward the limit.
+    }
+
+    step += 1;
+  }
+
+  tfs->t_curr = t_curr; // Hand the reached time back to the caller.
+  tfs->frame_curr = tfs->frame_curr + pparams->num_frames; // Advanced even when the loop exited early via break.
+}
+
+// Driver for the kinetic-electron pseudo orbit-averaged (POA) mirror test.
+// Builds the species, field and app inputs, then either restarts from a frame
+// or applies and writes the initial conditions, runs the selected phases via
+// run_phase(), and prints run statistics. Returns 0, including when the
+// restart file cannot be read (resources are still released in that case).
+int main(int argc, char **argv)
+{
+  struct gkyl_app_args app_args = parse_app_args(argc, argv);
+
+#ifdef GKYL_HAVE_MPI
+  if (app_args.use_mpi) MPI_Init(&argc, &argv);
+#endif
+
+  if (app_args.trace_mem) {
+    gkyl_cu_dev_mem_debug_set(true);
+    gkyl_mem_debug_set(true);
+  }
+
+  struct gk_mirror_ctx ctx = create_ctx(); // Context for init functions.
+
+  // Cell counts: command-line values take precedence over the context defaults.
+  int cells_x[ctx.cdim], cells_v[ctx.vdim];
+  for (int d = 0; d < ctx.cdim; d++) {
+    cells_x[d] = APP_ARGS_CHOOSE(app_args.xcells[d], ctx.cells[d]);
+  }
+  for (int d = 0; d < ctx.vdim; d++) {
+    cells_v[d] = APP_ARGS_CHOOSE(app_args.vcells[d], ctx.cells[ctx.cdim + d]);
+  }
+
+  // Construct communicator for use in app.
+  struct gkyl_comm *comm = gkyl_gyrokinetic_comms_new(app_args.use_mpi, app_args.use_gpu, stderr);
+
+  struct gkyl_gyrokinetic_species elc = {
+    .name = "elc",
+    .charge = ctx.qe,
+    .mass = ctx.me,
+    .vdim = ctx.vdim,
+    .lower = { -ctx.vpar_max_elc, 0.0 },
+    .upper = { ctx.vpar_max_elc, ctx.mu_max_elc },
+    .cells = { cells_v[0], cells_v[1] },
+
+    .polarization_density = ctx.n0,
+
+    .projection = {
+      .proj_id = GKYL_PROJ_MAXWELLIAN_PRIM,
+      .ctx_density = &ctx,
+      .density = eval_density_elc,
+      .ctx_upar = &ctx,
+      .upar = eval_upar_elc,
+      .ctx_temp = &ctx,
+      .temp = eval_temp_elc,
+    },
+
+    .collisionless = {
+      .type = GKYL_GK_COLLISIONLESS_ES,
+      .scale_factor = 1.0, // Will be replaced below.
+    },
+
+    .collisions = {
+      .collision_id = GKYL_LBO_COLLISIONS,
+      .den_ref = ctx.n0,
+      .temp_ref = ctx.Te0,
+      .num_cross_collisions = 1,
+      .collide_with = { "ion" },
+    },
+
+    .source = {
+      .source_id = GKYL_PROJ_SOURCE,
+      .num_sources = 1,
+      .projection[0] = {
+        .proj_id = GKYL_PROJ_MAXWELLIAN_PRIM,
+        .ctx_density = &ctx,
+        .density = eval_density_elc_source,
+        .ctx_upar = &ctx,
+        .upar = eval_upar_elc_source,
+        .ctx_temp = &ctx,
+        .temp = eval_temp_elc_source,
+      },
+    },
+
+    .time_rate_multiplier = {
+      .num_multipliers = 1,
+      .multiplier[0] = {
+        .type = GKYL_GK_FDOT_MULTIPLIER_LOSS_CONE,
+        .cellwise_const = true,
+        .write_diagnostics = true,
+      },
+    },
+
+    .positivity = {
+      .type = GKYL_GK_POSITIVITY_SHIFT,
+      .write_diagnostics = true,
+    },
+
+    .bcs = {
+      { .dir = 0, .edge = GKYL_LOWER_EDGE, .type = GKYL_BC_GK_SPECIES_SHEATH, },
+      { .dir = 0, .edge = GKYL_UPPER_EDGE, .type = GKYL_BC_GK_SPECIES_SHEATH, },
+    },
+
+    .write_omega_cfl = true,
+    .num_diag_moments = 8,
+    .diag_moments = { GKYL_F_MOMENT_M0, GKYL_F_MOMENT_M1, GKYL_F_MOMENT_M2, GKYL_F_MOMENT_M2PAR,
+                      GKYL_F_MOMENT_M2PERP, GKYL_F_MOMENT_M3PAR, GKYL_F_MOMENT_M3PERP,
+                      GKYL_F_MOMENT_BIMAXWELLIAN },
+  };
+
+  struct gkyl_gyrokinetic_species ion = {
+    .name = "ion",
+    .charge = ctx.qi,
+    .mass = ctx.mi,
+    .vdim = ctx.vdim,
+    .lower = { -ctx.vpar_max_ion, 0.0 },
+    .upper = { ctx.vpar_max_ion, ctx.mu_max_ion },
+    .cells = { cells_v[0], cells_v[1] },
+    .scale_with_polarization = true,
+
+    .polarization_density = ctx.n0,
+
+    .projection = {
+      .proj_id = GKYL_PROJ_MAXWELLIAN_PRIM,
+      .ctx_density = &ctx,
+      .density = eval_density,
+      .ctx_upar = &ctx,
+      .upar = eval_upar,
+      .ctx_temp = &ctx,
+      .temp = eval_temp_ion,
+    },
+
+    .collisionless = {
+      .type = GKYL_GK_COLLISIONLESS_ES,
+      .scale_factor = 1.0, // Will be replaced below.
+    },
+
+    .collisions = {
+      .collision_id = GKYL_LBO_COLLISIONS,
+      .den_ref = ctx.n0,
+      .temp_ref = ctx.Ti0,
+      .num_cross_collisions = 1,
+      .collide_with = { "elc" },
+    },
+
+    .source = {
+      .source_id = GKYL_PROJ_SOURCE,
+      .num_sources = 1,
+      .projection[0] = {
+        .proj_id = GKYL_PROJ_MAXWELLIAN_PRIM,
+        .ctx_density = &ctx,
+        .density = eval_density_source,
+        .ctx_upar = &ctx,
+        .upar = eval_upar_source,
+        .ctx_temp = &ctx,
+        .temp = eval_temp_ion_source,
+      },
+    },
+
+    .time_rate_multiplier = {
+      .num_multipliers = 1,
+      .multiplier[0] = {
+        .type = GKYL_GK_FDOT_MULTIPLIER_LOSS_CONE,
+        .cellwise_const = true,
+        .write_diagnostics = true,
+      },
+    },
+
+    .positivity = {
+      .type = GKYL_GK_POSITIVITY_SHIFT,
+      .write_diagnostics = true,
+    },
+
+    .bcs = {
+      { .dir = 0, .edge = GKYL_LOWER_EDGE, .type = GKYL_BC_GK_SPECIES_SHEATH, },
+      { .dir = 0, .edge = GKYL_UPPER_EDGE, .type = GKYL_BC_GK_SPECIES_SHEATH, },
+    },
+
+    .write_omega_cfl = true,
+    .num_diag_moments = 8,
+    .diag_moments = { GKYL_F_MOMENT_M0, GKYL_F_MOMENT_M1, GKYL_F_MOMENT_M2, GKYL_F_MOMENT_M2PAR,
+                      GKYL_F_MOMENT_M2PERP, GKYL_F_MOMENT_M3PAR, GKYL_F_MOMENT_M3PERP,
+                      GKYL_F_MOMENT_BIMAXWELLIAN },
+  };
+
+  struct gkyl_gyrokinetic_field field = {
+    .polarization_bmag = ctx.B_p, // Issue here. B0 from soloviev, so not sure what to do. Ours is not constant
+    .kperpSq = pow(ctx.kperp, 2.),
+    .is_static = false, // Will be replaced below.
+    .polarization_potential = eval_potential,
+    .polarization_potential_ctx = &ctx,
+  };
+
+  // GK app
+  struct gkyl_gk app_inp = {
+    .name = "gk_mirror_kinetic_elc_poa_1x2v_p1",
+    .cdim = ctx.cdim,
+    .lower = { ctx.z_min },
+    .upper = { ctx.z_max },
+    .cells = { cells_x[0] },
+    .poly_order = ctx.poly_order,
+    .basis_type = app_args.basis_type,
+
+    .geometry = {
+      .geometry_id = GKYL_GEOMETRY_MAPC2P,
+      .world = { ctx.psi_eval, 0.0 },
+      .mapc2p = mapc2p, // Mapping of computational to physical space.
+      .c2p_ctx = &ctx,
+      .bfield_func = bfield_func, // Magnetic field.
+      .bfield_ctx = &ctx
+    },
+
+    .num_periodic_dir = 0,
+    .periodic_dirs = { 0 }, // `{}` is not valid before C23; `{ 0 }` zero-initializes as well.
+    .num_species = 2,
+    .species = { elc, ion },
+    .field = field,
+
+    .parallelism = {
+      .use_gpu = app_args.use_gpu,
+      .cuts = { app_args.cuts[0] },
+      .comm = comm,
+    },
+  };
+
+  // Set app output name from the executable name (argv[0]).
+  snprintf(app_inp.name, sizeof(app_inp.name), "%s", app_args.app_name);
+
+  // Create app object.
+  gkyl_gyrokinetic_app *app = gkyl_gyrokinetic_app_new(&app_inp);
+
+  // Triggers for IO.
+  struct gkyl_tm_trigger trig_write_conf, trig_write_phase, trig_calc_intdiag;
+
+  struct time_frame_state tfs = {
+    .t_curr = 0.0, // Initial simulation time.
+    .frame_curr = 0, // Initial frame.
+    .t_end = ctx.poa_phases[0].duration, // Final time of 1st phase.
+    .num_frames = ctx.poa_phases[0].num_frames, // Number of frames in 1st phase.
+  };
+
+  int phase_idx_init = 0, phase_idx_end = ctx.num_phases; // Initial and final phase index.
+  if (app_args.is_restart) {
+    struct gkyl_app_restart_status status = gkyl_gyrokinetic_app_read_from_frame(app,
+      app_args.restart_frame);
+
+    if (status.io_status != GKYL_ARRAY_RIO_SUCCESS) {
+      gkyl_gyrokinetic_app_cout(app, stderr, "*** Failed to read restart file! (%s)\n",
+        gkyl_array_rio_status_msg(status.io_status));
+      goto freeresources;
+    }
+
+    tfs.frame_curr = status.frame;
+    tfs.t_curr = status.stime;
+
+    // Find out what phase we are in: the first phase whose cumulative end time
+    // and cumulative frame count both bound the restart point.
+    double time_count = 0.0;
+    int frame_count = 0;
+    int pit_curr = 0;
+    bool phase_found = false;
+    for (int pit = 0; pit < ctx.num_phases; pit++) {
+      time_count += ctx.poa_phases[pit].duration;
+      frame_count += ctx.poa_phases[pit].num_frames;
+      if ((tfs.t_curr <= time_count) && (tfs.frame_curr <= frame_count)) {
+        pit_curr = pit;
+        phase_found = true;
+        break;
+      }
+    }
+    if (!phase_found) {
+      // The restart point lies beyond the last phase. Without this check phase 0
+      // would be selected with a non-positive duration and frame count, and the
+      // later phases would be re-run from the restart state.
+      gkyl_gyrokinetic_app_cout(app, stderr,
+        "*** Restart frame %d (t = %g) is beyond the last phase; nothing to run.\n",
+        tfs.frame_curr, tfs.t_curr);
+      goto freeresources;
+    }
+    phase_idx_init = pit_curr;
+
+    // Change the duration and number frames so this phase reaches the expected
+    // time and number of frames and not beyond.
+    struct gk_poa_phase_params *pparams = &ctx.poa_phases[phase_idx_init];
+    pparams->num_frames = frame_count - tfs.frame_curr;
+    pparams->duration = time_count - tfs.t_curr;
+
+    gkyl_gyrokinetic_app_cout(app, stdout, "Restarting from frame %d", tfs.frame_curr);
+    gkyl_gyrokinetic_app_cout(app, stdout, " at time = %g\n", tfs.t_curr);
+  }
+  else {
+    gkyl_gyrokinetic_app_apply_ic(app, tfs.t_curr);
+
+    // Write out ICs.
+    reset_io_triggers(&ctx, &tfs, &trig_write_conf, &trig_write_phase, &trig_calc_intdiag);
+
+    calc_integrated_diagnostics(&trig_calc_intdiag, app, tfs.t_curr, true, -1.0);
+    write_data(&trig_write_conf, &trig_write_phase, app, tfs.t_curr, true);
+  }
+
+  // When the number of steps is limited, run a single phase only. It must be the
+  // phase we start in: a fixed end index of 1 would leave the loop below empty
+  // whenever a restart lands in a later phase.
+  if (app_args.num_steps != INT_MAX) {
+    phase_idx_end = phase_idx_init + 1;
+  }
+
+  // Loop over the phases to run.
+  for (int pit = phase_idx_init; pit < phase_idx_end; pit++) {
+    gkyl_gyrokinetic_app_cout(app, stdout, "\nRunning phase %d @ t = %.9e ... \n", pit, tfs.t_curr);
+    struct gk_poa_phase_params *phase_params = &ctx.poa_phases[pit];
+    run_phase(app, &ctx, app_args.num_steps, &trig_write_conf, &trig_write_phase,
+      &trig_calc_intdiag, &tfs, phase_params);
+  }
+
+  gkyl_gyrokinetic_app_stat_write(app);
+
+  struct gkyl_gyrokinetic_stat stat = gkyl_gyrokinetic_app_stat(app); // fetch simulation statistics
+  gkyl_gyrokinetic_app_cout(app, stdout, "\n");
+  gkyl_gyrokinetic_app_cout(app, stdout, "Number of update calls %ld\n", stat.nup);
+  gkyl_gyrokinetic_app_cout(app, stdout, "Number of forward-Euler calls %ld\n", stat.nfeuler);
+  gkyl_gyrokinetic_app_cout(app, stdout, "Number of RK stage-2 failures %ld\n", stat.nstage_2_fail);
+  if (stat.nstage_2_fail > 0) {
+    gkyl_gyrokinetic_app_cout(app, stdout, "Max rel dt diff for RK stage-2 failures %g\n",
+      stat.stage_2_dt_diff[1]);
+    gkyl_gyrokinetic_app_cout(app, stdout, "Min rel dt diff for RK stage-2 failures %g\n",
+      stat.stage_2_dt_diff[0]);
+  }
+  gkyl_gyrokinetic_app_cout(app, stdout, "Number of RK stage-3 failures %ld\n", stat.nstage_3_fail);
+  gkyl_gyrokinetic_app_cout(app, stdout, "Number of write calls %ld\n", stat.n_io);
+  gkyl_gyrokinetic_app_print_timings(app, stdout);
+
+freeresources:
+  // simulation complete, free app
+  gkyl_gyrokinetic_app_release(app);
+  gkyl_gyrokinetic_comms_release(comm);
+  release_ctx(&ctx);
+
+#ifdef GKYL_HAVE_MPI
+  if (app_args.use_mpi)
+    MPI_Finalize();
+#endif
+  return 0;
+}
diff --git a/gyrokinetic/creg/rt_gk_mirror_tandem_boltz_elc_poa_1x2v.c b/gyrokinetic/creg/rt_gk_mirror_tandem_boltz_elc_poa_1x2v.c
new file mode 100644
index 0000000000..ec74f5ccc3
--- /dev/null
+++ b/gyrokinetic/creg/rt_gk_mirror_tandem_boltz_elc_poa_1x2v.c
@@ -0,0 +1,953 @@
+#include <math.h>
+#include <stdio.h>
+#include <time.h>
+
+#include <gkyl_alloc.h>
+#include <gkyl_const.h>
+#include <gkyl_eqn_type.h>
+#include <gkyl_fem_poisson_bctype.h>
+#include <gkyl_gyrokinetic.h>
+#include <gkyl_math.h>
+
+#include <rt_arg_parse.h>
+
+// State (phase type) of the pseudo orbit-averaged (POA) integrator.
+enum gk_poa_state {
+  GK_POA_NONE = 0, // Haven't started.
+  GK_POA_OAP, // Orbit-averaged phase (OAP).
+  GK_POA_FDP, // Full-dynamics phase (FDP).
+  GK_POA_COMPLETED, // Finished simulation.
+};
+
+struct gk_poa_phase_params { // Settings applied while running one phase (see run_phase).
+  enum gk_poa_state phase; // Type of phase (OAP or FDP).
+  int num_frames; // Number of output frames written during this phase.
+  double duration; // Duration of this phase (added to the current time to get its end time).
+  double alpha; // Factor multiplying collisionless terms.
+  bool is_static_field; // Whether the field is held static (true = field is not evolved).
+  bool is_positivity_enabled; // Whether positivity is enabled.
+  enum gkyl_gyrokinetic_fdot_multiplier_type fdot_mult_type; // Type of df/dt multiplier.
+};
+
+// Context of the simulation: all parameters shared by the callbacks and the driver (filled by create_ctx).
+struct gk_mirror_ctx {
+  int cdim, vdim; // Dimensionality.
+
+  // Plasma parameters.
+  double mi; // Ion mass.
+  double me; // Electron mass.
+  double qi; // Ion charge.
+  double qe; // Electron charge.
+  double Te0; // Electron temperature.
+  double Ti0; // Ion temperature.
+  double n0; // Density.
+  double B_p; // Plasma magnetic field (mirror center).
+  double beta; // Plasma beta in the center.
+  double tau; // Ion-to-electron temperature ratio (Ti0 = tau*Te0).
+
+  double Ti_perp0; // Reference ion perp temperature.
+  double Ti_par0; // Reference ion par temperature.
+  double cs_m; // Ion sound speed at the throat.
+
+  double nuFrac; // Fraction multiplying collision frequency.
+  double logLambdaIon; // Ion Coulomb logarithm.
+  double nuIon; // Ion-ion collision freq.
+
+  double vti; // Ion thermal speed.
+  double vte; // Electron thermal speed.
+  double c_s; // Ion sound speed.
+  double omega_ci; // Ion gyrofrequency.
+  double rho_s; // Ion sound gyroradius.
+
+  double RatZeq0; // Radius of the field line at Z=0.
+  double Z_min; // Minimum axial coordinate Z.
+  double Z_max; // Maximum axial coordinate Z.
+  double z_min; // Minimum value of the position along the field line.
+  double z_max; // Maximum value of the position along the field line.
+  double psi_eval; // Psi (poloidal flux) of the field line.
+  double psi_in, z_in; // Scratch psi and z used by the z(Z) quadrature and the Z(z) root find.
+
+  // Magnetic equilibrium model (see psi_RZ).
+  double mcB; // Overall amplitude factor of the flux function and field.
+  double gamma; // Width of each Lorentzian term in Z.
+  double Z_m; // Axial coordinate at mirror throat.
+  double z_m; // Computational coordinate at mirror throat.
+
+  // Source parameters.
+  double NSrcIon; // Ion source density (returned by eval_density_ion_source).
+  double TSrc0Ion; // Ion source temperature (returned by eval_temp_ion_source).
+
+  // Physical velocity space limits.
+  double vpar_min_ion, vpar_max_ion;
+  double mu_max_ion;
+  // Computational velocity space limits.
+  double vpar_min_ion_c, vpar_max_ion_c;
+  double mu_min_ion_c, mu_max_ion_c;
+
+  // Grid DOF.
+  int Nz; // Number of cells in z direction.
+  int Nvpar; // Number of cells in parallel velocity direction.
+  int Nmu; // Number of cells in mu direction.
+  int cells[GKYL_MAX_DIM]; // Number of cells in all directions.
+  int poly_order; // Polynomial order passed to the app.
+
+  double t_end; // End time (sum of all phase durations).
+  int num_frames; // Number of output frames (sum over all phases).
+  int num_phases; // Number of phases.
+  struct gk_poa_phase_params *poa_phases; // Phases to run (heap array of num_phases entries, freed by release_ctx).
+  double write_phase_freq; // Frequency of writing phase-space diagnostics (as a fraction of num_frames).
+  double int_diag_calc_freq; // Frequency of calculating integrated diagnostics (as a factor of num_frames).
+  double dt_failure_tol; // Minimum allowable fraction of initial time-step.
+  int num_failures_max; // Maximum allowable number of consecutive small time-steps.
+};
+
+// Flux function psi at cylindrical position (RIn, ZIn): 0.5*R^2*mcB times the
+// sum of four Lorentzian profiles in Z of width gamma, centered at Z = +Z_m,
+// -Z_m, +2*Z_m and -2*Z_m. ctx must point to a struct gk_mirror_ctx.
+double
+psi_RZ(double RIn, double ZIn, void *ctx)
+{
+  struct gk_mirror_ctx *params = ctx;
+  const double amp = params->mcB;
+  const double width = params->gamma;
+  const double Zc = params->Z_m;
+
+  // One Lorentzian per center; summed below in this order.
+  const double peak_a = 1. / (M_PI * width * (1. + pow((ZIn - Zc) / width, 2.)));
+  const double peak_b = 1. / (M_PI * width * (1. + pow((ZIn + Zc) / width, 2.)));
+  const double peak_c = 1. / (M_PI * width * (1. + pow((ZIn - 2 * Zc) / width, 2.)));
+  const double peak_d = 1. / (M_PI * width * (1. + pow((ZIn + 2 * Zc) / width, 2.)));
+
+  return 0.5 * pow(RIn, 2.) * amp * (peak_a + peak_b + peak_c + peak_d);
+}
+
+// Radius R of the flux surface psiIn at axial position ZIn, i.e. the inverse
+// of psi_RZ with respect to R. ctx must point to a struct gk_mirror_ctx.
+double
+R_psiZ(double psiIn, double ZIn, void *ctx)
+{
+  struct gk_mirror_ctx *params = ctx;
+
+  // Same four Lorentzian profiles in Z that appear in psi_RZ.
+  const double peak_a =
+    1.0 / (M_PI * params->gamma * (1.0 + pow((ZIn - params->Z_m) / params->gamma, 2.)));
+  const double peak_b =
+    1.0 / (M_PI * params->gamma * (1.0 + pow((ZIn + params->Z_m) / params->gamma, 2.)));
+  const double peak_c =
+    1.0 / (M_PI * params->gamma * (1.0 + pow((ZIn - 2 * params->Z_m) / params->gamma, 2.)));
+  const double peak_d =
+    1.0 / (M_PI * params->gamma * (1.0 + pow((ZIn + 2 * params->Z_m) / params->gamma, 2.)));
+
+  return sqrt(2.0 * psiIn / (params->mcB * (peak_a + peak_b + peak_c + peak_d)));
+}
+
+// Magnetic field on flux surface psiIn at axial position ZIn.
+// Outputs: *BRad (radial component), *BZ (axial component) and
+// *Bmag = sqrt(BRad^2 + BZ^2). ctx must point to a struct gk_mirror_ctx.
+void
+Bfield_psiZ(double psiIn, double ZIn, void *ctx, double *BRad, double *BZ, double *Bmag)
+{
+  struct gk_mirror_ctx *params = ctx;
+  const double Rcoord = R_psiZ(psiIn, ZIn, ctx);
+  const double mcB = params->mcB;
+  const double gamma = params->gamma;
+  const double Z_m = params->Z_m;
+
+  // Axial distance from each of the four Lorentzian centers.
+  const double d_a = ZIn - Z_m;
+  const double d_b = ZIn + Z_m;
+  const double d_c = ZIn - 2 * Z_m;
+  const double d_d = ZIn + 2 * Z_m;
+
+  // Z-derivative of each Lorentzian profile.
+  const double dpeak_a = -2.0 * d_a / (M_PI * pow(gamma, 3.) * (pow(1.0 + pow(d_a / gamma, 2.), 2.)));
+  const double dpeak_b = -2.0 * d_b / (M_PI * pow(gamma, 3.) * (pow(1.0 + pow(d_b / gamma, 2.), 2.)));
+  const double dpeak_c = -2.0 * d_c / (M_PI * pow(gamma, 3.) * (pow(1.0 + pow(d_c / gamma, 2.), 2.)));
+  const double dpeak_d = -2.0 * d_d / (M_PI * pow(gamma, 3.) * (pow(1.0 + pow(d_d / gamma, 2.), 2.)));
+  *BRad = -(1.0 / 2.0) * Rcoord * mcB * (dpeak_a + dpeak_b + dpeak_c + dpeak_d);
+
+  // The Lorentzian profiles themselves.
+  const double peak_a = 1.0 / (M_PI * gamma * (1.0 + pow(d_a / gamma, 2.)));
+  const double peak_b = 1.0 / (M_PI * gamma * (1.0 + pow(d_b / gamma, 2.)));
+  const double peak_c = 1.0 / (M_PI * gamma * (1.0 + pow(d_c / gamma, 2.)));
+  const double peak_d = 1.0 / (M_PI * gamma * (1.0 + pow(d_d / gamma, 2.)));
+  *BZ = mcB * (peak_a + peak_b + peak_c + peak_d);
+
+  *Bmag = sqrt(pow(*BRad, 2) + pow(*BZ, 2));
+}
+
+// Integrand of the field-line length integral: |B|/B_Z at axial position ZIn,
+// evaluated on the flux surface stored in ctx->psi_in.
+double
+integrand_z_psiZ(double ZIn, void *ctx)
+{
+  struct gk_mirror_ctx *params = ctx;
+  double b_rad, b_z, b_mag;
+  Bfield_psiZ(params->psi_in, ZIn, ctx, &b_rad, &b_z, &b_mag);
+  return b_mag / b_z;
+}
+
+// Field-line coordinate z at axial position ZIn on flux surface psiIn:
+// the integral of |B|/B_Z from Z = 0 to ZIn (negative for ZIn < 0).
+// Side effect: stores psiIn in ctx->psi_in for the integrand.
+double
+z_psiZ(double psiIn, double ZIn, void *ctx)
+{
+  struct gk_mirror_ctx *params = ctx;
+  params->psi_in = psiIn;
+
+  const double Z_origin = 0.0;
+  if (Z_origin <= ZIn) {
+    return gkyl_dbl_exp(integrand_z_psiZ, ctx, Z_origin, ZIn, 7, 1e-14).res;
+  }
+
+  // Integrate with ordered limits and flip the sign.
+  struct gkyl_qr_res reversed = gkyl_dbl_exp(integrand_z_psiZ, ctx, ZIn, Z_origin, 7, 1e-14);
+  return -reversed.res;
+}
+
+// Residual used to invert z(Z) by root finding: target z (ctx->z_in) minus
+// z evaluated at the trial Z on flux surface ctx->psi_in.
+double
+root_Z_psiz(double Z, void *ctx)
+{
+  struct gk_mirror_ctx *params = ctx;
+  const double z_trial = z_psiZ(params->psi_in, Z, ctx);
+  return params->z_in - z_trial;
+}
+
+// Axial coordinate Z corresponding to field-line coordinate zIn on flux
+// surface psiIn, found with Ridders' method applied to root_Z_psiz.
+// Side effect: stores psiIn and zIn in ctx->psi_in and ctx->z_in.
+double
+Z_psiz(double psiIn, double zIn, void *ctx)
+{
+  struct gk_mirror_ctx *params = ctx;
+  const double span = params->Z_max - params->Z_min;
+  // Bracket padding. Interestingly using a smaller value yields larger errors in some geo quantities.
+  const double pad = span / params->Nz;
+  params->psi_in = psiIn;
+  params->z_in = zIn;
+
+  // Bracket the root on the half of the domain matching the sign of zIn.
+  double Z_lo, Z_hi;
+  if (zIn >= 0.0) {
+    Z_lo = -pad;
+    Z_hi = params->Z_max + pad;
+  }
+  else {
+    Z_lo = params->Z_min - pad;
+    Z_hi = pad;
+  }
+
+  const double f_lo = root_Z_psiz(Z_lo, ctx);
+  const double f_hi = root_Z_psiz(Z_hi, ctx);
+  struct gkyl_qr_res root = gkyl_ridders(root_Z_psiz, ctx, Z_lo, Z_hi, f_lo, f_hi, 1000, 1e-14);
+  return root.res;
+}
+
+// Ion source density: writes the constant ctx->NSrcIon to fout[0].
+void
+eval_density_ion_source(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout,
+  void *ctx)
+{
+  const struct gk_mirror_ctx *params = ctx;
+  fout[0] = params->NSrcIon;
+}
+
+// Ion source parallel flow: writes zero to fout[0].
+void
+eval_upar_ion_source(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout,
+  void *ctx)
+{
+  *fout = 0.0;
+}
+
+// Ion source temperature: writes the constant ctx->TSrc0Ion to fout[0].
+void
+eval_temp_ion_source(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout,
+  void *ctx)
+{
+  const struct gk_mirror_ctx *params = ctx;
+  fout[0] = params->TSrc0Ion;
+}
+
+// Ion initial conditions.
+// Initial ion density: writes the constant ctx->n0 to fout[0].
+void
+eval_density_ion(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout, void *ctx)
+{
+  const struct gk_mirror_ctx *params = ctx;
+  fout[0] = params->n0;
+}
+
+// Initial ion parallel flow: writes zero to fout[0].
+void
+eval_upar_ion(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout, void *ctx)
+{
+  *fout = 0.0;
+}
+
+// Initial ion parallel temperature: writes the constant ctx->Ti_par0 to fout[0].
+void
+eval_temp_par_ion(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout, void *ctx)
+{
+  const struct gk_mirror_ctx *params = ctx;
+  fout[0] = params->Ti_par0;
+}
+
+// Initial ion perpendicular temperature: writes the constant ctx->Ti_perp0 to fout[0].
+void
+eval_temp_perp_ion(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout, void *ctx)
+{
+  const struct gk_mirror_ctx *params = ctx;
+  fout[0] = params->Ti_perp0;
+}
+
+// Ion-ion collision frequency: writes the constant ctx->nuIon to fout[0].
+void
+evalNuIon(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout, void *ctx)
+{
+  const struct gk_mirror_ctx *params = ctx;
+  fout[0] = params->nuIon;
+}
+
+// Geometry evaluation functions for the gk app.
+// Maps computational coordinates xc = (psi, theta, z) to Cartesian (x, y, Z).
+// mapc2p must assume a 3d input xc.
+void
+mapc2p(double t, const double *xc, double *GKYL_RESTRICT xp, void *ctx)
+{
+  const double psi = xc[0], theta = xc[1], z = xc[2];
+
+  // Axial position from the field-line coordinate, then the radius of the
+  // flux surface at that position.
+  const double Z = Z_psiz(psi, z, ctx);
+  const double R = R_psiZ(psi, Z, ctx);
+
+  // Cartesian coordinates on plane perpendicular to Z axis.
+  xp[0] = R * cos(theta);
+  xp[1] = R * sin(theta);
+  xp[2] = Z;
+}
+
+// Cartesian components of the magnetic field at computational coordinates xc.
+// bfield_func must assume a 3d input xc. The flux surface is taken from
+// ctx->RatZeq0 (radius at Z=0), not from xc[0].
+void
+bfield_func(double t, const double *xc, double *GKYL_RESTRICT fout, void *ctx)
+{
+  struct gk_mirror_ctx *params = ctx;
+  const double z_line = xc[2];
+
+  // Magnetic flux function psi of field line, then the axial position on it.
+  const double psi = psi_RZ(params->RatZeq0, 0.0, ctx);
+  const double Z = Z_psiz(psi, z_line, ctx);
+
+  double b_rad, b_z, b_mag;
+  Bfield_psiZ(psi, Z, ctx, &b_rad, &b_z, &b_mag);
+
+  // Rotate the radial component by the angle xc[1] to get x and y components.
+  const double phi = xc[1];
+  fout[0] = b_rad * cos(phi);
+  fout[1] = b_rad * sin(phi);
+  fout[2] = b_z;
+}
+
+// Maps computational velocity coordinates vc = (vpar, mu) to physical ones:
+// a tangent map in vpar scaled by vpar_max_ion, and a cubic map in mu scaled
+// by mu_max_ion.
+void mapc2p_vel_ion(double t, const double *vc, double *GKYL_RESTRICT vp, void *ctx)
+{
+  const struct gk_mirror_ctx *params = ctx;
+  const double vpar_scale = params->vpar_max_ion;
+  const double mu_scale = params->mu_max_ion;
+  const double vpar_c = vc[0], mu_c = vc[1];
+
+  const double stretch = 1.4; // Argument scaling of the tangent map.
+  vp[0] = vpar_scale * tan(vpar_c * stretch) / tan(stretch);
+  // Cubic map in mu.
+  vp[1] = mu_scale * pow(mu_c, 3);
+}
+
+// Builds the simulation context: physical and grid parameters, the table of
+// OAP/FDP phases (heap-allocated; free with release_ctx) and the I/O and
+// time-step-failure settings. Also evaluates psi_eval, z_min and z_max.
+struct gk_mirror_ctx
+create_ctx(void)
+{
+  int cdim = 1, vdim = 2; // Dimensionality.
+
+  // Universal constant parameters.
+  double eps0 = GKYL_EPSILON0;
+  double mu0 = GKYL_MU0;
+  double eV = GKYL_ELEMENTARY_CHARGE;
+  double mp = GKYL_PROTON_MASS;
+  double me = GKYL_ELECTRON_MASS;
+  double qi = eV;  // ion charge
+  double qe = -eV; // electron charge
+
+  // Plasma parameters.
+  double mi = 2.014 * mp;
+  double Te0 = 940 * eV;
+  double n0 = 3e19;
+  double B_p = 0.53;
+  double beta = 0.4;
+  double tau = pow(B_p, 2.) * beta / (2.0 * mu0 * n0 * Te0) - 1.;
+  double Ti0 = tau * Te0;
+
+  double nuFrac = 1.0;
+  // Ion-ion collision freq.
+  double logLambdaIon = 6.6 - 0.5 * log(n0 / 1e20) + 1.5 * log(Ti0 / eV);
+  double nuIon = nuFrac * logLambdaIon * pow(eV, 4.) * n0 /
+    (12 * pow(M_PI, 3. / 2.) * pow(eps0, 2.) * sqrt(mi) * pow(Ti0, 3. / 2.));
+
+  // Thermal speeds.
+  double vti = sqrt(Ti0 / mi);
+  double vte = sqrt(Te0 / me);
+  double c_s = sqrt(Te0 / mi);
+
+  // Gyrofrequencies and gyroradii.
+  double omega_ci = eV * B_p / mi;
+  double rho_s = c_s / omega_ci;
+
+  // Geometry parameters.
+  double RatZeq0 = 0.10; // Radius of the field line at Z=0.
+  // Axial coordinate Z extents. Ensure that Z=0 is not on
+  // the boundary of a cell (due to AD errors).
+  double Z_min = -3.0;
+  double Z_max = 3.0;
+
+  // Parameters controlling the magnetic equilibrium model.
+  double mcB = 1;
+  double gamma = 0.124904;
+  double Z_m = 1.0;
+
+  // Source parameters.
+  double NSrcIon = 3.1715e23 / 8.0 / 40.0 / 2.0 * 1.25;
+  double TSrc0Ion = Ti0 * 1.25;
+
+  // Grid parameters.
+  double vpar_max_ion = 16 * vti;
+  double vpar_min_ion = -vpar_max_ion;
+  double mu_max_ion = mi * pow(3. * vti, 2.) / (2. * B_p);
+
+  // Computational velocity space limits.
+  double vpar_min_ion_c = -1.0;
+  double vpar_max_ion_c = 1.0;
+  double mu_min_ion_c = 0.;
+  double mu_max_ion_c = 1.;
+
+  // Grid DOF:
+  int Nz = 200; // Number of cells in z direction.
+  int Nvpar = 48; // Number of cells in parallel velocity direction.
+  int Nmu = 16;  // Number of cells in mu direction.
+  int poly_order = 1;
+
+  // Initial conditions parameters.
+  double Ti_perp0 = 10000 * eV;
+  double Ti_par0 = 7500 * eV;
+
+  // Duration of each phase.
+  double tau_oap = 5e-7;
+  double tau_fdp = 3e-9;
+  double tau_fdp_extra = 2 * tau_fdp;
+  int num_cycles = 2; // Number of OAP+FDP cycles to run.
+
+  // Frame counts for each phase type (specified independently).
+  int num_frames_oap = 1; // Frames per OAP phase
+  int num_frames_fdp = 1; // Frames per FDP phase
+  int num_frames_fdp_extra = 2 * num_frames_fdp;  // Frames for the extra FDP phase
+
+  // Settings shared by every orbit-averaged phase.
+  const struct gk_poa_phase_params oap_template = {
+    .phase = GK_POA_OAP,
+    .num_frames = num_frames_oap,
+    .duration = tau_oap,
+    .alpha = 0.01, // Factor multiplying collisionless terms.
+    .is_static_field = true, // Field is held static.
+    .is_positivity_enabled = false,
+    .fdot_mult_type = GKYL_GK_FDOT_MULTIPLIER_LOSS_CONE, // Type of df/dt multiplier.
+  };
+  // Settings shared by every full-dynamics phase.
+  const struct gk_poa_phase_params fdp_template = {
+    .phase = GK_POA_FDP,
+    .num_frames = num_frames_fdp,
+    .duration = tau_fdp,
+    .alpha = 1.0, // Factor multiplying collisionless terms.
+    .is_static_field = false, // Field is evolved.
+    .is_positivity_enabled = true,
+    .fdot_mult_type = GKYL_GK_FDOT_MULTIPLIER_NONE, // Type of df/dt multiplier.
+  };
+
+  // Calculate phase structure: num_cycles (OAP, FDP) pairs plus one extra FDP.
+  double t_end = (tau_oap + tau_fdp) * num_cycles + tau_fdp_extra;
+  int num_phases = 2 * num_cycles + 1;
+  int num_frames = num_cycles * (num_frames_oap + num_frames_fdp) + num_frames_fdp_extra;
+
+  struct gk_poa_phase_params *poa_phases = gkyl_malloc(num_phases *
+    sizeof(struct gk_poa_phase_params));
+  const int num_pairs = (num_phases - 1) / 2;
+  for (int pair = 0; pair < num_pairs; pair++) {
+    poa_phases[2 * pair] = oap_template;
+    poa_phases[2 * pair + 1] = fdp_template;
+  }
+  // Add an extra, longer FDP.
+  struct gk_poa_phase_params *last_phase = &poa_phases[num_phases - 1];
+  *last_phase = fdp_template;
+  last_phase->num_frames = num_frames_fdp_extra;
+  last_phase->duration = tau_fdp_extra;
+
+  double write_phase_freq = 0.5; // Frequency of writing phase-space diagnostics (as a fraction of num_frames).
+  double int_diag_calc_freq = 5; // Frequency of calculating integrated diagnostics (as a factor of num_frames).
+  double dt_failure_tol = 1.0e-4; // Minimum allowable fraction of initial time-step.
+  int num_failures_max = 20; // Maximum allowable number of consecutive small time-steps.
+
+  struct gk_mirror_ctx ctx = {
+    .cdim = cdim, .vdim = vdim,
+    .mi = mi, .qi = qi,
+    .me = me, .qe = qe,
+    .Te0 = Te0, .Ti0 = Ti0, .n0 = n0,
+    .B_p = B_p, .beta = beta, .tau = tau,
+    .nuFrac = nuFrac, .logLambdaIon = logLambdaIon, .nuIon = nuIon,
+    .vti = vti, .vte = vte, .c_s = c_s,
+    .omega_ci = omega_ci, .rho_s = rho_s,
+    .RatZeq0 = RatZeq0,
+    .Z_min = Z_min, .Z_max = Z_max,
+    // Parameters controlling the magnetic equilibrium model.
+    .mcB = mcB, .gamma = gamma,
+    .Z_m = Z_m,
+    // Initial condition parameters.
+    .Ti_perp0 = Ti_perp0, .Ti_par0 = Ti_par0,
+    // Source parameters
+    .NSrcIon = NSrcIon,
+    .TSrc0Ion = TSrc0Ion,
+    // Physical velocity space limits.
+    .vpar_min_ion = vpar_min_ion,
+    .vpar_max_ion = vpar_max_ion,
+    .mu_max_ion = mu_max_ion,
+    // Computational velocity space limits.
+    .vpar_min_ion_c = vpar_min_ion_c,
+    .vpar_max_ion_c = vpar_max_ion_c,
+    .mu_min_ion_c = mu_min_ion_c,
+    .mu_max_ion_c = mu_max_ion_c,
+    // Grid DOF.
+    .Nz = Nz,
+    .Nvpar = Nvpar,
+    .Nmu = Nmu,
+    .cells = { Nz, Nvpar, Nmu },
+    .poly_order = poly_order,
+    // Time integration and I/O parameters.
+    .t_end = t_end,
+    .num_frames = num_frames,
+    .num_phases = num_phases,
+    .poa_phases = poa_phases,
+    .write_phase_freq = write_phase_freq,
+    .int_diag_calc_freq = int_diag_calc_freq,
+    .dt_failure_tol = dt_failure_tol,
+    .num_failures_max = num_failures_max,
+  };
+
+  // Populate a couple more values in the context: the flux of the field line
+  // through (RatZeq0, Z=0) and the field-line coordinate at both axial ends.
+  ctx.psi_eval = psi_RZ(ctx.RatZeq0, 0., &ctx);
+  ctx.z_min = z_psiZ(ctx.psi_eval, ctx.Z_min, &ctx);
+  ctx.z_max = z_psiZ(ctx.psi_eval, ctx.Z_max, &ctx);
+
+  return ctx;
+}
+
+void
+release_ctx(struct gk_mirror_ctx *ctx) // Frees the heap memory owned by the context.
+{
+  gkyl_free(ctx->poa_phases); // Phase table allocated with gkyl_malloc in create_ctx.
+}
+
+// Computes the field energy and integrated moments when the trigger fires at
+// t_curr or when force_calc is set; additionally records dt unless dt is
+// negative (a negative dt marks "no time-step to record").
+void
+calc_integrated_diagnostics(struct gkyl_tm_trigger *iot, gkyl_gyrokinetic_app *app,
+  double t_curr, bool force_calc, double dt)
+{
+  // The trigger is always checked (and bumped) first, even when forcing.
+  const bool fired = gkyl_tm_trigger_check_and_bump(iot, t_curr);
+  if (!fired && !force_calc) {
+    return;
+  }
+
+  gkyl_gyrokinetic_app_calc_field_energy(app, t_curr);
+  gkyl_gyrokinetic_app_calc_integrated_mom(app, t_curr);
+
+  if (dt < 0.0) {
+    return;
+  }
+  gkyl_gyrokinetic_app_save_dt(app, t_curr, dt);
+}
+
+// Writes configuration-space output (plus field energy, integrated moments and
+// dt histories) and phase-space output when their triggers fire at t_curr or
+// when force_write is set. Both outputs use the frame index of the
+// configuration-space trigger.
+void
+write_data(struct gkyl_tm_trigger *iot_conf, struct gkyl_tm_trigger *iot_phase,
+  gkyl_gyrokinetic_app *app, double t_curr, bool force_write)
+{
+  const bool conf_fired = gkyl_tm_trigger_check_and_bump(iot_conf, t_curr);
+
+  // A fired trigger has already advanced its counter, so step back by one; a
+  // forced write without a firing uses the counter as it stands.
+  int frame;
+  if (!conf_fired && force_write) {
+    frame = iot_conf->curr;
+  }
+  else {
+    frame = iot_conf->curr - 1;
+  }
+
+  if (conf_fired || force_write) {
+    gkyl_gyrokinetic_app_write_conf(app, t_curr, frame);
+
+    gkyl_gyrokinetic_app_write_field_energy(app);
+    gkyl_gyrokinetic_app_write_integrated_mom(app);
+    gkyl_gyrokinetic_app_write_dt(app);
+  }
+
+  const bool phase_fired = gkyl_tm_trigger_check_and_bump(iot_phase, t_curr);
+  if (phase_fired || force_write) {
+    gkyl_gyrokinetic_app_write_phase(app, t_curr, frame);
+  }
+}
+
+struct time_frame_state { // Time and frame bookkeeping carried from one phase to the next.
+  double t_curr; // Current simulation time.
+  double t_end; // End time of current phase.
+  int frame_curr; // Current frame.
+  int num_frames; // Cumulative frame number reached at the end of the current phase.
+};
+
+// Re-arms the three I/O triggers for the window [tfs->t_curr, tfs->t_end]:
+// each trigger restarts at the current time and frame, with an interval
+// chosen so that the remaining frames are spread over the remaining time.
+//   ctx:  supplies write_phase_freq and int_diag_calc_freq.
+//   tfs:  current time/frame and the end time/frame of the window.
+//   trig_write_conf, trig_write_phase, trig_calc_intdiag: triggers to reset.
+void reset_io_triggers(struct gk_mirror_ctx *ctx, struct time_frame_state *tfs,
+  struct gkyl_tm_trigger *trig_write_conf, struct gkyl_tm_trigger *trig_write_phase,
+  struct gkyl_tm_trigger *trig_calc_intdiag)
+{
+  // Reset I/O triggers:
+  double t_curr = tfs->t_curr;
+  double t_end = tfs->t_end;
+  int frame_curr = tfs->frame_curr;
+  int num_frames = tfs->num_frames;
+  int num_int_diag_calc = ctx->int_diag_calc_freq * num_frames;
+
+  // Prevent division by zero when frame_curr equals num_frames (e.g. a restart
+  // exactly at the end of a phase): always plan for at least one frame.
+  int frames_remaining = num_frames - frame_curr;
+  if (frames_remaining < 1) {
+    frames_remaining = 1;
+  }
+  double time_remaining = t_end - t_curr;
+
+  trig_write_conf->dt = time_remaining / frames_remaining;
+  trig_write_conf->tcurr = t_curr;
+  trig_write_conf->curr = frame_curr;
+
+  trig_write_phase->dt = time_remaining / (ctx->write_phase_freq * frames_remaining);
+  trig_write_phase->tcurr = t_curr;
+  trig_write_phase->curr = frame_curr;
+
+  // Integrated-diagnostic calculations per frame (integer division, as before).
+  // Guard against num_frames == 0, which would be an integer division by zero.
+  int diag_per_frame = num_frames > 0 ? num_int_diag_calc / num_frames : 1;
+  int diag_frames = GKYL_MAX2(frames_remaining, diag_per_frame * frames_remaining);
+  trig_calc_intdiag->dt = time_remaining / diag_frames;
+  trig_calc_intdiag->tcurr = t_curr;
+  trig_calc_intdiag->curr = frame_curr;
+}
+
+// Runs one phase (OAP or FDP) of the pseudo orbit-averaged integration.
+//   app:       gyrokinetic app to advance.
+//   ctx:       simulation context (field parameters, failure tolerances).
+//   num_steps: maximum number of time-steps to take in this phase.
+//   trig_*:    I/O triggers, re-armed here for this phase.
+//   tfs:       time/frame state; its window is set on entry and t_curr and
+//              frame_curr are advanced on exit.
+//   pparams:   settings of the phase (duration, frames, alpha, ...).
+// The loop stops at the end of the phase, after num_steps steps, on a failed
+// update, or after num_failures_max consecutive too-small time-steps.
+void run_phase(gkyl_gyrokinetic_app *app, struct gk_mirror_ctx *ctx, double num_steps,
+  struct gkyl_tm_trigger *trig_write_conf, struct gkyl_tm_trigger *trig_write_phase,
+  struct gkyl_tm_trigger *trig_calc_intdiag, struct time_frame_state *tfs,
+  struct gk_poa_phase_params *pparams)
+{
+  // Time window and cumulative frame target of this phase.
+  tfs->t_end = tfs->t_curr + pparams->duration;
+  tfs->num_frames = tfs->frame_curr + pparams->num_frames;
+
+  double t_now = tfs->t_curr;
+  const double t_stop = tfs->t_end;
+
+  // Reset I/O triggers:
+  reset_io_triggers(ctx, tfs, trig_write_conf, trig_write_phase, trig_calc_intdiag);
+
+  // Reset simulation parameters and function pointers.
+  struct gkyl_gyrokinetic_collisionless phase_collisionless = {
+    .type = GKYL_GK_COLLISIONLESS_ES,
+    .scale_factor = pparams->alpha,
+  };
+  struct gkyl_gyrokinetic_fdot_multiplier phase_fdot_mult = {
+    .num_multipliers = 1,
+    .multiplier[0] = {
+      .type = pparams->fdot_mult_type,
+      .cellwise_const = true,
+      .write_diagnostics = true,
+    },
+  };
+  struct gkyl_gyrokinetic_field phase_field = {
+    .gkfield_id = GKYL_GK_FIELD_BOLTZMANN,
+    .electron_mass = ctx->me,
+    .electron_charge = ctx->qe,
+    .electron_temp = ctx->Te0,
+    .polarization_bmag = ctx->B_p,
+    .is_static = pparams->is_static_field,
+  };
+  struct gkyl_gyrokinetic_positivity phase_positivity = {
+    .type = pparams->is_positivity_enabled? GKYL_GK_POSITIVITY_SHIFT : GKYL_GK_POSITIVITY_NONE,
+    .write_diagnostics = pparams->is_positivity_enabled,
+  };
+
+  gkyl_gyrokinetic_app_reset_species_collisionless(app, t_now, "ion", phase_collisionless);
+  gkyl_gyrokinetic_app_reset_species_fdot_multiplier(app, t_now, "ion", phase_fdot_mult);
+  gkyl_gyrokinetic_app_reset_species_positivity(app, t_now, "ion", phase_positivity);
+  gkyl_gyrokinetic_app_reset_field(app, t_now, phase_field);
+
+  // Compute initial guess of maximum stable time-step.
+  double dt = t_stop - t_now;
+
+  // Initialize small time-step check. dt_init < 0 means "not yet recorded".
+  double dt_init = -1.0;
+  const double dt_failure_tol = ctx->dt_failure_tol;
+  int num_failures = 0;
+  const int num_failures_max = ctx->num_failures_max;
+
+  for (long step = 1; (t_now < t_stop) && (step <= num_steps); step += 1) {
+    gkyl_gyrokinetic_app_cout(app, stdout, "Taking time-step at t = %g ...", t_now);
+
+    dt = fmin(dt, t_stop - t_now); // Don't step beyond t_end.
+    struct gkyl_update_status status = gkyl_gyrokinetic_update(app, dt);
+
+    gkyl_gyrokinetic_app_cout(app, stdout, " dt = %g\n", status.dt_actual);
+
+    if (!status.success) {
+      gkyl_gyrokinetic_app_cout(app, stdout, "** Update method failed! Aborting simulation ....\n");
+      break;
+    }
+    t_now += status.dt_actual;
+    dt = status.dt_suggested;
+
+    // Force output if the step overshot the end of the phase.
+    const bool past_end = t_now > t_stop;
+    calc_integrated_diagnostics(trig_calc_intdiag, app, t_now, past_end, status.dt_actual);
+    write_data(trig_write_conf, trig_write_phase, app, t_now, past_end);
+
+    // The first successful step defines the reference time-step.
+    if (dt_init < 0.0) {
+      dt_init = status.dt_actual;
+      continue;
+    }
+
+    // A step that is not too small resets the consecutive-failure counter.
+    if (!(status.dt_actual < dt_failure_tol * dt_init)) {
+      num_failures = 0;
+      continue;
+    }
+
+    num_failures += 1;
+    gkyl_gyrokinetic_app_cout(app, stdout, "WARNING: Time-step dt = %g", status.dt_actual);
+    gkyl_gyrokinetic_app_cout(app, stdout, " is below %g*dt_init ...", dt_failure_tol);
+    gkyl_gyrokinetic_app_cout(app, stdout, " num_failures = %d\n", num_failures);
+    if (num_failures >= num_failures_max) {
+      gkyl_gyrokinetic_app_cout(app, stdout, "ERROR: Time-step was below %g*dt_init ",
+        dt_failure_tol);
+      gkyl_gyrokinetic_app_cout(app, stdout, "%d consecutive times. Aborting simulation ....\n",
+        num_failures_max);
+      // Dump the final state before giving up.
+      calc_integrated_diagnostics(trig_calc_intdiag, app, t_now, true, status.dt_actual);
+      write_data(trig_write_conf, trig_write_phase, app, t_now, true);
+      break;
+    }
+  }
+
+  // Hand the reached time and the frame target of this phase to the next phase.
+  tfs->t_curr = t_now;
+  tfs->frame_curr = tfs->frame_curr + pparams->num_frames;
+}
+
+// Program entry point. Builds the simulation context, the ion species, the
+// Boltzmann-electron field and the gyrokinetic app inputs, then either restarts
+// from a frame or applies and writes initial conditions, and finally runs the
+// configured sequence of phases via run_phase before printing run statistics.
+// Always returns 0 (a failed restart read only skips to resource cleanup).
+int main(int argc, char **argv)
+{
+  struct gkyl_app_args app_args = parse_app_args(argc, argv);
+
+#ifdef GKYL_HAVE_MPI
+  if (app_args.use_mpi) MPI_Init(&argc, &argv);
+#endif
+
+  if (app_args.trace_mem) {
+    gkyl_cu_dev_mem_debug_set(true);
+    gkyl_mem_debug_set(true);
+  }
+
+  struct gk_mirror_ctx ctx = create_ctx(); // Context for init functions.
+
+  // Cell counts: command-line values take precedence over the context defaults
+  // (selection is done by APP_ARGS_CHOOSE).
+  int cells_x[ctx.cdim], cells_v[ctx.vdim];
+  for (int d = 0; d < ctx.cdim; d++) {
+    cells_x[d] = APP_ARGS_CHOOSE(app_args.xcells[d], ctx.cells[d]);
+  }
+  for (int d = 0; d < ctx.vdim; d++) {
+    cells_v[d] = APP_ARGS_CHOOSE(app_args.vcells[d], ctx.cells[ctx.cdim + d]);
+  }
+
+  // Construct communicator for use in app.
+  struct gkyl_comm *comm = gkyl_gyrokinetic_comms_new(app_args.use_mpi, app_args.use_gpu, stderr);
+
+  struct gkyl_gyrokinetic_species ion = {
+    .name = "ion",
+    .charge = ctx.qi, .mass = ctx.mi,
+    .vdim = ctx.vdim,
+    .lower = { ctx.vpar_min_ion_c, ctx.mu_min_ion_c },
+    .upper = { ctx.vpar_max_ion_c, ctx.mu_max_ion_c },
+    .cells = { cells_v[0], cells_v[1] },
+
+    .polarization_density = ctx.n0,
+
+    .mapc2p = {
+      .mapping = mapc2p_vel_ion,
+      .ctx = &ctx,
+    },
+
+    .projection = {
+      .proj_id = GKYL_PROJ_BIMAXWELLIAN,
+      .density = eval_density_ion,
+      .upar = eval_upar_ion,
+      .temppar = eval_temp_par_ion,
+      .tempperp = eval_temp_perp_ion,
+      .ctx_density = &ctx,
+      .ctx_upar = &ctx,
+      .ctx_temppar = &ctx,
+      .ctx_tempperp = &ctx,
+    },
+
+    .collisionless = {
+      .type = GKYL_GK_COLLISIONLESS_ES,
+      .scale_factor = 1.0, // Will be replaced below.
+    },
+
+    .collisions = {
+      .collision_id = GKYL_LBO_COLLISIONS,
+      .self_nu = evalNuIon,
+      .self_nu_ctx = &ctx,
+    },
+
+    .source = {
+      .source_id = GKYL_PROJ_SOURCE,
+      .num_sources = 1,
+      .projection[0] = {
+        .proj_id = GKYL_PROJ_MAXWELLIAN_PRIM,
+        .density = eval_density_ion_source,
+        .upar = eval_upar_ion_source,
+        .temp = eval_temp_ion_source,
+        .ctx_density = &ctx,
+        .ctx_upar = &ctx,
+        .ctx_temp = &ctx,
+      },
+    },
+
+    .time_rate_multiplier = {
+      .num_multipliers = 1,
+      .multiplier[0] = {
+        .type = GKYL_GK_FDOT_MULTIPLIER_LOSS_CONE,
+        .cellwise_const = true,
+        .write_diagnostics = true,
+      },
+    },
+    .positivity = {
+      .type = GKYL_GK_POSITIVITY_SHIFT,
+      .write_diagnostics = true,
+    },
+
+    .bcs = {
+      { .dir = 0, .edge = GKYL_LOWER_EDGE, .type = GKYL_BC_GK_SPECIES_SHEATH, },
+      { .dir = 0, .edge = GKYL_UPPER_EDGE, .type = GKYL_BC_GK_SPECIES_SHEATH, },
+    },
+
+    .num_diag_moments = 4,
+    .diag_moments = { GKYL_F_MOMENT_M1, GKYL_F_MOMENT_M2PAR, GKYL_F_MOMENT_M2PERP,
+                      GKYL_F_MOMENT_BIMAXWELLIAN },
+  };
+
+  struct gkyl_gyrokinetic_field field = {
+    .gkfield_id = GKYL_GK_FIELD_BOLTZMANN,
+    .electron_mass = ctx.me,
+    .electron_charge = ctx.qe,
+    .electron_temp = ctx.Te0,
+    .is_static = false, // So solvers are allocated.
+  };
+
+  // GK app
+  struct gkyl_gk app_inp = {
+    .name = "gk_mirror_tandem_boltz_elc_poa_1x2v",
+    .cdim = ctx.cdim,
+    .lower = { ctx.z_min },
+    .upper = { ctx.z_max },
+    .cells = { cells_x[0] },
+    .poly_order = ctx.poly_order,
+    .basis_type = app_args.basis_type,
+
+    .geometry = {
+      .geometry_id = GKYL_GEOMETRY_MAPC2P,
+      .world = { ctx.psi_eval, 0.0 },
+      .mapc2p = mapc2p, // Mapping of computational to physical space.
+      .c2p_ctx = &ctx,
+      .bfield_func = bfield_func, // Magnetic field.
+      .bfield_ctx = &ctx,
+      .position_map_info = {
+        .id = GKYL_PMAP_CONSTANT_DB_NUMERIC,
+        .map_strength = 0.5,
+        .maximum_slope_at_min_B = 2,
+        .gaussian_std = 0.1,
+        .gaussian_max_integration_width = 0.25,
+      },
+    },
+
+    .num_periodic_dir = 0,
+    .periodic_dirs = {},
+
+    .num_species = 1,
+    .species = { ion },
+
+    .field = field,
+
+    .parallelism = {
+      .use_gpu = app_args.use_gpu,
+      .cuts = { app_args.cuts[0] },
+      .comm = comm,
+    },
+  };
+
+  // Set app output name from the executable name (argv[0]).
+  snprintf(app_inp.name, sizeof(app_inp.name), "%s", app_args.app_name);
+
+  // Create app object.
+  gkyl_gyrokinetic_app *app = gkyl_gyrokinetic_app_new(&app_inp);
+
+  // Triggers for IO.
+  struct gkyl_tm_trigger trig_write_conf, trig_write_phase, trig_calc_intdiag;
+
+  struct time_frame_state tfs = {
+    .t_curr = 0.0, // Initial simulation time.
+    .frame_curr = 0, // Initial frame.
+    .t_end = ctx.poa_phases[0].duration, // Final time of 1st phase.
+    .num_frames = ctx.poa_phases[0].num_frames, // Number of frames in 1st phase.
+  };
+
+  int phase_idx_init = 0, phase_idx_end = ctx.num_phases; // Initial and final phase index.
+  if (app_args.is_restart) {
+    struct gkyl_app_restart_status status = gkyl_gyrokinetic_app_read_from_frame(app,
+      app_args.restart_frame);
+
+    if (status.io_status != GKYL_ARRAY_RIO_SUCCESS) {
+      gkyl_gyrokinetic_app_cout(app, stderr, "*** Failed to read restart file! (%s)\n",
+        gkyl_array_rio_status_msg(status.io_status));
+      goto freeresources;
+    }
+
+    tfs.frame_curr = status.frame;
+    tfs.t_curr = status.stime;
+
+    // Find out what phase we are in: accumulate the durations and frame counts
+    // phase by phase and stop at the first phase whose cumulative end time and
+    // end frame are not behind the restart point. If no phase qualifies the
+    // index stays 0 and the counters hold the totals over all phases.
+    double time_count = 0.0;
+    int frame_count = 0;
+    int pit_curr = 0;
+    for (int pit = 0; pit < ctx.num_phases; pit++) {
+      time_count += ctx.poa_phases[pit].duration;
+      frame_count += ctx.poa_phases[pit].num_frames;
+      if ((tfs.t_curr <= time_count) && (tfs.frame_curr <= frame_count)) {
+        pit_curr = pit;
+        break;
+      }
+    }
+    phase_idx_init = pit_curr;
+
+    // Change the duration and number frames so this phase reaches the expected
+    // time and number of frames and not beyond.
+    struct gk_poa_phase_params *pparams = &ctx.poa_phases[phase_idx_init];
+    pparams->num_frames = frame_count - tfs.frame_curr;
+    pparams->duration = time_count - tfs.t_curr;
+
+    gkyl_gyrokinetic_app_cout(app, stdout, "Restarting from frame %d", tfs.frame_curr);
+    gkyl_gyrokinetic_app_cout(app, stdout, " at time = %g\n", tfs.t_curr);
+  }
+  else {
+    gkyl_gyrokinetic_app_apply_ic(app, tfs.t_curr);
+
+    // Write out ICs.
+    reset_io_triggers(&ctx, &tfs, &trig_write_conf, &trig_write_phase, &trig_calc_intdiag);
+
+    calc_integrated_diagnostics(&trig_calc_intdiag, app, tfs.t_curr, true, -1.0);
+    write_data(&trig_write_conf, &trig_write_phase, app, tfs.t_curr, true);
+  }
+
+  // A user-imposed step limit restricts the run to a single phase. Count that
+  // phase from phase_idx_init so that a restart landing in a later phase still
+  // runs it (a fixed end index of 1 would leave the loop below empty).
+  if (app_args.num_steps != INT_MAX)
+    phase_idx_end = phase_idx_init + 1;
+
+  // Loop over the phases to run.
+  for (int pit = phase_idx_init; pit < phase_idx_end; pit++) {
+    gkyl_gyrokinetic_app_cout(app, stdout, "\nRunning phase %d @ t = %.9e ... \n", pit, tfs.t_curr);
+    struct gk_poa_phase_params *phase_params = &ctx.poa_phases[pit];
+    run_phase(app, &ctx, app_args.num_steps, &trig_write_conf, &trig_write_phase,
+      &trig_calc_intdiag, &tfs, phase_params);
+  }
+
+  gkyl_gyrokinetic_app_stat_write(app);
+
+  struct gkyl_gyrokinetic_stat stat = gkyl_gyrokinetic_app_stat(app); // fetch simulation statistics
+  gkyl_gyrokinetic_app_cout(app, stdout, "\n");
+  gkyl_gyrokinetic_app_cout(app, stdout, "Number of update calls %ld\n", stat.nup);
+  gkyl_gyrokinetic_app_cout(app, stdout, "Number of forward-Euler calls %ld\n", stat.nfeuler);
+  gkyl_gyrokinetic_app_cout(app, stdout, "Number of RK stage-2 failures %ld\n", stat.nstage_2_fail);
+  if (stat.nstage_2_fail > 0) {
+    gkyl_gyrokinetic_app_cout(app, stdout, "Max rel dt diff for RK stage-2 failures %g\n",
+      stat.stage_2_dt_diff[1]);
+    gkyl_gyrokinetic_app_cout(app, stdout, "Min rel dt diff for RK stage-2 failures %g\n",
+      stat.stage_2_dt_diff[0]);
+  }
+  gkyl_gyrokinetic_app_cout(app, stdout, "Number of RK stage-3 failures %ld\n", stat.nstage_3_fail);
+  gkyl_gyrokinetic_app_cout(app, stdout, "Number of write calls %ld\n", stat.n_io);
+  gkyl_gyrokinetic_app_print_timings(app, stdout);
+
+freeresources:
+  // Simulation complete (or restart read failed): free app, communicator and context.
+  gkyl_gyrokinetic_app_release(app);
+  gkyl_gyrokinetic_comms_release(comm);
+  release_ctx(&ctx);
+
+#ifdef GKYL_HAVE_MPI
+  if (app_args.use_mpi)
+    MPI_Finalize();
+#endif
+  return 0;
+}
\ No newline at end of file
diff --git a/gyrokinetic/creg/rt_gk_wham_kinetic_poa_1x2v_p1.c b/gyrokinetic/creg/rt_gk_wham_kinetic_poa_1x2v_p1.c
new file mode 100644
index 0000000000..5d115148e7
--- /dev/null
+++ b/gyrokinetic/creg/rt_gk_wham_kinetic_poa_1x2v_p1.c
@@ -0,0 +1,976 @@
+#include <math.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+
+#include <gkyl_alloc.h>
+#include <gkyl_const.h>
+#include <gkyl_eqn_type.h>
+#include <gkyl_fem_parproj.h>
+#include <gkyl_fem_poisson_bctype.h>
+#include <gkyl_gyrokinetic.h>
+#include <gkyl_math.h>
+
+#include <rt_arg_parse.h>
+
+// State of the pseudo orbit-averaged integrator. Also used as the type tag of
+// each phase description (struct gk_poa_phase_params::phase).
+enum gk_poa_state {
+  GK_POA_NONE = 0, // Haven't started.
+  GK_POA_OAP, // Orbit averaged phase.
+  GK_POA_FDP, // Full dynamics phase.
+  GK_POA_COMPLETED, // Finished simulation.
+};
+
+// Settings of a single phase of the pseudo orbit-averaged scheme. create_ctx
+// fills one entry per phase; run_phase applies the entry to the app before
+// time-stepping through that phase.
+struct gk_poa_phase_params {
+  enum gk_poa_state phase; // Type of phase.
+  int num_frames; // Number of output frames produced during this phase.
+  double duration; // Duration of this phase (added to the current time to get its end time).
+  double alpha; // Factor multiplying collisionless terms.
+  bool is_static_field; // Passed to the field's is_static flag (presumably true = field is NOT evolved; confirm).
+  bool is_positivity_enabled; // Whether positivity is enabled.
+  enum gkyl_gyrokinetic_fdot_multiplier_type fdot_mult_type; // Type of df/dt multiplier.
+};
+
+// Define the context of the simulation. This is basically all the globals.
+// An instance is built by create_ctx and handed (as a void pointer) to the
+// profile and mapping callbacks in this file.
+struct gk_mirror_ctx {
+  int cdim, vdim; // Dimensionality.
+  // Plasma parameters
+  double mi;
+  double qi;
+  double me;
+  double qe;
+  double Te0;
+  double n0;
+  double B_p;
+  double beta;
+  double tau;
+  double Ti0;
+  double kperpRhos;
+  // Parameters controlling initial conditions.
+  double alim;
+  double nuFrac;
+  // Electron-electron collision freq.
+  double logLambdaElc;
+  double nuElc;
+  double elc_nuFrac;
+  // Ion-ion collision freq.
+  double logLambdaIon;
+  double nuIon;
+  // Thermal speeds.
+  double vti;
+  double vte;
+  double c_s;
+  // Gyrofrequencies and gyroradii.
+  double omega_ci;
+  double rho_s;
+  double kperp; // Perpendicular wavenumber in SI units.
+  double RatZeq0; // Radius of the field line at Z=0.
+  // Axial coordinate Z extents.
+  // NOTE(review): the original note here was cut off ("Ensure that Z=0 is not
+  // on ..."); presumably it refers to a cell boundary -- confirm.
+  double z_min;
+  double z_max;
+  double psi_min;
+  double psi_eval;
+  double psi_max;
+  // Velocity-space extents (used as scale factors by the mapc2p_vel_* maps)
+  // and default cell counts.
+  double vpar_max_ion;
+  double vpar_max_elc;
+  double mu_max_ion;
+  double mu_max_elc;
+  int Nz;
+  int Nvpar;
+  int Nmu;
+  int cells[GKYL_MAX_DIM]; // Number of cells in all directions.
+  int poly_order;
+
+  double t_end; // End time.
+  int num_frames; // Number of output frames.
+  int num_phases; // Number of phases.
+  struct gk_poa_phase_params *poa_phases; // Phases to run (heap array, freed by release_ctx).
+  double write_phase_freq; // Frequency of writing phase-space diagnostics (as a fraction of num_frames).
+  double int_diag_calc_freq; // Frequency of calculating integrated diagnostics (as a factor of num_frames).
+  double dt_failure_tol; // Minimum allowable fraction of initial time-step.
+  int num_failures_max; // Maximum allowable number of consecutive small time-steps.
+
+  // Source parameters (read by the eval_*_source callbacks).
+  double source_amplitude;
+  double source_sigma;
+  double ion_source_temp;
+  double elc_source_temp;
+};
+
+// Density profile callback: stores the constant 1e17 in fout[0].
+// The time, position and context arguments are not consulted.
+void
+eval_density(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout, void *ctx)
+{
+  (void) t;
+  (void) xn;
+  (void) ctx;
+  *fout = 1e17;
+}
+
+// Parallel flow profile callback: stores zero in fout[0].
+// The time, position and context arguments are not consulted.
+void
+eval_upar(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout, void *ctx)
+{
+  (void) t;
+  (void) xn;
+  (void) ctx;
+  *fout = 0.0;
+}
+
+// Ion temperature profile callback: stores the context's Ti0 in fout[0],
+// independent of time and position. ctx must point to a struct gk_mirror_ctx.
+void
+eval_temp_ion(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout, void *ctx)
+{
+  const struct gk_mirror_ctx *params = ctx;
+  *fout = params->Ti0;
+}
+
+// Electron temperature profile callback: stores the context's Te0 in fout[0],
+// independent of time and position. ctx must point to a struct gk_mirror_ctx.
+void
+eval_temp_elc(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout, void *ctx)
+{
+  const struct gk_mirror_ctx *params = ctx;
+  *fout = params->Te0;
+}
+
+// Source density profile callback. For |z| <= 1 (z = xn[0]) the result is
+// source_amplitude * (1 - |z|^6); elsewhere it is the small floor 1e-16.
+// ctx must point to a struct gk_mirror_ctx.
+void
+eval_density_source(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout, void *ctx)
+{
+  const struct gk_mirror_ctx *params = ctx;
+  const double abs_z = fabs(xn[0]);
+
+  // Outside the unit interval the source is switched off (up to the floor).
+  if (!(abs_z <= 1.0)) {
+    fout[0] = 1e-16;
+    return;
+  }
+  fout[0] = params->source_amplitude * (1 - pow(abs_z, 6));
+}
+
+// Source parallel flow callback: stores zero in fout[0]; no argument is consulted.
+void
+eval_upar_source(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout, void *ctx)
+{
+  (void) t;
+  (void) xn;
+  (void) ctx;
+  *fout = 0.0;
+}
+
+// Ion source temperature callback. Yields ion_source_temp for |z| <= 1
+// (z = xn[0]) and 1% of that value elsewhere.
+// ctx must point to a struct gk_mirror_ctx.
+void
+eval_temp_ion_source(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout,
+  void *ctx)
+{
+  const struct gk_mirror_ctx *params = ctx;
+  const double temp_core = params->ion_source_temp;
+  const double temp_floor = temp_core * 1e-2;
+
+  fout[0] = (fabs(xn[0]) <= 1.0) ? temp_core : temp_floor;
+}
+
+// Electron source temperature callback. Yields elc_source_temp for |z| <= 1
+// (z = xn[0]) and 1% of that value elsewhere.
+// ctx must point to a struct gk_mirror_ctx.
+void
+eval_temp_elc_source(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout,
+  void *ctx)
+{
+  const struct gk_mirror_ctx *params = ctx;
+  const double temp_core = params->elc_source_temp;
+  const double temp_floor = temp_core * 1e-2;
+
+  fout[0] = (fabs(xn[0]) <= 1.0) ? temp_core : temp_floor;
+}
+
+// Potential profile callback. The value is flat at 8*Te0/qi for
+// |z| <= sigma (sigma = 0.2*0.98, z = xn[0]) and decreases linearly in |z|
+// beyond that, reaching zero at |z| = z_max.
+// ctx must point to a struct gk_mirror_ctx.
+void
+eval_potential(double t, const double *GKYL_RESTRICT xn, double *GKYL_RESTRICT fout, void *ctx)
+{
+  const struct gk_mirror_ctx *params = ctx;
+  const double abs_z = fabs(xn[0]);
+  const double z_m = 0.98;
+  const double sigma = 0.2 * z_m;
+  const double center_potential = 8.0 * params->Te0 / params->qi;
+
+  // Flat-top region around the center.
+  if (abs_z <= sigma) {
+    fout[0] = center_potential;
+    return;
+  }
+
+  // Linear ramp down towards the domain edge.
+  fout[0] = center_potential * (1 - (abs_z - sigma) / (params->z_max - sigma));
+}
+
+// Ion velocity-space map from computational coordinates vc = (vpar, mu) to
+// physical coordinates vp.
+//  - vp[0]: linear in vc[0] while |vc[0]| < frac_linear, and
+//    vpar_max_ion * tan(b*vc[0]) / tan(b) otherwise (b = 1.45). frac_linear is
+//    chosen so the tangent branch equals 1/6 of vpar_max_ion at the switch point.
+//  - vp[1]: cubic map, mu_max_ion * vc[1]^3.
+// ctx must point to a struct gk_mirror_ctx; t is not used.
+void mapc2p_vel_ion(double t, const double *vc, double *GKYL_RESTRICT vp, void *ctx)
+{
+  const struct gk_mirror_ctx *params = ctx;
+  const double vpar_scale = params->vpar_max_ion;
+  const double mu_scale = params->mu_max_ion;
+
+  const double comp_vpar = vc[0];
+  const double comp_mu = vc[1];
+
+  const double b = 1.45;
+  const double linear_velocity_threshold = 1. / 6.;
+  const double tan_b = tan(b);
+  const double frac_linear = 1 / b * atan(linear_velocity_threshold * tan_b);
+
+  if (fabs(comp_vpar) < frac_linear) {
+    // Linear segment through the origin.
+    const double func_frac = tan(frac_linear * b) / tan_b;
+    vp[0] = vpar_scale * func_frac * comp_vpar / frac_linear;
+  }
+  else {
+    // Tangent-stretched segment.
+    vp[0] = vpar_scale * tan(comp_vpar * b) / tan_b;
+  }
+
+  // Cubic map in mu.
+  vp[1] = mu_scale * pow(comp_mu, 3);
+}
+
+// Electron velocity-space map from computational coordinates vc = (vpar, mu)
+// to physical coordinates vp.
+//  - vp[0]: linear in vc[0] while |vc[0]| < frac_linear, and
+//    vpar_max_elc * tan(b*vc[0]) / tan(b) otherwise (b = 1.45). frac_linear is
+//    chosen so the tangent branch equals 1/6 of vpar_max_elc at the switch point.
+//  - vp[1]: power-law map, mu_max_elc * vc[1]^(3/2).
+// ctx must point to a struct gk_mirror_ctx; t is not used.
+void mapc2p_vel_elc(double t, const double *vc, double *GKYL_RESTRICT vp, void *ctx)
+{
+  const struct gk_mirror_ctx *params = ctx;
+  const double vpar_scale = params->vpar_max_elc;
+  const double mu_scale = params->mu_max_elc;
+
+  const double comp_vpar = vc[0];
+  const double comp_mu = vc[1];
+
+  const double b = 1.45;
+  const double linear_velocity_threshold = 1. / 6.;
+  const double tan_b = tan(b);
+  const double frac_linear = 1 / b * atan(linear_velocity_threshold * tan_b);
+
+  if (fabs(comp_vpar) < frac_linear) {
+    // Linear segment through the origin.
+    const double func_frac = tan(frac_linear * b) / tan_b;
+    vp[0] = vpar_scale * func_frac * comp_vpar / frac_linear;
+  }
+  else {
+    // Tangent-stretched segment.
+    vp[0] = vpar_scale * tan(comp_vpar * b) / tan_b;
+  }
+
+  // Power 3/2 map in mu.
+  vp[1] = mu_scale * pow(comp_mu, 3.0 / 2.0);
+}
+
+// Build the simulation context: physical constants, derived plasma quantities,
+// grid sizes, the list of OAP/FDP phases and the source parameters.
+//
+// The phase list is heap-allocated with gkyl_malloc; the caller owns it and
+// must release it with release_ctx. It holds num_cycles (OAP, FDP) pairs
+// followed by one extra, longer FDP.
+//
+// Returns the populated context by value. Members without an explicit
+// initializer below (e.g. psi_min, psi_max, RatZeq0) are zero-initialized.
+struct gk_mirror_ctx
+create_ctx(void)
+{
+  int cdim = 1, vdim = 2; // Dimensionality.
+
+  // Universal constant parameters.
+  double eps0 = GKYL_EPSILON0;
+  double mu0 = GKYL_MU0; // Not sure if this is right
+  double eV = GKYL_ELEMENTARY_CHARGE;
+  double mp = GKYL_PROTON_MASS; // ion mass
+  double me = GKYL_ELECTRON_MASS;
+  double qi = eV;  // ion charge
+  double qe = -eV; // electron charge
+
+  // Plasma parameters.
+  double mi = 2.014 * mp;
+  double Te0 = 940 * eV;
+  double n0 = 3e19;
+  double B_p = 0.53;
+  double beta = 0.4;
+  double tau = pow(B_p, 2.) * beta / (2.0 * mu0 * n0 * Te0) - 1.;
+  double Ti0 = tau * Te0;
+  double kperpRhos = 0.1;
+
+  // Parameters controlling initial conditions.
+  double alim = 0.125;
+
+  double nuFrac = 1.0;
+  double elc_nuFrac = 1 / 5.489216862238348;
+  // Electron-electron collision freq.
+  double logLambdaElc = 6.6 - 0.5 * log(n0 / 1e20) + 1.5 * log(Te0 / eV);
+  double nuElc = elc_nuFrac * nuFrac * logLambdaElc * pow(eV, 4.) * n0 /
+    (6. * sqrt(2.) * pow(M_PI, 3. / 2.) * pow(eps0, 2.) * sqrt(me) * pow(Te0, 3. / 2.));
+  // Ion-ion collision freq.
+  double logLambdaIon = 6.6 - 0.5 * log(n0 / 1e20) + 1.5 * log(Ti0 / eV);
+  double nuIon = nuFrac * logLambdaIon * pow(eV, 4.) * n0 /
+    (12 * pow(M_PI, 3. / 2.) * pow(eps0, 2.) * sqrt(mi) * pow(Ti0, 3. / 2.));
+
+  // Thermal speeds.
+  double vti = sqrt(Ti0 / mi);
+  double vte = sqrt(Te0 / me);
+  double c_s = sqrt(Te0 / mi);
+
+  // Gyrofrequencies and gyroradii.
+  double omega_ci = eV * B_p / mi;
+  double rho_s = c_s / omega_ci;
+
+  // Perpendicular wavenumber in SI units:
+  double kperp = kperpRhos / rho_s;
+
+  // Geometry parameters.
+  double z_min = -2.0;
+  double z_max = 2.0;
+  double psi_eval = 1e-3;
+
+  // Grid parameters
+  double vpar_max_elc = 30 * vte;
+  double mu_max_elc = me * pow(3. * vte, 2.) / (2. * B_p);
+  double vpar_max_ion = 30 * vti;
+  double mu_max_ion = mi * pow(3. * vti, 2.) / (2. * B_p);
+  int Nz = 32;
+  int Nvpar = 32; // Number of cells in the parallel velocity direction 96
+  int Nmu = 16;  // Number of cells in the mu direction 192
+  int poly_order = 1;
+
+  // Factor multiplying collisionless terms.
+  double alpha_oap = 0.01;
+  double alpha_fdp = 1.0;
+  // Duration of each phase.
+  double tau_oap = 1.5e-8;
+  double tau_fdp = 1.5e-10;
+  double tau_fdp_extra = 2 * tau_fdp;
+  int num_cycles = 2; // Number of OAP+FDP cycles to run.
+
+  // Frame counts for each phase type (specified independently)
+  int num_frames_oap = 1; // Frames per OAP phase
+  int num_frames_fdp = 1; // Frames per FDP phase
+  int num_frames_fdp_extra = 2 * num_frames_fdp;  // Frames for the extra FDP phase
+
+  // Value of the field's is_static flag in each phase type.
+  bool is_static_field_oap = true;
+  bool is_static_field_fdp = false;
+  // Whether to enable positivity.
+  bool is_positivity_enabled_oap = false;
+  bool is_positivity_enabled_fdp = true;
+  // Type of df/dt multiplier.
+  enum gkyl_gyrokinetic_fdot_multiplier_type fdot_mult_type_oap = GKYL_GK_FDOT_MULTIPLIER_LOSS_CONE;
+  enum gkyl_gyrokinetic_fdot_multiplier_type fdot_mult_type_fdp = GKYL_GK_FDOT_MULTIPLIER_NONE;
+
+  // Calculate phase structure
+  double t_end = (tau_oap + tau_fdp) * num_cycles + tau_fdp_extra;
+  int num_phases = 2 * num_cycles + 1;
+  int num_frames = num_cycles * (num_frames_oap + num_frames_fdp) + num_frames_fdp_extra;
+
+  struct gk_poa_phase_params *poa_phases = gkyl_malloc(num_phases *
+    sizeof(struct gk_poa_phase_params));
+  // Even entries are OAPs, odd entries are FDPs; the last slot is filled after the loop.
+  for (int i = 0; i < (num_phases - 1) / 2; i++) {
+    // OAPs.
+    poa_phases[2 * i].phase = GK_POA_OAP;
+    poa_phases[2 * i].num_frames = num_frames_oap;
+    poa_phases[2 * i].duration = tau_oap;
+    poa_phases[2 * i].alpha = alpha_oap;
+    poa_phases[2 * i].is_static_field = is_static_field_oap;
+    poa_phases[2 * i].fdot_mult_type = fdot_mult_type_oap;
+    poa_phases[2 * i].is_positivity_enabled = is_positivity_enabled_oap;
+
+    // FDPs.
+    poa_phases[2 * i + 1].phase = GK_POA_FDP;
+    poa_phases[2 * i + 1].num_frames = num_frames_fdp;
+    poa_phases[2 * i + 1].duration = tau_fdp;
+    poa_phases[2 * i + 1].alpha = alpha_fdp;
+    poa_phases[2 * i + 1].is_static_field = is_static_field_fdp;
+    poa_phases[2 * i + 1].fdot_mult_type = fdot_mult_type_fdp;
+    poa_phases[2 * i + 1].is_positivity_enabled = is_positivity_enabled_fdp;
+  }
+  // Add an extra, longer FDP.
+  poa_phases[num_phases - 1].phase = GK_POA_FDP;
+  poa_phases[num_phases - 1].num_frames = num_frames_fdp_extra;
+  poa_phases[num_phases - 1].duration = tau_fdp_extra;
+  poa_phases[num_phases - 1].alpha = alpha_fdp;
+  poa_phases[num_phases - 1].is_static_field = is_static_field_fdp;
+  poa_phases[num_phases - 1].fdot_mult_type = fdot_mult_type_fdp;
+  poa_phases[num_phases - 1].is_positivity_enabled = is_positivity_enabled_fdp;
+
+  double write_phase_freq = 0.5; // Frequency of writing phase-space diagnostics (as a fraction of num_frames).
+  double int_diag_calc_freq = 5; // Frequency of calculating integrated diagnostics (as a factor of num_frames).
+  double dt_failure_tol = 1.0e-4; // Minimum allowable fraction of initial time-step.
+  int num_failures_max = 20; // Maximum allowable number of consecutive small time-steps.
+
+  // Source parameters
+  double source_amplitude = 1.e20;
+  double source_sigma = 0.5;
+  double ion_source_temp = 5000. * eV;
+  double elc_source_temp = 5000. * eV; // Using same temp as ion source for simplicity
+
+  struct gk_mirror_ctx ctx = {
+    .cdim = cdim,
+    .vdim = vdim,
+    .mi = mi,
+    .qi = qi,
+    .me = me,
+    .qe = qe,
+    .Te0 = Te0,
+    .n0 = n0,
+    .B_p = B_p,
+    .beta = beta,
+    .tau = tau,
+    .Ti0 = Ti0,
+    .kperpRhos = kperpRhos,
+    .alim = alim,
+    .nuFrac = nuFrac,
+    .logLambdaElc = logLambdaElc,
+    .nuElc = nuElc,
+    .elc_nuFrac = elc_nuFrac,
+    .logLambdaIon = logLambdaIon,
+    .nuIon = nuIon,
+    .vti = vti,
+    .vte = vte,
+    .c_s = c_s,
+    .omega_ci = omega_ci,
+    .rho_s = rho_s,
+    .kperp = kperp,
+    .z_min = z_min,
+    .z_max = z_max,
+    .psi_eval = psi_eval,
+    .vpar_max_ion = vpar_max_ion,
+    .vpar_max_elc = vpar_max_elc,
+    .mu_max_ion = mu_max_ion,
+    .mu_max_elc = mu_max_elc,
+    .Nz = Nz,
+    .Nvpar = Nvpar,
+    .Nmu = Nmu,
+    .cells = { Nz, Nvpar, Nmu },
+    .poly_order = poly_order,
+    .t_end = t_end,
+    .num_frames = num_frames,
+    .num_phases = num_phases,
+    .poa_phases = poa_phases,
+    .write_phase_freq = write_phase_freq,
+    .int_diag_calc_freq = int_diag_calc_freq,
+    .dt_failure_tol = dt_failure_tol,
+    .num_failures_max = num_failures_max,
+    // The source callbacks read these from the context; leaving them out
+    // would zero-initialize them and yield a zero-amplitude, zero-temperature source.
+    .source_amplitude = source_amplitude,
+    .source_sigma = source_sigma,
+    .ion_source_temp = ion_source_temp,
+    .elc_source_temp = elc_source_temp,
+  };
+
+  return ctx;
+}
+
+// Free the heap-allocated phase list owned by the context (allocated in create_ctx).
+// The context struct itself is not freed.
+void
+release_ctx(struct gk_mirror_ctx *ctx)
+{
+  struct gk_poa_phase_params *phases = ctx->poa_phases;
+  gkyl_free(phases);
+}
+
+// Compute integrated diagnostics (field energy and integrated moments) when
+// the trigger iot fires at t_curr or when force_calc is set. The trigger is
+// always checked (and possibly bumped) first. The time-step dt is also
+// recorded unless it is negative, which callers use to mean "no step taken".
+void
+calc_integrated_diagnostics(struct gkyl_tm_trigger *iot, gkyl_gyrokinetic_app *app,
+  double t_curr, bool force_calc, double dt)
+{
+  const bool is_due = gkyl_tm_trigger_check_and_bump(iot, t_curr);
+  if (!is_due && !force_calc)
+    return;
+
+  gkyl_gyrokinetic_app_calc_field_energy(app, t_curr);
+  gkyl_gyrokinetic_app_calc_integrated_mom(app, t_curr);
+
+  // Skip saving dt only for strictly negative values.
+  if (dt < 0.0)
+    return;
+  gkyl_gyrokinetic_app_save_dt(app, t_curr, dt);
+}
+
+// Write output at time t_curr.
+//  - Configuration-space data plus the field-energy, integrated-moment and dt
+//    histories are written when the conf trigger fires or force_write is set.
+//  - Phase-space data is written when the phase trigger fires or force_write
+//    is set.
+// Both writes use a frame index taken from the conf trigger: its current
+// counter when the write is forced without the conf trigger having fired,
+// otherwise the counter minus one (the trigger was just bumped).
+void
+write_data(struct gkyl_tm_trigger *iot_conf, struct gkyl_tm_trigger *iot_phase,
+  gkyl_gyrokinetic_app *app, double t_curr, bool force_write)
+{
+  const bool conf_due = gkyl_tm_trigger_check_and_bump(iot_conf, t_curr);
+  // True when output is forced although the conf trigger did not fire.
+  const bool forced_only = force_write && !conf_due;
+
+  if (conf_due || force_write) {
+    int conf_frame = iot_conf->curr;
+    if (!forced_only)
+      conf_frame = iot_conf->curr - 1;
+    gkyl_gyrokinetic_app_write_conf(app, t_curr, conf_frame);
+
+    gkyl_gyrokinetic_app_write_field_energy(app);
+    gkyl_gyrokinetic_app_write_integrated_mom(app);
+    gkyl_gyrokinetic_app_write_dt(app);
+  }
+
+  const bool phase_due = gkyl_tm_trigger_check_and_bump(iot_phase, t_curr);
+  if (phase_due || force_write) {
+    int phase_frame = iot_conf->curr;
+    if (!forced_only)
+      phase_frame = iot_conf->curr - 1;
+
+    gkyl_gyrokinetic_app_write_phase(app, t_curr, phase_frame);
+  }
+}
+
+// Running time/frame bookkeeping shared between main, run_phase and
+// reset_io_triggers. run_phase sets t_end and num_frames at the start of a
+// phase and advances t_curr and frame_curr when the phase finishes.
+struct time_frame_state {
+  double t_curr; // Current simulation time.
+  double t_end; // End time of current phase.
+  int frame_curr; // Current frame.
+  int num_frames; // Number of frames at the end of current phase.
+};
+
+// Re-arm the three output triggers for the interval [tfs->t_curr, tfs->t_end].
+// Each trigger gets its start time set to t_curr, its counter set to
+// frame_curr, and a period derived from the remaining time and frames:
+//  - trig_write_conf:   time_remaining / frames_remaining
+//  - trig_write_phase:  time_remaining / (write_phase_freq * frames_remaining)
+//  - trig_calc_intdiag: time_remaining / diag_frames
+void reset_io_triggers(struct gk_mirror_ctx *ctx, struct time_frame_state *tfs,
+  struct gkyl_tm_trigger *trig_write_conf, struct gkyl_tm_trigger *trig_write_phase,
+  struct gkyl_tm_trigger *trig_calc_intdiag)
+{
+  // Reset I/O triggers:
+  double t_curr = tfs->t_curr;
+  double t_end = tfs->t_end;
+  int frame_curr = tfs->frame_curr;
+  int num_frames = tfs->num_frames;
+  // The double product is truncated to int on assignment.
+  int num_int_diag_calc = ctx->int_diag_calc_freq * num_frames;
+
+  // Frames and time left until the end of the current phase.
+  // NOTE(review): nothing here guards against frames_remaining == 0 (i.e.
+  // frame_curr == num_frames) or num_frames == 0; the divisions below would
+  // then produce non-finite periods or an integer division by zero.
+  int frames_remaining = num_frames - frame_curr;
+  double time_remaining = t_end - t_curr;
+
+  trig_write_conf->dt = time_remaining / frames_remaining;
+  trig_write_conf->tcurr = t_curr;
+  trig_write_conf->curr = frame_curr;
+
+  trig_write_phase->dt = time_remaining / (ctx->write_phase_freq * frames_remaining);
+  trig_write_phase->tcurr = t_curr;
+  trig_write_phase->curr = frame_curr;
+
+  // Integrated diagnostics are computed at least once per frame; the ratio is
+  // an integer division.
+  int diag_frames = GKYL_MAX2(frames_remaining,
+    (num_int_diag_calc / num_frames) * frames_remaining);
+  trig_calc_intdiag->dt = time_remaining / diag_frames;
+  trig_calc_intdiag->tcurr = t_curr;
+  trig_calc_intdiag->curr = frame_curr;
+}
+
+// Run a single OAP or FDP phase.
+//
+// Steps:
+//  1. Set the phase end time and end frame in tfs from pparams and re-arm the
+//     I/O triggers for that interval.
+//  2. Reset the app's collisionless term, df/dt multiplier and positivity
+//     settings for the "ion" and "elc" species, and the field, according to
+//     pparams.
+//  3. Time-step until the phase end time is reached, num_steps steps have been
+//     taken, an update fails, or the time-step stays below
+//     dt_failure_tol * (first step's dt) for num_failures_max consecutive steps.
+//  4. Store the reached time in tfs and advance its frame counter by
+//     pparams->num_frames (regardless of how the loop ended).
+//
+// app:       gyrokinetic app to advance.
+// ctx:       simulation context (field parameters, failure tolerances).
+// num_steps: maximum number of time-steps to take in this phase.
+// trig_*:    I/O triggers, reset here and consumed by the output helpers.
+// tfs:       running time/frame state, updated in place.
+// pparams:   parameters of the phase to run.
+void run_phase(gkyl_gyrokinetic_app *app, struct gk_mirror_ctx *ctx, double num_steps,
+  struct gkyl_tm_trigger *trig_write_conf, struct gkyl_tm_trigger *trig_write_phase,
+  struct gkyl_tm_trigger *trig_calc_intdiag, struct time_frame_state *tfs,
+  struct gk_poa_phase_params *pparams)
+{
+  tfs->t_end = tfs->t_curr + pparams->duration;
+  tfs->num_frames = tfs->frame_curr + pparams->num_frames;
+
+  // Run an OAP or FDP.
+  double t_curr = tfs->t_curr;
+  double t_end = tfs->t_end;
+
+  // Reset I/O triggers:
+  reset_io_triggers(ctx, tfs, trig_write_conf, trig_write_phase, trig_calc_intdiag);
+
+  // Reset simulation parameters and function pointers.
+  struct gkyl_gyrokinetic_collisionless collisionless_inp = {
+    .type = GKYL_GK_COLLISIONLESS_ES,
+    .scale_factor = pparams->alpha,
+  };
+  struct gkyl_gyrokinetic_fdot_multiplier fdot_mult_inp = {
+    .num_multipliers = 1,
+    .multiplier[0] = {
+      .type = pparams->fdot_mult_type,
+      .cellwise_const = true,
+      .write_diagnostics = true,
+    },
+  };
+  struct gkyl_gyrokinetic_field field_inp = {
+    .polarization_bmag = ctx->B_p,
+    .kperpSq = pow(ctx->kperp, 2.),
+    .is_static = pparams->is_static_field,
+    .time_rate_diagnostics = true,
+    .polarization_potential = eval_potential,
+    // ctx is already a pointer to the context; eval_potential casts this
+    // value straight to struct gk_mirror_ctx *, so pass it as is (not &ctx).
+    .polarization_potential_ctx = ctx,
+  };
+  struct gkyl_gyrokinetic_positivity positivity_inp = {
+    .type = pparams->is_positivity_enabled? GKYL_GK_POSITIVITY_SHIFT : GKYL_GK_POSITIVITY_NONE,
+    .write_diagnostics = pparams->is_positivity_enabled,
+  };
+
+  gkyl_gyrokinetic_app_reset_species_collisionless(app, t_curr, "ion", collisionless_inp);
+  gkyl_gyrokinetic_app_reset_species_collisionless(app, t_curr, "elc", collisionless_inp);
+  gkyl_gyrokinetic_app_reset_species_fdot_multiplier(app, t_curr, "ion", fdot_mult_inp);
+  gkyl_gyrokinetic_app_reset_species_fdot_multiplier(app, t_curr, "elc", fdot_mult_inp);
+  gkyl_gyrokinetic_app_reset_species_positivity(app, t_curr, "ion", positivity_inp);
+  gkyl_gyrokinetic_app_reset_species_positivity(app, t_curr, "elc", positivity_inp);
+  gkyl_gyrokinetic_app_reset_field(app, t_curr, field_inp);
+
+  // Compute initial guess of maximum stable time-step.
+  double dt = t_end - t_curr;
+
+  // Initialize small time-step check.
+  double dt_init = -1.0, dt_failure_tol = ctx->dt_failure_tol;
+  int num_failures = 0, num_failures_max = ctx->num_failures_max;
+
+  long step = 1;
+  while ((t_curr < t_end) && (step <= num_steps)) {
+    gkyl_gyrokinetic_app_cout(app, stdout, "Taking time-step %ld at t = %g ...", step, t_curr);
+
+    dt = fmin(dt, t_end - t_curr); // Don't step beyond t_end.
+    struct gkyl_update_status status = gkyl_gyrokinetic_update(app, dt);
+
+    gkyl_gyrokinetic_app_cout(app, stdout, " dt = %g\n", status.dt_actual);
+
+    if (!status.success) {
+      gkyl_gyrokinetic_app_cout(app, stdout, "** Update method failed! Aborting simulation ....\n");
+      break;
+    }
+    t_curr += status.dt_actual;
+    dt = status.dt_suggested;
+
+    // Force diagnostics and output once the end of the phase has been reached.
+    calc_integrated_diagnostics(trig_calc_intdiag, app, t_curr, t_curr >= t_end, status.dt_actual);
+    write_data(trig_write_conf, trig_write_phase, app, t_curr, t_curr >= t_end);
+
+    if (dt_init < 0.0) {
+      // First successful step of this phase sets the reference time-step.
+      dt_init = status.dt_actual;
+    }
+    else if (status.dt_actual < dt_failure_tol * dt_init) {
+      num_failures += 1;
+
+      gkyl_gyrokinetic_app_cout(app, stdout, "WARNING: Time-step dt = %g", status.dt_actual);
+      gkyl_gyrokinetic_app_cout(app, stdout, " is below %g*dt_init ...", dt_failure_tol);
+      gkyl_gyrokinetic_app_cout(app, stdout, " num_failures = %d\n", num_failures);
+      if (num_failures >= num_failures_max) {
+        gkyl_gyrokinetic_app_cout(app, stdout, "ERROR: Time-step was below %g*dt_init ",
+          dt_failure_tol);
+        gkyl_gyrokinetic_app_cout(app, stdout, "%d consecutive times. Aborting simulation ....\n",
+          num_failures_max);
+        // Dump the current state before giving up.
+        calc_integrated_diagnostics(trig_calc_intdiag, app, t_curr, true, status.dt_actual);
+        write_data(trig_write_conf, trig_write_phase, app, t_curr, true);
+        break;
+      }
+    }
+    else {
+      // A normal-sized step resets the consecutive-failure counter.
+      num_failures = 0;
+    }
+    step += 1;
+  }
+
+  tfs->t_curr = t_curr;
+  tfs->frame_curr = tfs->frame_curr + pparams->num_frames;
+}
+
+int main(int argc, char **argv)
+{ // Driver: build the app inputs, then advance through the phases in ctx.poa_phases.
+  struct gkyl_app_args app_args = parse_app_args(argc, argv);
+
+#ifdef GKYL_HAVE_MPI
+  if (app_args.use_mpi) MPI_Init(&argc, &argv);
+#endif
+
+  if (app_args.trace_mem) {
+    gkyl_cu_dev_mem_debug_set(true);
+    gkyl_mem_debug_set(true);
+  }
+
+  struct gk_mirror_ctx ctx = create_ctx(); // Context for init functions.
+  // APP_ARGS_CHOOSE presumably prefers a command-line cell count over the ctx default.
+  int cells_x[ctx.cdim], cells_v[ctx.vdim];
+  for (int d = 0; d < ctx.cdim; d++) {
+    cells_x[d] = APP_ARGS_CHOOSE(app_args.xcells[d], ctx.cells[d]);
+  }
+  for (int d = 0; d < ctx.vdim; d++) {
+    cells_v[d] = APP_ARGS_CHOOSE(app_args.vcells[d], ctx.cells[ctx.cdim + d]);
+  }
+
+  // Construct communicator for use in app.
+  struct gkyl_comm *comm = gkyl_gyrokinetic_comms_new(app_args.use_mpi, app_args.use_gpu, stderr);
+
+  struct gkyl_gyrokinetic_species elc = {
+    .name = "elc",
+    .charge = ctx.qe,
+    .mass = ctx.me,
+    .vdim = ctx.vdim,
+    .lower = { -1.0, 0.0 },
+    .upper = { 1.0, 1.0 },
+    .cells = { cells_v[0], cells_v[1] },
+
+    .polarization_density = ctx.n0,
+
+    .mapc2p = {
+      .mapping = mapc2p_vel_elc,
+      .ctx = &ctx,
+    },
+
+    .projection = {
+      .proj_id = GKYL_PROJ_MAXWELLIAN_PRIM,
+      .density = eval_density,
+      .ctx_density = &ctx,
+      .upar = eval_upar,
+      .ctx_upar = &ctx,
+      .temp = eval_temp_elc,
+      .ctx_temp = &ctx,
+    },
+
+    .collisionless = {
+      .type = GKYL_GK_COLLISIONLESS_ES,
+      .scale_factor = 1.0, // Will be replaced below.
+    },
+
+    .collisions = {
+      .collision_id = GKYL_LBO_COLLISIONS,
+      .den_ref = ctx.n0,
+      .temp_ref = ctx.Te0,
+      .num_cross_collisions = 1,
+      .collide_with = { "ion" },
+      .write_diagnostics = true,
+    },
+
+    .source = {
+      .source_id = GKYL_PROJ_SOURCE,
+      .num_sources = 1,
+      .projection[0] = {
+        .proj_id = GKYL_PROJ_MAXWELLIAN_PRIM,
+        .ctx_density = &ctx,
+        .density = eval_density_source,
+        .ctx_upar = &ctx,
+        .upar = eval_upar_source,
+        .ctx_temp = &ctx,
+        .temp = eval_temp_elc_source,
+      },
+      .diagnostics = {
+        .num_diag_moments = 5,
+        .diag_moments = { GKYL_F_MOMENT_M0, GKYL_F_MOMENT_M1, GKYL_F_MOMENT_M2, GKYL_F_MOMENT_M2PAR,
+                          GKYL_F_MOMENT_M2PERP },
+        .num_integrated_diag_moments = 1,
+        .integrated_diag_moments = { GKYL_F_MOMENT_HAMILTONIAN },
+      }
+    },
+
+    .time_rate_multiplier = {
+      .num_multipliers = 1,
+      .multiplier[0] = {
+        .type = GKYL_GK_FDOT_MULTIPLIER_LOSS_CONE,
+        .cellwise_const = true,
+        .write_diagnostics = true,
+      },
+    },
+
+    .positivity = {
+      .type = GKYL_GK_POSITIVITY_SHIFT,
+      .write_diagnostics = true,
+    },
+
+    .bcs = {
+      { .dir = 0, .edge = GKYL_LOWER_EDGE, .type = GKYL_BC_GK_SPECIES_SHEATH, },
+      { .dir = 0, .edge = GKYL_UPPER_EDGE, .type = GKYL_BC_GK_SPECIES_SHEATH, },
+    },
+
+    .write_omega_cfl = true,
+    .num_diag_moments = 8,
+    .diag_moments = { GKYL_F_MOMENT_BIMAXWELLIAN, GKYL_F_MOMENT_M0, GKYL_F_MOMENT_M1,
+                      GKYL_F_MOMENT_M2, GKYL_F_MOMENT_M2PAR, GKYL_F_MOMENT_M2PERP,
+                      GKYL_F_MOMENT_M3PAR, GKYL_F_MOMENT_M3PERP },
+    .num_integrated_diag_moments = 1,
+    .integrated_diag_moments = { GKYL_F_MOMENT_HAMILTONIAN },
+    .time_rate_diagnostics = true,
+
+    .boundary_flux_diagnostics = {
+      .num_integrated_diag_moments = 1,
+      .integrated_diag_moments = { GKYL_F_MOMENT_HAMILTONIAN },
+    },
+  };
+
+  struct gkyl_gyrokinetic_species ion = {
+    .name = "ion",
+    .charge = ctx.qi,
+    .mass = ctx.mi,
+    .vdim = ctx.vdim,
+    .lower = { -1.0, 0.0 },
+    .upper = { 1.0, 1.0 },
+    .cells = { cells_v[0], cells_v[1] },
+    .polarization_density = ctx.n0,
+    .scale_with_polarization = true,
+
+    .projection = {
+      .proj_id = GKYL_PROJ_MAXWELLIAN_PRIM,
+      .density = eval_density,
+      .ctx_density = &ctx,
+      .upar = eval_upar,
+      .ctx_upar = &ctx,
+      .temp = eval_temp_ion,
+      .ctx_temp = &ctx,
+    },
+
+    .mapc2p = {
+      .mapping = mapc2p_vel_ion,
+      .ctx = &ctx,
+    },
+
+    .collisionless = {
+      .type = GKYL_GK_COLLISIONLESS_ES,
+      .scale_factor = 1.0, // Will be replaced below.
+    },
+
+    .time_rate_multiplier = {
+      .num_multipliers = 1,
+      .multiplier[0] = {
+        .type = GKYL_GK_FDOT_MULTIPLIER_LOSS_CONE,
+        .cellwise_const = true,
+        .write_diagnostics = true,
+      },
+    },
+
+    .collisions = {
+      .collision_id = GKYL_LBO_COLLISIONS,
+      .den_ref = ctx.n0,
+      .temp_ref = ctx.Ti0,
+      .num_cross_collisions = 1,
+      .collide_with = { "elc" },
+      .write_diagnostics = true,
+    },
+    .source = {
+      .source_id = GKYL_PROJ_SOURCE,
+      .num_sources = 1,
+      .projection[0] = {
+        .proj_id = GKYL_PROJ_MAXWELLIAN_PRIM,
+        .ctx_density = &ctx,
+        .density = eval_density_source,
+        .ctx_upar = &ctx,
+        .upar = eval_upar_source,
+        .ctx_temp = &ctx,
+        .temp = eval_temp_ion_source,
+      },
+      .diagnostics = {
+        .num_diag_moments = 6,
+        .diag_moments = { GKYL_F_MOMENT_M0, GKYL_F_MOMENT_M1, GKYL_F_MOMENT_M2, GKYL_F_MOMENT_M2PAR,
+                          GKYL_F_MOMENT_M2PERP, GKYL_F_MOMENT_HAMILTONIAN },
+        .num_integrated_diag_moments = 1,
+        .integrated_diag_moments = { GKYL_F_MOMENT_M0M1M2PARM2PERP },
+      },
+    },
+
+    .positivity = {
+      .type = GKYL_GK_POSITIVITY_SHIFT,
+      .write_diagnostics = true,
+    },
+
+    .bcs = {
+      { .dir = 0, .edge = GKYL_LOWER_EDGE, .type = GKYL_BC_GK_SPECIES_SHEATH, },
+      { .dir = 0, .edge = GKYL_UPPER_EDGE, .type = GKYL_BC_GK_SPECIES_SHEATH, },
+    },
+    .write_omega_cfl = true,
+    .num_diag_moments = 8,
+    .diag_moments = { GKYL_F_MOMENT_BIMAXWELLIAN, GKYL_F_MOMENT_M0, GKYL_F_MOMENT_M1,
+                      GKYL_F_MOMENT_M2, GKYL_F_MOMENT_M2PAR, GKYL_F_MOMENT_M2PERP,
+                      GKYL_F_MOMENT_M3PAR, GKYL_F_MOMENT_M3PERP },
+    .num_integrated_diag_moments = 1,
+    .integrated_diag_moments = { GKYL_F_MOMENT_M0M1M2PARM2PERP },
+    .time_rate_diagnostics = true,
+
+    .boundary_flux_diagnostics = {
+      .num_integrated_diag_moments = 1,
+      .integrated_diag_moments = { GKYL_F_MOMENT_M0M1M2PARM2PERP },
+    },
+  };
+  struct gkyl_gyrokinetic_field field = {
+    .polarization_bmag = ctx.B_p,
+    .kperpSq = pow(ctx.kperp, 2.),
+    .time_rate_diagnostics = true,
+    .is_static = false,
+    .polarization_potential = eval_potential,
+    .polarization_potential_ctx = &ctx,
+  };
+
+  struct gkyl_mirror_geo_grid_inp grid_inp = {
+    .filename_psi = "gyrokinetic/data/unit/wham_hires.geqdsk_psi.gkyl", // psi file to use
+    .rclose = 0.2, // closest R to region of interest
+    .zmin = -2.0,  // Z of lower boundary
+    .zmax = 2.0,   // Z of upper boundary
+    .include_axis = false, // Include R=0 axis in grid
+    .fl_coord = GKYL_GEOMETRY_MIRROR_GRID_GEN_PSI_CART_Z, // coordinate system for psi grid
+  };
+
+  struct gkyl_gk app_inp = {  // GK app
+    .name = "gk_wham_kinetic_poa_1x2v_p1",
+    .cdim = ctx.cdim,
+    .lower = { ctx.z_min },
+    .upper = { ctx.z_max },
+    .cells = { cells_x[0] },
+    .poly_order = ctx.poly_order,
+    .basis_type = app_args.basis_type,
+    .geometry = {
+      .geometry_id = GKYL_GEOMETRY_MIRROR,
+      .world = { ctx.psi_eval, 0.0 },
+      .mirror_grid_info = grid_inp,
+    },
+    .num_periodic_dir = 0,
+    .periodic_dirs = {},
+    .num_species = 2,
+    .species = { elc, ion },
+    .field = field,
+    .parallelism = {
+      .use_gpu = app_args.use_gpu,
+      .cuts = { app_args.cuts[0] },
+      .comm = comm,
+    },
+  };
+
+  // Set app output name from the executable name (argv[0]).
+  snprintf(app_inp.name, sizeof(app_inp.name), "%s", app_args.app_name);
+
+  // Create app object.
+  gkyl_gyrokinetic_app *app = gkyl_gyrokinetic_app_new(&app_inp);
+
+  // Triggers for IO.
+  struct gkyl_tm_trigger trig_write_conf, trig_write_phase, trig_calc_intdiag;
+
+  struct time_frame_state tfs = {
+    .t_curr = 0.0, // Initial simulation time.
+    .frame_curr = 0, // Initial frame.
+    .t_end = ctx.poa_phases[0].duration, // Final time of 1st phase.
+    .num_frames = ctx.poa_phases[0].num_frames, // Number of frames in 1st phase.
+  };
+
+  int phase_idx_init = 0, phase_idx_end = ctx.num_phases; // Initial and final phase index.
+  if (app_args.is_restart) {
+    struct gkyl_app_restart_status status = gkyl_gyrokinetic_app_read_from_frame(app,
+      app_args.restart_frame);
+
+    if (status.io_status != GKYL_ARRAY_RIO_SUCCESS) {
+      gkyl_gyrokinetic_app_cout(app, stderr, "*** Failed to read restart file! (%s)\n",
+        gkyl_array_rio_status_msg(status.io_status));
+      goto freeresources;
+    }
+
+    tfs.frame_curr = status.frame;
+    tfs.t_curr = status.stime;
+
+    // Find out what phase we are in.
+    double time_count = 0.0;
+    int frame_count = 0;
+    int pit_curr = 0;
+    for (int pit = 0; pit < ctx.num_phases; pit++) {
+      time_count += ctx.poa_phases[pit].duration;
+      frame_count += ctx.poa_phases[pit].num_frames;
+      if ((tfs.t_curr <= time_count) && (tfs.frame_curr <= frame_count)) {
+        pit_curr = pit;
+        break;
+      }
+    }
+    // NOTE(review): if no phase matched, pit_curr is still 0 while the counts hold the totals.
+    phase_idx_init = pit_curr;
+
+    // Change the duration and number frames so this phase reaches the expected
+    // time and number of frames and not beyond.
+    struct gk_poa_phase_params *pparams = &ctx.poa_phases[phase_idx_init];
+    pparams->num_frames = frame_count - tfs.frame_curr;
+    pparams->duration = time_count - tfs.t_curr;
+
+    gkyl_gyrokinetic_app_cout(app, stdout, "Restarting from frame %d", tfs.frame_curr);
+    gkyl_gyrokinetic_app_cout(app, stdout, " at time = %g\n", tfs.t_curr);
+  }
+  else {
+    gkyl_gyrokinetic_app_apply_ic(app, tfs.t_curr);
+
+    // Write out ICs.
+    reset_io_triggers(&ctx, &tfs, &trig_write_conf, &trig_write_phase, &trig_calc_intdiag);
+
+    calc_integrated_diagnostics(&trig_calc_intdiag, app, tfs.t_curr, true, -1.0);
+    write_data(&trig_write_conf, &trig_write_phase, app, tfs.t_curr, true);
+  }
+  // A step cap runs a single phase: the first one, or the restart phase when restarting.
+  if (app_args.num_steps != INT_MAX)
+    phase_idx_end = phase_idx_init + 1;
+
+  // Loop over the phases.
+  for (int pit = phase_idx_init; pit < phase_idx_end; pit++) {
+    struct gk_poa_phase_params *phase_params = &ctx.poa_phases[pit];
+    run_phase(app, &ctx, app_args.num_steps, &trig_write_conf, &trig_write_phase,
+      &trig_calc_intdiag, &tfs, phase_params);
+  }
+
+  gkyl_gyrokinetic_app_stat_write(app);
+
+  struct gkyl_gyrokinetic_stat stat = gkyl_gyrokinetic_app_stat(app); // fetch simulation statistics
+  gkyl_gyrokinetic_app_cout(app, stdout, "\n");
+  gkyl_gyrokinetic_app_cout(app, stdout, "Number of update calls %ld\n", stat.nup);
+  gkyl_gyrokinetic_app_cout(app, stdout, "Number of forward-Euler calls %ld\n", stat.nfeuler);
+  gkyl_gyrokinetic_app_cout(app, stdout, "Number of RK stage-2 failures %ld\n", stat.nstage_2_fail);
+  if (stat.nstage_2_fail > 0) {
+    gkyl_gyrokinetic_app_cout(app, stdout, "Max rel dt diff for RK stage-2 failures %g\n",
+      stat.stage_2_dt_diff[1]);
+    gkyl_gyrokinetic_app_cout(app, stdout, "Min rel dt diff for RK stage-2 failures %g\n",
+      stat.stage_2_dt_diff[0]);
+  }
+  gkyl_gyrokinetic_app_cout(app, stdout, "Number of RK stage-3 failures %ld\n", stat.nstage_3_fail);
+  gkyl_gyrokinetic_app_cout(app, stdout, "Number of write calls %ld\n", stat.n_io);
+  gkyl_gyrokinetic_app_print_timings(app, stdout);
+
+freeresources:
+  // simulation complete, free app
+  gkyl_gyrokinetic_app_release(app);
+  gkyl_gyrokinetic_comms_release(comm);
+  release_ctx(&ctx);
+
+#ifdef GKYL_HAVE_MPI
+  if (app_args.use_mpi)
+    MPI_Finalize();
+#endif
+  return 0;
+}
diff --git a/gyrokinetic/creg/rt_gk_wham_nonuniformx_2x2v_p1.c b/gyrokinetic/creg/rt_gk_wham_nonuniformx_2x2v_p1.c
index ae0e96a7b0..07c910376a 100644
--- a/gyrokinetic/creg/rt_gk_wham_nonuniformx_2x2v_p1.c
+++ b/gyrokinetic/creg/rt_gk_wham_nonuniformx_2x2v_p1.c
@@ -812,7 +812,7 @@ int main(int argc, char **argv)
         .map_strength = 0.5,
         .maximum_slope_at_min_B = 2,
         .gaussian_std = 0.2,
-        .gaussian_max_integration_width = 1.0,
+        .gaussian_max_integration_width = 0.5,
       },
     },
 
diff --git a/gyrokinetic/unit/ctest_loss_cone_mask_gyrokinetic.c b/gyrokinetic/unit/ctest_loss_cone_mask_gyrokinetic.c
index fa3caa80a7..4ecb3dd811 100644
--- a/gyrokinetic/unit/ctest_loss_cone_mask_gyrokinetic.c
+++ b/gyrokinetic/unit/ctest_loss_cone_mask_gyrokinetic.c
@@ -1,178 +1,354 @@
 #include <acutest.h>
 
-#include <gkyl_util.h>
+#define _USE_MATH_DEFINES
+#include <math.h>
+#include <float.h>
+#include <stdio.h>
+#include <assert.h>
+
 #include <gkyl_array.h>
 #include <gkyl_array_ops.h>
 #include <gkyl_array_rio.h>
-#include <gkyl_range.h>
-#include <gkyl_rect_decomp.h>
-#include <gkyl_rect_grid.h>
+#include <gkyl_const.h>
+#include <gkyl_eval_on_nodes.h>
 #include <gkyl_gk_geometry.h>
 #include <gkyl_gk_geometry_mapc2p.h>
-#include <gkyl_velocity_map.h>
-#include <gkyl_position_map.h>
-#include <gkyl_eval_on_nodes.h>
-#include <gkyl_proj_on_basis.h>
 #include <gkyl_loss_cone_mask_gyrokinetic.h>
-#include <gkyl_const.h>
+#include <gkyl_position_map.h>
+#include <gkyl_range.h>
+#include <gkyl_rect_grid.h>
+#include <gkyl_util.h>
+#include <gkyl_velocity_map.h>
+
+#ifndef M_PI
+#define M_PI 3.14159265358979323846
+#endif
 
 struct loss_cone_mask_test_ctx {
-  int cdim; // Configuration space dimensionality.
-  double eV; // Elementary charge.
-  double R_m; // Mirror ratio.
-  double B_m; // Maximum magnetic field amplitude.
-  double z_m; // Location of B_m.
-  double mass, charge; // Species mass and charge.
-  double n0, T0, B0; // Reference parameters.
-  double phi_fac; // phi(z=0) = phi_fac*T0/e;
-  double z_max, vpar_max, mu_max; // Upper grid extents.
-  int Nz, Nvpar, Nmu; // Number of cells in each direction.
-  enum gkyl_quad_type quad_type; // Type of quadrature/nodes.
-  int num_quad; // Number of quadrature points to use in projection, 1 or p+1.
-  bool cellwise_trap_loss; // Whether a whole cell is either trapped or lost.
+  int cdim; // Configuration space dimensionality.
+  double eV; // Elementary charge.
+  double R_m; // Mirror ratio.
+  double B_m; // Maximum magnetic field amplitude.
+  double z_m; // Location of B_m.
+  double mass, charge; // Species mass and charge.
+  double n0, T0, B0; // Reference parameters.
+  double phi_fac; // Amplitude of the nonzero test potential, in units of T0/eV.
+  double z_max, vpar_max, mu_max; // Upper grid extents.
+  int Nz, Nvpar, Nmu; // Number of cells in each direction.
 };
 
-// allocate array (filled with zeros)
 static struct gkyl_array*
 mkarr(bool use_gpu, long nc, long size)
 {
-  struct gkyl_array* a = use_gpu? gkyl_array_cu_dev_new(GKYL_DOUBLE, nc, size)
-	                        : gkyl_array_new(GKYL_DOUBLE, nc, size);
-  return a;
+  return use_gpu ? gkyl_array_cu_dev_new(GKYL_DOUBLE, nc, size) // Device array when use_gpu.
+                 : gkyl_array_new(GKYL_DOUBLE, nc, size); // Host array of doubles otherwise.
 }
 
 void
-mapc2p_3x(double t, const double *xc, double* GKYL_RESTRICT xp, void *ctx)
+mapc2p_3x(double t, const double *xc, double *GKYL_RESTRICT xp, void *ctx)
 {
-  xp[0] = xc[0]; xp[1] = xc[1]; xp[2] = xc[2];
+  xp[0] = xc[0]; // Identity map: physical coordinates equal the computational ones.
+  xp[1] = xc[1];
+  xp[2] = xc[2]; // t and ctx are not used.
 }
 
 void
-bfield_func_3x(double t, const double *xc, double* GKYL_RESTRICT fout, void *ctx)
+bfield_func_3x(double t, const double *xc, double *GKYL_RESTRICT fout, void *ctx)
 {
-  double x = xc[0], y = xc[1], z = xc[2];
-
+  double z = xc[2]; // Field depends on z only: B_m/R_m at z = 0, B_m at z = pi/2.
   struct loss_cone_mask_test_ctx *params = ctx;
-  double R_m = params->R_m; // Mirror ratio.
-  double B_m = params->B_m; // Maximum magnetic field amplitude.
 
   fout[0] = 0.0;
   fout[1] = 0.0;
-  fout[2] = B_m * (1.0 - ((R_m-1.0)/R_m)*pow(cos(z), 2.0));
-//  fout[0] = (B_m/R_m) * (1.0 + (R_m-1.0)*pow(sin(z), 2.0));
+  fout[2] = params->B_m * (1.0 - ((params->R_m - 1.0) / params->R_m) * pow(cos(z), 2.0));
 }
 
 void
-phi_func_1x(double t, const double *xc, double* GKYL_RESTRICT fout, void *ctx)
+phi_func_1x_zero(double t, const double *xc, double *GKYL_RESTRICT fout, void *ctx)
 {
-  double z = xc[0];
+  fout[0] = 0.0; // Potential is identically zero; t, xc and ctx are not used.
+}
 
+void
+phi_func_1x_nonzero(double t, const double *xc, double *GKYL_RESTRICT fout, void *ctx)
+{
+  double z = xc[0];
   struct loss_cone_mask_test_ctx *params = ctx;
-  double phi_fac = params->phi_fac;
-  double T0 = params->T0;
-  double eV = params->eV;
 
-  fout[0] = 0.0; //0.5 * phi_fac*T0/eV * (1.0 + cos(z));
+  // phi = phi_fac*T0/eV * sin^2(4 z); alternative profile: phi_fac*T0/eV * (1 - (z/z_max)^2).
+  fout[0] = params->phi_fac * params->T0 / params->eV * (1.0 - pow(cos(4*z), 2.0));
 }
 
-void
-mask_ref_1x2v(double t, const double *xc, double* GKYL_RESTRICT fout, void *ctx)
+static inline void
+corner_coords_range(int ndim, int corner, double *eta) // Decode a corner index into +/-1 coords.
 {
-  double z = xc[0], vpar = xc[1], mu = xc[2];
-  struct loss_cone_mask_test_ctx *params = ctx;
+  for (int d = 0; d < ndim; ++d) {
+    int bit = ndim - 1 - d; // Dimension 0 is the most significant of the ndim bits.
+    eta[d] = ((corner >> bit) & 1) ? 1.0 : -1.0; // Bit set -> +1 (upper), clear -> -1 (lower).
+  }
+}
 
-  double z_m = params->z_m;
-  double mass = params->mass;
-  double charge = params->charge;
-
-  double phi, phi_m;
-  phi_func_1x(t, xc, &phi, ctx);
-  phi_func_1x(t, &z_m, &phi_m, ctx);
-
-  double bfield[3], bmag;
-  double zinfl[3] = {0.0}, z_minfl[3] = {0.0};
-  zinfl[2] = z, z_minfl[2] = z_m;
-  bfield_func_3x(t, zinfl, bfield, ctx);
-  bmag = bfield[2];
-
-  double bfield_m[3], bmag_m;
-  bfield_func_3x(t, z_minfl, bfield_m, ctx);
-  bmag_m = bfield_m[2];
-
-  // mu_bound = (0.5*m*vpar^2+q*(phi-phi_m))/(B*(B_max/B-1))
-  double mu_bound = (0.5*mass*pow(vpar,2)+charge*(phi-phi_m))/(bmag*(bmag_m/bmag-1));
-  if (mu_bound < mu && fabs(z) < z_m)
-    fout[0] = 1.0;
-  else
-    fout[0] = 0;
+static struct gkyl_array*
+mk_basis_at_corners(const struct gkyl_basis *basis)
+{
+  int ndim = basis->ndim;
+  int num_corners = 1 << ndim; // Two choices (-1 or +1) per dimension.
+  struct gkyl_array *out = gkyl_array_new(GKYL_DOUBLE, basis->num_basis, num_corners);
+  // Entry `corner` of out receives basis->eval at that corner's +/-1 coordinates.
+  double eta[GKYL_MAX_DIM] = { 0.0 };
+  for (int corner = 0; corner < num_corners; ++corner) {
+    corner_coords_range(ndim, corner, eta);
+    basis->eval(eta, gkyl_array_fetch(out, corner));
+  }
+  // Newly allocated host array; build_reference_mask releases it with gkyl_array_release.
+  return out;
 }
 
-void
-test_1x2v_gk(int poly_order, bool use_gpu)
+static inline double
+field_corner_val(const struct gkyl_array *arr, const struct gkyl_array *basis_at_corners,
+  int num_basis, long linidx, int corner)
+{
+  const double *arr_d = gkyl_array_cfetch(arr, linidx); // Coefficients stored for cell linidx.
+  const double *basis_d = gkyl_array_cfetch(basis_at_corners, corner); // Entry for this corner.
+  // Value of the expansion at the corner: sum over k of arr_d[k] * basis_d[k].
+  double val = 0.0;
+  for (int k = 0; k < num_basis; ++k) {
+    val += arr_d[k] * basis_d[k];
+  }
+  return val;
+}
+
+static inline double
+vel_corner_val(const struct gkyl_velocity_map *gvm, const int *vel_idx, int vd, int vel_corner,
+  int vdim)
+{
+  long linidx_vel = gkyl_range_idx(&gvm->local_ext_vel, vel_idx);
+  const double *vmap_d = gkyl_array_cfetch(gvm->vmap, linidx_vel);
+  // Decode the velocity-space corner and keep only its coordinate along direction vd.
+  double vel_eta[GKYL_MAX_DIM] = { 0.0 };
+  corner_coords_range(vdim, vel_corner, vel_eta);
+  double xcomp[1] = { vel_eta[vd] };
+  // Direction vd uses its own block of num_basis coefficients, evaluated at the 1D point xcomp.
+  return gvm->vmap_basis->eval_expand(xcomp, vmap_d + vd * gvm->vmap_basis->num_basis);
+}
+
+static inline int
+corner_z_endpoint_side_ref(int cdim, int corner, int zdim) // Side of `corner` along zdim.
 {
+  double eta[GKYL_MAX_DIM] = { 0.0 };
+  corner_coords_range(cdim, corner, eta);
+  return eta[zdim] > 0.0 ? 1 : 0; // 1 for the upper (+1) side, 0 for the lower (-1) side.
+}
+
+static inline int
+corner_with_z_side_ref(int cdim, int anchor_corner, int zdim, int z_side)
+{
+  int num_conf_corners = 1 << cdim;
+  // Goal: the corner on side z_side along zdim that matches the anchor in all other directions.
+  double eta_anchor[GKYL_MAX_DIM] = { 0.0 };
+  corner_coords_range(cdim, anchor_corner, eta_anchor);
+
+  for (int cand = 0; cand < num_conf_corners; ++cand) {
+    if (corner_z_endpoint_side_ref(cdim, cand, zdim) != z_side) {
+      continue; // Candidate is on the wrong side along zdim.
+    }
+
+    double eta_cand[GKYL_MAX_DIM] = { 0.0 };
+    corner_coords_range(cdim, cand, eta_cand);
+
+    bool same_transverse = true;
+    for (int d = 0; d < cdim; ++d) {
+      if (d == zdim) {
+        continue;
+      }
+      if (eta_cand[d] != eta_anchor[d]) { // Exact comparison: coordinates are exactly +/-1.
+        same_transverse = false;
+        break;
+      }
+    }
+
+    if (same_transverse) {
+      return cand;
+    }
+  }
+  // Reached only if no candidate matched; the return below applies when asserts are disabled.
+  assert(false);
+  return anchor_corner;
+}
+
+static void
+escape_barriers_ref(const struct gkyl_array *phi, const struct gkyl_array *bmag,
+  const struct gkyl_range *conf_range, const struct gkyl_array *basis_at_corners_conf,
+  int cdim, int num_basis_conf, const int *base_idx, int target_z_cell,
+  int anchor_corner, int anchor_z_side, double mu, double charge, double *barrier_left,
+  double *barrier_right)
+{ // Outputs the largest U = mu*bmag + charge*phi sampled on each side of the target cell.
+  int zdim = cdim - 1; // The last configuration direction is scanned.
+  int anchor_corner_node = corner_with_z_side_ref(cdim, anchor_corner, zdim, anchor_z_side);
+  int z_upper_corner = corner_with_z_side_ref(cdim, anchor_corner, zdim, 1);
+  int z_lower_corner = corner_with_z_side_ref(cdim, anchor_corner, zdim, 0);
+
+  int scan_idx[GKYL_MAX_DIM];
+  for (int d = 0; d < cdim; ++d) {
+    scan_idx[d] = base_idx[d]; // Indices other than zdim stay fixed at base_idx.
+  }
 
+  *barrier_left = -DBL_MAX;
+  *barrier_right = -DBL_MAX;
+
+  for (int iz = conf_range->lower[zdim]; iz <= conf_range->upper[zdim]; ++iz) {
+    scan_idx[zdim] = iz;
+    long linidx = gkyl_range_idx(conf_range, scan_idx);
+    // Lower cells are sampled at their upper-z corner, higher cells at their lower-z corner.
+    int left_corner = z_upper_corner;
+    int right_corner = z_lower_corner;
+    if (iz == target_z_cell) { // The target cell is sampled at the anchor node for both sides.
+      left_corner = anchor_corner_node;
+      right_corner = anchor_corner_node;
+    }
+
+    if (iz <= target_z_cell) {
+      double phi_left = field_corner_val(phi, basis_at_corners_conf, num_basis_conf, linidx, left_corner);
+      double bmag_left = field_corner_val(bmag, basis_at_corners_conf, num_basis_conf, linidx, left_corner);
+      double u_left = mu * bmag_left + charge * phi_left;
+      if (u_left > *barrier_left) {
+        *barrier_left = u_left;
+      }
+    }
+    if (iz >= target_z_cell) {
+      double phi_right = field_corner_val(phi, basis_at_corners_conf, num_basis_conf, linidx, right_corner);
+      double bmag_right = field_corner_val(bmag, basis_at_corners_conf, num_basis_conf, linidx, right_corner);
+      double u_right = mu * bmag_right + charge * phi_right;
+      if (u_right > *barrier_right) {
+        *barrier_right = u_right;
+      }
+    }
+  }
+}
+
+static void
+build_reference_mask(const struct gkyl_range *phase_range, const struct gkyl_range *conf_range,
+  const struct gkyl_basis *conf_basis, const struct gkyl_velocity_map *gvm,
+  const struct gkyl_array *bmag, const struct gkyl_array *phi,
+  double mass, double charge, struct gkyl_array *mask_ref)
+{ // Writes 1.0 to mask_ref for cells whose every corner passes the trapping test, else 0.0.
+  int cdim = conf_basis->ndim;
+  int pdim = phase_range->ndim;
+  int vdim = pdim - cdim;
+  int num_basis_conf = conf_basis->num_basis;
+  int num_phase_corners = 1 << pdim;
+
+  struct gkyl_array *basis_at_corners_conf = mk_basis_at_corners(conf_basis);
+
+  struct gkyl_range_iter phase_iter;
+  gkyl_range_iter_init(&phase_iter, phase_range);
+  while (gkyl_range_iter_next(&phase_iter)) {
+    int conf_idx[GKYL_MAX_DIM] = { 0 };
+    int vel_idx[GKYL_MAX_DIM] = { 0 };
+    // Split the phase-space index: the first cdim entries are configuration space.
+    for (int d = 0; d < cdim; ++d) {
+      conf_idx[d] = phase_iter.idx[d];
+    }
+    for (int d = 0; d < vdim; ++d) {
+      vel_idx[d] = phase_iter.idx[cdim + d];
+    }
+
+    bool cell_trapped = true; // The corner loop stops at the first corner that fails.
+    for (int corner = 0; corner < num_phase_corners && cell_trapped; ++corner) {
+      int conf_corner = corner / (1 << vdim); // High bits select the configuration corner.
+      int vel_corner = corner % (1 << vdim); // Low vdim bits select the velocity corner.
+
+      double vpar = vel_corner_val(gvm, vel_idx, 0, vel_corner, vdim); // Velocity direction 0.
+      double mu = vel_corner_val(gvm, vel_idx, 1, vel_corner, vdim); // Velocity direction 1.
+
+      long linidx_conf = gkyl_range_idx(conf_range, conf_idx);
+      double bmag_curr = field_corner_val(bmag, basis_at_corners_conf, num_basis_conf,
+        linidx_conf, conf_corner);
+      double phi_curr = field_corner_val(phi, basis_at_corners_conf, num_basis_conf,
+        linidx_conf, conf_corner);
+      double h_curr = 0.5 * mass * vpar * vpar + mu * bmag_curr + charge * phi_curr;
+
+      int zdim = cdim - 1;
+      int target_z_cell = conf_idx[zdim];
+      int anchor_z_side = corner_z_endpoint_side_ref(cdim, conf_corner, zdim);
+
+      double barrier_left, barrier_right;
+      escape_barriers_ref(phi, bmag, conf_range, basis_at_corners_conf, cdim,
+        num_basis_conf, conf_idx, target_z_cell, conf_corner, anchor_z_side, mu, charge,
+        &barrier_left, &barrier_right);
+      // Trapped only if the corner energy is strictly below the barrier on both sides.
+      cell_trapped = h_curr < GKYL_MIN2(barrier_left, barrier_right);
+    }
+
+    long linidx_phase = gkyl_range_idx(phase_range, phase_iter.idx);
+    double *m = gkyl_array_fetch(mask_ref, linidx_phase);
+    m[0] = cell_trapped ? 1.0 : 0.0;
+  }
+
+  gkyl_array_release(basis_at_corners_conf);
+}
+
+static void
+run_case_1x2v(int poly_order, bool use_gpu, bool use_nonzero_phi)
+{
   double eV = GKYL_ELEMENTARY_CHARGE;
   double mass_proton = GKYL_PROTON_MASS;
 
-  // Set reference parameters.
   struct loss_cone_mask_test_ctx ctx = {
     .cdim = 1,
     .eV = eV,
     .R_m = 8.0,
     .B_m = 4.0,
-    .z_m = M_PI/2.0,
-    .mass = 2.014*mass_proton,
+    .z_m = M_PI / 2.0,
+    .mass = 2.014 * mass_proton,
     .charge = eV,
     .n0 = 1e18,
-    .T0 = 100*eV,
-    .phi_fac = 3.0,
-    .z_max = M_PI,
-    .Nz = 8,
-    .Nvpar = 8,
-    .Nmu = 4,
-    .quad_type = GKYL_GAUSS_LOBATTO_QUAD,
-    .num_quad = 2,
-    .cellwise_trap_loss = true,
+    .T0 = 100 * eV,
+    .phi_fac = 5.0,
+    .z_max = M_PI-0.5,
+    .Nz = 64,
+    .Nvpar = 16,
+    .Nmu = 16,
   };
-  ctx.B0 = ctx.B_m/2.0;
-  ctx.vpar_max = 6.0*sqrt(ctx.T0/ctx.mass);
-  ctx.mu_max = 0.5*ctx.mass*pow(ctx.vpar_max,2)/ctx.B0;
-
-  double mass = ctx.mass;
-  double lower[] = {-ctx.z_max, -ctx.vpar_max, 0.0}, upper[] = {ctx.z_max, ctx.vpar_max, ctx.mu_max};
-  int cells[] = {ctx.Nz, ctx.Nvpar, ctx.Nmu};
-  const int ndim = sizeof(cells)/sizeof(cells[0]);
+  ctx.B0 = ctx.B_m / 2.0;
+  ctx.vpar_max = 6.0 * sqrt(ctx.T0 / ctx.mass);
+  ctx.mu_max = 0.5 * ctx.mass * pow(ctx.vpar_max, 2) / ctx.B0;
+
+  double lower[] = { -ctx.z_max, -ctx.vpar_max, 0.0 };
+  double upper[] = { ctx.z_max, ctx.vpar_max, ctx.mu_max };
+  int cells[] = { ctx.Nz, ctx.Nvpar, ctx.Nmu };
+  const int ndim = sizeof(cells) / sizeof(cells[0]);
   const int cdim = ctx.cdim;
-  const int vdim = ndim-ctx.cdim;
+  const int vdim = ndim - cdim;
 
-  // Grids.
   double lower_conf[cdim], upper_conf[cdim];
   int cells_conf[cdim];
-  for (int d=0; d<cdim; d++) {
+  for (int d = 0; d < cdim; ++d) {
     lower_conf[d] = lower[d];
     upper_conf[d] = upper[d];
     cells_conf[d] = cells[d];
   }
+
   double lower_vel[vdim], upper_vel[vdim];
   int cells_vel[vdim];
-  for (int d=0; d<vdim; d++) {
-    lower_vel[d] = lower[cdim+d];
-    upper_vel[d] = upper[cdim+d];
-    cells_vel[d] = cells[cdim+d];
+  for (int d = 0; d < vdim; ++d) {
+    lower_vel[d] = lower[cdim + d];
+    upper_vel[d] = upper[cdim + d];
+    cells_vel[d] = cells[cdim + d];
   }
-  struct gkyl_rect_grid grid;
+
+  struct gkyl_rect_grid grid, grid_conf, grid_vel;
   gkyl_rect_grid_init(&grid, ndim, lower, upper, cells);
-  struct gkyl_rect_grid grid_conf;
   gkyl_rect_grid_init(&grid_conf, cdim, lower_conf, upper_conf, cells_conf);
-  struct gkyl_rect_grid grid_vel;
   gkyl_rect_grid_init(&grid_vel, vdim, lower_vel, upper_vel, cells_vel);
 
-  // Basis functions.
   struct gkyl_basis basis, basis_conf;
-  if (poly_order == 1) 
+  if (poly_order == 1) {
     gkyl_cart_modal_gkhybrid(&basis, cdim, vdim);
-  else
+  }
+  else {
     gkyl_cart_modal_serendip(&basis, ndim, poly_order);
+  }
   gkyl_cart_modal_serendip(&basis_conf, cdim, poly_order);
 
   struct gkyl_basis *basis_on_dev, *basis_on_dev_conf;
@@ -180,41 +356,43 @@ test_1x2v_gk(int poly_order, bool use_gpu)
 #ifdef GKYL_HAVE_CUDA
     basis_on_dev = gkyl_cu_malloc(sizeof(struct gkyl_basis));
     basis_on_dev_conf = gkyl_cu_malloc(sizeof(struct gkyl_basis));
-    if (poly_order == 1) 
+    if (poly_order == 1) {
       gkyl_cart_modal_gkhybrid_cu_dev(basis_on_dev, cdim, vdim);
-    else
+    }
+    else {
       gkyl_cart_modal_serendip_cu_dev(basis_on_dev, ndim, poly_order);
+    }
     gkyl_cart_modal_serendip_cu_dev(basis_on_dev_conf, cdim, poly_order);
 #endif
   }
-  else { 
+  else {
     basis_on_dev = &basis;
     basis_on_dev_conf = &basis_conf;
   }
 
-  // Ranges.
-  int ghost_conf[] = { 1, 1, 1 }; // 3 elements because it's used by geo.
-  struct gkyl_range local_conf, local_ext_conf; // local, local-ext conf-space ranges
+  int ghost_conf[] = { 1, 1, 1 };
+  struct gkyl_range local_conf, local_ext_conf;
   gkyl_create_grid_ranges(&grid_conf, ghost_conf, &local_ext_conf, &local_conf);
 
   int ghost_vel[] = { 0, 0 };
-  struct gkyl_range local_vel, local_ext_vel; // local, local-ext vel-space ranges
+  struct gkyl_range local_vel, local_ext_vel;
   gkyl_create_grid_ranges(&grid_vel, ghost_vel, &local_ext_vel, &local_vel);
 
   int ghost[GKYL_MAX_DIM] = { 0 };
-  for (int d=0; d<cdim; d++) ghost[d] = ghost_conf[d];
-  struct gkyl_range local, local_ext; // local, local-ext phase-space ranges
+  for (int d = 0; d < cdim; ++d) {
+    ghost[d] = ghost_conf[d];
+  }
+  struct gkyl_range local, local_ext;
   gkyl_create_grid_ranges(&grid, ghost, &local_ext, &local);
 
   struct gkyl_position_map *pmap = gkyl_position_map_null_new();
 
-  // Initialize geometry
   struct gkyl_gk_geometry_inp geometry_input = {
     .geometry_id = GKYL_GEOMETRY_MAPC2P,
-    .world = {0.0, 0.0},
-    .mapc2p = mapc2p_3x, // mapping of computational to physical space
+    .world = { 0.0, 0.0 },
+    .mapc2p = mapc2p_3x,
     .c2p_ctx = 0,
-    .bfield_func = bfield_func_3x, // magnetic field magnitude
+    .bfield_func = bfield_func_3x,
     .bfield_ctx = &ctx,
     .position_map = pmap,
     .grid = grid_conf,
@@ -225,179 +403,147 @@ test_1x2v_gk(int poly_order, bool use_gpu)
     .basis = basis_conf,
   };
   geometry_input.geo_grid = gkyl_gk_geometry_augment_grid(grid_conf, geometry_input);
-  gkyl_create_grid_ranges(&geometry_input.geo_grid, ghost_conf, &geometry_input.geo_local_ext, &geometry_input.geo_local);
+  gkyl_create_grid_ranges(&geometry_input.geo_grid, ghost_conf, &geometry_input.geo_local_ext,
+    &geometry_input.geo_local);
   gkyl_cart_modal_serendip(&geometry_input.geo_basis, 3, poly_order);
-  struct gk_geometry* gk_geom_3d;
-  gk_geom_3d = gkyl_gk_geometry_mapc2p_new(&geometry_input);
-  // Deflate geometry if necessary.
+
+  struct gk_geometry *gk_geom_3d = gkyl_gk_geometry_mapc2p_new(&geometry_input);
   struct gk_geometry *gk_geom = gkyl_gk_geometry_deflate(gk_geom_3d, &geometry_input);
   gkyl_gk_geometry_release(gk_geom_3d);
-  // If we are on the gpu, copy from host
+
   if (use_gpu) {
-    struct gk_geometry* gk_geom_dev = gkyl_gk_geometry_new(gk_geom, &geometry_input, use_gpu);
+    struct gk_geometry *gk_geom_dev = gkyl_gk_geometry_new(gk_geom, &geometry_input, use_gpu);
     gkyl_gk_geometry_release(gk_geom);
     gk_geom = gkyl_gk_geometry_acquire(gk_geom_dev);
     gkyl_gk_geometry_release(gk_geom_dev);
   }
 
-  // Velocity space mapping.
   struct gkyl_mapc2p_inp c2p_in = { };
   struct gkyl_velocity_map *gvm = gkyl_velocity_map_new(c2p_in, grid, grid_vel,
     local, local_ext, local_vel, local_ext_vel, use_gpu);
+  struct gkyl_velocity_map *gvm_ho = gkyl_velocity_map_new(c2p_in, grid, grid_vel,
+    local, local_ext, local_vel, local_ext_vel, false);
 
-  // Project the electostatic potential.
   struct gkyl_array *phi = mkarr(use_gpu, basis_conf.num_basis, local_ext_conf.volume);
-  struct gkyl_array *phi_ho = use_gpu? mkarr(false, phi->ncomp, phi->size)
-	                             : gkyl_array_acquire(phi);
+  struct gkyl_array *phi_ho = mkarr(false, basis_conf.num_basis, local_ext_conf.volume);
 
-  gkyl_eval_on_nodes *evphi = gkyl_eval_on_nodes_new(&grid_conf, &basis_conf, 1, phi_func_1x, &ctx);
+  evalf_t phi_func = use_nonzero_phi ? phi_func_1x_nonzero : phi_func_1x_zero;
+  gkyl_eval_on_nodes *evphi = gkyl_eval_on_nodes_new(&grid_conf, &basis_conf, 1, phi_func, &ctx);
   gkyl_eval_on_nodes_advance(evphi, 0.0, &local_conf, phi_ho);
   gkyl_eval_on_nodes_release(evphi);
   gkyl_array_copy(phi, phi_ho);
 
-  // Location of the mirror throat.
-  double bmag_max_loc_ho[] = {ctx.z_m};
-  double *bmag_max_loc;
-  if (use_gpu) {
-    bmag_max_loc = gkyl_cu_malloc(sizeof(double));
-    gkyl_cu_memcpy(bmag_max_loc, bmag_max_loc_ho, sizeof(double), GKYL_CU_MEMCPY_H2D);
-  }
-  else {
-    bmag_max_loc = gkyl_malloc(sizeof(double));
-    memcpy(bmag_max_loc, bmag_max_loc_ho, sizeof(double));
-  }
-
-  // Get the magnetic field at the mirror throat.
-  double bfield_max_ho[3], bmag_max_ho[1];
-  double xc_infl[] = {0.0,0.0,ctx.z_m};
-  bfield_func_3x(0.0, xc_infl, bfield_max_ho, &ctx);
-  bmag_max_ho[0] = bfield_max_ho[2];
-  double *bmag_max;
-  if (use_gpu) {
-    bmag_max = gkyl_cu_malloc(sizeof(double));
-    gkyl_cu_memcpy(bmag_max, bmag_max_ho, sizeof(double), GKYL_CU_MEMCPY_H2D);
-  }
-  else {
-    bmag_max = gkyl_malloc(sizeof(double));
-    memcpy(bmag_max, bmag_max_ho, sizeof(double));
-  }
-
-  // Get the potential at the mirror throat (z=pi/2).
-  double phi_m_ho[1];
-  double xc[] = {ctx.z_m};
-  phi_func_1x(0.0, xc, phi_m_ho, &ctx);
-  double *phi_m;
-  if (use_gpu) {
-    phi_m = gkyl_cu_malloc(sizeof(double));
-    gkyl_cu_memcpy(phi_m, phi_m_ho, sizeof(double), GKYL_CU_MEMCPY_H2D);
-  }
-  else {
-    phi_m = gkyl_malloc(sizeof(double));
-    memcpy(phi_m, phi_m_ho, sizeof(double));
-  }
-
-  // Basis used to project the mask.
-  struct gkyl_basis basis_mask;
-  if (ctx.num_quad == 1 || ctx.cellwise_trap_loss)
-    gkyl_cart_modal_serendip(&basis_mask, ndim, 0);
-  else {
-    if (poly_order == 1) 
-      gkyl_cart_modal_gkhybrid(&basis_mask, cdim, vdim);
-    else
-      gkyl_cart_modal_serendip(&basis_mask, ndim, poly_order);
-  }
-
-  // Create mask array.
-  struct gkyl_array *mask = mkarr(use_gpu, basis_mask.num_basis, local_ext.volume);
-  struct gkyl_array *mask_ho = use_gpu? mkarr(false, mask->ncomp, mask->size)
-	                              : gkyl_array_acquire(mask);
-
-  // Project the loss cone mask.
   struct gkyl_loss_cone_mask_gyrokinetic_inp inp_proj = {
-    .phase_grid = &grid,
     .conf_basis = &basis_conf,
-    .phase_basis = &basis,
-    .conf_range =  &local_conf,
-    .conf_range_ext = &local_ext_conf,
-    .vel_range = &local_vel, 
+    .conf_range = &local_conf,
     .vel_map = gvm,
-    .bmag = gk_geom->geo_int.bmag,
-    .bmag_max = bmag_max,
-    .bmag_max_loc = bmag_max_loc,
+    .use_gpu = use_gpu,
     .mass = ctx.mass,
     .charge = ctx.charge,
-    .qtype = ctx.quad_type,
-    .num_quad = ctx.num_quad,
-    .cellwise_trap_loss = ctx.cellwise_trap_loss,
-    .use_gpu = use_gpu,
   };
-  struct gkyl_loss_cone_mask_gyrokinetic *proj_mask = gkyl_loss_cone_mask_gyrokinetic_inew( &inp_proj );
+  struct gkyl_loss_cone_mask_gyrokinetic *proj_mask =
+    gkyl_loss_cone_mask_gyrokinetic_inew(&inp_proj);
 
-  gkyl_loss_cone_mask_gyrokinetic_advance(proj_mask, &local, &local_conf, phi, phi_m, mask);
+  struct gkyl_array *mask = mkarr(use_gpu, 1, local_ext.volume);
+  struct gkyl_array *mask_ho = mkarr(false, 1, local_ext.volume);
+  struct gkyl_array *mask_cpu = mkarr(false, 1, local_ext.volume);
+  struct gkyl_array *mask_ref = mkarr(false, 1, local_ext.volume);
 
+  gkyl_loss_cone_mask_gyrokinetic_advance(proj_mask, &local, &local_conf,
+    gk_geom->geo_corn.bmag, phi, mask);
   gkyl_array_copy(mask_ho, mask);
 
-  // Project expected mask.
-  struct gkyl_array *mask_ref_ho = mkarr(false, basis_mask.num_basis, local_ext.volume);
-  gkyl_proj_on_basis *evmask_ref = gkyl_proj_on_basis_new(&grid, &basis_mask, basis_mask.poly_order+1, 1, mask_ref_1x2v, &ctx);
-  gkyl_proj_on_basis_advance(evmask_ref, 0.0, &local, mask_ref_ho);
-  gkyl_proj_on_basis_release(evmask_ref);
-  if (ctx.num_quad == 1) {
-    // Rescale to deal with normalization.
-    gkyl_array_scale(mask_ref_ho, 1.0/pow(sqrt(2.0),cdim+vdim));
+  struct gkyl_array *bmag_ho = mkarr(false, basis_conf.num_basis, local_ext_conf.volume);
+  gkyl_array_copy(bmag_ho, gk_geom->geo_corn.bmag);
+
+  {
+    char f_bmag[256], f_phi[256], f_mask[256];
+    const char *suffix_dev = use_gpu ? "dev" : "ho";
+    const char *suffix_phi = use_nonzero_phi ? "nonzero_phi" : "zero_phi";
+    const struct gkyl_range arr_range_conf = local_conf;
+    const struct gkyl_range arr_range = local;
+
+    snprintf(f_bmag, sizeof(f_bmag), "ctest_loss_cone_mask_%s_%s_bmag.gkyl", suffix_dev,
+      suffix_phi);
+    snprintf(f_phi, sizeof(f_phi), "ctest_loss_cone_mask_%s_%s_phi.gkyl", suffix_dev,
+      suffix_phi);
+    snprintf(f_mask, sizeof(f_mask), "ctest_loss_cone_mask_%s_%s_mask.gkyl", suffix_dev,
+      suffix_phi);
+
+    gkyl_grid_sub_array_write(&grid_conf, &arr_range_conf, 0, bmag_ho, f_bmag);
+    gkyl_grid_sub_array_write(&grid_conf, &arr_range_conf, 0, phi_ho, f_phi);
+    gkyl_grid_sub_array_write(&grid, &arr_range, 0, mask_ho, f_mask);
   }
 
-//  // values to compare  at index (1, 9, 9) [remember, lower-left index is (1,1,1)]
-//  double p1_vals[] = {  
-//     7.2307139183122714e-03, 0.0000000000000000e+00, 1.9198293226362615e-04, -7.7970439910196674e-04, 0.0000000000000000e+00, 0.0000000000000000e+00,
-//    -2.0701958137127286e-05, 0.0000000000000000e+00, -1.4953406100022537e-04, 0.0000000000000000e+00, 1.6124599381836546e-05, 0.0000000000000000e+00,
-//    -8.2719200283232917e-19, 0.0000000000000000e+00, -3.4806248503322844e-20, 0.0000000000000000e+00, };
-//  double p2_vals[] = { 
-//    7.2307468609012666e-03, 0.0000000000000000e+00, 1.9198380692343289e-04, -7.8092230706225602e-04, 0.0000000000000000e+00, 0.0000000000000000e+00,
-//    -2.0734294852987710e-05, 3.6591823321385775e-18, -1.4953474226616330e-04, 3.7739922227981074e-05, 0.0000000000000000e+00, 7.0473141211557788e-19,
-//    0.0000000000000000e+00, -4.8789097761847700e-19, 1.6149786206441256e-05, 0.0000000000000000e+00, 1.0020339643610290e-06, 5.4210108624275222e-20,
-//    0.0000000000000000e+00, 0.0000000000000000e+00 };
-//
-//  const double *fv = gkyl_array_cfetch(distf, gkyl_range_idx(&local_ext, (int[3]) { 1, 9, 9 }));
-//  if (poly_order == 1) {
-//    for (int i=0; i<basis.num_basis; ++i) {
-//      TEST_CHECK( gkyl_compare_double(p1_vals[i], fv[i], 1e-2) );
-//    }
-//  }
-//
-//  if (poly_order == 2) {
-//    for (int i=0; i<basis.num_basis; ++i)
-//      TEST_CHECK( gkyl_compare_double(p2_vals[i], fv[i], 1e-2) );
-//  }
-
-  // Write mask to file.
-  char fname[1024];
-  if (use_gpu)
-    sprintf(fname, "ctest_loss_cone_mask_gyrokinetic_1x2v_p%d_dev.gkyl", poly_order);
-  else
-    sprintf(fname, "ctest_loss_cone_mask_gyrokinetic_1x2v_p%d_ho.gkyl", poly_order);
-  gkyl_grid_sub_array_write(&grid, &local, 0, mask_ho, fname);
-
-  sprintf(fname, "ctest_loss_cone_mask_gyrokinetic_1x2v_p%d_ref.gkyl", poly_order);
-  gkyl_grid_sub_array_write(&grid, &local, 0, mask_ref_ho, fname);
+  gkyl_array_clear(mask_ref, 0.0);
+  build_reference_mask(&local, &local_conf, &basis_conf, gvm_ho,
+    bmag_ho, phi_ho, ctx.mass, ctx.charge, mask_ref);
 
+#ifdef GKYL_HAVE_CUDA
   if (use_gpu) {
-    gkyl_cu_free(bmag_max);
-    gkyl_cu_free(bmag_max_loc);
-    gkyl_cu_free(phi_m);
+    struct gkyl_loss_cone_mask_gyrokinetic_inp inp_proj_cpu = {
+      .conf_basis = &basis_conf,
+      .conf_range = &local_conf,
+      .vel_map = gvm_ho,
+      .use_gpu = false,
+      .mass = ctx.mass,
+      .charge = ctx.charge,
+    };
+    struct gkyl_loss_cone_mask_gyrokinetic *proj_mask_cpu =
+      gkyl_loss_cone_mask_gyrokinetic_inew(&inp_proj_cpu);
+
+    gkyl_loss_cone_mask_gyrokinetic_advance(proj_mask_cpu, &local, &local_conf,
+      bmag_ho, phi_ho, mask_cpu);
+
+    int gpu_cpu_mismatches = 0;
+    struct gkyl_range_iter iter_cmp;
+    gkyl_range_iter_init(&iter_cmp, &local);
+    while (gkyl_range_iter_next(&iter_cmp)) {
+      long linidx = gkyl_range_idx(&local, iter_cmp.idx);
+      const double *mg = gkyl_array_cfetch(mask_ho, linidx);
+      const double *mc = gkyl_array_cfetch(mask_cpu, linidx);
+
+      bool same = fabs(mg[0] - mc[0]) < 1e-12;
+      TEST_CHECK(same);
+      if (!same && gpu_cpu_mismatches < 8) {
+        printf("gpu/cpu mismatch idx=(%d,%d,%d): gpu=%g cpu=%g\n",
+          iter_cmp.idx[0], iter_cmp.idx[1], iter_cmp.idx[2], mg[0], mc[0]);
+        gpu_cpu_mismatches++;
+      }
+    }
+
+    gkyl_loss_cone_mask_gyrokinetic_release(proj_mask_cpu);
   }
-  else {
-    gkyl_free(bmag_max);
-    gkyl_free(bmag_max_loc);
-    gkyl_free(phi_m);
+#endif
+
+  int mismatches = 0;
+  struct gkyl_range_iter iter;
+  gkyl_range_iter_init(&iter, &local);
+  while (gkyl_range_iter_next(&iter)) {
+    long linidx = gkyl_range_idx(&local, iter.idx);
+    const double *m = gkyl_array_cfetch(mask_ho, linidx);
+    const double *r = gkyl_array_cfetch(mask_ref, linidx);
+
+    bool ok = fabs(m[0] - r[0]) < 1e-12;
+    TEST_CHECK(ok);
+    if (!ok && mismatches < 8) {
+      printf("mask mismatch idx=(%d,%d,%d): got=%g ref=%g\n",
+        iter.idx[0], iter.idx[1], iter.idx[2], m[0], r[0]);
+      mismatches++;
+    }
   }
-  gkyl_array_release(phi); 
-  gkyl_array_release(phi_ho); 
-  gkyl_array_release(mask); 
+
+  gkyl_array_release(bmag_ho);
+  gkyl_array_release(phi);
+  gkyl_array_release(phi_ho);
+  gkyl_array_release(mask);
   gkyl_array_release(mask_ho);
-  gkyl_array_release(mask_ref_ho);
+  gkyl_array_release(mask_cpu);
+  gkyl_array_release(mask_ref);
+
   gkyl_loss_cone_mask_gyrokinetic_release(proj_mask);
   gkyl_velocity_map_release(gvm);
+  gkyl_velocity_map_release(gvm_ho);
   gkyl_gk_geometry_release(gk_geom);
   gkyl_position_map_release(pmap);
 
@@ -406,20 +552,44 @@ test_1x2v_gk(int poly_order, bool use_gpu)
     gkyl_cu_free(basis_on_dev);
     gkyl_cu_free(basis_on_dev_conf);
   }
-#endif  
+#else
+  (void) basis_on_dev;
+  (void) basis_on_dev_conf;
+#endif
+}
+
+void
+test_1x2v_p1_gk_ho(void)
+{
+  run_case_1x2v(1, false, false); // Presumably (poly_order, use_gpu, use_nonzero_phi): p=1, host, zero potential -- confirm.
 }
 
-void test_1x2v_p1_gk_ho() { test_1x2v_gk(1, false); }
+void
+test_1x2v_p1_nonzero_phi_gk_ho(void)
+{
+  run_case_1x2v(1, false, true); // Presumably (poly_order, use_gpu, use_nonzero_phi): p=1, host, nonzero potential -- confirm.
+}
 
 #ifdef GKYL_HAVE_CUDA
-void test_1x2v_p1_gk_dev() { test_1x2v_gk(1, true); }
+void
+test_1x2v_p1_gk_dev(void)
+{
+  run_case_1x2v(1, true, false); // Presumably (poly_order, use_gpu, use_nonzero_phi): p=1, device, zero potential -- confirm.
+}
+
+void
+test_1x2v_p1_nonzero_phi_gk_dev(void)
+{
+  run_case_1x2v(1, true, true); // Presumably (poly_order, use_gpu, use_nonzero_phi): p=1, device, nonzero potential -- confirm.
+}
 #endif
 
 TEST_LIST = {
   { "test_1x2v_p1_gk_ho", test_1x2v_p1_gk_ho },
-
+  { "test_1x2v_p1_nonzero_phi_gk_ho", test_1x2v_p1_nonzero_phi_gk_ho },
 #ifdef GKYL_HAVE_CUDA
   { "test_1x2v_p1_gk_dev", test_1x2v_p1_gk_dev },
+  { "test_1x2v_p1_nonzero_phi_gk_dev", test_1x2v_p1_nonzero_phi_gk_dev },
 #endif
   { NULL, NULL },
 };
diff --git a/gyrokinetic/unit/mctest_loss_cone_mask_gyrokinetic.c b/gyrokinetic/unit/mctest_loss_cone_mask_gyrokinetic.c
new file mode 100644
index 0000000000..0beb503f85
--- /dev/null
+++ b/gyrokinetic/unit/mctest_loss_cone_mask_gyrokinetic.c
@@ -0,0 +1,230 @@
+#include <acutest.h>
+
+#include <math.h>
+
+#include <gkyl_alloc.h>
+#include <gkyl_array.h>
+#include <gkyl_comm.h>
+#include <gkyl_const.h>
+#include <gkyl_eval_on_nodes.h>
+#include <gkyl_loss_cone_mask_gyrokinetic.h>
+#include <gkyl_null_comm.h>
+#include <gkyl_position_map.h>
+#include <gkyl_range.h>
+#include <gkyl_rect_decomp.h>
+#include <gkyl_rect_grid.h>
+#include <gkyl_velocity_map.h>
+
+#ifdef GKYL_HAVE_MPI
+#include <gkyl_mpi_comm.h>
+#endif
+
+#ifndef M_PI
+#define M_PI 3.14159265358979323846
+#endif
+
+struct lc_ctx { // Parameters read by bmag_func_1x and phi_func_1x through their void *ctx argument.
+  double z_max; // Set to the z half-extent of the test grid; not read by the profile functions.
+  double b_m; // Field amplitude: bmag_func_1x returns b_m where cos(z) = 0.
+  double r_m; // Ratio parameter: bmag_func_1x returns b_m/r_m at z = 0.
+  double t0; // Numerator of the potential scale t0/eV used by phi_func_1x.
+  double eV; // Denominator of the potential scale t0/eV used by phi_func_1x.
+  double phi_fac; // Multiplier applied to t0/eV in phi_func_1x.
+};
+
+typedef void (*evalf_t)(double t, const double *xn, double *fout, void *ctx);
+
+static struct gkyl_array*
+mkarr(long nc, long size)
+{ // Create a GKYL_DOUBLE array with nc components and size entries; callers release it with gkyl_array_release.
+  return gkyl_array_new(GKYL_DOUBLE, nc, size);
+}
+
+static struct gkyl_comm*
+comm_new(struct gkyl_rect_decomp *decomp)
+{ // Create a communicator for decomp: MPI on MPI_COMM_WORLD when GKYL_HAVE_MPI is defined, else a null communicator.
+#ifdef GKYL_HAVE_MPI
+  return gkyl_mpi_comm_new(&(struct gkyl_mpi_comm_inp) {
+    .mpi_comm = MPI_COMM_WORLD,
+    .decomp = decomp,
+  });
+#else
+  return gkyl_null_comm_inew(&(struct gkyl_null_comm_inp) { .use_gpu = false }); // NOTE(review): decomp is not forwarded in this branch.
+#endif
+}
+
+static void
+bmag_func_1x(double t, const double *xn, double *fout, void *ctx)
+{ // Field-magnitude profile in z = xn[0]; ctx must point to a struct lc_ctx and t is not used.
+  struct lc_ctx *p = ctx;
+  double z = xn[0];
+  fout[0] = p->b_m * (1.0 - ((p->r_m - 1.0) / p->r_m) * pow(cos(z), 2.0)); // Equals b_m/r_m at z = 0 and b_m where cos(z) = 0.
+}
+
+static void
+phi_func_1x(double t, const double *xn, double *fout, void *ctx)
+{ // Potential profile in z = xn[0]; ctx must point to a struct lc_ctx and t is not used.
+  struct lc_ctx *p = ctx;
+  double z = xn[0];
+  fout[0] = p->phi_fac * p->t0 / p->eV * (1.0 - pow(cos(4.0 * z), 2.0)); // Amplitude phi_fac*t0/eV times (1 - cos(4z)^2); zero at z = 0.
+}
+
+static void
+eval_dg_on_range(const struct gkyl_rect_grid *grid, const struct gkyl_basis *basis,
+  const struct gkyl_range *range, evalf_t eval, void *ctx, struct gkyl_array *out)
+{ // Fill out on range from eval (one output component, time 0.0) using a temporary eval_on_nodes updater.
+  struct gkyl_eval_on_nodes *ev = gkyl_eval_on_nodes_new(grid, basis, 1, eval, ctx);
+  gkyl_eval_on_nodes_advance(ev, 0.0, range, out);
+  gkyl_eval_on_nodes_release(ev); // The updater is only needed for this one evaluation.
+}
+
+static void
+test_loss_cone_mask_parallel_4dom(void)
+{ // Check that the mask computed per rank on a 4-way z decomposition (then gathered) matches a single-range reference.
+  int rank = 0, size = 1;
+#ifdef GKYL_HAVE_MPI
+  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
+  MPI_Comm_size(MPI_COMM_WORLD, &size);
+#endif
+  // The decomposition below is hard-wired to 4 cuts along z, so any other rank count is skipped.
+  if (size != 4) {
+    if (rank == 0) { // Report once; printed directly because TEST_MSG only emits after a failed check.
+      fprintf(stderr, "mctest_loss_cone_mask_gyrokinetic requires 4 MPI ranks, got %d\n", size);
+    }
+    return;
+  }
+
+  const int cdim = 1, vdim = 2, pdim = 3;
+  const int nz = 64, nvpar = 16, nmu = 16;
+  const int cells_conf[] = { nz };
+  const int cells_phase[] = { nz, nvpar, nmu };
+  const int cells_vel[] = { nvpar, nmu };
+  const int cuts[] = { 4 };
+  const double z_max = M_PI - 0.5;
+  const double lower_phase[] = { -z_max, -6.0, 0.0 };
+  const double upper_phase[] = { z_max, 6.0, 18.0 };
+  const double lower_conf[] = { -z_max };
+  const double upper_conf[] = { z_max };
+  const double lower_vel[] = { -6.0, 0.0 };
+  const double upper_vel[] = { 6.0, 18.0 };
+
+  struct gkyl_rect_grid grid_conf, grid_phase, grid_vel;
+  gkyl_rect_grid_init(&grid_conf, cdim, lower_conf, upper_conf, cells_conf);
+  gkyl_rect_grid_init(&grid_phase, pdim, lower_phase, upper_phase, cells_phase);
+  gkyl_rect_grid_init(&grid_vel, vdim, lower_vel, upper_vel, cells_vel);
+
+  struct gkyl_range conf_global, phase_global, vel_global;
+  gkyl_create_global_range(cdim, cells_conf, &conf_global);
+  gkyl_create_global_range(pdim, cells_phase, &phase_global);
+  gkyl_create_global_range(vdim, cells_vel, &vel_global);
+  // Split z into 4 subdomains for both conf and phase space; the velocity dimensions are not cut.
+  struct gkyl_rect_decomp *conf_decomp = gkyl_rect_decomp_new_from_cuts(cdim, cuts, &conf_global);
+  int phase_cuts[] = { 4, 1, 1 };
+  struct gkyl_rect_decomp *phase_decomp = gkyl_rect_decomp_new_from_cuts(pdim, phase_cuts, &phase_global);
+  struct gkyl_comm *comm_conf = comm_new(conf_decomp);
+  struct gkyl_comm *comm_phase = comm_new(phase_decomp);
+  // Each rank works on the subdomain with its own rank index.
+  const struct gkyl_range *conf_local = &conf_decomp->ranges[rank];
+  const struct gkyl_range *phase_local = &phase_decomp->ranges[rank];
+
+  struct gkyl_basis basis_conf, basis_phase;
+  gkyl_cart_modal_serendip(&basis_conf, cdim, 1);
+  gkyl_cart_modal_gkhybrid(&basis_phase, cdim, vdim);
+
+  struct lc_ctx ctx = {
+    .z_max = z_max,
+    .b_m = 4.0,
+    .r_m = 8.0,
+    .t0 = 100.0 * GKYL_ELEMENTARY_CHARGE,
+    .eV = GKYL_ELEMENTARY_CHARGE,
+    .phi_fac = 5.0,
+  };
+  // Local pieces of bmag/phi plus global-size arrays that receive the gathered data.
+  struct gkyl_array *bmag_local = mkarr(basis_conf.num_basis, conf_local->volume);
+  struct gkyl_array *phi_local = mkarr(basis_conf.num_basis, conf_local->volume);
+  struct gkyl_array *bmag_global_gather = mkarr(basis_conf.num_basis, conf_global.volume);
+  struct gkyl_array *phi_global_gather = mkarr(basis_conf.num_basis, conf_global.volume);
+
+  eval_dg_on_range(&grid_conf, &basis_conf, conf_local, bmag_func_1x, &ctx, bmag_local);
+  eval_dg_on_range(&grid_conf, &basis_conf, conf_local, phi_func_1x, &ctx, phi_local);
+  // Assemble the global bmag/phi from the per-rank pieces.
+  gkyl_comm_array_allgather(comm_conf, conf_local, &conf_global, bmag_local, bmag_global_gather);
+  gkyl_comm_array_allgather(comm_conf, conf_local, &conf_global, phi_local, phi_global_gather);
+
+  struct gkyl_position_map *pmap = gkyl_position_map_null_new(); // NOTE(review): only referenced again by its release below.
+  struct gkyl_mapc2p_inp c2p_in = { };
+  struct gkyl_velocity_map *gvm_local = gkyl_velocity_map_new(c2p_in, grid_phase, grid_vel,
+    *phase_local, *phase_local, vel_global, vel_global, false);
+
+  struct gkyl_loss_cone_mask_gyrokinetic *up_local =
+    gkyl_loss_cone_mask_gyrokinetic_inew(&(struct gkyl_loss_cone_mask_gyrokinetic_inp) {
+      .conf_basis = &basis_conf,
+      .conf_range = &conf_global,
+      .vel_map = gvm_local,
+      .mass = 2.014 * GKYL_PROTON_MASS,
+      .charge = GKYL_ELEMENTARY_CHARGE,
+    });
+  // Mask on this rank's phase-space subdomain, computed from the gathered global conf-space fields.
+  struct gkyl_array *mask_local = mkarr(1, phase_local->volume);
+  gkyl_loss_cone_mask_gyrokinetic_advance(up_local, phase_local, &conf_global,
+    bmag_global_gather, phi_global_gather, mask_local);
+
+  struct gkyl_array *mask_dist_global = mkarr(1, phase_global.volume);
+  gkyl_comm_array_allgather(comm_phase, phase_local, &phase_global, mask_local, mask_dist_global);
+  // Rank 0 recomputes fields and mask on the undecomposed ranges and compares cell by cell.
+  if (rank == 0) {
+    struct gkyl_array *bmag_ref = mkarr(basis_conf.num_basis, conf_global.volume);
+    struct gkyl_array *phi_ref = mkarr(basis_conf.num_basis, conf_global.volume);
+    eval_dg_on_range(&grid_conf, &basis_conf, &conf_global, bmag_func_1x, &ctx, bmag_ref);
+    eval_dg_on_range(&grid_conf, &basis_conf, &conf_global, phi_func_1x, &ctx, phi_ref);
+
+    struct gkyl_velocity_map *gvm_global = gkyl_velocity_map_new(c2p_in, grid_phase, grid_vel,
+      phase_global, phase_global, vel_global, vel_global, false);
+    struct gkyl_loss_cone_mask_gyrokinetic *up_ref =
+      gkyl_loss_cone_mask_gyrokinetic_inew(&(struct gkyl_loss_cone_mask_gyrokinetic_inp) {
+        .conf_basis = &basis_conf,
+        .conf_range = &conf_global,
+        .vel_map = gvm_global,
+        .mass = 2.014 * GKYL_PROTON_MASS,
+        .charge = GKYL_ELEMENTARY_CHARGE,
+      });
+
+    struct gkyl_array *mask_ref = mkarr(1, phase_global.volume);
+    gkyl_loss_cone_mask_gyrokinetic_advance(up_ref, &phase_global, &conf_global,
+      bmag_ref, phi_ref, mask_ref);
+
+    struct gkyl_range_iter it;
+    gkyl_range_iter_init(&it, &phase_global);
+    while (gkyl_range_iter_next(&it)) {
+      long linidx = gkyl_range_idx(&phase_global, it.idx);
+      const double *md = gkyl_array_cfetch(mask_dist_global, linidx);
+      const double *mr = gkyl_array_cfetch(mask_ref, linidx);
+      TEST_CHECK(fabs(md[0] - mr[0]) < 1e-12); // Gathered and reference masks must agree in every phase-space cell.
+    }
+
+    gkyl_array_release(mask_ref);
+    gkyl_loss_cone_mask_gyrokinetic_release(up_ref);
+    gkyl_velocity_map_release(gvm_global);
+    gkyl_array_release(phi_ref);
+    gkyl_array_release(bmag_ref);
+  }
+  // Release everything in reverse order of creation.
+  gkyl_array_release(mask_dist_global);
+  gkyl_array_release(mask_local);
+  gkyl_loss_cone_mask_gyrokinetic_release(up_local);
+  gkyl_velocity_map_release(gvm_local);
+  gkyl_position_map_release(pmap);
+  gkyl_array_release(phi_global_gather);
+  gkyl_array_release(bmag_global_gather);
+  gkyl_array_release(phi_local);
+  gkyl_array_release(bmag_local);
+  gkyl_comm_release(comm_phase);
+  gkyl_comm_release(comm_conf);
+  gkyl_rect_decomp_release(phase_decomp);
+  gkyl_rect_decomp_release(conf_decomp);
+}
+
+TEST_LIST = {
+  { "loss_cone_mask_parallel_4dom", test_loss_cone_mask_parallel_4dom }, // Returns without running any check unless launched on exactly 4 ranks.
+  { NULL, NULL },
+};
diff --git a/gyrokinetic/zero/gkyl_loss_cone_mask_gyrokinetic.h b/gyrokinetic/zero/gkyl_loss_cone_mask_gyrokinetic.h
index 36e12300c3..f6778d9f65 100644
--- a/gyrokinetic/zero/gkyl_loss_cone_mask_gyrokinetic.h
+++ b/gyrokinetic/zero/gkyl_loss_cone_mask_gyrokinetic.h
@@ -10,70 +10,50 @@
 // Object type.
 typedef struct gkyl_loss_cone_mask_gyrokinetic gkyl_loss_cone_mask_gyrokinetic;
 
-// Type of function expected for the ctp_pos_func input.
-typedef void (*loss_cone_mask_gyrokinetic_c2p_t)(const double *xcomp, double *xphys, void *ctx);
-
-// Available options:
-//   A) num_quad=1, qtype=GKYL_GAUSS_QUAD. Output: ncomp=1 array.
-//   B) num_quad>1, qtype=GKYL_GAUSS_QUAD or GKYL_GAUSS_LOBATTO_QUAD, cellwise_trap_loss=true. Output: ncomp=1 array.
-//   C) num_quad>1, qtype=GKYL_GAUSS_QUAD or GKYL_GAUSS_LOBATTO_QUAD, cellwise_trap_loss=false. Output: ncomp=phase_basis.ncomp array.
-
 // Inputs packaged as a struct.
 struct gkyl_loss_cone_mask_gyrokinetic_inp {
-  const struct gkyl_rect_grid *phase_grid; // Phase-space grid on which to compute moments.
   const struct gkyl_basis *conf_basis; // Configuration-space basis functions.
-  const struct gkyl_basis *phase_basis; // Phase-space basis functions.
   const struct gkyl_range *conf_range; // Configuration-space range.
-  const struct gkyl_range *conf_range_ext; // Extended configuration-space range (for internal memory allocations).
-  const struct gkyl_range *vel_range; // Velocity space range.
-  const struct gkyl_velocity_map *vel_map; // Velocity space mapping object.
-  const struct gkyl_array *bmag; // Magnetic field magnitude.
-  const double *bmag_max; // Maximum bmag (on GPU if use_gpu=true).
-  const double *bmag_max_loc; // Location of maximum bmag (on GPU if use_gpu=true)..
+  const struct gkyl_velocity_map *vel_map;    // Velocity space mapping object.
+  bool use_gpu; // Flag to indicate if GPU should be used.
   double mass; // Species mass.
   double charge; // Species charge.
-  enum gkyl_quad_type qtype; // Quadrature rule/nodes.
-  int num_quad; // Number of quad points in each direction to use (default: poly_order+1).
-  bool cellwise_trap_loss; // =True takes a whole cell to be either trapped or passing,
-                           // so not high-order distinction within the cell is made.
-  loss_cone_mask_gyrokinetic_c2p_t c2p_pos_func; // Function that transforms a set of cdim
-                                    // position-space computational coordinates to physical ones.
-  void *c2p_pos_func_ctx; // Context for c2p_pos_func.
-  bool use_gpu; // Whether to run on GPU.
 };
 
 /**
  * Create new updater that populates an array with the masking function
- *   if (mu > mu_bound)
- *     f = 1
- *   else
- *     f = 0
- * where mu_bound = (0.5*m*vpar^2+q*(phi-phi_m))/(B*(B_max/B-1))
- * is the trapped-passing boundary in vpar-mu space.
+ * based on the escape-barrier criterion
+ *   EB(z,mu) = min(max_{s in [z_L,z]} U(s,mu), max_{s in [z,z_R]} U(s,mu))
+ * with U = mu*B + q*phi.
+ * A node is trapped if H < EB.
  *
  * @param inp Input parameters defined in gkyl_loss_cone_mask_gyrokinetic_inp struct.
  * @return New updater pointer.
  */
-struct gkyl_loss_cone_mask_gyrokinetic* 
+struct gkyl_loss_cone_mask_gyrokinetic*
 gkyl_loss_cone_mask_gyrokinetic_inew(const struct gkyl_loss_cone_mask_gyrokinetic_inp *inp);
 
 /**
- * Compute projection of the loss cone masking function on the phase-space basis.
+ * Compute the loss-cone mask on phase-space cell nodes.
+ *
+ * The caller supplies the magnetic field magnitude and electrostatic
+ * potential arrays. This keeps the updater free of any communication logic;
+ * the app is responsible for assembling global data when needed.
  *
  * @param up Project on basis updater to run.
  * @param phase_rng Phase-space range.
  * @param conf_rng Configuration-space range.
+ * @param bmag Magnetic field magnitude (cdim DG expansion).
  * @param phi Electrostatic potential.
- * @param phi_m Electrostatic potential at the mirror throat (on GPU if use_gpu=true).
  * @param mask_out Output masking function.
  */
 void gkyl_loss_cone_mask_gyrokinetic_advance(gkyl_loss_cone_mask_gyrokinetic *up,
   const struct gkyl_range *phase_range, const struct gkyl_range *conf_range,
-  const struct gkyl_array *phi, const double *phi_m, struct gkyl_array *mask_out);
+  const struct gkyl_array *bmag, const struct gkyl_array *phi, struct gkyl_array *mask_out);
 
 /**
  * Delete updater.
  *
  * @param up Updater to delete.
  */
-void gkyl_loss_cone_mask_gyrokinetic_release(gkyl_loss_cone_mask_gyrokinetic* up);
+void gkyl_loss_cone_mask_gyrokinetic_release(gkyl_loss_cone_mask_gyrokinetic *up);
diff --git a/gyrokinetic/zero/gkyl_loss_cone_mask_gyrokinetic_priv.h b/gyrokinetic/zero/gkyl_loss_cone_mask_gyrokinetic_priv.h
index 03e57dec97..7c5b49381f 100644
--- a/gyrokinetic/zero/gkyl_loss_cone_mask_gyrokinetic_priv.h
+++ b/gyrokinetic/zero/gkyl_loss_cone_mask_gyrokinetic_priv.h
@@ -1,6 +1,7 @@
 // Private header: not for direct use
 #pragma once
 
+#include <float.h>
 #include <math.h>
 
 #include <gkyl_array.h>
@@ -8,105 +9,159 @@
 #include <gkyl_mat.h>
 #include <gkyl_mat_priv.h>
 #include <gkyl_range.h>
-#include <gkyl_rect_grid.h> 
 #include <gkyl_util.h>
 #include <assert.h>
 
 GKYL_CU_DH
 static inline void
-log_to_comp(int ndim, const double *eta,
-  const double * GKYL_RESTRICT dx, const double * GKYL_RESTRICT xc,
-  double* GKYL_RESTRICT xout)
+nodal_coords(int ndim, int node, double *x) // x[d] = +1/-1 from the binary digits of node; dimension 0 is the most significant bit.
 {
-  for (int d=0; d<ndim; ++d) xout[d] = 0.5*dx[d]*eta[d]+xc[d];
-}
-
-static inline void
-copy_idx_arrays(int cdim, int pdim, const int *cidx, const int *vidx, int *out)
-{
-  for (int i=0; i<cdim; ++i)
-    out[i] = cidx[i];
-  for (int i=cdim; i<pdim; ++i)
-    out[i] = vidx[i-cdim];
+  for (int d = 0; d < ndim; ++d) {
+    int place_value = 1 << (ndim - 1 - d); // Exact integer 2^k; (int)pow(2.0, k) would truncate to 2^k-1 if pow() rounds low.
+    int digit = (node / place_value) % 2;
+    x[d] = digit ? 1.0 : -1.0;
+  }
 }
 
 struct gkyl_loss_cone_mask_gyrokinetic {
   int cdim; // Configuration-space dimension.
-  int pdim; // Phase-space dimension.
-  int vdim; // Velocity-space dimension.
-
-  const struct gkyl_rect_grid *grid_phase;
   int num_basis_conf; // Number of configuration-space basis functions.
-  int num_basis_phase; // Number of phase-space basis functions.
-  double norm_fac; // Normalization factor.
+  int num_nodes_conf; // Number of configuration-space cell nodes.
+  bool use_gpu;
 
   const struct gkyl_velocity_map *vel_map; // Velocity space mapping object.
 
   double mass; // Species mass.
   double charge; // Species charge.
-  double *bmag_max; // Maximum magnetic field amplitude.
-  double *bmag_max_loc; // Location of bmag_max.
-  bool use_gpu; // Boolean if we are performing projection on device.
-
-  loss_cone_mask_gyrokinetic_c2p_t c2p_pos; // Function transforming position comp to phys coords.
-  void *c2p_pos_ctx; // Context for the c2p_pos mapping.
-
-  bool cellwise_trap_loss; // Whether a whole cell is trapped/lost, or whether
-                           // high-order distinction within a cell is allowed.
-  struct gkyl_range conf_qrange; // Range of Configuration-space ordinates.
-  struct gkyl_range phase_qrange; // Range of Phase-space ordinates.
-
-  // For quadrature in phase-space.
-  int tot_quad_phase; // Total number of quadrature points.
-  struct gkyl_array *ordinates_phase; // Ordinates.
-  struct gkyl_array *weights_phase; // Weights.
-  struct gkyl_array *basis_at_ords_phase; // Basis functions at ordinates.
-
-  // For quadrature in configuration-space.
-  int tot_quad_conf; // Total number of quadrature points.
-  struct gkyl_array *ordinates_conf; // Ordinates.
-  struct gkyl_array *weights_conf; // Weights.
-  struct gkyl_array *basis_at_ords_conf; // Basis functions at ordinates.
-
-  struct gkyl_array *fun_at_ords; // Mask we are projecting at ordinates in a cell.
-
-  int *p2c_qidx;  // Mapping between configuration-space and phase-space ordinates.
-  struct gkyl_array *mask_out_quad; // Array keeping f_lte at phase-space quadrature nodes.
-  struct gkyl_array *qDphiDbmag_quad; // Array keeping q*(phi-phi_m)/(B_max-B)
-                                      // at configuration-space quadrature nodes.
-  struct gkyl_array *Dbmag_quad; // B_max-B at configuration-space quadrature nodes.
-
-  struct gkyl_mat_mm_array_mem *phase_nodal_to_modal_mem; // Structure of data which converts  
-                                                          // stores the info to convert phase
-                                                          // space nodal to modal gkyl arrays.
+
+  struct gkyl_array *basis_at_nodes_conf; // Basis functions at configuration-space nodes.
 };
 
+GKYL_CU_DH
+static inline int
+conf_node_z_endpoint_index(int cdim, int conf_node, int zdim)
+{ // Return 1 if conf_node has coordinate +1 along zdim, or 0 if it has -1.
+  double eta[GKYL_MAX_DIM] = { 0.0 };
+  nodal_coords(cdim, conf_node, eta);
+  return eta[zdim] > 0.0 ? 1 : 0;
+}
+
+GKYL_CU_DH
+static inline int
+conf_node_with_matching_perpendicular_coords(int cdim, int anchor_node, int zdim,
+  int z_endpoint_index)
+{ // Return the node at z_endpoint_index (0 = -1, 1 = +1) along zdim that matches anchor_node in all other dimensions.
+  int num_nodes_conf = 1 << cdim; // Exact integer 2^cdim; (int)pow(2.0, cdim) would truncate if pow() rounds low.
+
+  double eta_anchor[GKYL_MAX_DIM] = { 0.0 };
+  nodal_coords(cdim, anchor_node, eta_anchor);
+
+  for (int cand = 0; cand < num_nodes_conf; ++cand) {
+    if (conf_node_z_endpoint_index(cdim, cand, zdim) != z_endpoint_index) {
+      continue; // Candidate is on the wrong side along zdim.
+    }
+
+    double eta_cand[GKYL_MAX_DIM] = { 0.0 };
+    nodal_coords(cdim, cand, eta_cand);
+
+    bool same_transverse = true;
+    for (int d = 0; d < cdim; ++d) {
+      if (d == zdim) {
+        continue;
+      }
+      if (eta_cand[d] != eta_anchor[d]) { // Coordinates are exactly +1.0 or -1.0, so exact comparison is safe.
+        same_transverse = false;
+        break;
+      }
+    }
+
+    if (same_transverse) {
+      return cand;
+    }
+  }
+
+  return anchor_node; // Fallback when no candidate matched.
+}
+
+GKYL_CU_DH
+static inline double
+field_node_val(const struct gkyl_array *arr, const struct gkyl_array *basis_at_nodes,
+  int num_basis, long linidx, int node)
+{ // Return sum_k arr[linidx][k] * basis_at_nodes[node][k] over the first num_basis entries.
+  const double *arr_d = (const double *)gkyl_array_cfetch(arr, linidx);
+  const double *basis_d = (const double *)gkyl_array_cfetch(basis_at_nodes, node);
+  // Presumably arr holds modal coefficients per cell and basis_at_nodes the basis evaluated at each node -- confirm.
+  double val = 0.0;
+  for (int k = 0; k < num_basis; ++k) {
+    val += arr_d[k] * basis_d[k];
+  }
+
+  return val;
+}
+
+GKYL_CU_DH
+static inline void
+escape_barriers(int cdim, int num_basis_conf, const struct gkyl_range *conf_range,
+  const struct gkyl_array *basis_at_nodes_conf, const struct gkyl_array *phi,
+  const struct gkyl_array *bmag, const int *base_idx, int z_cell,
+  int anchor_conf_node, double mu, double charge,
+  double *barrier_left, double *barrier_right)
+{ // Output the maxima of U = mu*bmag + charge*phi over cells at or below z_cell (left) and at or above it (right).
+  int zdim = cdim - 1; // The scan runs along the last configuration-space dimension.
+  // Nodes sharing the anchor's transverse coordinates: same z endpoint as the anchor, upper z endpoint, lower z endpoint.
+  int z_endpoint_index = conf_node_z_endpoint_index(cdim, anchor_conf_node, zdim);
+  int anchor_node = conf_node_with_matching_perpendicular_coords(cdim, anchor_conf_node, zdim,
+    z_endpoint_index);
+  int z_upper_node = conf_node_with_matching_perpendicular_coords(cdim, anchor_conf_node,
+    zdim, 1);
+  int z_lower_node = conf_node_with_matching_perpendicular_coords(cdim, anchor_conf_node,
+    zdim, 0);
+  // Cell index used for the scan: base_idx in every dimension, with the zdim entry overwritten per step.
+  int scan_idx[GKYL_MAX_DIM];
+  for (int d = 0; d < cdim; ++d) {
+    scan_idx[d] = base_idx[d];
+  }
+  // Start from -DBL_MAX so the running maxima are set by the scanned samples.
+  *barrier_left = -DBL_MAX;
+  *barrier_right = -DBL_MAX;
+  // One sample per cell over the whole z extent of conf_range.
+  for (int iz = conf_range->lower[zdim]; iz <= conf_range->upper[zdim]; ++iz) {
+    scan_idx[zdim] = iz;
+    long linidx = gkyl_range_idx(conf_range, scan_idx);
+    // Cells below z_cell are sampled at their upper-z node, cells above at their lower-z node, z_cell at the anchor.
+    int z_scan_node = anchor_node;
+    if (iz < z_cell) {
+      z_scan_node = z_upper_node;
+    }
+    else if (iz > z_cell) {
+      z_scan_node = z_lower_node;
+    }
+
+    double phi_scan = field_node_val(phi, basis_at_nodes_conf, num_basis_conf,
+      linidx, z_scan_node);
+    double bmag_scan = field_node_val(bmag, basis_at_nodes_conf, num_basis_conf,
+      linidx, z_scan_node);
+    double u_scan = mu * bmag_scan + charge * phi_scan;
+    // The z_cell sample itself counts toward both barriers.
+    if (iz <= z_cell && u_scan > *barrier_left) {
+      *barrier_left = u_scan;
+    }
+    if (iz >= z_cell && u_scan > *barrier_right) {
+      *barrier_right = u_scan;
+    }
+  }
+}
+
 #ifdef GKYL_HAVE_CUDA
-/**
- * Obtain bmag_max-bmag at conf-space quadrature nodes and store it in up->Dbmag_quad.
- *
- * @param up Project on basis updater to run.
- * @param conf_rng Configuration-space range.
- * @param bmag Magnetic field magnitude.
- * @param bmag_max Maximum bmag.
- */
-void 
-gkyl_loss_cone_mask_gyrokinetic_Dbmag_quad_cu(gkyl_loss_cone_mask_gyrokinetic *up,
-  const struct gkyl_range *conf_range, const struct gkyl_array *bmag, const double *bmag_max);
-
-/**
- * Compute projection of the loss cone masking function on the phase-space basis
- * on the GPU.
- *
- * @param up Project on basis updater to run.
- * @param phase_rng Phase-space range.
- * @param conf_rng Configuration-space range.
- * @param phi Electrostatic potential.
- * @param phi_m Electrostatic potential at the mirror throat (on GPU).
- * @param mask_out Output masking function.
- */
-void
-gkyl_loss_cone_mask_gyrokinetic_advance_cu(gkyl_loss_cone_mask_gyrokinetic *up,
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+void gkyl_loss_cone_mask_gyrokinetic_advance_cu(gkyl_loss_cone_mask_gyrokinetic *up,
   const struct gkyl_range *phase_range, const struct gkyl_range *conf_range,
-  const struct gkyl_array *phi, const double *phi_m, struct gkyl_array *mask_out);
+  const struct gkyl_array *bmag, const struct gkyl_array *phi, struct gkyl_array *mask_out);
+
+#ifdef __cplusplus
+}
 #endif
+#endif
\ No newline at end of file
diff --git a/gyrokinetic/zero/gkyl_position_map_priv.h b/gyrokinetic/zero/gkyl_position_map_priv.h
index ab3b3ae3c5..f0b40142ac 100644
--- a/gyrokinetic/zero/gkyl_position_map_priv.h
+++ b/gyrokinetic/zero/gkyl_position_map_priv.h
@@ -276,13 +276,13 @@ calc_bmag_global_derivative(double theta, void *ctx)
   double fout[3];
   xh[0] = gpm->constB_ctx->psi;
   xh[1] = gpm->constB_ctx->alpha;
-  xh[2] = theta - h;
+  xh[2] = theta + h;
   gkyl_calc_bmag_global(0.0, xh, fout, bmag_ctx);
   double Bmag_plus = fout[0];
-  xh[2] = theta - 2*h;
+  xh[2] = theta - h;
   gkyl_calc_bmag_global(0.0, xh, fout, bmag_ctx);
   double Bmag_minus = fout[0];
-  return (Bmag_plus - Bmag_minus) / (h);
+  return (Bmag_plus - Bmag_minus) / (2*h);
 }
 
 /**
@@ -314,15 +314,24 @@ find_B_field_extrema(struct gkyl_position_map *gpm)
   double *theta_extrema = gkyl_malloc(sizeof(double) * (npts + 1));
   double *bmag_extrema = gkyl_malloc(sizeof(double) * (npts + 1));
 
-  for (int i = 0; i <= npts; i++){
+  for (int i = 1; i < npts; i++){
     double theta = theta_lo + i * theta_dxi;
     xp[Z_IDX] = theta;
     gkyl_calc_bmag_global(0.0, xp, &bmag_vals[i], bmag_ctx);
     dbmag_vals[i] = calc_bmag_global_derivative(theta, gpm);
-    if (i==0) continue;
 
-    // Minima
-    if (dbmag_vals[i] > 0 && dbmag_vals[i-1] < 0){
+    // Near-zero derivative: B is locally flat here; record it as an extremum (min/max is decided later).
+    // Use continue so this is mutually exclusive with the sign-change checks below.
+    if (fabs(dbmag_vals[i]) < 1e-10) {
+      theta_extrema[extrema] = theta;
+      bmag_extrema[extrema] = bmag_vals[i];
+      extrema++;
+      continue;
+    }
+
+    // Minima via sign change. Guard on |dbmag[i-1]| to avoid a double-record if the
+    // previous point was already captured by the near-zero branch above.
+    if (dbmag_vals[i] > 0 && dbmag_vals[i-1] < 0 && fabs(dbmag_vals[i-1]) >= 1e-10){
       if (bmag_vals[i] < bmag_vals[i-1])
       {
         theta_extrema[extrema] = theta;
@@ -337,8 +346,8 @@ find_B_field_extrema(struct gkyl_position_map *gpm)
       }
     }
 
-    // Maxima
-    if (dbmag_vals[i] < 0 && dbmag_vals[i-1] > 0){
+    // Maxima via sign change. Guard on |dbmag[i-1]| for the same reason.
+    if (dbmag_vals[i] < 0 && dbmag_vals[i-1] > 0 && fabs(dbmag_vals[i-1]) >= 1e-10){
       if (bmag_vals[i] > bmag_vals[i-1])
       {
         theta_extrema[extrema] = theta;
@@ -375,30 +384,53 @@ find_B_field_extrema(struct gkyl_position_map *gpm)
 
   // Left edge
   if (bmag_extrema[0] > bmag_extrema[1])
-  {    gpm->constB_ctx->min_or_max[0] = 1;  } // Maximum
+  {
+    gpm->constB_ctx->min_or_max[0] = 1;  // Maximum
+  }
   else if (bmag_extrema[0] < bmag_extrema[1])
-  {    gpm->constB_ctx->min_or_max[0] = 0;  } // Minimum
+  {
+    gpm->constB_ctx->min_or_max[0] = 0;  // Minimum
+  }
   else
-  {    printf("Error: Extrema is not an extrema. Position_map optimization failed\n");  }
+  {
+    printf("Error: Extrema[0] is not an extrema (bmag[0]=%.6g == bmag[1]=%.6g). "
+      "Position_map optimization failed\n", bmag_extrema[0], bmag_extrema[1]);
+  }
 
   // Middle points
   for (int i = 1; i < extrema - 1; i++)
   {
     if (bmag_extrema[i] > bmag_extrema[i-1] && bmag_extrema[i] > bmag_extrema[i+1])
-    {      gpm->constB_ctx->min_or_max[i] = 1;    } // Maximum
+    {
+      gpm->constB_ctx->min_or_max[i] = 1;  // Maximum
+    }
     else if (bmag_extrema[i] < bmag_extrema[i-1] && bmag_extrema[i] < bmag_extrema[i+1])
-    {      gpm->constB_ctx->min_or_max[i] = 0;    } // Minimum
+    {
+      gpm->constB_ctx->min_or_max[i] = 0;  // Minimum
+    }
     else
-    {      printf("Error: Extrema is not an extrema. Position_map optimization failed\n");  }
+    {
+      printf("Error: Extrema[%d] is not an extrema (bmag[%d-1]=%.6g, bmag[%d]=%.6g, bmag[%d+1]=%.6g). "
+        "Position_map optimization failed\n",
+        i, i, bmag_extrema[i-1], i, bmag_extrema[i], i, bmag_extrema[i+1]);
+    }
   }
 
   // Right edge
   if (bmag_extrema[extrema-1] > bmag_extrema[extrema-2])
-  {    gpm->constB_ctx->min_or_max[extrema-1] = 1; } // Maximum
+  {
+    gpm->constB_ctx->min_or_max[extrema-1] = 1; // Maximum
+  }
   else if (bmag_extrema[extrema-1] < bmag_extrema[extrema-2])
-  {    gpm->constB_ctx->min_or_max[extrema-1] = 0; } // Minimum
-  else  
-  {    printf("Error: Extrema is not an extrema. Position_map optimization failed\n");  }
+  {
+    gpm->constB_ctx->min_or_max[extrema-1] = 0; // Minimum
+  }
+  else
+  {
+    printf("Error: Extrema[%d] (right edge) is not an extrema (bmag[%d-1]=%.6g, bmag[%d]=%.6g). "
+      "Position_map optimization failed\n",
+      extrema-1, extrema-1, bmag_extrema[extrema-2], extrema-1, bmag_extrema[extrema-1]);
+  }
 
   // Free mallocs
   gkyl_free(bmag_vals);
@@ -454,7 +486,7 @@ refine_B_field_extrema(struct gkyl_position_map *gpm)
     else if (bmag_cent < bmag_left && bmag_cent < bmag_right)
     { is_maximum = false; } // Local minima
     else
-    { printf("Error: Extrema is not an extrema. Position_map optimization failed\n");
+    { // printf("Error: Extrema is not an extrema. Position_map optimization failed\n");
       break;
     }
 
@@ -655,7 +687,7 @@ position_map_constB_z_numeric(double t, const double *xn, double *fout, void *ct
         return;
       }
       else {
-        fprintf(stderr, "Warning: Unexpected interval evaluation state in position_map_constB_z_numeric. Using theta directly.\n");
+        // fprintf(stderr, "Warning: Unexpected interval evaluation state in position_map_constB_z_numeric. Using theta directly.\n");
         fout[0] = theta;
         return;
       }
@@ -666,14 +698,16 @@ position_map_constB_z_numeric(double t, const double *xn, double *fout, void *ct
     interval_lower, interval_upper, interval_lower_eval, interval_upper_eval, 10, 1e-6);
   double Theta = res.res;
   fout[0] = Theta*gpm->constB_ctx->map_strength + theta*(1-gpm->constB_ctx->map_strength); 
-
   bool enable_limits_min_B = gpm->constB_ctx->enable_maximum_slope_limits_at_min_B;
   bool enable_limits_max_B = gpm->constB_ctx->enable_maximum_slope_limits_at_max_B;
 
   if (enable_limits_min_B || enable_limits_max_B)
   {
-    // Set a minimum cell size on the edges
-    // Assume that at inflection points, Theta = theta. This should be true
+    // Set a minimum cell size on the edges.
+    // Build limiter lines from inflection-point anchors in point-slope form:
+    //   y = y0 + m * (x - x0)
+    // where x0 is the computational-coordinate location of a region boundary
+    // and y0 is the mapped extremum location in physical theta.
     double Theta_left  = interval_lower;
     double Theta_right = interval_upper;
     double theta_middle = 0.5 * (interval_lower + interval_upper);
@@ -693,20 +727,19 @@ position_map_constB_z_numeric(double t, const double *xn, double *fout, void *ct
     double max_slope_min_B = gpm->constB_ctx->maximum_slope_at_min_B;
     double max_slope_max_B = gpm->constB_ctx->maximum_slope_at_max_B;
 
-    double right_straight_line_value, left_straight_line_value;
-    if (left_is_maximum){
-      left_straight_line_value = max_slope_max_B * theta + (1-max_slope_max_B) * Theta_left;
-    }
-    else {
-      left_straight_line_value = max_slope_min_B * theta + (1-max_slope_min_B) * Theta_left;
-    }
-
-    if (right_is_maximum){
-      right_straight_line_value = max_slope_max_B * theta + (1-max_slope_max_B) * Theta_right;
-    }
-    else {
-      right_straight_line_value = max_slope_min_B * theta + (1-max_slope_min_B) * Theta_right;
-    }
+    // Compute x-locations (in computational theta) of left/right inflection anchors.
+    double seg_dB = fabs(gpm->constB_ctx->bmag_extrema[region+1] - gpm->constB_ctx->bmag_extrema[region]);
+    double it_left_inflection = dB_global_lower / dB_cell;
+    double it_right_inflection = (dB_global_lower + seg_dB) / dB_cell;
+    double theta_left_inflection = theta_lo + it_left_inflection * theta_dxi;
+    double theta_right_inflection = theta_lo + it_right_inflection * theta_dxi;
+
+    // Build limiter lines in point-slope form so they pass through
+    // (theta_left_inflection, Theta_left) and (theta_right_inflection, Theta_right).
+    double left_slope = left_is_maximum ? max_slope_max_B : max_slope_min_B;
+    double right_slope = right_is_maximum ? max_slope_max_B : max_slope_min_B;
+    double left_straight_line_value = Theta_left + left_slope * (theta - theta_left_inflection);
+    double right_straight_line_value = Theta_right + right_slope * (theta - theta_right_inflection);
 
     if ( fout[0] < right_straight_line_value && 
       ((right_is_maximum && enable_limits_max_B) || 
diff --git a/gyrokinetic/zero/loss_cone_mask_gyrokinetic.c b/gyrokinetic/zero/loss_cone_mask_gyrokinetic.c
index 8eee0f3ef6..028c73de87 100644
--- a/gyrokinetic/zero/loss_cone_mask_gyrokinetic.c
+++ b/gyrokinetic/zero/loss_cone_mask_gyrokinetic.c
@@ -1,485 +1,147 @@
-#include <string.h>
+#include <float.h>
 #include <math.h>
 
 #include <gkyl_alloc.h>
 #include <gkyl_array.h>
-#include <gkyl_array_ops.h>
 #include <gkyl_const.h>
-#include <gkyl_gauss_quad_data.h>
-#include <gkyl_mat.h>
 #include <gkyl_loss_cone_mask_gyrokinetic.h>
 #include <gkyl_loss_cone_mask_gyrokinetic_priv.h>
 #include <gkyl_range.h>
-#include <assert.h>
 
-//
-// mu_bound = (0.5*mass*pow(vpar,2)+charge*Delta_phi)/(bmag[0]*(Rm-1));
-//          = 0.5*mass*pow(vpar,2)/(bmag[0]*(Rm-1)) + charge*Delta_phi/(bmag[0]*(Rm-1));
-//          = 0.5*mass*pow(vpar,2)/(bmag_max-bmag[0]) + charge*(phi-phi_m)/(bmag_max-bmag[0]);
-//
+// The loss-cone mask is built from the escape barrier
+// EB(z,mu) = min( max_{s in [z_L,z]} U(s,mu), max_{s in [z,z_R]} U(s,mu) )
+// with U = mu*B + q*phi. A node is trapped if H < EB.
 
-// create range to loop over quadrature points.
-static inline struct gkyl_range
-get_qrange(int cdim, int dim, int num_quad, int num_quad_v, bool *is_vdim_p2)
+static struct gkyl_array*
+mkarr(long nc, long size, bool use_gpu)
 {
-  int qshape[GKYL_MAX_DIM];
-  for (int i=0; i<cdim; ++i) qshape[i] = num_quad;
-  for (int i=cdim; i<dim; ++i) qshape[i] = is_vdim_p2[i-cdim] ? num_quad_v : num_quad;
-  struct gkyl_range qrange;
-  gkyl_range_init_from_shape(&qrange, dim, qshape);
-  return qrange;
+  return use_gpu? gkyl_array_cu_dev_new(GKYL_DOUBLE, nc, size)
+    : gkyl_array_new(GKYL_DOUBLE, nc, size);
 }
 
-// Sets ordinates, weights and basis functions at ords.
-// Returns the total number of quadrature nodes
 static int
-init_quad_values(int cdim, const struct gkyl_basis *basis, enum gkyl_quad_type qtype,
-  int num_quad, struct gkyl_array **ordinates, struct gkyl_array **weights,
-  struct gkyl_array **basis_at_ords, bool use_gpu)
+init_node_values(int ndim, const struct gkyl_basis *basis,
+  struct gkyl_array **basis_at_nodes, bool use_gpu)
 {
-  int ndim = basis->ndim;
-  int num_quad_v = num_quad;
-  // Hybrid basis have p=2 in velocity space.
-  bool is_vdim_p2[2] = {false};  // 2 is the max vdim for GK.
-  if (num_quad > 1 && basis->b_type == GKYL_BASIS_MODAL_GKHYBRID) {
-    num_quad_v = num_quad+1;
-    is_vdim_p2[0] = true;  // only vpar is quadratic in GK hybrid.
-  }
-
-  double ordinates1[num_quad], weights1[num_quad];
-  double ordinates1_v[num_quad_v], weights1_v[num_quad_v];
-
-  if (qtype == GKYL_GAUSS_QUAD) {
-    if (num_quad <= gkyl_gauss_max) {
-      // Use pre-computed values if possible (these are more accurate than computing them on the fly).
-      memcpy(ordinates1, gkyl_gauss_ordinates[num_quad], sizeof(double[num_quad]));
-      memcpy(weights1, gkyl_gauss_weights[num_quad], sizeof(double[num_quad]));
-    }
-    else {
-      gkyl_gauleg(-1, 1, ordinates1, weights1, num_quad);
-    }
-    if (num_quad_v <= gkyl_gauss_max) {
-      memcpy(ordinates1_v, gkyl_gauss_ordinates[num_quad_v], sizeof(double[num_quad_v]));
-      memcpy(weights1_v, gkyl_gauss_weights[num_quad_v], sizeof(double[num_quad_v]));
-    }
-    else {
-      gkyl_gauleg(-1, 1, ordinates1_v, weights1_v, num_quad_v);
-    }
-  }
-  else if (qtype == GKYL_GAUSS_LOBATTO_QUAD) {
-    assert( (num_quad > 1) && (num_quad <= gkyl_gauss_max) );
-    // Gauss-Lobatto quadrature
-    memcpy(ordinates1, gkyl_gauss_lobatto_ordinates[num_quad], sizeof(double[num_quad]));
-    memcpy(weights1, gkyl_gauss_lobatto_weights[num_quad], sizeof(double[num_quad]));
+  int num_nodes = (int)pow(2.0, (double)ndim);
+  struct gkyl_array *basis_at_nodes_ho = gkyl_array_new(GKYL_DOUBLE, basis->num_basis,
+    num_nodes);
+  *basis_at_nodes = mkarr(basis->num_basis, num_nodes, use_gpu);
 
-    assert( (num_quad_v > 1) && (num_quad_v <= gkyl_gauss_max) );
-    memcpy(ordinates1_v, gkyl_gauss_lobatto_ordinates[num_quad_v], sizeof(double[num_quad_v]));
-    memcpy(weights1_v, gkyl_gauss_lobatto_weights[num_quad_v], sizeof(double[num_quad_v]));
-  }
-  else {
-    fprintf(stderr, "Quadrature rule not available.\n");
-    assert(false);
+  double eta[GKYL_MAX_DIM] = { 0.0 };
+  for (int node = 0; node < num_nodes; ++node) {
+    nodal_coords(ndim, node, eta);
+    basis->eval(eta, gkyl_array_fetch(basis_at_nodes_ho, node));
   }
 
-  struct gkyl_range qrange = get_qrange(cdim, ndim, num_quad, num_quad_v, is_vdim_p2);
-
-  int tot_quad = qrange.volume;
-
-  // create ordinates and weights for multi-D quadrature
-  struct gkyl_array *ordinates_ho = gkyl_array_new(GKYL_DOUBLE, ndim, tot_quad);
-  struct gkyl_array *weights_ho = gkyl_array_new(GKYL_DOUBLE, 1, tot_quad);
-  if (use_gpu) {
-    *ordinates = gkyl_array_cu_dev_new(GKYL_DOUBLE, ndim, tot_quad);
-    *weights = gkyl_array_cu_dev_new(GKYL_DOUBLE, 1, tot_quad);
-  } 
-  else {
-    *ordinates = gkyl_array_new(GKYL_DOUBLE, ndim, tot_quad);
-    *weights = gkyl_array_new(GKYL_DOUBLE, 1, tot_quad);
-  }
-
-  struct gkyl_range_iter iter;
-  gkyl_range_iter_init(&iter, &qrange);
-
-  while (gkyl_range_iter_next(&iter)) {
-    int node = gkyl_range_idx(&qrange, iter.idx);
-    
-    // set ordinates
-    double *ord = gkyl_array_fetch(ordinates_ho, node);
-    for (int i=0; i<cdim; ++i)
-      ord[i] = ordinates1[iter.idx[i]-qrange.lower[i]];
-
-    for (int i=cdim; i<ndim; ++i)
-      ord[i] = is_vdim_p2[i-cdim] ? 
-        ordinates1_v[iter.idx[i]-qrange.lower[i]] : ordinates1[iter.idx[i]-qrange.lower[i]];
-    
-    // set weights
-    double *wgt = gkyl_array_fetch(weights_ho, node);
-    wgt[0] = 1.0;
-    for (int i=0; i<cdim; ++i)
-      wgt[0] *= weights1[iter.idx[i]-qrange.lower[i]];
-
-    for (int i=cdim; i<ndim; ++i)
-      wgt[0] *= is_vdim_p2[i-cdim] ? 
-        weights1_v[iter.idx[i]-qrange.lower[i]] : weights1[iter.idx[i]-qrange.lower[i]];
-  }
-
-  // Pre-compute basis functions at ordinates.
-  struct gkyl_array *basis_at_ords_ho = gkyl_array_new(GKYL_DOUBLE, basis->num_basis, tot_quad);
-  if (use_gpu)
-    *basis_at_ords = gkyl_array_cu_dev_new(GKYL_DOUBLE, basis->num_basis, tot_quad);
-  else
-    *basis_at_ords = gkyl_array_new(GKYL_DOUBLE, basis->num_basis, tot_quad);
-
-  for (int n=0; n<tot_quad; ++n)
-    basis->eval(gkyl_array_fetch(ordinates_ho, n), gkyl_array_fetch(basis_at_ords_ho, n));
-
-  // Copy host array to device array.
-  gkyl_array_copy(*ordinates, ordinates_ho);
-  gkyl_array_copy(*weights, weights_ho);
-  gkyl_array_copy(*basis_at_ords, basis_at_ords_ho);
-
-  gkyl_array_release(ordinates_ho);
-  gkyl_array_release(weights_ho);
-  gkyl_array_release(basis_at_ords_ho);
-
-  return tot_quad;
-}
-
-static void
-gkyl_loss_cone_mask_gyrokinetic_Dbmag_quad(gkyl_loss_cone_mask_gyrokinetic *up, 
-  const struct gkyl_range *conf_range, const struct gkyl_array *bmag, const double *bmag_max)
-{
-  // Get bmag_max-bmag at quadrature nodes.
-#ifdef GKYL_HAVE_CUDA
-  if (up->use_gpu)
-    return gkyl_loss_cone_mask_gyrokinetic_Dbmag_quad_cu(up, conf_range, bmag, bmag_max);
-#endif
-
-  int cdim = up->cdim, pdim = up->pdim;
-
-  int tot_quad_conf = up->tot_quad_conf;
-  int num_basis_conf = up->num_basis_conf;
-
-  struct gkyl_range_iter conf_iter;
-  gkyl_range_iter_init(&conf_iter, conf_range);
-  while (gkyl_range_iter_next(&conf_iter)) {
-    long linidx = gkyl_range_idx(conf_range, conf_iter.idx);
+  gkyl_array_copy(*basis_at_nodes, basis_at_nodes_ho);
+  gkyl_array_release(basis_at_nodes_ho);
 
-    const double *bmag_d = gkyl_array_cfetch(bmag, linidx);
-    double *Dbmag_quad = gkyl_array_fetch(up->Dbmag_quad, linidx);
-
-    // Sum over basis 
-    for (int n=0; n<tot_quad_conf; ++n) {
-      const double *b_ord = gkyl_array_cfetch(up->basis_at_ords_conf, n);
-      for (int k=0; k<num_basis_conf; ++k)
-        Dbmag_quad[n] += bmag_d[k]*b_ord[k];
-
-      Dbmag_quad[n] = bmag_max[0] - Dbmag_quad[n];
-    }
-  }
+  return num_nodes;
 }
 
-struct gkyl_loss_cone_mask_gyrokinetic* 
+struct gkyl_loss_cone_mask_gyrokinetic*
 gkyl_loss_cone_mask_gyrokinetic_inew(const struct gkyl_loss_cone_mask_gyrokinetic_inp *inp)
 {
   gkyl_loss_cone_mask_gyrokinetic *up = gkyl_malloc(sizeof(*up));
 
-  up->grid_phase = inp->phase_grid;
   up->vel_map = gkyl_velocity_map_acquire(inp->vel_map);
   up->mass = inp->mass;
   up->charge = inp->charge;
-
-  up->cdim = inp->conf_basis->ndim;
-  up->pdim = inp->phase_basis->ndim;
-
-  up->cellwise_trap_loss = inp->cellwise_trap_loss;
-  int num_quad = inp->num_quad? inp->num_quad : inp->phase_basis->poly_order+1;
-  up->norm_fac = 1;
-  if (!up->cellwise_trap_loss)
-    up->norm_fac = num_quad == 1? 1.0/pow(sqrt(2.0),up->pdim) : 1.0;
-
-  if (num_quad == 1) {
-    up->num_basis_conf = 1;
-    up->num_basis_phase = 1;
-  }
-  else {
-    up->num_basis_conf = inp->conf_basis->num_basis;
-    up->num_basis_phase = inp->phase_basis->num_basis;
-  }
   up->use_gpu = inp->use_gpu;
 
-  // Initialize data needed for conf-space quadrature.
-  up->tot_quad_conf = init_quad_values(up->cdim, inp->conf_basis, inp->qtype, num_quad,
-    &up->ordinates_conf, &up->weights_conf, &up->basis_at_ords_conf, false);
-
-  // Initialize data needed for phase-space quadrature.
-  up->tot_quad_phase = init_quad_values(up->cdim, inp->phase_basis, inp->qtype, num_quad,
-    &up->ordinates_phase, &up->weights_phase, &up->basis_at_ords_phase, false);
-
-  up->fun_at_ords = gkyl_array_new(GKYL_DOUBLE, 1, up->tot_quad_phase); // Only used in CPU implementation.
-
-  // To avoid creating iterators over ranges in device kernel, we'll
-  // create a map between phase-space and conf-space ordinates.
-  int num_quad_v = num_quad;  // Hybrid basis have p=2 in velocity space.
-  // hybrid basis have p=2 in velocity space.
-  bool is_vdim_p2[2] = {false};  // 2 is the max vdim for GK.
-  if (num_quad > 1 && inp->phase_basis->b_type == GKYL_BASIS_MODAL_GKHYBRID) {
-    num_quad_v = num_quad+1;
-    is_vdim_p2[0] = true;  // only vpar is quadratic in GK hybrid.
-  }
-  up->conf_qrange = get_qrange(up->cdim, up->cdim, num_quad, num_quad_v, is_vdim_p2);
-  up->phase_qrange = get_qrange(up->cdim, up->pdim, num_quad, num_quad_v, is_vdim_p2);
-
-  long conf_local_ncells = inp->conf_range->volume;
-  long conf_local_ext_ncells = inp->conf_range_ext->volume;
-
-#ifdef GKYL_HAVE_CUDA
-  if (up->use_gpu) {
-    // Allocate device copies of arrays needed for quadrature.
-
-    int p2c_qidx_ho[up->phase_qrange.volume];
-    up->p2c_qidx = (int*) gkyl_cu_malloc(sizeof(int)*up->phase_qrange.volume);
-
-    // Allocate mask_quad at phase-space quadrature points.
-    // Dbmag_quad at configuration-space quadrature points.
-    // qDphiDbmag_quad, the term proportional to (phi-phi_m)/(bmag_max-bmag), at quadrature points.
-    up->mask_out_quad = gkyl_array_cu_dev_new(GKYL_DOUBLE, up->tot_quad_phase,
-      inp->conf_range_ext->volume*inp->vel_range->volume);
-    up->qDphiDbmag_quad = gkyl_array_cu_dev_new(GKYL_DOUBLE, up->tot_quad_conf, inp->conf_range_ext->volume);
-
-    // Allocate the memory for computing the specific phase nodal to modal calculation
-    struct gkyl_mat_mm_array_mem *phase_nodal_to_modal_mem_ho;
-    phase_nodal_to_modal_mem_ho = gkyl_mat_mm_array_mem_new(up->num_basis_phase, up->tot_quad_phase, 1.0, 0.0, 
-      GKYL_NO_TRANS, GKYL_NO_TRANS, false);
-
-    // Compute the matrix A for the phase nodal to modal memory
-    const double *phase_w = (const double*) up->weights_phase->data;
-    const double *phaseb_o = (const double*) up->basis_at_ords_phase->data;
-    for (int n=0; n<up->tot_quad_phase; ++n) {
-      for (int k=0; k<up->num_basis_phase; ++k)
-        gkyl_mat_set(phase_nodal_to_modal_mem_ho->A, k, n, phase_w[n]*phaseb_o[k+up->num_basis_phase*n]);
-    }
-    
-    // Copy to device
-    up->phase_nodal_to_modal_mem = gkyl_mat_mm_array_mem_new(up->num_basis_phase, up->tot_quad_phase, 1.0, 0.0, 
-      GKYL_NO_TRANS, GKYL_NO_TRANS, up->use_gpu);
-    gkyl_mat_copy(up->phase_nodal_to_modal_mem->A, phase_nodal_to_modal_mem_ho->A);
-    gkyl_mat_mm_array_mem_release(phase_nodal_to_modal_mem_ho);
-
-    // Initialize data needed for conf-space quadrature on device.
-    up->tot_quad_conf = init_quad_values(up->cdim, inp->conf_basis, inp->qtype, num_quad,
-      &up->ordinates_conf, &up->weights_conf, &up->basis_at_ords_conf, up->use_gpu);
-
-    // Initialize data needed for phase-space quadrature on device.
-    up->tot_quad_phase = init_quad_values(up->cdim, inp->phase_basis, inp->qtype, num_quad,
-      &up->ordinates_phase, &up->weights_phase, &up->basis_at_ords_phase, up->use_gpu);
-
-    int pidx[GKYL_MAX_DIM];
-    for (int n=0; n<up->tot_quad_phase; ++n) {
-      gkyl_range_inv_idx(&up->phase_qrange, n, pidx);
-      int cqidx = gkyl_range_idx(&up->conf_qrange, pidx);
-      p2c_qidx_ho[n] = cqidx;
-    }
-    gkyl_cu_memcpy(up->p2c_qidx, p2c_qidx_ho, sizeof(int)*up->phase_qrange.volume, GKYL_CU_MEMCPY_H2D);
-  }
-#endif
-
-  // Allocate and obtain bmag_max-bmag at quadrature points.
-  if (up->use_gpu) 
-    up->Dbmag_quad = gkyl_array_cu_dev_new(GKYL_DOUBLE, up->tot_quad_conf, inp->conf_range_ext->volume);
-  else
-    up->Dbmag_quad = gkyl_array_new(GKYL_DOUBLE, up->tot_quad_conf, inp->conf_range_ext->volume);
+  up->cdim = inp->conf_basis->ndim;
+  up->num_basis_conf = inp->conf_basis->num_basis;
+  up->num_nodes_conf = (int)pow(2.0, (double)up->cdim);
 
-  gkyl_array_clear(up->Dbmag_quad, 0.0); 
-  gkyl_loss_cone_mask_gyrokinetic_Dbmag_quad(up, inp->conf_range, inp->bmag, inp->bmag_max);
+  init_node_values(up->cdim, inp->conf_basis, &up->basis_at_nodes_conf, inp->use_gpu);
 
-  // Save the location of bmag_max in this updater.
-  if (up->use_gpu) {
-    up->bmag_max_loc = gkyl_cu_malloc(sizeof(double)*up->cdim);
-    gkyl_cu_memcpy(up->bmag_max_loc, inp->bmag_max_loc, sizeof(double)*up->cdim, GKYL_CU_MEMCPY_D2D);
-  }
-  else {
-    up->bmag_max_loc = gkyl_malloc(sizeof(double)*up->cdim);
-    memcpy(up->bmag_max_loc, inp->bmag_max_loc, sizeof(double)*up->cdim);
-  }
-    
   return up;
 }
 
-static void
-proj_on_basis(const gkyl_loss_cone_mask_gyrokinetic *up, const struct gkyl_array *fun_at_ords, double* f)
-{
-  int num_basis = up->num_basis_phase;
-  int tot_quad = up->tot_quad_phase;
-
-  const double* GKYL_RESTRICT weights = up->weights_phase->data;
-  const double* GKYL_RESTRICT basis_at_ords = up->basis_at_ords_phase->data;
-  const double* GKYL_RESTRICT func_at_ords = fun_at_ords->data;
-
-  for (int k=0; k<num_basis; ++k) f[k] = 0.0;
-  
-  for (int imu=0; imu<tot_quad; ++imu) {
-    double tmp = weights[imu]*func_at_ords[imu];
-    for (int k=0; k<num_basis; ++k)
-      f[k] += tmp*basis_at_ords[k+num_basis*imu];
-  }
-}
-
-static void
-nod_to_mod_reduce(const gkyl_loss_cone_mask_gyrokinetic *up, const struct gkyl_array *fun_at_ords, double* f)
-{
-  int num_basis = up->num_basis_phase;
-  int tot_quad = up->tot_quad_phase;
-
-  const double* GKYL_RESTRICT weights = up->weights_phase->data;
-  const double* GKYL_RESTRICT basis_at_ords = up->basis_at_ords_phase->data;
-  const double* GKYL_RESTRICT func_at_ords = fun_at_ords->data;
-
-  for (int k=0; k<num_basis; ++k) f[k] = 0.0;
-  f[0] = 1.0;
-  
-  for (int imu=0; imu<tot_quad; ++imu) {
-    if (func_at_ords[imu] < 1e-14) {
-      f[0] = 0.0;
-      break;
-    }
-  }
-}
-
 void
 gkyl_loss_cone_mask_gyrokinetic_advance(gkyl_loss_cone_mask_gyrokinetic *up,
   const struct gkyl_range *phase_range, const struct gkyl_range *conf_range,
-  const struct gkyl_array *phi, const double *phi_m, struct gkyl_array *mask_out)
+  const struct gkyl_array *bmag, const struct gkyl_array *phi, struct gkyl_array *mask_out)
 {
-
 #ifdef GKYL_HAVE_CUDA
-  if (up->use_gpu)
-    return gkyl_loss_cone_mask_gyrokinetic_advance_cu(up, phase_range, conf_range, 
-      phi, phi_m, mask_out);
+  if (up->use_gpu) {
+    gkyl_loss_cone_mask_gyrokinetic_advance_cu(up, phase_range, conf_range, bmag, phi, mask_out);
+    return;
+  }
 #endif
 
-  int cdim = up->cdim, pdim = up->pdim;
-  int vdim = pdim-cdim;
-
-  int tot_quad_conf = up->tot_quad_conf;
+  int cdim = up->cdim;
+  int pdim = phase_range->ndim;
+  int vdim = pdim - cdim;
   int num_basis_conf = up->num_basis_conf;
-
-  struct gkyl_range vel_rng;
-  struct gkyl_range_iter conf_iter, vel_iter;
-
-  int pidx[GKYL_MAX_DIM], rem_dir[GKYL_MAX_DIM] = { 0 };
-  for (int d=0; d<conf_range->ndim; ++d) rem_dir[d] = 1;
-
-  double xc[GKYL_MAX_DIM], xmu[GKYL_MAX_DIM] = {0.0};
-  double phi_quad[tot_quad_conf];
-  double qDphiDbmag_quad[tot_quad_conf]; // charge*(phi-phi_m)/(bmag_max-bmag[0]).
-
-  // Outer loop over configuration space cells; for each
-  // config-space cell inner loop walks over velocity space.
-  gkyl_range_iter_init(&conf_iter, conf_range);
-  while (gkyl_range_iter_next(&conf_iter)) {
-    long linidx_conf = gkyl_range_idx(conf_range, conf_iter.idx);
-
-    const double *phi_d = gkyl_array_cfetch(phi, linidx_conf);
-    const double *Dbmag_quad = gkyl_array_cfetch(up->Dbmag_quad, linidx_conf);
-
-    // Sum over basis for given potential phi.
-    for (int n=0; n<tot_quad_conf; ++n) {
-      const double *b_ord = gkyl_array_cfetch(up->basis_at_ords_conf, n);
-
-      // Compute the configuration-space quadrature
-      phi_quad[n] = 0.0;
-      for (int k=0; k<num_basis_conf; ++k)
-        phi_quad[n] += phi_d[k]*b_ord[k];
-
-      if (Dbmag_quad[n] > 0.0)
-        qDphiDbmag_quad[n] = up->charge*(phi_quad[n]-phi_m[0])/Dbmag_quad[n];
-      else
-        qDphiDbmag_quad[n] = 0.0;
+  int num_phase_nodes = (int)pow(2.0, (double)pdim);
+  int num_vel_nodes = (int)pow(2.0, (double)vdim);
+
+  // Outer loop over all phase space cells
+  struct gkyl_range_iter phase_iter;
+  gkyl_range_iter_init(&phase_iter, phase_range);
+  while (gkyl_range_iter_next(&phase_iter)) {
+    int conf_idx[GKYL_MAX_DIM] = { 0 };
+    int vel_idx[GKYL_MAX_DIM] = { 0 };
+    for (int d = 0; d < cdim; ++d) {
+      conf_idx[d] = phase_iter.idx[d];
+    }
+    for (int d = 0; d < vdim; ++d) {
+      vel_idx[d] = phase_iter.idx[cdim + d];
     }
 
-    // Inner loop over velocity space.
-    gkyl_range_deflate(&vel_rng, phase_range, rem_dir, conf_iter.idx);
-    gkyl_range_iter_no_split_init(&vel_iter, &vel_rng);
-    while (gkyl_range_iter_next(&vel_iter)) {
-      
-      copy_idx_arrays(conf_range->ndim, phase_range->ndim, conf_iter.idx, vel_iter.idx, pidx);
-      long linidx_phase = gkyl_range_idx(&vel_rng, vel_iter.idx);
-
-      // Compute the mask function at phase-space quadrature nodes.
-      struct gkyl_range_iter qiter;
-      gkyl_range_iter_init(&qiter, &up->phase_qrange);
-      while (gkyl_range_iter_next(&qiter)) {
-
-        int cqidx = gkyl_range_idx(&up->conf_qrange, qiter.idx);
-        int pqidx = gkyl_range_idx(&up->phase_qrange, qiter.idx);
-
-        const double *xcomp_d = gkyl_array_cfetch(up->ordinates_phase, pqidx);
-
-        // Convert comp position coordinate to phys pos coord.
-        gkyl_rect_grid_cell_center(up->grid_phase, pidx, xc);
-        log_to_comp(up->cdim, xcomp_d, up->grid_phase->dx, xc, xmu);
-
-        // Convert comp velocity coordinate to phys velocity coord.
-        const struct gkyl_velocity_map *gvm = up->vel_map;
-        long linidx_vel = gkyl_range_idx(&gvm->local_ext_vel, vel_iter.idx);
-        const double *vmap_d = gkyl_array_cfetch(gvm->vmap, linidx_vel);
-        double xcomp[1];
-        for (int vd = 0; vd < vdim; vd++) {
-          xcomp[0] = xcomp_d[cdim+vd];
-          xmu[cdim+vd] = gvm->vmap_basis->eval_expand(xcomp, vmap_d+vd*gvm->vmap_basis->num_basis);
-        }
-
-        // KEparDbmag = 0.5*mass*pow(vpar,2)/(bmag_max-bmag[0]).
-        double KEparDbmag = 0.0;
-        if (Dbmag_quad[cqidx] > 0.0)
-          KEparDbmag = 0.5*up->mass*pow(xmu[cdim], 2.0)/Dbmag_quad[cqidx];
-        else
-          KEparDbmag = 0.0;
-
-	double mu_bound = GKYL_MAX2(0.0, KEparDbmag+qDphiDbmag_quad[cqidx]);
-
-        double *fq = gkyl_array_fetch(up->fun_at_ords, pqidx);
-	if (mu_bound < xmu[cdim+1] && fabs(xmu[cdim-1]) < fabs(up->bmag_max_loc[cdim-1])) 
-          fq[0] = 1.0 * up->norm_fac;
-        else
-          fq[0] = 0.0;
+    const struct gkyl_velocity_map *gvm = up->vel_map;
+    long linidx_vel = gkyl_range_idx(&gvm->local_ext_vel, vel_idx);
+    const double *vmap_d = gkyl_array_cfetch(gvm->vmap, linidx_vel);
+
+    // A cell counts as trapped only if every one of its nodes is trapped, so the node loop stops at the first untrapped node.
+    bool cell_trapped = true;
+    for (int node = 0; node < num_phase_nodes && cell_trapped; ++node) {
+      int conf_node = node / num_vel_nodes;
+      int vel_node = node % num_vel_nodes;
+
+      // Evaluate velocity coordinates at the node
+      double xmu[GKYL_MAX_DIM] = { 0.0 };
+      for (int vd = 0; vd < vdim; ++vd) {
+        double vel_eta[GKYL_MAX_DIM] = { 0.0 };
+        nodal_coords(vdim, vel_node, vel_eta);
+        double xcomp[1] = { vel_eta[vd] };
+        xmu[cdim + vd] = gvm->vmap_basis->eval_expand(xcomp,
+          vmap_d + vd * gvm->vmap_basis->num_basis);
       }
-      // Compute DG expansion coefficients of the mask.
-      if (up->cellwise_trap_loss)
-        nod_to_mod_reduce(up, up->fun_at_ords, gkyl_array_fetch(mask_out, linidx_phase));
-      else
-        proj_on_basis(up, up->fun_at_ords, gkyl_array_fetch(mask_out, linidx_phase));
+      double mu = xmu[cdim + 1];
+      double vpar = xmu[cdim];
+
+      // Evaluate Hamiltonian at the node
+      long linidx_conf = gkyl_range_idx(conf_range, conf_idx);
+      double bmag_curr = field_node_val(bmag, up->basis_at_nodes_conf, num_basis_conf,
+        linidx_conf, conf_node);
+      double phi_curr = field_node_val(phi, up->basis_at_nodes_conf, num_basis_conf,
+        linidx_conf, conf_node);
+      double h_curr = 0.5 * up->mass * vpar * vpar + mu * bmag_curr + up->charge * phi_curr;
+
+      // Determine escape barriers at the node
+      int zdim = cdim - 1;
+      double barrier_left, barrier_right;
+      escape_barriers(cdim, num_basis_conf, conf_range, up->basis_at_nodes_conf,
+        phi, bmag, conf_idx, conf_idx[zdim], conf_node, mu, up->charge,
+        &barrier_left, &barrier_right);
+
+      // The node is trapped only if H lies strictly below both barriers; otherwise the whole cell is marked untrapped.
+      cell_trapped = h_curr < GKYL_MIN2(barrier_left, barrier_right);
     }
+
+    long linidx_phase = gkyl_range_idx(phase_range, phase_iter.idx);
+    double *mask_d = gkyl_array_fetch(mask_out, linidx_phase);
+    mask_d[0] = cell_trapped ? 1.0 : 0.0;
   }
 }
 
 void
-gkyl_loss_cone_mask_gyrokinetic_release(gkyl_loss_cone_mask_gyrokinetic* up)
+gkyl_loss_cone_mask_gyrokinetic_release(gkyl_loss_cone_mask_gyrokinetic *up)
 {
   gkyl_velocity_map_release(up->vel_map);
-
-  gkyl_array_release(up->ordinates_phase);
-  gkyl_array_release(up->weights_phase);
-  gkyl_array_release(up->basis_at_ords_phase);
-
-  gkyl_array_release(up->ordinates_conf);
-  gkyl_array_release(up->weights_conf);
-  gkyl_array_release(up->basis_at_ords_conf);
-
-  gkyl_array_release(up->fun_at_ords);
-  gkyl_array_release(up->Dbmag_quad);
-
-  if (up->use_gpu) {
-    gkyl_cu_free(up->p2c_qidx);
-    gkyl_array_release(up->mask_out_quad);
-    gkyl_array_release(up->qDphiDbmag_quad);
-    gkyl_mat_mm_array_mem_release(up->phase_nodal_to_modal_mem);
-    gkyl_cu_free(up->bmag_max_loc);
-  }
-  else {
-    gkyl_free(up->bmag_max_loc);
-  }
-
+  gkyl_array_release(up->basis_at_nodes_conf);
   gkyl_free(up);
 }
diff --git a/gyrokinetic/zero/loss_cone_mask_gyrokinetic_cu.cu b/gyrokinetic/zero/loss_cone_mask_gyrokinetic_cu.cu
index e38c8780c5..69be13df89 100644
--- a/gyrokinetic/zero/loss_cone_mask_gyrokinetic_cu.cu
+++ b/gyrokinetic/zero/loss_cone_mask_gyrokinetic_cu.cu
@@ -1,277 +1,106 @@
 /* -*- c++ -*- */
-#include <cuda_runtime.h>
-#include <cublas_v2.h>
+#include <float.h>
+#include <math.h>
 
 extern "C" {
-#include <gkyl_alloc.h>
 #include <gkyl_array.h>
-#include <gkyl_array_ops.h>
 #include <gkyl_const.h>
-#include <gkyl_gauss_quad_data.h>
 #include <gkyl_loss_cone_mask_gyrokinetic.h>
-#include <gkyl_loss_cone_mask_gyrokinetic_priv.h>
 #include <gkyl_range.h>
-
-#include <gkyl_mat.h>
-#include <gkyl_mat_priv.h>
-}
-
-__global__ static void
-gkyl_loss_cone_mask_gyrokinetic_Dbmag_quad_cu_ker(struct gkyl_range conf_range,
-  const struct gkyl_array* basis_at_ords_conf, const struct gkyl_array* bmag, const double *bmag_max,
-  struct gkyl_array* Dbmag_quad_d)
-{    
-  int num_basis_conf = basis_at_ords_conf->ncomp;
-  int tot_quad_conf = basis_at_ords_conf->size;
-
-  int cidx[GKYL_MAX_CDIM];
-
-  for(unsigned long tid = threadIdx.x + blockIdx.x*blockDim.x;
-      tid < conf_range.volume; tid += blockDim.x*gridDim.x) {
-
-    gkyl_sub_range_inv_idx(&conf_range, tid, cidx);
-    long linidx = gkyl_range_idx(&conf_range, cidx);
-
-    const double *bmag_d = (const double*) gkyl_array_cfetch(bmag, linidx);
-
-    double *bmag_quad = (double*) gkyl_array_fetch(Dbmag_quad_d, linidx);
-
-    for (int n=0; n<tot_quad_conf; ++n) {
-      const double *b_ord = (const double*) gkyl_array_cfetch(basis_at_ords_conf, n);
-
-      for (int k=0; k<num_basis_conf; ++k)
-        bmag_quad[n] += bmag_d[k]*b_ord[k];
-
-      bmag_quad[n] = bmag_max[0] - bmag_quad[n];
-    }
-  }
-}
-
-void 
-gkyl_loss_cone_mask_gyrokinetic_Dbmag_quad_cu(gkyl_loss_cone_mask_gyrokinetic *up,
-  const struct gkyl_range *conf_range, const struct gkyl_array *bmag, const double *bmag_max)
-{
-  int nblocks = conf_range->nblocks, nthreads = conf_range->nthreads;
-  gkyl_loss_cone_mask_gyrokinetic_Dbmag_quad_cu_ker<<<nblocks, nthreads>>>(*conf_range, 
-    up->basis_at_ords_conf->on_dev, bmag->on_dev, bmag_max, up->Dbmag_quad->on_dev);
-}
-
-static void
-gkyl_parallelize_components_kernel_launch_dims(dim3* dimGrid, dim3* dimBlock, gkyl_range range, int ncomp)
-{
-  // Create a 2D thread grid so we launch ncomp*range.volume number of threads 
-  // so we can parallelize over components too
-  dimBlock->y = ncomp; // ncomp *must* be less than 256
-  dimGrid->y = 1;
-  dimBlock->x = GKYL_DEFAULT_NUM_THREADS/ncomp;
-  dimGrid->x = gkyl_int_div_up(range.volume, dimBlock->x);
+#include <gkyl_velocity_map.h>
 }
 
-__global__ static void
-gkyl_loss_cone_mask_gyrokinetic_qDphiDbmag_quad_ker(struct gkyl_range conf_range, 
-  const struct gkyl_array* basis_at_ords_conf, double charge, const struct gkyl_array* phi,
-  const double *phi_m, const struct gkyl_array* Dbmag_quad, struct gkyl_array* qDphiDbmag_quad)
-{
-  int num_basis_conf = basis_at_ords_conf->ncomp;
-
-  int cidx[GKYL_MAX_CDIM];
-
-  // 2D thread grid
-  // linc2 goes from 0 to tot_quad_conf= basis_at_ords_conf->size.
-  long linc2 = threadIdx.y + blockIdx.y*blockDim.y;
-  for(unsigned long tid = threadIdx.x + blockIdx.x*blockDim.x;
-      tid < conf_range.volume; tid += blockDim.x*gridDim.x) {
-    gkyl_sub_range_inv_idx(&conf_range, tid, cidx);
-
-    long linidx = gkyl_range_idx(&conf_range, cidx);
-
-    const double *phi_d = (const double*) gkyl_array_cfetch(phi, linidx);
-    const double *Dbmag_quad_d = (const double*) gkyl_array_cfetch(Dbmag_quad, linidx);
-
-    // Sum over basis at configuration-space quadrature points. 
-    const double *b_ord = (const double*) gkyl_array_cfetch(basis_at_ords_conf, linc2);
-    double phi_quad = 0;
-    for (int k=0; k<num_basis_conf; ++k)
-      phi_quad += phi_d[k]*b_ord[k];
-
-    // Potential energy term at each quadrature point.
-    double *qDphiDbmag_quad_d = (double*) gkyl_array_fetch(qDphiDbmag_quad, linidx);
-    if (Dbmag_quad_d[linc2] > 0.0)
-      qDphiDbmag_quad_d[linc2] = charge*(phi_quad-phi_m[0])/Dbmag_quad_d[linc2];
-    else
-      qDphiDbmag_quad_d[linc2] = 0.0;
-  }
-}
+#include <gkyl_loss_cone_mask_gyrokinetic_priv.h>
 
 __global__ static void
-gkyl_loss_cone_mask_gyrokinetic_ker(struct gkyl_rect_grid grid_phase,
-  struct gkyl_range phase_range, struct gkyl_range conf_range, struct gkyl_range vel_range,
-  double mass, const struct gkyl_array* phase_ordinates, 
-  const double *bmag_max_loc, const struct gkyl_array* qDphiDbmag_quad, const struct gkyl_array* Dbmag_quad,
-  const int *p2c_qidx, struct gkyl_array* vmap, struct gkyl_basis* vmap_basis, struct gkyl_array* mask_out)
+gkyl_loss_cone_mask_gyrokinetic_advance_cu_ker(int cdim, int num_basis_conf, // Kernel: sets mask_out[0] of each phase cell to 1.0 if every node passes the trapping test, else 0.0.
+  int num_phase_nodes, int num_vel_nodes, double mass, double charge,
+  struct gkyl_range phase_range, struct gkyl_range conf_range,
+  const struct gkyl_array *basis_at_nodes_conf,
+  const struct gkyl_array *bmag, const struct gkyl_array *phi,
+  const struct gkyl_velocity_map *gvm,
+  struct gkyl_array *mask_out)
 {
-  int pdim = phase_range.ndim, cdim = conf_range.ndim;
-  int vdim = pdim-cdim;
-
-  double xc[GKYL_MAX_DIM], xmu[GKYL_MAX_DIM] = {0.0};
-  int pidx[GKYL_MAX_DIM], cidx[GKYL_MAX_CDIM], vidx[2];
-
-  int tot_phase_quad = phase_ordinates->size;
-
-  for(unsigned long tid = threadIdx.x + blockIdx.x*blockDim.x;
-      tid < phase_range.volume; tid += blockDim.x*gridDim.x) {
-    gkyl_sub_range_inv_idx(&phase_range, tid, pidx);
+  int pdim = phase_range.ndim;
+  int vdim = pdim - cdim;
 
-    // Get configuration-space linear index.
-    for (unsigned int k = 0; k < cdim; k++) cidx[k] = pidx[k];
-    long linidx_conf = gkyl_range_idx(&conf_range, cidx);
+  int phase_idx[GKYL_MAX_DIM];
+  int conf_idx[GKYL_MAX_DIM] = { 0 };
+  int vel_idx[GKYL_MAX_DIM] = { 0 };
 
-    const double *Dbmag_quad_d = (const double*) gkyl_array_cfetch(Dbmag_quad, linidx_conf);
-    const double *qDphiDbmag_quad_d = (const double*) gkyl_array_cfetch(qDphiDbmag_quad, linidx_conf);
+  for (unsigned long tid = threadIdx.x + blockIdx.x * blockDim.x; // Each thread starts at its global id and advances by the total number of launched threads.
+    tid < phase_range.volume; tid += blockDim.x * gridDim.x) {
 
-    gkyl_rect_grid_cell_center(&grid_phase, pidx, xc);
-    long linidx_phase = gkyl_range_idx(&phase_range, pidx);
-    double *mask_d = (double*) gkyl_array_fetch(mask_out, linidx_phase);
+    gkyl_sub_range_inv_idx(&phase_range, tid, phase_idx);
 
-    for (int d = cdim; d < pdim; d++) vidx[d-cdim] = pidx[d];
-    long linidx_vel = gkyl_range_idx(&vel_range, vidx);
-    const double *vmap_d = (const double*) gkyl_array_cfetch(vmap, linidx_vel);
-
-    mask_d[0] = 1.0; // In this case the mask has ncomp=1.
-
-    for (int n=0; n<tot_phase_quad; ++n) {
-      int cqidx = p2c_qidx[n];
-
-      const double *xcomp_d = (const double*) gkyl_array_cfetch(phase_ordinates, n);
-
-      // Convert comp position coordinate to phys pos coord.
-      log_to_comp(cdim, xcomp_d, grid_phase.dx, xc, xmu);
-  
-      // Convert comp velocity coordinate to phys velocity coord.
-      double xcomp[1];
-      for (int vd = 0; vd < vdim; vd++) {
-        xcomp[0] = xcomp_d[cdim+vd];
-        xmu[cdim+vd] = vmap_basis->eval_expand(xcomp, vmap_d+vd*vmap_basis->num_basis);
-      }
-  
-      // KEparDbmag = 0.5*mass*pow(vpar,2)/(bmag_max-bmag[0]).
-      double KEparDbmag = 0.0;
-      if (Dbmag_quad_d[cqidx] > 0.0)
-        KEparDbmag = 0.5*mass*pow(xmu[cdim], 2.0)/Dbmag_quad_d[cqidx];
-      else
-        KEparDbmag = 0.0;
-  
-      double mu_bound = GKYL_MAX2(0.0, KEparDbmag+qDphiDbmag_quad_d[cqidx]);
-  
-      if ( !(mu_bound < xmu[cdim+1] && fabs(xmu[cdim-1]) < fabs(bmag_max_loc[cdim-1])) ) {
-        mask_d[0] = 0.0;
-        break;
-      }
+    for (int d = 0; d < cdim; ++d) { // First cdim entries of the phase index form the configuration-space index.
+      conf_idx[d] = phase_idx[d];
+    }
+    for (int d = 0; d < vdim; ++d) { // Remaining vdim entries form the velocity-space index.
+      vel_idx[d] = phase_idx[cdim + d];
     }
-  }
-}
-
-__global__ static void
-gkyl_loss_cone_mask_gyrokinetic_quad_ker(struct gkyl_rect_grid grid_phase,
-  struct gkyl_range phase_range, struct gkyl_range conf_range, struct gkyl_range vel_range,
-  double mass, double norm_fac, const struct gkyl_array* phase_ordinates, 
-  const double *bmag_max_loc, const struct gkyl_array* qDphiDbmag_quad, const struct gkyl_array* Dbmag_quad,
-  const int *p2c_qidx, struct gkyl_array* vmap, struct gkyl_basis* vmap_basis, struct gkyl_array* mask_out_quad)
-{
-  int pdim = phase_range.ndim, cdim = conf_range.ndim;
-  int vdim = pdim-cdim;
-
-  double xc[GKYL_MAX_DIM], xmu[GKYL_MAX_DIM] = {0.0};
-  int pidx[GKYL_MAX_DIM], cidx[GKYL_MAX_CDIM], vidx[2];
-
-  // 2D thread grid
-  // linc2 goes from 0 to tot_quad_phase
-  long linc2 = threadIdx.y + blockIdx.y*blockDim.y;
-  for(unsigned long tid = threadIdx.x + blockIdx.x*blockDim.x;
-      tid < phase_range.volume; tid += blockDim.x*gridDim.x) {
-    gkyl_sub_range_inv_idx(&phase_range, tid, pidx);
 
-    // Get configuration-space linear index.
-    for (unsigned int k = 0; k < cdim; k++) cidx[k] = pidx[k];
+    long linidx_vel = gkyl_range_idx(&gvm->local_ext_vel, vel_idx);
+    const double *vmap_d = (const double *)gkyl_array_cfetch(gvm->vmap, linidx_vel);
 
-    long linidx_conf = gkyl_range_idx(&conf_range, cidx);
+    bool cell_trapped = true; // Cleared by the first node that fails the test; the loop condition then ends the scan.
+    for (int node = 0; node < num_phase_nodes && cell_trapped; ++node) {
+      int conf_node = node / num_vel_nodes; // Node index is split into a configuration part (quotient)...
+      int vel_node = node % num_vel_nodes; // ...and a velocity part (remainder).
 
-    const double *Dbmag_quad_d = (const double*) gkyl_array_cfetch(Dbmag_quad, linidx_conf);
-    const double *qDphiDbmag_quad_d = (const double*) gkyl_array_cfetch(qDphiDbmag_quad, linidx_conf);
+      double xmu[GKYL_MAX_DIM] = { 0.0 };
+      double vel_eta[GKYL_MAX_DIM] = { 0.0 };
+      nodal_coords(vdim, vel_node, vel_eta);
 
-    gkyl_rect_grid_cell_center(&grid_phase, pidx, xc);
-    long linidx_phase = gkyl_range_idx(&phase_range, pidx);
+      for (int vd = 0; vd < vdim; ++vd) { // Evaluate the velocity map expansion at the node, one velocity direction at a time.
+        double xcomp[1] = { vel_eta[vd] };
+        xmu[cdim + vd] = gvm->vmap_basis->eval_expand(xcomp,
+          vmap_d + vd * gvm->vmap_basis->num_basis);
+      }
 
-    int cqidx = p2c_qidx[linc2];
-    for (int d = cdim; d < pdim; d++) vidx[d-cdim] = pidx[d];
+      double mu = xmu[cdim + 1]; // Second velocity coordinate; remains 0.0 when vdim < 2 because xmu is zero-initialized.
+      double vpar = xmu[cdim];
 
-    long linidx_vel = gkyl_range_idx(&vel_range, vidx);
-    const double *vmap_d = (const double*) gkyl_array_cfetch(vmap, linidx_vel);
-    const double *xcomp_d = (const double*) gkyl_array_cfetch(phase_ordinates, linc2);
+      long linidx_conf = gkyl_range_idx(&conf_range, conf_idx); // NOTE(review): independent of node; could be computed once before the node loop.
+      double bmag_curr = field_node_val(bmag, basis_at_nodes_conf, num_basis_conf,
+        linidx_conf, conf_node);
+      double phi_curr = field_node_val(phi, basis_at_nodes_conf, num_basis_conf,
+        linidx_conf, conf_node);
+      double h_curr = 0.5 * mass * vpar * vpar + mu * bmag_curr + charge * phi_curr; // Node energy: 0.5*m*vpar^2 + mu*B + q*phi.
 
-    // Convert comp position coordinate to phys pos coord.
-    gkyl_rect_grid_cell_center(&grid_phase, pidx, xc);
-    log_to_comp(cdim, xcomp_d, grid_phase.dx, xc, xmu);
+      int zdim = cdim - 1;
+      double barrier_left, barrier_right;
+      escape_barriers(cdim, num_basis_conf, &conf_range, basis_at_nodes_conf,
+        phi, bmag, conf_idx, conf_idx[zdim], conf_node, mu, charge,
+        &barrier_left, &barrier_right);
 
-    // Convert comp velocity coordinate to phys velocity coord.
-    double xcomp[1];
-    for (int vd = 0; vd < vdim; vd++) {
-      xcomp[0] = xcomp_d[cdim+vd];
-      xmu[cdim+vd] = vmap_basis->eval_expand(xcomp, vmap_d+vd*vmap_basis->num_basis);
+      cell_trapped = h_curr < GKYL_MIN2(barrier_left, barrier_right); // Trapped only if the energy is below both barriers.
     }
 
-    // KEparDbmag = 0.5*mass*pow(vpar,2)/(bmag_max-bmag[0]).
-    double KEparDbmag = 0.0;
-    if (Dbmag_quad_d[cqidx] > 0.0)
-      KEparDbmag = 0.5*mass*pow(xmu[cdim], 2.0)/Dbmag_quad_d[cqidx];
-    else
-      KEparDbmag = 0.0;
-
-    double mu_bound = GKYL_MAX2(0.0, KEparDbmag+qDphiDbmag_quad_d[cqidx]);
-
-    double *fq = (double*) gkyl_array_fetch(mask_out_quad, linidx_phase);
-    if (mu_bound < xmu[cdim+1] && fabs(xmu[cdim-1]) < fabs(bmag_max_loc[cdim-1])) 
-      fq[linc2] = norm_fac;
-    else
-      fq[linc2] = 0.0;
+    long linidx_phase = gkyl_range_idx(&phase_range, phase_idx);
+    double *mask_d = (double *)gkyl_array_fetch(mask_out, linidx_phase);
+    mask_d[0] = cell_trapped ? 1.0 : 0.0; // Only component 0 of the mask is written.
   }
 }
 
-void
+extern "C" void // Host-side launcher (C linkage) for the loss-cone mask kernel over phase_range.
 gkyl_loss_cone_mask_gyrokinetic_advance_cu(gkyl_loss_cone_mask_gyrokinetic *up,
   const struct gkyl_range *phase_range, const struct gkyl_range *conf_range,
-  const struct gkyl_array *phi, const double *phi_m, struct gkyl_array *mask_out)
+  const struct gkyl_array *bmag, const struct gkyl_array *phi, struct gkyl_array *mask_out)
 {
-  dim3 dimGrid_conf, dimBlock_conf;
-  int tot_quad_conf = up->basis_at_ords_conf->size;
-  gkyl_parallelize_components_kernel_launch_dims(&dimGrid_conf, &dimBlock_conf, *conf_range, tot_quad_conf);
-
-  gkyl_loss_cone_mask_gyrokinetic_qDphiDbmag_quad_ker<<<dimGrid_conf, dimBlock_conf>>>(*conf_range, 
-    up->basis_at_ords_conf->on_dev, up->charge, phi->on_dev, phi_m, up->Dbmag_quad->on_dev,
-    up->qDphiDbmag_quad->on_dev);
+  int pdim = phase_range->ndim;
+  int vdim = pdim - up->cdim;
+  int num_phase_nodes = 1 << pdim; // 2^pdim nodes per phase cell, computed exactly in integer arithmetic.
+  int num_vel_nodes = 1 << vdim; // 2^vdim; the kernel splits each node index by this count.
 
   const struct gkyl_velocity_map *gvm = up->vel_map;
 
-  if (up->cellwise_trap_loss) {
-    // Don't do quadrature.
-    int nblocks = phase_range->nblocks, nthreads = phase_range->nthreads;
-    gkyl_loss_cone_mask_gyrokinetic_ker<<<nblocks, nthreads>>>(*up->grid_phase, *phase_range, *conf_range,
-      gvm->local_ext_vel, up->mass, up->ordinates_phase->on_dev,
-      up->bmag_max_loc, up->qDphiDbmag_quad->on_dev, up->Dbmag_quad->on_dev, up->p2c_qidx, gvm->vmap->on_dev,
-      gvm->vmap_basis, mask_out->on_dev);
-  }
-  else {
-    // Use quadrature.
-    dim3 dimGrid, dimBlock;
-    int tot_quad_phase = up->basis_at_ords_phase->size;
-    gkyl_parallelize_components_kernel_launch_dims(&dimGrid, &dimBlock, *phase_range, tot_quad_phase);
-
-    gkyl_loss_cone_mask_gyrokinetic_quad_ker<<<dimGrid, dimBlock>>>(*up->grid_phase, *phase_range, *conf_range,
-      gvm->local_ext_vel, up->mass, up->norm_fac, up->ordinates_phase->on_dev,
-      up->bmag_max_loc, up->qDphiDbmag_quad->on_dev, up->Dbmag_quad->on_dev, up->p2c_qidx, gvm->vmap->on_dev,
-      gvm->vmap_basis, up->mask_out_quad->on_dev);
+  int nblocks = phase_range->nblocks; // Launch configuration is taken from the phase range itself.
+  int nthreads = phase_range->nthreads;
 
-    // Call cublas to do the matrix multiplication nodal to modal conversion
-    gkyl_mat_mm_array(up->phase_nodal_to_modal_mem, up->mask_out_quad, mask_out);
-  }
+  gkyl_loss_cone_mask_gyrokinetic_advance_cu_ker<<<nblocks, nthreads>>>(
+    up->cdim, up->num_basis_conf, num_phase_nodes, num_vel_nodes,
+    up->mass, up->charge, *phase_range, *conf_range, // Ranges are passed to the kernel by value.
+    up->basis_at_nodes_conf->on_dev, bmag->on_dev, phi->on_dev,
+    gvm->on_dev, mask_out->on_dev); // The on_dev members of the arrays and velocity map are what the kernel receives.
 }