Skip to content

Policy Design

Avirup Sircar edited this page May 16, 2026 · 1 revision

CMake Backend Selection

CMakeLists.txt

if (USE_CUDA)

  add_library(dftfe DeviceBackendCUDA.hpp)

elseif (USE_HIP)

  add_library(dftfe DeviceBackendHIP.hpp)

elseif (USE_SYCL)

  add_library(dftfe DeviceBackendSYCL.hpp)

endif()

Device Backend Interface

DeviceBackend.h

/*
 * @author Bikash Kanungo
 */

#pragma once

namespace dftefe
{
  template <typename Kernel, typename... Args>
  void launchDevice(Kernel k, Args&&... args);
}

CUDA Backend

DeviceBackendCUDA.h

/*
 * @author Bikash Kanungo
 */

#include "DeviceBackend.h"
#include "KernelContext.h"

namespace dftefe
{
  template <typename Kernel, typename... Args>
    __global__ void cudaWrapper(Kernel k, Args... args)
    {
      KernelContext ctx;
      ctx.globalThreadId = blockIdx.x * blockDim.x + threadIdx.x;
      ctx.nThreadsPerBlock = blockDim.x;
      ctx.nThreadBlock = gridDim.x;

      k(ctx, args...);

      // add stream sync
    }

  template <typename Kernel, typename... Args>
    void launchDevice(Kernel k, Args&&... args)
    {
      //
      // NOTE: Needs to be set to something like
      // DFTEFE_DEVICE_BLOCK_SIZE and
      // DFTEFE_DEVICE_GRID_SIZE defined in KernelContext.h
      //

      int block = 256;
      int grid  = 64;

      cudaWrapper<<<grid, block>>>(k, args...);
    }
}

HIP Backend

DeviceBackendHIP.h

/*
 * @author Bikash Kanungo
 */

#include "DeviceBackend.h"
#include "KernelContext.h"
#include <hip/hip_runtime.h>

namespace dftefe
{
  template <typename Kernel, typename... Args>
    __global__ void hipWrapper(Kernel k, Args... args)
    {
      KernelContext ctx;
      ctx.globalThreadId = blockIdx.x * blockDim.x + threadIdx.x;

      k(ctx, args...);
    }

  template <typename Kernel, typename... Args>
    void launchDevice(Kernel k, Args... args)
    {
      //
      // NOTE: Needs to be set to something like
      // DFTEFE_DEVICE_BLOCK_SIZE and
      // DFTEFE_DEVICE_GRID_SIZE defined in KernelContext.h
      //

      int block = 256;
      int grid  = 64;

      hipLaunchKernelGGL(hipWrapper, grid, block, 0, 0, k, args...);
    }
}

SYCL Backend

DeviceBackendSYCL.h

/*
 * @author Bikash Kanungo
 */

#include "DeviceBackend.h"
#include "KernelContext.h"
#include <sycl/sycl.hpp>

namespace dftefe
{
  template <typename Kernel, typename... Args>
    void launchDevice(Kernel k, Args... args)
    {
      sycl::queue q;

      q.parallel_for(
          // NOTE: 1024 to be changed to some prefined integer like
          // DFTEFE_DEVICE_MAX_BLOCK_SIZE defined in KernelContext.h

          sycl::range<1>(1024),
          [=](sycl::id<1> i)
          {
            KernelContext ctx;
            ctx.globalThreadId = i[0];

            k(ctx, args...);
          });
    }
}

Example Driver

main.cpp

/*
 * @author Bikash Kanungo
 */

#include "Launch.h"
#include "ExampleKernels.h"
#include <utils/MemorySpace.h>

using namespace dftefe;

int main()
{
  float *x = nullptr;
  float *y = nullptr;
  float *result = nullptr;

  int n = 1024;

  SaxpyPolicy sp{x, y, 2.0f};
  ScalePolicy sc{x, 3.0f};
  DotPolicy dp{x, y};

  launch<utils::MemorySpace::HOST>(saxpy, sp, n);
  launch<utils::MemorySpace::DEVICE>(saxpy, sp, n);

  launch<utils::MemorySpace::HOST>(scale, sc, n);
  launch<utils::MemorySpace::DEVICE>(scale, sc, n);

  launch<utils::MemorySpace::DEVICE>(dot, dp, result, n);
}

Example Kernels

ExampleKernels.h

/*
 * @author Bikash Kanungo
 */

#pragma once

#include "KernelMacros.h"
#include "ExamplePolicies.h"

namespace dftefe
{

  __global__ void saxpy(SaxpyPolicy policy, int n)
  {
    if (globalThreadId < n)
      policy.foo(globalThreadId);
  }

  template <typename Policy>
  DFTFE_CREATE_KERNEL(
      void,
      generickernel,
      {
        if (globalThreadId < n)
        {
          policy(globalThreadId);
        }
      },
      Policy policy,
      int n,
      float *x,
  );

  DFTFE_CREATE_KERNEL(
      void,
      saxpyfoo,
      {
        if (globalThreadId < n)
          policy.foo(globalThreadId);
      },
      SaxpyPolicy policy,
      int n
  );

  DFTFE_CREATE_KERNEL(
      void,
      saxpy,
      {
        if (globalThreadId < n)
          policy(globalThreadId);
      },
      SaxpyPolicy policy,
      int n
  );

  DFTFE_CREATE_KERNEL(
      void,
      saxpyadd,
      {
        if (globalThreadId < n)
          policy(globalThreadId) + a * policy.foo(globalThreadId);
      },
      SaxpyPolicy policy,
      int n,
      int a
  );

  DFTFE_CREATE_KERNEL(
      void,
      scale,
      {
        if (globalThreadId < n)
          policy(globalThreadId);
      },
      ScalePolicy policy,
      int n
  );

  DFTFE_CREATE_KERNEL(
      float,
      dothost,
      {
        float val = 0.0;

        if (globalThreadId < n)
          val = policy(globalThreadId);

        if (globalThreadId == 0)
          *result = val;
      },
      DotPolicy<HOST> policy,
      float* result,
      int n
  );

}

Example Policies

ExamplePolicies.h

#pragma once

#include "kernel_context.h"

namespace dftefe
{

using uInt = std::uint32_t;

struct SaxpyPolicy
{
  float* x;
  float* y;
  float a;

  __host__ __device__
  // DFTFE_HOST DFTFE_DEVICE FORCEINLINE
  void operator()(uInt i) const
  {
    y[i] += a * x[i];
  }

  __host__ __device__
  void foo(uInt i) const
  {
    y[i] += a * x[i];
  }
};

struct ScalePolicy
{
  float* x;
  float a;

  __host__ __device__
  void operator()(uInt i) const
  {
    x[i] *= a;
  }
};

template<typename P1, typename P2>
class ComposePolicy
{
  P1 p1;
  P2 p2;

  __host__ __device__
  void operator()(uInt i, float * x) const
  {
    p1(i, x);
    p2(i, x);
  }
}

template <MemorySpace memorySpace>
struct DotPolicy
{
  float* x;
  float* y;

  __host__ __device__
  float operator()(uInt i) const
  {
    return x[i] * y[i];
  }
};

template <MemorySpace::HOST>
struct DotPolicy
{
  float* x;
  float* y;

  __host__ __device__
  float operator()(uInt i) const
  {
    return x[i] * y[i];
  }
};

struct SphericalPolicy
{
  TestPolicy(enum ..., Base * ptr);

  __host__ __device__
  float operator()(uInt i) const
  {
    if (enum == )
      return x[i] * y[i];
  }
};

}

Host Backend

HostBackend.h

/*
 * @author Bikash Kanungo
 */

#pragma once

#include "KernelContext.h"

namespace dftefe
{

  template <typename Kernel, typename... Args>
    void launchHost(Kernel k, Args&&... args)
    {
      const int N = 1024;

      for (int i = 0; i < N; ++i)
      {
        KernelContext ctx{i, 1, 1};

        k(ctx, std::forward<Args>(args)...);
      }
    }
}

Kernel Context

KernelContext.h

/*
 * @author Bikash Kanungo
 */

#pragma once

#include <cstdint>
#include <utils/TypeConfig.h>

namespace dftefe
{
  using size_type = utils::size_type;

  struct KernelContext
  {
    size_type globalThreadId;
    size_type nThreadsPerBlock;
    size_type nThreadBlock;
  };
}

Kernel Macros

KernelMacros.h

/*
 * @author Bikash Kanungo
 */

#pragma once

#include "KernelContext.h"

namespace dftefe
{

#define DFTEFE_CREATE_KERNEL(RET, NAME, BODY, ...)            \
  template <typename... Args>                                 \
  RET NAME(KernelContext ctx, Args&&... args)                 \
  {                                                           \
    const auto globalThreadId = ctx.globalThreadId;           \
    const auto nThreadsPerBlock = ctx.nThreadsPerBlock;       \
    const auto nThreadBlock = ctx.nThreadBlock;               \
    BODY                                                      \
  }

}

Unified Launch Interface

Launch.h

/*
 * @author Bikash Kanungo
 */

#pragma once

#include <utils/MemorySpace.h>
#include "HostBackend.h"
#include "DeviceBackend.h"
#include <utils/Exceptions.h>

namespace dftefe
{

  template <utils::MemorySpace B, typename Kernel, typename... Args>
    void launch(Kernel k, Args&&... args)
    {
      if constexpr (B == utils::MemorySpace::HOST)
      {
        launchHost(k, std::forward<Args>(args)...);
      }
      else
      {
#ifdef DFTEFE_WITH_DEVICE

        launchDevice(k, std::forward<Args>(args)...);

#else

        DFTFE_AssertWithMsg(false, "GPU backend not enabled");

#endif // DFTEFE_WITH_DEVICE
      }
    }
}