-
Notifications
You must be signed in to change notification settings - Fork 3
Policy Design
Avirup Sircar edited this page May 16, 2026
·
1 revision
if (USE_CUDA)
add_library(dftfe DeviceBackendCUDA.hpp)
elseif (USE_HIP)
add_library(dftfe DeviceBackendHIP.hpp)
elseif (USE_SYCL)
add_library(dftfe DeviceBackendSYCL.hpp)
endif()/*
* @author Bikash Kanungo
*/
#pragma once
namespace dftefe
{
template <typename Kernel, typename... Args>
void launchDevice(Kernel k, Args&&... args);
}/*
* @author Bikash Kanungo
*/
#include "DeviceBackend.h"
#include "KernelContext.h"
namespace dftefe
{
template <typename Kernel, typename... Args>
__global__ void cudaWrapper(Kernel k, Args... args)
{
KernelContext ctx;
ctx.globalThreadId = blockIdx.x * blockDim.x + threadIdx.x;
ctx.nThreadsPerBlock = blockDim.x;
ctx.nThreadBlock = gridDim.x;
k(ctx, args...);
// add stream sync
}
template <typename Kernel, typename... Args>
void launchDevice(Kernel k, Args&&... args)
{
//
// NOTE: Needs to be set to something like
// DFTEFE_DEVICE_BLOCK_SIZE and
// DFTEFE_DEVICE_GRID_SIZE defined in KernelContext.h
//
int block = 256;
int grid = 64;
cudaWrapper<<<grid, block>>>(k, args...);
}
}/*
* @author Bikash Kanungo
*/
#include "DeviceBackend.h"
#include "KernelContext.h"
#include <hip/hip_runtime.h>
namespace dftefe
{
template <typename Kernel, typename... Args>
__global__ void hipWrapper(Kernel k, Args... args)
{
KernelContext ctx;
ctx.globalThreadId = blockIdx.x * blockDim.x + threadIdx.x;
k(ctx, args...);
}
template <typename Kernel, typename... Args>
void launchDevice(Kernel k, Args... args)
{
//
// NOTE: Needs to be set to something like
// DFTEFE_DEVICE_BLOCK_SIZE and
// DFTEFE_DEVICE_GRID_SIZE defined in KernelContext.h
//
int block = 256;
int grid = 64;
hipLaunchKernelGGL(hipWrapper, grid, block, 0, 0, k, args...);
}
}/*
* @author Bikash Kanungo
*/
#include "DeviceBackend.h"
#include "KernelContext.h"
#include <sycl/sycl.hpp>
namespace dftefe
{
template <typename Kernel, typename... Args>
void launchDevice(Kernel k, Args... args)
{
sycl::queue q;
q.parallel_for(
// NOTE: 1024 to be changed to some prefined integer like
// DFTEFE_DEVICE_MAX_BLOCK_SIZE defined in KernelContext.h
sycl::range<1>(1024),
[=](sycl::id<1> i)
{
KernelContext ctx;
ctx.globalThreadId = i[0];
k(ctx, args...);
});
}
}/*
* @author Bikash Kanungo
*/
#include "Launch.h"
#include "ExampleKernels.h"
#include <utils/MemorySpace.h>
using namespace dftefe;
int main()
{
float *x = nullptr;
float *y = nullptr;
float *result = nullptr;
int n = 1024;
SaxpyPolicy sp{x, y, 2.0f};
ScalePolicy sc{x, 3.0f};
DotPolicy dp{x, y};
launch<utils::MemorySpace::HOST>(saxpy, sp, n);
launch<utils::MemorySpace::DEVICE>(saxpy, sp, n);
launch<utils::MemorySpace::HOST>(scale, sc, n);
launch<utils::MemorySpace::DEVICE>(scale, sc, n);
launch<utils::MemorySpace::DEVICE>(dot, dp, result, n);
}/*
* @author Bikash Kanungo
*/
#pragma once
#include "KernelMacros.h"
#include "ExamplePolicies.h"
namespace dftefe
{
__global__ void saxpy(SaxpyPolicy policy, int n)
{
if (globalThreadId < n)
policy.foo(globalThreadId);
}
template <typename Policy>
DFTFE_CREATE_KERNEL(
void,
generickernel,
{
if (globalThreadId < n)
{
policy(globalThreadId);
}
},
Policy policy,
int n,
float *x,
);
DFTFE_CREATE_KERNEL(
void,
saxpyfoo,
{
if (globalThreadId < n)
policy.foo(globalThreadId);
},
SaxpyPolicy policy,
int n
);
DFTFE_CREATE_KERNEL(
void,
saxpy,
{
if (globalThreadId < n)
policy(globalThreadId);
},
SaxpyPolicy policy,
int n
);
DFTFE_CREATE_KERNEL(
void,
saxpyadd,
{
if (globalThreadId < n)
policy(globalThreadId) + a * policy.foo(globalThreadId);
},
SaxpyPolicy policy,
int n,
int a
);
DFTFE_CREATE_KERNEL(
void,
scale,
{
if (globalThreadId < n)
policy(globalThreadId);
},
ScalePolicy policy,
int n
);
DFTFE_CREATE_KERNEL(
float,
dothost,
{
float val = 0.0;
if (globalThreadId < n)
val = policy(globalThreadId);
if (globalThreadId == 0)
*result = val;
},
DotPolicy<HOST> policy,
float* result,
int n
);
}#pragma once
#include "kernel_context.h"
namespace dftefe
{
using uInt = std::uint32_t;
struct SaxpyPolicy
{
float* x;
float* y;
float a;
__host__ __device__
// DFTFE_HOST DFTFE_DEVICE FORCEINLINE
void operator()(uInt i) const
{
y[i] += a * x[i];
}
__host__ __device__
void foo(uInt i) const
{
y[i] += a * x[i];
}
};
struct ScalePolicy
{
float* x;
float a;
__host__ __device__
void operator()(uInt i) const
{
x[i] *= a;
}
};
template<typename P1, typename P2>
class ComposePolicy
{
P1 p1;
P2 p2;
__host__ __device__
void operator()(uInt i, float * x) const
{
p1(i, x);
p2(i, x);
}
}
template <MemorySpace memorySpace>
struct DotPolicy
{
float* x;
float* y;
__host__ __device__
float operator()(uInt i) const
{
return x[i] * y[i];
}
};
template <MemorySpace::HOST>
struct DotPolicy
{
float* x;
float* y;
__host__ __device__
float operator()(uInt i) const
{
return x[i] * y[i];
}
};
struct SphericalPolicy
{
TestPolicy(enum ..., Base * ptr);
__host__ __device__
float operator()(uInt i) const
{
if (enum == )
return x[i] * y[i];
}
};
}/*
* @author Bikash Kanungo
*/
#pragma once
#include "KernelContext.h"
namespace dftefe
{
template <typename Kernel, typename... Args>
void launchHost(Kernel k, Args&&... args)
{
const int N = 1024;
for (int i = 0; i < N; ++i)
{
KernelContext ctx{i, 1, 1};
k(ctx, std::forward<Args>(args)...);
}
}
}/*
* @author Bikash Kanungo
*/
#pragma once
#include <cstdint>
#include <utils/TypeConfig.h>
namespace dftefe
{
using size_type = utils::size_type;
struct KernelContext
{
size_type globalThreadId;
size_type nThreadsPerBlock;
size_type nThreadBlock;
};
}/*
* @author Bikash Kanungo
*/
#pragma once
#include "KernelContext.h"
namespace dftefe
{
#define DFTEFE_CREATE_KERNEL(RET, NAME, BODY, ...) \
template <typename... Args> \
RET NAME(KernelContext ctx, Args&&... args) \
{ \
const auto globalThreadId = ctx.globalThreadId; \
const auto nThreadsPerBlock = ctx.nThreadsPerBlock; \
const auto nThreadBlock = ctx.nThreadBlock; \
BODY \
}
}/*
* @author Bikash Kanungo
*/
#pragma once
#include <utils/MemorySpace.h>
#include "HostBackend.h"
#include "DeviceBackend.h"
#include <utils/Exceptions.h>
namespace dftefe
{
template <utils::MemorySpace B, typename Kernel, typename... Args>
void launch(Kernel k, Args&&... args)
{
if constexpr (B == utils::MemorySpace::HOST)
{
launchHost(k, std::forward<Args>(args)...);
}
else
{
#ifdef DFTEFE_WITH_DEVICE
launchDevice(k, std::forward<Args>(args)...);
#else
DFTFE_AssertWithMsg(false, "GPU backend not enabled");
#endif // DFTEFE_WITH_DEVICE
}
}
}