Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
70 commits
Select commit Hold shift + click to select a range
27390ec
Add syclSolverInverter routines.
jngkim May 10, 2022
ead9857
Merge DelayedUpdateSYCL.h
jngkim May 10, 2022
2e3012d
Use syclSolverInverter in DelayedUpdateSYCL.h
jngkim May 11, 2022
6b5d0ed
Merge remote-tracking branch 'origin/develop' into sycl-allocator-solver
ye-luo May 11, 2022
5d40b0c
Merge branch 'QMCPACK:develop' into sycl-allocator-solver
jngkim May 12, 2022
b5fb457
Fix complex compilation.
ye-luo May 17, 2022
64fcaff
Formatting
ye-luo May 17, 2022
6222150
Merge branch 'QMCPACK:develop' into sycl-allocator-solver
jngkim May 26, 2022
4eaa3c6
Merge branch 'QMCPACK:develop' into sycl-allocator-solver
jngkim Jun 1, 2022
58776ff
Merge branch 'QMCPACK:develop' into sycl-allocator-solver
jngkim Jun 2, 2022
9958031
Merge branch 'QMCPACK:develop' into sycl-allocator-solver
jngkim Jun 7, 2022
42005de
Merge branch 'QMCPACK:develop' into sycl-allocator-solver
jngkim Jun 9, 2022
c6ee364
Update with interop and add waits.
jngkim Jun 9, 2022
8edea7a
Merge branch 'QMCPACK:develop' into sycl-allocator-solver
jngkim Jun 10, 2022
10c90b2
Add tests for Rotated SPOs using LCAO
mdewing Jun 13, 2022
01e2c26
Merge pull request #4059 from markdewing/test_rotated_lcao
prckent Jun 13, 2022
58f01ad
Add data access APIs in OhmmsArray
ye-luo Jun 11, 2022
6685f2f
Renaming variables.
ye-luo Jun 11, 2022
7f77018
Replace operator() with parameter packing.
ye-luo Jun 14, 2022
c210527
Rename data(offset) to data_at(indices)
ye-luo Jun 14, 2022
ba4363b
Replace redundant code with a function.
ye-luo Jun 14, 2022
036d619
Grouping doxygen comments.
ye-luo Jun 14, 2022
908f40e
Expand a bit unit test.
ye-luo Jun 14, 2022
d362348
Adjust phi_vgl layout.
ye-luo Mar 19, 2022
4c96e70
Align SplineC2COMPTarget layout as SplineC2ROMPTarget
ye-luo Apr 23, 2022
8ce4c90
Adopt OhmmsArray in SPOSet.
ye-luo Jun 11, 2022
6ad3a90
Verbose magic number
ye-luo Jun 14, 2022
f6a257f
Merge pull request #4057 from ye-luo/update-OhmmsArray
ye-luo Jun 14, 2022
d1d93bd
Merge branch 'develop' into adjust-layout
ye-luo Jun 14, 2022
0d4d5e9
Adjust tolerance for deterministic test for SOREP
Hyeondeok-Shin Jun 14, 2022
47e615b
Merge pull request #4058 from ye-luo/adjust-layout
prckent Jun 14, 2022
cac7085
Merge branch 'develop' into deterministic_test
prckent Jun 14, 2022
509dd33
Merge pull request #4060 from Hyeondeok-Shin/deterministic_test
prckent Jun 14, 2022
21c6bf1
Fix legacy CUDA compilation issue.
ye-luo Jun 15, 2022
a25bf08
Adjust tolerance for unit_test_estimators for One-body densty matrice…
Hyeondeok-Shin Jun 15, 2022
3365a39
Merge pull request #4063 from ye-luo/fix-legacy-cuda
prckent Jun 15, 2022
e0df1eb
Derive OpenMP OFFLOAD_ARCH based on HIP_ARCH.
ye-luo Jun 15, 2022
1aed50d
Update OpenMP offload build instruction.
ye-luo Jun 15, 2022
374d06e
Merge branch 'develop' into deterministic_test
ye-luo Jun 15, 2022
e9a6a57
Merge pull request #4064 from Hyeondeok-Shin/deterministic_test
ye-luo Jun 16, 2022
7af3618
Merge branch 'develop' into auto-derive-arch
prckent Jun 16, 2022
134e51a
Merge pull request #4065 from ye-luo/auto-derive-arch
prckent Jun 16, 2022
82a3424
Merge branch 'QMCPACK:develop' into sycl-allocator-solver
jngkim Jun 17, 2022
19acb57
Update nightly_anl_bora.sh
ye-luo Jun 20, 2022
4d21eb1
Update test scripts and remove unused scripts.
ye-luo Jun 20, 2022
de1db4f
Merge pull request #4067 from ye-luo/testing-scripts
prckent Jun 20, 2022
e7027ae
Orbital rotation test with legacy driver
mdewing Jun 20, 2022
0a0d55a
Do not use cudaDeviceProp maxTexture1D in HIP
jakurzak Jun 20, 2022
7193bc4
Merge pull request #4070 from jakurzak/develop
ye-luo Jun 20, 2022
9f8548b
Merge branch 'develop' into he_orb_rot_test
prckent Jun 22, 2022
3637ba2
Merge pull request #4069 from markdewing/he_orb_rot_test
prckent Jun 22, 2022
cdf7430
Add MPI support to ROCm legacy CI
williamfgc Jun 22, 2022
cf217ed
Merge pull request #4071 from williamfgc/ci-rocm-legacy-mpi
prckent Jun 22, 2022
e0afba1
Rewrite loop to avoid NVHPC hang.
ye-luo Jun 23, 2022
49bfe4b
Remove omp parallel over walkers.
ye-luo Jun 23, 2022
01e69af
Merge pull request #4073 from ye-luo/avoid-nvhpc-hang
ye-luo Jun 23, 2022
30d63d2
Merge branch 'develop' into remove-parallel
ye-luo Jun 23, 2022
f98bddf
Merge pull request #4074 from ye-luo/remove-parallel
ye-luo Jun 23, 2022
608cdab
Add MSD::mw_accept_rejectMove unit test
ye-luo Jun 24, 2022
2fce443
Add mock-up mw_accept_rejectMove in MSD.
ye-luo Jun 13, 2022
17160d4
drop standalone-debug for offload builds
PDoakORNL Jun 24, 2022
c0ea563
Merge pull request #4072 from ye-luo/MSD_mw_accept_reject
prckent Jun 24, 2022
8019f7f
adding a message about -fstandalone-debug
PDoakORNL Jun 24, 2022
7e9baad
Merge branch 'develop' into remove_annoying_standalone_warning
ye-luo Jun 24, 2022
179a2d0
Guard div by 0
prckent Jun 24, 2022
01b4f73
Merge pull request #4080 from PDoakORNL/remove_annoying_standalone_wa…
ye-luo Jun 24, 2022
7559242
Merge branch 'develop' into fixr
prckent Jun 25, 2022
7d8618d
Merge pull request #4082 from prckent/fixr
prckent Jun 25, 2022
5867ffb
Merge branch 'QMCPACK:develop' into sycl-allocator-solver
jngkim Jun 27, 2022
b89a911
Fix to avoid double-free with latest compilers.
jngkim Jul 11, 2022
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions .github/workflows/ci-github-actions-self-hosted.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -222,10 +222,10 @@ jobs:
ROCm-Clang13-NoMPI-CUDA2HIP-Real,
ROCm-Clang13-NoMPI-CUDA2HIP-Complex-Mixed,
ROCm-Clang13-NoMPI-CUDA2HIP-Complex,
ROCm-Clang13-NoMPI-Legacy-CUDA2HIP-Real-Mixed,
ROCm-Clang13-NoMPI-Legacy-CUDA2HIP-Real,
ROCm-Clang13-NoMPI-Legacy-CUDA2HIP-Complex-Mixed,
ROCm-Clang13-NoMPI-Legacy-CUDA2HIP-Complex,
ROCm-Clang13-MPI-Legacy-CUDA2HIP-Real-Mixed,
ROCm-Clang13-MPI-Legacy-CUDA2HIP-Real,
ROCm-Clang13-MPI-Legacy-CUDA2HIP-Complex-Mixed,
ROCm-Clang13-MPI-Legacy-CUDA2HIP-Complex,
]

steps:
Expand Down
35 changes: 29 additions & 6 deletions CMake/ClangCompilers.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -32,10 +32,6 @@ if(QMC_OMP)
set(OPENMP_OFFLOAD_COMPILE_OPTIONS "${OPENMP_OFFLOAD_COMPILE_OPTIONS} -Wno-linker-warnings")
endif()

if(NOT DEFINED OFFLOAD_ARCH AND OFFLOAD_TARGET MATCHES "amdgcn")
set(OFFLOAD_ARCH gfx906)
endif()

if(NOT DEFINED OFFLOAD_ARCH
AND OFFLOAD_TARGET MATCHES "nvptx64"
AND DEFINED CMAKE_CUDA_ARCHITECTURES)
Expand All @@ -51,6 +47,24 @@ if(QMC_OMP)
endif()
endif()

# For AMD GPU offload targets, pick OFFLOAD_ARCH when the user did not set it:
#  - HIP_ARCH defined with exactly one entry: reuse that entry.
#  - HIP_ARCH defined with any other number of entries: stop with a fatal error.
#  - HIP_ARCH not defined: fall back to gfx906.
if(NOT DEFINED OFFLOAD_ARCH
AND OFFLOAD_TARGET MATCHES "amdgcn")
if (DEFINED HIP_ARCH)
# HIP_ARCH is treated as a CMake list; count its entries
list(LENGTH HIP_ARCH NUMBER_HIP_ARCHITECTURES)
if(NUMBER_HIP_ARCHITECTURES EQUAL "1")
set(OFFLOAD_ARCH ${HIP_ARCH})
else()
message(
FATAL_ERROR
"LLVM does not yet support offload to multiple architectures! "
"Deriving OFFLOAD_ARCH from HIP_ARCH failed. "
"Please keep only one entry in HIP_ARCH or set OFFLOAD_ARCH.")
endif()
else()
# no HIP_ARCH to derive from: use the hard-coded default architecture
set(OFFLOAD_ARCH gfx906)
endif()
endif()

if(DEFINED OFFLOAD_ARCH)
set(OPENMP_OFFLOAD_COMPILE_OPTIONS
"${OPENMP_OFFLOAD_COMPILE_OPTIONS} -Xopenmp-target=${OFFLOAD_TARGET} -march=${OFFLOAD_ARCH}")
Expand Down Expand Up @@ -90,8 +104,17 @@ set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -ffast-math")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -ffast-math")

# Set extra debug flags
set(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} -fno-omit-frame-pointer -fstandalone-debug")
set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -fno-omit-frame-pointer -fstandalone-debug")
set(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} -fno-omit-frame-pointer")
set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -fno-omit-frame-pointer")

# Add -fstandalone-debug to the Debug flags unless OFFLOAD_TARGET matches "nvptx64".
# Unfortunately this removes standalone-debug altogether for those offload builds,
# but until we discover how to use the ${OPENMP_OFFLOAD_COMPILE_OPTIONS} more selectively
# this is the only way to avoid a warning per compilation unit that contains an omp symbol.
if (NOT OFFLOAD_TARGET MATCHES "nvptx64")
message(STATUS "QMCPACK adds -fstandalone-debug for Debug builds")
set(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} -fstandalone-debug")
set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -fstandalone-debug")
endif()

#--------------------------------------
# Special architectural flags
Expand Down
4 changes: 2 additions & 2 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -779,7 +779,7 @@ if(ENABLE_ROCM)
message(STATUS "ROCM_ROOT not provided. Searching for FindHIP.cmake file.")
find_path(
HIP_MODULE_FILE_DIR FindHIP.cmake
HINTS /opt/rocm
HINTS $ENV{ROCM_PATH} /opt/rocm
PATH_SUFFIXES hip/cmake)
if(HIP_MODULE_FILE_DIR)
message(STATUS "Found FindHIP.cmake file. ROCM_ROOT will be derived.")
Expand Down Expand Up @@ -856,7 +856,7 @@ if(ENABLE_SYCL)
endif()
add_library(SYCL::host INTERFACE IMPORTED)
add_library(SYCL::device INTERFACE IMPORTED)
find_package(IntelDPCPP REQUIRED CONFIGS IntelDPCPPConfig-modified.cmake PATHS ${PROJECT_CMAKE})
find_package(IntelDPCPP REQUIRED CONFIGS IntelDPCPPConfig-modified.cmake PATHS ${PROJECT_CMAKE} NO_DEFAULT_PATH)
target_link_libraries(SYCL::host INTERFACE OneAPI::DPCPP-host)
target_link_libraries(SYCL::device INTERFACE OneAPI::DPCPP-device)
if(TARGET MKL::sycl)
Expand Down
41 changes: 28 additions & 13 deletions docs/installation.rst
Original file line number Diff line number Diff line change
Expand Up @@ -282,20 +282,20 @@ the path to the source directory.

::

QMC_CUDA Enable legacy CUDA code path for NVIDIA GPU acceleration (1:yes, 0:no)
QMC_COMPLEX Build the complex (general twist/k-point) version (1:yes, 0:no)
QMC_MIXED_PRECISION Build the mixed precision (mixing double/float) version
(1:yes (QMC_CUDA=1 default), 0:no (QMC_CUDA=0 default)).
Mixed precision calculations can be significantly faster but should be
carefully checked and validated against full double precision runs,
particularly for large electron counts.
ENABLE_OFFLOAD ON/OFF(default). Enable OpenMP target offload for GPU acceleration.
QMC_CUDA Enable legacy CUDA code path for NVIDIA GPU acceleration (1:yes, 0:no)
ENABLE_CUDA ON/OFF(default). Enable CUDA code path for NVIDIA GPU acceleration.
Production quality for AFQMC. Pre-production quality for real-space.
Production quality for AFQMC and real-space performance portable implementation.
Use CMAKE_CUDA_ARCHITECTURES, default 70, to set the actual GPU architecture.
ENABLE_OFFLOAD ON/OFF(default). Enable OpenMP target offload for GPU acceleration.
ENABLE_TIMERS ON(default)/OFF. Enable fine-grained timers. Timers are on by default but at level coarse
to avoid potential slowdown in tiny systems.
For systems beyond tiny sizes (100+ electrons) there is no risk.
QMC_CUDA2HIP ON/OFF(default). To be set ON, it requires either QMC_CUDA or ENABLE_CUDA to be ON.
Compile CUDA source code as HIP and use ROCm libraries for AMD GPUs.
ENABLE_SYCL ON/OFF(default). Enable SYCL code path. Only support Intel GPUs and OneAPI compilers.

- General build options

Expand Down Expand Up @@ -327,6 +327,9 @@ the path to the source directory.

::

ENABLE_TIMERS ON(default)/OFF. Enable fine-grained timers. Timers are on by default but at level coarse
to avoid potential slowdown in tiny systems.
For systems beyond tiny sizes (100+ electrons) there is no risk.
QE_BIN Location of Quantum ESPRESSO binaries including pw2qmcpack.x
RMG_BIN Location of RMG binary (rmg-cpu)
QMC_DATA Specify data directory for QMCPACK performance and integration tests
Expand Down Expand Up @@ -412,7 +415,7 @@ and is not suitable for production. Additional implementation in QMCPACK as
well as improvements in open-source and vendor compilers is required for production status
to be reached. The following compilers have been verified:

- LLVM Clang 11. Support NVIDIA GPUs.
- LLVM Clang 14. Support NVIDIA GPUs.

::

Expand All @@ -425,31 +428,43 @@ to be reached. The following compilers have been verified:
OFFLOAD_TARGET for the offload target. default nvptx64-nvidia-cuda.
OFFLOAD_ARCH for the target architecture (sm_80, gfx906, ...) if not using the compiler default.

- AMD AOMP Clang 11.8. Support AMD GPUs.
- AMD ROCm/AOMP LLVM-based compilers. Support AMD GPUs.

::

-D ENABLE_OFFLOAD=ON -D OFFLOAD_TARGET=amdgcn-amd-amdhsa -D OFFLOAD_ARCH=gfx906

- Intel oneAPI beta08. Support Intel GPUs.
- Intel oneAPI 2022.1.0 icx/icpx compilers. Support Intel GPUs.

::

-D ENABLE_OFFLOAD=ON -D OFFLOAD_TARGET=spir64

- HPE Cray 11. It is derived from Clang and supports NVIDIA and AMD GPUs.
- HPE Cray 13. It is derived from Clang and supports NVIDIA and AMD GPUs.

::

-D ENABLE_OFFLOAD=ON -D OFFLOAD_TARGET=nvptx64-nvidia-cuda -D OFFLOAD_ARCH=sm_80

OpenMP offload features can be used together with vendor specific code paths to maximize QMCPACK performance.
Some new CUDA functionality has been implemented to improve efficiency on NVIDIA GPUs in conjunction with the Offload code paths:
For example, using Clang 11 on Summit.
Some new CUDA functionality has been implemented to improve performance on NVIDIA GPUs in conjunction with the offload code paths:
For example, using Clang 14 on Summit.

::

-D ENABLE_OFFLOAD=ON -D USE_OBJECT_TARGET=ON -D ENABLE_CUDA=ON -D CMAKE_CUDA_ARCHITECTURES=70 -D CMAKE_CUDA_HOST_COMPILER=`which gcc`
-D ENABLE_OFFLOAD=ON -D USE_OBJECT_TARGET=ON -D ENABLE_CUDA=ON -D CMAKE_CUDA_ARCHITECTURES=70

Similarly, HIP features can be enabled in conjunction with the offload code path to improve performance on AMD GPUs.

::

-D ENABLE_OFFLOAD=ON -D ENABLE_CUDA=ON -D QMC_CUDA2HIP=ON -DHIP_ARCH=gfx906

Similarly, SYCL features can be enabled in conjunction with the offload code path to improve performance on Intel GPUs.

::

-D ENABLE_OFFLOAD=ON -D ENABLE_SYCL=ON


Installation from CMake
Expand Down
4 changes: 3 additions & 1 deletion nexus/tests/unit/test_structure.py
Original file line number Diff line number Diff line change
Expand Up @@ -1372,7 +1372,9 @@ def test_embed():
r = np.linalg.norm(dr,axis=1)
dilation = 2*r*np.exp(-r)
for i in range(npos):
gr.pos[i] += dilation[i]/r[i]*dr[i]
if r[i]>0:
gr.pos[i] += dilation[i]/r[i]*dr[i]
#end if
#end for

# Represent the unrelaxed large cell
Expand Down
3 changes: 2 additions & 1 deletion src/Configuration.h
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,8 @@ struct QMCTraits
{
enum
{
DIM = OHMMS_DIM
DIM = OHMMS_DIM,
DIM_VGL = OHMMS_DIM + 2 // Value(1) + Gradients(OHMMS_DIM) + Laplacian(1)
};
using QTBase = QMCTypes<OHMMS_PRECISION, DIM>;
using QTFull = QMCTypes<OHMMS_PRECISION_FULL, DIM>;
Expand Down
120 changes: 104 additions & 16 deletions src/Containers/OhmmsPETE/OhmmsArray.h
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,8 @@ class Array
inline typename Container_t::const_iterator begin() const { return X.begin(); }
inline typename Container_t::const_iterator end() const { return X.end(); }

///@{
/// access the container data pointer
inline Type_t* data() { return X.data(); }
inline const Type_t* data() const { return X.data(); }
template<typename Allocator = ALLOC, typename = qmcplusplus::IsDualSpace<Allocator>>
Expand All @@ -94,14 +96,70 @@ class Array
{
return X.device_data();
}
///@}

inline const Type_t* first_address() const { return &(X[0]); }
///@{
/// access the data pointer at {index_1, ..., index_D}
///
/// Each accessor returns the container base pointer (X.data() or X.device_data())
/// advanced by compute_offset(indices). Two calling conventions are provided:
///  - a std::array<SIZET, D> holding one index per dimension;
///  - exactly D separate index arguments, each converted to std::size_t and handed
///    to the std::array overload (the braced list selects SIZET = size_t).
/// None of these accessors checks the indices against the array extents.
///
/// NOTE(review): `typename = std::is_integral<SIZET>` names a type that is well-formed
/// for every SIZET, so it does not remove non-integral SIZET from overload resolution.
/// If a real constraint is intended, std::enable_if_t<std::is_integral<SIZET>::value>
/// would be required -- confirm intent before changing.
template<typename SIZET = size_t, typename = std::is_integral<SIZET>>
Type_t* data_at(const std::array<SIZET, D>& indices)
{
  return X.data() + compute_offset(indices);
}
template<typename SIZET = size_t, typename = std::is_integral<SIZET>>
const Type_t* data_at(const std::array<SIZET, D>& indices) const
{
  return X.data() + compute_offset(indices);
}

/// device pointer variants; usable only when qmcplusplus::IsDualSpace<Allocator>
/// is well-formed (that trait is defined outside this header section)
template<typename SIZET     = size_t,
         typename           = std::is_integral<SIZET>,
         typename Allocator = ALLOC,
         typename           = qmcplusplus::IsDualSpace<Allocator>>
Type_t* device_data_at(const std::array<SIZET, D>& indices)
{
  return X.device_data() + compute_offset(indices);
}
template<typename SIZET     = size_t,
         typename           = std::is_integral<SIZET>,
         typename Allocator = ALLOC,
         typename           = qmcplusplus::IsDualSpace<Allocator>>
const Type_t* device_data_at(const std::array<SIZET, D>& indices) const
{
  return X.device_data() + compute_offset(indices);
}

/// variadic variants: the number of indices must equal D (checked at compile time).
/// The indices are taken by value, so a plain static_cast is used; std::forward is
/// only meaningful for forwarding references and would merely cast to an rvalue here.
template<typename... Args>
Type_t* data_at(Args... indices)
{
  static_assert(sizeof...(Args) == D, "data arguments must match dimensionality of Array");
  return data_at({static_cast<std::size_t>(indices)...});
}
template<typename... Args>
const Type_t* data_at(Args... indices) const
{
  static_assert(sizeof...(Args) == D, "data arguments must match dimensionality of Array");
  return data_at({static_cast<std::size_t>(indices)...});
}
template<typename... Args, typename Allocator = ALLOC, typename = qmcplusplus::IsDualSpace<Allocator>>
Type_t* device_data_at(Args... indices)
{
  static_assert(sizeof...(Args) == D, "device_data arguments must match dimensionality of Array");
  return device_data_at({static_cast<std::size_t>(indices)...});
}
template<typename... Args, typename Allocator = ALLOC, typename = qmcplusplus::IsDualSpace<Allocator>>
const Type_t* device_data_at(Args... indices) const
{
  static_assert(sizeof...(Args) == D, "device_data arguments must match dimensionality of Array");
  return device_data_at({static_cast<std::size_t>(indices)...});
}
///@}

inline const Type_t* last_address() const { return &(X[0]) + X.size(); }
inline const Type_t* first_address() const { return X.data(); }

inline Type_t* first_address() { return &(X[0]); }
inline const Type_t* last_address() const { return X.data() + X.size(); }

inline Type_t* last_address() { return &(X[0]) + X.size(); }
inline Type_t* first_address() { return X.data(); }

inline Type_t* last_address() { return X.data() + X.size(); }

This_t& operator=(const T& rhs)
{
Expand All @@ -127,22 +185,31 @@ class Array
return *this;
}

// Get and Set Operations
inline Type_t& operator()(size_t i) { return X[i]; }

inline Type_t operator()(size_t i) const { return X[i]; }
inline Type_t& operator()(size_t i, size_t j) { return X[j + Length[1] * i]; }
inline Type_t operator()(size_t i, size_t j) const { return X[j + Length[1] * i]; }
inline Type_t& operator()(size_t i, size_t j, size_t k) { return X[k + Length[2] * (j + Length[1] * i)]; }
inline Type_t operator()(size_t i, size_t j, size_t k) const { return X[k + Length[2] * (j + Length[1] * i)]; }
inline Type_t& operator()(size_t i, size_t j, size_t k, size_t l)
///@{
/// access the element at {index_1, ..., index_D}
template<typename SIZET = size_t, typename = std::is_integral<SIZET>>
Type_t& operator()(const std::array<SIZET, D>& indices)
{
return X[l + Length[3] * (k + Length[2] * (j + Length[1] * i))];
return X[compute_offset(indices)];
}
inline Type_t operator()(size_t i, size_t j, size_t k, size_t l) const
template<typename SIZET = size_t, typename = std::is_integral<SIZET>>
const Type_t& operator()(const std::array<SIZET, D>& indices) const
{
return X[l + Length[3] * (k + Length[2] * (j + Length[1] * i))];
return X[compute_offset(indices)];
}
template<typename... Args>
Type_t& operator()(Args... indices)
{
static_assert(sizeof...(Args) == D, "operator() arguments must match dimensionality of Array");
return operator()({static_cast<std::size_t>(std::forward<Args>(indices))...});
}
template<typename... Args>
const Type_t& operator()(Args... indices) const
{
static_assert(sizeof...(Args) == D, "operator() arguments must match dimensionality of Array");
return operator()({static_cast<std::size_t>(std::forward<Args>(indices))...});
}
///@}

inline Type_t sum() const
{
Expand All @@ -152,6 +219,18 @@ class Array
return s;
}

// Abstract Dual Space Transfers
/// Forward the transfer request to the underlying container by calling X.updateTo().
/// The transfer direction and semantics are defined by Container_t, not here
/// (presumably host -> device -- TODO confirm against the container implementation).
/// Usable only when qmcplusplus::IsDualSpace<Allocator> is well-formed.
template<typename Allocator = ALLOC, typename = qmcplusplus::IsDualSpace<Allocator>>
void updateTo()
{
X.updateTo();
}
/// Forward the transfer request to the underlying container by calling X.updateFrom().
/// The transfer direction and semantics are defined by Container_t, not here
/// (presumably device -> host -- TODO confirm against the container implementation).
/// Usable only when qmcplusplus::IsDualSpace<Allocator> is well-formed.
template<typename Allocator = ALLOC, typename = qmcplusplus::IsDualSpace<Allocator>>
void updateFrom()
{
X.updateFrom();
}

private:
std::array<size_t, D> Length;
Container_t X;
Expand All @@ -163,6 +242,15 @@ class Array
total *= dims[i];
return total;
}

template<typename SIZET = size_t, typename = std::is_integral<SIZET>>
SIZET compute_offset(const std::array<SIZET, D>& indices) const
{
SIZET offset = indices[0];
for (int i = 1; i < indices.size(); i++)
offset = offset * Length[i] + indices[i];
return offset;
}
};

template<class T, unsigned D, class Alloc>
Expand Down
15 changes: 13 additions & 2 deletions src/Containers/OhmmsPETE/tests/test_Array.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,8 @@ namespace qmcplusplus
TEST_CASE("array", "[OhmmsPETE]")
{
using Array1D = Array<double, 1>;
Array1D A({3});
Array1D B({3});
Array1D A(3);
Array1D B(3);

// iterator
auto ia = A.begin();
Expand Down Expand Up @@ -73,6 +73,17 @@ TEST_CASE("array NestedContainers", "[OhmmsPETE]")
CHECK(vec_copy(0).back() == 123);
}

// Checks that data_at(i, j, k) agrees with manual pointer arithmetic on data()
// and that it addresses the same element as operator()(i, j, k).
TEST_CASE("Array::data", "[OhmmsPETE]")
{
// 2 x 4 x 5 array, i.e. 40 elements in total
Array<float, 3> tensor(2, 4, 5);
REQUIRE(tensor.size() == 40);

// element {1, 2, 3}: expected offset written out as 1 * (4 * 5) + 2 * 5 + 3
CHECK(tensor.data() + 1 * 4 * 5 + 2 * 5 + 3 == tensor.data_at(1, 2, 3));

// a value stored through operator() must be readable through the data_at() pointer
tensor(1, 2, 3) = 0.5f;
CHECK(*tensor.data_at(1, 2, 3) == 0.5f);
}

TEST_CASE("Array::dimension sizes constructor", "[OhmmsPETE]")
{
const int dim = 2;
Expand Down
Loading