From 7585c091c8ee7ff72f4a48f1671b9a7318d5c215 Mon Sep 17 00:00:00 2001 From: Michael McCrackan Date: Tue, 25 Feb 2025 19:35:37 -0800 Subject: [PATCH 01/13] add welch psd function --- CMakeLists.txt | 11 +- src/array_ops.cxx | 270 +++++++++++++++++++++++++++++++++++++++++ test/test_array_ops.py | 66 ++++++++++ 3 files changed, 346 insertions(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 0cd83c01..1508d264 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -31,6 +31,7 @@ find_package(Spt3g REQUIRED) find_package(Python COMPONENTS Interpreter Development.Module REQUIRED) find_package(FLAC) find_package(GSL) +find_package(FFTW) find_package(OpenMP) if(OPENMP_FOUND) @@ -84,9 +85,17 @@ target_link_libraries(so3g PUBLIC spt3g::core) target_include_directories(so3g PRIVATE ${GSL_INCLUDE_DIR}) target_link_libraries(so3g PUBLIC ${GSL_LIBRARIES}) +# Link FFTW +target_include_directories(so3g PRIVATE ${FFTW_INCLUDE_DIRS}) +target_link_libraries(so3g PUBLIC ${FFTW_LIBRARIES}) +target_link_libraries(so3g PUBLIC ${FFTW_OMP_LIBRARY}) +# Link FFTWF +target_link_libraries(so3g PUBLIC ${FFTWF_LIBRARIES}) +target_link_libraries(so3g PUBLIC ${FFTWF_OMP_LIBRARY}) + # FLAC- library already comes from spt3g dependencies, but # we need to have the headers. -target_include_directories(so3g PRIVATE ${FLAC_INCLUDE_DIR}) +# target_include_directories(so3g PRIVATE ${FLAC_INCLUDE_DIR}) # You probably want to select openblas, so pass -DBLA_VENDOR=OpenBLAS find_package(BLAS REQUIRED) diff --git a/src/array_ops.cxx b/src/array_ops.cxx index 21489acc..6593bbe0 100644 --- a/src/array_ops.cxx +++ b/src/array_ops.cxx @@ -23,6 +23,8 @@ extern "C" { #include #include +#include + #include #include "so3g_numpy.h" #include "numpy_assist.h" @@ -1256,6 +1258,258 @@ void detrend(bp::object & tod, const std::string & method, const int linear_ncou } } +template +void _hanning_window(T* window, const int n) +{ + for (int i = 0; i < n; ++i) { + window[i] = 0.5 * (1 - cos(2 * M_PI * i / (n - 1))); + } +} + +template +auto _allocate_fftw_output(const int n) { + if constexpr (std::is_same::value) { + return static_cast(fftwf_malloc(sizeof(fftwf_complex) * n)); + } + else if constexpr (std::is_same::value) { + return static_cast(fftw_malloc(sizeof(fftw_complex) * n)); + } +} + +template +auto _select_fftw_plan(T1* in, T2* out, const int ndets, + const int nperseg, const int npsd, + const int idist, const int odist) +{ + int rank = 1; // 1D FFT + int n[] = {static_cast(nperseg)}; // FFT size + int howmany = ndets; // Number of transforms to compute + int istride = 1; // Input stride + int ostride = 1; // Output stride + int *inembed = n; // Input array dimensions + int *onembed = n; // Output array dimensions + + if constexpr (std::is_same::value) { + return fftwf_plan_many_dft_r2c(rank, n, howmany, in, inembed, istride, idist, + out, onembed, ostride, odist, FFTW_ESTIMATE); + } + else if constexpr (std::is_same::value) { + return fftw_plan_many_dft_r2c(rank, n, howmany, in, inembed, istride, idist, + out, onembed, ostride, odist, FFTW_ESTIMATE); + } +} + +template +void _execute_fftw_plan(T plan) +{ + if constexpr (std::is_same::value) { + fftw_execute(plan); + } + else if constexpr (std::is_same::value) { + fftwf_execute(plan); + } +} + +template +void _welch(const bp::object & signal, bp::object & psd, const double fs, + const int nperseg, int noverlap, const std::string & detrend_method, + const int detrend_ncount, const std::string & scaling, const bool padding) +{ + BufferWrapper signal_buf ("signal", signal, false, std::vector{-1, -1}); + if (signal_buf->strides[1] != signal_buf->itemsize) + throw ValueError_exception("Argument 'signal' must be contiguous in last axis."); + const int ndets = signal_buf->shape[0]; + const int nsamps = signal_buf->shape[1]; + T* signal_data = (T*)signal_buf->buf; + + BufferWrapper psd_buf ("psd", psd, false, std::vector{-1, -1}); + if (psd_buf->strides[1] != psd_buf->itemsize) + throw ValueError_exception("Argument 'psd' must be contiguous in last axis."); + T* psd_data = (T*)psd_buf->buf; + + // Data strides + int signal_stride = signal_buf->strides[0] / sizeof(T); + int psd_stride = psd_buf->strides[0] / sizeof(T); + + // Get number of threads for fftw + int nthreads = 1; + #pragma omp parallel + { + #ifdef _OPENMP + if (omp_get_thread_num() == 0) + nthreads = omp_get_num_threads(); + #endif + } + + if constexpr (std::is_same::value) { + fftwf_init_threads(); + fftwf_plan_with_nthreads(nthreads); + } + else if constexpr (std::is_same::value) { + fftw_init_threads(); + fftw_plan_with_nthreads(nthreads); + } + + if (nperseg > nsamps) { + throw ValueError_exception("nperseg > nsamps'"); + } + + // Default noverlap + if (noverlap < 0) { + noverlap = nperseg / 2; + } + + int npsd = (nperseg / 2) + 1; + int nstep = nperseg - noverlap; + + // Window array + T window[nperseg]; + _hanning_window(window, nperseg); + + T scale = 1.0; + + if (scaling == "density") { + T window_sum_sq = 0.0; + for (int i = 0; i < nperseg; ++i) { + window_sum_sq += window[i] * window[i]; + } + scale = 1.0 / (fs * window_sum_sq); + } + else if (scaling == "spectrum") { + T window_sum = 0.0; + for (int i = 0; i < nperseg; ++i) { + window_sum += window[i]; + } + scale = 1.0 / (window_sum * window_sum); + } + else { + throw ValueError_exception("Supported scaling options are 'density' " + "or 'spectrum'"); + } + + int nadd = 0; + if (padding) { + nadd = (-(nsamps - nperseg) % nstep) % nperseg; + } + + // Number of segments to average over + int nsegments = ((nsamps + nadd) - noverlap) / nstep; + + // Input array for segment + T* segment = (T*) malloc(ndets * nperseg * sizeof(T)); + + // Either fftw_complex* or fftwf_complex* + auto out = _allocate_fftw_output(ndets * npsd); + + // Plan creation is not thread safe and creating many upfront + // is less efficient, so just reuse one sequentially and + // parallelize internally. + auto plan = _select_fftw_plan(segment, out, ndets, nperseg, npsd, + nperseg, psd_stride); + + // Loop over segments + for (int s = 0; s < nsegments; ++s) { + int start = s * nstep; + int end = start + nperseg; + + #pragma omp parallel for + for (int i = 0; i < ndets; ++i) { + int signal_ioff = i * signal_stride + start; + int segment_ioff = i * nperseg; + + T* signal_row = signal_data + signal_ioff; + T* segment_row = segment + segment_ioff; + + // Copy data due to detrending and windowing + for (int j = 0; j < nperseg; ++j) { + segment_row[j] = signal_row[j]; + } + + // More efficient to detrend each row in a parallel loop + // with data copying and windowing than to do each individually + // in parallel loops + if (detrend_method != "none") { + _detrend(segment_row, 1, nperseg, nperseg, detrend_method, + detrend_ncount, 1); + } + // Apply window function + for (int j = 0; j < nperseg; ++j) { + segment_row[j] *= window[j]; + } + } + + // Execute either fftw or fftwf plan + _execute_fftw_plan(plan); + + // Add segement psd into total psd array + #pragma omp parallel for + for (int i = 0; i < ndets; ++i) { + int ioff = i * psd_stride; + for (int j = 0; j < npsd; ++j) { + T real = out[ioff + j][0]; + T imag = out[ioff + j][1]; + psd_data[ioff + j] += (real * real + imag * imag) * scale; + } + } + } + + // Get average psd over segments + #pragma omp parallel for + for (int i = 0; i < ndets; ++i) { + int ioff = i * psd_stride; + for (int j = 0; j < npsd; ++j) { + psd_data[ioff + j] /= nsegments; + } + } + + // Normalization of endpoints + int end_index = npsd; + + if (nperseg % 2) { + end_index -= 1; + } + + #pragma omp parallel for + for (int i = 0; i < ndets; ++i) { + int ioff = i * psd_stride; + for (int j = 1; j < end_index; ++j) { + psd_data[ioff + j] *= 2; + } + } + + if constexpr (std::is_same::value) { + fftw_destroy_plan(plan); + fftw_free(out); + fftw_cleanup_threads(); + } + else if constexpr (std::is_same::value) { + fftwf_destroy_plan(plan); + fftwf_free(out); + fftwf_cleanup_threads(); + } + free(segment); +} + +void welch(const bp::object & signal, bp::object & psd, const double fs, + const int nperseg, const int noverlap, const std::string & detrend_method, + const int detrend_ncount, const std::string & scaling, const bool padding) +{ + // get data type + int dtype = get_dtype(signal); + + if (dtype == NPY_FLOAT) { + _welch(signal, psd, fs, nperseg, noverlap, detrend_method, + detrend_ncount, scaling, padding); + } + else if (dtype == NPY_DOUBLE) { + _welch(signal, psd, fs, nperseg, noverlap, detrend_method, + detrend_ncount, scaling, padding); + } + else { + throw TypeError_exception("Only float32 or float64 arrays are supported."); + } +} + PYBINDINGS("so3g") { @@ -1415,4 +1669,20 @@ PYBINDINGS("so3g") " linear_ncount: Number (int) of samples to use on each end, when measuring mean level for 'linear'" " detrend. Must be a positive integer or -1. If -1, nsamps / 2 will be used. Values " " larger than 1 suppress the influence of white noise.\n"); + bp::def("welch", welch, + "welch(signal, psd, fs, nperseg, noverlap, detrend_method, detrend_ncount, scaling, padding)" + "\n" + "Calculate the PSD of each row a 2D data array (float32/float64) using Welch's method.\n" + "A Hanning window is applied. OMP is used to parallelize across dets (rows) and within FFTW\n." + "\n" + "Args:\n" + " signal: input array (float32/float64) buffer with shape (ndets, nsamps)\n" + " psd: output data buffer (float32/float64) to store computed PSDs. It is modified in place.\n" + " fs: Sample rate in Hz\n" + " nperseg: size (int) of each segment to be averaged over. nperseg must be <= nsamps.\n", + " noverlap: number (int) of samples to overlap for each segment. Set to nperseg / 2 if < 0" + " detrend_method: how to detrend each row (string). Options are 'mean', 'median', 'linear', and 'none'.\n" + " detrend_ncount: 'linear_ncount' parameter for 'linear' detrending (int). See docstring for detrend.\n" + " scaling: how to normalize the averaged PSD (string). Options are 'density' or 'spectrum.'\n" + " padding: extend the number of samples so all segments have equal length (bool).\n"); } \ No newline at end of file diff --git a/test/test_array_ops.py b/test/test_array_ops.py index 20c6036a..c49057fc 100644 --- a/test/test_array_ops.py +++ b/test/test_array_ops.py @@ -425,5 +425,71 @@ def test_02_linear_detrending(self): np.testing.assert_allclose(signal_copy, signal, rtol=rtol, atol=atol) +class TestWelch(unittest.TestCase): + """ + Test Welch PSD calculation. + """ + + def test_00_psd_float32(self): + nsamps = 1000 + ndets = 3 + dtype = "float32" + order = "C" + + x = np.linspace(0., 1., nsamps, dtype=dtype) + signal = np.array([(i + 1) * np.sin(2*np.pi*x + i) for i in range(ndets)], dtype=dtype, order=order) + + fs = 200. + nperseg = 256 + noverlap = -1 # noverlap = nperseg // 2 + detrend_method = "mean" # "constant" in scipy's welch + detrend_ncount = 0 + scaling = "density" + padding = False + + npsd = (nperseg // 2) + 1 + + window = np.hanning(nperseg) + scipy_f, scipy_psd = welch(signal, fs, nperseg=nperseg, window=window, detrend="constant", scaling=scaling) + + so3g_psd = np.zeros((ndets, npsd), dtype=dtype, order=order) + so3g.welch(signal, so3g_psd, fs, nperseg, noverlap, detrend_method, detrend_ncount, scaling, padding) + + print('diff',np.max(so3g_psd - scipy_psd)) + + rtol = 1e-4 + atol = 1e-8 + np.testing.assert_allclose(scipy_psd, so3g_psd, rtol=rtol, atol=atol) + + def test_00_psd_float64(self): + nsamps = 1000 + ndets = 3 + dtype = "float64" + order = "C" + + x = np.linspace(0., 1., nsamps, dtype=dtype) + signal = np.array([(i + 1) * np.sin(2*np.pi*x + i) for i in range(ndets)], dtype=dtype, order=order) + + fs = 200. + nperseg = 256 + noverlap = -1 # noverlap = nperseg // 2 + detrend_method = "mean" # "constant" in scipy's welch + detrend_ncount = 0 + scaling = "density" + padding = False + + npsd = (nperseg // 2) + 1 + + window = np.hanning(nperseg) + scipy_f, scipy_psd = welch(signal, fs, nperseg=nperseg, window=window, detrend="constant", scaling=scaling) + + so3g_psd = np.zeros((ndets, npsd), dtype=dtype, order=order) + so3g.welch(signal, so3g_psd, fs, nperseg, noverlap, detrend_method, detrend_ncount, scaling, padding) + + rtol = 1e-10 + atol = 1e-10 + np.testing.assert_allclose(scipy_psd, so3g_psd, rtol=rtol, atol=atol) + + if __name__ == "__main__": unittest.main() From 53811c8968f7f7a82509306625824650f96255cc Mon Sep 17 00:00:00 2001 From: Michael McCrackan Date: Tue, 25 Feb 2025 19:38:02 -0800 Subject: [PATCH 02/13] add cmake directory --- cmake/FindFFTW.cmake | 50 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) create mode 100644 cmake/FindFFTW.cmake diff --git a/cmake/FindFFTW.cmake b/cmake/FindFFTW.cmake new file mode 100644 index 00000000..0f81a21d --- /dev/null +++ b/cmake/FindFFTW.cmake @@ -0,0 +1,50 @@ +# - Find FFTW +# Find the native FFTW includes and library +# +# FFTW_INCLUDES - where to find fftw3.h +# FFTW_LIBRARIES - List of libraries when using FFTW. +# FFTW_FOUND - True if FFTW found. +if (FFTW_INCLUDES) +# Already in cache, be silent +set (FFTW_FIND_QUIETLY TRUE) +endif (FFTW_INCLUDES) +find_path (FFTW_INCLUDES fftw3.h HINTS ENV FFTW_INC) +find_library (FFTW_LIBRARIES NAMES fftw3 HINTS ENV FFTW_DIR) + +#handles finding thread implementations +find_library (FFTW_OMP_LIBRARY NAMES fftw3_omp HINTS ENV FFTW_DIR) + +include (FindPackageHandleStandardArgs) +set(FPHSA_NAME_MISMATCHED TRUE) +find_package_handle_standard_args(FFTW_OMP DEFAULT_MSG FFTW_OMP_LIBRARY) +mark_as_advanced(FFTW_OMP_LIBRARY) + +# handle the QUIETLY and REQUIRED arguments and set FFTW_FOUND to TRUE if +# all listed variables are TRUE +find_package_handle_standard_args (FFTW DEFAULT_MSG FFTW_LIBRARIES FFTW_INCLUDES) +mark_as_advanced (FFTW_LIBRARIES FFTW_INCLUDES) + +# - Find FFTWF +# FFTW_INCLUDES - where to find fftw3f.h +# FFTWF_LIBRARIES - List of libraries when using FFTWF. +# FFTWF_FOUND - True if FFTWF is found. +if (FFTW_INCLUDES) + # Already in cache, be silent + set (FFTWF_FIND_QUIETLY TRUE) +endif (FFTW_INCLUDES) + +find_path (FFTW_INCLUDES fftw3f.h HINTS ENV FFTW_INC) +find_library (FFTWF_LIBRARIES NAMES fftw3f HINTS ENV FFTW_DIR) + +# Handles finding thread implementations +find_library (FFTWF_OMP_LIBRARY NAMES fftw3f_omp HINTS ENV FFTW_DIR) + +include (FindPackageHandleStandardArgs) +set(FPHSA_NAME_MISMATCHED TRUE) +find_package_handle_standard_args(FFTWF_OMP DEFAULT_MSG FFTWF_OMP_LIBRARY) +mark_as_advanced(FFTWF_OMP_LIBRARY) + +# Handle the QUIETLY and REQUIRED arguments and set FFTWF_FOUND to TRUE if +# all listed variables are TRUE +find_package_handle_standard_args (FFTWF DEFAULT_MSG FFTWF_LIBRARIES FFTW_INCLUDES) +mark_as_advanced (FFTWF_LIBRARIES FFTW_INCLUDES) \ No newline at end of file From 79a12d45346f84b103933bd1d50c22fa0ece9dc7 Mon Sep 17 00:00:00 2001 From: Michael McCrackan Date: Tue, 25 Feb 2025 19:51:06 -0800 Subject: [PATCH 03/13] update dockerfile --- Dockerfile | 3 ++- src/array_ops.cxx | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/Dockerfile b/Dockerfile index 1ec32b50..2233ada7 100644 --- a/Dockerfile +++ b/Dockerfile @@ -14,7 +14,8 @@ RUN apt update && apt install -y \ gfortran \ libopenblas-openmp-dev \ libbz2-dev \ - python-is-python3 + python-is-python3 \ + libfftw3-dev # Set the working directory WORKDIR /app_lib/so3g diff --git a/src/array_ops.cxx b/src/array_ops.cxx index 6593bbe0..4b5e1c1f 100644 --- a/src/array_ops.cxx +++ b/src/array_ops.cxx @@ -1685,4 +1685,4 @@ PYBINDINGS("so3g") " detrend_ncount: 'linear_ncount' parameter for 'linear' detrending (int). See docstring for detrend.\n" " scaling: how to normalize the averaged PSD (string). Options are 'density' or 'spectrum.'\n" " padding: extend the number of samples so all segments have equal length (bool).\n"); -} \ No newline at end of file +} From a76e4e0187537a1a063382171c212d03cdd22c2b Mon Sep 17 00:00:00 2001 From: Michael McCrackan Date: Tue, 25 Feb 2025 20:08:38 -0800 Subject: [PATCH 04/13] debugging fftwf issue --- cmake/FindFFTW.cmake | 12 +++++------- docker/so3g-setup.sh | 1 + 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/cmake/FindFFTW.cmake b/cmake/FindFFTW.cmake index 0f81a21d..9b183b91 100644 --- a/cmake/FindFFTW.cmake +++ b/cmake/FindFFTW.cmake @@ -11,7 +11,7 @@ endif (FFTW_INCLUDES) find_path (FFTW_INCLUDES fftw3.h HINTS ENV FFTW_INC) find_library (FFTW_LIBRARIES NAMES fftw3 HINTS ENV FFTW_DIR) -#handles finding thread implementations +#handles finding omp implementations find_library (FFTW_OMP_LIBRARY NAMES fftw3_omp HINTS ENV FFTW_DIR) include (FindPackageHandleStandardArgs) @@ -24,19 +24,17 @@ mark_as_advanced(FFTW_OMP_LIBRARY) find_package_handle_standard_args (FFTW DEFAULT_MSG FFTW_LIBRARIES FFTW_INCLUDES) mark_as_advanced (FFTW_LIBRARIES FFTW_INCLUDES) -# - Find FFTWF -# FFTW_INCLUDES - where to find fftw3f.h # FFTWF_LIBRARIES - List of libraries when using FFTWF. # FFTWF_FOUND - True if FFTWF is found. -if (FFTW_INCLUDES) +# if (FFTW_INCLUDES) # Already in cache, be silent - set (FFTWF_FIND_QUIETLY TRUE) -endif (FFTW_INCLUDES) +# set (FFTWF_FIND_QUIETLY TRUE) +# endif (FFTW_INCLUDES) find_path (FFTW_INCLUDES fftw3f.h HINTS ENV FFTW_INC) find_library (FFTWF_LIBRARIES NAMES fftw3f HINTS ENV FFTW_DIR) -# Handles finding thread implementations +# Handles finding omp implementations find_library (FFTWF_OMP_LIBRARY NAMES fftw3f_omp HINTS ENV FFTW_DIR) include (FindPackageHandleStandardArgs) diff --git a/docker/so3g-setup.sh b/docker/so3g-setup.sh index f8594493..fced41ee 100644 --- a/docker/so3g-setup.sh +++ b/docker/so3g-setup.sh @@ -6,6 +6,7 @@ cmake \ -DCMAKE_VERBOSE_MAKEFILE=ON \ -DCMAKE_BUILD_TYPE=Release \ -DPython_EXECUTABLE=$(which python3) \ + -DCMAKE_MODULE_PATH=../cmake .. make -j 2 make install From 1777a6a8d9d2a28b72ccc449264c9ce5f41f1a84 Mon Sep 17 00:00:00 2001 From: Michael McCrackan Date: Tue, 25 Feb 2025 20:16:10 -0800 Subject: [PATCH 05/13] fix typo --- docker/so3g-setup.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/so3g-setup.sh b/docker/so3g-setup.sh index fced41ee..96ec7b5c 100644 --- a/docker/so3g-setup.sh +++ b/docker/so3g-setup.sh @@ -6,7 +6,7 @@ cmake \ -DCMAKE_VERBOSE_MAKEFILE=ON \ -DCMAKE_BUILD_TYPE=Release \ -DPython_EXECUTABLE=$(which python3) \ - -DCMAKE_MODULE_PATH=../cmake + -DCMAKE_MODULE_PATH=../cmake \ .. make -j 2 make install From 9c0321a377ca4305601bd865636cfa57675922d6 Mon Sep 17 00:00:00 2001 From: Michael McCrackan Date: Tue, 25 Feb 2025 20:34:30 -0800 Subject: [PATCH 06/13] test setup --- docker/so3g-setup.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/so3g-setup.sh b/docker/so3g-setup.sh index 96ec7b5c..dfa16ea2 100644 --- a/docker/so3g-setup.sh +++ b/docker/so3g-setup.sh @@ -6,7 +6,7 @@ cmake \ -DCMAKE_VERBOSE_MAKEFILE=ON \ -DCMAKE_BUILD_TYPE=Release \ -DPython_EXECUTABLE=$(which python3) \ - -DCMAKE_MODULE_PATH=../cmake \ + -DCMAKE_MODULE_PATH=$(pwd)/../cmake \ .. make -j 2 make install From f3e9d7c6f28b1ffa1c9ea903ba21d966590483ee Mon Sep 17 00:00:00 2001 From: Michael McCrackan Date: Wed, 26 Feb 2025 13:34:52 -0800 Subject: [PATCH 07/13] uncomment cmake flac include --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 1508d264..93429d48 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -95,7 +95,7 @@ target_link_libraries(so3g PUBLIC ${FFTWF_OMP_LIBRARY}) # FLAC- library already comes from spt3g dependencies, but # we need to have the headers. -# target_include_directories(so3g PRIVATE ${FLAC_INCLUDE_DIR}) +target_include_directories(so3g PRIVATE ${FLAC_INCLUDE_DIR}) # You probably want to select openblas, so pass -DBLA_VENDOR=OpenBLAS find_package(BLAS REQUIRED) From bc664fe594c558f455aa7d358dbe957b520d8f49 Mon Sep 17 00:00:00 2001 From: Michael McCrackan Date: Thu, 27 Feb 2025 22:12:42 -0800 Subject: [PATCH 08/13] general updates --- src/array_ops.cxx | 86 +++++++++++++++++++++--------------------- test/test_array_ops.py | 26 ++++++------- 2 files changed, 54 insertions(+), 58 deletions(-) diff --git a/src/array_ops.cxx b/src/array_ops.cxx index 4b5e1c1f..f7d32e7f 100644 --- a/src/array_ops.cxx +++ b/src/array_ops.cxx @@ -1270,7 +1270,7 @@ template auto _allocate_fftw_output(const int n) { if constexpr (std::is_same::value) { return static_cast(fftwf_malloc(sizeof(fftwf_complex) * n)); - } + } else if constexpr (std::is_same::value) { return static_cast(fftw_malloc(sizeof(fftw_complex) * n)); } @@ -1288,7 +1288,7 @@ auto _select_fftw_plan(T1* in, T2* out, const int ndets, int ostride = 1; // Output stride int *inembed = n; // Input array dimensions int *onembed = n; // Output array dimensions - + if constexpr (std::is_same::value) { return fftwf_plan_many_dft_r2c(rank, n, howmany, in, inembed, istride, idist, out, onembed, ostride, odist, FFTW_ESTIMATE); @@ -1304,7 +1304,7 @@ void _execute_fftw_plan(T plan) { if constexpr (std::is_same::value) { fftw_execute(plan); - } + } else if constexpr (std::is_same::value) { fftwf_execute(plan); } @@ -1312,8 +1312,8 @@ void _execute_fftw_plan(T plan) template void _welch(const bp::object & signal, bp::object & psd, const double fs, - const int nperseg, int noverlap, const std::string & detrend_method, - const int detrend_ncount, const std::string & scaling, const bool padding) + const int nperseg, int noverlap, const std::string & detrend_method, + const int detrend_linear_ncount, const std::string & scaling) { BufferWrapper signal_buf ("signal", signal, false, std::vector{-1, -1}); if (signal_buf->strides[1] != signal_buf->itemsize) @@ -1321,12 +1321,12 @@ void _welch(const bp::object & signal, bp::object & psd, const double fs, const int ndets = signal_buf->shape[0]; const int nsamps = signal_buf->shape[1]; T* signal_data = (T*)signal_buf->buf; - + BufferWrapper psd_buf ("psd", psd, false, std::vector{-1, -1}); if (psd_buf->strides[1] != psd_buf->itemsize) throw ValueError_exception("Argument 'psd' must be contiguous in last axis."); T* psd_data = (T*)psd_buf->buf; - + // Data strides int signal_stride = signal_buf->strides[0] / sizeof(T); int psd_stride = psd_buf->strides[0] / sizeof(T); @@ -1340,11 +1340,11 @@ void _welch(const bp::object & signal, bp::object & psd, const double fs, nthreads = omp_get_num_threads(); #endif } - + if constexpr (std::is_same::value) { fftwf_init_threads(); fftwf_plan_with_nthreads(nthreads); - } + } else if constexpr (std::is_same::value) { fftw_init_threads(); fftw_plan_with_nthreads(nthreads); @@ -1361,7 +1361,7 @@ void _welch(const bp::object & signal, bp::object & psd, const double fs, int npsd = (nperseg / 2) + 1; int nstep = nperseg - noverlap; - + // Window array T window[nperseg]; _hanning_window(window, nperseg); @@ -1387,21 +1387,16 @@ void _welch(const bp::object & signal, bp::object & psd, const double fs, "or 'spectrum'"); } - int nadd = 0; - if (padding) { - nadd = (-(nsamps - nperseg) % nstep) % nperseg; - } - // Number of segments to average over - int nsegments = ((nsamps + nadd) - noverlap) / nstep; + int nsegments = ((nsamps) - noverlap) / nstep; // Input array for segment T* segment = (T*) malloc(ndets * nperseg * sizeof(T)); - + // Either fftw_complex* or fftwf_complex* auto out = _allocate_fftw_output(ndets * npsd); - - // Plan creation is not thread safe and creating many upfront + + // Plan creation is not thread-safe and creating many upfront // is less efficient, so just reuse one sequentially and // parallelize internally. auto plan = _select_fftw_plan(segment, out, ndets, nperseg, npsd, @@ -1416,7 +1411,7 @@ void _welch(const bp::object & signal, bp::object & psd, const double fs, for (int i = 0; i < ndets; ++i) { int signal_ioff = i * signal_stride + start; int segment_ioff = i * nperseg; - + T* signal_row = signal_data + signal_ioff; T* segment_row = segment + segment_ioff; @@ -1424,17 +1419,17 @@ void _welch(const bp::object & signal, bp::object & psd, const double fs, for (int j = 0; j < nperseg; ++j) { segment_row[j] = signal_row[j]; } - + // More efficient to detrend each row in a parallel loop // with data copying and windowing than to do each individually // in parallel loops if (detrend_method != "none") { _detrend(segment_row, 1, nperseg, nperseg, detrend_method, - detrend_ncount, 1); + detrend_linear_ncount, 1); } // Apply window function for (int j = 0; j < nperseg; ++j) { - segment_row[j] *= window[j]; + segment_row[j] *= window[j]; } } @@ -1452,7 +1447,7 @@ void _welch(const bp::object & signal, bp::object & psd, const double fs, } } } - + // Get average psd over segments #pragma omp parallel for for (int i = 0; i < ndets; ++i) { @@ -1461,12 +1456,12 @@ void _welch(const bp::object & signal, bp::object & psd, const double fs, psd_data[ioff + j] /= nsegments; } } - + // Normalization of endpoints int end_index = npsd; - + if (nperseg % 2) { - end_index -= 1; + end_index -= 1; } #pragma omp parallel for @@ -1476,12 +1471,12 @@ void _welch(const bp::object & signal, bp::object & psd, const double fs, psd_data[ioff + j] *= 2; } } - + if constexpr (std::is_same::value) { fftw_destroy_plan(plan); fftw_free(out); fftw_cleanup_threads(); - } + } else if constexpr (std::is_same::value) { fftwf_destroy_plan(plan); fftwf_free(out); @@ -1490,20 +1485,20 @@ void _welch(const bp::object & signal, bp::object & psd, const double fs, free(segment); } -void welch(const bp::object & signal, bp::object & psd, const double fs, - const int nperseg, const int noverlap, const std::string & detrend_method, - const int detrend_ncount, const std::string & scaling, const bool padding) +void welch(const bp::object & signal, bp::object & psd, const double fs, + const int nperseg, const int noverlap, const std::string & detrend_method, + const int detrend_linear_ncount, const std::string & scaling) { // get data type int dtype = get_dtype(signal); if (dtype == NPY_FLOAT) { _welch(signal, psd, fs, nperseg, noverlap, detrend_method, - detrend_ncount, scaling, padding); + detrend_linear_ncount, scaling); } else if (dtype == NPY_DOUBLE) { _welch(signal, psd, fs, nperseg, noverlap, detrend_method, - detrend_ncount, scaling, padding); + detrend_linear_ncount, scaling); } else { throw TypeError_exception("Only float32 or float64 arrays are supported."); @@ -1670,19 +1665,24 @@ PYBINDINGS("so3g") " detrend. Must be a positive integer or -1. If -1, nsamps / 2 will be used. Values " " larger than 1 suppress the influence of white noise.\n"); bp::def("welch", welch, - "welch(signal, psd, fs, nperseg, noverlap, detrend_method, detrend_ncount, scaling, padding)" + "welch(signal, psd, fs, nperseg, noverlap, detrend_method, detrend_linear_ncount, scaling)" "\n" "Calculate the PSD of each row a 2D data array (float32/float64) using Welch's method.\n" - "A Hanning window is applied. OMP is used to parallelize across dets (rows) and within FFTW\n." + "A Hanning window is applied. OMP is used to parallelize across dets (rows) and within FFTW.\n" + "Uses mean normalization for PSD segments. Based on the scipy implementation of Welch's method:\n" + "https://docs.scipy.org/doc/scipy/reference/generated/scipy.signal.welch.html" "\n" "Args:\n" " signal: input array (float32/float64) buffer with shape (ndets, nsamps)\n" - " psd: output data buffer (float32/float64) to store computed PSDs. It is modified in place.\n" - " fs: Sample rate in Hz\n" - " nperseg: size (int) of each segment to be averaged over. nperseg must be <= nsamps.\n", - " noverlap: number (int) of samples to overlap for each segment. Set to nperseg / 2 if < 0" + " psd: output data buffer (float32/float64) to store computed PSDs with shape\n" + " (ndets,(nperseg // 2) + 1). It is modified in place.\n" + " fs: Sample rate in Hz (double)\n" + " nperseg: size (int) of each segment to be averaged over. nperseg must be <= nsamps.\n" + " noverlap: number (int) of samples to overlap for each segment. Set to nperseg / 2 if < 0\n" " detrend_method: how to detrend each row (string). Options are 'mean', 'median', 'linear', and 'none'.\n" - " detrend_ncount: 'linear_ncount' parameter for 'linear' detrending (int). See docstring for detrend.\n" - " scaling: how to normalize the averaged PSD (string). Options are 'density' or 'spectrum.'\n" - " padding: extend the number of samples so all segments have equal length (bool).\n"); + " See docstring for detrend.\n" + " detrend_linear_ncount: 'linear_ncount' parameter for 'linear' detrending (int). See docstring for detrend.\n" + " scaling: how to normalize the averaged PSD (string). Options are 'density' or 'spectrum.'\n" + " density normalizes by 1.0 / (fs * sum(window^2)). spectrum normalizes by \n" + " 1.0 / (sum(window)^2\n"); } diff --git a/test/test_array_ops.py b/test/test_array_ops.py index c49057fc..97a76d2f 100644 --- a/test/test_array_ops.py +++ b/test/test_array_ops.py @@ -429,7 +429,7 @@ class TestWelch(unittest.TestCase): """ Test Welch PSD calculation. """ - + def test_00_psd_float32(self): nsamps = 1000 ndets = 3 @@ -438,29 +438,26 @@ def test_00_psd_float32(self): x = np.linspace(0., 1., nsamps, dtype=dtype) signal = np.array([(i + 1) * np.sin(2*np.pi*x + i) for i in range(ndets)], dtype=dtype, order=order) - + fs = 200. nperseg = 256 noverlap = -1 # noverlap = nperseg // 2 detrend_method = "mean" # "constant" in scipy's welch detrend_ncount = 0 scaling = "density" - padding = False - + npsd = (nperseg // 2) + 1 - + window = np.hanning(nperseg) scipy_f, scipy_psd = welch(signal, fs, nperseg=nperseg, window=window, detrend="constant", scaling=scaling) - + so3g_psd = np.zeros((ndets, npsd), dtype=dtype, order=order) - so3g.welch(signal, so3g_psd, fs, nperseg, noverlap, detrend_method, detrend_ncount, scaling, padding) + so3g.welch(signal, so3g_psd, fs, nperseg, noverlap, detrend_method, detrend_ncount, scaling) - print('diff',np.max(so3g_psd - scipy_psd)) - rtol = 1e-4 atol = 1e-8 np.testing.assert_allclose(scipy_psd, so3g_psd, rtol=rtol, atol=atol) - + def test_00_psd_float64(self): nsamps = 1000 ndets = 3 @@ -469,23 +466,22 @@ def test_00_psd_float64(self): x = np.linspace(0., 1., nsamps, dtype=dtype) signal = np.array([(i + 1) * np.sin(2*np.pi*x + i) for i in range(ndets)], dtype=dtype, order=order) - + fs = 200. nperseg = 256 noverlap = -1 # noverlap = nperseg // 2 detrend_method = "mean" # "constant" in scipy's welch detrend_ncount = 0 scaling = "density" - padding = False - + npsd = (nperseg // 2) + 1 window = np.hanning(nperseg) scipy_f, scipy_psd = welch(signal, fs, nperseg=nperseg, window=window, detrend="constant", scaling=scaling) so3g_psd = np.zeros((ndets, npsd), dtype=dtype, order=order) - so3g.welch(signal, so3g_psd, fs, nperseg, noverlap, detrend_method, detrend_ncount, scaling, padding) - + so3g.welch(signal, so3g_psd, fs, nperseg, noverlap, detrend_method, detrend_ncount, scaling) + rtol = 1e-10 atol = 1e-10 np.testing.assert_allclose(scipy_psd, so3g_psd, rtol=rtol, atol=atol) From ccddbe781d6411574c5ceb35902e0e77e4560eb1 Mon Sep 17 00:00:00 2001 From: Michael McCrackan Date: Thu, 27 Feb 2025 22:18:56 -0800 Subject: [PATCH 09/13] clean up FindFFTW --- cmake/FindFFTW.cmake | 47 ++++++++++++++++++-------------------------- 1 file changed, 19 insertions(+), 28 deletions(-) diff --git a/cmake/FindFFTW.cmake b/cmake/FindFFTW.cmake index 9b183b91..d9dfb4e3 100644 --- a/cmake/FindFFTW.cmake +++ b/cmake/FindFFTW.cmake @@ -1,48 +1,39 @@ -# - Find FFTW -# Find the native FFTW includes and library -# +# Find FFTW # FFTW_INCLUDES - where to find fftw3.h -# FFTW_LIBRARIES - List of libraries when using FFTW. -# FFTW_FOUND - True if FFTW found. +# FFTW_LIBRARIES - List of libraries when using FFTW +# FFTW_FOUND - True if FFTW is found + if (FFTW_INCLUDES) -# Already in cache, be silent -set (FFTW_FIND_QUIETLY TRUE) -endif (FFTW_INCLUDES) + set (FFTW_FIND_QUIETLY TRUE) # Already in cache, be silent +endif () + find_path (FFTW_INCLUDES fftw3.h HINTS ENV FFTW_INC) find_library (FFTW_LIBRARIES NAMES fftw3 HINTS ENV FFTW_DIR) - -#handles finding omp implementations -find_library (FFTW_OMP_LIBRARY NAMES fftw3_omp HINTS ENV FFTW_DIR) +find_library (FFTW_OMP_LIBRARY NAMES fftw3_omp HINTS ENV FFTW_DIR) # Find OMP implementation include (FindPackageHandleStandardArgs) set(FPHSA_NAME_MISMATCHED TRUE) find_package_handle_standard_args(FFTW_OMP DEFAULT_MSG FFTW_OMP_LIBRARY) mark_as_advanced(FFTW_OMP_LIBRARY) -# handle the QUIETLY and REQUIRED arguments and set FFTW_FOUND to TRUE if -# all listed variables are TRUE -find_package_handle_standard_args (FFTW DEFAULT_MSG FFTW_LIBRARIES FFTW_INCLUDES) -mark_as_advanced (FFTW_LIBRARIES FFTW_INCLUDES) +find_package_handle_standard_args(FFTW DEFAULT_MSG FFTW_LIBRARIES FFTW_INCLUDES) +mark_as_advanced(FFTW_LIBRARIES FFTW_INCLUDES) -# FFTWF_LIBRARIES - List of libraries when using FFTWF. -# FFTWF_FOUND - True if FFTWF is found. -# if (FFTW_INCLUDES) - # Already in cache, be silent -# set (FFTWF_FIND_QUIETLY TRUE) -# endif (FFTW_INCLUDES) +# FFTWF_LIBRARIES - List of libraries when using FFTWF +# FFTWF_FOUND - True if FFTWF is found + +if (FFTW_INCLUDES) + set (FFTWF_FIND_QUIETLY TRUE) # Already in cache, be silent +endif () find_path (FFTW_INCLUDES fftw3f.h HINTS ENV FFTW_INC) find_library (FFTWF_LIBRARIES NAMES fftw3f HINTS ENV FFTW_DIR) - -# Handles finding omp implementations -find_library (FFTWF_OMP_LIBRARY NAMES fftw3f_omp HINTS ENV FFTW_DIR) +find_library (FFTWF_OMP_LIBRARY NAMES fftw3f_omp HINTS ENV FFTW_DIR) # Find OMP implementation include (FindPackageHandleStandardArgs) set(FPHSA_NAME_MISMATCHED TRUE) find_package_handle_standard_args(FFTWF_OMP DEFAULT_MSG FFTWF_OMP_LIBRARY) mark_as_advanced(FFTWF_OMP_LIBRARY) -# Handle the QUIETLY and REQUIRED arguments and set FFTWF_FOUND to TRUE if -# all listed variables are TRUE -find_package_handle_standard_args (FFTWF DEFAULT_MSG FFTWF_LIBRARIES FFTW_INCLUDES) -mark_as_advanced (FFTWF_LIBRARIES FFTW_INCLUDES) \ No newline at end of file +find_package_handle_standard_args(FFTWF DEFAULT_MSG FFTWF_LIBRARIES FFTW_INCLUDES) +mark_as_advanced(FFTWF_LIBRARIES FFTW_INCLUDES) From f00a14dd82f0e30013d64e60bb008e5ef26a3931 Mon Sep 17 00:00:00 2001 From: Michael McCrackan Date: Thu, 27 Feb 2025 22:31:08 -0800 Subject: [PATCH 10/13] add additional checks --- src/array_ops.cxx | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/src/array_ops.cxx b/src/array_ops.cxx index f7d32e7f..56a96bf0 100644 --- a/src/array_ops.cxx +++ b/src/array_ops.cxx @@ -1322,11 +1322,22 @@ void _welch(const bp::object & signal, bp::object & psd, const double fs, const int nsamps = signal_buf->shape[1]; T* signal_data = (T*)signal_buf->buf; - BufferWrapper psd_buf ("psd", psd, false, std::vector{-1, -1}); + BufferWrapper psd_buf ("psd", psd, false, std::vector{-1, nperseg / 2 + 1}); if (psd_buf->strides[1] != psd_buf->itemsize) throw ValueError_exception("Argument 'psd' must be contiguous in last axis."); + const int npsd = psd_buf->shape[1]; T* psd_data = (T*)psd_buf->buf; + if (nperseg > nsamps) { + throw ValueError_exception("nperseg must be <= nsamps"); + } + if (noverlap >= nperseg) { + throw ValueError_exception("noverlap must be < nperseg"); + } + if (fs <= 0) { + throw ValueError_exception("fs must be > 0"); + } + // Data strides int signal_stride = signal_buf->strides[0] / sizeof(T); int psd_stride = psd_buf->strides[0] / sizeof(T); @@ -1350,16 +1361,11 @@ void _welch(const bp::object & signal, bp::object & psd, const double fs, fftw_plan_with_nthreads(nthreads); } - if (nperseg > nsamps) { - throw ValueError_exception("nperseg > nsamps'"); - } - // Default noverlap if (noverlap < 0) { noverlap = nperseg / 2; } - int npsd = (nperseg / 2) + 1; int nstep = nperseg - noverlap; // Window array From 4062f29ec34ec03ad797a42682e280c1a38fccf7 Mon Sep 17 00:00:00 2001 From: Michael McCrackan Date: Thu, 10 Jul 2025 12:29:30 -0700 Subject: [PATCH 11/13] fix typo --- Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index 721ffcfc..df6722ee 100644 --- a/Dockerfile +++ b/Dockerfile @@ -15,7 +15,7 @@ RUN apt update && apt install -y \ libopenblas-openmp-dev \ libbz2-dev \ python-is-python3 \ - libfftw3-dev + libfftw3-dev \ libgoogle-glog-dev \ libgflags-dev \ libmetis-dev \ From 4e798f4fb4295d074caa0683559d5babedb49e54 Mon Sep 17 00:00:00 2001 From: Michael McCrackan Date: Thu, 10 Jul 2025 13:13:11 -0700 Subject: [PATCH 12/13] change ceres library name --- cmake/FindCeres.cmake | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cmake/FindCeres.cmake b/cmake/FindCeres.cmake index 686c4463..fc79fc05 100644 --- a/cmake/FindCeres.cmake +++ b/cmake/FindCeres.cmake @@ -15,8 +15,8 @@ find_package(Glog REQUIRED) find_package(Gflags REQUIRED) # Create the imported Ceres target -add_library(Ceres::Ceres UNKNOWN IMPORTED) -set_target_properties(Ceres::Ceres PROPERTIES +add_library(Ceres::ceres UNKNOWN IMPORTED) +set_target_properties(Ceres::ceres PROPERTIES IMPORTED_LOCATION "${CERES_LIBRARY}" INTERFACE_INCLUDE_DIRECTORIES "${CERES_INCLUDE_DIR};${EIGEN3_INCLUDE_DIR}" INTERFACE_LINK_LIBRARIES "${GLOG_LIBRARIES};${GFLAGS_LIBRARIES};${CERES_LIBRARY}" From ea80107560b2e7c7edbcade1a312f1a042f3870b Mon Sep 17 00:00:00 2001 From: Michael McCrackan Date: Thu, 10 Jul 2025 13:28:36 -0700 Subject: [PATCH 13/13] undo change --- CMakeLists.txt | 2 +- cmake/FindCeres.cmake | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 1efab4ba..15fa262e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -87,7 +87,7 @@ target_link_libraries(so3g PUBLIC spt3g::core) target_include_directories(so3g PRIVATE ${GSL_INCLUDE_DIR}) target_link_libraries(so3g PUBLIC ${GSL_LIBRARIES}) # Link Ceres -target_link_libraries(so3g PUBLIC Ceres::ceres Eigen3::Eigen) +target_link_libraries(so3g PUBLIC Ceres::Ceres Eigen3::Eigen) # Link FFTW target_include_directories(so3g PRIVATE ${FFTW_INCLUDE_DIRS}) diff --git a/cmake/FindCeres.cmake b/cmake/FindCeres.cmake index fc79fc05..686c4463 100644 --- a/cmake/FindCeres.cmake +++ b/cmake/FindCeres.cmake @@ -15,8 +15,8 @@ find_package(Glog REQUIRED) find_package(Gflags REQUIRED) # Create the imported Ceres target -add_library(Ceres::ceres UNKNOWN IMPORTED) -set_target_properties(Ceres::ceres PROPERTIES +add_library(Ceres::Ceres UNKNOWN IMPORTED) +set_target_properties(Ceres::Ceres PROPERTIES IMPORTED_LOCATION "${CERES_LIBRARY}" INTERFACE_INCLUDE_DIRECTORIES "${CERES_INCLUDE_DIR};${EIGEN3_INCLUDE_DIR}" INTERFACE_LINK_LIBRARIES "${GLOG_LIBRARIES};${GFLAGS_LIBRARIES};${CERES_LIBRARY}"