From 30612a69eb2461e3d3811daca37de3afaf20b15a Mon Sep 17 00:00:00 2001 From: Adam Wildavsky Date: Fri, 12 Jun 2026 21:08:32 -0400 Subject: [PATCH 1/2] Initial parallelization by Cursor --- library/tests/args.cpp | 7 +-- library/tests/dtest.cpp | 4 ++ library/tests/dtest_parallel.cpp | 95 ++++++++++++++++++++++++++++++++ library/tests/dtest_parallel.hpp | 28 ++++++++++ library/tests/loop.cpp | 67 ++++++++++++++++++++-- 5 files changed, 192 insertions(+), 9 deletions(-) create mode 100644 library/tests/dtest_parallel.cpp create mode 100644 library/tests/dtest_parallel.hpp diff --git a/library/tests/args.cpp b/library/tests/args.cpp index 8c4c6c44..e83e4635 100644 --- a/library/tests/args.cpp +++ b/library/tests/args.cpp @@ -89,10 +89,9 @@ void usage( "-s, --solver One of: solve, calc, play, par, dealerpar.\n" << " (Default: solve)\n" << "\n" << - "-n, --numthr n Maximum number of threads (legacy option).\n" << - " (Default: 0 uses DDS/library defaults; when using\n" << - " the modern SolverContext API, prefer configuring\n" << - " threads via SolverConfig instead of this option.)\n" << + "-n, --numthr n Worker threads for solve/calc/play batches.\n" << + " 0 = auto (hardware concurrency), 1 = sequential.\n" << + " (Default: 0)\n" << "\n" << "-m, --memory n Total DDS memory size in MB (legacy option).\n" << " (Default: 0 uses DDS/library defaults; when using\n" << diff --git a/library/tests/dtest.cpp b/library/tests/dtest.cpp index d4640cdd..1116fb1f 100644 --- a/library/tests/dtest.cpp +++ b/library/tests/dtest.cpp @@ -33,6 +33,10 @@ int main(int argc, char * argv[]) DDSInfo info; GetDDSInfo(&info); cout << info.systemString << endl; + if (options.num_threads_ == 0) + cout << "dtest worker threads: auto\n"; + else + cout << "dtest worker threads: " << options.num_threads_ << "\n"; real_main(argc, argv); diff --git a/library/tests/dtest_parallel.cpp b/library/tests/dtest_parallel.cpp new file mode 100644 index 00000000..778bf26f --- /dev/null +++ b/library/tests/dtest_parallel.cpp @@ -0,0 +1,95 @@ +/* + DDS, a bridge double dummy solver. + + Copyright (C) 2006-2014 by Bo Haglund / + 2014-2018 by Bo Haglund & Soren Hein. + + See LICENSE and README. +*/ + +#include "dtest_parallel.hpp" + +#include +#include +#include +#include + +#include + + +int dtest_effective_threads(const int requested, const int workload) +{ + if (workload <= 1) + return 1; + + const unsigned hw = std::thread::hardware_concurrency(); + const int auto_count = hw > 0 ? static_cast(hw) : 1; + + int n = requested > 0 ? requested : auto_count; + n = std::max(1, std::min(n, workload)); + return n; +} + + +int dtest_run_parallel( + const int count, + const int requested_threads, + const std::function & body) +{ + if (count <= 0) + return RETURN_NO_FAULT; + + const int nthreads = dtest_effective_threads(requested_threads, count); + if (nthreads <= 1) + { + for (int i = 0; i < count; ++i) + { + const int rc = body(i); + if (rc != RETURN_NO_FAULT) + return rc; + } + return RETURN_NO_FAULT; + } + + std::atomic next{0}; + std::atomic first_error{0}; + + auto worker = [&] { + for (;;) + { + const int i = next.fetch_add(1, std::memory_order_relaxed); + if (i >= count || first_error.load(std::memory_order_relaxed) != 0) + break; + + const int rc = body(i); + if (rc != RETURN_NO_FAULT) + { + int expected = 0; + first_error.compare_exchange_strong( + expected, rc, std::memory_order_relaxed); + break; + } + } + }; + + std::vector threads; + threads.reserve(static_cast(nthreads)); + try + { + for (int t = 0; t < nthreads; ++t) + threads.emplace_back(worker); + } + catch (...) + { + for (auto & th : threads) + if (th.joinable()) + th.join(); + throw; + } + + for (auto & th : threads) + th.join(); + + const int err = first_error.load(std::memory_order_relaxed); + return err != 0 ? err : RETURN_NO_FAULT; +} diff --git a/library/tests/dtest_parallel.hpp b/library/tests/dtest_parallel.hpp new file mode 100644 index 00000000..0e88414a --- /dev/null +++ b/library/tests/dtest_parallel.hpp @@ -0,0 +1,28 @@ +/* + DDS, a bridge double dummy solver. + + Copyright (C) 2006-2014 by Bo Haglund / + 2014-2018 by Bo Haglund & Soren Hein. + + See LICENSE and README. +*/ + +#pragma once + +#include + +/// Resolve the worker thread count for a dtest batch. +/// +/// @param requested Thread count from -n (0 = auto from hardware). +/// @param workload Number of independent items in the batch. +/// @return Thread count in [1, workload]. +int dtest_effective_threads(int requested, int workload); + +/// Run @p body for each index in [0, count) using up to @p requested_threads workers. +/// +/// @p body must return RETURN_NO_FAULT (1) on success. +/// @return First non-success code from @p body, or RETURN_NO_FAULT. +int dtest_run_parallel( + int count, + int requested_threads, + const std::function & body); diff --git a/library/tests/loop.cpp b/library/tests/loop.cpp index 10a75fcc..11a22c9e 100644 --- a/library/tests/loop.cpp +++ b/library/tests/loop.cpp @@ -16,6 +16,8 @@ #include "TestTimer.hpp" #include "compare.hpp" #include "print.hpp" +#include "cst.hpp" +#include "dtest_parallel.hpp" using std::cout; using std::endl; @@ -26,6 +28,7 @@ using std::right; #define BATCHTIMES extern TestTimer timer; +extern OptionsType options; void loop_solve( @@ -57,7 +60,28 @@ void loop_solve( timer.start(count); int ret; - if ((ret = SolveAllChunks(bop, solvedbdp, 1)) != RETURN_NO_FAULT) + if (dtest_effective_threads(options.num_threads_, count) <= 1) + { + ret = SolveAllBoardsSeq(bop, solvedbdp); + } + else + { + solvedbdp->no_of_boards = count; + ret = dtest_run_parallel(count, options.num_threads_, + [&](const int j) -> int { + FutureTricks fut; + const int res = SolveBoardPBN( + bop->deals[j], bop->target[j], bop->solutions[j], bop->mode[j], + &fut, 0); + if (res == RETURN_NO_FAULT) + { + solvedbdp->solved_board[j] = fut; + return RETURN_NO_FAULT; + } + return res; + }); + } + if (ret != RETURN_NO_FAULT) { cout << "loop_solve: i " << i << ", return " << ret << "\n"; exit(0); @@ -114,8 +138,29 @@ bool loop_calc( timer.start(count); int ret; - if ((ret = CalcAllTablesPBN(dealsp, -1, filter, resp, parp)) - != RETURN_NO_FAULT) + if (dtest_effective_threads(options.num_threads_, count) <= 1) + { + ret = CalcAllTablesPBN(dealsp, -1, filter, resp, parp); + } + else + { + ret = dtest_run_parallel(count, options.num_threads_, + [&](const int j) -> int { + return CalcDDtablePBN(dealsp->deals[j], &resp->results[j]); + }); + if (ret == RETURN_NO_FAULT) + { + int strains = 0; + for (int k = 0; k < DDS_STRAINS; k++) + { + if (!filter[k]) + strains++; + } + // Match CalcAllTablesPBN accounting: 4 declarers per strain-board. + resp->no_of_boards = 4 * count * strains; + } + } + if (ret != RETURN_NO_FAULT) { cout << "loop_calc: i " << i << ", return " << ret << "\n"; exit(0); @@ -270,8 +315,20 @@ bool loop_play( timer.start(count); int ret; - if ((ret = AnalyseAllPlaysPBN(bop, playsp, solvedplp, 1)) - != RETURN_NO_FAULT) + if (dtest_effective_threads(options.num_threads_, count) <= 1) + { + ret = AnalyseAllPlaysPBN(bop, playsp, solvedplp, 1); + } + else + { + solvedplp->no_of_boards = count; + ret = dtest_run_parallel(count, options.num_threads_, + [&](const int j) -> int { + return AnalysePlayPBN( + bop->deals[j], playsp->plays[j], &solvedplp->solved[j], 0); + }); + } + if (ret != RETURN_NO_FAULT) { printf("loop_play i %i: Return %d\n", i, ret); cout << "loop_play: i " << i << ": " << "return " << ret << "\n"; From 2bfa2723bea230112b1ce61448c94330b4efddd5 Mon Sep 17 00:00:00 2001 From: Adam Wildavsky Date: Fri, 12 Jun 2026 21:43:14 -0400 Subject: [PATCH 2/2] =?UTF-8?q?Parallelize=20inside=20the=20library,=20mir?= =?UTF-8?q?roring=20solve=5Fall=5Fboards=5Fn=20=E2=80=94=20each=20worker?= =?UTF-8?q?=20gets=20a=20persistent=20SolverContext=20and=20pulls=20boards?= =?UTF-8?q?=20from=20a=20shared=20queue.=20While=20still=2050%=20slower=20?= =?UTF-8?q?than=20v=202.9,=20this=20produces=20a=20roughly=2030%=20wall-cl?= =?UTF-8?q?ock=20improvement=20when=20solving=20100=20deals,=20e.g.,=20./b?= =?UTF-8?q?azel-bin/library/tests/dtest=20-f=20hands/list100.txt=20-s=20ca?= =?UTF-8?q?l?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- library/src/calc_tables.cpp | 78 +++++++++++++++++++++++++++++++++++-- library/tests/loop.cpp | 26 ++----------- 2 files changed, 78 insertions(+), 26 deletions(-) diff --git a/library/src/calc_tables.cpp b/library/src/calc_tables.cpp index 96555a82..190156e7 100644 --- a/library/src/calc_tables.cpp +++ b/library/src/calc_tables.cpp @@ -8,6 +8,10 @@ */ #include "calc_tables.hpp" +#include +#include +#include + #include #include #include @@ -102,13 +106,81 @@ auto calc_all_boards_n( return RETURN_NO_FAULT; } -// Legacy overload: creates temporary context +// Legacy overload: parallel across boards, one SolverContext per worker. auto calc_all_boards_n( Boards * bop, SolvedBoards * solvedp) -> int { - SolverContext ctx; - return calc_all_boards_n(ctx, bop, solvedp); + const int n = bop->no_of_boards; + if (n > MAXNOOFBOARDS) + return RETURN_TOO_MANY_BOARDS; + + for (int k = 0; k < MAXNOOFBOARDS; k++) + solvedp->solved_board[k].cards = 0; + + const int nthreads = std::max(1, + std::min(static_cast(std::thread::hardware_concurrency()), n)); + + if (nthreads <= 1) + { + SolverContext ctx; + return calc_all_boards_n(ctx, bop, solvedp); + } + + std::vector contexts(static_cast(nthreads)); + std::atomic next_board{0}; + std::atomic first_error{0}; + + auto worker = [&](const int worker_id) { + for (;;) + { + const int bno = next_board.fetch_add(1, std::memory_order_relaxed); + if (bno >= n || first_error.load(std::memory_order_relaxed) != 0) + break; + + const int err = calc_single_common_internal( + contexts[static_cast(worker_id)], *bop, *solvedp, bno); + if (err != 1) + { + int expected = 0; + first_error.compare_exchange_strong( + expected, err, std::memory_order_relaxed); + break; + } + } + }; + + START_BLOCK_TIMER; + { + std::vector threads; + threads.reserve(static_cast(nthreads)); + try + { + for (int i = 0; i < nthreads; ++i) + threads.emplace_back(worker, i); + } + catch (...) + { + for (auto & t : threads) + if (t.joinable()) + t.join(); + throw; + } + for (auto & t : threads) + t.join(); + } + END_BLOCK_TIMER; + + if (const int err = first_error.load(); err != 0) + return err; + + solvedp->no_of_boards = n; + +#ifdef DDS_SCHEDULER + scheduler.PrintTiming(); +#endif + + return RETURN_NO_FAULT; } diff --git a/library/tests/loop.cpp b/library/tests/loop.cpp index 11a22c9e..ecc6b4eb 100644 --- a/library/tests/loop.cpp +++ b/library/tests/loop.cpp @@ -16,6 +16,8 @@ #include "TestTimer.hpp" #include "compare.hpp" #include "print.hpp" +#include + #include "cst.hpp" #include "dtest_parallel.hpp" @@ -137,29 +139,7 @@ bool loop_calc( strcpy(dealsp->deals[j].cards, deal_list[i+j].remainCards); timer.start(count); - int ret; - if (dtest_effective_threads(options.num_threads_, count) <= 1) - { - ret = CalcAllTablesPBN(dealsp, -1, filter, resp, parp); - } - else - { - ret = dtest_run_parallel(count, options.num_threads_, - [&](const int j) -> int { - return CalcDDtablePBN(dealsp->deals[j], &resp->results[j]); - }); - if (ret == RETURN_NO_FAULT) - { - int strains = 0; - for (int k = 0; k < DDS_STRAINS; k++) - { - if (!filter[k]) - strains++; - } - // Match CalcAllTablesPBN accounting: 4 declarers per strain-board. - resp->no_of_boards = 4 * count * strains; - } - } + const int ret = CalcAllTablesPBN(dealsp, -1, filter, resp, parp); if (ret != RETURN_NO_FAULT) { cout << "loop_calc: i " << i << ", return " << ret << "\n";