From be0d29550c397c3dd9f36d220c3386c51ff8c873 Mon Sep 17 00:00:00 2001 From: George Bosilca Date: Wed, 20 May 2026 13:08:50 -0400 Subject: [PATCH 1/5] update parsec to 0a7c845a92 Signed-off-by: George Bosilca --- parsec | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/parsec b/parsec index b3e7e24c..0a7c845a 160000 --- a/parsec +++ b/parsec @@ -1 +1 @@ -Subproject commit b3e7e24c4ab42076ee39a520f1540a9fe6b553db +Subproject commit 0a7c845a92cfd414b2f90a35020fe39efaec6c34 From 293da72455513069e663e1a4cdd097bb79dd33ac Mon Sep 17 00:00:00 2001 From: George Bosilca Date: Tue, 19 May 2026 14:48:14 -0400 Subject: [PATCH 2/5] cmake: fix LAPACKE library discovery from directory cache values Do not accept cached LAPACKE component library values that point to a directory instead of an actual library file. Treat the directory and its parent as additional search hints, clear the bad cache entry, and rerun find_library() so LAPACKE::LAPACKE resolves to the real liblapacke. This fixes configurations where LAPACKE_LAPACKE_LIB was set to something like /opt/homebrew/opt/lapack/lib, causing libdplasma to miss the LAPACKE dependency and fail with unresolved LAPACKE symbols. Signed-off-by: George Bosilca --- cmake_modules/FindLAPACKE.cmake | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/cmake_modules/FindLAPACKE.cmake b/cmake_modules/FindLAPACKE.cmake index 69c93649..b3c03923 100644 --- a/cmake_modules/FindLAPACKE.cmake +++ b/cmake_modules/FindLAPACKE.cmake @@ -48,6 +48,7 @@ # Copyright (c) 2019-2024 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. +# Copyright (c) 2026 NVIDIA Corporation. All rights reserved. 
# # $COPYRIGHT$ # @@ -120,6 +121,19 @@ set(LAPACKE_FIND_ALL_COMPONENTS 0) # ============================================================================== macro(_find_library_with_header component incname) + if(LAPACKE_${component}_LIB) + get_filename_component(_lapacke_lib_ext "${LAPACKE_${component}_LIB}" EXT) + if(IS_DIRECTORY "${LAPACKE_${component}_LIB}" AND NOT _lapacke_lib_ext STREQUAL ".framework") + list(APPEND LAPACKE_SEARCH_PATHS "${LAPACKE_${component}_LIB}") + get_filename_component(_lapacke_lib_parent "${LAPACKE_${component}_LIB}" DIRECTORY) + list(APPEND LAPACKE_SEARCH_PATHS "${_lapacke_lib_parent}") + unset(LAPACKE_${component}_LIB CACHE) + unset(LAPACKE_${component}_LIB) + unset(_lapacke_lib_parent) + endif() + unset(_lapacke_lib_ext) + endif() + find_library(LAPACKE_${component}_LIB NAMES ${ARGN} NAMES_PER_DIR From ab9c97f47e05a9bb2474ba6ee3cd10d92452463c Mon Sep 17 00:00:00 2001 From: George Bosilca Date: Tue, 19 May 2026 14:49:34 -0400 Subject: [PATCH 3/5] tests: update DTD arena datatype management Replace the removed pre-release PaRSEC DTD arena helpers with the current PaRSEC v4 API in DPLASMA DTD tests. Allocate DTD arena datatypes explicitly with parsec_arena_datatype_new(), attach them with parsec_dtd_attach_arena_datatype(), and release them with parsec_dtd_free_arena_datatype(). This fixes DTD test builds against the embedded PaRSEC runtime. Also update the touched test file copyrights for NVIDIA. 
Signed-off-by: George Bosilca --- tests/testing_zgemm_dtd.c | 13 +++++++------ tests/testing_zgeqrf_dtd.c | 13 +++++++------ tests/testing_zgeqrf_dtd_untied.c | 25 +++++++++++++------------ tests/testing_zgetrf_incpiv_dtd.c | 19 ++++++++++--------- tests/testing_zpotrf_dtd.c | 7 ++++--- tests/testing_zpotrf_dtd_untied.c | 7 ++++--- 6 files changed, 45 insertions(+), 39 deletions(-) diff --git a/tests/testing_zgemm_dtd.c b/tests/testing_zgemm_dtd.c index a9e5a602..440399a6 100644 --- a/tests/testing_zgemm_dtd.c +++ b/tests/testing_zgemm_dtd.c @@ -2,6 +2,7 @@ * Copyright (c) 2015-2024 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. + * Copyright (c) 2026 NVIDIA Corporation. All rights reserved. * * @precisions normal z -> s d c * @@ -93,7 +94,8 @@ int main(int argc, char ** argv) parsec_taskpool_t *dtd_tp = parsec_dtd_taskpool_new(); /* Default type */ - parsec_arena_datatype_t *tile_full = parsec_dtd_create_arena_datatype(parsec, &TILE_FULL); + parsec_arena_datatype_t *tile_full = parsec_arena_datatype_new(); + parsec_dtd_attach_arena_datatype(parsec, tile_full, &TILE_FULL); dplasma_add2arena_tile( tile_full, dcA.super.mb*dcA.super.nb*sizeof(dplasma_complex64_t), PARSEC_ARENA_ALIGNMENT_SSE, @@ -260,8 +262,7 @@ int main(int argc, char ** argv) parsec_taskpool_free( dtd_tp ); /* Cleaning data arrays we allocated for communication */ - dplasma_matrix_del2arena( tile_full ); - parsec_dtd_destroy_arena_datatype(parsec, TILE_FULL); + parsec_dtd_free_arena_datatype(parsec, TILE_FULL); parsec_dtd_data_collection_fini( (parsec_data_collection_t *)&dcA ); parsec_data_free(dcA.mat); @@ -321,7 +322,8 @@ int main(int argc, char ** argv) /* Allocating data arrays to be used by comm engine */ /* Default type */ - parsec_arena_datatype_t *tile_full = parsec_dtd_create_arena_datatype(parsec, &TILE_FULL); + parsec_arena_datatype_t *tile_full = parsec_arena_datatype_new(); + parsec_dtd_attach_arena_datatype(parsec, tile_full, 
&TILE_FULL); dplasma_add2arena_tile( tile_full, dcA.super.mb*dcA.super.nb*sizeof(dplasma_complex64_t), PARSEC_ARENA_ALIGNMENT_SSE, @@ -504,8 +506,7 @@ int main(int argc, char ** argv) parsec_taskpool_free( dtd_tp ); /* Cleaning data arrays we allocated for communication */ - dplasma_matrix_del2arena( tile_full ); - parsec_dtd_destroy_arena_datatype(parsec, TILE_FULL); + parsec_dtd_free_arena_datatype(parsec, TILE_FULL); parsec_dtd_data_collection_fini( (parsec_data_collection_t *)&dcA ); parsec_data_free(dcA.mat); diff --git a/tests/testing_zgeqrf_dtd.c b/tests/testing_zgeqrf_dtd.c index 26c01e8c..34ba04cf 100644 --- a/tests/testing_zgeqrf_dtd.c +++ b/tests/testing_zgeqrf_dtd.c @@ -2,6 +2,7 @@ * Copyright (c) 2015-2024 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. + * Copyright (c) 2026 NVIDIA Corporation. All rights reserved. * * @precisions normal z -> s d c * @@ -218,13 +219,15 @@ int main(int argc, char **argv) /* Allocating data arrays to be used by comm engine */ /* Default type */ - parsec_arena_datatype_t *tile_full = parsec_dtd_create_arena_datatype(parsec, &TILE_FULL); + parsec_arena_datatype_t *tile_full = parsec_arena_datatype_new(); + parsec_dtd_attach_arena_datatype(parsec, tile_full, &TILE_FULL); dplasma_add2arena_tile( tile_full, dcA.super.mb*dcA.super.nb*sizeof(dplasma_complex64_t), PARSEC_ARENA_ALIGNMENT_SSE, parsec_datatype_double_complex_t, dcA.super.mb ); - parsec_arena_datatype_t *tile_rectangle = parsec_dtd_create_arena_datatype(parsec, &TILE_RECTANGLE); + parsec_arena_datatype_t *tile_rectangle = parsec_arena_datatype_new(); + parsec_dtd_attach_arena_datatype(parsec, tile_rectangle, &TILE_RECTANGLE); dplasma_add2arena_rectangle( tile_rectangle, dcT.super.mb*dcT.super.nb*sizeof(dplasma_complex64_t), PARSEC_ARENA_ALIGNMENT_SSE, @@ -398,10 +401,8 @@ int main(int argc, char **argv) } /* Cleaning data arrays we allocated for communication */ - dplasma_matrix_del2arena( tile_full ); - 
parsec_dtd_destroy_arena_datatype(parsec, TILE_FULL); - dplasma_matrix_del2arena( tile_rectangle ); - parsec_dtd_destroy_arena_datatype(parsec, TILE_RECTANGLE); + parsec_dtd_free_arena_datatype(parsec, TILE_FULL); + parsec_dtd_free_arena_datatype(parsec, TILE_RECTANGLE); parsec_dtd_data_collection_fini( (parsec_data_collection_t *)&dcA ); parsec_dtd_data_collection_fini( (parsec_data_collection_t *)&dcT ); diff --git a/tests/testing_zgeqrf_dtd_untied.c b/tests/testing_zgeqrf_dtd_untied.c index 4f85c1d9..20740dca 100644 --- a/tests/testing_zgeqrf_dtd_untied.c +++ b/tests/testing_zgeqrf_dtd_untied.c @@ -2,6 +2,7 @@ * Copyright (c) 2015-2024 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. + * Copyright (c) 2026 NVIDIA Corporation. All rights reserved. * * @precisions normal z -> s d c * @@ -341,27 +342,31 @@ int main(int argc, char ** argv) /* Allocating data arrays to be used by comm engine */ /* Default type */ - parsec_arena_datatype_t *tile_full = parsec_dtd_create_arena_datatype(parsec, &TILE_FULL); + parsec_arena_datatype_t *tile_full = parsec_arena_datatype_new(); + parsec_dtd_attach_arena_datatype(parsec, tile_full, &TILE_FULL); dplasma_add2arena_tile( tile_full, dcA.super.mb*dcA.super.nb*sizeof(dplasma_complex64_t), PARSEC_ARENA_ALIGNMENT_SSE, parsec_datatype_double_complex_t, dcA.super.mb ); /* Lower triangular part of tile without diagonal */ - parsec_arena_datatype_t *tile_lower = parsec_dtd_create_arena_datatype(parsec, &TILE_LOWER); + parsec_arena_datatype_t *tile_lower = parsec_arena_datatype_new(); + parsec_dtd_attach_arena_datatype(parsec, tile_lower, &TILE_LOWER); dplasma_add2arena_lower( tile_lower, dcA.super.mb*dcA.super.nb*sizeof(dplasma_complex64_t), PARSEC_ARENA_ALIGNMENT_SSE, parsec_datatype_double_complex_t, dcA.super.mb, 0 ); /* Upper triangular part of tile with diagonal */ - parsec_arena_datatype_t *tile_upper = parsec_dtd_create_arena_datatype(parsec, &TILE_UPPER); + 
parsec_arena_datatype_t *tile_upper = parsec_arena_datatype_new(); + parsec_dtd_attach_arena_datatype(parsec, tile_upper, &TILE_UPPER); dplasma_add2arena_upper( tile_upper, dcA.super.mb*dcA.super.nb*sizeof(dplasma_complex64_t), PARSEC_ARENA_ALIGNMENT_SSE, parsec_datatype_double_complex_t, dcA.super.mb, 1 ); - parsec_arena_datatype_t *tile_rectangle = parsec_dtd_create_arena_datatype(parsec, &TILE_RECTANGLE); + parsec_arena_datatype_t *tile_rectangle = parsec_arena_datatype_new(); + parsec_dtd_attach_arena_datatype(parsec, tile_rectangle, &TILE_RECTANGLE); dplasma_add2arena_rectangle( tile_rectangle, dcT.super.mb*dcT.super.nb*sizeof(dplasma_complex64_t), PARSEC_ARENA_ALIGNMENT_SSE, @@ -447,14 +452,10 @@ int main(int argc, char ** argv) } /* Cleaning data arrays we allocated for communication */ - dplasma_matrix_del2arena( tile_full ); - parsec_dtd_destroy_arena_datatype(parsec, TILE_FULL); - dplasma_matrix_del2arena( tile_lower ); - parsec_dtd_destroy_arena_datatype(parsec, TILE_LOWER); - dplasma_matrix_del2arena( tile_upper ); - parsec_dtd_destroy_arena_datatype(parsec, TILE_UPPER); - dplasma_matrix_del2arena( tile_rectangle ); - parsec_dtd_destroy_arena_datatype(parsec, TILE_RECTANGLE); + parsec_dtd_free_arena_datatype(parsec, TILE_FULL); + parsec_dtd_free_arena_datatype(parsec, TILE_LOWER); + parsec_dtd_free_arena_datatype(parsec, TILE_UPPER); + parsec_dtd_free_arena_datatype(parsec, TILE_RECTANGLE); parsec_dtd_data_collection_fini( (parsec_data_collection_t *)&dcA ); parsec_dtd_data_collection_fini( (parsec_data_collection_t *)&dcT ); diff --git a/tests/testing_zgetrf_incpiv_dtd.c b/tests/testing_zgetrf_incpiv_dtd.c index 298729c9..002864b4 100644 --- a/tests/testing_zgetrf_incpiv_dtd.c +++ b/tests/testing_zgetrf_incpiv_dtd.c @@ -2,6 +2,7 @@ * Copyright (c) 2015-2023 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. + * Copyright (c) 2026 NVIDIA Corporation. All rights reserved. 
* * @precisions normal z -> s d c * @@ -245,21 +246,24 @@ int main(int argc, char ** argv) /* Allocating data arrays to be used by comm engine */ /* A */ - parsec_arena_datatype_t *tile_full = parsec_dtd_create_arena_datatype(parsec, &TILE_FULL); + parsec_arena_datatype_t *tile_full = parsec_arena_datatype_new(); + parsec_dtd_attach_arena_datatype(parsec, tile_full, &TILE_FULL); dplasma_add2arena_tile( tile_full, dcA.super.mb*dcA.super.nb*sizeof(dplasma_complex64_t), PARSEC_ARENA_ALIGNMENT_SSE, parsec_datatype_double_complex_t, dcA.super.mb ); /* IPIV */ - parsec_arena_datatype_t *tile_rectangle = parsec_dtd_create_arena_datatype(parsec, &TILE_RECTANGLE); + parsec_arena_datatype_t *tile_rectangle = parsec_arena_datatype_new(); + parsec_dtd_attach_arena_datatype(parsec, tile_rectangle, &TILE_RECTANGLE); dplasma_add2arena_rectangle( tile_rectangle, dcA.super.mb*sizeof(int), PARSEC_ARENA_ALIGNMENT_SSE, parsec_datatype_int_t, dcA.super.mb, 1, -1 ); /* L */ - parsec_arena_datatype_t *l_tile_rectangle = parsec_dtd_create_arena_datatype(parsec, &L_TILE_RECTANGLE); + parsec_arena_datatype_t *l_tile_rectangle = parsec_arena_datatype_new(); + parsec_dtd_attach_arena_datatype(parsec, l_tile_rectangle, &L_TILE_RECTANGLE); dplasma_add2arena_rectangle( l_tile_rectangle, dcL.super.mb*dcL.super.nb*sizeof(dplasma_complex64_t), PARSEC_ARENA_ALIGNMENT_SSE, @@ -447,12 +451,9 @@ int main(int argc, char ** argv) } /* Cleaning data arrays we allocated for communication */ - dplasma_matrix_del2arena( tile_full ); - parsec_dtd_destroy_arena_datatype(parsec, TILE_FULL); - dplasma_matrix_del2arena( tile_rectangle ); - parsec_dtd_destroy_arena_datatype(parsec, TILE_RECTANGLE); - dplasma_matrix_del2arena( l_tile_rectangle ); - parsec_dtd_destroy_arena_datatype(parsec, L_TILE_RECTANGLE); + parsec_dtd_free_arena_datatype(parsec, TILE_FULL); + parsec_dtd_free_arena_datatype(parsec, TILE_RECTANGLE); + parsec_dtd_free_arena_datatype(parsec, L_TILE_RECTANGLE); parsec_dtd_data_collection_fini( 
(parsec_data_collection_t *)&dcA ); parsec_dtd_data_collection_fini( (parsec_data_collection_t *)&dcL ); diff --git a/tests/testing_zpotrf_dtd.c b/tests/testing_zpotrf_dtd.c index 7937d801..51bc38e1 100644 --- a/tests/testing_zpotrf_dtd.c +++ b/tests/testing_zpotrf_dtd.c @@ -2,6 +2,7 @@ * Copyright (c) 2013-2024 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. + * Copyright (c) 2026 NVIDIA Corporation. All rights reserved. * * @precisions normal z -> s d c * @@ -73,7 +74,8 @@ int main(int argc, char **argv) parsec_taskpool_t *dtd_tp = parsec_dtd_taskpool_new(); /* Allocating data arrays to be used by comm engine */ - parsec_arena_datatype_t *tile_full = parsec_dtd_create_arena_datatype(parsec, &TILE_FULL); + parsec_arena_datatype_t *tile_full = parsec_arena_datatype_new(); + parsec_dtd_attach_arena_datatype(parsec, tile_full, &TILE_FULL); dplasma_add2arena_tile( tile_full, dcA.super.mb*dcA.super.nb*sizeof(dplasma_complex64_t), PARSEC_ARENA_ALIGNMENT_SSE, @@ -375,8 +377,7 @@ int main(int argc, char **argv) } /* Cleaning data arrays we allocated for communication */ - dplasma_matrix_del2arena( tile_full ); - parsec_dtd_destroy_arena_datatype(parsec, TILE_FULL); + parsec_dtd_free_arena_datatype(parsec, TILE_FULL); parsec_dtd_data_collection_fini( (parsec_data_collection_t *)&dcA ); parsec_data_free(dcA.mat); dcA.mat = NULL; diff --git a/tests/testing_zpotrf_dtd_untied.c b/tests/testing_zpotrf_dtd_untied.c index 40c375a9..7d9a1c4f 100644 --- a/tests/testing_zpotrf_dtd_untied.c +++ b/tests/testing_zpotrf_dtd_untied.c @@ -2,6 +2,7 @@ * Copyright (c) 2015-2024 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. + * Copyright (c) 2026 NVIDIA Corporation. All rights reserved. 
* * @precisions normal z -> s d c * @@ -370,7 +371,8 @@ int main(int argc, char **argv) parsec_taskpool_t *dtd_tp = parsec_dtd_taskpool_new( ); /* Default type */ - parsec_arena_datatype_t *tile_full = parsec_dtd_create_arena_datatype(parsec, &TILE_FULL); + parsec_arena_datatype_t *tile_full = parsec_arena_datatype_new(); + parsec_dtd_attach_arena_datatype(parsec, tile_full, &TILE_FULL); dplasma_add2arena_tile( tile_full, dcA.super.mb*dcA.super.nb*sizeof(dplasma_complex64_t), PARSEC_ARENA_ALIGNMENT_SSE, @@ -475,8 +477,7 @@ int main(int argc, char **argv) parsec_tiled_matrix_destroy( (parsec_tiled_matrix_t*)&dcX ); } - dplasma_matrix_del2arena( tile_full ); - parsec_dtd_destroy_arena_datatype(parsec, TILE_FULL); + parsec_dtd_free_arena_datatype(parsec, TILE_FULL); parsec_dtd_data_collection_fini( (parsec_data_collection_t *)&dcA ); parsec_data_free(dcA.mat); dcA.mat = NULL; parsec_tiled_matrix_destroy( (parsec_tiled_matrix_t*)&dcA); From 0164da05b7b7ee21a2f35eae6a9b5a78222bb7b2 Mon Sep 17 00:00:00 2001 From: George Bosilca Date: Thu, 21 May 2026 01:32:08 -0400 Subject: [PATCH 4/5] Update to the new parsec public API. Signed-off-by: George Bosilca --- src/cuda/lapack_cuda_stage_in.c | 9 +++++---- src/dplasmaaux.c | 2 +- src/utils/dplasma_lapack_adtt.c | 3 +-- src/zgebrd_ge2gb_wrapper.c | 3 +-- src/zgetrf_1d.jdf | 2 +- src/zgetrf_1d_wrapper.c | 2 +- src/zgetrf_qrf.jdf | 2 +- src/zgetrf_qrf_wrapper.c | 2 +- 8 files changed, 12 insertions(+), 13 deletions(-) diff --git a/src/cuda/lapack_cuda_stage_in.c b/src/cuda/lapack_cuda_stage_in.c index f2f48ced..d85a9acd 100644 --- a/src/cuda/lapack_cuda_stage_in.c +++ b/src/cuda/lapack_cuda_stage_in.c @@ -2,6 +2,7 @@ * Copyright (c) 2020-2024 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. + * Copyright (c) 2026 NVIDIA Corporation. All rights reserved. 
* */ @@ -33,14 +34,14 @@ dplasma_cuda_lapack_stage_in(parsec_gpu_task_t *gtask, if(flow_mask & (1U << i)){ copy_in = task->data[i].data_in; copy_out = task->data[i].data_out; - ddc = (dplasma_data_collection_t*)gtask->flow_dc[i]; + ddc = (dplasma_data_collection_t*)gtask->flow_info[i].flow_dc; assert(ddc != NULL); elem_sz = parsec_datadist_getsizeoftype(ddc->dc_original->mtype); in_elem_dev = (parsec_device_gpu_module_t*)parsec_mca_device_get( copy_in->device_index); if( (in_elem_dev->super.type == PARSEC_DEV_CUDA) || (ddc->dc_original->storage != PARSEC_MATRIX_LAPACK)){ ret = (cudaError_t)cudaMemcpyAsync( copy_out->device_private, copy_in->device_private, - gtask->flow_nb_elts[i], + gtask->flow_info[i].flow_span, (in_elem_dev->super.type != PARSEC_DEV_CUDA)? cudaMemcpyHostToDevice : cudaMemcpyDeviceToDevice, cuda_stream->cuda_stream); @@ -109,7 +110,7 @@ dplasma_cuda_lapack_stage_out(parsec_gpu_task_t *gtask, if(flow_mask & (1U << i)){ copy_in = task->data[i].data_out; copy_out = copy_in->original->device_copies[0]; - ddc = (dplasma_data_collection_t*)gtask->flow_dc[i]; + ddc = (dplasma_data_collection_t*)gtask->flow_info[i].flow_dc; assert(ddc != NULL); elem_sz = parsec_datadist_getsizeoftype(ddc->dc_original->mtype); out_elem_dev = (parsec_device_gpu_module_t*)parsec_mca_device_get( copy_out->device_index); @@ -117,7 +118,7 @@ dplasma_cuda_lapack_stage_out(parsec_gpu_task_t *gtask, if( (out_elem_dev->super.type == PARSEC_DEV_CUDA) || (ddc->dc_original->storage != PARSEC_MATRIX_LAPACK)){ ret = (cudaError_t)cudaMemcpyAsync( copy_out->device_private, copy_in->device_private, - gtask->flow_nb_elts[i], + gtask->flow_info[i].flow_span, out_elem_dev->super.type != PARSEC_DEV_CUDA ? 
cudaMemcpyDeviceToHost : cudaMemcpyDeviceToDevice, cuda_stream->cuda_stream); diff --git a/src/dplasmaaux.c b/src/dplasmaaux.c index 5a39a41b..126456d0 100644 --- a/src/dplasmaaux.c +++ b/src/dplasmaaux.c @@ -98,7 +98,7 @@ dplasma_aux_getGEMMLookahead( parsec_tiled_matrix_t *A ) * look ahead based on the global information to get the same one on all * nodes. */ - int nbunits = vpmap_get_nb_total_threads() * A->super.nodes; + int nbunits = parsec_vpmap_get_nb_total_threads() * A->super.nodes; double alpha = 3. * (double)nbunits / ( A->mt * A->nt ); if ( A->super.nodes == 1 ) { diff --git a/src/utils/dplasma_lapack_adtt.c b/src/utils/dplasma_lapack_adtt.c index 2d47988a..1b4a0341 100644 --- a/src/utils/dplasma_lapack_adtt.c +++ b/src/utils/dplasma_lapack_adtt.c @@ -288,7 +288,7 @@ static parsec_data_t* data_of(parsec_data_collection_t *desc, ...) "data_of CP %p [old type %p] loc %d -> dtt %p target_shape %d layout %d", cp, cp->dtt, loc, adt->opaque_dtt, info.shape, info.layout); dt = parsec_data_create_with_type( dt->dc, - dt->key, cp->device_private, dt->nb_elts, + dt->key, cp->device_private, dt->span, adt->opaque_dtt); } } @@ -392,4 +392,3 @@ void dplasma_unwrap_data_collection(dplasma_data_collection_t *ddc) { free(ddc); } - diff --git a/src/zgebrd_ge2gb_wrapper.c b/src/zgebrd_ge2gb_wrapper.c index f1b93f10..32766c08 100644 --- a/src/zgebrd_ge2gb_wrapper.c +++ b/src/zgebrd_ge2gb_wrapper.c @@ -310,7 +310,7 @@ dplasma_zgebrd_ge2gb_New( int ib, dplasma_qrtree_t *qrtre0, *qrtree, *lqtree; int P, Q, cores; - cores = dplasma_imax( vpmap_get_nb_total_threads(), 1 ); + cores = dplasma_imax( parsec_vpmap_get_nb_total_threads(), 1 ); qrtree = malloc( sizeof(dplasma_qrtree_t) ); lqtree = malloc( sizeof(dplasma_qrtree_t) ); @@ -613,4 +613,3 @@ dplasma_zgebrd_ge2gb( parsec_context_t *parsec, int ib, return 0; } - diff --git a/src/zgetrf_1d.jdf b/src/zgetrf_1d.jdf index 9541b481..2af38062 100644 --- a/src/zgetrf_1d.jdf +++ b/src/zgetrf_1d.jdf @@ -57,7 +57,7 @@ descIPIV [type = 
"parsec_tiled_matrix_t*" hidden = on default = "((dplasma_da INFO [type = "int*"] -nbmaxthrd [type = "int" hidden=on default="( dplasma_imax( 1, dplasma_imin( vpmap_get_nb_threads_in_vp(0) - 1, 48 ) ) )" ] /* 48 is the actual limit of the kernel */ +nbmaxthrd [type = "int" hidden=on default="( dplasma_imax( 1, dplasma_imin( parsec_vpmap_get_vp_threads(0) - 1, 48 ) ) )" ] /* 48 is the actual limit of the kernel */ KT [type = "int" hidden=on default="( dplasma_imin( descA->mt, descA->nt )-1 )" ] getrfdata [type = "CORE_zgetrf_data_t *" hidden = on default = "NULL"] diff --git a/src/zgetrf_1d_wrapper.c b/src/zgetrf_1d_wrapper.c index 8ce95db9..0956d26c 100644 --- a/src/zgetrf_1d_wrapper.c +++ b/src/zgetrf_1d_wrapper.c @@ -83,7 +83,7 @@ dplasma_zgetrf_1d_New( parsec_tiled_matrix_t *A, int *INFO ) { parsec_zgetrf_1d_taskpool_t *parsec_getrf_1d; - int nbthreads = dplasma_imax( 1, vpmap_get_nb_threads_in_vp(0) - 1 ); + int nbthreads = dplasma_imax( 1, parsec_vpmap_get_vp_threads(0) - 1 ); dplasma_data_collection_t * ddc_A = dplasma_wrap_data_collection((parsec_tiled_matrix_t*)A); dplasma_data_collection_t * ddc_IPIV = dplasma_wrap_data_collection((parsec_tiled_matrix_t*)IPIV); diff --git a/src/zgetrf_qrf.jdf b/src/zgetrf_qrf.jdf index 1e7ba47c..353fb362 100644 --- a/src/zgetrf_qrf.jdf +++ b/src/zgetrf_qrf.jdf @@ -52,7 +52,7 @@ INFO [type = "int*"] param_p [type = int default="((parsec_matrix_block_cyclic_t*)descA)->grid.rows" hidden=on ] param_q [type = int default="((parsec_matrix_block_cyclic_t*)descA)->grid.cols" hidden=on ] minMNT [type = int default="dplasma_imin( descA->mt-1, descA->nt-1 )" hidden=on ] -nbmaxthrd [type = "int" default="( dplasma_imax( 1, dplasma_imin( vpmap_get_nb_threads_in_vp(0) - 1, 48 ) ) )" hidden=on] /* 48 is the actual limit of the kernel */ +nbmaxthrd [type = "int" default="( dplasma_imax( 1, dplasma_imin( parsec_vpmap_get_vp_threads(0) - 1, 48 ) ) )" hidden=on] /* 48 is the actual limit of the kernel */ getrfdata [type = "CORE_zgetrf_data_t 
*" hidden = on default = "NULL"] diff --git a/src/zgetrf_qrf_wrapper.c b/src/zgetrf_qrf_wrapper.c index 37dc0c21..8a4c721d 100644 --- a/src/zgetrf_qrf_wrapper.c +++ b/src/zgetrf_qrf_wrapper.c @@ -166,7 +166,7 @@ dplasma_zgetrf_qrf_New( dplasma_qrtree_t *qrtree, int ib = TS->mb; size_t sizeW = 1; size_t sizeReduceVec = 1; - int nbthreads = dplasma_imax( 1, vpmap_get_nb_threads_in_vp(0) - 1 ); + int nbthreads = dplasma_imax( 1, parsec_vpmap_get_vp_threads(0) - 1 ); /* * Compute W size according to criteria used. From 835a1a3e6fffec5f1d080f960dda1d8fb0dd467e Mon Sep 17 00:00:00 2001 From: George Bosilca Date: Thu, 21 May 2026 01:41:29 -0400 Subject: [PATCH 5/5] Update DPLASMA parsec_init argument handling Adapt DPLASMA callers to the updated PaRSEC initialization contract, where parsec_init receives only PaRSEC-specific arguments and no synthetic argv[0]. Extract only the arguments following "--" before calling parsec_init in the test and contrib common setup paths. Pass NULL argument vectors when no PaRSEC options are provided. Update the QR example to pass only the explicit PaRSEC argument slice. Stop constructing a fake application argv in the ScaLAPACK wrapper initializer. Signed-off-by: George Bosilca --- contrib/build_with_dplasma/common.c | 37 ++++++++++++------- examples/dqr_driver.c | 18 ++++++++- .../dplasma_wrapper_parsec_init.c | 21 +---------- tests/common.c | 37 ++++++++++++------- 4 files changed, 66 insertions(+), 47 deletions(-) diff --git a/contrib/build_with_dplasma/common.c b/contrib/build_with_dplasma/common.c index 165f9392..4137b489 100644 --- a/contrib/build_with_dplasma/common.c +++ b/contrib/build_with_dplasma/common.c @@ -2,6 +2,7 @@ * Copyright (c) 2009-2020 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. + * Copyright (c) 2026 NVIDIA Corporation. All rights reserved. 
* */ #include "parsec/runtime.h" @@ -548,6 +549,23 @@ static void print_arguments(int* iparam) } } +static void get_parsec_arguments(int argc, char **argv, + int *parsec_argc, char ***parsec_argv) +{ + int idx; + + *parsec_argc = 0; + *parsec_argv = NULL; + + for( idx = 1; idx < argc; idx++ ) { + if( 0 == strcmp(argv[idx], "--") ) { + *parsec_argc = argc - idx - 1; + *parsec_argv = (0 < *parsec_argc) ? &argv[idx + 1] : NULL; + return; + } + } +} + @@ -633,19 +651,13 @@ parsec_context_t* setup_parsec(int argc, char **argv, int *iparam) TIME_START(); - /* Once we got out arguments, we should pass whatever is left down */ - int parsec_argc, idx; - char** parsec_argv = (char**)calloc(argc, sizeof(char*)); - parsec_argv[0] = argv[0]; /* the app name */ - for( idx = parsec_argc = 1; - (idx < argc) && (0 != strcmp(argv[idx], "--")); idx++); - if( idx != argc ) { - for( parsec_argc = 1, idx++; idx < argc; - parsec_argv[parsec_argc] = argv[idx], parsec_argc++, idx++); - } + /* PaRSEC only accepts its own arguments, without argv[0] or "--". */ + int parsec_argc; + char **parsec_argv; + get_parsec_arguments(argc, argv, &parsec_argc, &parsec_argv); parsec_context_t* ctx = parsec_init(iparam[IPARAM_NCORES], - &parsec_argc, &parsec_argv); - free(parsec_argv); + (0 < parsec_argc) ? &parsec_argc : NULL, + (0 < parsec_argc) ? &parsec_argv : NULL); if( NULL == ctx ) { /* Failed to correctly initialize. In a correct scenario report * upstream, but in this particular case bail out. @@ -678,4 +690,3 @@ void cleanup_parsec(parsec_context_t* parsec, int *iparam) #endif (void)iparam; } - diff --git a/examples/dqr_driver.c b/examples/dqr_driver.c index aa06bac8..490e48d5 100644 --- a/examples/dqr_driver.c +++ b/examples/dqr_driver.c @@ -2,6 +2,7 @@ * Copyright (c) 2016-2022 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. + * Copyright (c) 2026 NVIDIA Corporation. All rights reserved. 
* * Compile with $CC $(pkg-config --cflags dplasma) -c dqr_driver.c * Link with $CC $(pkg-config --libs dplasma) -o dqr_driver dqr_driver.o @@ -11,6 +12,7 @@ #include "parsec/data_dist/matrix/two_dim_rectangle_cyclic.h" #include #include +#include <string.h> #include int main(int argc, char ** argv) @@ -23,6 +25,8 @@ int main(int argc, char ** argv) int P = 0; int ch; int cores = -1; + int parsec_argc = 0; + char **parsec_argv = NULL; parsec_matrix_block_cyclic_t dcA; parsec_matrix_block_cyclic_t dcWork; @@ -50,6 +54,14 @@ int main(int argc, char ** argv) rank = 0; #endif + for( ch = 1; ch < argc; ch++ ) { + if( 0 == strcmp(argv[ch], "--") ) { + parsec_argc = argc - ch - 1; + parsec_argv = (0 < parsec_argc) ? &argv[ch + 1] : NULL; + break; + } + } + while ((ch = getopt_long(argc, argv, "M:N:m:n:P:c:h", longopts, NULL)) != -1) switch (ch) { case 'M': @@ -101,8 +113,10 @@ int main(int argc, char ** argv) /** Initialize PaRSEC with the required number of cores, - * and pass all arguments after '--' to PaRSEC, if there are some */ - parsec = parsec_init(cores, &argc, &argv); + * and pass only PaRSEC arguments after '--', without argv[0]. */ + parsec = parsec_init(cores, + (0 < parsec_argc) ? &parsec_argc : NULL, + (0 < parsec_argc) ? &parsec_argv : NULL); /** Declare a Matrix A as a (4, 1)-2D-Tile cyclic matrix of size M x N real * doubles, tiled in tiles of mb x nb, and distributed over PxQ processes, diff --git a/src/scalapack_wrappers/dplasma_wrapper_parsec_init.c b/src/scalapack_wrappers/dplasma_wrapper_parsec_init.c index ad5e64e0..57fe52a0 100644 --- a/src/scalapack_wrappers/dplasma_wrapper_parsec_init.c +++ b/src/scalapack_wrappers/dplasma_wrapper_parsec_init.c @@ -2,13 +2,12 @@ * Copyright (c) 2021-2023 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. + * Copyright (c) 2026 NVIDIA Corporation. All rights reserved. 
*/ #include "common.h" -#define APP_NAME "SCALAPACK_WRAPPED_CALL" - static void parsec_init_wrapper_internal(){ if( parsec_ctx == NULL ){ @@ -27,16 +26,6 @@ static void parsec_init_wrapper_internal(){ if(var_ncores!=NULL){ ncores = atoi(var_ncores); } - int parsec_argc = 1; - - char** parsec_argv = (char**)calloc(parsec_argc+1, sizeof(char*)); - int i; - for(i=0; i