Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions cmake_modules/FindLAPACKE.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@
# Copyright (c) 2019-2024 The University of Tennessee and The University
# of Tennessee Research Foundation. All rights
# reserved.
# Copyright (c) 2026 NVIDIA Corporation. All rights reserved.
#
# $COPYRIGHT$
#
Expand Down Expand Up @@ -120,6 +121,19 @@ set(LAPACKE_FIND_ALL_COMPONENTS 0)
# ==============================================================================

macro(_find_library_with_header component incname)
if(LAPACKE_${component}_LIB)
get_filename_component(_lapacke_lib_ext "${LAPACKE_${component}_LIB}" EXT)
if(IS_DIRECTORY "${LAPACKE_${component}_LIB}" AND NOT _lapacke_lib_ext STREQUAL ".framework")
list(APPEND LAPACKE_SEARCH_PATHS "${LAPACKE_${component}_LIB}")
get_filename_component(_lapacke_lib_parent "${LAPACKE_${component}_LIB}" DIRECTORY)
list(APPEND LAPACKE_SEARCH_PATHS "${_lapacke_lib_parent}")
unset(LAPACKE_${component}_LIB CACHE)
unset(LAPACKE_${component}_LIB)
unset(_lapacke_lib_parent)
endif()
unset(_lapacke_lib_ext)
endif()

find_library(LAPACKE_${component}_LIB
NAMES ${ARGN}
NAMES_PER_DIR
Expand Down
37 changes: 24 additions & 13 deletions contrib/build_with_dplasma/common.c
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
* Copyright (c) 2009-2020 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2026 NVIDIA Corporation. All rights reserved.
*
*/
#include "parsec/runtime.h"
Expand Down Expand Up @@ -548,6 +549,23 @@ static void print_arguments(int* iparam)
}
}

/**
 * Locate the command-line arguments that follow the first "--" separator.
 *
 * argv[0] is never considered as a separator; the scan starts at argv[1].
 *
 * @param[in]  argc         Number of entries in argv.
 * @param[in]  argv         Argument vector to scan.
 * @param[out] parsec_argc  Number of arguments found after the first "--",
 *                          or 0 when there is no separator or nothing
 *                          follows it.
 * @param[out] parsec_argv  Points at the first argument after "--" (inside
 *                          argv itself, nothing is allocated or copied), or
 *                          NULL when *parsec_argc is 0.
 */
static void get_parsec_arguments(int argc, char **argv,
                                 int *parsec_argc, char ***parsec_argv)
{
    int pos = 1;

    /* Advance to the first "--", or to argc when there is none. */
    while( (pos < argc) && (0 != strcmp(argv[pos], "--")) ) {
        pos++;
    }

    /* Entries strictly after the separator; not positive when it is absent
     * or is the last argument. */
    int remaining = argc - pos - 1;

    if( remaining > 0 ) {
        *parsec_argc = remaining;
        *parsec_argv = argv + pos + 1;
    } else {
        *parsec_argc = 0;
        *parsec_argv = NULL;
    }
}




Expand Down Expand Up @@ -633,19 +651,13 @@ parsec_context_t* setup_parsec(int argc, char **argv, int *iparam)

TIME_START();

/* Once we got out arguments, we should pass whatever is left down */
int parsec_argc, idx;
char** parsec_argv = (char**)calloc(argc, sizeof(char*));
parsec_argv[0] = argv[0]; /* the app name */
for( idx = parsec_argc = 1;
(idx < argc) && (0 != strcmp(argv[idx], "--")); idx++);
if( idx != argc ) {
for( parsec_argc = 1, idx++; idx < argc;
parsec_argv[parsec_argc] = argv[idx], parsec_argc++, idx++);
}
/* PaRSEC only accepts its own arguments, without argv[0] or "--". */
int parsec_argc;
char **parsec_argv;
get_parsec_arguments(argc, argv, &parsec_argc, &parsec_argv);
parsec_context_t* ctx = parsec_init(iparam[IPARAM_NCORES],
&parsec_argc, &parsec_argv);
free(parsec_argv);
(0 < parsec_argc) ? &parsec_argc : NULL,
(0 < parsec_argc) ? &parsec_argv : NULL);
if( NULL == ctx ) {
/* Failed to correctly initialize. In a correct scenario report
* upstream, but in this particular case bail out.
Expand Down Expand Up @@ -678,4 +690,3 @@ void cleanup_parsec(parsec_context_t* parsec, int *iparam)
#endif
(void)iparam;
}

18 changes: 16 additions & 2 deletions examples/dqr_driver.c
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
* Copyright (c) 2016-2022 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2026 NVIDIA Corporation. All rights reserved.
*
* Compile with $CC $(pkg-config --cflags dplasma) -c dqr_driver.c
* Link with $CC $(pkg-config --libs dplasma) -o dqr_driver dqr_driver.o
Expand All @@ -11,6 +12,7 @@
#include "parsec/data_dist/matrix/two_dim_rectangle_cyclic.h"
#include <getopt.h>
#include <math.h>
#include <string.h>
#include <sys/time.h>

int main(int argc, char ** argv)
Expand All @@ -23,6 +25,8 @@ int main(int argc, char ** argv)
int P = 0;
int ch;
int cores = -1;
int parsec_argc = 0;
char **parsec_argv = NULL;
parsec_matrix_block_cyclic_t dcA;
parsec_matrix_block_cyclic_t dcWork;

Expand Down Expand Up @@ -50,6 +54,14 @@ int main(int argc, char ** argv)
rank = 0;
#endif

for( ch = 1; ch < argc; ch++ ) {
if( 0 == strcmp(argv[ch], "--") ) {
parsec_argc = argc - ch - 1;
parsec_argv = (0 < parsec_argc) ? &argv[ch + 1] : NULL;
break;
}
}

while ((ch = getopt_long(argc, argv, "M:N:m:n:P:c:h", longopts, NULL)) != -1)
switch (ch) {
case 'M':
Expand Down Expand Up @@ -101,8 +113,10 @@ int main(int argc, char ** argv)


/** Initialize PaRSEC with the required number of cores,
* and pass all arguments after '--' to PaRSEC, if there are some */
parsec = parsec_init(cores, &argc, &argv);
* and pass only PaRSEC arguments after '--', without argv[0]. */
parsec = parsec_init(cores,
(0 < parsec_argc) ? &parsec_argc : NULL,
(0 < parsec_argc) ? &parsec_argv : NULL);

/** Declare a Matrix A as a (4, 1)-2D-Tile cyclic matrix of size M x N real
* doubles, tiled in tiles of mb x nb, and distributed over PxQ processes,
Expand Down
2 changes: 1 addition & 1 deletion parsec
Submodule parsec updated 338 files
9 changes: 5 additions & 4 deletions src/cuda/lapack_cuda_stage_in.c
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
* Copyright (c) 2020-2024 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2026 NVIDIA Corporation. All rights reserved.
*
*/

Expand Down Expand Up @@ -33,14 +34,14 @@ dplasma_cuda_lapack_stage_in(parsec_gpu_task_t *gtask,
if(flow_mask & (1U << i)){
copy_in = task->data[i].data_in;
copy_out = task->data[i].data_out;
ddc = (dplasma_data_collection_t*)gtask->flow_dc[i];
ddc = (dplasma_data_collection_t*)gtask->flow_info[i].flow_dc;
assert(ddc != NULL);
elem_sz = parsec_datadist_getsizeoftype(ddc->dc_original->mtype);
in_elem_dev = (parsec_device_gpu_module_t*)parsec_mca_device_get( copy_in->device_index);
if( (in_elem_dev->super.type == PARSEC_DEV_CUDA) || (ddc->dc_original->storage != PARSEC_MATRIX_LAPACK)){
ret = (cudaError_t)cudaMemcpyAsync( copy_out->device_private,
copy_in->device_private,
gtask->flow_nb_elts[i],
gtask->flow_info[i].flow_span,
(in_elem_dev->super.type != PARSEC_DEV_CUDA)?
cudaMemcpyHostToDevice : cudaMemcpyDeviceToDevice,
cuda_stream->cuda_stream);
Expand Down Expand Up @@ -109,15 +110,15 @@ dplasma_cuda_lapack_stage_out(parsec_gpu_task_t *gtask,
if(flow_mask & (1U << i)){
copy_in = task->data[i].data_out;
copy_out = copy_in->original->device_copies[0];
ddc = (dplasma_data_collection_t*)gtask->flow_dc[i];
ddc = (dplasma_data_collection_t*)gtask->flow_info[i].flow_dc;
assert(ddc != NULL);
elem_sz = parsec_datadist_getsizeoftype(ddc->dc_original->mtype);
out_elem_dev = (parsec_device_gpu_module_t*)parsec_mca_device_get( copy_out->device_index);

if( (out_elem_dev->super.type == PARSEC_DEV_CUDA) || (ddc->dc_original->storage != PARSEC_MATRIX_LAPACK)){
ret = (cudaError_t)cudaMemcpyAsync( copy_out->device_private,
copy_in->device_private,
gtask->flow_nb_elts[i],
gtask->flow_info[i].flow_span,
out_elem_dev->super.type != PARSEC_DEV_CUDA ?
cudaMemcpyDeviceToHost : cudaMemcpyDeviceToDevice,
cuda_stream->cuda_stream);
Expand Down
2 changes: 1 addition & 1 deletion src/dplasmaaux.c
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ dplasma_aux_getGEMMLookahead( parsec_tiled_matrix_t *A )
* look ahead based on the global information to get the same one on all
* nodes.
*/
int nbunits = vpmap_get_nb_total_threads() * A->super.nodes;
int nbunits = parsec_vpmap_get_nb_total_threads() * A->super.nodes;
double alpha = 3. * (double)nbunits / ( A->mt * A->nt );

if ( A->super.nodes == 1 ) {
Expand Down
21 changes: 2 additions & 19 deletions src/scalapack_wrappers/dplasma_wrapper_parsec_init.c
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,12 @@
* Copyright (c) 2021-2023 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2026 NVIDIA Corporation. All rights reserved.
*/

#include "common.h"


#define APP_NAME "SCALAPACK_WRAPPED_CALL"

static void parsec_init_wrapper_internal(){
if( parsec_ctx == NULL ){

Expand All @@ -27,16 +26,6 @@ static void parsec_init_wrapper_internal(){
if(var_ncores!=NULL){
ncores = atoi(var_ncores);
}
int parsec_argc = 1;

char** parsec_argv = (char**)calloc(parsec_argc+1, sizeof(char*));
int i;
for(i=0; i<parsec_argc; i++){
parsec_argv[i] = (char*)malloc(sizeof(char)*80);
}
parsec_argv[parsec_argc] = NULL;
sprintf(parsec_argv[0], APP_NAME);

int rank,size;
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
MPI_Comm_size(MPI_COMM_WORLD, &size);
Expand All @@ -45,17 +34,12 @@ static void parsec_init_wrapper_internal(){
SYNC_TIME_START();
#endif

parsec_ctx = parsec_init(ncores, &parsec_argc, &parsec_argv);
parsec_ctx = parsec_init(ncores, NULL, NULL);

#ifdef MEASURE_INTERNAL_TIMES
SYNC_TIME_PRINT(rank, ("PaRSEC initialized\n"));
#endif

for(i=0; i<parsec_argc; i++){
free(parsec_argv[i]);
}
free(parsec_argv);

}
}

Expand Down Expand Up @@ -128,4 +112,3 @@ void parsec_wrapper_devices_reset_load_(void){
parsec_devices_reset_load(parsec_ctx);
}
}

3 changes: 1 addition & 2 deletions src/utils/dplasma_lapack_adtt.c
Original file line number Diff line number Diff line change
Expand Up @@ -288,7 +288,7 @@ static parsec_data_t* data_of(parsec_data_collection_t *desc, ...)
"data_of CP %p [old type %p] loc %d -> dtt %p target_shape %d layout %d",
cp, cp->dtt, loc, adt->opaque_dtt, info.shape, info.layout);
dt = parsec_data_create_with_type( dt->dc,
dt->key, cp->device_private, dt->nb_elts,
dt->key, cp->device_private, dt->span,
adt->opaque_dtt);
}
}
Expand Down Expand Up @@ -392,4 +392,3 @@ void dplasma_unwrap_data_collection(dplasma_data_collection_t *ddc)
{
free(ddc);
}

3 changes: 1 addition & 2 deletions src/zgebrd_ge2gb_wrapper.c
Original file line number Diff line number Diff line change
Expand Up @@ -310,7 +310,7 @@ dplasma_zgebrd_ge2gb_New( int ib,
dplasma_qrtree_t *qrtre0, *qrtree, *lqtree;
int P, Q, cores;

cores = dplasma_imax( vpmap_get_nb_total_threads(), 1 );
cores = dplasma_imax( parsec_vpmap_get_nb_total_threads(), 1 );
qrtree = malloc( sizeof(dplasma_qrtree_t) );
lqtree = malloc( sizeof(dplasma_qrtree_t) );

Expand Down Expand Up @@ -613,4 +613,3 @@ dplasma_zgebrd_ge2gb( parsec_context_t *parsec, int ib,

return 0;
}

2 changes: 1 addition & 1 deletion src/zgetrf_1d.jdf
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ descIPIV [type = "parsec_tiled_matrix_t*" hidden = on default = "((dplasma_da

INFO [type = "int*"]

nbmaxthrd [type = "int" hidden=on default="( dplasma_imax( 1, dplasma_imin( vpmap_get_nb_threads_in_vp(0) - 1, 48 ) ) )" ] /* 48 is the actual limit of the kernel */
nbmaxthrd [type = "int" hidden=on default="( dplasma_imax( 1, dplasma_imin( parsec_vpmap_get_vp_threads(0) - 1, 48 ) ) )" ] /* 48 is the actual limit of the kernel */
KT [type = "int" hidden=on default="( dplasma_imin( descA->mt, descA->nt )-1 )" ]

getrfdata [type = "CORE_zgetrf_data_t *" hidden = on default = "NULL"]
Expand Down
2 changes: 1 addition & 1 deletion src/zgetrf_1d_wrapper.c
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ dplasma_zgetrf_1d_New( parsec_tiled_matrix_t *A,
int *INFO )
{
parsec_zgetrf_1d_taskpool_t *parsec_getrf_1d;
int nbthreads = dplasma_imax( 1, vpmap_get_nb_threads_in_vp(0) - 1 );
int nbthreads = dplasma_imax( 1, parsec_vpmap_get_vp_threads(0) - 1 );
dplasma_data_collection_t * ddc_A = dplasma_wrap_data_collection((parsec_tiled_matrix_t*)A);
dplasma_data_collection_t * ddc_IPIV = dplasma_wrap_data_collection((parsec_tiled_matrix_t*)IPIV);

Expand Down
2 changes: 1 addition & 1 deletion src/zgetrf_qrf.jdf
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ INFO [type = "int*"]
param_p [type = int default="((parsec_matrix_block_cyclic_t*)descA)->grid.rows" hidden=on ]
param_q [type = int default="((parsec_matrix_block_cyclic_t*)descA)->grid.cols" hidden=on ]
minMNT [type = int default="dplasma_imin( descA->mt-1, descA->nt-1 )" hidden=on ]
nbmaxthrd [type = "int" default="( dplasma_imax( 1, dplasma_imin( vpmap_get_nb_threads_in_vp(0) - 1, 48 ) ) )" hidden=on] /* 48 is the actual limit of the kernel */
nbmaxthrd [type = "int" default="( dplasma_imax( 1, dplasma_imin( parsec_vpmap_get_vp_threads(0) - 1, 48 ) ) )" hidden=on] /* 48 is the actual limit of the kernel */

getrfdata [type = "CORE_zgetrf_data_t *" hidden = on default = "NULL"]

Expand Down
2 changes: 1 addition & 1 deletion src/zgetrf_qrf_wrapper.c
Original file line number Diff line number Diff line change
Expand Up @@ -166,7 +166,7 @@ dplasma_zgetrf_qrf_New( dplasma_qrtree_t *qrtree,
int ib = TS->mb;
size_t sizeW = 1;
size_t sizeReduceVec = 1;
int nbthreads = dplasma_imax( 1, vpmap_get_nb_threads_in_vp(0) - 1 );
int nbthreads = dplasma_imax( 1, parsec_vpmap_get_vp_threads(0) - 1 );

/*
* Compute W size according to criteria used.
Expand Down
37 changes: 24 additions & 13 deletions tests/common.c
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
* Copyright (c) 2009-2024 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2026 NVIDIA Corporation. All rights reserved.
*
*/
#include "parsec/runtime.h"
Expand Down Expand Up @@ -580,6 +581,23 @@ static void print_arguments(int* iparam)
}
}

/**
 * Extract the slice of argv that comes after the first "--" separator.
 *
 * The search begins at argv[1], so argv[0] is never treated as the
 * separator. The result aliases argv; no memory is allocated.
 *
 * @param[in]  argc         Number of entries in argv.
 * @param[in]  argv         Argument vector to scan.
 * @param[out] parsec_argc  Count of arguments after the first "--"; 0 when
 *                          no separator exists or it is the last argument.
 * @param[out] parsec_argv  Address of the first argument after "--", or
 *                          NULL when *parsec_argc is 0.
 */
static void get_parsec_arguments(int argc, char **argv,
                                 int *parsec_argc, char ***parsec_argv)
{
    int sep = 1;
    int tail;

    /* Find the first "--"; sep ends at argc when none is present. */
    for( ; sep < argc; sep++ ) {
        if( 0 == strcmp(argv[sep], "--") ) {
            break;
        }
    }

    /* Number of entries after the separator (<= 0 means nothing to pass). */
    tail = argc - sep - 1;

    *parsec_argc = (tail > 0) ? tail : 0;
    *parsec_argv = (tail > 0) ? (argv + sep + 1) : NULL;
}




Expand Down Expand Up @@ -671,19 +689,13 @@ parsec_context_t* setup_parsec(int argc, char **argv, int *iparam)

TIME_START();

/* Once we got out arguments, we should pass whatever is left down */
int parsec_argc, idx;
char** parsec_argv = (char**)calloc(argc, sizeof(char*));
parsec_argv[0] = argv[0]; /* the app name */
for( idx = parsec_argc = 1;
(idx < argc) && (0 != strcmp(argv[idx], "--")); idx++);
if( idx != argc ) {
for( parsec_argc = 1, idx++; idx < argc;
parsec_argv[parsec_argc] = argv[idx], parsec_argc++, idx++);
}
/* PaRSEC only accepts its own arguments, without argv[0] or "--". */
int parsec_argc;
char **parsec_argv;
get_parsec_arguments(argc, argv, &parsec_argc, &parsec_argv);
parsec_context_t* ctx = parsec_init(iparam[IPARAM_NCORES],
&parsec_argc, &parsec_argv);
free(parsec_argv);
(0 < parsec_argc) ? &parsec_argc : NULL,
(0 < parsec_argc) ? &parsec_argv : NULL);
if( NULL == ctx ) {
/* Failed to correctly initialize. In a correct scenario report
* upstream, but in this particular case bail out.
Expand Down Expand Up @@ -778,4 +790,3 @@ void cleanup_parsec(parsec_context_t* parsec, int *iparam)
#endif
(void)iparam;
}

Loading
Loading