From e69ae74deb6c992cccfe165e8f81b201f5854074 Mon Sep 17 00:00:00 2001
From: Adam Perdeusz
Date: Mon, 17 Jan 2022 14:25:37 +0100
Subject: [PATCH] Fixes for omp declare target feature

---
Reviewer notes (text between the "---" line and the diffstat is not part of
the commit message):

* exp_rte.cpp: exp_end() emits the ll_make_kmpc_spmd_kernel_deinit_v2() call
  only when is_gpu_output_file() is also true.
* llutil.cpp / llutil.h: add is_gpu_output_file(), which returns
  LLVMFIL == gbl.ompaccfile.
* main.cpp: braces added around the conditional schedule() call; no logic
  change.
* ompaccel.cpp: exp_ompaccel_mploop() takes the
  ll_make_kmpc_for_static_init() path also when is_gpu_output_file() is
  false; ompaccel_set_target_declare() additionally sets
  gbl.ompaccel_intarget.
* ompaccel_x86.cpp: the reduction-symbol loop in
  ompaccel_x86_fix_arg_types() is skipped when tinfo is null.

NOTE(review): indentation and the position of blank context lines in the
hunks were inferred from the hunk line counts; if they differ from the tree,
apply with "git apply --ignore-whitespace". The From: header carries no
e-mail address; confirm it before using "git am".

 test/offloading/amdgpu/swdev288446.F90  | 49 +++++++++++++++++++++++++
 tools/flang2/flang2exe/exp_rte.cpp      |  2 +-
 tools/flang2/flang2exe/llutil.cpp       |  5 +++
 tools/flang2/flang2exe/llutil.h         |  5 +++
 tools/flang2/flang2exe/main.cpp         |  3 +-
 tools/flang2/flang2exe/ompaccel.cpp     |  3 +-
 tools/flang2/flang2exe/ompaccel_x86.cpp | 20 +++++-----
 7 files changed, 75 insertions(+), 12 deletions(-)
 create mode 100644 test/offloading/amdgpu/swdev288446.F90

diff --git a/test/offloading/amdgpu/swdev288446.F90 b/test/offloading/amdgpu/swdev288446.F90
new file mode 100644
index 0000000000..743fed02a3
--- /dev/null
+++ b/test/offloading/amdgpu/swdev288446.F90
@@ -0,0 +1,49 @@
+subroutine _compute_dev(sum_dev, a_dev, b_dev, c_dev)
+  integer i,j,k
+  REAL(8), dimension(100,100,100), intent(in) :: b_dev, c_dev
+  REAL(8), dimension(100,100,100), intent(out) :: a_dev
+  REAL(8), intent(inout) :: sum_dev
+  integer :: nsize = 100
+
+!$omp declare target
+!$omp parallel do
+  do i=1,nsize
+    do j=1,nsize
+      do k=1,nsize
+        a_dev(i,j,k) = b_dev(i,j,k) * c_dev(i,j,k) * i * nsize*nsize + j * nsize + k
+      end do
+    end do
+  end do
+!$xomp end target
+end subroutine _compute_dev
+
+program main
+  integer :: nsize
+  REAL(8) :: sum_dev = 0
+  REAL(8), dimension(100,100,100) :: a_dev, b_dev, c_dev
+  integer :: nsize = 100
+!$omp declare target(_compute_dev)
+  do i=1,nsize
+    do j=1,nsize
+      do k=1,nsize
+        b_dev(i,j,k) = 1
+        c_dev(i,j,k) = 2
+      end do
+    end do
+  end do
+
+!$omp target update to(b_dev,c_dev,sum_dev,nsize)
+!$omp target
+  CALL _compute_dev(sum_dev, a_dev, b_dev, c_dev)
+!$omp end target
+!$omp target update from(a_dev,sum_dev)
+
+  do i=1,3
+    do j=1,3
+      do k=1,3
+        print *, a_dev(i,j,k)
+      end do
+    end do
+  end do
+end program main
+
diff --git a/tools/flang2/flang2exe/exp_rte.cpp b/tools/flang2/flang2exe/exp_rte.cpp
index ce9b7fdcb2..1752229340 100644
--- a/tools/flang2/flang2exe/exp_rte.cpp
+++ b/tools/flang2/flang2exe/exp_rte.cpp
@@ -2157,7 +2157,7 @@ exp_end(ILM *ilmp, int curilm, bool is_func)
 #ifdef OMP_OFFLOAD_AMD
   int ilix;
   if (flg.omptarget && !is_func) {
-    if (XBIT(232, 0x40) && gbl.ompaccel_intarget) {
+    if (XBIT(232, 0x40) && gbl.ompaccel_intarget && is_gpu_output_file() ) {
       ilix = ll_make_kmpc_spmd_kernel_deinit_v2();
       iltb.callfg = 1;
       chk_block(ilix);
diff --git a/tools/flang2/flang2exe/llutil.cpp b/tools/flang2/flang2exe/llutil.cpp
index e4cd39f724..3dc7a96ae9 100644
--- a/tools/flang2/flang2exe/llutil.cpp
+++ b/tools/flang2/flang2exe/llutil.cpp
@@ -1783,7 +1783,12 @@ set_llasm_output_file(FILE *fd)
 FILE *get_llasm_output_file() {
   return LLVMFIL;
 }
+
 #endif // OMP_OFFLOAD_LLVM
+
+bool is_gpu_output_file() {
+  return (LLVMFIL == gbl.ompaccfile);
+}
 // AOCC End

 void
diff --git a/tools/flang2/flang2exe/llutil.h b/tools/flang2/flang2exe/llutil.h
index 52f2ba2d26..44f9251687 100644
--- a/tools/flang2/flang2exe/llutil.h
+++ b/tools/flang2/flang2exe/llutil.h
@@ -1620,11 +1620,16 @@ void use_cpu_output_file(void);
  */
 void use_gpu_output_file(void);

+
 // AOCC Begin
 /**
    \brief Return the current output file
  */
 FILE *get_llasm_output_file();
+/**
+   \brief return true if gpu file is currently written
+ */
+bool is_gpu_output_file();

 #ifdef OMP_OFFLOAD_AMD
 /**
diff --git a/tools/flang2/flang2exe/main.cpp b/tools/flang2/flang2exe/main.cpp
index 9de1e41d77..aebf71f3c8 100644
--- a/tools/flang2/flang2exe/main.cpp
+++ b/tools/flang2/flang2exe/main.cpp
@@ -370,8 +370,9 @@ process_input(char *argv0, bool *need_cuda_constructor)
       gbl.ompaccel_isdevice = true;
       schedule();
       gbl.ompaccel_isdevice = orig;
-      if (flg.omptarget && !gbl.ompaccel_isdevice)
+      if (flg.omptarget && !gbl.ompaccel_isdevice) {
         schedule();
+      }
     } else {
       schedule();
     }
diff --git a/tools/flang2/flang2exe/ompaccel.cpp b/tools/flang2/flang2exe/ompaccel.cpp
index dcd5799b62..0f7782057f 100644
--- a/tools/flang2/flang2exe/ompaccel.cpp
+++ b/tools/flang2/flang2exe/ompaccel.cpp
@@ -2748,7 +2748,7 @@ exp_ompaccel_mploop(ILM *ilmp, int curilm)
     case KMP_DISTRIBUTE_STATIC:
     case KMP_DISTRIBUTE_STATIC_CHUNKED_CHUNKONE: // AOCC
       // AOCC begin
-      if (flg.x86_64_omptarget) {
+      if (flg.x86_64_omptarget || !is_gpu_output_file() ) {
         ili = ll_make_kmpc_for_static_init(&loop_args);
       // AOCC end
       } else {
@@ -3709,6 +3709,7 @@ ompaccel_set_default_map(int maptype) {
 void
 ompaccel_set_target_declare() {
   OMPACCFUNCDEVP(gbl.currsub, 1);
+  gbl.ompaccel_intarget = true;
 }
 // AOCC End
 #endif
diff --git a/tools/flang2/flang2exe/ompaccel_x86.cpp b/tools/flang2/flang2exe/ompaccel_x86.cpp
index 12c7bfd605..8bafa7e6b3 100644
--- a/tools/flang2/flang2exe/ompaccel_x86.cpp
+++ b/tools/flang2/flang2exe/ompaccel_x86.cpp
@@ -174,18 +174,20 @@ void ompaccel_x86_fix_arg_types(SPTR func_sptr) {
   // Remember all the reduction symbols of func_sptr so that we can blacklist
   // them during the type update.
   std::set<SPTR> reduc_syms;
-  for (int i = 0; i < tinfo->n_reduction_symbols; i++) {
-    OMPACCEL_RED_SYM *reduction_sym = &(tinfo->reduction_symbols[i]);
-    OMPACCEL_SYM *ompaccel_sym = get_ompaccel_sym_for(reduction_sym, tinfo);
+  if (tinfo) {
+    for (int i = 0; i < tinfo->n_reduction_symbols; i++) {
+      OMPACCEL_RED_SYM *reduction_sym = &(tinfo->reduction_symbols[i]);
+      OMPACCEL_SYM *ompaccel_sym = get_ompaccel_sym_for(reduction_sym, tinfo);

-    if (!ompaccel_sym)
-      continue;
-    SPTR device_sym = ompaccel_sym->device_sym;
+      if (!ompaccel_sym)
+        continue;
+      SPTR device_sym = ompaccel_sym->device_sym;

-    if (PASSBYVALG(device_sym))
-      continue;
+      if (PASSBYVALG(device_sym))
+        continue;

-    reduc_syms.insert(device_sym);
+      reduc_syms.insert(device_sym);
+    }
   }

   for (int i = 0; i < func_paramct; i++) {