From ed2bc5ea274d751d75595a4bf33d4a21698f9b1e Mon Sep 17 00:00:00 2001 From: "Daniel J. Vickers" Date: Thu, 6 Nov 2025 08:34:05 -0500 Subject: [PATCH 01/33] Correct line markers have been recovered --- src/common/include/acc_macros.fpp | 29 +++++++++++++ src/common/include/omp_macros.fpp | 56 ++++++++++++++++++++++++++ src/common/include/parallel_macros.fpp | 27 +++++++++++++ src/simulation/m_ibm.fpp | 4 +- 4 files changed, 114 insertions(+), 2 deletions(-) diff --git a/src/common/include/acc_macros.fpp b/src/common/include/acc_macros.fpp index 147473250..4348cbe71 100644 --- a/src/common/include/acc_macros.fpp +++ b/src/common/include/acc_macros.fpp @@ -161,6 +161,35 @@ $:acc_end_directive #:enddef +#:def NEW_ACC_PARALLEL_LOOP(collapse=None, private=None, parallelism='[gang, vector]', & + & default='present', firstprivate=None, reduction=None, reductionOp=None, & + & copy=None, copyin=None, copyinReadOnly=None, copyout=None, create=None, & + & no_create=None, present=None, deviceptr=None, attach=None, extraAccArgs=None) + #:set collapse_val = GEN_COLLAPSE_STR(collapse) + #:set parallelism_val = GEN_PARALLELISM_STR(parallelism) + #:set default_val = GEN_DEFAULT_STR(default) + #:set private_val = GEN_PRIVATE_STR(private, False).strip('\n') + GEN_PRIVATE_STR(firstprivate, True).strip('\n') + #:set reduction_val = GEN_REDUCTION_STR(reduction, reductionOp) + #:set copy_val = GEN_COPY_STR(copy) + #:set copyin_val = GEN_COPYIN_STR(copyin, False).strip('\n') + GEN_COPYIN_STR(copyinReadOnly, True).strip('\n') + #:set copyout_val = GEN_COPYOUT_STR(copyout) + #:set create_val = GEN_CREATE_STR(create) + #:set no_create_val = GEN_NOCREATE_STR(no_create) + #:set present_val = GEN_PRESENT_STR(present) + #:set deviceptr_val = GEN_DEVICEPTR_STR(deviceptr) + #:set attach_val = GEN_ATTACH_STR(attach) + #:set extraAccArgs_val = GEN_EXTRA_ARGS_STR(extraAccArgs) + #:set clause_val = collapse_val.strip('\n') + parallelism_val.strip('\n') + & + & default_val.strip('\n') + private_val.strip('\n') + reduction_val.strip('\n') + & + & copy_val.strip('\n') + copyin_val.strip('\n') + & + & copyout_val.strip('\n') + create_val.strip('\n') + & + & no_create_val.strip('\n') + present_val.strip('\n') + & + & deviceptr_val.strip('\n') + attach_val.strip('\n') + #:set acc_directive = '!$acc parallel loop ' + & + & clause_val + extraAccArgs_val.strip('\n') + $:acc_directive +#:enddef + #:def ACC_ROUTINE(function_name=None, parallelism=None, nohost=False, extraAccArgs=None) #:set parallelism_val = GEN_PARALLELISM_STR(parallelism) #:assert isinstance(nohost, bool) diff --git a/src/common/include/omp_macros.fpp b/src/common/include/omp_macros.fpp index 1c2fb9c98..82b07a9ca 100644 --- a/src/common/include/omp_macros.fpp +++ b/src/common/include/omp_macros.fpp @@ -196,6 +196,62 @@ $:omp_end_directive #:enddef +#:def NEW_OMP_PARALLEL_LOOP(collapse=None, private=None, parallelism='[gang, vector]', & + & default='present', firstprivate=None, reduction=None, reductionOp=None, & + & copy=None, copyin=None, copyinReadOnly=None, copyout=None, create=None, & + & no_create=None, present=None, deviceptr=None, attach=None, extraOmpArgs=None) + + #:set collapse_val = GEN_COLLAPSE_STR(collapse) + #:set parallelism_val = OMP_PARALLELISM_STR(parallelism) + #:set default_val = OMP_DEFAULT_STR(default) + #:set private_val = GEN_PRIVATE_STR(private, False).strip('\n') + GEN_PRIVATE_STR(firstprivate, True).strip('\n') + #:set reduction_val = GEN_REDUCTION_STR(reduction, reductionOp) + #:set copy_val = OMP_COPY_STR(copy) + #:set copyin_val = OMP_COPYIN_STR(copyin).strip('\n') + OMP_COPYIN_STR(copyinReadOnly).strip('\n') + #:set copyout_val = OMP_COPYOUT_STR(copyout) + #:set create_val = OMP_CREATE_STR(create) + #:set no_create_val = OMP_NOCREATE_STR(no_create) + #:set present_val = OMP_PRESENT_STR(present) + #:set deviceptr_val = OMP_DEVICEPTR_STR(deviceptr) + #:set attach_val = OMP_MAP_STR('always,tofrom', attach) + #:set extraOmpArgs_val = GEN_EXTRA_ARGS_STR(extraOmpArgs) + #:set clause_val = collapse_val.strip('\n') + parallelism_val.strip('\n') + & + & default_val.strip('\n') + private_val.strip('\n') + reduction_val.strip('\n') + & + & copy_val.strip('\n') + copyin_val.strip('\n') + & + & copyout_val.strip('\n') + create_val.strip('\n') + & + & no_create_val.strip('\n') + present_val.strip('\n') + & + & deviceptr_val.strip('\n') + attach_val.strip('\n') + #! Hardcoding the parallelism for now + + #:if MFC_COMPILER == NVIDIA_COMPILER_ID or MFC_COMPILER == PGI_COMPILER_ID + #:set omp_start_directive = '!$omp target teams loop defaultmap(firstprivate:scalar) bind(teams,parallel) ' + #:elif MFC_COMPILER == CCE_COMPILER_ID + #:set omp_start_directive = '!$omp target teams distribute parallel do simd defaultmap(firstprivate:scalar) ' + #:elif MFC_COMPILER == AMD_COMPILER_ID + #:set omp_start_directive = '!$omp target teams distribute parallel do ' + #:else + #:set omp_start_directive = '!$omp target teams loop defaultmap(firstprivate:scalar) bind(teams,parallel) ' + #:endif + + #:set omp_directive = omp_start_directive + clause_val + extraOmpArgs_val.strip('\n') + $:omp_directive +#:enddef + +#:def END_OMP_PARALLEL_LOOP() + + #:if MFC_COMPILER == NVIDIA_COMPILER_ID or MFC_COMPILER == PGI_COMPILER_ID + #:set omp_end_directive = '!$omp end target teams loop' + #:elif MFC_COMPILER == CCE_COMPILER_ID + #:set omp_end_directive = '!$omp end target teams distribute parallel do simd' + #:elif MFC_COMPILER == AMD_COMPILER_ID + #:set omp_end_directive = '!$omp end target teams distribute parallel do' + #:else + #:set omp_end_directive = '!$omp end target teams loop' + #:endif + + $:omp_end_directive +#:enddef + #:def OMP_ROUTINE(function_name, nohost, extraOmpArgs) #:assert isinstance(nohost, bool) #:if nohost == True diff --git a/src/common/include/parallel_macros.fpp b/src/common/include/parallel_macros.fpp index 61bc30b43..0319d35b3 100644 --- a/src/common/include/parallel_macros.fpp +++ b/src/common/include/parallel_macros.fpp @@ -36,6 +36,33 @@ #endif #:enddef +#:def NEW_GPU_PARALLEL_LOOP(collapse=None, private=None, parallelism='[gang, vector]', & + & default='present', firstprivate=None, reduction=None, reductionOp=None, & + & copy=None, copyin=None, copyinReadOnly=None, copyout=None, create=None, & + & no_create=None, present=None, deviceptr=None, attach=None, extraAccArgs=None, extraOmpArgs=None) + +#if defined(MFC_OpenACC) + #:set directive = NEW_ACC_PARALLEL_LOOP(collapse, private, parallelism, default, firstprivate, reduction, reductionOp, copy, copyin, copyinReadOnly, copyout, create, no_create, present, deviceptr, attach, extraAccArgs) +#elif defined(MFC_OpenMP) + #:set directive = NEW_OMP_PARALLEL_LOOP(collapse, private, parallelism, default, firstprivate, reduction, reductionOp, copy, copyin, copyinReadOnly, copyout, create, no_create, present, deviceptr, attach, extraOmpArgs) +#endif + + $:directive + +#:enddef + +#:def END_GPU_PARALLEL_LOOP() + + #:set acc_end_directive = '!$acc end parallel loop' + #:set omp_code = END_OMP_PARALLEL_LOOP(code, collapse, private, parallelism, default, firstprivate, reduction, reductionOp, copy, copyin, copyinReadOnly, copyout, create, no_create, present, deviceptr, attach, extraOmpArgs) + +#if defined(MFC_OpenACC) + $:acc_end_directive +#elif defined(MFC_OpenMP) + $:omp_code +#endif +#:enddef + #:def GPU_ROUTINE(function_name=None, parallelism=None, nohost=False, cray_inline=False, extraAccArgs=None, extraOmpArgs=None) #:assert isinstance(cray_inline, bool) #:set acc_directive = ACC_ROUTINE(function_name=function_name, parallelism=parallelism, nohost=nohost, extraAccArgs=extraAccArgs) diff --git a/src/simulation/m_ibm.fpp b/src/simulation/m_ibm.fpp index 1a159c74d..4bf95a4ba 100644 --- a/src/simulation/m_ibm.fpp +++ b/src/simulation/m_ibm.fpp @@ -197,7 +197,7 @@ contains type(ghost_point) :: gp type(ghost_point) :: innerp if (num_gps > 0) then - #:call GPU_PARALLEL_LOOP(private='[physical_loc,dyn_pres,alpha_rho_IP, alpha_IP,pres_IP,vel_IP,vel_g,vel_norm_IP,r_IP, v_IP,pb_IP,mv_IP,nmom_IP,presb_IP,massv_IP,rho, gamma,pi_inf,Re_K,G_K,Gs,gp,innerp,norm,buf, radial_vector, rotation_velocity, j,k,l,q]') + $:NEW_GPU_PARALLEL_LOOP(private='[i,physical_loc,dyn_pres,alpha_rho_IP, alpha_IP,pres_IP,vel_IP,vel_g,vel_norm_IP,r_IP, v_IP,pb_IP,mv_IP,nmom_IP,presb_IP,massv_IP,rho, gamma,pi_inf,Re_K,G_K,Gs,gp,innerp,norm,buf, radial_vector, rotation_velocity, j,k,l,q]') do i = 1, num_gps gp = ghost_points(i) @@ -365,7 +365,7 @@ contains end do end if end do - #:endcall GPU_PARALLEL_LOOP + ! $:END_GPU_PARALLEL_LOOP end if !Correct the state of the inner points in IBs From 35c57ebbe78cfb37466ca97d347d7ab0c74fdb5f Mon Sep 17 00:00:00 2001 From: "Daniel J. Vickers" Date: Thu, 6 Nov 2025 09:00:11 -0500 Subject: [PATCH 02/33] Made it through common --- src/common/include/parallel_macros.fpp | 29 +++-------- src/common/m_boundary_common.fpp | 72 +++++++++++++------------- src/common/m_chemistry.fpp | 8 +-- src/common/m_compute_levelset.fpp | 28 +++++----- src/common/m_finite_differences.fpp | 4 +- src/common/m_ib_patches.fpp | 28 +++++----- src/common/m_mpi_common.fpp | 72 +++++++++++++------------- src/common/m_phase_change.fpp | 4 +- src/common/m_variables_conversion.fpp | 8 +-- 9 files changed, 118 insertions(+), 135 deletions(-) diff --git a/src/common/include/parallel_macros.fpp b/src/common/include/parallel_macros.fpp index 0319d35b3..c903745f4 100644 --- a/src/common/include/parallel_macros.fpp +++ b/src/common/include/parallel_macros.fpp @@ -19,24 +19,7 @@ #:enddef -#:def GPU_PARALLEL_LOOP(code, collapse=None, private=None, parallelism='[gang, vector]', & - & default='present', firstprivate=None, reduction=None, reductionOp=None, & - & copy=None, copyin=None, copyinReadOnly=None, copyout=None, create=None, & - & no_create=None, present=None, deviceptr=None, attach=None, extraAccArgs=None, extraOmpArgs=None) - - #:set acc_code = ACC_PARALLEL_LOOP(code, collapse, private, parallelism, default, firstprivate, reduction, reductionOp, copy, copyin, copyinReadOnly, copyout, create, no_create, present, deviceptr, attach, extraAccArgs) - #:set omp_code = OMP_PARALLEL_LOOP(code, collapse, private, parallelism, default, firstprivate, reduction, reductionOp, copy, copyin, copyinReadOnly, copyout, create, no_create, present, deviceptr, attach, extraOmpArgs) - -#if defined(MFC_OpenACC) - $:acc_code -#elif defined(MFC_OpenMP) - $:omp_code -#else - $:code -#endif -#:enddef - -#:def NEW_GPU_PARALLEL_LOOP(collapse=None, private=None, parallelism='[gang, vector]', & +#:def GPU_PARALLEL_LOOP(collapse=None, private=None, parallelism='[gang, vector]', & & default='present', firstprivate=None, reduction=None, reductionOp=None, & & copy=None, copyin=None, copyinReadOnly=None, copyout=None, create=None, & & no_create=None, present=None, deviceptr=None, attach=None, extraAccArgs=None, extraOmpArgs=None) @@ -53,14 +36,14 @@ #:def END_GPU_PARALLEL_LOOP() - #:set acc_end_directive = '!$acc end parallel loop' - #:set omp_code = END_OMP_PARALLEL_LOOP(code, collapse, private, parallelism, default, firstprivate, reduction, reductionOp, copy, copyin, copyinReadOnly, copyout, create, no_create, present, deviceptr, attach, extraOmpArgs) - #if defined(MFC_OpenACC) - $:acc_end_directive + #:set end_directive = '!$acc end parallel loop' #elif defined(MFC_OpenMP) - $:omp_code + #:set end_directive = END_OMP_PARALLEL_LOOP(code, collapse, private, parallelism, default, firstprivate, reduction, reductionOp, copy, copyin, copyinReadOnly, copyout, create, no_create, present, deviceptr, attach, extraOmpArgs) #endif + + $:end_directive + #:enddef #:def GPU_ROUTINE(function_name=None, parallelism=None, nohost=False, cray_inline=False, extraAccArgs=None, extraOmpArgs=None) diff --git a/src/common/m_boundary_common.fpp b/src/common/m_boundary_common.fpp index 81d4bc8d6..41bae5bdb 100644 --- a/src/common/m_boundary_common.fpp +++ b/src/common/m_boundary_common.fpp @@ -88,7 +88,7 @@ contains if (bc_x%beg >= 0) then call s_mpi_sendrecv_variables_buffers(q_prim_vf, 1, -1, sys_size, pb_in, mv_in) else - #:call GPU_PARALLEL_LOOP(collapse=2) + $:GPU_PARALLEL_LOOP(private='[l,k]', collapse=2) do l = 0, p do k = 0, n select case (int(bc_type(1, -1)%sf(0, k, l))) @@ -112,13 +112,13 @@ contains end if end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP end if if (bc_x%end >= 0) then call s_mpi_sendrecv_variables_buffers(q_prim_vf, 1, 1, sys_size, pb_in, mv_in) else - #:call GPU_PARALLEL_LOOP(collapse=2) + $:GPU_PARALLEL_LOOP(private='[l,k]', collapse=2) do l = 0, p do k = 0, n select case (int(bc_type(1, 1)%sf(0, k, l))) @@ -142,7 +142,7 @@ contains end if end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP end if ! Population of Buffers in y-direction @@ -152,7 +152,7 @@ contains if (bc_y%beg >= 0) then call s_mpi_sendrecv_variables_buffers(q_prim_vf, 2, -1, sys_size, pb_in, mv_in) else - #:call GPU_PARALLEL_LOOP(collapse=2) + $:GPU_PARALLEL_LOOP(private='[l,k]', collapse=2) do l = 0, p do k = -buff_size, m + buff_size select case (int(bc_type(2, -1)%sf(k, 0, l))) @@ -179,13 +179,13 @@ contains end if end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP end if if (bc_y%end >= 0) then call s_mpi_sendrecv_variables_buffers(q_prim_vf, 2, 1, sys_size, pb_in, mv_in) else - #:call GPU_PARALLEL_LOOP(collapse=2) + $:GPU_PARALLEL_LOOP(private='[l,k]', collapse=2) do l = 0, p do k = -buff_size, m + buff_size select case (int(bc_type(2, 1)%sf(k, 0, l))) @@ -209,7 +209,7 @@ contains end if end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP end if ! Population of Buffers in z-direction @@ -219,7 +219,7 @@ contains if (bc_z%beg >= 0) then call s_mpi_sendrecv_variables_buffers(q_prim_vf, 3, -1, sys_size, pb_in, mv_in) else - #:call GPU_PARALLEL_LOOP(collapse=2) + $:GPU_PARALLEL_LOOP(private='[l,k]', collapse=2) do l = -buff_size, n + buff_size do k = -buff_size, m + buff_size select case (int(bc_type(3, -1)%sf(k, l, 0))) @@ -243,13 +243,13 @@ contains end if end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP end if if (bc_z%end >= 0) then call s_mpi_sendrecv_variables_buffers(q_prim_vf, 3, 1, sys_size, pb_in, mv_in) else - #:call GPU_PARALLEL_LOOP(collapse=2) + $:GPU_PARALLEL_LOOP(private='[l,k]', collapse=2) do l = -buff_size, n + buff_size do k = -buff_size, m + buff_size select case (int(bc_type(3, 1)%sf(k, l, 0))) @@ -273,7 +273,7 @@ contains end if end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP end if ! END: Population of Buffers in z-direction @@ -1165,7 +1165,7 @@ contains if (bc_x%beg >= 0) then call s_mpi_sendrecv_variables_buffers(c_divs, 1, -1, num_dims + 1) else - #:call GPU_PARALLEL_LOOP(collapse=2) + $:GPU_PARALLEL_LOOP(private='[l,k]', collapse=2) do l = 0, p do k = 0, n select case (bc_type(1, -1)%sf(0, k, l)) @@ -1178,13 +1178,13 @@ contains end select end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP end if if (bc_x%end >= 0) then call s_mpi_sendrecv_variables_buffers(c_divs, 1, 1, num_dims + 1) else - #:call GPU_PARALLEL_LOOP(collapse=2) + $:GPU_PARALLEL_LOOP(private='[l,k]', collapse=2) do l = 0, p do k = 0, n select case (bc_type(1, 1)%sf(0, k, l)) @@ -1197,7 +1197,7 @@ contains end select end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP end if if (n == 0) return @@ -1206,7 +1206,7 @@ contains if (bc_y%beg >= 0) then call s_mpi_sendrecv_variables_buffers(c_divs, 2, -1, num_dims + 1) else - #:call GPU_PARALLEL_LOOP(collapse=2) + $:GPU_PARALLEL_LOOP(private='[l,k]', collapse=2) do l = 0, p do k = -buff_size, m + buff_size select case (bc_type(2, -1)%sf(k, 0, l)) @@ -1219,13 +1219,13 @@ contains end select end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP end if if (bc_y%end >= 0) then call s_mpi_sendrecv_variables_buffers(c_divs, 2, 1, num_dims + 1) else - #:call GPU_PARALLEL_LOOP(collapse=2) + $:GPU_PARALLEL_LOOP(private='[l,k]', collapse=2) do l = 0, p do k = -buff_size, m + buff_size select case (bc_type(2, 1)%sf(k, 0, l)) @@ -1238,7 +1238,7 @@ contains end select end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP end if if (p == 0) return @@ -1247,7 +1247,7 @@ contains if (bc_z%beg >= 0) then call s_mpi_sendrecv_variables_buffers(c_divs, 3, -1, num_dims + 1) else - #:call GPU_PARALLEL_LOOP(collapse=2) + $:GPU_PARALLEL_LOOP(private='[l,k]', collapse=2) do l = -buff_size, n + buff_size do k = -buff_size, m + buff_size select case (bc_type(3, -1)%sf(k, l, 0)) @@ -1260,13 +1260,13 @@ contains end select end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP end if if (bc_z%end >= 0) then call s_mpi_sendrecv_variables_buffers(c_divs, 3, 1, num_dims + 1) else - #:call GPU_PARALLEL_LOOP(collapse=2) + $:GPU_PARALLEL_LOOP(private='[l,k]', collapse=2) do l = -buff_size, n + buff_size do k = -buff_size, m + buff_size select case (bc_type(3, 1)%sf(k, l, 0)) @@ -1279,7 +1279,7 @@ contains end select end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP end if end subroutine s_populate_capillary_buffers @@ -1482,7 +1482,7 @@ contains if (bc_x%beg >= 0) then call s_mpi_sendrecv_variables_buffers(jac_sf, 1, -1, 1) else - #:call GPU_PARALLEL_LOOP(collapse=2) + $:GPU_PARALLEL_LOOP(private='[l,k]', collapse=2) do l = 0, p do k = 0, n select case (bc_type(1, -1)%sf(0, k, l)) @@ -1501,14 +1501,14 @@ contains end select end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP end if if (bc_x%end >= 0) then call s_mpi_sendrecv_variables_buffers(jac_sf, 1, 1, 1) else - #:call GPU_PARALLEL_LOOP(collapse=2) + $:GPU_PARALLEL_LOOP(private='[l,k]', collapse=2) do l = 0, p do k = 0, n select case (bc_type(1, 1)%sf(0, k, l)) @@ -1527,7 +1527,7 @@ contains end select end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP end if @@ -1536,7 +1536,7 @@ contains else if (bc_y%beg >= 0) then call s_mpi_sendrecv_variables_buffers(jac_sf, 2, -1, 1) else - #:call GPU_PARALLEL_LOOP(collapse=2) + $:GPU_PARALLEL_LOOP(private='[l,k]', collapse=2) do l = 0, p do k = idwbuff(1)%beg, idwbuff(1)%end select case (bc_type(2, -1)%sf(k, 0, l)) @@ -1555,14 +1555,14 @@ contains end select end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP end if if (bc_y%end >= 0) then call s_mpi_sendrecv_variables_buffers(jac_sf, 2, 1, 1) else - #:call GPU_PARALLEL_LOOP(collapse=2) + $:GPU_PARALLEL_LOOP(private='[l,k]', collapse=2) do l = 0, p do k = idwbuff(1)%beg, idwbuff(1)%end select case (bc_type(2, 1)%sf(k, 0, l)) @@ -1581,7 +1581,7 @@ contains end select end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP end if if (p == 0) then @@ -1589,7 +1589,7 @@ contains else if (bc_z%beg >= 0) then call s_mpi_sendrecv_variables_buffers(jac_sf, 3, -1, 1) else - #:call GPU_PARALLEL_LOOP(collapse=2) + $:GPU_PARALLEL_LOOP(private='[l,k]', collapse=2) do l = idwbuff(2)%beg, idwbuff(2)%end do k = idwbuff(1)%beg, idwbuff(1)%end select case (bc_type(3, -1)%sf(k, l, 0)) @@ -1608,13 +1608,13 @@ contains end select end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP end if if (bc_z%end >= 0) then call s_mpi_sendrecv_variables_buffers(jac_sf, 3, 1, 1) else - #:call GPU_PARALLEL_LOOP(collapse=2) + $:GPU_PARALLEL_LOOP(private='[l,k]', collapse=2) do l = idwbuff(2)%beg, idwbuff(2)%end do k = idwbuff(1)%beg, idwbuff(1)%end select case (bc_type(3, 1)%sf(k, l, 0)) @@ -1633,7 +1633,7 @@ contains end select end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP end if end subroutine s_populate_F_igr_buffers diff --git a/src/common/m_chemistry.fpp b/src/common/m_chemistry.fpp index 4ba51e956..43925a796 100644 --- a/src/common/m_chemistry.fpp +++ b/src/common/m_chemistry.fpp @@ -129,7 +129,7 @@ contains real(wp), dimension(num_species) :: Ys real(wp), dimension(num_species) :: omega - #:call GPU_PARALLEL_LOOP(collapse=3, private='[Ys, omega, T]') + $:GPU_PARALLEL_LOOP(collapse=3, private='[x,y,z,Ys, omega, T]') do z = bounds(3)%beg, bounds(3)%end do y = bounds(2)%beg, bounds(2)%end do x = bounds(1)%beg, bounds(1)%end @@ -159,7 +159,7 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP end subroutine s_compute_chemistry_reaction_flux @@ -191,7 +191,7 @@ contains offsets = 0 offsets(idir) = 1 - #:call GPU_PARALLEL_LOOP(collapse=3, private='[Ys_L, Ys_R, Ys_cell, Xs_L, Xs_R, mass_diffusivities_mixavg1, mass_diffusivities_mixavg2, mass_diffusivities_mixavg_Cell, h_l, h_r, Xs_cell, h_k, dXk_dxi,Mass_Diffu_Flux]', copyin='[offsets]') + $:GPU_PARALLEL_LOOP(collapse=3, private='[x,y,z,Ys_L, Ys_R, Ys_cell, Xs_L, Xs_R, mass_diffusivities_mixavg1, mass_diffusivities_mixavg2, mass_diffusivities_mixavg_Cell, h_l, h_r, Xs_cell, h_k, dXk_dxi,Mass_Diffu_Flux]', copyin='[offsets]') do z = isc3%beg, isc3%end do y = isc2%beg, isc2%end do x = isc1%beg, isc1%end @@ -298,7 +298,7 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP end if end subroutine s_compute_chemistry_diffusion_flux diff --git a/src/common/m_compute_levelset.fpp b/src/common/m_compute_levelset.fpp index dad703558..cef21f25a 100644 --- a/src/common/m_compute_levelset.fpp +++ b/src/common/m_compute_levelset.fpp @@ -43,7 +43,7 @@ contains center(1) = patch_ib(ib_patch_id)%x_centroid center(2) = patch_ib(ib_patch_id)%y_centroid - #:call GPU_PARALLEL_LOOP(private='[i,j,dist_vec,dist]', & + $:GPU_PARALLEL_LOOP(private='[i,j,dist_vec,dist]', & & copyin='[ib_patch_id,center,radius]', collapse=2) do i = 0, m do j = 0, n @@ -62,7 +62,7 @@ contains end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP end subroutine s_circle_levelset @@ -87,7 +87,7 @@ contains inverse_rotation(:, :) = patch_ib(ib_patch_id)%rotation_matrix_inverse(:, :) rotation(:, :) = patch_ib(ib_patch_id)%rotation_matrix(:, :) - #:call GPU_PARALLEL_LOOP(private='[i,j,xy_local,k,dist_vec,dist,global_dist,global_id]', & + $:GPU_PARALLEL_LOOP(private='[i,j,xy_local,k,dist_vec,dist,global_dist,global_id]', & & copyin='[ib_patch_id,center,rotation,inverse_rotation,airfoil_grid_u,airfoil_grid_l]', collapse=2) do i = 0, m do j = 0, n @@ -148,7 +148,7 @@ contains end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP end subroutine s_airfoil_levelset @@ -180,7 +180,7 @@ contains z_max = center(3) + lz/2 z_min = center(3) - lz/2 - #:call GPU_PARALLEL_LOOP(private='[i,j,l,xyz_local,k,dist_vec,dist,global_dist,global_id,dist_side,dist_surf]', & + $:GPU_PARALLEL_LOOP(private='[i,j,l,xyz_local,k,dist_vec,dist,global_dist,global_id,dist_side,dist_surf]', & & copyin='[ib_patch_id,center,rotation,inverse_rotation,airfoil_grid_u,airfoil_grid_l,z_min,z_max]', collapse=3) do l = 0, p do j = 0, n @@ -255,7 +255,7 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP end subroutine s_3D_airfoil_levelset @@ -290,7 +290,7 @@ contains bottom_left(1) = -length_x/2 bottom_left(2) = -length_y/2 - #:call GPU_PARALLEL_LOOP(private='[i,j,k,min_dist,idx,side_dists,xy_local,dist_vec]', & + $:GPU_PARALLEL_LOOP(private='[i,j,k,min_dist,idx,side_dists,xy_local,dist_vec]', & & copyin='[ib_patch_id,center,bottom_left,top_right,inverse_rotation,rotation]', collapse=2) do i = 0, m do j = 0, n @@ -332,7 +332,7 @@ contains end if end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP end subroutine s_rectangle_levelset @@ -371,7 +371,7 @@ contains Front = length_z/2 Back = -length_z/2 - #:call GPU_PARALLEL_LOOP(private='[i,j,k,min_dist,side_dists,xyz_local,dist_vec]', & + $:GPU_PARALLEL_LOOP(private='[i,j,k,min_dist,side_dists,xyz_local,dist_vec]', & & copyin='[ib_patch_id,center,inverse_rotation,rotation,Right,Left,Top,Bottom,Front,Back]', collapse=3) do i = 0, m do j = 0, n @@ -441,7 +441,7 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP end subroutine s_cuboid_levelset @@ -461,7 +461,7 @@ contains center(2) = patch_ib(ib_patch_id)%y_centroid center(3) = patch_ib(ib_patch_id)%z_centroid - #:call GPU_PARALLEL_LOOP(private='[i,j,k,dist_vec,dist]', & + $:GPU_PARALLEL_LOOP(private='[i,j,k,dist_vec,dist]', & & copyin='[ib_patch_id,center,radius]', collapse=3) do i = 0, m do j = 0, n @@ -479,7 +479,7 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP end subroutine s_sphere_levelset @@ -526,7 +526,7 @@ contains dist_surface_vec = (/1, 1, 0/) end if - #:call GPU_PARALLEL_LOOP(private='[i,j,k,side_pos,dist_side,dist_surface,xyz_local]', & + $:GPU_PARALLEL_LOOP(private='[i,j,k,side_pos,dist_side,dist_surface,xyz_local]', & & copyin='[ib_patch_id,center,radius,inverse_rotation,rotation,dist_sides_vec,dist_surface_vec]', collapse=3) do i = 0, m do j = 0, n @@ -560,7 +560,7 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP end subroutine s_cylinder_levelset diff --git a/src/common/m_finite_differences.fpp b/src/common/m_finite_differences.fpp index c01953e21..e37160248 100644 --- a/src/common/m_finite_differences.fpp +++ b/src/common/m_finite_differences.fpp @@ -18,7 +18,7 @@ contains real(wp) :: divergence - #:call GPU_PARALLEL_LOOP(collapse=3, private='[divergence]') + $:GPU_PARALLEL_LOOP(collapse=3, private='[x,y,z,divergence]') do x = ix_s%beg, ix_s%end do y = iy_s%beg, iy_s%end do z = iz_s%beg, iz_s%end @@ -56,7 +56,7 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP end subroutine s_compute_fd_divergence diff --git a/src/common/m_ib_patches.fpp b/src/common/m_ib_patches.fpp index c9f422b97..314ed0dfd 100644 --- a/src/common/m_ib_patches.fpp +++ b/src/common/m_ib_patches.fpp @@ -162,7 +162,7 @@ contains ! that cell. If both queries check out, the primitive variables of ! the current patch are assigned to this cell. - #:call GPU_PARALLEL_LOOP(private='[i,j]', copy='[ib_markers_sf]',& + $:GPU_PARALLEL_LOOP(private='[i,j]', copy='[ib_markers_sf]',& & copyin='[patch_id,center,radius]', collapse=2) do j = 0, n do i = 0, m @@ -173,7 +173,7 @@ contains end if end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP end subroutine s_ib_circle @@ -270,7 +270,7 @@ contains end if - #:call GPU_PARALLEL_LOOP(private='[i,j,xy_local,k,f]', copy='[ib_markers_sf]',& + $:GPU_PARALLEL_LOOP(private='[i,j,xy_local,k,f]', copy='[ib_markers_sf]',& & copyin='[patch_id,center,inverse_rotation,ma,ca_in,airfoil_grid_u,airfoil_grid_l]', collapse=2) do j = 0, n do i = 0, m @@ -325,7 +325,7 @@ contains end if end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP end subroutine s_ib_airfoil @@ -423,7 +423,7 @@ contains airfoil_grid_l(Np)%y = 0._wp end if - #:call GPU_PARALLEL_LOOP(private='[i,j,l,xyz_local,k,f]', copy='[ib_markers_sf]',& + $:GPU_PARALLEL_LOOP(private='[i,j,l,xyz_local,k,f]', copy='[ib_markers_sf]',& & copyin='[patch_id,center,inverse_rotation,ma,ca_in,airfoil_grid_u,airfoil_grid_l]', collapse=3) do l = 0, p do j = 0, n @@ -475,7 +475,7 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP end subroutine s_ib_3D_airfoil @@ -522,7 +522,7 @@ contains ! domain and verifying whether the current patch has the permission ! to write to that cell. If both queries check out, the primitive ! variables of the current patch are assigned to this cell. - #:call GPU_PARALLEL_LOOP(private='[i,j, xy_local]', copy='[ib_markers_sf]',& + $:GPU_PARALLEL_LOOP(private='[i,j, xy_local]', copy='[ib_markers_sf]',& & copyin='[patch_id,center,length,inverse_rotation,x_cc,y_cc]', collapse=2) do j = 0, n do i = 0, m @@ -541,7 +541,7 @@ contains end if end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP end subroutine s_ib_rectangle @@ -582,7 +582,7 @@ contains ! and verifying whether the current patch has permission to write to ! that cell. If both queries check out, the primitive variables of ! the current patch are assigned to this cell. - #:call GPU_PARALLEL_LOOP(private='[i,j,k,cart_y,cart_z]', copy='[ib_markers_sf]',& + $:GPU_PARALLEL_LOOP(private='[i,j,k,cart_y,cart_z]', copy='[ib_markers_sf]',& & copyin='[patch_id,center,radius]', collapse=3) do k = 0, p do j = 0, n @@ -602,7 +602,7 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP end subroutine s_ib_sphere @@ -644,7 +644,7 @@ contains ! and verifying whether the current patch has permission to write to ! to that cell. If both queries check out, the primitive variables ! of the current patch are assigned to this cell. - #:call GPU_PARALLEL_LOOP(private='[i,j,k,xyz_local,cart_y,cart_z]', copy='[ib_markers_sf]',& + $:GPU_PARALLEL_LOOP(private='[i,j,k,xyz_local,cart_y,cart_z]', copy='[ib_markers_sf]',& & copyin='[patch_id,center,length,inverse_rotation]', collapse=3) do k = 0, p do j = 0, n @@ -673,7 +673,7 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP end subroutine s_ib_cuboid @@ -719,7 +719,7 @@ contains ! domain and verifying whether the current patch has the permission ! to write to that cell. If both queries check out, the primitive ! variables of the current patch are assigned to this cell. - #:call GPU_PARALLEL_LOOP(private='[i,j,k,xyz_local,cart_y,cart_z]', copy='[ib_markers_sf]',& + $:GPU_PARALLEL_LOOP(private='[i,j,k,xyz_local,cart_y,cart_z]', copy='[ib_markers_sf]',& & copyin='[patch_id,center,length,radius,inverse_rotation]', collapse=3) do k = 0, p do j = 0, n @@ -758,7 +758,7 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP end subroutine s_ib_cylinder diff --git a/src/common/m_mpi_common.fpp b/src/common/m_mpi_common.fpp index cbf5040cd..d2e6767e4 100644 --- a/src/common/m_mpi_common.fpp +++ b/src/common/m_mpi_common.fpp @@ -757,7 +757,7 @@ contains #:for mpi_dir in [1, 2, 3] if (mpi_dir == ${mpi_dir}$) then #:if mpi_dir == 1 - #:call GPU_PARALLEL_LOOP(collapse=4,private='[r]') + $:GPU_PARALLEL_LOOP(collapse=4,private='[r,i,j,k,l]') do l = 0, p do k = 0, n do j = 0, buff_size - 1 @@ -768,10 +768,10 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP if (qbmm_comm) then - #:call GPU_PARALLEL_LOOP(collapse=4,private='[r]') + $:GPU_PARALLEL_LOOP(collapse=4,private='[r,i,j,k,l]') do l = 0, p do k = 0, n do j = 0, buff_size - 1 @@ -785,9 +785,9 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP - #:call GPU_PARALLEL_LOOP(collapse=5,private='[r]') + $:GPU_PARALLEL_LOOP(collapse=5,private='[i,j,k,l,q,r]') do l = 0, p do k = 0, n do j = 0, buff_size - 1 @@ -801,10 +801,10 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP end if #:elif mpi_dir == 2 - #:call GPU_PARALLEL_LOOP(collapse=4,private='[r]') + $:GPU_PARALLEL_LOOP(collapse=4,private='[i,j,k,l,r]') do i = 1, nVar do l = 0, p do k = 0, buff_size - 1 @@ -817,10 +817,10 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP if (qbmm_comm) then - #:call GPU_PARALLEL_LOOP(collapse=5,private='[r]') + $:GPU_PARALLEL_LOOP(collapse=5,private='[i,j,k,l,q,r]') do i = nVar + 1, nVar + 4 do l = 0, p do k = 0, buff_size - 1 @@ -835,9 +835,9 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP - #:call GPU_PARALLEL_LOOP(collapse=5,private='[r]') + $:GPU_PARALLEL_LOOP(collapse=5,private='[i,j,k,l,q,r]') do i = nVar + 1, nVar + 4 do l = 0, p do k = 0, buff_size - 1 @@ -852,10 +852,10 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP end if #:else - #:call GPU_PARALLEL_LOOP(collapse=4,private='[r]') + $:GPU_PARALLEL_LOOP(collapse=4,private='[i,j,k,l,r]') do i = 1, nVar do l = 0, buff_size - 1 do k = -buff_size, n + buff_size @@ -868,10 +868,10 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP if (qbmm_comm) then - #:call GPU_PARALLEL_LOOP(collapse=5,private='[r]') + $:GPU_PARALLEL_LOOP(collapse=5,private='[i,j,k,l,q,r]') do i = nVar + 1, nVar + 4 do l = 0, buff_size - 1 do k = -buff_size, n + buff_size @@ -886,9 +886,9 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP - #:call GPU_PARALLEL_LOOP(collapse=5,private='[r]') + $:GPU_PARALLEL_LOOP(collapse=5,private='[i,j,k,l,q,r]') do i = nVar + 1, nVar + 4 do l = 0, buff_size - 1 do k = -buff_size, n + buff_size @@ -903,7 +903,7 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP end if #:endif end if @@ -958,7 +958,7 @@ contains #:for mpi_dir in [1, 2, 3] if (mpi_dir == ${mpi_dir}$) then #:if mpi_dir == 1 - #:call GPU_PARALLEL_LOOP(collapse=4,private='[r]') + $:GPU_PARALLEL_LOOP(collapse=4,private='[i,j,k,l,r]') do l = 0, p do k = 0, n do j = -buff_size, -1 @@ -976,10 +976,10 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP if (qbmm_comm) then - #:call GPU_PARALLEL_LOOP(collapse=5,private='[r]') + $:GPU_PARALLEL_LOOP(collapse=5,private='[i,j,k,l,q,r]') do l = 0, p do k = 0, n do j = -buff_size, -1 @@ -993,9 +993,9 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP - #:call GPU_PARALLEL_LOOP(collapse=5,private='[r]') + $:GPU_PARALLEL_LOOP(collapse=5,private='[i,j,k,l,q,r]') do l = 0, p do k = 0, n do j = -buff_size, -1 @@ -1009,10 +1009,10 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP end if #:elif mpi_dir == 2 - #:call GPU_PARALLEL_LOOP(collapse=4,private='[r]') + $:GPU_PARALLEL_LOOP(collapse=4,private='[i,j,k,l,r]') do i = 1, nVar do l = 0, p do k = -buff_size, -1 @@ -1031,10 +1031,10 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP if (qbmm_comm) then - #:call GPU_PARALLEL_LOOP(collapse=5,private='[r]') + $:GPU_PARALLEL_LOOP(collapse=5,private='[i,j,k,l,q,r]') do i = nVar + 1, nVar + 4 do l = 0, p do k = -buff_size, -1 @@ -1049,9 +1049,9 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP - #:call GPU_PARALLEL_LOOP(collapse=5,private='[r]') + $:GPU_PARALLEL_LOOP(collapse=5,private='[i,j,k,l,q,r]') do i = nVar + 1, nVar + 4 do l = 0, p do k = -buff_size, -1 @@ -1066,11 +1066,11 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP end if #:else ! Unpacking buffer from bc_z%beg - #:call GPU_PARALLEL_LOOP(collapse=4,private='[r]') + $:GPU_PARALLEL_LOOP(collapse=4,private='[i,j,k,l,r]') do i = 1, nVar do l = -buff_size, -1 do k = -buff_size, n + buff_size @@ -1090,10 +1090,10 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP if (qbmm_comm) then - #:call GPU_PARALLEL_LOOP(collapse=5,private='[r]') + $:GPU_PARALLEL_LOOP(collapse=5,private='[i,j,k,l,q,r]') do i = nVar + 1, nVar + 4 do l = -buff_size, -1 do k = -buff_size, n + buff_size @@ -1109,9 +1109,9 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP - #:call GPU_PARALLEL_LOOP(collapse=5,private='[r]') + $:GPU_PARALLEL_LOOP(collapse=5,private='[i,j,k,l,q,r]') do i = nVar + 1, nVar + 4 do l = -buff_size, -1 do k = -buff_size, n + buff_size @@ -1127,7 +1127,7 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP end if #:endif end if diff --git a/src/common/m_phase_change.fpp b/src/common/m_phase_change.fpp index 82d8f4138..47fbb126c 100644 --- a/src/common/m_phase_change.fpp +++ b/src/common/m_phase_change.fpp @@ -99,7 +99,7 @@ contains integer :: i, j, k, l ! starting equilibrium solver - #:call GPU_PARALLEL_LOOP(collapse=3, private='[p_infOV, p_infpT, p_infSL, sk, hk, gk, ek, rhok,pS, pSOV, pSSL, TS, TSOV, TSatOV, TSatSL, TSSL, rhoe, dynE, rhos, rho, rM, m1, m2, MCT, TvF]') + $:GPU_PARALLEL_LOOP(collapse=3, private='[j,k,l,p_infOV, p_infpT, p_infSL, sk, hk, gk, ek, rhok,pS, pSOV, pSSL, TS, TSOV, TSatOV, TSatSL, TSSL, rhoe, dynE, rhos, rho, rM, m1, m2, MCT, TvF]') do j = 0, m do k = 0, n do l = 0, p @@ -269,7 +269,7 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP end subroutine s_infinite_relaxation_k diff --git a/src/common/m_variables_conversion.fpp b/src/common/m_variables_conversion.fpp index 4d4bf60fa..359af5b27 100644 --- a/src/common/m_variables_conversion.fpp +++ b/src/common/m_variables_conversion.fpp @@ -873,7 +873,7 @@ contains end if #:endif - #:call GPU_PARALLEL_LOOP(collapse=3, private='[alpha_K, alpha_rho_K, Re_K, nRtmp, rho_K, gamma_K, pi_inf_K,qv_K, dyn_pres_K, rhoYks, B, T]') + $:GPU_PARALLEL_LOOP(collapse=3, private='[j,k,l,alpha_K, alpha_rho_K, Re_K, nRtmp, rho_K, gamma_K, pi_inf_K,qv_K, dyn_pres_K, rhoYks, B, T]') do l = ibounds(3)%beg, ibounds(3)%end do k = ibounds(2)%beg, ibounds(2)%end do j = ibounds(1)%beg, ibounds(1)%end @@ -1165,7 +1165,7 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP end subroutine s_convert_conservative_to_primitive_variables @@ -1492,7 +1492,7 @@ contains ! Computing the flux variables from the primitive variables, without ! accounting for the contribution of either viscosity or capillarity #ifdef MFC_SIMULATION - #:call GPU_PARALLEL_LOOP(collapse=3, private='[alpha_rho_K, vel_K, alpha_K, Re_K, Y_K]') + $:GPU_PARALLEL_LOOP(collapse=3, private='[j,k,l,alpha_rho_K, vel_K, alpha_K, Re_K, Y_K]') do l = is3b, is3e do k = is2b, is2e do j = is1b, is1e @@ -1598,7 +1598,7 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP #endif end subroutine s_convert_primitive_to_flux_variables From 477a13b13aeeb5f16e7680d36b3d651ed6d15155 Mon Sep 17 00:00:00 2001 From: "Daniel J. Vickers" Date: Thu, 6 Nov 2025 10:19:13 -0500 Subject: [PATCH 03/33] Modified a few hundred macros --- src/simulation/m_acoustic_src.fpp | 12 +- src/simulation/m_body_forces.fpp | 20 +-- src/simulation/m_bubbles_EE.fpp | 28 ++-- src/simulation/m_bubbles_EL.fpp | 88 ++++++------ src/simulation/m_bubbles_EL_kernels.fpp | 8 +- src/simulation/m_cbc.fpp | 140 +++++++++--------- src/simulation/m_data_output.fpp | 4 +- src/simulation/m_derived_variables.fpp | 64 ++++----- src/simulation/m_fftw.fpp | 32 ++--- src/simulation/m_hyperelastic.fpp | 4 +- src/simulation/m_hypoelastic.fpp | 56 ++++---- src/simulation/m_ibm.fpp | 8 +- src/simulation/m_igr.fpp | 48 +++---- src/simulation/m_mhd.fpp | 4 +- src/simulation/m_mpi_proxy.fpp | 24 ++-- src/simulation/m_muscl.fpp | 16 +-- src/simulation/m_pressure_relaxation.fpp | 4 +- src/simulation/m_qbmm.fpp | 12 +- src/simulation/m_rhs.fpp | 172 +++++++++++------------ src/simulation/m_riemann_solvers.fpp | 108 +++++++------- src/simulation/m_surface_tension.fpp | 20 +-- src/simulation/m_time_steppers.fpp | 18 +-- src/simulation/m_viscous.fpp | 88 ++++++------ src/simulation/m_weno.fpp | 20 +-- 24 files changed, 499 insertions(+), 499 deletions(-) diff --git a/src/simulation/m_acoustic_src.fpp b/src/simulation/m_acoustic_src.fpp index 3da61d2e4..22c8d1a58 100644 --- a/src/simulation/m_acoustic_src.fpp +++ b/src/simulation/m_acoustic_src.fpp @@ -166,7 +166,7 @@ contains sim_time = t_step*dt - #:call GPU_PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(private='[j,k,l]', collapse=3) do l = 0, p do k = 0, n do j = 0, m @@ -178,7 +178,7 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP ! Keep outer loop sequel because different sources can have very different number of points do ai = 1, num_source @@ -220,7 +220,7 @@ contains deallocate (phi_rn) - #:call GPU_PARALLEL_LOOP(private='[myalpha,myalpha_rho]') + $:GPU_PARALLEL_LOOP(private='[i,myalpha,myalpha_rho]') do i = 1, num_points j = source_spatials(ai)%coord(1, i) k = source_spatials(ai)%coord(2, i) @@ -317,12 +317,12 @@ contains end if end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP end if end do ! Update the rhs variables - #:call GPU_PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(private='[j,k,l]',collapse=3) do l = 0, p do k = 0, n do j = 0, m @@ -338,7 +338,7 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP end subroutine s_acoustic_src_calculations !> This subroutine gives the temporally varying amplitude of the pulse diff --git a/src/simulation/m_body_forces.fpp b/src/simulation/m_body_forces.fpp index c5ba29c59..9bfb37632 100644 --- a/src/simulation/m_body_forces.fpp +++ b/src/simulation/m_body_forces.fpp @@ -73,7 +73,7 @@ contains type(scalar_field), dimension(sys_size), intent(in) :: q_cons_vf integer :: i, j, k, l !< standard iterators - #:call GPU_PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(private='[j,k,l]', collapse=3) do l = 0, p do k = 0, n do j = 0, m @@ -85,7 +85,7 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP end subroutine s_compute_mixture_density @@ -104,7 +104,7 @@ contains call s_compute_acceleration(mytime) call s_compute_mixture_density(q_cons_vf) - #:call GPU_PARALLEL_LOOP(collapse=4) + $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=4) do i = momxb, E_idx do l = 0, p do k = 0, n @@ -114,11 +114,11 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP if (bf_x) then ! x-direction body forces - #:call GPU_PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(private='[j,k,l]', collapse=3) do l = 0, p do k = 0, n do j = 0, m @@ -129,12 +129,12 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP end if if (bf_y) then ! y-direction body forces - #:call GPU_PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(private='[j,k,l]', collapse=3) do l = 0, p do k = 0, n do j = 0, m @@ -145,12 +145,12 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP end if if (bf_z) then ! z-direction body forces - #:call GPU_PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(private='[j,k,l]', collapse=3) do l = 0, p do k = 0, n do j = 0, m @@ -161,7 +161,7 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP end if diff --git a/src/simulation/m_bubbles_EE.fpp b/src/simulation/m_bubbles_EE.fpp index f198d2e78..604fbb4de 100644 --- a/src/simulation/m_bubbles_EE.fpp +++ b/src/simulation/m_bubbles_EE.fpp @@ -76,7 +76,7 @@ contains real(wp) :: nR3bar integer(wp) :: i, j, k, l - #:call GPU_PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(private='[j,k,l]', collapse=3) do l = 0, p do k = 0, n do j = 0, m @@ -89,7 +89,7 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP end subroutine s_comp_alpha_from_n @@ -104,7 +104,7 @@ contains if (idir == 1) then if (.not. qbmm) then - #:call GPU_PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(private='[j,k,l]', collapse=3) do l = 0, p do k = 0, n do j = 0, m @@ -116,12 +116,12 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP end if elseif (idir == 2) then - #:call GPU_PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(private='[j,k,l]', collapse=3) do l = 0, p do k = 0, n do j = 0, m @@ -132,11 +132,11 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP elseif (idir == 3) then - #:call GPU_PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(private='[j,k,l]', collapse=3) do l = 0, p do k = 0, n do j = 0, m @@ -147,7 +147,7 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP end if @@ -177,7 +177,7 @@ contains integer :: dmBub_id !< Dummy variables for unified subgrid bubble subroutines real(wp) :: dmMass_v, dmMass_n, dmBeta_c, dmBeta_t, dmCson - #:call GPU_PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(private='[j,k,l]', collapse=3) do l = 0, p do k = 0, n do j = 0, m @@ -193,10 +193,10 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP adap_dt_stop_max = 0 - #:call GPU_PARALLEL_LOOP(collapse=3, private='[Rtmp, Vtmp, myalpha_rho, myalpha]', & + $:GPU_PARALLEL_LOOP(private='[j,k,l]', collapse=3, private='[Rtmp, Vtmp, myalpha_rho, myalpha]', & & reduction='[[adap_dt_stop_max]]', reductionOp='[MAX]', & & copy='[adap_dt_stop_max]') do l = 0, p @@ -326,12 +326,12 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP if (adap_dt .and. adap_dt_stop_max > 0) call s_mpi_abort("Adaptive time stepping failed to converge.") if (.not. adap_dt) then - #:call GPU_PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(private='[i,l,q]', collapse=3) do l = 0, p do q = 0, n do i = 0, m @@ -350,7 +350,7 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP end if end subroutine s_compute_bubble_EE_source diff --git a/src/simulation/m_bubbles_EL.fpp b/src/simulation/m_bubbles_EL.fpp index c205b35f3..da3db0994 100644 --- a/src/simulation/m_bubbles_EL.fpp +++ b/src/simulation/m_bubbles_EL.fpp @@ -592,7 +592,7 @@ contains ! Subgrid p_inf model based on Maeda and Colonius (2018). if (lag_params%pressure_corrector) then ! Calculate velocity potentials (valid for one bubble per cell) - #:call GPU_PARALLEL_LOOP(private='[k,cell]') + $:GPU_PARALLEL_LOOP(private='[k,cell]') do k = 1, nBubs call s_get_pinf(k, q_prim_vf, 2, paux, cell, preterm1, term2, Romega) myR0 = bub_R0(k) @@ -609,12 +609,12 @@ contains bub_dphidt(k) = bub_dphidt(k)/(1._wp - term1_fac) end if end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP end if ! Radial motion model adap_dt_stop_max = 0 - #:call GPU_PARALLEL_LOOP(private='[k,myalpha_rho,myalpha,Re,cell]', & + $:GPU_PARALLEL_LOOP(private='[k,myalpha_rho,myalpha,Re,cell]', & & reduction='[[adap_dt_stop_max]]',reductionOp='[MAX]', & & copy='[adap_dt_stop_max]',copyin='[stage]') do k = 1, nBubs @@ -680,19 +680,19 @@ contains adap_dt_stop_max = max(adap_dt_stop_max, adap_dt_stop) end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP if (adap_dt .and. adap_dt_stop_max > 0) call s_mpi_abort("Adaptive time stepping failed to converge.") ! Bubbles remain in a fixed position - #:call GPU_PARALLEL_LOOP(collapse=2, private='[k]', copyin='[stage]') + $:GPU_PARALLEL_LOOP(collapse=2, private='[k,l]', copyin='[stage]') do k = 1, nBubs do l = 1, 3 mtn_dposdt(k, l, stage) = 0._wp mtn_dveldt(k, l, stage) = 0._wp end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP call nvtxEndRange @@ -717,7 +717,7 @@ contains ! (q / (1 - beta)) * d(beta)/dt source if (p == 0) then - #:call GPU_PARALLEL_LOOP(collapse=4) + $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=4) do k = 0, p do j = 0, n do i = 0, m @@ -732,9 +732,9 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP else - #:call GPU_PARALLEL_LOOP(collapse=4) + $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=4) do k = 0, p do j = 0, n do i = 0, m @@ -748,7 +748,7 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP end if do l = 1, num_dims @@ -756,7 +756,7 @@ contains call s_gradient_dir(q_prim_vf(E_idx), q_beta%vf(3), l) ! (q / (1 - beta)) * d(beta)/dt source - #:call GPU_PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(private='[i,j,k]', collapse=3) do k = 0, p do j = 0, n do i = 0, m @@ -769,10 +769,10 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP !source in energy - #:call GPU_PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(private='[i,j,k]', collapse=3) do k = idwbuff(3)%beg, idwbuff(3)%end do j = idwbuff(2)%beg, idwbuff(2)%end do i = idwbuff(1)%beg, idwbuff(1)%end @@ -780,12 +780,12 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP call s_gradient_dir(q_beta%vf(3), q_beta%vf(4), l) ! (beta / (1 - beta)) * d(Pu)/dl source - #:call GPU_PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(private='[i,j,k]', collapse=3) do k = 0, p do j = 0, n do i = 0, m @@ -797,7 +797,7 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP end do end if @@ -843,7 +843,7 @@ contains call nvtxStartRange("BUBBLES-LAGRANGE-KERNELS") - #:call GPU_PARALLEL_LOOP(collapse=4) + $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=4) do i = 1, q_beta_idx do l = idwbuff(3)%beg, idwbuff(3)%end do k = idwbuff(2)%beg, idwbuff(2)%end @@ -853,13 +853,13 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP call s_smoothfunction(nBubs, intfc_rad, intfc_vel, & mtn_s, mtn_pos, q_beta) !Store 1-beta - #:call GPU_PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(private='[j,k,l]', collapse=3) do l = idwbuff(3)%beg, idwbuff(3)%end do k = idwbuff(2)%beg, idwbuff(2)%end do j = idwbuff(1)%beg, idwbuff(1)%end @@ -870,7 +870,7 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP call nvtxEndRange @@ -1101,7 +1101,7 @@ contains integer :: k if (time_stepper == 1) then ! 1st order TVD RK - #:call GPU_PARALLEL_LOOP(private='[k]') + $:GPU_PARALLEL_LOOP(private='[k]') do k = 1, nBubs !u{1} = u{n} + dt * RHS{n} intfc_rad(k, 1) = intfc_rad(k, 1) + dt*intfc_draddt(k, 1) @@ -1111,7 +1111,7 @@ contains gas_p(k, 1) = gas_p(k, 1) + dt*gas_dpdt(k, 1) gas_mv(k, 1) = gas_mv(k, 1) + dt*gas_dmvdt(k, 1) end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP call s_transfer_data_to_tmp() call s_write_void_evol(mytime) @@ -1124,7 +1124,7 @@ contains elseif (time_stepper == 2) then ! 2nd order TVD RK if (stage == 1) then - #:call GPU_PARALLEL_LOOP(private='[k]') + $:GPU_PARALLEL_LOOP(rivate='[k]') do k = 1, nBubs !u{1} = u{n} + dt * RHS{n} intfc_rad(k, 2) = intfc_rad(k, 1) + dt*intfc_draddt(k, 1) @@ -1134,10 +1134,10 @@ contains gas_p(k, 2) = gas_p(k, 1) + dt*gas_dpdt(k, 1) gas_mv(k, 2) = gas_mv(k, 1) + dt*gas_dmvdt(k, 1) end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP elseif (stage == 2) then - #:call GPU_PARALLEL_LOOP(private='[k]') + $:GPU_PARALLEL_LOOP(private='[k]') do k = 1, nBubs !u{1} = u{n} + (1/2) * dt * (RHS{n} + RHS{1}) intfc_rad(k, 1) = intfc_rad(k, 1) + dt*(intfc_draddt(k, 1) + intfc_draddt(k, 2))/2._wp @@ -1147,7 +1147,7 @@ contains gas_p(k, 1) = gas_p(k, 1) + dt*(gas_dpdt(k, 1) + gas_dpdt(k, 2))/2._wp gas_mv(k, 1) = gas_mv(k, 1) + dt*(gas_dmvdt(k, 1) + gas_dmvdt(k, 2))/2._wp end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP call s_transfer_data_to_tmp() call s_write_void_evol(mytime) @@ -1162,7 +1162,7 @@ contains elseif (time_stepper == 3) then ! 3rd order TVD RK if (stage == 1) then - #:call GPU_PARALLEL_LOOP(private='[k]') + $:GPU_PARALLEL_LOOP(private='[k]') do k = 1, nBubs !u{1} = u{n} + dt * RHS{n} intfc_rad(k, 2) = intfc_rad(k, 1) + dt*intfc_draddt(k, 1) @@ -1172,10 +1172,10 @@ contains gas_p(k, 2) = gas_p(k, 1) + dt*gas_dpdt(k, 1) gas_mv(k, 2) = gas_mv(k, 1) + dt*gas_dmvdt(k, 1) end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP elseif (stage == 2) then - #:call GPU_PARALLEL_LOOP(private='[k]') + $:GPU_PARALLEL_LOOP(private='[k]') do k = 1, nBubs !u{2} = u{n} + (1/4) * dt * [RHS{n} + RHS{1}] intfc_rad(k, 2) = intfc_rad(k, 1) + dt*(intfc_draddt(k, 1) + intfc_draddt(k, 2))/4._wp @@ -1185,9 +1185,9 @@ contains gas_p(k, 2) = gas_p(k, 1) + dt*(gas_dpdt(k, 1) + gas_dpdt(k, 2))/4._wp gas_mv(k, 2) = gas_mv(k, 1) + dt*(gas_dmvdt(k, 1) + gas_dmvdt(k, 2))/4._wp end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP elseif (stage == 3) then - #:call GPU_PARALLEL_LOOP(private='[k]') + $:GPU_PARALLEL_LOOP(private='[k]') do k = 1, nBubs !u{n+1} = u{n} + (2/3) * dt * [(1/4)* RHS{n} + (1/4)* RHS{1} + RHS{2}] intfc_rad(k, 1) = intfc_rad(k, 1) + (2._wp/3._wp)*dt*(intfc_draddt(k, 1)/4._wp + intfc_draddt(k, 2)/4._wp + intfc_draddt(k, 3)) @@ -1197,7 +1197,7 @@ contains gas_p(k, 1) = gas_p(k, 1) + (2._wp/3._wp)*dt*(gas_dpdt(k, 1)/4._wp + gas_dpdt(k, 2)/4._wp + gas_dpdt(k, 3)) gas_mv(k, 1) = gas_mv(k, 1) + (2._wp/3._wp)*dt*(gas_dmvdt(k, 1)/4._wp + gas_dmvdt(k, 2)/4._wp + gas_dmvdt(k, 3)) end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP call s_transfer_data_to_tmp() call s_write_void_evol(mytime) @@ -1274,7 +1274,7 @@ contains integer :: k - #:call GPU_PARALLEL_LOOP(private='[k]') + $:GPU_PARALLEL_LOOP(private='[k]') do k = 1, nBubs gas_p(k, 2) = gas_p(k, 1) gas_mv(k, 2) = gas_mv(k, 1) @@ -1285,7 +1285,7 @@ contains mtn_vel(k, 1:3, 2) = mtn_vel(k, 1:3, 1) mtn_s(k, 1:3, 2) = mtn_s(k, 1:3, 1) end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP end subroutine s_transfer_data_to_tmp @@ -1373,7 +1373,7 @@ contains if (dir == 1) then ! Gradient in x dir. - #:call GPU_PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(private='[i,j,k]', collapse=3) do k = 0, p do j = 0, n do i = 0, m @@ -1385,10 +1385,10 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP elseif (dir == 2) then ! Gradient in y dir. - #:call GPU_PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(private='[i,j,k]', collapse=3) do k = 0, p do j = 0, n do i = 0, m @@ -1400,10 +1400,10 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP elseif (dir == 3) then ! Gradient in z dir. - #:call GPU_PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(private='[i,j,k]', collapse=3) do k = 0, p do j = 0, n do i = 0, m @@ -1415,7 +1415,7 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP end if end subroutine s_gradient_dir @@ -1511,7 +1511,7 @@ contains lag_void_max = 0._wp lag_void_avg = 0._wp lag_vol = 0._wp - #:call GPU_PARALLEL_LOOP(collapse=3, reduction='[[lag_vol, lag_void_avg], [lag_void_max]]', reductionOp='[+, MAX]', copy='[lag_vol, lag_void_avg, lag_void_max]') + $:GPU_PARALLEL_LOOP(private='[i,j,k]', collapse=3, reduction='[[lag_vol, lag_void_avg], [lag_void_max]]', reductionOp='[+, MAX]', copy='[lag_vol, lag_void_avg, lag_void_max]') do k = 0, p do j = 0, n do i = 0, m @@ -1524,7 +1524,7 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP #ifdef MFC_MPI if (num_procs > 1) then @@ -1707,7 +1707,7 @@ contains integer :: k - #:call GPU_PARALLEL_LOOP(reduction='[[Rmax_glb], [Rmin_glb]]', & + $:GPU_PARALLEL_LOOP(private='[k]', reduction='[[Rmax_glb], [Rmin_glb]]', & & reductionOp='[MAX, MIN]', copy='[Rmax_glb,Rmin_glb]') do k = 1, nBubs Rmax_glb = max(Rmax_glb, intfc_rad(k, 1)/bub_R0(k)) @@ -1715,7 +1715,7 @@ contains Rmax_stats(k) = max(Rmax_stats(k), intfc_rad(k, 1)/bub_R0(k)) Rmin_stats(k) = min(Rmin_stats(k), intfc_rad(k, 1)/bub_R0(k)) end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP end subroutine s_calculate_lag_bubble_stats diff --git a/src/simulation/m_bubbles_EL_kernels.fpp b/src/simulation/m_bubbles_EL_kernels.fpp index 218eaa6ea..bd592acdc 100644 --- a/src/simulation/m_bubbles_EL_kernels.fpp +++ b/src/simulation/m_bubbles_EL_kernels.fpp @@ -55,7 +55,7 @@ contains real(wp), dimension(3) :: s_coord integer :: l - #:call GPU_PARALLEL_LOOP(private='[l,s_coord,cell]') + $:GPU_PARALLEL_LOOP(private='[l,s_coord,cell]') do l = 1, nBubs volpart = 4._wp/3._wp*pi*lbk_rad(l, 2)**3._wp @@ -90,7 +90,7 @@ contains updatedvar%vf(5)%sf(cell(1), cell(2), cell(3)) = updatedvar%vf(5)%sf(cell(1), cell(2), cell(3)) + addFun3 end if end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP end subroutine s_deltafunc @@ -121,7 +121,7 @@ contains smearGridz = smearGrid if (p == 0) smearGridz = 1 - #:call GPU_PARALLEL_LOOP(private='[nodecoord,l,s_coord,cell,center]', copyin='[smearGrid,smearGridz]') + $:GPU_PARALLEL_LOOP(private='[nodecoord,l,s_coord,cell,center]', copyin='[smearGrid,smearGridz]') do l = 1, nBubs nodecoord(1:3) = 0 center(1:3) = 0._wp @@ -196,7 +196,7 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP end subroutine s_gaussian diff --git a/src/simulation/m_cbc.fpp b/src/simulation/m_cbc.fpp index 4df1c4fcf..7106fdf09 100644 --- a/src/simulation/m_cbc.fpp +++ b/src/simulation/m_cbc.fpp @@ -710,7 +710,7 @@ contains F_src_rs${XYZ}$_vf, & is1, is2, is3, idwbuff(2)%beg, idwbuff(3)%beg) - #:call GPU_PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(private='[i,r,k]', collapse=3) do i = 1, flux_cbc_index do r = is3%beg, is3%end do k = is2%beg, is2%end @@ -721,9 +721,9 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP - #:call GPU_PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(private='[i,r,k]', collapse=3) do i = advxb, advxe do r = is3%beg, is3%end do k = is2%beg, is2%end @@ -734,7 +734,7 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP ! PI4 of flux_rs_vf and flux_src_rs_vf at j = 1/2, 3/2 else @@ -743,7 +743,7 @@ contains F_src_rs${XYZ}$_vf, & is1, is2, is3, idwbuff(2)%beg, idwbuff(3)%beg) - #:call GPU_PARALLEL_LOOP(collapse=4) + $:GPU_PARALLEL_LOOP(private='[i,j,r,k]', collapse=4) do i = 1, flux_cbc_index do j = 0, 1 do r = is3%beg, is3%end @@ -762,9 +762,9 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP - #:call GPU_PARALLEL_LOOP(collapse=4) + $:GPU_PARALLEL_LOOP(private='[i,j,r,k]', collapse=4) do i = advxb, advxe do j = 0, 1 do r = is3%beg, is3%end @@ -783,12 +783,12 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP end if ! FD2 or FD4 of RHS at j = 0 - #:call GPU_PARALLEL_LOOP(collapse=2, private='[alpha_rho, vel, adv_local, mf, dvel_ds, dadv_ds, Re_cbc, dalpha_rho_ds,dvel_dt, dadv_dt, dalpha_rho_dt, L, lambda, Ys, dYs_dt, dYs_ds, h_k, Cp_i, Gamma_i, Xs]') + $:GPU_PARALLEL_LOOP(collapse=2, private='[r,k,alpha_rho, vel, adv_local, mf, dvel_ds, dadv_ds, Re_cbc, dalpha_rho_ds,dvel_dt, dadv_dt, dalpha_rho_dt, L, lambda, Ys, dYs_dt, dYs_ds, h_k, Cp_i, Gamma_i, Xs]') do r = is3%beg, is3%end do k = is2%beg, is2%end @@ -1126,7 +1126,7 @@ contains end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP end if #:endfor @@ -1187,7 +1187,7 @@ contains ! Reshaping Inputted Data in x-direction if (cbc_dir == 1) then - #:call GPU_PARALLEL_LOOP(collapse=4) + $:GPU_PARALLEL_LOOP(private='[i,j,k,r]', collapse=4) do i = 1, sys_size do r = is3%beg, is3%end do k = is2%beg, is2%end @@ -1198,9 +1198,9 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP - #:call GPU_PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(private='[j,k,r]', collapse=3) do r = is3%beg, is3%end do k = is2%beg, is2%end do j = 0, buff_size @@ -1210,9 +1210,9 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP - #:call GPU_PARALLEL_LOOP(collapse=4) + $:GPU_PARALLEL_LOOP(private='[i,j,k,r]', collapse=4) do i = 1, flux_cbc_index do r = is3%beg, is3%end do k = is2%beg, is2%end @@ -1224,9 +1224,9 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP - #:call GPU_PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(private='[j,k,r]', collapse=3) do r = is3%beg, is3%end do k = is2%beg, is2%end do j = -1, buff_size @@ -1235,10 +1235,10 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP if (riemann_solver == 1) then - #:call GPU_PARALLEL_LOOP(collapse=4) + $:GPU_PARALLEL_LOOP(private='[i,j,k,r]', collapse=4) do i = advxb, advxe do r = is3%beg, is3%end do k = is2%beg, is2%end @@ -1249,9 +1249,9 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP else - #:call GPU_PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(private='[j,k,r]', collapse=3) do r = is3%beg, is3%end do k = is2%beg, is2%end do j = -1, buff_size @@ -1261,7 +1261,7 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP end if ! END: Reshaping Inputted Data in x-direction @@ -1269,7 +1269,7 @@ contains ! Reshaping Inputted Data in y-direction elseif (cbc_dir == 2) then - #:call GPU_PARALLEL_LOOP(collapse=4) + $:GPU_PARALLEL_LOOP(private='[i,j,k,r]', collapse=4) do i = 1, sys_size do r = is3%beg, is3%end do k = is2%beg, is2%end @@ -1280,9 +1280,9 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP - #:call GPU_PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(private='[j,k,r]', collapse=3) do r = is3%beg, is3%end do k = is2%beg, is2%end do j = 0, buff_size @@ -1292,9 +1292,9 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP - #:call GPU_PARALLEL_LOOP(collapse=4) + $:GPU_PARALLEL_LOOP(private='[i,j,k,r]', collapse=4) do i = 1, flux_cbc_index do r = is3%beg, is3%end do k = is2%beg, is2%end @@ -1306,9 +1306,9 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP - #:call GPU_PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(private='[j,k,r]', collapse=3) do r = is3%beg, is3%end do k = is2%beg, is2%end do j = -1, buff_size @@ -1317,10 +1317,10 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP if (riemann_solver == 1) then - #:call GPU_PARALLEL_LOOP(collapse=4) + $:GPU_PARALLEL_LOOP(private='[i,j,k,r]', collapse=4) do i = advxb, advxe do r = is3%beg, is3%end do k = is2%beg, is2%end @@ -1331,9 +1331,9 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP else - #:call GPU_PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(private='[j,k,r]', collapse=3) do r = is3%beg, is3%end do k = is2%beg, is2%end do j = -1, buff_size @@ -1343,7 +1343,7 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP end if ! END: Reshaping Inputted Data in y-direction @@ -1351,7 +1351,7 @@ contains ! Reshaping Inputted Data in z-direction else - #:call GPU_PARALLEL_LOOP(collapse=4) + $:GPU_PARALLEL_LOOP(private='[i,j,k,r]', collapse=4) do i = 1, sys_size do r = is3%beg, is3%end do k = is2%beg, is2%end @@ -1362,9 +1362,9 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP - #:call GPU_PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(private='[j,k,r]', collapse=3) do r = is3%beg, is3%end do k = is2%beg, is2%end do j = 0, buff_size @@ -1374,9 +1374,9 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP - #:call GPU_PARALLEL_LOOP(collapse=4) + $:GPU_PARALLEL_LOOP(private='[i,j,k,r]', collapse=4) do i = 1, flux_cbc_index do r = is3%beg, is3%end do k = is2%beg, is2%end @@ -1388,9 +1388,9 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP - #:call GPU_PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(private='[j,k,r]', collapse=3) do r = is3%beg, is3%end do k = is2%beg, is2%end do j = -1, buff_size @@ -1399,10 +1399,10 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP if (riemann_solver == 1) then - #:call GPU_PARALLEL_LOOP(collapse=4) + $:GPU_PARALLEL_LOOP(private='[i,j,k,r]', collapse=4) do i = advxb, advxe do r = is3%beg, is3%end do k = is2%beg, is2%end @@ -1413,9 +1413,9 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP else - #:call GPU_PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(private='[j,k,r]', collapse=3) do r = is3%beg, is3%end do k = is2%beg, is2%end do j = -1, buff_size @@ -1425,7 +1425,7 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP end if end if @@ -1455,7 +1455,7 @@ contains ! Reshaping Outputted Data in x-direction if (cbc_dir == 1) then - #:call GPU_PARALLEL_LOOP(collapse=4) + $:GPU_PARALLEL_LOOP(private='[i,j,k,r]', collapse=4) do i = 1, flux_cbc_index do r = is3%beg, is3%end do k = is2%beg, is2%end @@ -1467,8 +1467,8 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP - #:call GPU_PARALLEL_LOOP(collapse=3) + $:END_GPU_PARALLEL_LOOP + $:GPU_PARALLEL_LOOP(private='[j,k,r]', collapse=3) do r = is3%beg, is3%end do k = is2%beg, is2%end do j = -1, buff_size @@ -1477,10 +1477,10 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP if (riemann_solver == 1) then - #:call GPU_PARALLEL_LOOP(collapse=4) + $:GPU_PARALLEL_LOOP(private='[i,j,k,r]', collapse=4) do i = advxb, advxe do r = is3%beg, is3%end do k = is2%beg, is2%end @@ -1491,9 +1491,9 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP else - #:call GPU_PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(private='[j,k,r]', collapse=3) do r = is3%beg, is3%end do k = is2%beg, is2%end do j = -1, buff_size @@ -1503,14 +1503,14 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP end if ! END: Reshaping Outputted Data in x-direction ! Reshaping Outputted Data in y-direction elseif (cbc_dir == 2) then - #:call GPU_PARALLEL_LOOP(collapse=4) + $:GPU_PARALLEL_LOOP(private='[i,j,k,r]', collapse=4) do i = 1, flux_cbc_index do r = is3%beg, is3%end do k = is2%beg, is2%end @@ -1522,9 +1522,9 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP - #:call GPU_PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(private='[j,k,r]', collapse=3) do r = is3%beg, is3%end do k = is2%beg, is2%end do j = -1, buff_size @@ -1533,10 +1533,10 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP if (riemann_solver == 1) then - #:call GPU_PARALLEL_LOOP(collapse=4) + $:GPU_PARALLEL_LOOP(private='[i,j,k,r]', collapse=4) do i = advxb, advxe do r = is3%beg, is3%end do k = is2%beg, is2%end @@ -1547,9 +1547,9 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP else - #:call GPU_PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(private='[j,k,r]', collapse=3) do r = is3%beg, is3%end do k = is2%beg, is2%end do j = -1, buff_size @@ -1559,7 +1559,7 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP end if ! END: Reshaping Outputted Data in y-direction @@ -1567,7 +1567,7 @@ contains ! Reshaping Outputted Data in z-direction else - #:call GPU_PARALLEL_LOOP(collapse=4) + $:GPU_PARALLEL_LOOP(private='[i,j,k,r]', collapse=4) do i = 1, flux_cbc_index do r = is3%beg, is3%end do k = is2%beg, is2%end @@ -1579,9 +1579,9 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP - #:call GPU_PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(private='[j,k,r]', collapse=3) do r = is3%beg, is3%end do k = is2%beg, is2%end do j = -1, buff_size @@ -1590,10 +1590,10 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP if (riemann_solver == 1) then - #:call GPU_PARALLEL_LOOP(collapse=4) + $:GPU_PARALLEL_LOOP(private='[i,j,k,r]', collapse=4) do i = advxb, advxe do r = is3%beg, is3%end do k = is2%beg, is2%end @@ -1604,9 +1604,9 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP else - #:call GPU_PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(private='[j,k,r]', collapse=3) do r = is3%beg, is3%end do k = is2%beg, is2%end do j = -1, buff_size @@ -1616,7 +1616,7 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP end if end if diff --git a/src/simulation/m_data_output.fpp b/src/simulation/m_data_output.fpp index 7ae10f440..542af075a 100644 --- a/src/simulation/m_data_output.fpp +++ b/src/simulation/m_data_output.fpp @@ -279,7 +279,7 @@ contains integer :: j, k, l ! Computing Stability Criteria at Current Time-step - #:call GPU_PARALLEL_LOOP(collapse=3, private='[vel, alpha, Re]') + $:GPU_PARALLEL_LOOP(collapse=3, private='[j,k,l,vel, alpha, Re]') do l = 0, p do k = 0, n do j = 0, m @@ -296,7 +296,7 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP ! end: Computing Stability Criteria at Current Time-step diff --git a/src/simulation/m_derived_variables.fpp b/src/simulation/m_derived_variables.fpp index f4653a936..14b08c84f 100644 --- a/src/simulation/m_derived_variables.fpp +++ b/src/simulation/m_derived_variables.fpp @@ -146,7 +146,7 @@ contains z_accel) end if - #:call GPU_PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(private='[i,j,k]', collapse=3) do k = 0, p do j = 0, n do i = 0, m @@ -163,7 +163,7 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP $:GPU_UPDATE(host='[accel_mag]') @@ -204,7 +204,7 @@ contains ! Computing the acceleration component in the x-coordinate direction if (i == 1) then - #:call GPU_PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(private='[j,k,l]', collapse=3) do l = 0, p do k = 0, n do j = 0, m @@ -215,10 +215,10 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP if (n == 0) then - #:call GPU_PARALLEL_LOOP(collapse=4) + $:GPU_PARALLEL_LOOP(private='[j,k,l,r]', collapse=4) do l = 0, p do k = 0, n do j = 0, m @@ -230,9 +230,9 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP elseif (p == 0) then - #:call GPU_PARALLEL_LOOP(collapse=4) + $:GPU_PARALLEL_LOOP(private='[j,k,l,r]', collapse=4) do l = 0, p do k = 0, n do j = 0, m @@ -246,10 +246,10 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP else if (grid_geometry == 3) then - #:call GPU_PARALLEL_LOOP(collapse=4) + $:GPU_PARALLEL_LOOP(private='[j,k,l,r]', collapse=4) do l = 0, p do k = 0, n do j = 0, m @@ -265,9 +265,9 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP else - #:call GPU_PARALLEL_LOOP(collapse=4) + $:GPU_PARALLEL_LOOP(private='[j,k,l,r]', collapse=4) do l = 0, p do k = 0, n do j = 0, m @@ -283,12 +283,12 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP end if end if ! Computing the acceleration component in the y-coordinate direction elseif (i == 2) then - #:call GPU_PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(private='[j,k,l]', collapse=3) do l = 0, p do k = 0, n do j = 0, m @@ -299,10 +299,10 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP if (p == 0) then - #:call GPU_PARALLEL_LOOP(collapse=4) + $:GPU_PARALLEL_LOOP(private='[j,k,l,r]', collapse=4) do l = 0, p do k = 0, n do j = 0, m @@ -316,10 +316,10 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP else if (grid_geometry == 3) then - #:call GPU_PARALLEL_LOOP(collapse=4) + $:GPU_PARALLEL_LOOP(private='[j,k,l,r]', collapse=4) do l = 0, p do k = 0, n do j = 0, m @@ -336,9 +336,9 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP else - #:call GPU_PARALLEL_LOOP(collapse=4) + $:GPU_PARALLEL_LOOP(private='[j,k,l,r]', collapse=4) do l = 0, p do k = 0, n do j = 0, m @@ -354,12 +354,12 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP end if end if ! Computing the acceleration component in the z-coordinate direction else - #:call GPU_PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(private='[j,k,l]', collapse=3) do l = 0, p do k = 0, n do j = 0, m @@ -370,10 +370,10 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP if (grid_geometry == 3) then - #:call GPU_PARALLEL_LOOP(collapse=4) + $:GPU_PARALLEL_LOOP(private='[j,k,l,r]', collapse=4) do l = 0, p do k = 0, n do j = 0, m @@ -391,9 +391,9 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP else - #:call GPU_PARALLEL_LOOP(collapse=4) + $:GPU_PARALLEL_LOOP(private='[j,k,l,r]', collapse=4) do l = 0, p do k = 0, n do j = 0, m @@ -409,7 +409,7 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP end if end if @@ -438,7 +438,7 @@ contains end do if (n == 0) then !1D simulation - #:call GPU_PARALLEL_LOOP(collapse=3,private='[dV]') + $:GPU_PARALLEL_LOOP(collapse=3,private='[j,k,l,dV]') do l = 0, p !Loop over grid do k = 0, n do j = 0, m @@ -458,9 +458,9 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP elseif (p == 0) then !2D simulation - #:call GPU_PARALLEL_LOOP(collapse=3,private='[dV]') + $:GPU_PARALLEL_LOOP(collapse=3,private='[j,k,l,dV]') do l = 0, p !Loop over grid do k = 0, n do j = 0, m @@ -483,9 +483,9 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP else !3D simulation - #:call GPU_PARALLEL_LOOP(collapse=3,private='[dV]') + $:GPU_PARALLEL_LOOP(collapse=3,private='[j,k,l,dV]') do l = 0, p !Loop over grid do k = 0, n do j = 0, m @@ -512,7 +512,7 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP end if $:GPU_UPDATE(host='[c_m]') diff --git a/src/simulation/m_fftw.fpp b/src/simulation/m_fftw.fpp index 39c8bd493..7f7cc6582 100644 --- a/src/simulation/m_fftw.fpp +++ b/src/simulation/m_fftw.fpp @@ -141,7 +141,7 @@ contains if (bc_y%beg >= 0) return #if defined(MFC_GPU) - #:call GPU_PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(private='[j,k,l]', collapse=3) do k = 1, sys_size do j = 0, m do l = 1, cmplx_size @@ -149,9 +149,9 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP - #:call GPU_PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(private='[j,k,l]', collapse=3) do k = 1, sys_size do j = 0, m do l = 0, p @@ -159,7 +159,7 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP #:if not USING_NVHPC p_real => data_real_gpu @@ -179,7 +179,7 @@ contains Nfq = 3 $:GPU_UPDATE(device='[Nfq]') - #:call GPU_PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(private='[j,k,l]', collapse=3) do k = 1, sys_size do j = 0, m do l = 1, Nfq @@ -187,7 +187,7 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP #:call GPU_HOST_DATA(use_device_ptr='[p_real, p_fltr_cmplx]') #if defined(__PGI) @@ -198,7 +198,7 @@ contains #endif #:endcall GPU_HOST_DATA - #:call GPU_PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(private='[j,k,l]', collapse=3) do k = 1, sys_size do j = 0, m do l = 0, p @@ -207,11 +207,11 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP do i = 1, fourier_rings - #:call GPU_PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(private='[j,k,l]', collapse=3) do k = 1, sys_size do j = 0, m do l = 1, cmplx_size @@ -219,9 +219,9 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP - #:call GPU_PARALLEL_LOOP(collapse=3, firstprivate='[i]') + $:GPU_PARALLEL_LOOP(private='[j,k,l]', collapse=3, firstprivate='[i]') do k = 1, sys_size do j = 0, m do l = 0, p @@ -229,7 +229,7 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP #:call GPU_HOST_DATA(use_device_ptr='[p_real, p_cmplx]') #if defined(__PGI) @@ -243,7 +243,7 @@ contains Nfq = min(floor(2_dp*real(i, dp)*pi), cmplx_size) $:GPU_UPDATE(device='[Nfq]') - #:call GPU_PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(private='[j,k,l]', collapse=3) do k = 1, sys_size do j = 0, m do l = 1, Nfq @@ -251,7 +251,7 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP #:call GPU_HOST_DATA(use_device_ptr='[p_real, p_fltr_cmplx]') #if defined(__PGI) @@ -262,7 +262,7 @@ contains #endif #:endcall GPU_HOST_DATA - #:call GPU_PARALLEL_LOOP(collapse=3, firstprivate='[i]') + $:GPU_PARALLEL_LOOP(private='[j,k,l]', collapse=3, firstprivate='[i]') do k = 1, sys_size do j = 0, m do l = 0, p @@ -271,7 +271,7 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP end do #:endcall GPU_DATA diff --git a/src/simulation/m_hyperelastic.fpp b/src/simulation/m_hyperelastic.fpp index 2b171016c..581b5f7bf 100644 --- a/src/simulation/m_hyperelastic.fpp +++ b/src/simulation/m_hyperelastic.fpp @@ -106,7 +106,7 @@ contains real(wp) :: G_local integer :: j, k, l, i, r - #:call GPU_PARALLEL_LOOP(collapse=3, private='[alpha_K, alpha_rho_K, rho, gamma, pi_inf, qv, G_local, Re, tensora, tensorb]') + $:GPU_PARALLEL_LOOP(collapse=3, private='[j,k,l,alpha_K, alpha_rho_K, rho, gamma, pi_inf, qv, G_local, Re, tensora, tensorb]') do l = 0, p do k = 0, n do j = 0, m @@ -207,7 +207,7 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP end subroutine s_hyperelastic_rmt_stress_update !> The following subroutine handles the calculation of the btensor. diff --git a/src/simulation/m_hypoelastic.fpp b/src/simulation/m_hypoelastic.fpp index 312c2343b..b973d90b5 100644 --- a/src/simulation/m_hypoelastic.fpp +++ b/src/simulation/m_hypoelastic.fpp @@ -104,7 +104,7 @@ contains ! calculate velocity gradients + rho_K and G_K ! TODO: re-organize these loops one by one for GPU efficiency if possible? - #:call GPU_PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(private='[k,l,q]', collapse=3) do q = 0, p do l = 0, n do k = 0, m @@ -112,9 +112,9 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP - #:call GPU_PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(private='[k,l,q]', collapse=3) do q = 0, p do l = 0, n do k = 0, m @@ -127,10 +127,10 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP if (ndirs > 1) then - #:call GPU_PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(private='[k,l,q]', collapse=3) do q = 0, p do l = 0, n do k = 0, m @@ -138,9 +138,9 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP - #:call GPU_PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(private='[k,l,q]', collapse=3) do q = 0, p do l = 0, n do k = 0, m @@ -156,12 +156,12 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP ! 3D if (ndirs == 3) then - #:call GPU_PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(private='[k,l,q]', collapse=3) do q = 0, p do l = 0, n do k = 0, m @@ -170,9 +170,9 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP - #:call GPU_PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(private='[k,l,q]', collapse=3) do q = 0, p do l = 0, n do k = 0, m @@ -192,11 +192,11 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP end if end if - #:call GPU_PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(private='[k,l,q]', collapse=3) do q = 0, p do l = 0, n do k = 0, m @@ -218,10 +218,10 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP ! apply rhs source term to elastic stress equation - #:call GPU_PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(private='[k,l,q]', collapse=3) do q = 0, p do l = 0, n do k = 0, m @@ -233,10 +233,10 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP elseif (idir == 2) then - #:call GPU_PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(private='[k,l,q]', collapse=3) do q = 0, p do l = 0, n do k = 0, m @@ -269,10 +269,10 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP elseif (idir == 3) then - #:call GPU_PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(private='[k,l,q]', collapse=3) do q = 0, p do l = 0, n do k = 0, m @@ -336,12 +336,12 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP end if if (cyl_coord .and. idir == 2) then - #:call GPU_PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(private='[k,l,q]', collapse=3) do q = 0, p do l = 0, n do k = 0, m @@ -369,7 +369,7 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP end if @@ -404,14 +404,14 @@ contains if (n == 0) then l = 0; q = 0 - #:call GPU_PARALLEL_LOOP() + $:GPU_PARALLEL_LOOP(private='[k]', copyin='[l,q]') do k = 0, m rhs_vf(damage_idx)%sf(k, l, q) = (alpha_bar*max(abs(q_cons_vf(stress_idx%beg)%sf(k, l, q)) - tau_star, 0._wp))**cont_damage_s end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP elseif (p == 0) then q = 0 - #:call GPU_PARALLEL_LOOP(collapse=2) + $:GPU_PARALLEL_LOOP(private='[k,l]', copyin='[q]' collapse=2) do l = 0, n do k = 0, m ! Maximum principal stress @@ -424,9 +424,9 @@ contains rhs_vf(damage_idx)%sf(k, l, q) = (alpha_bar*max(tau_p - tau_star, 0._wp))**cont_damage_s end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP else - #:call GPU_PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(private='[k,l,q]', collapse=3) do q = 0, p do l = 0, n do k = 0, m @@ -463,7 +463,7 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP end if end subroutine s_compute_damage_state diff --git a/src/simulation/m_ibm.fpp b/src/simulation/m_ibm.fpp index 4bf95a4ba..5b32e68d6 100644 --- a/src/simulation/m_ibm.fpp +++ b/src/simulation/m_ibm.fpp @@ -197,7 +197,7 @@ contains type(ghost_point) :: gp type(ghost_point) :: innerp if (num_gps > 0) then - $:NEW_GPU_PARALLEL_LOOP(private='[i,physical_loc,dyn_pres,alpha_rho_IP, alpha_IP,pres_IP,vel_IP,vel_g,vel_norm_IP,r_IP, v_IP,pb_IP,mv_IP,nmom_IP,presb_IP,massv_IP,rho, gamma,pi_inf,Re_K,G_K,Gs,gp,innerp,norm,buf, radial_vector, rotation_velocity, j,k,l,q]') + $:GPU_PARALLEL_LOOP(private='[i,physical_loc,dyn_pres,alpha_rho_IP, alpha_IP,pres_IP,vel_IP,vel_g,vel_norm_IP,r_IP, v_IP,pb_IP,mv_IP,nmom_IP,presb_IP,massv_IP,rho, gamma,pi_inf,Re_K,G_K,Gs,gp,innerp,norm,buf, radial_vector, rotation_velocity, j,k,l,q]') do i = 1, num_gps gp = ghost_points(i) @@ -365,12 +365,12 @@ contains end do end if end do - ! $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP end if !Correct the state of the inner points in IBs if (num_inner_gps > 0) then - #:call GPU_PARALLEL_LOOP(private='[physical_loc,dyn_pres,alpha_rho_IP, alpha_IP,vel_g,rho,gamma,pi_inf,Re_K,innerp,j,k,l,q]') + $:GPU_PARALLEL_LOOP(private='[i,physical_loc,dyn_pres,alpha_rho_IP, alpha_IP,vel_g,rho,gamma,pi_inf,Re_K,innerp,j,k,l,q]') do i = 1, num_inner_gps innerp = inner_points(i) @@ -383,7 +383,7 @@ contains q_cons_vf(q)%sf(j, k, l) = 0._wp end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP end if end subroutine s_ibm_correct_state diff --git a/src/simulation/m_igr.fpp b/src/simulation/m_igr.fpp index 599fe2cc7..46cc0b385 100644 --- a/src/simulation/m_igr.fpp +++ b/src/simulation/m_igr.fpp @@ -162,7 +162,7 @@ contains end if #endif - #:call GPU_PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(private='[j,k,l]', collapse=3) do l = idwbuff(3)%beg, idwbuff(3)%end do k = idwbuff(2)%beg, idwbuff(2)%end do j = idwbuff(1)%beg, idwbuff(1)%end @@ -171,7 +171,7 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP if (p == 0) then alf_igr = alf_factor*max(dx(1), dy(1))**2._wp @@ -244,7 +244,7 @@ contains end if do q = 1, num_iters - #:call GPU_PARALLEL_LOOP(collapse=3, private='[rho_lx, rho_rx, rho_ly, rho_ry, rho_lz, rho_rz, fd_coeff]') + $:GPU_PARALLEL_LOOP(collapse=3, private='[j,k,l,rho_lx, rho_rx, rho_ly, rho_ry, rho_lz, rho_rz, fd_coeff]') do l = 0, p do k = 0, n do j = 0, m @@ -307,12 +307,12 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP call s_populate_F_igr_buffers(bc_type, jac_sf) if (igr_iter_solver == 1) then ! Jacobi iteration - #:call GPU_PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(private='[j,k,l]', collapse=3) do l = idwbuff(3)%beg, idwbuff(3)%end do k = idwbuff(2)%beg, idwbuff(2)%end do j = idwbuff(1)%beg, idwbuff(1)%end @@ -320,7 +320,7 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP end if end do @@ -340,7 +340,7 @@ contains real(wp) :: F_L, vel_L, rho_L, F_R, vel_R, rho_R real(wp), dimension(num_fluids) :: alpha_rho_L, alpha_rho_R - #:call GPU_PARALLEL_LOOP(collapse=3, private='[F_L, vel_L, alpha_rho_L, F_R, vel_R, alpha_rho_R]') + $:GPU_PARALLEL_LOOP(collapse=3, private='[j,k,l,F_L, vel_L, alpha_rho_L, F_R, vel_R, alpha_rho_R]') do l = 0, p do k = 0, n do j = -1, m @@ -391,7 +391,7 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP end subroutine s_igr_sigma_x @@ -419,7 +419,7 @@ contains if (idir == 1) then if (p == 0) then - #:call GPU_PARALLEL_LOOP(collapse=3, private='[rho_L, rho_R, gamma_L, gamma_R, pi_inf_L, pi_inf_R, mu_L, mu_R, vel_L, vel_R, pres_L, pres_R, alpha_L, alpha_R, alpha_rho_L, alpha_rho_R, F_L, F_R, E_L, E_R, cfl, dvel, dvel_small, rho_sf_small, vflux_L_arr, vflux_R_arr]') + $:GPU_PARALLEL_LOOP(collapse=3, private='[j,k,l,rho_L, rho_R, gamma_L, gamma_R, pi_inf_L, pi_inf_R, mu_L, mu_R, vel_L, vel_R, pres_L, pres_R, alpha_L, alpha_R, alpha_rho_L, alpha_rho_R, F_L, F_R, E_L, E_R, cfl, dvel, dvel_small, rho_sf_small, vflux_L_arr, vflux_R_arr]') do l = 0, p do k = 0, n do j = -1, m @@ -797,9 +797,9 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP else - #:call GPU_PARALLEL_LOOP(collapse=3, private='[rho_L, rho_R, gamma_L, gamma_R, pi_inf_L, pi_inf_R, mu_L, mu_R, vel_L, vel_R, pres_L, pres_R, alpha_L, alpha_R, alpha_rho_L, alpha_rho_R, F_L, F_R, E_L, E_R, cfl, dvel, dvel_small, rho_sf_small, vflux_L_arr, vflux_R_arr]') + $:GPU_PARALLEL_LOOP(collapse=3, private='[j,k,l,rho_L, rho_R, gamma_L, gamma_R, pi_inf_L, pi_inf_R, mu_L, mu_R, vel_L, vel_R, pres_L, pres_R, alpha_L, alpha_R, alpha_rho_L, alpha_rho_R, F_L, F_R, E_L, E_R, cfl, dvel, dvel_small, rho_sf_small, vflux_L_arr, vflux_R_arr]') do l = 0, p do k = 0, n do j = -1, m @@ -1268,11 +1268,11 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP end if else if (idir == 2) then if (p == 0) then - #:call GPU_PARALLEL_LOOP(collapse=3, private='[rho_L, rho_R, gamma_L, gamma_R, pi_inf_L, pi_inf_R, mu_L, mu_R, vel_L, vel_R, pres_L, pres_R, alpha_L, alpha_R, alpha_rho_L, alpha_rho_R, F_L, F_R, E_L, E_R, cfl, dvel_small, rho_sf_small, vflux_L_arr, vflux_R_arr]') + $:GPU_PARALLEL_LOOP(collapse=3, private='[j,k,l,rho_L, rho_R, gamma_L, gamma_R, pi_inf_L, pi_inf_R, mu_L, mu_R, vel_L, vel_R, pres_L, pres_R, alpha_L, alpha_R, alpha_rho_L, alpha_rho_R, F_L, F_R, E_L, E_R, cfl, dvel_small, rho_sf_small, vflux_L_arr, vflux_R_arr]') do l = 0, p do k = -1, n do j = 0, m @@ -1639,9 +1639,9 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP else - #:call GPU_PARALLEL_LOOP(collapse=3, private='[rho_L, rho_R, gamma_L, gamma_R, pi_inf_L, pi_inf_R, mu_L, mu_R, vel_L, vel_R, pres_L, pres_R, alpha_L, alpha_R, alpha_rho_L, alpha_rho_R, F_L, F_R, E_L, E_R, cfl, dvel_small, rho_sf_small, vflux_L_arr, vflux_R_arr]') + $:GPU_PARALLEL_LOOP(collapse=3, private='[j,k,l,rho_L, rho_R, gamma_L, gamma_R, pi_inf_L, pi_inf_R, mu_L, mu_R, vel_L, vel_R, pres_L, pres_R, alpha_L, alpha_R, alpha_rho_L, alpha_rho_R, F_L, F_R, E_L, E_R, cfl, dvel_small, rho_sf_small, vflux_L_arr, vflux_R_arr]') do l = 0, p do k = -1, n do j = 0, m @@ -2095,10 +2095,10 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP end if elseif (idir == 3) then - #:call GPU_PARALLEL_LOOP(collapse=3, private='[rho_L, rho_R, gamma_L, gamma_R, pi_inf_L, pi_inf_R, mu_L, mu_R, vel_L, vel_R, pres_L, pres_R, alpha_L, alpha_R, alpha_rho_L, alpha_rho_R, F_L, F_R, E_L, E_R, cfl, dvel_small, rho_sf_small, vflux_L_arr, vflux_R_arr]') + $:GPU_PARALLEL_LOOP(collapse=3, private='[j,k,l,rho_L, rho_R, gamma_L, gamma_R, pi_inf_L, pi_inf_R, mu_L, mu_R, vel_L, vel_R, pres_L, pres_R, alpha_L, alpha_R, alpha_rho_L, alpha_rho_R, F_L, F_R, E_L, E_R, cfl, dvel_small, rho_sf_small, vflux_L_arr, vflux_R_arr]') do l = -1, p do k = 0, n do j = 0, m @@ -2551,7 +2551,7 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP end if end subroutine s_igr_riemann_solver @@ -2611,7 +2611,7 @@ contains integer, intent(in) :: idir if (idir == 1) then - #:call GPU_PARALLEL_LOOP(collapse=4) + $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=4) do i = 1, sys_size do l = 0, p do k = 0, n @@ -2623,9 +2623,9 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP elseif (idir == 2) then - #:call GPU_PARALLEL_LOOP(collapse=4) + $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=4) do i = 1, sys_size do l = 0, p do k = 0, n @@ -2638,9 +2638,9 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP elseif (idir == 3) then - #:call GPU_PARALLEL_LOOP(collapse=4) + $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=4) do i = 1, sys_size do l = 0, p do k = 0, n @@ -2653,7 +2653,7 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP end if end subroutine s_igr_flux_add diff --git a/src/simulation/m_mhd.fpp b/src/simulation/m_mhd.fpp index 12ba72809..4e46dd459 100644 --- a/src/simulation/m_mhd.fpp +++ b/src/simulation/m_mhd.fpp @@ -76,7 +76,7 @@ contains real(wp), dimension(3) :: v, B real(wp) :: divB, vdotB - #:call GPU_PARALLEL_LOOP(collapse=3, private='[v, B]') + $:GPU_PARALLEL_LOOP(collapse=3, private='[k,l,q,v, B]') do q = 0, p do l = 0, n do k = 0, m @@ -129,7 +129,7 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP end subroutine s_compute_mhd_powell_rhs diff --git a/src/simulation/m_mpi_proxy.fpp b/src/simulation/m_mpi_proxy.fpp index d578eb7ff..017906a7d 100644 --- a/src/simulation/m_mpi_proxy.fpp +++ b/src/simulation/m_mpi_proxy.fpp @@ -313,7 +313,7 @@ contains #:for mpi_dir in [1, 2, 3] if (mpi_dir == ${mpi_dir}$) then #:if mpi_dir == 1 - #:call GPU_PARALLEL_LOOP(collapse=3,private='[r]') + $:GPU_PARALLEL_LOOP(collapse=3,private='[j,k,l,r]') do l = 0, p do k = 0, n do j = 0, buff_size - 1 @@ -322,9 +322,9 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP #:elif mpi_dir == 2 - #:call GPU_PARALLEL_LOOP(collapse=3,private='[r]') + $:GPU_PARALLEL_LOOP(collapse=3,private='[j,k,l,r]') do l = 0, p do k = 0, buff_size - 1 do j = -buff_size, m + buff_size @@ -334,9 +334,9 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP #:else - #:call GPU_PARALLEL_LOOP(collapse=3,private='[r]') + $:GPU_PARALLEL_LOOP(collapse=3,private='[j,k,l,r]') do l = 0, buff_size - 1 do k = -buff_size, n + buff_size do j = -buff_size, m + buff_size @@ -346,7 +346,7 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP #:endif end if #:endfor @@ -390,7 +390,7 @@ contains #:for mpi_dir in [1, 2, 3] if (mpi_dir == ${mpi_dir}$) then #:if mpi_dir == 1 - #:call GPU_PARALLEL_LOOP(collapse=3,private='[r]') + $:GPU_PARALLEL_LOOP(collapse=3,private='[j,k,l,r]') do l = 0, p do k = 0, n do j = -buff_size, -1 @@ -399,9 +399,9 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP #:elif mpi_dir == 2 - #:call GPU_PARALLEL_LOOP(collapse=3,private='[r]') + $:GPU_PARALLEL_LOOP(collapse=3,private='[j,k,l,r]') do l = 0, p do k = -buff_size, -1 do j = -buff_size, m + buff_size @@ -411,10 +411,10 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP #:else ! Unpacking buffer from bc_z%beg - #:call GPU_PARALLEL_LOOP(collapse=3,private='[r]') + $:GPU_PARALLEL_LOOP(collapse=3,private='[j,k,l,r]') do l = -buff_size, -1 do k = -buff_size, n + buff_size do j = -buff_size, m + buff_size @@ -425,7 +425,7 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP #:endif end if #:endfor diff --git a/src/simulation/m_muscl.fpp b/src/simulation/m_muscl.fpp index 335ef7e03..748aafb8c 100644 --- a/src/simulation/m_muscl.fpp +++ b/src/simulation/m_muscl.fpp @@ -118,7 +118,7 @@ contains if (muscl_order == 1) then if (muscl_dir == 1) then - #:call GPU_PARALLEL_LOOP(collapse=4) + $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=4) do i = 1, ubound(v_vf, 1) do l = is3_muscl%beg, is3_muscl%end do k = is2_muscl%beg, is2_muscl%end @@ -131,7 +131,7 @@ contains end do #:endcall else if (muscl_dir == 2) then - #:call GPU_PARALLEL_LOOP(collapse=4) + $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=4) do i = 1, ubound(v_vf, 1) do l = is3_muscl%beg, is3_muscl%end do k = is2_muscl%beg, is2_muscl%end @@ -144,7 +144,7 @@ contains end do #:endcall else if (muscl_dir == 3) then - #:call GPU_PARALLEL_LOOP(collapse=4) + $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=4) do i = 1, ubound(v_vf, 1) do l = is3_muscl%beg, is3_muscl%end do k = is2_muscl%beg, is2_muscl%end @@ -162,7 +162,7 @@ contains ! MUSCL Reconstruction #:for MUSCL_DIR, XYZ in [(1, 'x'), (2, 'y'), (3, 'z')] if (muscl_dir == ${MUSCL_DIR}$) then - #:call GPU_PARALLEL_LOOP(collapse=4,private='[slopeL,slopeR,slope]') + $:GPU_PARALLEL_LOOP(collapse=4,private='[i,j,k,l,slopeL,slopeR,slope]') do l = is3_muscl%beg, is3_muscl%end do k = is2_muscl%beg, is2_muscl%end do j = is1_muscl%beg, is1_muscl%end @@ -243,7 +243,7 @@ contains #:for MUSCL_DIR, XYZ in [(1, 'x'), (2, 'y'), (3, 'z')] if (muscl_dir == ${MUSCL_DIR}$) then - #:call GPU_PARALLEL_LOOP(collapse=3,private='[aCL,aC,aCR,aTHINC,moncon,sign,qmin,qmax]') + $:GPU_PARALLEL_LOOP(collapse=3,private='[j,k,l,aCL,aC,aCR,aTHINC,moncon,sign,qmin,qmax]') do l = is3_muscl%beg, is3_muscl%end do k = is2_muscl%beg, is2_muscl%end do j = is1_muscl%beg, is1_muscl%end @@ -318,7 +318,7 @@ contains $:GPU_UPDATE(device='[v_size]') if (muscl_dir == 1) then - #:call GPU_PARALLEL_LOOP(collapse=4) + $:GPU_PARALLEL_LOOP(private='[j,k,l,q]', collapse=4) do j = 1, v_size do q = is3_muscl%beg, is3_muscl%end do l = is2_muscl%beg, is2_muscl%end @@ -335,7 +335,7 @@ contains if (n == 0) return if (muscl_dir == 2) then - #:call GPU_PARALLEL_LOOP(collapse=4) + $:GPU_PARALLEL_LOOP(private='[j,k,l,q]', collapse=4) do j = 1, v_size do q = is3_muscl%beg, is3_muscl%end do l = is2_muscl%beg, is2_muscl%end @@ -351,7 +351,7 @@ contains ! Reshaping/Projecting onto Characteristic Fields in z-direction if (p == 0) return if (muscl_dir == 3) then - #:call GPU_PARALLEL_LOOP(collapse=4) + $:GPU_PARALLEL_LOOP(private='[j,k,l,q]', collapse=4) do j = 1, v_size do q = is3_muscl%beg, is3_muscl%end do l = is2_muscl%beg, is2_muscl%end diff --git a/src/simulation/m_pressure_relaxation.fpp b/src/simulation/m_pressure_relaxation.fpp index 407c01ff1..1a3c2795c 100644 --- a/src/simulation/m_pressure_relaxation.fpp +++ b/src/simulation/m_pressure_relaxation.fpp @@ -70,7 +70,7 @@ contains type(scalar_field), dimension(sys_size), intent(inout) :: q_cons_vf integer :: j, k, l - #:call GPU_PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(private='[j,k,l]', collapse=3) do l = 0, p do k = 0, n do j = 0, m @@ -78,7 +78,7 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP end subroutine s_pressure_relaxation_procedure diff --git a/src/simulation/m_qbmm.fpp b/src/simulation/m_qbmm.fpp index 85dd51b16..cd160ec81 100644 --- a/src/simulation/m_qbmm.fpp +++ b/src/simulation/m_qbmm.fpp @@ -433,7 +433,7 @@ contains end select if (.not. polytropic) then - #:call GPU_PARALLEL_LOOP(collapse=5,private='[nb_q,nR,nR2,R,R2,nb_dot,nR_dot,nR2_dot,var,AX]') + $:GPU_PARALLEL_LOOP(collapse=5,private='[i,j,k,l,q,nb_q,nR,nR2,R,R2,nb_dot,nR_dot,nR2_dot,var,AX]') do i = 1, nb do q = 1, nnode do l = 0, p @@ -534,12 +534,12 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP end if ! The following block is not repeated and is left as is if (idir == 1) then - #:call GPU_PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(private='[i,l,q]', collapse=3) do l = 0, p do q = 0, n do i = 0, m @@ -558,7 +558,7 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP end if end subroutine s_compute_qbmm_rhs @@ -714,7 +714,7 @@ contains is1_qbmm = ix; is2_qbmm = iy; is3_qbmm = iz $:GPU_UPDATE(device='[is1_qbmm,is2_qbmm,is3_qbmm]') - #:call GPU_PARALLEL_LOOP(collapse=3, private='[moms, msum, wght, abscX, abscY, wght_pb, wght_mv, wght_ht, coeff, ht, r, q, n_tait, B_tait, pres, rho, nbub, c, alf, momsum, drdt, drdt2, chi_vw, x_vw, rho_mw, k_mw, T_bar, grad_T]') + $:GPU_PARALLEL_LOOP(collapse=3, private='[id1,id2,id3,moms, msum, wght, abscX, abscY, wght_pb, wght_mv, wght_ht, coeff, ht, r, q, n_tait, B_tait, pres, rho, nbub, c, alf, momsum, drdt, drdt2, chi_vw, x_vw, rho_mw, k_mw, T_bar, grad_T]') do id3 = is3_qbmm%beg, is3_qbmm%end do id2 = is2_qbmm%beg, is2_qbmm%end do id1 = is1_qbmm%beg, is1_qbmm%end @@ -851,7 +851,7 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP contains ! Helper to select the correct coefficient routine diff --git a/src/simulation/m_rhs.fpp b/src/simulation/m_rhs.fpp index a9b0e0665..71b3337d3 100644 --- a/src/simulation/m_rhs.fpp +++ b/src/simulation/m_rhs.fpp @@ -564,7 +564,7 @@ contains end do end if ! end allocation of viscous variables - #:call GPU_PARALLEL_LOOP(collapse=4) + $:GPU_PARALLEL_LOOP(private='[i,j,k,l,id]', collapse=4) do id = 1, num_dims do i = 1, sys_size do l = idwbuff(3)%beg, idwbuff(3)%end @@ -576,7 +576,7 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP end if ! end allocation for .not. igr @@ -647,7 +647,7 @@ contains if (.not. igr) then ! Association/Population of Working Variables - #:call GPU_PARALLEL_LOOP(collapse=4) + $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=4) do i = 1, sys_size do l = idwbuff(3)%beg, idwbuff(3)%end do k = idwbuff(2)%beg, idwbuff(2)%end @@ -657,12 +657,12 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP ! Converting Conservative to Primitive Variables if (mpp_lim .and. bubbles_euler) then - #:call GPU_PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(private='[j,k,l]', collapse=3) do l = idwbuff(3)%beg, idwbuff(3)%end do k = idwbuff(2)%beg, idwbuff(2)%end do j = idwbuff(1)%beg, idwbuff(1)%end @@ -679,7 +679,7 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP end if end if @@ -739,7 +739,7 @@ contains if (igr) then if (id == 1) then - #:call GPU_PARALLEL_LOOP(collapse=4) + $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=4) do l = -1, p + 1 do k = -1, n + 1 do j = -1, m + 1 @@ -749,7 +749,7 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP end if call nvtxStartRange("IGR_RIEMANN") @@ -970,7 +970,7 @@ contains ! END: Dimensional Splitting Loop if (ib) then - #:call GPU_PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=3) do l = 0, p do k = 0, n do j = 0, m @@ -982,7 +982,7 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP end if ! Additional Physics and Source Temrs @@ -1037,7 +1037,7 @@ contains if (run_time_info .or. probe_wrt .or. ib .or. bubbles_lagrange) then if (.not. igr) then - #:call GPU_PARALLEL_LOOP(collapse=4) + $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=4) do i = 1, sys_size do l = idwbuff(3)%beg, idwbuff(3)%end do k = idwbuff(2)%beg, idwbuff(2)%end @@ -1047,7 +1047,7 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP end if end if @@ -1079,7 +1079,7 @@ contains real(wp) :: advected_qty_val, pressure_val, velocity_val if (alt_soundspeed) then - #:call GPU_PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(private='[k_loop,l_loop,q_loop]', collapse=3) do q_loop = 0, p do l_loop = 0, n do k_loop = 0, m @@ -1102,7 +1102,7 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP end if select case (idir) @@ -1114,7 +1114,7 @@ contains call s_cbc(q_prim_vf%vf, flux_n(idir)%vf, flux_src_n_vf%vf, idir, 1, irx, iry, irz) end if - #:call GPU_PARALLEL_LOOP(collapse=4,private='[inv_ds,flux_face1,flux_face2]') + $:GPU_PARALLEL_LOOP(collapse=4,private='[j,k_loop,l_loop,q_loop,inv_ds,flux_face1,flux_face2]') do j = 1, sys_size do q_loop = 0, p do l_loop = 0, n @@ -1127,10 +1127,10 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP if (model_eqns == 3) then - #:call GPU_PARALLEL_LOOP(collapse=4,private='[inv_ds,advected_qty_val, pressure_val,flux_face1,flux_face2]') + $:GPU_PARALLEL_LOOP(collapse=4,private='[i_fluid_loop,k_loop,l_loop,q_loop,inv_ds,advected_qty_val, pressure_val,flux_face1,flux_face2]') do q_loop = 0, p do l_loop = 0, n do k_loop = 0, m @@ -1147,7 +1147,7 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP end if call s_add_directional_advection_source_terms(idir, rhs_vf, q_cons_vf, q_prim_vf, flux_src_n_vf, Kterm) @@ -1160,7 +1160,7 @@ contains call s_cbc(q_prim_vf%vf, flux_n(idir)%vf, flux_src_n_vf%vf, idir, 1, irx, iry, irz) end if - #:call GPU_PARALLEL_LOOP(collapse=4,private='[inv_ds,flux_face1,flux_face2]') + $:GPU_PARALLEL_LOOP(collapse=4,private='[j,k,l,q,inv_ds,flux_face1,flux_face2]') do j = 1, sys_size do l = 0, p do k = 0, n @@ -1173,10 +1173,10 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP if (model_eqns == 3) then - #:call GPU_PARALLEL_LOOP(collapse=4,private='[inv_ds,advected_qty_val, pressure_val,flux_face1,flux_face2]') + $:GPU_PARALLEL_LOOP(collapse=4,private='[i_fluid_loop,k,l,q,inv_ds,advected_qty_val, pressure_val,flux_face1,flux_face2]') do l = 0, p do k = 0, n do q = 0, m @@ -1198,11 +1198,11 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP end if if (cyl_coord) then - #:call GPU_PARALLEL_LOOP(collapse=4,private='[flux_face1,flux_face2]') + $:GPU_PARALLEL_LOOP(collapse=4,private='[j,k,l,q,flux_face1,flux_face2]') do j = 1, sys_size do l = 0, p do k = 0, n @@ -1215,7 +1215,7 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP end if call s_add_directional_advection_source_terms(idir, rhs_vf, q_cons_vf, q_prim_vf, flux_src_n_vf, Kterm) @@ -1229,7 +1229,7 @@ contains end if if (grid_geometry == 3) then ! Cylindrical Coordinates - #:call GPU_PARALLEL_LOOP(collapse=4,private='[inv_ds,velocity_val,flux_face1,flux_face2]') + $:GPU_PARALLEL_LOOP(collapse=4,private='[j,k,l,q,inv_ds,velocity_val,flux_face1,flux_face2]') do j = 1, sys_size do k = 0, p do q = 0, n @@ -1244,8 +1244,8 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP - #:call GPU_PARALLEL_LOOP(collapse=4,private='[flux_face1,flux_face2]') + $:END_GPU_PARALLEL_LOOP + $:GPU_PARALLEL_LOOP(collapse=4,private='[j,k,l,q,flux_face1,flux_face2]') do j = 1, sys_size do k = 0, p do q = 0, n @@ -1258,9 +1258,9 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP else ! Cartesian Coordinates - #:call GPU_PARALLEL_LOOP(collapse=4,private='[inv_ds,flux_face1,flux_face2]') + $:GPU_PARALLEL_LOOP(collapse=4,private='[j,k,l,q,inv_ds,flux_face1,flux_face2]') do j = 1, sys_size do k = 0, p do q = 0, n @@ -1273,11 +1273,11 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP end if if (model_eqns == 3) then - #:call GPU_PARALLEL_LOOP(collapse=4,private='[inv_ds,advected_qty_val, pressure_val,flux_face1,flux_face2]') + $:GPU_PARALLEL_LOOP(collapse=4,private='[i_fluid_loop,k,l,q,inv_ds,advected_qty_val, pressure_val,flux_face1,flux_face2]') do k = 0, p do q = 0, n do l = 0, m @@ -1294,7 +1294,7 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP end if call s_add_directional_advection_source_terms(idir, rhs_vf, q_cons_vf, q_prim_vf, flux_src_n_vf, Kterm) @@ -1322,7 +1322,7 @@ contains case (1) ! x-direction use_standard_riemann = (riemann_solver == 1 .or. riemann_solver == 4) if (use_standard_riemann) then - #:call GPU_PARALLEL_LOOP(collapse=4,private='[local_inv_ds, local_term_coeff,local_flux1,local_flux2]') + $:GPU_PARALLEL_LOOP(collapse=4,private='[j_adv,k_idx,l_idx,q_idx,local_inv_ds, local_term_coeff,local_flux1,local_flux2]') do j_adv = advxb, advxe do q_idx = 0, p ! z_extent do l_idx = 0, n ! y_extent @@ -1337,11 +1337,11 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP else ! Other Riemann solvers if (alt_soundspeed) then if (bubbles_euler .neqv. .true.) then - #:call GPU_PARALLEL_LOOP(collapse=3, private='[local_inv_ds, local_q_cons_val, local_k_term_val, local_term_coeff, local_flux1, local_flux2]') + $:GPU_PARALLEL_LOOP(collapse=3, private='[k_idx,l_idx,q_idx,local_inv_ds, local_q_cons_val, local_k_term_val, local_term_coeff, local_flux1, local_flux2]') do q_idx = 0, p; do l_idx = 0, n; do k_idx = 0, m local_inv_ds = 1._wp/dx(k_idx) local_q_cons_val = q_cons_vf_arg%vf(advxe)%sf(k_idx, l_idx, q_idx) @@ -1352,9 +1352,9 @@ contains rhs_vf_arg(advxe)%sf(k_idx, l_idx, q_idx) = rhs_vf_arg(advxe)%sf(k_idx, l_idx, q_idx) + & local_inv_ds*local_term_coeff*(local_flux1 - local_flux2) end do; end do; end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP - #:call GPU_PARALLEL_LOOP(collapse=3, private='[local_inv_ds,local_q_cons_val, local_k_term_val,local_term_coeff, local_flux1, local_flux2]') + $:GPU_PARALLEL_LOOP(collapse=3, private='[k_idx,l_idx,q_idx,local_inv_ds,local_q_cons_val, local_k_term_val,local_term_coeff, local_flux1, local_flux2]') do q_idx = 0, p; do l_idx = 0, n; do k_idx = 0, m local_inv_ds = 1._wp/dx(k_idx) local_q_cons_val = q_cons_vf_arg%vf(advxb)%sf(k_idx, l_idx, q_idx) @@ -1365,10 +1365,10 @@ contains rhs_vf_arg(advxb)%sf(k_idx, l_idx, q_idx) = rhs_vf_arg(advxb)%sf(k_idx, l_idx, q_idx) + & local_inv_ds*local_term_coeff*(local_flux1 - local_flux2) end do; end do; end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP end if else ! NOT alt_soundspeed - #:call GPU_PARALLEL_LOOP(collapse=4,private='[local_inv_ds, local_term_coeff,local_flux1,local_flux2]') + $:GPU_PARALLEL_LOOP(collapse=4,private='[j_adv,k_idx,l_idx,q_idx,local_inv_ds, local_term_coeff,local_flux1,local_flux2]') do j_adv = advxb, advxe do q_idx = 0, p; do l_idx = 0, n; do k_idx = 0, m local_inv_ds = 1._wp/dx(k_idx) @@ -1379,14 +1379,14 @@ contains local_inv_ds*local_term_coeff*(local_flux1 - local_flux2) end do; end do; end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP end if end if case (2) ! y-direction: loops q_idx (x), k_idx (y), l_idx (z); sf(q_idx, k_idx, l_idx); dy(k_idx); Kterm(q_idx,k_idx,l_idx) use_standard_riemann = (riemann_solver == 1 .or. riemann_solver == 4) if (use_standard_riemann) then - #:call GPU_PARALLEL_LOOP(collapse=4,private='[local_inv_ds, local_term_coeff,local_flux1,local_flux2]') + $:GPU_PARALLEL_LOOP(collapse=4,private='[j_adv,k_idx,l_idx,q_idx,local_inv_ds, local_term_coeff,local_flux1,local_flux2]') do j_adv = advxb, advxe do l_idx = 0, p ! z_extent do k_idx = 0, n ! y_extent @@ -1401,11 +1401,11 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP else ! Other Riemann solvers if (alt_soundspeed) then if (bubbles_euler .neqv. .true.) then - #:call GPU_PARALLEL_LOOP(collapse=3, private='[local_inv_ds, local_q_cons_val, local_k_term_val, local_term_coeff, local_flux1, local_flux2]') + $:GPU_PARALLEL_LOOP(collapse=3, private='[k_idx,l_idx,q_idx,local_inv_ds, local_q_cons_val, local_k_term_val, local_term_coeff, local_flux1, local_flux2]') do l_idx = 0, p; do k_idx = 0, n; do q_idx = 0, m local_inv_ds = 1._wp/dy(k_idx) local_q_cons_val = q_cons_vf_arg%vf(advxe)%sf(q_idx, k_idx, l_idx) @@ -1420,9 +1420,9 @@ contains (local_k_term_val/(2._wp*y_cc(k_idx)))*(local_flux1 + local_flux2) end if end do; end do; end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP - #:call GPU_PARALLEL_LOOP(collapse=3, private='[local_inv_ds, local_q_cons_val, local_k_term_val,local_term_coeff, local_flux1, local_flux2]') + $:GPU_PARALLEL_LOOP(collapse=3, private='[k_idx,l_idx,q_idx,local_inv_ds, local_q_cons_val, local_k_term_val,local_term_coeff, local_flux1, local_flux2]') do l_idx = 0, p; do k_idx = 0, n; do q_idx = 0, m local_inv_ds = 1._wp/dy(k_idx) local_q_cons_val = q_cons_vf_arg%vf(advxb)%sf(q_idx, k_idx, l_idx) @@ -1437,10 +1437,10 @@ contains (local_k_term_val/(2._wp*y_cc(k_idx)))*(local_flux1 + local_flux2) end if end do; end do; end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP end if else ! NOT alt_soundspeed - #:call GPU_PARALLEL_LOOP(collapse=4,private='[local_inv_ds, local_term_coeff,local_flux1,local_flux2]') + $:GPU_PARALLEL_LOOP(collapse=4,private='[j_adv,k_idx,l_idx,q_idx,local_inv_ds, local_term_coeff,local_flux1,local_flux2]') do j_adv = advxb, advxe do l_idx = 0, p; do k_idx = 0, n; do q_idx = 0, m local_inv_ds = 1._wp/dy(k_idx) @@ -1451,7 +1451,7 @@ contains local_inv_ds*local_term_coeff*(local_flux1 - local_flux2) end do; end do; end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP end if end if @@ -1463,7 +1463,7 @@ contains end if if (use_standard_riemann) then - #:call GPU_PARALLEL_LOOP(collapse=4,private='[local_inv_ds, local_term_coeff,local_flux1,local_flux2]') + $:GPU_PARALLEL_LOOP(collapse=4,private='[j_adv,k_idx,l_idx,q_idx,local_inv_ds, local_term_coeff,local_flux1,local_flux2]') do j_adv = advxb, advxe do k_idx = 0, p ! z_extent do q_idx = 0, n ! y_extent @@ -1478,11 +1478,11 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP else ! Other Riemann solvers if (alt_soundspeed) then if (bubbles_euler .neqv. .true.) then - #:call GPU_PARALLEL_LOOP(collapse=3, private='[local_inv_ds,local_q_cons_val, local_k_term_val, local_term_coeff, local_flux1, local_flux2]') + $:GPU_PARALLEL_LOOP(collapse=3, private='[k_idx,l_idx,q_idx,local_inv_ds,local_q_cons_val, local_k_term_val, local_term_coeff, local_flux1, local_flux2]') do k_idx = 0, p; do q_idx = 0, n; do l_idx = 0, m local_inv_ds = 1._wp/dz(k_idx) local_q_cons_val = q_cons_vf_arg%vf(advxe)%sf(l_idx, q_idx, k_idx) @@ -1493,9 +1493,9 @@ contains rhs_vf_arg(advxe)%sf(l_idx, q_idx, k_idx) = rhs_vf_arg(advxe)%sf(l_idx, q_idx, k_idx) + & local_inv_ds*local_term_coeff*(local_flux1 - local_flux2) end do; end do; end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP - #:call GPU_PARALLEL_LOOP(collapse=3, private='[local_inv_ds, local_q_cons_val, local_k_term_val, local_term_coeff, local_flux1, local_flux2]') + $:GPU_PARALLEL_LOOP(collapse=3, private='[k_idx,l_idx,q_idx,local_inv_ds, local_q_cons_val, local_k_term_val, local_term_coeff, local_flux1, local_flux2]') do k_idx = 0, p; do q_idx = 0, n; do l_idx = 0, m local_inv_ds = 1._wp/dz(k_idx) local_q_cons_val = q_cons_vf_arg%vf(advxb)%sf(l_idx, q_idx, k_idx) @@ -1506,10 +1506,10 @@ contains rhs_vf_arg(advxb)%sf(l_idx, q_idx, k_idx) = rhs_vf_arg(advxb)%sf(l_idx, q_idx, k_idx) + & local_inv_ds*local_term_coeff*(local_flux1 - local_flux2) end do; end do; end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP end if else ! NOT alt_soundspeed - #:call GPU_PARALLEL_LOOP(collapse=4,private='[local_inv_ds, local_term_coeff,local_flux1,local_flux2]') + $:GPU_PARALLEL_LOOP(collapse=4, private='[j_adv,k_idx,l_idx,q_idx,local_inv_ds, local_term_coeff,local_flux1,local_flux2]') do j_adv = advxb, advxe do k_idx = 0, p; do q_idx = 0, n; do l_idx = 0, m local_inv_ds = 1._wp/dz(k_idx) @@ -1520,7 +1520,7 @@ contains local_inv_ds*local_term_coeff*(local_flux1 - local_flux2) end do; end do; end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP end if end if end select @@ -1542,7 +1542,7 @@ contains if (idir == 1) then ! x-direction if (surface_tension) then - #:call GPU_PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(private='[j,k,l]', collapse=3) do l = 0, p do k = 0, n do j = 0, m @@ -1554,11 +1554,11 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP end if if ((surface_tension .or. viscous) .or. chem_params%diffusion) then - #:call GPU_PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(private='[j,k,l]', collapse=3) do l = 0, p do k = 0, n do j = 0, m @@ -1591,13 +1591,13 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP end if elseif (idir == 2) then ! y-direction if (surface_tension) then - #:call GPU_PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(private='[j,k,l]', collapse=3) do l = 0, p do k = 0, n do j = 0, m @@ -1609,7 +1609,7 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP end if if (cyl_coord .and. ((bc_y%beg == -2) .or. (bc_y%beg == -14))) then @@ -1630,7 +1630,7 @@ contains idwbuff(1), idwbuff(2), idwbuff(3)) end if - #:call GPU_PARALLEL_LOOP(collapse=2) + $:GPU_PARALLEL_LOOP(private='[i,j,l]', collapse=2) do l = 0, p do j = 0, m $:GPU_LOOP(parallelism='[seq]') @@ -1642,11 +1642,11 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP end if - #:call GPU_PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=3) do l = 0, p do k = 1, n do j = 0, m @@ -1660,12 +1660,12 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP else if ((surface_tension .or. viscous) .or. chem_params%diffusion) then - #:call GPU_PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=3) do l = 0, p do k = 0, n do j = 0, m @@ -1697,7 +1697,7 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP end if end if @@ -1706,7 +1706,7 @@ contains if (cyl_coord) then if ((bc_y%beg == -2) .or. (bc_y%beg == -14)) then - #:call GPU_PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=3) do l = 0, p do k = 1, n do j = 0, m @@ -1720,10 +1720,10 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP if (viscous) then - #:call GPU_PARALLEL_LOOP(collapse=2) + $:GPU_PARALLEL_LOOP(private='[i,j,l]', collapse=2) do l = 0, p do j = 0, m $:GPU_LOOP(parallelism='[seq]') @@ -1734,11 +1734,11 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP end if else - #:call GPU_PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=3) do l = 0, p do k = 0, n do j = 0, m @@ -1752,14 +1752,14 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP end if end if elseif (idir == 3) then ! z-direction if (surface_tension) then - #:call GPU_PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(private='[j,k,l]', collapse=3) do l = 0, p do k = 0, n do j = 0, m @@ -1771,11 +1771,11 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP end if if ((surface_tension .or. viscous) .or. chem_params%diffusion) then - #:call GPU_PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=3) do l = 0, p do k = 0, n do j = 0, m @@ -1807,11 +1807,11 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP end if if (grid_geometry == 3) then - #:call GPU_PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(private='[j,k,l]', collapse=3) do l = 0, p do k = 0, n do j = 0, m @@ -1827,7 +1827,7 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP end if end if @@ -1939,7 +1939,7 @@ contains $:GPU_UPDATE(device='[is1,is2,is3,iv]') if (recon_dir == 1) then - #:call GPU_PARALLEL_LOOP(collapse=4) + $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=4) do i = iv%beg, iv%end do l = is3%beg, is3%end do k = is2%beg, is2%end @@ -1950,9 +1950,9 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP else if (recon_dir == 2) then - #:call GPU_PARALLEL_LOOP(collapse=4) + $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=4) do i = iv%beg, iv%end do l = is3%beg, is3%end do k = is2%beg, is2%end @@ -1963,9 +1963,9 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP else if (recon_dir == 3) then - #:call GPU_PARALLEL_LOOP(collapse=4) + $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=4) do i = iv%beg, iv%end do l = is3%beg, is3%end do k = is2%beg, is2%end @@ -1976,7 +1976,7 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP end if end if #:endfor diff --git a/src/simulation/m_riemann_solvers.fpp b/src/simulation/m_riemann_solvers.fpp index bbb190fb1..d19b67f42 100644 --- a/src/simulation/m_riemann_solvers.fpp +++ b/src/simulation/m_riemann_solvers.fpp @@ -974,7 +974,7 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP end if #:endfor @@ -1635,7 +1635,7 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP end if #:endfor @@ -1832,7 +1832,7 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP end if @@ -2383,7 +2383,7 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP elseif (model_eqns == 4) then !ME4 @@ -2623,7 +2623,7 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP elseif (model_eqns == 2 .and. bubbles_euler) then #:call GPU_PARALLEL_LOOP(collapse=3, private='[R0_L, R0_R, V0_L, V0_R, P0_L, P0_R, pbw_L, pbw_R, vel_L, vel_R, rho_avg, alpha_L, alpha_R, h_avg, gamma_avg, s_L, s_R, s_S, nbub_L, nbub_R, ptilde_L, ptilde_R, vel_avg_rms, Re_L, Re_R, pcorr, zcoef, vel_L_tmp, vel_R_tmp]') @@ -3061,7 +3061,7 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP else ! 5-EQUATION MODEL WITH HLLC #:call GPU_PARALLEL_LOOP(collapse=3, private='[T_L, T_R, vel_L, vel_R, Re_L, Re_R, rho_avg, h_avg, gamma_avg, alpha_L, alpha_R, s_L, s_R, s_S, vel_avg_rms, pcorr, zcoef, vel_L_tmp, vel_R_tmp, Ys_L, Ys_R, Xs_L, Xs_R, Gamma_iL, Gamma_iR, Cp_iL, Cp_iR, tau_e_L, tau_e_R, xi_field_L, xi_field_R, Yi_avg,Phi_avg, h_iL, h_iR, h_avg_2]', copyin='[is1, is2, is3]') @@ -3541,7 +3541,7 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP end if end if #:endfor @@ -3827,7 +3827,7 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP #:endblock UNDEF_AMD end if #:endfor @@ -4040,7 +4040,7 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP if (viscous) then #:call GPU_PARALLEL_LOOP(collapse=3) @@ -4053,7 +4053,7 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP if (n > 0) then #:call GPU_PARALLEL_LOOP(collapse=3) @@ -4066,7 +4066,7 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP if (p > 0) then #:call GPU_PARALLEL_LOOP(collapse=3) @@ -4079,7 +4079,7 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP end if end if @@ -4099,7 +4099,7 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP if (viscous) then @@ -4113,7 +4113,7 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP if (n > 0) then #:call GPU_PARALLEL_LOOP(collapse=3) @@ -4126,7 +4126,7 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP if (p > 0) then #:call GPU_PARALLEL_LOOP(collapse=3) @@ -4139,7 +4139,7 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP end if end if @@ -4162,7 +4162,7 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP if (viscous) then @@ -4175,7 +4175,7 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP #:call GPU_PARALLEL_LOOP(collapse=3) do i = momxb, momxe @@ -4186,7 +4186,7 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP if (p > 0) then #:call GPU_PARALLEL_LOOP(collapse=3) @@ -4198,7 +4198,7 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP end if end if @@ -4216,7 +4216,7 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP if (viscous) then @@ -4229,7 +4229,7 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP #:call GPU_PARALLEL_LOOP(collapse=3) do i = momxb, momxe @@ -4240,7 +4240,7 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP if (p > 0) then #:call GPU_PARALLEL_LOOP(collapse=3) @@ -4252,7 +4252,7 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP end if end if @@ -4273,7 +4273,7 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP if (viscous) then #:call GPU_PARALLEL_LOOP(collapse=3) @@ -4285,7 +4285,7 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP #:call GPU_PARALLEL_LOOP(collapse=3) do i = momxb, momxe do k = isy%beg, isy%end @@ -4295,7 +4295,7 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP #:call GPU_PARALLEL_LOOP(collapse=3) do i = momxb, momxe do k = isy%beg, isy%end @@ -4305,7 +4305,7 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP end if end if @@ -4321,7 +4321,7 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP if (viscous) then #:call GPU_PARALLEL_LOOP(collapse=3) @@ -4333,7 +4333,7 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP #:call GPU_PARALLEL_LOOP(collapse=3) do i = momxb, momxe @@ -4344,7 +4344,7 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP #:call GPU_PARALLEL_LOOP(collapse=3) do i = momxb, momxe @@ -4355,7 +4355,7 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP end if end if @@ -4409,7 +4409,7 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP end if if (chem_params%diffusion) then @@ -4425,7 +4425,7 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP end if if (qbmm) then @@ -4439,7 +4439,7 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP end if ! Reshaping Inputted Data in y-direction @@ -4456,7 +4456,7 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP end if if (chem_params%diffusion) then @@ -4472,7 +4472,7 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP end if if (qbmm) then @@ -4486,7 +4486,7 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP end if ! Reshaping Inputted Data in z-direction @@ -4503,7 +4503,7 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP end if if (chem_params%diffusion) then @@ -4519,7 +4519,7 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP end if if (qbmm) then @@ -4533,7 +4533,7 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP end if end if @@ -4696,7 +4696,7 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP end subroutine s_compute_cylindrical_viscous_source_flux @@ -4831,7 +4831,7 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP end subroutine s_compute_cartesian_viscous_source_flux @@ -4923,7 +4923,7 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP if (cyl_coord) then #:call GPU_PARALLEL_LOOP(collapse=4) @@ -4937,7 +4937,7 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP end if #:call GPU_PARALLEL_LOOP(collapse=3) @@ -4949,7 +4949,7 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP if (riemann_solver == 1 .or. riemann_solver == 4) then #:call GPU_PARALLEL_LOOP(collapse=4) @@ -4963,7 +4963,7 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP end if ! Reshaping Outputted Data in z-direction @@ -4980,7 +4980,7 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP if (grid_geometry == 3) then #:call GPU_PARALLEL_LOOP(collapse=4) do i = 1, sys_size @@ -4994,7 +4994,7 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP end if #:call GPU_PARALLEL_LOOP(collapse=3) @@ -5006,7 +5006,7 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP if (riemann_solver == 1 .or. riemann_solver == 4) then #:call GPU_PARALLEL_LOOP(collapse=4) @@ -5020,7 +5020,7 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP end if elseif (norm_dir == 1) then @@ -5035,7 +5035,7 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP #:call GPU_PARALLEL_LOOP(collapse=3) do l = is3%beg, is3%end @@ -5046,7 +5046,7 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP if (riemann_solver == 1 .or. riemann_solver == 4) then #:call GPU_PARALLEL_LOOP(collapse=4) @@ -5060,7 +5060,7 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP end if end if diff --git a/src/simulation/m_surface_tension.fpp b/src/simulation/m_surface_tension.fpp index 40977b9a1..643850915 100644 --- a/src/simulation/m_surface_tension.fpp +++ b/src/simulation/m_surface_tension.fpp @@ -130,7 +130,7 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP elseif (id == 2) then @@ -176,7 +176,7 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP elseif (id == 3) then @@ -222,7 +222,7 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP end if @@ -252,7 +252,7 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP #:call GPU_PARALLEL_LOOP(collapse=3) do l = 0, p @@ -263,7 +263,7 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP if (p > 0) then #:call GPU_PARALLEL_LOOP(collapse=3) @@ -275,7 +275,7 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP end if #:call GPU_PARALLEL_LOOP(collapse=3) @@ -294,7 +294,7 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP call s_populate_capillary_buffers(c_divs, bc_type) @@ -354,7 +354,7 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP else if (recon_dir == 2) then #:call GPU_PARALLEL_LOOP(collapse=4) do i = iv%beg, iv%end @@ -367,7 +367,7 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP else if (recon_dir == 3) then #:call GPU_PARALLEL_LOOP(collapse=4) do i = iv%beg, iv%end @@ -380,7 +380,7 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP end if end if #:endfor diff --git a/src/simulation/m_time_steppers.fpp b/src/simulation/m_time_steppers.fpp index d7ab8eb54..dc30041b5 100644 --- a/src/simulation/m_time_steppers.fpp +++ b/src/simulation/m_time_steppers.fpp @@ -524,7 +524,7 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP !Evolve pb and mv for non-polytropic qbmm if (qbmm .and. (.not. polytropic)) then #:call GPU_PARALLEL_LOOP(collapse=5) @@ -552,7 +552,7 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP end if if (bodyForces) call s_apply_bodyforces(q_cons_ts(1)%vf, q_prim_vf, rhs_vf, rk_coef(s, 3)*dt/rk_coef(s, 4)) @@ -699,7 +699,7 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP #:call GPU_PARALLEL(copyout='[dt_local]', copyin='[max_dt]') dt_local = minval(max_dt) @@ -741,7 +741,7 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP call nvtxEndRange @@ -767,7 +767,7 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP elseif (t_step == t_step_start + 1) then #:call GPU_PARALLEL_LOOP(collapse=4) do i = 1, sys_size @@ -779,7 +779,7 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP elseif (t_step == t_step_start + 2) then #:call GPU_PARALLEL_LOOP(collapse=4) do i = 1, sys_size @@ -791,7 +791,7 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP elseif (t_step == t_step_start + 3) then #:call GPU_PARALLEL_LOOP(collapse=4) do i = 1, sys_size @@ -803,7 +803,7 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP else ! All other timesteps #:call GPU_PARALLEL_LOOP(collapse=4) do i = 1, sys_size @@ -818,7 +818,7 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP end if end subroutine s_time_step_cycling diff --git a/src/simulation/m_viscous.fpp b/src/simulation/m_viscous.fpp index f6d8cb6b2..e40c5577a 100644 --- a/src/simulation/m_viscous.fpp +++ b/src/simulation/m_viscous.fpp @@ -91,7 +91,7 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP if (shear_stress) then ! Shear stresses #:call GPU_PARALLEL_LOOP(collapse=3, private='[alpha_visc, alpha_rho_visc, Re_visc, tau_Re]') @@ -198,7 +198,7 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP end if if (bulk_stress) then ! Bulk stresses @@ -301,7 +301,7 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP end if if (p == 0) return @@ -412,7 +412,7 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP end if if (bulk_stress) then ! Bulk stresses @@ -513,7 +513,7 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP end if end subroutine s_compute_viscous_stress_tensor @@ -612,7 +612,7 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP #:call GPU_PARALLEL_LOOP(collapse=3) do l = is3_viscous%beg, is3_viscous%end @@ -628,7 +628,7 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP if (n > 0) then @@ -646,7 +646,7 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP #:call GPU_PARALLEL_LOOP(collapse=3) do l = is3_viscous%beg, is3_viscous%end @@ -662,7 +662,7 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP #:call GPU_PARALLEL_LOOP(collapse=3) do l = is3_viscous%beg, is3_viscous%end @@ -682,7 +682,7 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP #:call GPU_PARALLEL_LOOP(collapse=3) do l = is3_viscous%beg, is3_viscous%end @@ -703,7 +703,7 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP #:call GPU_PARALLEL_LOOP(collapse=3) do l = is3_viscous%beg, is3_viscous%end @@ -724,7 +724,7 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP #:call GPU_PARALLEL_LOOP(collapse=3) do l = is3_viscous%beg, is3_viscous%end @@ -745,7 +745,7 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP if (p > 0) then @@ -764,7 +764,7 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP #:call GPU_PARALLEL_LOOP(collapse=3) do j = is3_viscous%beg, is3_viscous%end - 1 @@ -781,7 +781,7 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP #:call GPU_PARALLEL_LOOP(collapse=3) do l = is3_viscous%beg + 1, is3_viscous%end - 1 @@ -803,7 +803,7 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP #:call GPU_PARALLEL_LOOP(collapse=3) do l = is3_viscous%beg + 1, is3_viscous%end - 1 @@ -825,7 +825,7 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP #:call GPU_PARALLEL_LOOP(collapse=3) do l = is3_viscous%beg + 1, is3_viscous%end - 1 @@ -847,7 +847,7 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP #:call GPU_PARALLEL_LOOP(collapse=3) do l = is3_viscous%beg + 1, is3_viscous%end - 1 @@ -869,7 +869,7 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP #:call GPU_PARALLEL_LOOP(collapse=3) do j = is3_viscous%beg + 1, is3_viscous%end @@ -891,7 +891,7 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP #:call GPU_PARALLEL_LOOP(collapse=3) do j = is3_viscous%beg, is3_viscous%end - 1 @@ -913,7 +913,7 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP #:call GPU_PARALLEL_LOOP(collapse=3) do j = is3_viscous%beg + 1, is3_viscous%end do l = is2_viscous%beg, is2_viscous%end @@ -934,7 +934,7 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP #:call GPU_PARALLEL_LOOP(collapse=3) do j = is3_viscous%beg, is3_viscous%end - 1 do l = is2_viscous%beg, is2_viscous%end @@ -954,7 +954,7 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP do i = iv%beg, iv%end call s_compute_fd_gradient(q_prim_qp%vf(i), & @@ -1058,7 +1058,7 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP elseif (norm_dir == 3) then #:call GPU_PARALLEL_LOOP(collapse=4) do i = iv%beg, iv%end @@ -1071,7 +1071,7 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP elseif (norm_dir == 1) then #:call GPU_PARALLEL_LOOP(collapse=4) do i = iv%beg, iv%end @@ -1084,7 +1084,7 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP end if end if end if @@ -1162,7 +1162,7 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP elseif (norm_dir == 3) then #:call GPU_PARALLEL_LOOP(collapse=4) do i = iv%beg, iv%end @@ -1175,7 +1175,7 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP elseif (norm_dir == 1) then #:call GPU_PARALLEL_LOOP(collapse=4) do i = iv%beg, iv%end @@ -1188,7 +1188,7 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP end if end if end if @@ -1260,7 +1260,7 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP ! END: First-Order Spatial Derivatives in x-direction @@ -1289,7 +1289,7 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP ! END: First-Order Spatial Derivatives in y-direction @@ -1318,7 +1318,7 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP end if ! END: First-Order Spatial Derivatives in z-direction @@ -1368,7 +1368,7 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP if (n > 0) then #:call GPU_PARALLEL_LOOP(collapse=3) @@ -1381,7 +1381,7 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP end if if (p > 0) then @@ -1395,7 +1395,7 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP end if #:call GPU_PARALLEL_LOOP(collapse=2) @@ -1409,7 +1409,7 @@ contains (x_cc(idwbuff(1)%end) - x_cc(idwbuff(1)%end - 2)) end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP if (n > 0) then #:call GPU_PARALLEL_LOOP(collapse=2) do l = idwbuff(3)%beg, idwbuff(3)%end @@ -1422,7 +1422,7 @@ contains (y_cc(idwbuff(2)%end) - y_cc(idwbuff(2)%end - 2)) end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP if (p > 0) then #:call GPU_PARALLEL_LOOP(collapse=2) do k = idwbuff(2)%beg, idwbuff(2)%end @@ -1435,7 +1435,7 @@ contains (z_cc(idwbuff(3)%end) - z_cc(idwbuff(3)%end - 2)) end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP end if end if @@ -1447,7 +1447,7 @@ contains (x_cc(2) - x_cc(0)) end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP end if if (bc_x%end <= BC_GHOST_EXTRAP) then #:call GPU_PARALLEL_LOOP(collapse=2) @@ -1457,7 +1457,7 @@ contains (x_cc(m) - x_cc(m - 2)) end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP end if if (n > 0) then if (bc_y%beg <= BC_GHOST_EXTRAP .and. bc_y%beg /= BC_NULL) then @@ -1468,7 +1468,7 @@ contains (y_cc(2) - y_cc(0)) end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP end if if (bc_y%end <= BC_GHOST_EXTRAP) then #:call GPU_PARALLEL_LOOP(collapse=2) @@ -1478,7 +1478,7 @@ contains (y_cc(n) - y_cc(n - 2)) end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP end if if (p > 0) then if (bc_z%beg <= BC_GHOST_EXTRAP) then @@ -1490,7 +1490,7 @@ contains (z_cc(2) - z_cc(0)) end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP end if if (bc_z%end <= BC_GHOST_EXTRAP) then #:call GPU_PARALLEL_LOOP(collapse=2) @@ -1501,7 +1501,7 @@ contains (z_cc(p) - z_cc(p - 2)) end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP end if end if end if diff --git a/src/simulation/m_weno.fpp b/src/simulation/m_weno.fpp index 6874237a4..ec36b5b5e 100644 --- a/src/simulation/m_weno.fpp +++ b/src/simulation/m_weno.fpp @@ -681,7 +681,7 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP else if (weno_dir == 2) then #:call GPU_PARALLEL_LOOP(collapse=4) do i = 1, ubound(v_vf, 1) @@ -694,7 +694,7 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP else if (weno_dir == 3) then #:call GPU_PARALLEL_LOOP(collapse=4) do i = 1, ubound(v_vf, 1) @@ -707,7 +707,7 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP end if elseif (weno_order == 3) then #:for WENO_DIR, XYZ in [(1, 'x'), (2, 'y'), (3, 'z')] @@ -785,7 +785,7 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP end if #:endfor elseif (weno_order == 5) then @@ -900,7 +900,7 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP if (mp_weno) then call s_preserve_monotonicity(v_rs_ws_${XYZ}$, vL_rs_vf_${XYZ}$, & @@ -1096,7 +1096,7 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP end if #:endfor @@ -1149,7 +1149,7 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP end if ! Reshaping/Projecting onto Characteristic Fields in y-direction @@ -1166,7 +1166,7 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP end if ! Reshaping/Projecting onto Characteristic Fields in z-direction @@ -1183,7 +1183,7 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP end if end subroutine s_initialize_weno @@ -1358,7 +1358,7 @@ contains end do end do end do - #:endcall GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP end subroutine s_preserve_monotonicity From 3c010f89362338ffc314161dc37e7eab5bc914dc Mon Sep 17 00:00:00 2001 From: "Daniel J. Vickers" Date: Thu, 6 Nov 2025 10:45:38 -0500 Subject: [PATCH 04/33] Finished with parallel loops --- src/simulation/m_riemann_solvers.fpp | 108 +++++++++++++-------------- src/simulation/m_surface_tension.fpp | 20 ++--- src/simulation/m_time_steppers.fpp | 18 ++--- src/simulation/m_viscous.fpp | 88 +++++++++++----------- src/simulation/m_weno.fpp | 20 ++--- 5 files changed, 127 insertions(+), 127 deletions(-) diff --git a/src/simulation/m_riemann_solvers.fpp b/src/simulation/m_riemann_solvers.fpp index d19b67f42..252e02351 100644 --- a/src/simulation/m_riemann_solvers.fpp +++ b/src/simulation/m_riemann_solvers.fpp @@ -358,7 +358,7 @@ contains #:for NORM_DIR, XYZ in [(1, 'x'), (2, 'y'), (3, 'z')] if (norm_dir == ${NORM_DIR}$) then - #:call GPU_PARALLEL_LOOP(collapse=3, private='[alpha_rho_L, alpha_rho_R, vel_L, vel_R, alpha_L, alpha_R, tau_e_L, tau_e_R,G_L, G_R, Re_L, Re_R, rho_avg, h_avg, gamma_avg, s_L, s_R, s_S, Ys_L, Ys_R, xi_field_L, xi_field_R, Cp_iL, Cp_iR, Xs_L, Xs_R, Gamma_iL, Gamma_iR, Yi_avg, Phi_avg, h_iL, h_iR, h_avg_2, c_fast, pres_mag, B, Ga, vdotB, B2, b4, cm, pcorr, zcoef, vel_L_tmp, vel_R_tmp]') + $:GPU_PARALLEL_LOOP(collapse=3, private='[i,j,k,l,alpha_rho_L, alpha_rho_R, vel_L, vel_R, alpha_L, alpha_R, tau_e_L, tau_e_R,G_L, G_R, Re_L, Re_R, rho_avg, h_avg, gamma_avg, s_L, s_R, s_S, Ys_L, Ys_R, xi_field_L, xi_field_R, Cp_iL, Cp_iR, Xs_L, Xs_R, Gamma_iL, Gamma_iR, Yi_avg, Phi_avg, h_iL, h_iR, h_avg_2, c_fast, pres_mag, B, Ga, vdotB, B2, b4, cm, pcorr, zcoef, vel_L_tmp, vel_R_tmp]') do l = is3%beg, is3%end do k = is2%beg, is2%end do j = is1%beg, is1%end @@ -1118,7 +1118,7 @@ contains #:for NORM_DIR, XYZ in [(1, 'x'), (2, 'y'), (3, 'z')] if (norm_dir == ${NORM_DIR}$) then - #:call GPU_PARALLEL_LOOP(collapse=3, private='[alpha_rho_L, alpha_rho_R, vel_L, vel_R, alpha_L, alpha_R, tau_e_L, tau_e_R, G_L, G_R, Re_L, Re_R, rho_avg, h_avg, gamma_avg, s_L, s_R, s_S, Ys_L, Ys_R, xi_field_L, xi_field_R, Cp_iL, Cp_iR, Xs_L, Xs_R, Gamma_iL, Gamma_iR, Yi_avg, Phi_avg, h_iL, h_iR, h_avg_2, c_fast, pres_mag, B, Ga, vdotB, B2, b4, cm, pcorr, zcoef, vel_L_tmp, vel_R_tmp, vel_grad_L, vel_grad_R, idx_right_phys]') + $:GPU_PARALLEL_LOOP(collapse=3, private='[i,j,k,l,alpha_rho_L, alpha_rho_R, vel_L, vel_R, alpha_L, alpha_R, tau_e_L, tau_e_R, G_L, G_R, Re_L, Re_R, rho_avg, h_avg, gamma_avg, s_L, s_R, s_S, Ys_L, Ys_R, xi_field_L, xi_field_R, Cp_iL, Cp_iR, Xs_L, Xs_R, Gamma_iL, Gamma_iR, Yi_avg, Phi_avg, h_iL, h_iR, h_avg_2, c_fast, pres_mag, B, Ga, vdotB, B2, b4, cm, pcorr, zcoef, vel_L_tmp, vel_R_tmp, vel_grad_L, vel_grad_R, idx_right_phys]') do l = is3%beg, is3%end do k = is2%beg, is2%end do j = is1%beg, is1%end @@ -1641,7 +1641,7 @@ contains #:endfor if (viscous) then - #:call GPU_PARALLEL_LOOP(collapse=3, private='[idx_right_phys, vel_grad_L, vel_grad_R, alpha_L, alpha_R, vel_L, vel_R, Re_L, Re_R]') + $:GPU_PARALLEL_LOOP(collapse=3, private='[i,j,k,l,idx_right_phys, vel_grad_L, vel_grad_R, alpha_L, alpha_R, vel_L, vel_R, Re_L, Re_R]') do l = isz%beg, isz%end do k = isy%beg, isy%end do j = isx%beg, isx%end @@ -1991,7 +1991,7 @@ contains ! 6-EQUATION MODEL WITH HLLC if (model_eqns == 3) then !ME3 - #:call GPU_PARALLEL_LOOP(collapse=3, private='[vel_L, vel_R, vel_K_Star, Re_L, Re_R, rho_avg, h_avg, gamma_avg, s_L, s_R, s_S, vel_avg_rms, alpha_L, alpha_R, Ys_L, Ys_R, Xs_L, Xs_R, Gamma_iL, Gamma_iR, Cp_iL, Cp_iR, Yi_avg, Phi_avg, h_iL, h_iR, h_avg_2, tau_e_L, tau_e_R, G_L, G_R, flux_ene_e, xi_field_L, xi_field_R, pcorr, zcoef, vel_L_tmp, vel_R_tmp]') + $:GPU_PARALLEL_LOOP(collapse=3, private='[i,j,k,l,vel_L, vel_R, vel_K_Star, Re_L, Re_R, rho_avg, h_avg, gamma_avg, s_L, s_R, s_S, vel_avg_rms, alpha_L, alpha_R, Ys_L, Ys_R, Xs_L, Xs_R, Gamma_iL, Gamma_iR, Cp_iL, Cp_iR, Yi_avg, Phi_avg, h_iL, h_iR, h_avg_2, tau_e_L, tau_e_R, G_L, G_R, flux_ene_e, xi_field_L, xi_field_R, pcorr, zcoef, vel_L_tmp, vel_R_tmp]') do l = is3%beg, is3%end do k = is2%beg, is2%end do j = is1%beg, is1%end @@ -2387,7 +2387,7 @@ contains elseif (model_eqns == 4) then !ME4 - #:call GPU_PARALLEL_LOOP(collapse=3, private='[alpha_rho_L, alpha_rho_R, vel_L, vel_R, alpha_L, alpha_R, rho_avg, h_avg, gamma_avg, s_L, s_R, s_S, vel_avg_rms, nbub_L, nbub_R, ptilde_L, ptilde_R]') + $:GPU_PARALLEL_LOOP(collapse=3, private='[i,j,k,l,alpha_rho_L, alpha_rho_R, vel_L, vel_R, alpha_L, alpha_R, rho_avg, h_avg, gamma_avg, s_L, s_R, s_S, vel_avg_rms, nbub_L, nbub_R, ptilde_L, ptilde_R]') do l = is3%beg, is3%end do k = is2%beg, is2%end do j = is1%beg, is1%end @@ -2626,7 +2626,7 @@ contains $:END_GPU_PARALLEL_LOOP elseif (model_eqns == 2 .and. bubbles_euler) then - #:call GPU_PARALLEL_LOOP(collapse=3, private='[R0_L, R0_R, V0_L, V0_R, P0_L, P0_R, pbw_L, pbw_R, vel_L, vel_R, rho_avg, alpha_L, alpha_R, h_avg, gamma_avg, s_L, s_R, s_S, nbub_L, nbub_R, ptilde_L, ptilde_R, vel_avg_rms, Re_L, Re_R, pcorr, zcoef, vel_L_tmp, vel_R_tmp]') + $:GPU_PARALLEL_LOOP(collapse=3, private='[i,j,k,l,R0_L, R0_R, V0_L, V0_R, P0_L, P0_R, pbw_L, pbw_R, vel_L, vel_R, rho_avg, alpha_L, alpha_R, h_avg, gamma_avg, s_L, s_R, s_S, nbub_L, nbub_R, ptilde_L, ptilde_R, vel_avg_rms, Re_L, Re_R, pcorr, zcoef, vel_L_tmp, vel_R_tmp]') do l = is3%beg, is3%end do k = is2%beg, is2%end do j = is1%beg, is1%end @@ -3064,7 +3064,7 @@ contains $:END_GPU_PARALLEL_LOOP else ! 5-EQUATION MODEL WITH HLLC - #:call GPU_PARALLEL_LOOP(collapse=3, private='[T_L, T_R, vel_L, vel_R, Re_L, Re_R, rho_avg, h_avg, gamma_avg, alpha_L, alpha_R, s_L, s_R, s_S, vel_avg_rms, pcorr, zcoef, vel_L_tmp, vel_R_tmp, Ys_L, Ys_R, Xs_L, Xs_R, Gamma_iL, Gamma_iR, Cp_iL, Cp_iR, tau_e_L, tau_e_R, xi_field_L, xi_field_R, Yi_avg,Phi_avg, h_iL, h_iR, h_avg_2]', copyin='[is1, is2, is3]') + $:GPU_PARALLEL_LOOP(collapse=3, private='[i,j,k,l,T_L, T_R, vel_L, vel_R, Re_L, Re_R, rho_avg, h_avg, gamma_avg, alpha_L, alpha_R, s_L, s_R, s_S, vel_avg_rms, pcorr, zcoef, vel_L_tmp, vel_R_tmp, Ys_L, Ys_R, Xs_L, Xs_R, Gamma_iL, Gamma_iR, Cp_iL, Cp_iR, tau_e_L, tau_e_R, xi_field_L, xi_field_R, Yi_avg,Phi_avg, h_iL, h_iR, h_avg_2]', copyin='[is1, is2, is3]') do l = is3%beg, is3%end do k = is2%beg, is2%end do j = is1%beg, is1%end @@ -3655,7 +3655,7 @@ contains #:for NORM_DIR, XYZ in [(1, 'x'), (2, 'y'), (3, 'z')] if (norm_dir == ${NORM_DIR}$) then #:block UNDEF_AMD - #:call GPU_PARALLEL_LOOP(collapse=3, private='[alpha_rho_L, alpha_rho_R, vel, alpha_L, alpha_R, rho, pres,E, H_no_mag, gamma, pi_inf, qv, vel_rms, B, c, c_fast, pres_mag, U_L, U_R, U_starL, U_starR, U_doubleL, U_doubleR, F_L, F_R, F_starL, F_starR, F_hlld]') + $:GPU_PARALLEL_LOOP(collapse=3, private='[i,j,k,l,alpha_rho_L, alpha_rho_R, vel, alpha_L, alpha_R, rho, pres,E, H_no_mag, gamma, pi_inf, qv, vel_rms, B, c, c_fast, pres_mag, U_L, U_R, U_starL, U_starR, U_doubleL, U_doubleR, F_L, F_R, F_starL, F_starR, F_hlld]') do l = is3%beg, is3%end do k = is2%beg, is2%end do j = is1%beg, is1%end @@ -4031,7 +4031,7 @@ contains if (norm_dir == 1) then if (bc_x%beg == BC_RIEMANN_EXTRAP) then ! Riemann state extrap. BC at beginning - #:call GPU_PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(private='[i,k,l]', collapse=3) do i = 1, sys_size do l = is3%beg, is3%end do k = is2%beg, is2%end @@ -4043,7 +4043,7 @@ contains $:END_GPU_PARALLEL_LOOP if (viscous) then - #:call GPU_PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(private='[i,k,l]', collapse=3) do i = momxb, momxe do l = isz%beg, isz%end do k = isy%beg, isy%end @@ -4056,7 +4056,7 @@ contains $:END_GPU_PARALLEL_LOOP if (n > 0) then - #:call GPU_PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(private='[i,k,l]', collapse=3) do i = momxb, momxe do l = isz%beg, isz%end do k = isy%beg, isy%end @@ -4069,7 +4069,7 @@ contains $:END_GPU_PARALLEL_LOOP if (p > 0) then - #:call GPU_PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(private='[i,k,l]', collapse=3) do i = momxb, momxe do l = isz%beg, isz%end do k = isy%beg, isy%end @@ -4090,7 +4090,7 @@ contains if (bc_x%end == BC_RIEMANN_EXTRAP) then ! Riemann state extrap. BC at end - #:call GPU_PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(private='[i,k,l]', collapse=3) do i = 1, sys_size do l = is3%beg, is3%end do k = is2%beg, is2%end @@ -4103,7 +4103,7 @@ contains if (viscous) then - #:call GPU_PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(private='[i,k,l]', collapse=3) do i = momxb, momxe do l = isz%beg, isz%end do k = isy%beg, isy%end @@ -4116,7 +4116,7 @@ contains $:END_GPU_PARALLEL_LOOP if (n > 0) then - #:call GPU_PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(private='[i,k,l]', collapse=3) do i = momxb, momxe do l = isz%beg, isz%end do k = isy%beg, isy%end @@ -4129,7 +4129,7 @@ contains $:END_GPU_PARALLEL_LOOP if (p > 0) then - #:call GPU_PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(private='[i,k,l]', collapse=3) do i = momxb, momxe do l = isz%beg, isz%end do k = isy%beg, isy%end @@ -4153,7 +4153,7 @@ contains elseif (norm_dir == 2) then if (bc_y%beg == BC_RIEMANN_EXTRAP) then ! Riemann state extrap. BC at beginning - #:call GPU_PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(private='[i,k,l]', collapse=3) do i = 1, sys_size do l = is3%beg, is3%end do k = is2%beg, is2%end @@ -4166,7 +4166,7 @@ contains if (viscous) then - #:call GPU_PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(private='[i,j,l]', collapse=3) do i = momxb, momxe do l = isz%beg, isz%end do j = isx%beg, isx%end @@ -4177,7 +4177,7 @@ contains end do $:END_GPU_PARALLEL_LOOP - #:call GPU_PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(private='[i,j,l]', collapse=3) do i = momxb, momxe do l = isz%beg, isz%end do j = isx%beg, isx%end @@ -4189,7 +4189,7 @@ contains $:END_GPU_PARALLEL_LOOP if (p > 0) then - #:call GPU_PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(private='[i,j,l]', collapse=3) do i = momxb, momxe do l = isz%beg, isz%end do j = isx%beg, isx%end @@ -4207,7 +4207,7 @@ contains if (bc_y%end == BC_RIEMANN_EXTRAP) then ! Riemann state extrap. BC at end - #:call GPU_PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(private='[i,k,l]', collapse=3) do i = 1, sys_size do l = is3%beg, is3%end do k = is2%beg, is2%end @@ -4220,7 +4220,7 @@ contains if (viscous) then - #:call GPU_PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(private='[i,j,l]', collapse=3) do i = momxb, momxe do l = isz%beg, isz%end do j = isx%beg, isx%end @@ -4231,7 +4231,7 @@ contains end do $:END_GPU_PARALLEL_LOOP - #:call GPU_PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(private='[i,j,l]', collapse=3) do i = momxb, momxe do l = isz%beg, isz%end do j = isx%beg, isx%end @@ -4243,7 +4243,7 @@ contains $:END_GPU_PARALLEL_LOOP if (p > 0) then - #:call GPU_PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(private='[i,j,l]', collapse=3) do i = momxb, momxe do l = isz%beg, isz%end do j = isx%beg, isx%end @@ -4264,7 +4264,7 @@ contains else if (bc_z%beg == BC_RIEMANN_EXTRAP) then ! Riemann state extrap. BC at beginning - #:call GPU_PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(private='[i,k,l]', collapse=3) do i = 1, sys_size do l = is3%beg, is3%end do k = is2%beg, is2%end @@ -4276,7 +4276,7 @@ contains $:END_GPU_PARALLEL_LOOP if (viscous) then - #:call GPU_PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(private='[i,k,j]', collapse=3) do i = momxb, momxe do k = isy%beg, isy%end do j = isx%beg, isx%end @@ -4286,7 +4286,7 @@ contains end do end do $:END_GPU_PARALLEL_LOOP - #:call GPU_PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(private='[i,k,j]', collapse=3) do i = momxb, momxe do k = isy%beg, isy%end do j = isx%beg, isx%end @@ -4296,7 +4296,7 @@ contains end do end do $:END_GPU_PARALLEL_LOOP - #:call GPU_PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(private='[i,k,j]', collapse=3) do i = momxb, momxe do k = isy%beg, isy%end do j = isx%beg, isx%end @@ -4312,7 +4312,7 @@ contains if (bc_z%end == BC_RIEMANN_EXTRAP) then ! Riemann state extrap. BC at end - #:call GPU_PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(private='[i,k,l]', collapse=3) do i = 1, sys_size do l = is3%beg, is3%end do k = is2%beg, is2%end @@ -4324,7 +4324,7 @@ contains $:END_GPU_PARALLEL_LOOP if (viscous) then - #:call GPU_PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(private='[i,k,j]', collapse=3) do i = momxb, momxe do k = isy%beg, isy%end do j = isx%beg, isx%end @@ -4335,7 +4335,7 @@ contains end do $:END_GPU_PARALLEL_LOOP - #:call GPU_PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(private='[i,k,j]', collapse=3) do i = momxb, momxe do k = isy%beg, isy%end do j = isx%beg, isx%end @@ -4346,7 +4346,7 @@ contains end do $:END_GPU_PARALLEL_LOOP - #:call GPU_PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(private='[i,k,j]', collapse=3) do i = momxb, momxe do k = isy%beg, isy%end do j = isx%beg, isx%end @@ -4399,7 +4399,7 @@ contains if (viscous .or. (surface_tension)) then - #:call GPU_PARALLEL_LOOP(collapse=4) + $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=4) do i = momxb, E_idx do l = is3%beg, is3%end do k = is2%beg, is2%end @@ -4413,7 +4413,7 @@ contains end if if (chem_params%diffusion) then - #:call GPU_PARALLEL_LOOP(collapse=4) + $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=4) do i = E_idx, chemxe do l = is3%beg, is3%end do k = is2%beg, is2%end @@ -4429,7 +4429,7 @@ contains end if if (qbmm) then - #:call GPU_PARALLEL_LOOP(collapse=4) + $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=4) do i = 1, 4 do l = is3%beg, is3%end do k = is2%beg, is2%end @@ -4446,7 +4446,7 @@ contains elseif (norm_dir == 2) then if (viscous .or. (surface_tension)) then - #:call GPU_PARALLEL_LOOP(collapse=4) + $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=4) do i = momxb, E_idx do l = is3%beg, is3%end do j = is1%beg, is1%end @@ -4460,7 +4460,7 @@ contains end if if (chem_params%diffusion) then - #:call GPU_PARALLEL_LOOP(collapse=4) + $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=4) do i = E_idx, chemxe do l = is3%beg, is3%end do j = is1%beg, is1%end @@ -4476,7 +4476,7 @@ contains end if if (qbmm) then - #:call GPU_PARALLEL_LOOP(collapse=4) + $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=4) do i = 1, 4 do l = is3%beg, is3%end do k = is2%beg, is2%end @@ -4493,7 +4493,7 @@ contains else if (viscous .or. (surface_tension)) then - #:call GPU_PARALLEL_LOOP(collapse=4) + $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=4) do i = momxb, E_idx do j = is1%beg, is1%end do k = is2%beg, is2%end @@ -4507,7 +4507,7 @@ contains end if if (chem_params%diffusion) then - #:call GPU_PARALLEL_LOOP(collapse=4) + $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=4) do i = E_idx, chemxe do j = is1%beg, is1%end do k = is2%beg, is2%end @@ -4523,7 +4523,7 @@ contains end if if (qbmm) then - #:call GPU_PARALLEL_LOOP(collapse=4) + $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=4) do i = 1, 4 do l = is3%beg, is3%end do k = is2%beg, is2%end @@ -4590,7 +4590,7 @@ contains integer :: i_vel !!< Loop iterator for velocity components. integer :: idx_rp(3) !!< Indices $(j,k,l)$ of 'right' point for averaging. - #:call GPU_PARALLEL_LOOP(collapse=3, private='[idx_rp, avg_v_int, avg_dvdx_int, avg_dvdy_int, avg_dvdz_int, Re_s, Re_b, vel_src_int, r_eff, divergence_cyl, stress_vector_shear, stress_normal_bulk, div_v_term_const]') + $:GPU_PARALLEL_LOOP(collapse=3, private='[j,k,l,idx_rp, avg_v_int, avg_dvdx_int, avg_dvdy_int, avg_dvdz_int, Re_s, Re_b, vel_src_int, r_eff, divergence_cyl, stress_vector_shear, stress_normal_bulk, div_v_term_const]') do l = iz%beg, iz%end do k = iy%beg, iy%end do j = ix%beg, ix%end @@ -4750,7 +4750,7 @@ contains real(wp) :: divergence_v !< Velocity divergence at interface. - #:call GPU_PARALLEL_LOOP(collapse=3, private='[idx_right_phys, vel_grad_avg, current_tau_shear, current_tau_bulk, vel_src_at_interface, Re_shear, Re_bulk, divergence_v, i_dim, vel_comp_idx]') + $:GPU_PARALLEL_LOOP(collapse=3, private='[j_loop,k_loop,l_loop,idx_right_phys, vel_grad_avg, current_tau_shear, current_tau_bulk, vel_src_at_interface, Re_shear, Re_bulk, divergence_v, i_dim, vel_comp_idx]') do l_loop = isz%beg, isz%end do k_loop = isy%beg, isy%end do j_loop = isx%beg, isx%end @@ -4912,7 +4912,7 @@ contains ! Reshaping Outputted Data in y-direction if (norm_dir == 2) then - #:call GPU_PARALLEL_LOOP(collapse=4) + $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=4) do i = 1, sys_size do l = is3%beg, is3%end do j = is1%beg, is1%end @@ -4926,7 +4926,7 @@ contains $:END_GPU_PARALLEL_LOOP if (cyl_coord) then - #:call GPU_PARALLEL_LOOP(collapse=4) + $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=4) do i = 1, sys_size do l = is3%beg, is3%end do j = is1%beg, is1%end @@ -4940,7 +4940,7 @@ contains $:END_GPU_PARALLEL_LOOP end if - #:call GPU_PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(private='[j,k,l]', collapse=3) do l = is3%beg, is3%end do j = is1%beg, is1%end do k = is2%beg, is2%end @@ -4952,7 +4952,7 @@ contains $:END_GPU_PARALLEL_LOOP if (riemann_solver == 1 .or. riemann_solver == 4) then - #:call GPU_PARALLEL_LOOP(collapse=4) + $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=4) do i = advxb + 1, advxe do l = is3%beg, is3%end do j = is1%beg, is1%end @@ -4968,7 +4968,7 @@ contains end if ! Reshaping Outputted Data in z-direction elseif (norm_dir == 3) then - #:call GPU_PARALLEL_LOOP(collapse=4) + $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=4) do i = 1, sys_size do j = is1%beg, is1%end do k = is2%beg, is2%end @@ -4982,7 +4982,7 @@ contains end do $:END_GPU_PARALLEL_LOOP if (grid_geometry == 3) then - #:call GPU_PARALLEL_LOOP(collapse=4) + $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=4) do i = 1, sys_size do j = is1%beg, is1%end do k = is2%beg, is2%end @@ -4997,7 +4997,7 @@ contains $:END_GPU_PARALLEL_LOOP end if - #:call GPU_PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(private='[i,k,l]', collapse=3) do j = is1%beg, is1%end do k = is2%beg, is2%end do l = is3%beg, is3%end @@ -5009,7 +5009,7 @@ contains $:END_GPU_PARALLEL_LOOP if (riemann_solver == 1 .or. riemann_solver == 4) then - #:call GPU_PARALLEL_LOOP(collapse=4) + $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=4) do i = advxb + 1, advxe do j = is1%beg, is1%end do k = is2%beg, is2%end @@ -5024,7 +5024,7 @@ contains end if elseif (norm_dir == 1) then - #:call GPU_PARALLEL_LOOP(collapse=4) + $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=4) do i = 1, sys_size do l = is3%beg, is3%end do k = is2%beg, is2%end @@ -5037,7 +5037,7 @@ contains end do $:END_GPU_PARALLEL_LOOP - #:call GPU_PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(private='[j,k,l]', collapse=3) do l = is3%beg, is3%end do k = is2%beg, is2%end do j = is1%beg, is1%end @@ -5049,7 +5049,7 @@ contains $:END_GPU_PARALLEL_LOOP if (riemann_solver == 1 .or. riemann_solver == 4) then - #:call GPU_PARALLEL_LOOP(collapse=4) + $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=4) do i = advxb + 1, advxe do l = is3%beg, is3%end do k = is2%beg, is2%end diff --git a/src/simulation/m_surface_tension.fpp b/src/simulation/m_surface_tension.fpp index 643850915..28412f95e 100644 --- a/src/simulation/m_surface_tension.fpp +++ b/src/simulation/m_surface_tension.fpp @@ -88,7 +88,7 @@ contains integer :: j, k, l, i if (id == 1) then - #:call GPU_PARALLEL_LOOP(collapse=3, private='[Omega, w1L, w2L, w3L, w1R, w2R, w3R, w1, w2, w3, normWL, normWR, normW]') + $:GPU_PARALLEL_LOOP(collapse=3, private='[j,k,l,Omega, w1L, w2L, w3L, w1R, w2R, w3R, w1, w2, w3, normWL, normWR, normW]') do l = isz%beg, isz%end do k = isy%beg, isy%end do j = isx%beg, isx%end @@ -134,7 +134,7 @@ contains elseif (id == 2) then - #:call GPU_PARALLEL_LOOP(collapse=3, private='[Omega, w1L, w2L, w3L, w1R, w2R, w3R, w1, w2, w3, normWL, normWR, normW]') + $:GPU_PARALLEL_LOOP(collapse=3, private='[j,k,l,Omega, w1L, w2L, w3L, w1R, w2R, w3R, w1, w2, w3, normWL, normWR, normW]') do l = isz%beg, isz%end do k = isy%beg, isy%end do j = isx%beg, isx%end @@ -180,7 +180,7 @@ contains elseif (id == 3) then - #:call GPU_PARALLEL_LOOP(collapse=3, private='[Omega, w1L, w2L, w3L, w1R, w2R, w3R, w1, w2, w3, normWL, normWR, normW]') + $:GPU_PARALLEL_LOOP(collapse=3, private='[j,k,l,Omega, w1L, w2L, w3L, w1R, w2R, w3R, w1, w2, w3, normWL, normWR, normW]') do l = isz%beg, isz%end do k = isy%beg, isy%end do j = isx%beg, isx%end @@ -243,7 +243,7 @@ contains isx%end = m; isy%end = n; isz%end = p ! compute gradient components - #:call GPU_PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(private='[j,k,l]', collapse=3) do l = 0, p do k = 0, n do j = 0, m @@ -254,7 +254,7 @@ contains end do $:END_GPU_PARALLEL_LOOP - #:call GPU_PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(private='[j,k,l]', collapse=3) do l = 0, p do k = 0, n do j = 0, m @@ -266,7 +266,7 @@ contains $:END_GPU_PARALLEL_LOOP if (p > 0) then - #:call GPU_PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(private='[j,k,l]', collapse=3) do l = 0, p do k = 0, n do j = 0, m @@ -278,7 +278,7 @@ contains $:END_GPU_PARALLEL_LOOP end if - #:call GPU_PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(private='[j,k,l]', collapse=3) do l = 0, p do k = 0, n do j = 0, m @@ -343,7 +343,7 @@ contains $:GPU_UPDATE(device='[is1,is2,is3,iv]') if (recon_dir == 1) then - #:call GPU_PARALLEL_LOOP(collapse=4) + $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=4) do i = iv%beg, iv%end do l = is3%beg, is3%end do k = is2%beg, is2%end @@ -356,7 +356,7 @@ contains end do $:END_GPU_PARALLEL_LOOP else if (recon_dir == 2) then - #:call GPU_PARALLEL_LOOP(collapse=4) + $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=4) do i = iv%beg, iv%end do l = is3%beg, is3%end do k = is2%beg, is2%end @@ -369,7 +369,7 @@ contains end do $:END_GPU_PARALLEL_LOOP else if (recon_dir == 3) then - #:call GPU_PARALLEL_LOOP(collapse=4) + $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=4) do i = iv%beg, iv%end do l = is3%beg, is3%end do k = is2%beg, is2%end diff --git a/src/simulation/m_time_steppers.fpp b/src/simulation/m_time_steppers.fpp index dc30041b5..110559bd1 100644 --- a/src/simulation/m_time_steppers.fpp +++ b/src/simulation/m_time_steppers.fpp @@ -507,7 +507,7 @@ contains end if if (bubbles_lagrange .and. .not. adap_dt) call s_update_lagrange_tdv_rk(stage=s) - #:call GPU_PARALLEL_LOOP(collapse=4) + $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=4) do i = 1, sys_size do l = 0, p do k = 0, n @@ -527,7 +527,7 @@ contains $:END_GPU_PARALLEL_LOOP !Evolve pb and mv for non-polytropic qbmm if (qbmm .and. (.not. polytropic)) then - #:call GPU_PARALLEL_LOOP(collapse=5) + $:GPU_PARALLEL_LOOP(private='[i,j,k,l,q]', collapse=5) do i = 1, nb do l = 0, p do k = 0, n @@ -682,7 +682,7 @@ contains idwint) end if - #:call GPU_PARALLEL_LOOP(collapse=3, private='[vel, alpha, Re]') + $:GPU_PARALLEL_LOOP(collapse=3, private='[j,k,l,vel, alpha, Re]') do l = 0, p do k = 0, n do j = 0, m @@ -730,7 +730,7 @@ contains call nvtxStartRange("RHS-BODYFORCES") call s_compute_body_forces_rhs(q_prim_vf_in, q_cons_vf, rhs_vf_in) - #:call GPU_PARALLEL_LOOP(collapse=4) + $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=4) do i = momxb, E_idx do l = 0, p do k = 0, n @@ -757,7 +757,7 @@ contains integer :: i, j, k, l !< Generic loop iterator if (t_step == t_step_start) then - #:call GPU_PARALLEL_LOOP(collapse=4) + $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=4) do i = 1, sys_size do l = 0, p do k = 0, n @@ -769,7 +769,7 @@ contains end do $:END_GPU_PARALLEL_LOOP elseif (t_step == t_step_start + 1) then - #:call GPU_PARALLEL_LOOP(collapse=4) + $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=4) do i = 1, sys_size do l = 0, p do k = 0, n @@ -781,7 +781,7 @@ contains end do $:END_GPU_PARALLEL_LOOP elseif (t_step == t_step_start + 2) then - #:call GPU_PARALLEL_LOOP(collapse=4) + $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=4) do i = 1, sys_size do l = 0, p do k = 0, n @@ -793,7 +793,7 @@ contains end do $:END_GPU_PARALLEL_LOOP elseif (t_step == t_step_start + 3) then - #:call GPU_PARALLEL_LOOP(collapse=4) + $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=4) do i = 1, sys_size do l = 0, p do k = 0, n @@ -805,7 +805,7 @@ contains end do $:END_GPU_PARALLEL_LOOP else ! All other timesteps - #:call GPU_PARALLEL_LOOP(collapse=4) + $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=4) do i = 1, sys_size do l = 0, p do k = 0, n diff --git a/src/simulation/m_viscous.fpp b/src/simulation/m_viscous.fpp index e40c5577a..d1e45589d 100644 --- a/src/simulation/m_viscous.fpp +++ b/src/simulation/m_viscous.fpp @@ -80,7 +80,7 @@ contains $:GPU_UPDATE(device='[is1_viscous,is2_viscous,is3_viscous]') - #:call GPU_PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=3) do l = is3_viscous%beg, is3_viscous%end do k = is2_viscous%beg, is2_viscous%end do j = is1_viscous%beg, is1_viscous%end @@ -94,7 +94,7 @@ contains $:END_GPU_PARALLEL_LOOP if (shear_stress) then ! Shear stresses - #:call GPU_PARALLEL_LOOP(collapse=3, private='[alpha_visc, alpha_rho_visc, Re_visc, tau_Re]') + $:GPU_PARALLEL_LOOP(collapse=3, private='[i,j,k,l,alpha_visc, alpha_rho_visc, Re_visc, tau_Re]') do l = is3_viscous%beg, is3_viscous%end do k = -1, 1 do j = is1_viscous%beg, is1_viscous%end @@ -202,7 +202,7 @@ contains end if if (bulk_stress) then ! Bulk stresses - #:call GPU_PARALLEL_LOOP(collapse=3, private='[alpha_visc, alpha_rho_visc, Re_visc, tau_Re]') + $:GPU_PARALLEL_LOOP(collapse=3, private='[i,j,k,l,alpha_visc, alpha_rho_visc, Re_visc, tau_Re]') do l = is3_viscous%beg, is3_viscous%end do k = -1, 1 do j = is1_viscous%beg, is1_viscous%end @@ -307,7 +307,7 @@ contains if (p == 0) return if (shear_stress) then ! Shear stresses - #:call GPU_PARALLEL_LOOP(collapse=3, private='[alpha_visc, alpha_rho_visc, Re_visc, tau_Re]') + $:GPU_PARALLEL_LOOP(collapse=3, private='[i,j,k,l,alpha_visc, alpha_rho_visc, Re_visc, tau_Re]') do l = is3_viscous%beg, is3_viscous%end do k = -1, 1 do j = is1_viscous%beg, is1_viscous%end @@ -416,7 +416,7 @@ contains end if if (bulk_stress) then ! Bulk stresses - #:call GPU_PARALLEL_LOOP(collapse=3, private='[alpha_visc, alpha_rho_visc, Re_visc, tau_Re]') + $:GPU_PARALLEL_LOOP(collapse=3, private='[i,j,k,l,alpha_visc, alpha_rho_visc, Re_visc, tau_Re]') do l = is3_viscous%beg, is3_viscous%end do k = -1, 1 do j = is1_viscous%beg, is1_viscous%end @@ -598,7 +598,7 @@ contains $:GPU_UPDATE(device='[is1_viscous,is2_viscous,is3_viscous]') - #:call GPU_PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=3) do l = is3_viscous%beg, is3_viscous%end do k = iy%beg, iy%end do j = is1_viscous%beg + 1, is1_viscous%end @@ -614,7 +614,7 @@ contains end do $:END_GPU_PARALLEL_LOOP - #:call GPU_PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=3) do l = is3_viscous%beg, is3_viscous%end do k = is2_viscous%beg, is2_viscous%end do j = is1_viscous%beg, is1_viscous%end - 1 @@ -632,7 +632,7 @@ contains if (n > 0) then - #:call GPU_PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=3) do l = is3_viscous%beg, is3_viscous%end do j = is2_viscous%beg + 1, is2_viscous%end do k = is1_viscous%beg, is1_viscous%end @@ -648,7 +648,7 @@ contains end do $:END_GPU_PARALLEL_LOOP - #:call GPU_PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=3) do l = is3_viscous%beg, is3_viscous%end do j = is2_viscous%beg, is2_viscous%end - 1 do k = is1_viscous%beg, is1_viscous%end @@ -664,7 +664,7 @@ contains end do $:END_GPU_PARALLEL_LOOP - #:call GPU_PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=3) do l = is3_viscous%beg, is3_viscous%end do j = is2_viscous%beg + 1, is2_viscous%end do k = is1_viscous%beg + 1, is1_viscous%end - 1 @@ -684,7 +684,7 @@ contains end do $:END_GPU_PARALLEL_LOOP - #:call GPU_PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=3) do l = is3_viscous%beg, is3_viscous%end do j = is2_viscous%beg, is2_viscous%end - 1 do k = is1_viscous%beg + 1, is1_viscous%end - 1 @@ -705,7 +705,7 @@ contains end do $:END_GPU_PARALLEL_LOOP - #:call GPU_PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=3) do l = is3_viscous%beg, is3_viscous%end do k = is2_viscous%beg + 1, is2_viscous%end - 1 do j = is1_viscous%beg + 1, is1_viscous%end @@ -726,7 +726,7 @@ contains end do $:END_GPU_PARALLEL_LOOP - #:call GPU_PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=3) do l = is3_viscous%beg, is3_viscous%end do k = is2_viscous%beg + 1, is2_viscous%end - 1 do j = is1_viscous%beg, is1_viscous%end - 1 @@ -749,7 +749,7 @@ contains if (p > 0) then - #:call GPU_PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=3) do j = is3_viscous%beg + 1, is3_viscous%end do l = is2_viscous%beg, is2_viscous%end do k = is1_viscous%beg, is1_viscous%end @@ -766,7 +766,7 @@ contains end do $:END_GPU_PARALLEL_LOOP - #:call GPU_PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=3) do j = is3_viscous%beg, is3_viscous%end - 1 do l = is2_viscous%beg, is2_viscous%end do k = is1_viscous%beg, is1_viscous%end @@ -783,7 +783,7 @@ contains end do $:END_GPU_PARALLEL_LOOP - #:call GPU_PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=3) do l = is3_viscous%beg + 1, is3_viscous%end - 1 do k = is2_viscous%beg, is2_viscous%end do j = is1_viscous%beg + 1, is1_viscous%end @@ -805,7 +805,7 @@ contains end do $:END_GPU_PARALLEL_LOOP - #:call GPU_PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=3) do l = is3_viscous%beg + 1, is3_viscous%end - 1 do k = is2_viscous%beg, is2_viscous%end do j = is1_viscous%beg, is1_viscous%end - 1 @@ -827,7 +827,7 @@ contains end do $:END_GPU_PARALLEL_LOOP - #:call GPU_PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=3) do l = is3_viscous%beg + 1, is3_viscous%end - 1 do j = is2_viscous%beg + 1, is2_viscous%end do k = is1_viscous%beg, is1_viscous%end @@ -849,7 +849,7 @@ contains end do $:END_GPU_PARALLEL_LOOP - #:call GPU_PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=3) do l = is3_viscous%beg + 1, is3_viscous%end - 1 do j = is2_viscous%beg, is2_viscous%end - 1 do k = is1_viscous%beg, is1_viscous%end @@ -871,7 +871,7 @@ contains end do $:END_GPU_PARALLEL_LOOP - #:call GPU_PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=3) do j = is3_viscous%beg + 1, is3_viscous%end do l = is2_viscous%beg + 1, is2_viscous%end - 1 do k = is1_viscous%beg, is1_viscous%end @@ -893,7 +893,7 @@ contains end do $:END_GPU_PARALLEL_LOOP - #:call GPU_PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=3) do j = is3_viscous%beg, is3_viscous%end - 1 do l = is2_viscous%beg + 1, is2_viscous%end - 1 do k = is1_viscous%beg, is1_viscous%end @@ -914,7 +914,7 @@ contains end do end do $:END_GPU_PARALLEL_LOOP - #:call GPU_PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=3) do j = is3_viscous%beg + 1, is3_viscous%end do l = is2_viscous%beg, is2_viscous%end do k = is1_viscous%beg + 1, is1_viscous%end - 1 @@ -935,7 +935,7 @@ contains end do end do $:END_GPU_PARALLEL_LOOP - #:call GPU_PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=3) do j = is3_viscous%beg, is3_viscous%end - 1 do l = is2_viscous%beg, is2_viscous%end do k = is1_viscous%beg + 1, is1_viscous%end - 1 @@ -1047,7 +1047,7 @@ contains if (viscous) then if (weno_Re_flux) then if (norm_dir == 2) then - #:call GPU_PARALLEL_LOOP(collapse=4) + $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=4) do i = iv%beg, iv%end do l = is3_viscous%beg, is3_viscous%end do j = is1_viscous%beg, is1_viscous%end @@ -1060,7 +1060,7 @@ contains end do $:END_GPU_PARALLEL_LOOP elseif (norm_dir == 3) then - #:call GPU_PARALLEL_LOOP(collapse=4) + $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=4) do i = iv%beg, iv%end do j = is1_viscous%beg, is1_viscous%end do k = is2_viscous%beg, is2_viscous%end @@ -1073,7 +1073,7 @@ contains end do $:END_GPU_PARALLEL_LOOP elseif (norm_dir == 1) then - #:call GPU_PARALLEL_LOOP(collapse=4) + $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=4) do i = iv%beg, iv%end do l = is3_viscous%beg, is3_viscous%end do k = is2_viscous%beg, is2_viscous%end @@ -1151,7 +1151,7 @@ contains if (viscous) then if (weno_Re_flux) then if (norm_dir == 2) then - #:call GPU_PARALLEL_LOOP(collapse=4) + $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=4) do i = iv%beg, iv%end do l = is3_viscous%beg, is3_viscous%end do j = is1_viscous%beg, is1_viscous%end @@ -1164,7 +1164,7 @@ contains end do $:END_GPU_PARALLEL_LOOP elseif (norm_dir == 3) then - #:call GPU_PARALLEL_LOOP(collapse=4) + $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=4) do i = iv%beg, iv%end do j = is1_viscous%beg, is1_viscous%end do k = is2_viscous%beg, is2_viscous%end @@ -1177,7 +1177,7 @@ contains end do $:END_GPU_PARALLEL_LOOP elseif (norm_dir == 1) then - #:call GPU_PARALLEL_LOOP(collapse=4) + $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=4) do i = iv%beg, iv%end do l = is3_viscous%beg, is3_viscous%end do k = is2_viscous%beg, is2_viscous%end @@ -1244,7 +1244,7 @@ contains ! cell-boundaries, to calculate the cell-averaged first-order ! spatial derivatives inside the cell. - #:call GPU_PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=3) do l = is3_viscous%beg, is3_viscous%end do k = is2_viscous%beg, is2_viscous%end do j = is1_viscous%beg + 1, is1_viscous%end - 1 @@ -1273,7 +1273,7 @@ contains ! cell-boundaries, to calculate the cell-averaged first-order ! spatial derivatives inside the cell. - #:call GPU_PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=3) do l = is3_viscous%beg, is3_viscous%end do k = is2_viscous%beg + 1, is2_viscous%end - 1 do j = is1_viscous%beg, is1_viscous%end @@ -1302,7 +1302,7 @@ contains ! cell-boundaries, to calculate the cell-averaged first-order ! spatial derivatives inside the cell. - #:call GPU_PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=3) do l = is3_viscous%beg + 1, is3_viscous%end - 1 do k = is2_viscous%beg, is2_viscous%end do j = is1_viscous%beg, is1_viscous%end @@ -1358,7 +1358,7 @@ contains $:GPU_UPDATE(device='[is1_viscous,is2_viscous,is3_viscous]') - #:call GPU_PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(private='[j,k,l]', collapse=3) do l = is3_viscous%beg, is3_viscous%end do k = is2_viscous%beg, is2_viscous%end do j = is1_viscous%beg, is1_viscous%end @@ -1371,7 +1371,7 @@ contains $:END_GPU_PARALLEL_LOOP if (n > 0) then - #:call GPU_PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(private='[j,k,l]', collapse=3) do l = is3_viscous%beg, is3_viscous%end do k = is2_viscous%beg, is2_viscous%end do j = is1_viscous%beg, is1_viscous%end @@ -1385,7 +1385,7 @@ contains end if if (p > 0) then - #:call GPU_PARALLEL_LOOP(collapse=3) + $:GPU_PARALLEL_LOOP(private='[j,k,l]', collapse=3) do l = is3_viscous%beg, is3_viscous%end do k = is2_viscous%beg, is2_viscous%end do j = is1_viscous%beg, is1_viscous%end @@ -1398,7 +1398,7 @@ contains $:END_GPU_PARALLEL_LOOP end if - #:call GPU_PARALLEL_LOOP(collapse=2) + $:GPU_PARALLEL_LOOP(private='[k,l]', collapse=2) do l = idwbuff(3)%beg, idwbuff(3)%end do k = idwbuff(2)%beg, idwbuff(2)%end grad_x%sf(idwbuff(1)%beg, k, l) = & @@ -1411,7 +1411,7 @@ contains end do $:END_GPU_PARALLEL_LOOP if (n > 0) then - #:call GPU_PARALLEL_LOOP(collapse=2) + $:GPU_PARALLEL_LOOP(private='[j,l]', collapse=2) do l = idwbuff(3)%beg, idwbuff(3)%end do j = idwbuff(1)%beg, idwbuff(1)%end grad_y%sf(j, idwbuff(2)%beg, l) = & @@ -1424,7 +1424,7 @@ contains end do $:END_GPU_PARALLEL_LOOP if (p > 0) then - #:call GPU_PARALLEL_LOOP(collapse=2) + $:GPU_PARALLEL_LOOP(private='[j,k]', collapse=2) do k = idwbuff(2)%beg, idwbuff(2)%end do j = idwbuff(1)%beg, idwbuff(1)%end grad_z%sf(j, k, idwbuff(3)%beg) = & @@ -1440,7 +1440,7 @@ contains end if if (bc_x%beg <= BC_GHOST_EXTRAP) then - #:call GPU_PARALLEL_LOOP(collapse=2) + $:GPU_PARALLEL_LOOP(private='[k,l]', collapse=2) do l = idwbuff(3)%beg, idwbuff(3)%end do k = idwbuff(2)%beg, idwbuff(2)%end grad_x%sf(0, k, l) = (-3._wp*var%sf(0, k, l) + 4._wp*var%sf(1, k, l) - var%sf(2, k, l))/ & @@ -1450,7 +1450,7 @@ contains $:END_GPU_PARALLEL_LOOP end if if (bc_x%end <= BC_GHOST_EXTRAP) then - #:call GPU_PARALLEL_LOOP(collapse=2) + $:GPU_PARALLEL_LOOP(private='[k,l]', collapse=2) do l = idwbuff(3)%beg, idwbuff(3)%end do k = idwbuff(2)%beg, idwbuff(2)%end grad_x%sf(m, k, l) = (3._wp*var%sf(m, k, l) - 4._wp*var%sf(m - 1, k, l) + var%sf(m - 2, k, l))/ & @@ -1461,7 +1461,7 @@ contains end if if (n > 0) then if (bc_y%beg <= BC_GHOST_EXTRAP .and. bc_y%beg /= BC_NULL) then - #:call GPU_PARALLEL_LOOP(collapse=2) + $:GPU_PARALLEL_LOOP(private='[j,l]', collapse=2) do l = idwbuff(3)%beg, idwbuff(3)%end do j = idwbuff(1)%beg, idwbuff(1)%end grad_y%sf(j, 0, l) = (-3._wp*var%sf(j, 0, l) + 4._wp*var%sf(j, 1, l) - var%sf(j, 2, l))/ & @@ -1471,7 +1471,7 @@ contains $:END_GPU_PARALLEL_LOOP end if if (bc_y%end <= BC_GHOST_EXTRAP) then - #:call GPU_PARALLEL_LOOP(collapse=2) + $:GPU_PARALLEL_LOOP(private='[j,l]', collapse=2) do l = idwbuff(3)%beg, idwbuff(3)%end do j = idwbuff(1)%beg, idwbuff(1)%end grad_y%sf(j, n, l) = (3._wp*var%sf(j, n, l) - 4._wp*var%sf(j, n - 1, l) + var%sf(j, n - 2, l))/ & @@ -1482,7 +1482,7 @@ contains end if if (p > 0) then if (bc_z%beg <= BC_GHOST_EXTRAP) then - #:call GPU_PARALLEL_LOOP(collapse=2) + $:GPU_PARALLEL_LOOP(private='[j,k]', collapse=2) do k = idwbuff(2)%beg, idwbuff(2)%end do j = idwbuff(1)%beg, idwbuff(1)%end grad_z%sf(j, k, 0) = & @@ -1493,7 +1493,7 @@ contains $:END_GPU_PARALLEL_LOOP end if if (bc_z%end <= BC_GHOST_EXTRAP) then - #:call GPU_PARALLEL_LOOP(collapse=2) + $:GPU_PARALLEL_LOOP(private='[j,k]', collapse=2) do k = idwbuff(2)%beg, idwbuff(2)%end do j = idwbuff(1)%beg, idwbuff(1)%end grad_z%sf(j, k, p) = & diff --git a/src/simulation/m_weno.fpp b/src/simulation/m_weno.fpp index ec36b5b5e..869e4ca29 100644 --- a/src/simulation/m_weno.fpp +++ b/src/simulation/m_weno.fpp @@ -670,7 +670,7 @@ contains if (weno_order == 1) then if (weno_dir == 1) then - #:call GPU_PARALLEL_LOOP(collapse=4) + $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=4) do i = 1, ubound(v_vf, 1) do l = is3_weno%beg, is3_weno%end do k = is2_weno%beg, is2_weno%end @@ -683,7 +683,7 @@ contains end do $:END_GPU_PARALLEL_LOOP else if (weno_dir == 2) then - #:call GPU_PARALLEL_LOOP(collapse=4) + $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=4) do i = 1, ubound(v_vf, 1) do l = is3_weno%beg, is3_weno%end do k = is2_weno%beg, is2_weno%end @@ -696,7 +696,7 @@ contains end do $:END_GPU_PARALLEL_LOOP else if (weno_dir == 3) then - #:call GPU_PARALLEL_LOOP(collapse=4) + $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=4) do i = 1, ubound(v_vf, 1) do l = is3_weno%beg, is3_weno%end do k = is2_weno%beg, is2_weno%end @@ -712,7 +712,7 @@ contains elseif (weno_order == 3) then #:for WENO_DIR, XYZ in [(1, 'x'), (2, 'y'), (3, 'z')] if (weno_dir == ${WENO_DIR}$) then - #:call GPU_PARALLEL_LOOP(collapse=4,private='[beta,dvd,poly,omega,alpha,tau]') + $:GPU_PARALLEL_LOOP(collapse=4,private='[i,j,k,l,beta,dvd,poly,omega,alpha,tau]') do l = is3_weno%beg, is3_weno%end do k = is2_weno%beg, is2_weno%end do j = is1_weno%beg, is1_weno%end @@ -791,7 +791,7 @@ contains elseif (weno_order == 5) then #:for WENO_DIR, XYZ in [(1, 'x'), (2, 'y'), (3, 'z')] if (weno_dir == ${WENO_DIR}$) then - #:call GPU_PARALLEL_LOOP(collapse=3,private='[dvd,poly,beta,alpha,omega,tau,delta]') + $:GPU_PARALLEL_LOOP(collapse=3,private='[i,j,k,l,dvd,poly,beta,alpha,omega,tau,delta]') do l = is3_weno%beg, is3_weno%end do k = is2_weno%beg, is2_weno%end do j = is1_weno%beg, is1_weno%end @@ -911,7 +911,7 @@ contains elseif (weno_order == 7) then #:for WENO_DIR, XYZ in [(1, 'x'), (2, 'y'), (3, 'z')] if (weno_dir == ${WENO_DIR}$) then - #:call GPU_PARALLEL_LOOP(collapse=3,private='[poly,beta,alpha,omega,tau,delta,dvd,v]') + $:GPU_PARALLEL_LOOP(collapse=3,private='[i,j,k,l,poly,beta,alpha,omega,tau,delta,dvd,v]') do l = is3_weno%beg, is3_weno%end do k = is2_weno%beg, is2_weno%end do j = is1_weno%beg, is1_weno%end @@ -1139,7 +1139,7 @@ contains $:GPU_UPDATE(device='[v_size]') if (weno_dir == 1) then - #:call GPU_PARALLEL_LOOP(collapse=4) + $:GPU_PARALLEL_LOOP(private='[j,k,l,q]', collapse=4) do j = 1, v_size do q = is3_weno%beg, is3_weno%end do l = is2_weno%beg, is2_weno%end @@ -1156,7 +1156,7 @@ contains if (n == 0) return if (weno_dir == 2) then - #:call GPU_PARALLEL_LOOP(collapse=4) + $:GPU_PARALLEL_LOOP(private='[j,k,l,q]', collapse=4) do j = 1, v_size do q = is3_weno%beg, is3_weno%end do l = is2_weno%beg, is2_weno%end @@ -1173,7 +1173,7 @@ contains if (p == 0) return if (weno_dir == 3) then - #:call GPU_PARALLEL_LOOP(collapse=4) + $:GPU_PARALLEL_LOOP(private='[j,k,l,q]', collapse=4) do j = 1, v_size do q = is3_weno%beg, is3_weno%end do l = is2_weno%beg, is2_weno%end @@ -1233,7 +1233,7 @@ contains real(wp), parameter :: alpha_mp = 2._wp real(wp), parameter :: beta_mp = 4._wp/3._wp - #:call GPU_PARALLEL_LOOP(collapse=4,private='[d]') + $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=4,private='[d]') do l = is3_weno%beg, is3_weno%end do k = is2_weno%beg, is2_weno%end do j = is1_weno%beg, is1_weno%end From f8dd4cd4dae9702666c2edc69c6d08d4a0b5751e Mon Sep 17 00:00:00 2001 From: "Daniel J. Vickers" Date: Thu, 6 Nov 2025 10:47:39 -0500 Subject: [PATCH 05/33] Removed duplicate parallel statements --- src/common/include/acc_macros.fpp | 34 +----------------- src/common/include/omp_macros.fpp | 49 +------------------------- src/common/include/parallel_macros.fpp | 4 +-- 3 files changed, 4 insertions(+), 83 deletions(-) diff --git a/src/common/include/acc_macros.fpp b/src/common/include/acc_macros.fpp index 4348cbe71..d9c7ab2e9 100644 --- a/src/common/include/acc_macros.fpp +++ b/src/common/include/acc_macros.fpp @@ -129,39 +129,7 @@ $:end_acc_directive #:enddef -#:def ACC_PARALLEL_LOOP(code, collapse=None, private=None, parallelism='[gang, vector]', & - & default='present', firstprivate=None, reduction=None, reductionOp=None, & - & copy=None, copyin=None, copyinReadOnly=None, copyout=None, create=None, & - & no_create=None, present=None, deviceptr=None, attach=None, extraAccArgs=None) - #:set collapse_val = GEN_COLLAPSE_STR(collapse) - #:set parallelism_val = GEN_PARALLELISM_STR(parallelism) - #:set default_val = GEN_DEFAULT_STR(default) - #:set private_val = GEN_PRIVATE_STR(private, False).strip('\n') + GEN_PRIVATE_STR(firstprivate, True).strip('\n') - #:set reduction_val = GEN_REDUCTION_STR(reduction, reductionOp) - #:set copy_val = GEN_COPY_STR(copy) - #:set copyin_val = GEN_COPYIN_STR(copyin, False).strip('\n') + GEN_COPYIN_STR(copyinReadOnly, True).strip('\n') - #:set copyout_val = GEN_COPYOUT_STR(copyout) - #:set create_val = GEN_CREATE_STR(create) - #:set no_create_val = GEN_NOCREATE_STR(no_create) - #:set present_val = GEN_PRESENT_STR(present) - #:set deviceptr_val = GEN_DEVICEPTR_STR(deviceptr) - #:set attach_val = GEN_ATTACH_STR(attach) - #:set extraAccArgs_val = GEN_EXTRA_ARGS_STR(extraAccArgs) - #:set clause_val = collapse_val.strip('\n') + parallelism_val.strip('\n') + & - & default_val.strip('\n') + private_val.strip('\n') + reduction_val.strip('\n') + & - & copy_val.strip('\n') + copyin_val.strip('\n') + & - & copyout_val.strip('\n') + create_val.strip('\n') + & - & no_create_val.strip('\n') + present_val.strip('\n') + & - & deviceptr_val.strip('\n') + attach_val.strip('\n') - #:set acc_directive = '!$acc parallel loop ' + & - & clause_val + extraAccArgs_val.strip('\n') - #:set acc_end_directive = '!$acc end parallel loop' - $:acc_directive - $:code - $:acc_end_directive -#:enddef - -#:def NEW_ACC_PARALLEL_LOOP(collapse=None, private=None, parallelism='[gang, vector]', & +#:def ACC_PARALLEL_LOOP(collapse=None, private=None, parallelism='[gang, vector]', & & default='present', firstprivate=None, reduction=None, reductionOp=None, & & copy=None, copyin=None, copyinReadOnly=None, copyout=None, create=None, & & no_create=None, present=None, deviceptr=None, attach=None, extraAccArgs=None) diff --git a/src/common/include/omp_macros.fpp b/src/common/include/omp_macros.fpp index 82b07a9ca..9fb3abb9a 100644 --- a/src/common/include/omp_macros.fpp +++ b/src/common/include/omp_macros.fpp @@ -149,54 +149,7 @@ $:omp_end_directive #:enddef -#:def OMP_PARALLEL_LOOP(code, collapse=None, private=None, parallelism='[gang, vector]', & - & default='present', firstprivate=None, reduction=None, reductionOp=None, & - & copy=None, copyin=None, copyinReadOnly=None, copyout=None, create=None, & - & no_create=None, present=None, deviceptr=None, attach=None, extraOmpArgs=None) - - #:set collapse_val = GEN_COLLAPSE_STR(collapse) - #:set parallelism_val = OMP_PARALLELISM_STR(parallelism) - #:set default_val = OMP_DEFAULT_STR(default) - #:set private_val = GEN_PRIVATE_STR(private, False).strip('\n') + GEN_PRIVATE_STR(firstprivate, True).strip('\n') - #:set reduction_val = GEN_REDUCTION_STR(reduction, reductionOp) - #:set copy_val = OMP_COPY_STR(copy) - #:set copyin_val = OMP_COPYIN_STR(copyin).strip('\n') + OMP_COPYIN_STR(copyinReadOnly).strip('\n') - #:set copyout_val = OMP_COPYOUT_STR(copyout) - #:set create_val = OMP_CREATE_STR(create) - #:set no_create_val = OMP_NOCREATE_STR(no_create) - #:set present_val = OMP_PRESENT_STR(present) - #:set deviceptr_val = OMP_DEVICEPTR_STR(deviceptr) - #:set attach_val = OMP_MAP_STR('always,tofrom', attach) - #:set extraOmpArgs_val = GEN_EXTRA_ARGS_STR(extraOmpArgs) - #:set clause_val = collapse_val.strip('\n') + parallelism_val.strip('\n') + & - & default_val.strip('\n') + private_val.strip('\n') + reduction_val.strip('\n') + & - & copy_val.strip('\n') + copyin_val.strip('\n') + & - & copyout_val.strip('\n') + create_val.strip('\n') + & - & no_create_val.strip('\n') + present_val.strip('\n') + & - & deviceptr_val.strip('\n') + attach_val.strip('\n') - #! Hardcoding the parallelism for now - - #:if MFC_COMPILER == NVIDIA_COMPILER_ID or MFC_COMPILER == PGI_COMPILER_ID - #:set omp_start_directive = '!$omp target teams loop defaultmap(firstprivate:scalar) bind(teams,parallel) ' - #:set omp_end_directive = '!$omp end target teams loop' - #:elif MFC_COMPILER == CCE_COMPILER_ID - #:set omp_start_directive = '!$omp target teams distribute parallel do simd defaultmap(firstprivate:scalar) ' - #:set omp_end_directive = '!$omp end target teams distribute parallel do simd' - #:elif MFC_COMPILER == AMD_COMPILER_ID - #:set omp_start_directive = '!$omp target teams distribute parallel do ' - #:set omp_end_directive = '!$omp end target teams distribute parallel do' - #:else - #:set omp_start_directive = '!$omp target teams loop defaultmap(firstprivate:scalar) bind(teams,parallel) ' - #:set omp_end_directive = '!$omp end target teams loop' - #:endif - - #:set omp_directive = omp_start_directive + clause_val + extraOmpArgs_val.strip('\n') - $:omp_directive - $:code - $:omp_end_directive -#:enddef - -#:def NEW_OMP_PARALLEL_LOOP(collapse=None, private=None, parallelism='[gang, vector]', & +#:def OMP_PARALLEL_LOOP(collapse=None, private=None, parallelism='[gang, vector]', & & default='present', firstprivate=None, reduction=None, reductionOp=None, & & copy=None, copyin=None, copyinReadOnly=None, copyout=None, create=None, & & no_create=None, present=None, deviceptr=None, attach=None, extraOmpArgs=None) diff --git a/src/common/include/parallel_macros.fpp b/src/common/include/parallel_macros.fpp index c903745f4..3f5aa5dd5 100644 --- a/src/common/include/parallel_macros.fpp +++ b/src/common/include/parallel_macros.fpp @@ -25,9 +25,9 @@ & no_create=None, present=None, deviceptr=None, attach=None, extraAccArgs=None, extraOmpArgs=None) #if defined(MFC_OpenACC) - #:set directive = NEW_ACC_PARALLEL_LOOP(collapse, private, parallelism, default, firstprivate, reduction, reductionOp, copy, copyin, copyinReadOnly, copyout, create, no_create, present, deviceptr, attach, extraAccArgs) + #:set directive = ACC_PARALLEL_LOOP(collapse, private, parallelism, default, firstprivate, reduction, reductionOp, copy, copyin, copyinReadOnly, copyout, create, no_create, present, deviceptr, attach, extraAccArgs) #elif defined(MFC_OpenMP) - #:set directive = NEW_OMP_PARALLEL_LOOP(collapse, private, parallelism, default, firstprivate, reduction, reductionOp, copy, copyin, copyinReadOnly, copyout, create, no_create, present, deviceptr, attach, extraOmpArgs) + #:set directive = OMP_PARALLEL_LOOP(collapse, private, parallelism, default, firstprivate, reduction, reductionOp, copy, copyin, copyinReadOnly, copyout, create, no_create, present, deviceptr, attach, extraOmpArgs) #endif $:directive From fd482ed26f19bcf451476b4a2def335ad0a37ac2 Mon Sep 17 00:00:00 2001 From: "Daniel J. Vickers" Date: Thu, 6 Nov 2025 11:05:28 -0500 Subject: [PATCH 06/33] This builds --- src/common/include/parallel_macros.fpp | 2 +- src/common/m_boundary_common.fpp | 36 ++++---- src/common/m_chemistry.fpp | 4 +- src/common/m_compute_levelset.fpp | 14 +-- src/common/m_finite_differences.fpp | 2 +- src/common/m_ib_patches.fpp | 14 +-- src/common/m_mpi_common.fpp | 36 ++++---- src/common/m_phase_change.fpp | 2 +- src/common/m_variables_conversion.fpp | 4 +- src/simulation/m_acoustic_src.fpp | 6 +- src/simulation/m_body_forces.fpp | 10 +-- src/simulation/m_bubbles_EE.fpp | 16 ++-- src/simulation/m_bubbles_EL.fpp | 46 +++++----- src/simulation/m_bubbles_EL_kernels.fpp | 4 +- src/simulation/m_cbc.fpp | 70 +++++++-------- src/simulation/m_data_output.fpp | 2 +- src/simulation/m_derived_variables.fpp | 32 +++---- src/simulation/m_fftw.fpp | 16 ++-- src/simulation/m_hyperelastic.fpp | 2 +- src/simulation/m_hypoelastic.fpp | 30 +++---- src/simulation/m_ibm.fpp | 4 +- src/simulation/m_igr.fpp | 24 ++--- src/simulation/m_mhd.fpp | 2 +- src/simulation/m_mpi_proxy.fpp | 12 +-- src/simulation/m_muscl.fpp | 16 ++-- src/simulation/m_pressure_relaxation.fpp | 2 +- src/simulation/m_qbmm.fpp | 6 +- src/simulation/m_rhs.fpp | 86 +++++++++--------- src/simulation/m_riemann_solvers.fpp | 108 +++++++++++------------ src/simulation/m_surface_tension.fpp | 20 ++--- src/simulation/m_time_steppers.fpp | 18 ++-- src/simulation/m_viscous.fpp | 88 +++++++++--------- src/simulation/m_weno.fpp | 22 ++--- 33 files changed, 378 insertions(+), 378 deletions(-) diff --git a/src/common/include/parallel_macros.fpp b/src/common/include/parallel_macros.fpp index 3f5aa5dd5..e14020678 100644 --- a/src/common/include/parallel_macros.fpp +++ b/src/common/include/parallel_macros.fpp @@ -39,7 +39,7 @@ #if defined(MFC_OpenACC) #:set end_directive = '!$acc end parallel loop' #elif defined(MFC_OpenMP) - #:set end_directive = END_OMP_PARALLEL_LOOP(code, collapse, private, parallelism, default, firstprivate, reduction, reductionOp, copy, copyin, copyinReadOnly, copyout, create, no_create, present, deviceptr, attach, extraOmpArgs) + #:set end_directive = END_OMP_PARALLEL_LOOP() #endif $:end_directive diff --git a/src/common/m_boundary_common.fpp b/src/common/m_boundary_common.fpp index 41bae5bdb..47434dfcb 100644 --- a/src/common/m_boundary_common.fpp +++ b/src/common/m_boundary_common.fpp @@ -112,7 +112,7 @@ contains end if end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() end if if (bc_x%end >= 0) then @@ -142,7 +142,7 @@ contains end if end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() end if ! Population of Buffers in y-direction @@ -179,7 +179,7 @@ contains end if end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() end if if (bc_y%end >= 0) then @@ -209,7 +209,7 @@ contains end if end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() end if ! Population of Buffers in z-direction @@ -243,7 +243,7 @@ contains end if end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() end if if (bc_z%end >= 0) then @@ -273,7 +273,7 @@ contains end if end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() end if ! END: Population of Buffers in z-direction @@ -1178,7 +1178,7 @@ contains end select end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() end if if (bc_x%end >= 0) then @@ -1197,7 +1197,7 @@ contains end select end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() end if if (n == 0) return @@ -1219,7 +1219,7 @@ contains end select end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() end if if (bc_y%end >= 0) then @@ -1238,7 +1238,7 @@ contains end select end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() end if if (p == 0) return @@ -1260,7 +1260,7 @@ contains end select end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() end if if (bc_z%end >= 0) then @@ -1279,7 +1279,7 @@ contains end select end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() end if end subroutine s_populate_capillary_buffers @@ -1501,7 +1501,7 @@ contains end select end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() end if @@ -1527,7 +1527,7 @@ contains end select end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() end if @@ -1555,7 +1555,7 @@ contains end select end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() end if @@ -1581,7 +1581,7 @@ contains end select end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() end if if (p == 0) then @@ -1608,7 +1608,7 @@ contains end select end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() end if if (bc_z%end >= 0) then @@ -1633,7 +1633,7 @@ contains end select end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() end if end subroutine s_populate_F_igr_buffers diff --git a/src/common/m_chemistry.fpp b/src/common/m_chemistry.fpp index 43925a796..220b987f1 100644 --- a/src/common/m_chemistry.fpp +++ b/src/common/m_chemistry.fpp @@ -159,7 +159,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() end subroutine s_compute_chemistry_reaction_flux @@ -298,7 +298,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() end if end subroutine s_compute_chemistry_diffusion_flux diff --git a/src/common/m_compute_levelset.fpp b/src/common/m_compute_levelset.fpp index cef21f25a..095c542d5 100644 --- a/src/common/m_compute_levelset.fpp +++ b/src/common/m_compute_levelset.fpp @@ -62,7 +62,7 @@ contains end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() end subroutine s_circle_levelset @@ -148,7 +148,7 @@ contains end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() end subroutine s_airfoil_levelset @@ -255,7 +255,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() end subroutine s_3D_airfoil_levelset @@ -332,7 +332,7 @@ contains end if end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() end subroutine s_rectangle_levelset @@ -441,7 +441,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() end subroutine s_cuboid_levelset @@ -479,7 +479,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() end subroutine s_sphere_levelset @@ -560,7 +560,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() end subroutine s_cylinder_levelset diff --git a/src/common/m_finite_differences.fpp b/src/common/m_finite_differences.fpp index e37160248..efd20a628 100644 --- a/src/common/m_finite_differences.fpp +++ b/src/common/m_finite_differences.fpp @@ -56,7 +56,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() end subroutine s_compute_fd_divergence diff --git a/src/common/m_ib_patches.fpp b/src/common/m_ib_patches.fpp index 314ed0dfd..7ec8c8f1e 100644 --- a/src/common/m_ib_patches.fpp +++ b/src/common/m_ib_patches.fpp @@ -173,7 +173,7 @@ contains end if end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() end subroutine s_ib_circle @@ -325,7 +325,7 @@ contains end if end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() end subroutine s_ib_airfoil @@ -475,7 +475,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() end subroutine s_ib_3D_airfoil @@ -541,7 +541,7 @@ contains end if end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() end subroutine s_ib_rectangle @@ -602,7 +602,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() end subroutine s_ib_sphere @@ -673,7 +673,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() end subroutine s_ib_cuboid @@ -758,7 +758,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() end subroutine s_ib_cylinder diff --git a/src/common/m_mpi_common.fpp b/src/common/m_mpi_common.fpp index d2e6767e4..21594edce 100644 --- a/src/common/m_mpi_common.fpp +++ b/src/common/m_mpi_common.fpp @@ -768,7 +768,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() if (qbmm_comm) then $:GPU_PARALLEL_LOOP(collapse=4,private='[r,i,j,k,l]') @@ -785,7 +785,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() $:GPU_PARALLEL_LOOP(collapse=5,private='[i,j,k,l,q,r]') do l = 0, p @@ -801,7 +801,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() end if #:elif mpi_dir == 2 $:GPU_PARALLEL_LOOP(collapse=4,private='[i,j,k,l,r]') @@ -817,7 +817,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() if (qbmm_comm) then $:GPU_PARALLEL_LOOP(collapse=5,private='[i,j,k,l,q,r]') @@ -835,7 +835,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() $:GPU_PARALLEL_LOOP(collapse=5,private='[i,j,k,l,q,r]') do i = nVar + 1, nVar + 4 @@ -852,7 +852,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() end if #:else $:GPU_PARALLEL_LOOP(collapse=4,private='[i,j,k,l,r]') @@ -868,7 +868,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() if (qbmm_comm) then $:GPU_PARALLEL_LOOP(collapse=5,private='[i,j,k,l,q,r]') @@ -886,7 +886,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() $:GPU_PARALLEL_LOOP(collapse=5,private='[i,j,k,l,q,r]') do i = nVar + 1, nVar + 4 @@ -903,7 +903,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() end if #:endif end if @@ -976,7 +976,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() if (qbmm_comm) then $:GPU_PARALLEL_LOOP(collapse=5,private='[i,j,k,l,q,r]') @@ -993,7 +993,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() $:GPU_PARALLEL_LOOP(collapse=5,private='[i,j,k,l,q,r]') do l = 0, p @@ -1009,7 +1009,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() end if #:elif mpi_dir == 2 $:GPU_PARALLEL_LOOP(collapse=4,private='[i,j,k,l,r]') @@ -1031,7 +1031,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() if (qbmm_comm) then $:GPU_PARALLEL_LOOP(collapse=5,private='[i,j,k,l,q,r]') @@ -1049,7 +1049,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() $:GPU_PARALLEL_LOOP(collapse=5,private='[i,j,k,l,q,r]') do i = nVar + 1, nVar + 4 @@ -1066,7 +1066,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() end if #:else ! Unpacking buffer from bc_z%beg @@ -1090,7 +1090,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() if (qbmm_comm) then $:GPU_PARALLEL_LOOP(collapse=5,private='[i,j,k,l,q,r]') @@ -1109,7 +1109,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() $:GPU_PARALLEL_LOOP(collapse=5,private='[i,j,k,l,q,r]') do i = nVar + 1, nVar + 4 @@ -1127,7 +1127,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() end if #:endif end if diff --git a/src/common/m_phase_change.fpp b/src/common/m_phase_change.fpp index 47fbb126c..235792978 100644 --- a/src/common/m_phase_change.fpp +++ b/src/common/m_phase_change.fpp @@ -269,7 +269,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() end subroutine s_infinite_relaxation_k diff --git a/src/common/m_variables_conversion.fpp b/src/common/m_variables_conversion.fpp index 359af5b27..94e213608 100644 --- a/src/common/m_variables_conversion.fpp +++ b/src/common/m_variables_conversion.fpp @@ -1165,7 +1165,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() end subroutine s_convert_conservative_to_primitive_variables @@ -1598,7 +1598,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() #endif end subroutine s_convert_primitive_to_flux_variables diff --git a/src/simulation/m_acoustic_src.fpp b/src/simulation/m_acoustic_src.fpp index 22c8d1a58..d17d2d041 100644 --- a/src/simulation/m_acoustic_src.fpp +++ b/src/simulation/m_acoustic_src.fpp @@ -178,7 +178,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() ! Keep outer loop sequel because different sources can have very different number of points do ai = 1, num_source @@ -317,7 +317,7 @@ contains end if end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() end if end do @@ -338,7 +338,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() end subroutine s_acoustic_src_calculations !> This subroutine gives the temporally varying amplitude of the pulse diff --git a/src/simulation/m_body_forces.fpp b/src/simulation/m_body_forces.fpp index 9bfb37632..5eabe3519 100644 --- a/src/simulation/m_body_forces.fpp +++ b/src/simulation/m_body_forces.fpp @@ -85,7 +85,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() end subroutine s_compute_mixture_density @@ -114,7 +114,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() if (bf_x) then ! x-direction body forces @@ -129,7 +129,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() end if if (bf_y) then ! y-direction body forces @@ -145,7 +145,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() end if if (bf_z) then ! z-direction body forces @@ -161,7 +161,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() end if diff --git a/src/simulation/m_bubbles_EE.fpp b/src/simulation/m_bubbles_EE.fpp index 604fbb4de..3a7fc2fc9 100644 --- a/src/simulation/m_bubbles_EE.fpp +++ b/src/simulation/m_bubbles_EE.fpp @@ -89,7 +89,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() end subroutine s_comp_alpha_from_n @@ -116,7 +116,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() end if elseif (idir == 2) then @@ -132,7 +132,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() elseif (idir == 3) then @@ -147,7 +147,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() end if @@ -193,10 +193,10 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() adap_dt_stop_max = 0 - $:GPU_PARALLEL_LOOP(private='[j,k,l]', collapse=3, private='[Rtmp, Vtmp, myalpha_rho, myalpha]', & + $:GPU_PARALLEL_LOOP(private='[j,k,l,Rtmp, Vtmp, myalpha_rho, myalpha]', collapse=3, & & reduction='[[adap_dt_stop_max]]', reductionOp='[MAX]', & & copy='[adap_dt_stop_max]') do l = 0, p @@ -326,7 +326,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() if (adap_dt .and. adap_dt_stop_max > 0) call s_mpi_abort("Adaptive time stepping failed to converge.") @@ -350,7 +350,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() end if end subroutine s_compute_bubble_EE_source diff --git a/src/simulation/m_bubbles_EL.fpp b/src/simulation/m_bubbles_EL.fpp index da3db0994..2fc41a880 100644 --- a/src/simulation/m_bubbles_EL.fpp +++ b/src/simulation/m_bubbles_EL.fpp @@ -609,7 +609,7 @@ contains bub_dphidt(k) = bub_dphidt(k)/(1._wp - term1_fac) end if end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() end if ! Radial motion model @@ -680,7 +680,7 @@ contains adap_dt_stop_max = max(adap_dt_stop_max, adap_dt_stop) end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() if (adap_dt .and. adap_dt_stop_max > 0) call s_mpi_abort("Adaptive time stepping failed to converge.") @@ -692,7 +692,7 @@ contains mtn_dveldt(k, l, stage) = 0._wp end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() call nvtxEndRange @@ -732,7 +732,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() else $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=4) do k = 0, p @@ -748,7 +748,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() end if do l = 1, num_dims @@ -769,7 +769,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() !source in energy $:GPU_PARALLEL_LOOP(private='[i,j,k]', collapse=3) @@ -780,7 +780,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() call s_gradient_dir(q_beta%vf(3), q_beta%vf(4), l) @@ -797,7 +797,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() end do end if @@ -853,7 +853,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() call s_smoothfunction(nBubs, intfc_rad, intfc_vel, & mtn_s, mtn_pos, q_beta) @@ -870,7 +870,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() call nvtxEndRange @@ -1111,7 +1111,7 @@ contains gas_p(k, 1) = gas_p(k, 1) + dt*gas_dpdt(k, 1) gas_mv(k, 1) = gas_mv(k, 1) + dt*gas_dmvdt(k, 1) end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() call s_transfer_data_to_tmp() call s_write_void_evol(mytime) @@ -1124,7 +1124,7 @@ contains elseif (time_stepper == 2) then ! 2nd order TVD RK if (stage == 1) then - $:GPU_PARALLEL_LOOP(rivate='[k]') + $:GPU_PARALLEL_LOOP(private='[k]') do k = 1, nBubs !u{1} = u{n} + dt * RHS{n} intfc_rad(k, 2) = intfc_rad(k, 1) + dt*intfc_draddt(k, 1) @@ -1134,7 +1134,7 @@ contains gas_p(k, 2) = gas_p(k, 1) + dt*gas_dpdt(k, 1) gas_mv(k, 2) = gas_mv(k, 1) + dt*gas_dmvdt(k, 1) end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() elseif (stage == 2) then $:GPU_PARALLEL_LOOP(private='[k]') @@ -1147,7 +1147,7 @@ contains gas_p(k, 1) = gas_p(k, 1) + dt*(gas_dpdt(k, 1) + gas_dpdt(k, 2))/2._wp gas_mv(k, 1) = gas_mv(k, 1) + dt*(gas_dmvdt(k, 1) + gas_dmvdt(k, 2))/2._wp end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() call s_transfer_data_to_tmp() call s_write_void_evol(mytime) @@ -1172,7 +1172,7 @@ contains gas_p(k, 2) = gas_p(k, 1) + dt*gas_dpdt(k, 1) gas_mv(k, 2) = gas_mv(k, 1) + dt*gas_dmvdt(k, 1) end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() elseif (stage == 2) then $:GPU_PARALLEL_LOOP(private='[k]') @@ -1185,7 +1185,7 @@ contains gas_p(k, 2) = gas_p(k, 1) + dt*(gas_dpdt(k, 1) + gas_dpdt(k, 2))/4._wp gas_mv(k, 2) = gas_mv(k, 1) + dt*(gas_dmvdt(k, 1) + gas_dmvdt(k, 2))/4._wp end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() elseif (stage == 3) then $:GPU_PARALLEL_LOOP(private='[k]') do k = 1, nBubs @@ -1197,7 +1197,7 @@ contains gas_p(k, 1) = gas_p(k, 1) + (2._wp/3._wp)*dt*(gas_dpdt(k, 1)/4._wp + gas_dpdt(k, 2)/4._wp + gas_dpdt(k, 3)) gas_mv(k, 1) = gas_mv(k, 1) + (2._wp/3._wp)*dt*(gas_dmvdt(k, 1)/4._wp + gas_dmvdt(k, 2)/4._wp + gas_dmvdt(k, 3)) end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() call s_transfer_data_to_tmp() call s_write_void_evol(mytime) @@ -1285,7 +1285,7 @@ contains mtn_vel(k, 1:3, 2) = mtn_vel(k, 1:3, 1) mtn_s(k, 1:3, 2) = mtn_s(k, 1:3, 1) end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() end subroutine s_transfer_data_to_tmp @@ -1385,7 +1385,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() elseif (dir == 2) then ! Gradient in y dir. $:GPU_PARALLEL_LOOP(private='[i,j,k]', collapse=3) @@ -1400,7 +1400,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() elseif (dir == 3) then ! Gradient in z dir. $:GPU_PARALLEL_LOOP(private='[i,j,k]', collapse=3) @@ -1415,7 +1415,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() end if end subroutine s_gradient_dir @@ -1524,7 +1524,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() #ifdef MFC_MPI if (num_procs > 1) then @@ -1715,7 +1715,7 @@ contains Rmax_stats(k) = max(Rmax_stats(k), intfc_rad(k, 1)/bub_R0(k)) Rmin_stats(k) = min(Rmin_stats(k), intfc_rad(k, 1)/bub_R0(k)) end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() end subroutine s_calculate_lag_bubble_stats diff --git a/src/simulation/m_bubbles_EL_kernels.fpp b/src/simulation/m_bubbles_EL_kernels.fpp index bd592acdc..7ee813bed 100644 --- a/src/simulation/m_bubbles_EL_kernels.fpp +++ b/src/simulation/m_bubbles_EL_kernels.fpp @@ -90,7 +90,7 @@ contains updatedvar%vf(5)%sf(cell(1), cell(2), cell(3)) = updatedvar%vf(5)%sf(cell(1), cell(2), cell(3)) + addFun3 end if end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() end subroutine s_deltafunc @@ -196,7 +196,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() end subroutine s_gaussian diff --git a/src/simulation/m_cbc.fpp b/src/simulation/m_cbc.fpp index 7106fdf09..29332f0c5 100644 --- a/src/simulation/m_cbc.fpp +++ b/src/simulation/m_cbc.fpp @@ -721,7 +721,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() $:GPU_PARALLEL_LOOP(private='[i,r,k]', collapse=3) do i = advxb, advxe @@ -734,7 +734,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() ! PI4 of flux_rs_vf and flux_src_rs_vf at j = 1/2, 3/2 else @@ -762,7 +762,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() $:GPU_PARALLEL_LOOP(private='[i,j,r,k]', collapse=4) do i = advxb, advxe @@ -783,7 +783,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() end if @@ -1126,7 +1126,7 @@ contains end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() end if #:endfor @@ -1198,7 +1198,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() $:GPU_PARALLEL_LOOP(private='[j,k,r]', collapse=3) do r = is3%beg, is3%end @@ -1210,7 +1210,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() $:GPU_PARALLEL_LOOP(private='[i,j,k,r]', collapse=4) do i = 1, flux_cbc_index @@ -1224,7 +1224,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() $:GPU_PARALLEL_LOOP(private='[j,k,r]', collapse=3) do r = is3%beg, is3%end @@ -1235,7 +1235,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() if (riemann_solver == 1) then $:GPU_PARALLEL_LOOP(private='[i,j,k,r]', collapse=4) @@ -1249,7 +1249,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() else $:GPU_PARALLEL_LOOP(private='[j,k,r]', collapse=3) do r = is3%beg, is3%end @@ -1261,7 +1261,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() end if ! END: Reshaping Inputted Data in x-direction @@ -1280,7 +1280,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() $:GPU_PARALLEL_LOOP(private='[j,k,r]', collapse=3) do r = is3%beg, is3%end @@ -1292,7 +1292,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() $:GPU_PARALLEL_LOOP(private='[i,j,k,r]', collapse=4) do i = 1, flux_cbc_index @@ -1306,7 +1306,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() $:GPU_PARALLEL_LOOP(private='[j,k,r]', collapse=3) do r = is3%beg, is3%end @@ -1317,7 +1317,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() if (riemann_solver == 1) then $:GPU_PARALLEL_LOOP(private='[i,j,k,r]', collapse=4) @@ -1331,7 +1331,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() else $:GPU_PARALLEL_LOOP(private='[j,k,r]', collapse=3) do r = is3%beg, is3%end @@ -1343,7 +1343,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() end if ! END: Reshaping Inputted Data in y-direction @@ -1362,7 +1362,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() $:GPU_PARALLEL_LOOP(private='[j,k,r]', collapse=3) do r = is3%beg, is3%end @@ -1374,7 +1374,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() $:GPU_PARALLEL_LOOP(private='[i,j,k,r]', collapse=4) do i = 1, flux_cbc_index @@ -1388,7 +1388,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() $:GPU_PARALLEL_LOOP(private='[j,k,r]', collapse=3) do r = is3%beg, is3%end @@ -1399,7 +1399,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() if (riemann_solver == 1) then $:GPU_PARALLEL_LOOP(private='[i,j,k,r]', collapse=4) @@ -1413,7 +1413,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() else $:GPU_PARALLEL_LOOP(private='[j,k,r]', collapse=3) do r = is3%beg, is3%end @@ -1425,7 +1425,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() end if end if @@ -1467,7 +1467,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() $:GPU_PARALLEL_LOOP(private='[j,k,r]', collapse=3) do r = is3%beg, is3%end do k = is2%beg, is2%end @@ -1477,7 +1477,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() if (riemann_solver == 1) then $:GPU_PARALLEL_LOOP(private='[i,j,k,r]', collapse=4) @@ -1491,7 +1491,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() else $:GPU_PARALLEL_LOOP(private='[j,k,r]', collapse=3) do r = is3%beg, is3%end @@ -1503,7 +1503,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() end if ! END: Reshaping Outputted Data in x-direction @@ -1522,7 +1522,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() $:GPU_PARALLEL_LOOP(private='[j,k,r]', collapse=3) do r = is3%beg, is3%end @@ -1533,7 +1533,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() if (riemann_solver == 1) then $:GPU_PARALLEL_LOOP(private='[i,j,k,r]', collapse=4) @@ -1547,7 +1547,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() else $:GPU_PARALLEL_LOOP(private='[j,k,r]', collapse=3) do r = is3%beg, is3%end @@ -1559,7 +1559,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() end if ! END: Reshaping Outputted Data in y-direction @@ -1579,7 +1579,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() $:GPU_PARALLEL_LOOP(private='[j,k,r]', collapse=3) do r = is3%beg, is3%end @@ -1590,7 +1590,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() if (riemann_solver == 1) then $:GPU_PARALLEL_LOOP(private='[i,j,k,r]', collapse=4) @@ -1604,7 +1604,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() else $:GPU_PARALLEL_LOOP(private='[j,k,r]', collapse=3) do r = is3%beg, is3%end @@ -1616,7 +1616,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() end if end if diff --git a/src/simulation/m_data_output.fpp b/src/simulation/m_data_output.fpp index 542af075a..aabf1f1c8 100644 --- a/src/simulation/m_data_output.fpp +++ b/src/simulation/m_data_output.fpp @@ -296,7 +296,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() ! end: Computing Stability Criteria at Current Time-step diff --git a/src/simulation/m_derived_variables.fpp b/src/simulation/m_derived_variables.fpp index 14b08c84f..93151024c 100644 --- a/src/simulation/m_derived_variables.fpp +++ b/src/simulation/m_derived_variables.fpp @@ -163,7 +163,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() $:GPU_UPDATE(host='[accel_mag]') @@ -215,7 +215,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() if (n == 0) then $:GPU_PARALLEL_LOOP(private='[j,k,l,r]', collapse=4) @@ -230,7 +230,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() elseif (p == 0) then $:GPU_PARALLEL_LOOP(private='[j,k,l,r]', collapse=4) do l = 0, p @@ -246,7 +246,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() else if (grid_geometry == 3) then $:GPU_PARALLEL_LOOP(private='[j,k,l,r]', collapse=4) @@ -265,7 +265,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() else $:GPU_PARALLEL_LOOP(private='[j,k,l,r]', collapse=4) do l = 0, p @@ -283,7 +283,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() end if end if ! Computing the acceleration component in the y-coordinate direction @@ -299,7 +299,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() if (p == 0) then $:GPU_PARALLEL_LOOP(private='[j,k,l,r]', collapse=4) @@ -316,7 +316,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() else if (grid_geometry == 3) then $:GPU_PARALLEL_LOOP(private='[j,k,l,r]', collapse=4) @@ -336,7 +336,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() else $:GPU_PARALLEL_LOOP(private='[j,k,l,r]', collapse=4) do l = 0, p @@ -354,7 +354,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() end if end if ! Computing the acceleration component in the z-coordinate direction @@ -370,7 +370,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() if (grid_geometry == 3) then $:GPU_PARALLEL_LOOP(private='[j,k,l,r]', collapse=4) @@ -391,7 +391,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() else $:GPU_PARALLEL_LOOP(private='[j,k,l,r]', collapse=4) do l = 0, p @@ -409,7 +409,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() end if end if @@ -458,7 +458,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() elseif (p == 0) then !2D simulation $:GPU_PARALLEL_LOOP(collapse=3,private='[j,k,l,dV]') do l = 0, p !Loop over grid @@ -483,7 +483,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() else !3D simulation $:GPU_PARALLEL_LOOP(collapse=3,private='[j,k,l,dV]') do l = 0, p !Loop over grid @@ -512,7 +512,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() end if $:GPU_UPDATE(host='[c_m]') diff --git a/src/simulation/m_fftw.fpp b/src/simulation/m_fftw.fpp index 7f7cc6582..975e32f89 100644 --- a/src/simulation/m_fftw.fpp +++ b/src/simulation/m_fftw.fpp @@ -149,7 +149,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() $:GPU_PARALLEL_LOOP(private='[j,k,l]', collapse=3) do k = 1, sys_size @@ -159,7 +159,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() #:if not USING_NVHPC p_real => data_real_gpu @@ -187,7 +187,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() #:call GPU_HOST_DATA(use_device_ptr='[p_real, p_fltr_cmplx]') #if defined(__PGI) @@ -207,7 +207,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() do i = 1, fourier_rings @@ -219,7 +219,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() $:GPU_PARALLEL_LOOP(private='[j,k,l]', collapse=3, firstprivate='[i]') do k = 1, sys_size @@ -229,7 +229,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() #:call GPU_HOST_DATA(use_device_ptr='[p_real, p_cmplx]') #if defined(__PGI) @@ -251,7 +251,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() #:call GPU_HOST_DATA(use_device_ptr='[p_real, p_fltr_cmplx]') #if defined(__PGI) @@ -271,7 +271,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() end do #:endcall GPU_DATA diff --git a/src/simulation/m_hyperelastic.fpp b/src/simulation/m_hyperelastic.fpp index 581b5f7bf..ef85f272e 100644 --- a/src/simulation/m_hyperelastic.fpp +++ b/src/simulation/m_hyperelastic.fpp @@ -207,7 +207,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() end subroutine s_hyperelastic_rmt_stress_update !> The following subroutine handles the calculation of the btensor. diff --git a/src/simulation/m_hypoelastic.fpp b/src/simulation/m_hypoelastic.fpp index b973d90b5..36d47213a 100644 --- a/src/simulation/m_hypoelastic.fpp +++ b/src/simulation/m_hypoelastic.fpp @@ -112,7 +112,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() $:GPU_PARALLEL_LOOP(private='[k,l,q]', collapse=3) do q = 0, p @@ -127,7 +127,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() if (ndirs > 1) then $:GPU_PARALLEL_LOOP(private='[k,l,q]', collapse=3) @@ -138,7 +138,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() $:GPU_PARALLEL_LOOP(private='[k,l,q]', collapse=3) do q = 0, p @@ -156,7 +156,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() ! 3D if (ndirs == 3) then @@ -170,7 +170,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() $:GPU_PARALLEL_LOOP(private='[k,l,q]', collapse=3) do q = 0, p @@ -192,7 +192,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() end if end if @@ -218,7 +218,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() ! apply rhs source term to elastic stress equation $:GPU_PARALLEL_LOOP(private='[k,l,q]', collapse=3) @@ -233,7 +233,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() elseif (idir == 2) then $:GPU_PARALLEL_LOOP(private='[k,l,q]', collapse=3) @@ -269,7 +269,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() elseif (idir == 3) then $:GPU_PARALLEL_LOOP(private='[k,l,q]', collapse=3) @@ -336,7 +336,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() end if if (cyl_coord .and. idir == 2) then @@ -369,7 +369,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() end if @@ -408,10 +408,10 @@ contains do k = 0, m rhs_vf(damage_idx)%sf(k, l, q) = (alpha_bar*max(abs(q_cons_vf(stress_idx%beg)%sf(k, l, q)) - tau_star, 0._wp))**cont_damage_s end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() elseif (p == 0) then q = 0 - $:GPU_PARALLEL_LOOP(private='[k,l]', copyin='[q]' collapse=2) + $:GPU_PARALLEL_LOOP(private='[k,l]', copyin='[q]', collapse=2) do l = 0, n do k = 0, m ! Maximum principal stress @@ -424,7 +424,7 @@ contains rhs_vf(damage_idx)%sf(k, l, q) = (alpha_bar*max(tau_p - tau_star, 0._wp))**cont_damage_s end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() else $:GPU_PARALLEL_LOOP(private='[k,l,q]', collapse=3) do q = 0, p @@ -463,7 +463,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() end if end subroutine s_compute_damage_state diff --git a/src/simulation/m_ibm.fpp b/src/simulation/m_ibm.fpp index 5b32e68d6..4501b52f5 100644 --- a/src/simulation/m_ibm.fpp +++ b/src/simulation/m_ibm.fpp @@ -365,7 +365,7 @@ contains end do end if end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() end if !Correct the state of the inner points in IBs @@ -383,7 +383,7 @@ contains q_cons_vf(q)%sf(j, k, l) = 0._wp end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() end if end subroutine s_ibm_correct_state diff --git a/src/simulation/m_igr.fpp b/src/simulation/m_igr.fpp index 46cc0b385..a5e757f06 100644 --- a/src/simulation/m_igr.fpp +++ b/src/simulation/m_igr.fpp @@ -171,7 +171,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() if (p == 0) then alf_igr = alf_factor*max(dx(1), dy(1))**2._wp @@ -307,7 +307,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() call s_populate_F_igr_buffers(bc_type, jac_sf) @@ -320,7 +320,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() end if end do @@ -391,7 +391,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() end subroutine s_igr_sigma_x @@ -797,7 +797,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() else $:GPU_PARALLEL_LOOP(collapse=3, private='[j,k,l,rho_L, rho_R, gamma_L, gamma_R, pi_inf_L, pi_inf_R, mu_L, mu_R, vel_L, vel_R, pres_L, pres_R, alpha_L, alpha_R, alpha_rho_L, alpha_rho_R, F_L, F_R, E_L, E_R, cfl, dvel, dvel_small, rho_sf_small, vflux_L_arr, vflux_R_arr]') do l = 0, p @@ -1268,7 +1268,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() end if else if (idir == 2) then if (p == 0) then @@ -1639,7 +1639,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() else $:GPU_PARALLEL_LOOP(collapse=3, private='[j,k,l,rho_L, rho_R, gamma_L, gamma_R, pi_inf_L, pi_inf_R, mu_L, mu_R, vel_L, vel_R, pres_L, pres_R, alpha_L, alpha_R, alpha_rho_L, alpha_rho_R, F_L, F_R, E_L, E_R, cfl, dvel_small, rho_sf_small, vflux_L_arr, vflux_R_arr]') do l = 0, p @@ -2095,7 +2095,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() end if elseif (idir == 3) then $:GPU_PARALLEL_LOOP(collapse=3, private='[j,k,l,rho_L, rho_R, gamma_L, gamma_R, pi_inf_L, pi_inf_R, mu_L, mu_R, vel_L, vel_R, pres_L, pres_R, alpha_L, alpha_R, alpha_rho_L, alpha_rho_R, F_L, F_R, E_L, E_R, cfl, dvel_small, rho_sf_small, vflux_L_arr, vflux_R_arr]') @@ -2551,7 +2551,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() end if end subroutine s_igr_riemann_solver @@ -2623,7 +2623,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() elseif (idir == 2) then $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=4) do i = 1, sys_size @@ -2638,7 +2638,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() elseif (idir == 3) then $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=4) do i = 1, sys_size @@ -2653,7 +2653,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() end if end subroutine s_igr_flux_add diff --git a/src/simulation/m_mhd.fpp b/src/simulation/m_mhd.fpp index 4e46dd459..a4abbd9ef 100644 --- a/src/simulation/m_mhd.fpp +++ b/src/simulation/m_mhd.fpp @@ -129,7 +129,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() end subroutine s_compute_mhd_powell_rhs diff --git a/src/simulation/m_mpi_proxy.fpp b/src/simulation/m_mpi_proxy.fpp index 017906a7d..6dfddcc90 100644 --- a/src/simulation/m_mpi_proxy.fpp +++ b/src/simulation/m_mpi_proxy.fpp @@ -322,7 +322,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() #:elif mpi_dir == 2 $:GPU_PARALLEL_LOOP(collapse=3,private='[j,k,l,r]') do l = 0, p @@ -334,7 +334,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() #:else $:GPU_PARALLEL_LOOP(collapse=3,private='[j,k,l,r]') do l = 0, buff_size - 1 @@ -346,7 +346,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() #:endif end if #:endfor @@ -399,7 +399,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() #:elif mpi_dir == 2 $:GPU_PARALLEL_LOOP(collapse=3,private='[j,k,l,r]') do l = 0, p @@ -411,7 +411,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() #:else ! Unpacking buffer from bc_z%beg $:GPU_PARALLEL_LOOP(collapse=3,private='[j,k,l,r]') @@ -425,7 +425,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() #:endif end if #:endfor diff --git a/src/simulation/m_muscl.fpp b/src/simulation/m_muscl.fpp index 748aafb8c..b0e046fa9 100644 --- a/src/simulation/m_muscl.fpp +++ b/src/simulation/m_muscl.fpp @@ -129,7 +129,7 @@ contains end do end do end do - #:endcall + $:END_OMP_PARALLEL_LOOP() else if (muscl_dir == 2) then $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=4) do i = 1, ubound(v_vf, 1) @@ -142,7 +142,7 @@ contains end do end do end do - #:endcall + $:END_OMP_PARALLEL_LOOP() else if (muscl_dir == 3) then $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=4) do i = 1, ubound(v_vf, 1) @@ -155,7 +155,7 @@ contains end do end do end do - #:endcall + $:END_OMP_PARALLEL_LOOP() end if else if (muscl_order == 2) then @@ -212,7 +212,7 @@ contains end do end do end do - #:endcall + $:END_OMP_PARALLEL_LOOP() end if #:endfor end if @@ -296,7 +296,7 @@ contains end do end do end do - #:endcall + $:END_OMP_PARALLEL_LOOP() end if #:endfor @@ -328,7 +328,7 @@ contains end do end do end do - #:endcall + $:END_OMP_PARALLEL_LOOP() end if ! Reshaping/Projecting onto Characteristic Fields in y-direction @@ -345,7 +345,7 @@ contains end do end do end do - #:endcall + $:END_OMP_PARALLEL_LOOP() end if ! Reshaping/Projecting onto Characteristic Fields in z-direction @@ -361,7 +361,7 @@ contains end do end do end do - #:endcall + $:END_OMP_PARALLEL_LOOP() end if end subroutine s_initialize_muscl diff --git a/src/simulation/m_pressure_relaxation.fpp b/src/simulation/m_pressure_relaxation.fpp index 1a3c2795c..5e404e7dd 100644 --- a/src/simulation/m_pressure_relaxation.fpp +++ b/src/simulation/m_pressure_relaxation.fpp @@ -78,7 +78,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() end subroutine s_pressure_relaxation_procedure diff --git a/src/simulation/m_qbmm.fpp b/src/simulation/m_qbmm.fpp index cd160ec81..5e16b7c64 100644 --- a/src/simulation/m_qbmm.fpp +++ b/src/simulation/m_qbmm.fpp @@ -534,7 +534,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() end if ! The following block is not repeated and is left as is @@ -558,7 +558,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() end if end subroutine s_compute_qbmm_rhs @@ -851,7 +851,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() contains ! Helper to select the correct coefficient routine diff --git a/src/simulation/m_rhs.fpp b/src/simulation/m_rhs.fpp index 71b3337d3..20abde824 100644 --- a/src/simulation/m_rhs.fpp +++ b/src/simulation/m_rhs.fpp @@ -576,7 +576,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() end if ! end allocation for .not. igr @@ -657,7 +657,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() ! Converting Conservative to Primitive Variables @@ -679,7 +679,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() end if end if @@ -749,7 +749,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() end if call nvtxStartRange("IGR_RIEMANN") @@ -982,7 +982,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() end if ! Additional Physics and Source Temrs @@ -1047,7 +1047,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() end if end if @@ -1102,7 +1102,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() end if select case (idir) @@ -1127,7 +1127,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() if (model_eqns == 3) then $:GPU_PARALLEL_LOOP(collapse=4,private='[i_fluid_loop,k_loop,l_loop,q_loop,inv_ds,advected_qty_val, pressure_val,flux_face1,flux_face2]') @@ -1147,7 +1147,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() end if call s_add_directional_advection_source_terms(idir, rhs_vf, q_cons_vf, q_prim_vf, flux_src_n_vf, Kterm) @@ -1173,7 +1173,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() if (model_eqns == 3) then $:GPU_PARALLEL_LOOP(collapse=4,private='[i_fluid_loop,k,l,q,inv_ds,advected_qty_val, pressure_val,flux_face1,flux_face2]') @@ -1198,7 +1198,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() end if if (cyl_coord) then @@ -1215,7 +1215,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() end if call s_add_directional_advection_source_terms(idir, rhs_vf, q_cons_vf, q_prim_vf, flux_src_n_vf, Kterm) @@ -1244,7 +1244,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() $:GPU_PARALLEL_LOOP(collapse=4,private='[j,k,l,q,flux_face1,flux_face2]') do j = 1, sys_size do k = 0, p @@ -1258,7 +1258,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() else ! Cartesian Coordinates $:GPU_PARALLEL_LOOP(collapse=4,private='[j,k,l,q,inv_ds,flux_face1,flux_face2]') do j = 1, sys_size @@ -1273,7 +1273,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() end if if (model_eqns == 3) then @@ -1294,7 +1294,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() end if call s_add_directional_advection_source_terms(idir, rhs_vf, q_cons_vf, q_prim_vf, flux_src_n_vf, Kterm) @@ -1337,7 +1337,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() else ! Other Riemann solvers if (alt_soundspeed) then if (bubbles_euler .neqv. .true.) then @@ -1352,7 +1352,7 @@ contains rhs_vf_arg(advxe)%sf(k_idx, l_idx, q_idx) = rhs_vf_arg(advxe)%sf(k_idx, l_idx, q_idx) + & local_inv_ds*local_term_coeff*(local_flux1 - local_flux2) end do; end do; end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() $:GPU_PARALLEL_LOOP(collapse=3, private='[k_idx,l_idx,q_idx,local_inv_ds,local_q_cons_val, local_k_term_val,local_term_coeff, local_flux1, local_flux2]') do q_idx = 0, p; do l_idx = 0, n; do k_idx = 0, m @@ -1365,7 +1365,7 @@ contains rhs_vf_arg(advxb)%sf(k_idx, l_idx, q_idx) = rhs_vf_arg(advxb)%sf(k_idx, l_idx, q_idx) + & local_inv_ds*local_term_coeff*(local_flux1 - local_flux2) end do; end do; end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() end if else ! NOT alt_soundspeed $:GPU_PARALLEL_LOOP(collapse=4,private='[j_adv,k_idx,l_idx,q_idx,local_inv_ds, local_term_coeff,local_flux1,local_flux2]') @@ -1379,7 +1379,7 @@ contains local_inv_ds*local_term_coeff*(local_flux1 - local_flux2) end do; end do; end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() end if end if @@ -1401,7 +1401,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() else ! Other Riemann solvers if (alt_soundspeed) then if (bubbles_euler .neqv. .true.) then @@ -1420,7 +1420,7 @@ contains (local_k_term_val/(2._wp*y_cc(k_idx)))*(local_flux1 + local_flux2) end if end do; end do; end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() $:GPU_PARALLEL_LOOP(collapse=3, private='[k_idx,l_idx,q_idx,local_inv_ds, local_q_cons_val, local_k_term_val,local_term_coeff, local_flux1, local_flux2]') do l_idx = 0, p; do k_idx = 0, n; do q_idx = 0, m @@ -1437,7 +1437,7 @@ contains (local_k_term_val/(2._wp*y_cc(k_idx)))*(local_flux1 + local_flux2) end if end do; end do; end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() end if else ! NOT alt_soundspeed $:GPU_PARALLEL_LOOP(collapse=4,private='[j_adv,k_idx,l_idx,q_idx,local_inv_ds, local_term_coeff,local_flux1,local_flux2]') @@ -1451,7 +1451,7 @@ contains local_inv_ds*local_term_coeff*(local_flux1 - local_flux2) end do; end do; end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() end if end if @@ -1478,7 +1478,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() else ! Other Riemann solvers if (alt_soundspeed) then if (bubbles_euler .neqv. .true.) then @@ -1493,7 +1493,7 @@ contains rhs_vf_arg(advxe)%sf(l_idx, q_idx, k_idx) = rhs_vf_arg(advxe)%sf(l_idx, q_idx, k_idx) + & local_inv_ds*local_term_coeff*(local_flux1 - local_flux2) end do; end do; end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() $:GPU_PARALLEL_LOOP(collapse=3, private='[k_idx,l_idx,q_idx,local_inv_ds, local_q_cons_val, local_k_term_val, local_term_coeff, local_flux1, local_flux2]') do k_idx = 0, p; do q_idx = 0, n; do l_idx = 0, m @@ -1506,7 +1506,7 @@ contains rhs_vf_arg(advxb)%sf(l_idx, q_idx, k_idx) = rhs_vf_arg(advxb)%sf(l_idx, q_idx, k_idx) + & local_inv_ds*local_term_coeff*(local_flux1 - local_flux2) end do; end do; end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() end if else ! NOT alt_soundspeed $:GPU_PARALLEL_LOOP(collapse=4, private='[j_adv,k_idx,l_idx,q_idx,local_inv_ds, local_term_coeff,local_flux1,local_flux2]') @@ -1520,7 +1520,7 @@ contains local_inv_ds*local_term_coeff*(local_flux1 - local_flux2) end do; end do; end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() end if end if end select @@ -1554,7 +1554,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() end if if ((surface_tension .or. viscous) .or. chem_params%diffusion) then @@ -1591,7 +1591,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() end if elseif (idir == 2) then ! y-direction @@ -1609,7 +1609,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() end if if (cyl_coord .and. ((bc_y%beg == -2) .or. (bc_y%beg == -14))) then @@ -1642,7 +1642,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() end if @@ -1660,7 +1660,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() else @@ -1697,7 +1697,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() end if end if @@ -1720,7 +1720,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() if (viscous) then $:GPU_PARALLEL_LOOP(private='[i,j,l]', collapse=2) @@ -1734,7 +1734,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() end if else @@ -1752,7 +1752,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() end if end if @@ -1771,7 +1771,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() end if if ((surface_tension .or. viscous) .or. chem_params%diffusion) then @@ -1807,7 +1807,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() end if if (grid_geometry == 3) then @@ -1827,7 +1827,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() end if end if @@ -1950,7 +1950,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() else if (recon_dir == 2) then $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=4) do i = iv%beg, iv%end @@ -1963,7 +1963,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() else if (recon_dir == 3) then $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=4) do i = iv%beg, iv%end @@ -1976,7 +1976,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() end if end if #:endfor diff --git a/src/simulation/m_riemann_solvers.fpp b/src/simulation/m_riemann_solvers.fpp index 252e02351..d2cce0820 100644 --- a/src/simulation/m_riemann_solvers.fpp +++ b/src/simulation/m_riemann_solvers.fpp @@ -974,7 +974,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() end if #:endfor @@ -1635,7 +1635,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() end if #:endfor @@ -1832,7 +1832,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() end if @@ -2383,7 +2383,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() elseif (model_eqns == 4) then !ME4 @@ -2623,7 +2623,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() elseif (model_eqns == 2 .and. bubbles_euler) then $:GPU_PARALLEL_LOOP(collapse=3, private='[i,j,k,l,R0_L, R0_R, V0_L, V0_R, P0_L, P0_R, pbw_L, pbw_R, vel_L, vel_R, rho_avg, alpha_L, alpha_R, h_avg, gamma_avg, s_L, s_R, s_S, nbub_L, nbub_R, ptilde_L, ptilde_R, vel_avg_rms, Re_L, Re_R, pcorr, zcoef, vel_L_tmp, vel_R_tmp]') @@ -3061,7 +3061,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() else ! 5-EQUATION MODEL WITH HLLC $:GPU_PARALLEL_LOOP(collapse=3, private='[i,j,k,l,T_L, T_R, vel_L, vel_R, Re_L, Re_R, rho_avg, h_avg, gamma_avg, alpha_L, alpha_R, s_L, s_R, s_S, vel_avg_rms, pcorr, zcoef, vel_L_tmp, vel_R_tmp, Ys_L, Ys_R, Xs_L, Xs_R, Gamma_iL, Gamma_iR, Cp_iL, Cp_iR, tau_e_L, tau_e_R, xi_field_L, xi_field_R, Yi_avg,Phi_avg, h_iL, h_iR, h_avg_2]', copyin='[is1, is2, is3]') @@ -3541,7 +3541,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() end if end if #:endfor @@ -3827,7 +3827,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() #:endblock UNDEF_AMD end if #:endfor @@ -4040,7 +4040,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() if (viscous) then $:GPU_PARALLEL_LOOP(private='[i,k,l]', collapse=3) @@ -4053,7 +4053,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() if (n > 0) then $:GPU_PARALLEL_LOOP(private='[i,k,l]', collapse=3) @@ -4066,7 +4066,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() if (p > 0) then $:GPU_PARALLEL_LOOP(private='[i,k,l]', collapse=3) @@ -4079,7 +4079,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() end if end if @@ -4099,7 +4099,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() if (viscous) then @@ -4113,7 +4113,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() if (n > 0) then $:GPU_PARALLEL_LOOP(private='[i,k,l]', collapse=3) @@ -4126,7 +4126,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() if (p > 0) then $:GPU_PARALLEL_LOOP(private='[i,k,l]', collapse=3) @@ -4139,7 +4139,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() end if end if @@ -4162,7 +4162,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() if (viscous) then @@ -4175,7 +4175,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() $:GPU_PARALLEL_LOOP(private='[i,j,l]', collapse=3) do i = momxb, momxe @@ -4186,7 +4186,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() if (p > 0) then $:GPU_PARALLEL_LOOP(private='[i,j,l]', collapse=3) @@ -4198,7 +4198,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() end if end if @@ -4216,7 +4216,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() if (viscous) then @@ -4229,7 +4229,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() $:GPU_PARALLEL_LOOP(private='[i,j,l]', collapse=3) do i = momxb, momxe @@ -4240,7 +4240,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() if (p > 0) then $:GPU_PARALLEL_LOOP(private='[i,j,l]', collapse=3) @@ -4252,7 +4252,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() end if end if @@ -4273,7 +4273,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() if (viscous) then $:GPU_PARALLEL_LOOP(private='[i,k,j]', collapse=3) @@ -4285,7 +4285,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() $:GPU_PARALLEL_LOOP(private='[i,k,j]', collapse=3) do i = momxb, momxe do k = isy%beg, isy%end @@ -4295,7 +4295,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() $:GPU_PARALLEL_LOOP(private='[i,k,j]', collapse=3) do i = momxb, momxe do k = isy%beg, isy%end @@ -4305,7 +4305,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() end if end if @@ -4321,7 +4321,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() if (viscous) then $:GPU_PARALLEL_LOOP(private='[i,k,j]', collapse=3) @@ -4333,7 +4333,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() $:GPU_PARALLEL_LOOP(private='[i,k,j]', collapse=3) do i = momxb, momxe @@ -4344,7 +4344,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() $:GPU_PARALLEL_LOOP(private='[i,k,j]', collapse=3) do i = momxb, momxe @@ -4355,7 +4355,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() end if end if @@ -4409,7 +4409,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() end if if (chem_params%diffusion) then @@ -4425,7 +4425,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() end if if (qbmm) then @@ -4439,7 +4439,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() end if ! Reshaping Inputted Data in y-direction @@ -4456,7 +4456,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() end if if (chem_params%diffusion) then @@ -4472,7 +4472,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() end if if (qbmm) then @@ -4486,7 +4486,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() end if ! Reshaping Inputted Data in z-direction @@ -4503,7 +4503,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() end if if (chem_params%diffusion) then @@ -4519,7 +4519,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() end if if (qbmm) then @@ -4533,7 +4533,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() end if end if @@ -4696,7 +4696,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() end subroutine s_compute_cylindrical_viscous_source_flux @@ -4831,7 +4831,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() end subroutine s_compute_cartesian_viscous_source_flux @@ -4923,7 +4923,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() if (cyl_coord) then $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=4) @@ -4937,7 +4937,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() end if $:GPU_PARALLEL_LOOP(private='[j,k,l]', collapse=3) @@ -4949,7 +4949,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() if (riemann_solver == 1 .or. riemann_solver == 4) then $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=4) @@ -4963,7 +4963,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() end if ! Reshaping Outputted Data in z-direction @@ -4980,7 +4980,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() if (grid_geometry == 3) then $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=4) do i = 1, sys_size @@ -4994,7 +4994,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() end if $:GPU_PARALLEL_LOOP(private='[i,k,l]', collapse=3) @@ -5006,7 +5006,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() if (riemann_solver == 1 .or. riemann_solver == 4) then $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=4) @@ -5020,7 +5020,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() end if elseif (norm_dir == 1) then @@ -5035,7 +5035,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() $:GPU_PARALLEL_LOOP(private='[j,k,l]', collapse=3) do l = is3%beg, is3%end @@ -5046,7 +5046,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() if (riemann_solver == 1 .or. riemann_solver == 4) then $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=4) @@ -5060,7 +5060,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() end if end if diff --git a/src/simulation/m_surface_tension.fpp b/src/simulation/m_surface_tension.fpp index 28412f95e..c6ffa4a18 100644 --- a/src/simulation/m_surface_tension.fpp +++ b/src/simulation/m_surface_tension.fpp @@ -130,7 +130,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() elseif (id == 2) then @@ -176,7 +176,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() elseif (id == 3) then @@ -222,7 +222,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() end if @@ -252,7 +252,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() $:GPU_PARALLEL_LOOP(private='[j,k,l]', collapse=3) do l = 0, p @@ -263,7 +263,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() if (p > 0) then $:GPU_PARALLEL_LOOP(private='[j,k,l]', collapse=3) @@ -275,7 +275,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() end if $:GPU_PARALLEL_LOOP(private='[j,k,l]', collapse=3) @@ -294,7 +294,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() call s_populate_capillary_buffers(c_divs, bc_type) @@ -354,7 +354,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() else if (recon_dir == 2) then $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=4) do i = iv%beg, iv%end @@ -367,7 +367,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() else if (recon_dir == 3) then $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=4) do i = iv%beg, iv%end @@ -380,7 +380,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() end if end if #:endfor diff --git a/src/simulation/m_time_steppers.fpp b/src/simulation/m_time_steppers.fpp index 110559bd1..9a8d38796 100644 --- a/src/simulation/m_time_steppers.fpp +++ b/src/simulation/m_time_steppers.fpp @@ -524,7 +524,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() !Evolve pb and mv for non-polytropic qbmm if (qbmm .and. (.not. polytropic)) then $:GPU_PARALLEL_LOOP(private='[i,j,k,l,q]', collapse=5) @@ -552,7 +552,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() end if if (bodyForces) call s_apply_bodyforces(q_cons_ts(1)%vf, q_prim_vf, rhs_vf, rk_coef(s, 3)*dt/rk_coef(s, 4)) @@ -699,7 +699,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() #:call GPU_PARALLEL(copyout='[dt_local]', copyin='[max_dt]') dt_local = minval(max_dt) @@ -741,7 +741,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() call nvtxEndRange @@ -767,7 +767,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() elseif (t_step == t_step_start + 1) then $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=4) do i = 1, sys_size @@ -779,7 +779,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() elseif (t_step == t_step_start + 2) then $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=4) do i = 1, sys_size @@ -791,7 +791,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() elseif (t_step == t_step_start + 3) then $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=4) do i = 1, sys_size @@ -803,7 +803,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() else ! All other timesteps $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=4) do i = 1, sys_size @@ -818,7 +818,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() end if end subroutine s_time_step_cycling diff --git a/src/simulation/m_viscous.fpp b/src/simulation/m_viscous.fpp index d1e45589d..56f52c239 100644 --- a/src/simulation/m_viscous.fpp +++ b/src/simulation/m_viscous.fpp @@ -91,7 +91,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() if (shear_stress) then ! Shear stresses $:GPU_PARALLEL_LOOP(collapse=3, private='[i,j,k,l,alpha_visc, alpha_rho_visc, Re_visc, tau_Re]') @@ -198,7 +198,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() end if if (bulk_stress) then ! Bulk stresses @@ -301,7 +301,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() end if if (p == 0) return @@ -412,7 +412,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() end if if (bulk_stress) then ! Bulk stresses @@ -513,7 +513,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() end if end subroutine s_compute_viscous_stress_tensor @@ -612,7 +612,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=3) do l = is3_viscous%beg, is3_viscous%end @@ -628,7 +628,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() if (n > 0) then @@ -646,7 +646,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=3) do l = is3_viscous%beg, is3_viscous%end @@ -662,7 +662,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=3) do l = is3_viscous%beg, is3_viscous%end @@ -682,7 +682,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=3) do l = is3_viscous%beg, is3_viscous%end @@ -703,7 +703,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=3) do l = is3_viscous%beg, is3_viscous%end @@ -724,7 +724,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=3) do l = is3_viscous%beg, is3_viscous%end @@ -745,7 +745,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() if (p > 0) then @@ -764,7 +764,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=3) do j = is3_viscous%beg, is3_viscous%end - 1 @@ -781,7 +781,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=3) do l = is3_viscous%beg + 1, is3_viscous%end - 1 @@ -803,7 +803,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=3) do l = is3_viscous%beg + 1, is3_viscous%end - 1 @@ -825,7 +825,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=3) do l = is3_viscous%beg + 1, is3_viscous%end - 1 @@ -847,7 +847,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=3) do l = is3_viscous%beg + 1, is3_viscous%end - 1 @@ -869,7 +869,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=3) do j = is3_viscous%beg + 1, is3_viscous%end @@ -891,7 +891,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=3) do j = is3_viscous%beg, is3_viscous%end - 1 @@ -913,7 +913,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=3) do j = is3_viscous%beg + 1, is3_viscous%end do l = is2_viscous%beg, is2_viscous%end @@ -934,7 +934,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=3) do j = is3_viscous%beg, is3_viscous%end - 1 do l = is2_viscous%beg, is2_viscous%end @@ -954,7 +954,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() do i = iv%beg, iv%end call s_compute_fd_gradient(q_prim_qp%vf(i), & @@ -1058,7 +1058,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() elseif (norm_dir == 3) then $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=4) do i = iv%beg, iv%end @@ -1071,7 +1071,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() elseif (norm_dir == 1) then $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=4) do i = iv%beg, iv%end @@ -1084,7 +1084,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() end if end if end if @@ -1162,7 +1162,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() elseif (norm_dir == 3) then $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=4) do i = iv%beg, iv%end @@ -1175,7 +1175,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() elseif (norm_dir == 1) then $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=4) do i = iv%beg, iv%end @@ -1188,7 +1188,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() end if end if end if @@ -1260,7 +1260,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() ! END: First-Order Spatial Derivatives in x-direction @@ -1289,7 +1289,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() ! END: First-Order Spatial Derivatives in y-direction @@ -1318,7 +1318,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() end if ! END: First-Order Spatial Derivatives in z-direction @@ -1368,7 +1368,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() if (n > 0) then $:GPU_PARALLEL_LOOP(private='[j,k,l]', collapse=3) @@ -1381,7 +1381,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() end if if (p > 0) then @@ -1395,7 +1395,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() end if $:GPU_PARALLEL_LOOP(private='[k,l]', collapse=2) @@ -1409,7 +1409,7 @@ contains (x_cc(idwbuff(1)%end) - x_cc(idwbuff(1)%end - 2)) end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() if (n > 0) then $:GPU_PARALLEL_LOOP(private='[j,l]', collapse=2) do l = idwbuff(3)%beg, idwbuff(3)%end @@ -1422,7 +1422,7 @@ contains (y_cc(idwbuff(2)%end) - y_cc(idwbuff(2)%end - 2)) end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() if (p > 0) then $:GPU_PARALLEL_LOOP(private='[j,k]', collapse=2) do k = idwbuff(2)%beg, idwbuff(2)%end @@ -1435,7 +1435,7 @@ contains (z_cc(idwbuff(3)%end) - z_cc(idwbuff(3)%end - 2)) end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() end if end if @@ -1447,7 +1447,7 @@ contains (x_cc(2) - x_cc(0)) end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() end if if (bc_x%end <= BC_GHOST_EXTRAP) then $:GPU_PARALLEL_LOOP(private='[k,l]', collapse=2) @@ -1457,7 +1457,7 @@ contains (x_cc(m) - x_cc(m - 2)) end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() end if if (n > 0) then if (bc_y%beg <= BC_GHOST_EXTRAP .and. bc_y%beg /= BC_NULL) then @@ -1468,7 +1468,7 @@ contains (y_cc(2) - y_cc(0)) end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() end if if (bc_y%end <= BC_GHOST_EXTRAP) then $:GPU_PARALLEL_LOOP(private='[j,l]', collapse=2) @@ -1478,7 +1478,7 @@ contains (y_cc(n) - y_cc(n - 2)) end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() end if if (p > 0) then if (bc_z%beg <= BC_GHOST_EXTRAP) then @@ -1490,7 +1490,7 @@ contains (z_cc(2) - z_cc(0)) end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() end if if (bc_z%end <= BC_GHOST_EXTRAP) then $:GPU_PARALLEL_LOOP(private='[j,k]', collapse=2) @@ -1501,7 +1501,7 @@ contains (z_cc(p) - z_cc(p - 2)) end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() end if end if end if diff --git a/src/simulation/m_weno.fpp b/src/simulation/m_weno.fpp index 869e4ca29..5a3fd0aec 100644 --- a/src/simulation/m_weno.fpp +++ b/src/simulation/m_weno.fpp @@ -681,7 +681,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() else if (weno_dir == 2) then $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=4) do i = 1, ubound(v_vf, 1) @@ -694,7 +694,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() else if (weno_dir == 3) then $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=4) do i = 1, ubound(v_vf, 1) @@ -707,7 +707,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() end if elseif (weno_order == 3) then #:for WENO_DIR, XYZ in [(1, 'x'), (2, 'y'), (3, 'z')] @@ -785,7 +785,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() end if #:endfor elseif (weno_order == 5) then @@ -900,7 +900,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() if (mp_weno) then call s_preserve_monotonicity(v_rs_ws_${XYZ}$, vL_rs_vf_${XYZ}$, & @@ -1096,7 +1096,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() end if #:endfor @@ -1149,7 +1149,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() end if ! Reshaping/Projecting onto Characteristic Fields in y-direction @@ -1166,7 +1166,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() end if ! Reshaping/Projecting onto Characteristic Fields in z-direction @@ -1183,7 +1183,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() end if end subroutine s_initialize_weno @@ -1233,7 +1233,7 @@ contains real(wp), parameter :: alpha_mp = 2._wp real(wp), parameter :: beta_mp = 4._wp/3._wp - $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=4,private='[d]') + $:GPU_PARALLEL_LOOP(private='[i,j,k,l,d]', collapse=4) do l = is3_weno%beg, is3_weno%end do k = is2_weno%beg, is2_weno%end do j = is1_weno%beg, is1_weno%end @@ -1358,7 +1358,7 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP + $:END_GPU_PARALLEL_LOOP() end subroutine s_preserve_monotonicity From ceec116b2653605a2f1650e054079d7e928ec491 Mon Sep 17 00:00:00 2001 From: "Daniel J. Vickers" Date: Thu, 6 Nov 2025 11:26:17 -0500 Subject: [PATCH 07/33] Removed all warnings --- src/common/include/acc_macros.fpp | 2 +- src/common/include/shared_parallel_macros.fpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/common/include/acc_macros.fpp b/src/common/include/acc_macros.fpp index d9c7ab2e9..771ee976d 100644 --- a/src/common/include/acc_macros.fpp +++ b/src/common/include/acc_macros.fpp @@ -305,4 +305,4 @@ #:set acc_directive = '!$acc wait ' + clause_val + extraAccArgs_val.strip('\n') $:acc_directive #:enddef -! New line at end of file is required for FYPP \ No newline at end of file +! New line at end of file is required for FYPP diff --git a/src/common/include/shared_parallel_macros.fpp b/src/common/include/shared_parallel_macros.fpp index 61134a3df..a3a0b6f75 100644 --- a/src/common/include/shared_parallel_macros.fpp +++ b/src/common/include/shared_parallel_macros.fpp @@ -107,4 +107,4 @@ #:endif $:extraArgs_val #:enddef -! New line at end of file is required for FYPP \ No newline at end of file +! New line at end of file is required for FYPP From 0ce888302302eed480e2705114fa1f5cd1abc4f7 Mon Sep 17 00:00:00 2001 From: "Daniel J. Vickers" Date: Thu, 6 Nov 2025 11:36:27 -0500 Subject: [PATCH 08/33] Spelling and Format --- src/common/include/parallel_macros.fpp | 2 +- src/common/m_boundary_common.fpp | 606 +-- src/common/m_chemistry.fpp | 248 +- src/common/m_compute_levelset.fpp | 520 +- src/common/m_finite_differences.fpp | 58 +- src/common/m_ib_patches.fpp | 350 +- src/common/m_mpi_common.fpp | 364 +- src/common/m_phase_change.fpp | 232 +- src/common/m_variables_conversion.fpp | 624 +-- src/simulation/m_acoustic_src.fpp | 198 +- src/simulation/m_body_forces.fpp | 76 +- src/simulation/m_bubbles_EE.fpp | 314 +- src/simulation/m_bubbles_EL.fpp | 482 +- src/simulation/m_bubbles_EL_kernels.fpp | 194 +- src/simulation/m_cbc.fpp | 1058 ++-- src/simulation/m_data_output.fpp | 22 +- src/simulation/m_derived_variables.fpp | 390 +- src/simulation/m_fftw.fpp | 84 +- src/simulation/m_hyperelastic.fpp | 182 +- src/simulation/m_hypoelastic.fpp | 476 +- src/simulation/m_ibm.fpp | 304 +- src/simulation/m_igr.fpp | 3412 ++++++------ src/simulation/m_mhd.fpp | 92 +- src/simulation/m_mpi_proxy.fpp | 82 +- src/simulation/m_muscl.fpp | 250 +- src/simulation/m_pressure_relaxation.fpp | 10 +- src/simulation/m_qbmm.fpp | 428 +- src/simulation/m_rhs.fpp | 966 ++-- src/simulation/m_riemann_solvers.fpp | 6090 +++++++++++----------- src/simulation/m_surface_tension.fpp | 274 +- src/simulation/m_time_steppers.fpp | 170 +- src/simulation/m_viscous.fpp | 1398 ++--- src/simulation/m_weno.fpp | 948 ++-- 33 files changed, 10452 insertions(+), 10452 deletions(-) diff --git a/src/common/include/parallel_macros.fpp b/src/common/include/parallel_macros.fpp index e14020678..3a1f131b1 100644 --- a/src/common/include/parallel_macros.fpp +++ b/src/common/include/parallel_macros.fpp @@ -37,7 +37,7 @@ #:def END_GPU_PARALLEL_LOOP() #if defined(MFC_OpenACC) - #:set end_directive = '!$acc end parallel loop' + #:set end_directive = '!$acc end parallel loop' #elif defined(MFC_OpenMP) #:set end_directive = END_OMP_PARALLEL_LOOP() #endif diff --git a/src/common/m_boundary_common.fpp b/src/common/m_boundary_common.fpp index 47434dfcb..892c32527 100644 --- a/src/common/m_boundary_common.fpp +++ b/src/common/m_boundary_common.fpp @@ -89,29 +89,29 @@ contains call s_mpi_sendrecv_variables_buffers(q_prim_vf, 1, -1, sys_size, pb_in, mv_in) else $:GPU_PARALLEL_LOOP(private='[l,k]', collapse=2) - do l = 0, p - do k = 0, n - select case (int(bc_type(1, -1)%sf(0, k, l))) - case (BC_CHAR_SUP_OUTFLOW:BC_GHOST_EXTRAP) - call s_ghost_cell_extrapolation(q_prim_vf, 1, -1, k, l) - case (BC_REFLECTIVE) - call s_symmetry(q_prim_vf, 1, -1, k, l, pb_in, mv_in) - case (BC_PERIODIC) - call s_periodic(q_prim_vf, 1, -1, k, l, pb_in, mv_in) - case (BC_SLIP_WALL) - call s_slip_wall(q_prim_vf, 1, -1, k, l) - case (BC_NO_SLIP_WALL) - call s_no_slip_wall(q_prim_vf, 1, -1, k, l) - case (BC_DIRICHLET) - call s_dirichlet(q_prim_vf, 1, -1, k, l) - end select - - if (qbmm .and. (.not. polytropic) .and. & - (bc_type(1, -1)%sf(0, k, l) <= BC_GHOST_EXTRAP)) then - call s_qbmm_extrapolation(1, -1, k, l, pb_in, mv_in) - end if - end do + do l = 0, p + do k = 0, n + select case (int(bc_type(1, -1)%sf(0, k, l))) + case (BC_CHAR_SUP_OUTFLOW:BC_GHOST_EXTRAP) + call s_ghost_cell_extrapolation(q_prim_vf, 1, -1, k, l) + case (BC_REFLECTIVE) + call s_symmetry(q_prim_vf, 1, -1, k, l, pb_in, mv_in) + case (BC_PERIODIC) + call s_periodic(q_prim_vf, 1, -1, k, l, pb_in, mv_in) + case (BC_SLIP_WALL) + call s_slip_wall(q_prim_vf, 1, -1, k, l) + case (BC_NO_SLIP_WALL) + call s_no_slip_wall(q_prim_vf, 1, -1, k, l) + case (BC_DIRICHLET) + call s_dirichlet(q_prim_vf, 1, -1, k, l) + end select + + if (qbmm .and. (.not. polytropic) .and. & + (bc_type(1, -1)%sf(0, k, l) <= BC_GHOST_EXTRAP)) then + call s_qbmm_extrapolation(1, -1, k, l, pb_in, mv_in) + end if end do + end do $:END_GPU_PARALLEL_LOOP() end if @@ -119,29 +119,29 @@ contains call s_mpi_sendrecv_variables_buffers(q_prim_vf, 1, 1, sys_size, pb_in, mv_in) else $:GPU_PARALLEL_LOOP(private='[l,k]', collapse=2) - do l = 0, p - do k = 0, n - select case (int(bc_type(1, 1)%sf(0, k, l))) - case (BC_CHAR_SUP_OUTFLOW:BC_GHOST_EXTRAP) ! Ghost-cell extrap. BC at end - call s_ghost_cell_extrapolation(q_prim_vf, 1, 1, k, l) - case (BC_REFLECTIVE) - call s_symmetry(q_prim_vf, 1, 1, k, l, pb_in, mv_in) - case (BC_PERIODIC) - call s_periodic(q_prim_vf, 1, 1, k, l, pb_in, mv_in) - case (BC_SLIP_WALL) - call s_slip_wall(q_prim_vf, 1, 1, k, l) - case (BC_NO_SLIP_WALL) - call s_no_slip_wall(q_prim_vf, 1, 1, k, l) - case (BC_DIRICHLET) - call s_dirichlet(q_prim_vf, 1, 1, k, l) - end select - - if (qbmm .and. (.not. polytropic) .and. & - (bc_type(1, 1)%sf(0, k, l) <= BC_GHOST_EXTRAP)) then - call s_qbmm_extrapolation(1, 1, k, l, pb_in, mv_in) - end if - end do + do l = 0, p + do k = 0, n + select case (int(bc_type(1, 1)%sf(0, k, l))) + case (BC_CHAR_SUP_OUTFLOW:BC_GHOST_EXTRAP) ! Ghost-cell extrap. BC at end + call s_ghost_cell_extrapolation(q_prim_vf, 1, 1, k, l) + case (BC_REFLECTIVE) + call s_symmetry(q_prim_vf, 1, 1, k, l, pb_in, mv_in) + case (BC_PERIODIC) + call s_periodic(q_prim_vf, 1, 1, k, l, pb_in, mv_in) + case (BC_SLIP_WALL) + call s_slip_wall(q_prim_vf, 1, 1, k, l) + case (BC_NO_SLIP_WALL) + call s_no_slip_wall(q_prim_vf, 1, 1, k, l) + case (BC_DIRICHLET) + call s_dirichlet(q_prim_vf, 1, 1, k, l) + end select + + if (qbmm .and. (.not. polytropic) .and. & + (bc_type(1, 1)%sf(0, k, l) <= BC_GHOST_EXTRAP)) then + call s_qbmm_extrapolation(1, 1, k, l, pb_in, mv_in) + end if end do + end do $:END_GPU_PARALLEL_LOOP() end if @@ -153,32 +153,32 @@ contains call s_mpi_sendrecv_variables_buffers(q_prim_vf, 2, -1, sys_size, pb_in, mv_in) else $:GPU_PARALLEL_LOOP(private='[l,k]', collapse=2) - do l = 0, p - do k = -buff_size, m + buff_size - select case (int(bc_type(2, -1)%sf(k, 0, l))) - case (BC_CHAR_SUP_OUTFLOW:BC_GHOST_EXTRAP) - call s_ghost_cell_extrapolation(q_prim_vf, 2, -1, k, l) - case (BC_AXIS) - call s_axis(q_prim_vf, pb_in, mv_in, k, l) - case (BC_REFLECTIVE) - call s_symmetry(q_prim_vf, 2, -1, k, l, pb_in, mv_in) - case (BC_PERIODIC) - call s_periodic(q_prim_vf, 2, -1, k, l, pb_in, mv_in) - case (BC_SLIP_WALL) - call s_slip_wall(q_prim_vf, 2, -1, k, l) - case (BC_NO_SLIP_WALL) - call s_no_slip_wall(q_prim_vf, 2, -1, k, l) - case (BC_DIRICHLET) - call s_dirichlet(q_prim_vf, 2, -1, k, l) - end select - - if (qbmm .and. (.not. polytropic) .and. & - (bc_type(2, -1)%sf(k, 0, l) <= BC_GHOST_EXTRAP) .and. & - (bc_type(2, -1)%sf(k, 0, l) /= BC_AXIS)) then - call s_qbmm_extrapolation(2, -1, k, l, pb_in, mv_in) - end if - end do + do l = 0, p + do k = -buff_size, m + buff_size + select case (int(bc_type(2, -1)%sf(k, 0, l))) + case (BC_CHAR_SUP_OUTFLOW:BC_GHOST_EXTRAP) + call s_ghost_cell_extrapolation(q_prim_vf, 2, -1, k, l) + case (BC_AXIS) + call s_axis(q_prim_vf, pb_in, mv_in, k, l) + case (BC_REFLECTIVE) + call s_symmetry(q_prim_vf, 2, -1, k, l, pb_in, mv_in) + case (BC_PERIODIC) + call s_periodic(q_prim_vf, 2, -1, k, l, pb_in, mv_in) + case (BC_SLIP_WALL) + call s_slip_wall(q_prim_vf, 2, -1, k, l) + case (BC_NO_SLIP_WALL) + call s_no_slip_wall(q_prim_vf, 2, -1, k, l) + case (BC_DIRICHLET) + call s_dirichlet(q_prim_vf, 2, -1, k, l) + end select + + if (qbmm .and. (.not. polytropic) .and. & + (bc_type(2, -1)%sf(k, 0, l) <= BC_GHOST_EXTRAP) .and. & + (bc_type(2, -1)%sf(k, 0, l) /= BC_AXIS)) then + call s_qbmm_extrapolation(2, -1, k, l, pb_in, mv_in) + end if end do + end do $:END_GPU_PARALLEL_LOOP() end if @@ -186,29 +186,29 @@ contains call s_mpi_sendrecv_variables_buffers(q_prim_vf, 2, 1, sys_size, pb_in, mv_in) else $:GPU_PARALLEL_LOOP(private='[l,k]', collapse=2) - do l = 0, p - do k = -buff_size, m + buff_size - select case (int(bc_type(2, 1)%sf(k, 0, l))) - case (BC_CHAR_SUP_OUTFLOW:BC_GHOST_EXTRAP) - call s_ghost_cell_extrapolation(q_prim_vf, 2, 1, k, l) - case (BC_REFLECTIVE) - call s_symmetry(q_prim_vf, 2, 1, k, l, pb_in, mv_in) - case (BC_PERIODIC) - call s_periodic(q_prim_vf, 2, 1, k, l, pb_in, mv_in) - case (BC_SLIP_WALL) - call s_slip_wall(q_prim_vf, 2, 1, k, l) - case (BC_NO_SLIP_WALL) - call s_no_slip_wall(q_prim_vf, 2, 1, k, l) - case (BC_DIRICHLET) - call s_dirichlet(q_prim_vf, 2, 1, k, l) - end select - - if (qbmm .and. (.not. polytropic) .and. & - (bc_type(2, 1)%sf(k, 0, l) <= BC_GHOST_EXTRAP)) then - call s_qbmm_extrapolation(2, 1, k, l, pb_in, mv_in) - end if - end do + do l = 0, p + do k = -buff_size, m + buff_size + select case (int(bc_type(2, 1)%sf(k, 0, l))) + case (BC_CHAR_SUP_OUTFLOW:BC_GHOST_EXTRAP) + call s_ghost_cell_extrapolation(q_prim_vf, 2, 1, k, l) + case (BC_REFLECTIVE) + call s_symmetry(q_prim_vf, 2, 1, k, l, pb_in, mv_in) + case (BC_PERIODIC) + call s_periodic(q_prim_vf, 2, 1, k, l, pb_in, mv_in) + case (BC_SLIP_WALL) + call s_slip_wall(q_prim_vf, 2, 1, k, l) + case (BC_NO_SLIP_WALL) + call s_no_slip_wall(q_prim_vf, 2, 1, k, l) + case (BC_DIRICHLET) + call s_dirichlet(q_prim_vf, 2, 1, k, l) + end select + + if (qbmm .and. (.not. polytropic) .and. & + (bc_type(2, 1)%sf(k, 0, l) <= BC_GHOST_EXTRAP)) then + call s_qbmm_extrapolation(2, 1, k, l, pb_in, mv_in) + end if end do + end do $:END_GPU_PARALLEL_LOOP() end if @@ -220,29 +220,29 @@ contains call s_mpi_sendrecv_variables_buffers(q_prim_vf, 3, -1, sys_size, pb_in, mv_in) else $:GPU_PARALLEL_LOOP(private='[l,k]', collapse=2) - do l = -buff_size, n + buff_size - do k = -buff_size, m + buff_size - select case (int(bc_type(3, -1)%sf(k, l, 0))) - case (BC_CHAR_SUP_OUTFLOW:BC_GHOST_EXTRAP) - call s_ghost_cell_extrapolation(q_prim_vf, 3, -1, k, l) - case (BC_REFLECTIVE) - call s_symmetry(q_prim_vf, 3, -1, k, l, pb_in, mv_in) - case (BC_PERIODIC) - call s_periodic(q_prim_vf, 3, -1, k, l, pb_in, mv_in) - case (BC_SLIP_WALL) - call s_slip_wall(q_prim_vf, 3, -1, k, l) - case (BC_NO_SLIP_WALL) - call s_no_slip_wall(q_prim_vf, 3, -1, k, l) - case (BC_DIRICHLET) - call s_dirichlet(q_prim_vf, 3, -1, k, l) - end select - - if (qbmm .and. (.not. polytropic) .and. & - (bc_type(3, -1)%sf(k, l, 0) <= BC_GHOST_EXTRAP)) then - call s_qbmm_extrapolation(3, -1, k, l, pb_in, mv_in) - end if - end do + do l = -buff_size, n + buff_size + do k = -buff_size, m + buff_size + select case (int(bc_type(3, -1)%sf(k, l, 0))) + case (BC_CHAR_SUP_OUTFLOW:BC_GHOST_EXTRAP) + call s_ghost_cell_extrapolation(q_prim_vf, 3, -1, k, l) + case (BC_REFLECTIVE) + call s_symmetry(q_prim_vf, 3, -1, k, l, pb_in, mv_in) + case (BC_PERIODIC) + call s_periodic(q_prim_vf, 3, -1, k, l, pb_in, mv_in) + case (BC_SLIP_WALL) + call s_slip_wall(q_prim_vf, 3, -1, k, l) + case (BC_NO_SLIP_WALL) + call s_no_slip_wall(q_prim_vf, 3, -1, k, l) + case (BC_DIRICHLET) + call s_dirichlet(q_prim_vf, 3, -1, k, l) + end select + + if (qbmm .and. (.not. polytropic) .and. & + (bc_type(3, -1)%sf(k, l, 0) <= BC_GHOST_EXTRAP)) then + call s_qbmm_extrapolation(3, -1, k, l, pb_in, mv_in) + end if end do + end do $:END_GPU_PARALLEL_LOOP() end if @@ -250,29 +250,29 @@ contains call s_mpi_sendrecv_variables_buffers(q_prim_vf, 3, 1, sys_size, pb_in, mv_in) else $:GPU_PARALLEL_LOOP(private='[l,k]', collapse=2) - do l = -buff_size, n + buff_size - do k = -buff_size, m + buff_size - select case (int(bc_type(3, 1)%sf(k, l, 0))) - case (BC_CHAR_SUP_OUTFLOW:BC_GHOST_EXTRAP) - call s_ghost_cell_extrapolation(q_prim_vf, 3, 1, k, l) - case (BC_REFLECTIVE) - call s_symmetry(q_prim_vf, 3, 1, k, l, pb_in, mv_in) - case (BC_PERIODIC) - call s_periodic(q_prim_vf, 3, 1, k, l, pb_in, mv_in) - case (BC_SlIP_WALL) - call s_slip_wall(q_prim_vf, 3, 1, k, l) - case (BC_NO_SLIP_WALL) - call s_no_slip_wall(q_prim_vf, 3, 1, k, l) - case (BC_DIRICHLET) - call s_dirichlet(q_prim_vf, 3, 1, k, l) - end select - - if (qbmm .and. (.not. polytropic) .and. & - (bc_type(3, 1)%sf(k, l, 0) <= BC_GHOST_EXTRAP)) then - call s_qbmm_extrapolation(3, 1, k, l, pb_in, mv_in) - end if - end do + do l = -buff_size, n + buff_size + do k = -buff_size, m + buff_size + select case (int(bc_type(3, 1)%sf(k, l, 0))) + case (BC_CHAR_SUP_OUTFLOW:BC_GHOST_EXTRAP) + call s_ghost_cell_extrapolation(q_prim_vf, 3, 1, k, l) + case (BC_REFLECTIVE) + call s_symmetry(q_prim_vf, 3, 1, k, l, pb_in, mv_in) + case (BC_PERIODIC) + call s_periodic(q_prim_vf, 3, 1, k, l, pb_in, mv_in) + case (BC_SlIP_WALL) + call s_slip_wall(q_prim_vf, 3, 1, k, l) + case (BC_NO_SLIP_WALL) + call s_no_slip_wall(q_prim_vf, 3, 1, k, l) + case (BC_DIRICHLET) + call s_dirichlet(q_prim_vf, 3, 1, k, l) + end select + + if (qbmm .and. (.not. polytropic) .and. & + (bc_type(3, 1)%sf(k, l, 0) <= BC_GHOST_EXTRAP)) then + call s_qbmm_extrapolation(3, 1, k, l, pb_in, mv_in) + end if end do + end do $:END_GPU_PARALLEL_LOOP() end if ! END: Population of Buffers in z-direction @@ -1166,18 +1166,18 @@ contains call s_mpi_sendrecv_variables_buffers(c_divs, 1, -1, num_dims + 1) else $:GPU_PARALLEL_LOOP(private='[l,k]', collapse=2) - do l = 0, p - do k = 0, n - select case (bc_type(1, -1)%sf(0, k, l)) - case (BC_PERIODIC) - call s_color_function_periodic(c_divs, 1, -1, k, l) - case (BC_REFLECTIVE) - call s_color_function_reflective(c_divs, 1, -1, k, l) - case default - call s_color_function_ghost_cell_extrapolation(c_divs, 1, -1, k, l) - end select - end do + do l = 0, p + do k = 0, n + select case (bc_type(1, -1)%sf(0, k, l)) + case (BC_PERIODIC) + call s_color_function_periodic(c_divs, 1, -1, k, l) + case (BC_REFLECTIVE) + call s_color_function_reflective(c_divs, 1, -1, k, l) + case default + call s_color_function_ghost_cell_extrapolation(c_divs, 1, -1, k, l) + end select end do + end do $:END_GPU_PARALLEL_LOOP() end if @@ -1185,18 +1185,18 @@ contains call s_mpi_sendrecv_variables_buffers(c_divs, 1, 1, num_dims + 1) else $:GPU_PARALLEL_LOOP(private='[l,k]', collapse=2) - do l = 0, p - do k = 0, n - select case (bc_type(1, 1)%sf(0, k, l)) - case (BC_PERIODIC) - call s_color_function_periodic(c_divs, 1, 1, k, l) - case (BC_REFLECTIVE) - call s_color_function_reflective(c_divs, 1, 1, k, l) - case default - call s_color_function_ghost_cell_extrapolation(c_divs, 1, 1, k, l) - end select - end do + do l = 0, p + do k = 0, n + select case (bc_type(1, 1)%sf(0, k, l)) + case (BC_PERIODIC) + call s_color_function_periodic(c_divs, 1, 1, k, l) + case (BC_REFLECTIVE) + call s_color_function_reflective(c_divs, 1, 1, k, l) + case default + call s_color_function_ghost_cell_extrapolation(c_divs, 1, 1, k, l) + end select end do + end do $:END_GPU_PARALLEL_LOOP() end if @@ -1207,18 +1207,18 @@ contains call s_mpi_sendrecv_variables_buffers(c_divs, 2, -1, num_dims + 1) else $:GPU_PARALLEL_LOOP(private='[l,k]', collapse=2) - do l = 0, p - do k = -buff_size, m + buff_size - select case (bc_type(2, -1)%sf(k, 0, l)) - case (BC_PERIODIC) - call s_color_function_periodic(c_divs, 2, -1, k, l) - case (BC_REFLECTIVE) - call s_color_function_reflective(c_divs, 2, -1, k, l) - case default - call s_color_function_ghost_cell_extrapolation(c_divs, 2, -1, k, l) - end select - end do + do l = 0, p + do k = -buff_size, m + buff_size + select case (bc_type(2, -1)%sf(k, 0, l)) + case (BC_PERIODIC) + call s_color_function_periodic(c_divs, 2, -1, k, l) + case (BC_REFLECTIVE) + call s_color_function_reflective(c_divs, 2, -1, k, l) + case default + call s_color_function_ghost_cell_extrapolation(c_divs, 2, -1, k, l) + end select end do + end do $:END_GPU_PARALLEL_LOOP() end if @@ -1226,18 +1226,18 @@ contains call s_mpi_sendrecv_variables_buffers(c_divs, 2, 1, num_dims + 1) else $:GPU_PARALLEL_LOOP(private='[l,k]', collapse=2) - do l = 0, p - do k = -buff_size, m + buff_size - select case (bc_type(2, 1)%sf(k, 0, l)) - case (BC_PERIODIC) - call s_color_function_periodic(c_divs, 2, 1, k, l) - case (BC_REFLECTIVE) - call s_color_function_reflective(c_divs, 2, 1, k, l) - case default - call s_color_function_ghost_cell_extrapolation(c_divs, 2, 1, k, l) - end select - end do + do l = 0, p + do k = -buff_size, m + buff_size + select case (bc_type(2, 1)%sf(k, 0, l)) + case (BC_PERIODIC) + call s_color_function_periodic(c_divs, 2, 1, k, l) + case (BC_REFLECTIVE) + call s_color_function_reflective(c_divs, 2, 1, k, l) + case default + call s_color_function_ghost_cell_extrapolation(c_divs, 2, 1, k, l) + end select end do + end do $:END_GPU_PARALLEL_LOOP() end if @@ -1248,18 +1248,18 @@ contains call s_mpi_sendrecv_variables_buffers(c_divs, 3, -1, num_dims + 1) else $:GPU_PARALLEL_LOOP(private='[l,k]', collapse=2) - do l = -buff_size, n + buff_size - do k = -buff_size, m + buff_size - select case (bc_type(3, -1)%sf(k, l, 0)) - case (BC_PERIODIC) - call s_color_function_periodic(c_divs, 3, -1, k, l) - case (BC_REFLECTIVE) - call s_color_function_reflective(c_divs, 3, -1, k, l) - case default - call s_color_function_ghost_cell_extrapolation(c_divs, 3, -1, k, l) - end select - end do + do l = -buff_size, n + buff_size + do k = -buff_size, m + buff_size + select case (bc_type(3, -1)%sf(k, l, 0)) + case (BC_PERIODIC) + call s_color_function_periodic(c_divs, 3, -1, k, l) + case (BC_REFLECTIVE) + call s_color_function_reflective(c_divs, 3, -1, k, l) + case default + call s_color_function_ghost_cell_extrapolation(c_divs, 3, -1, k, l) + end select end do + end do $:END_GPU_PARALLEL_LOOP() end if @@ -1267,18 +1267,18 @@ contains call s_mpi_sendrecv_variables_buffers(c_divs, 3, 1, num_dims + 1) else $:GPU_PARALLEL_LOOP(private='[l,k]', collapse=2) - do l = -buff_size, n + buff_size - do k = -buff_size, m + buff_size - select case (bc_type(3, 1)%sf(k, l, 0)) - case (BC_PERIODIC) - call s_color_function_periodic(c_divs, 3, 1, k, l) - case (BC_REFLECTIVE) - call s_color_function_reflective(c_divs, 3, 1, k, l) - case default - call s_color_function_ghost_cell_extrapolation(c_divs, 3, 1, k, l) - end select - end do + do l = -buff_size, n + buff_size + do k = -buff_size, m + buff_size + select case (bc_type(3, 1)%sf(k, l, 0)) + case (BC_PERIODIC) + call s_color_function_periodic(c_divs, 3, 1, k, l) + case (BC_REFLECTIVE) + call s_color_function_reflective(c_divs, 3, 1, k, l) + case default + call s_color_function_ghost_cell_extrapolation(c_divs, 3, 1, k, l) + end select end do + end do $:END_GPU_PARALLEL_LOOP() end if end subroutine s_populate_capillary_buffers @@ -1483,24 +1483,24 @@ contains call s_mpi_sendrecv_variables_buffers(jac_sf, 1, -1, 1) else $:GPU_PARALLEL_LOOP(private='[l,k]', collapse=2) - do l = 0, p - do k = 0, n - select case (bc_type(1, -1)%sf(0, k, l)) - case (BC_PERIODIC) - do j = 1, buff_size - jac_sf(1)%sf(-j, k, l) = jac_sf(1)%sf(m - j + 1, k, l) - end do - case (BC_REFLECTIVE) - do j = 1, buff_size - jac_sf(1)%sf(-j, k, l) = jac_sf(1)%sf(j - 1, k, l) - end do - case default - do j = 1, buff_size - jac_sf(1)%sf(-j, k, l) = jac_sf(1)%sf(0, k, l) - end do - end select - end do + do l = 0, p + do k = 0, n + select case (bc_type(1, -1)%sf(0, k, l)) + case (BC_PERIODIC) + do j = 1, buff_size + jac_sf(1)%sf(-j, k, l) = jac_sf(1)%sf(m - j + 1, k, l) + end do + case (BC_REFLECTIVE) + do j = 1, buff_size + jac_sf(1)%sf(-j, k, l) = jac_sf(1)%sf(j - 1, k, l) + end do + case default + do j = 1, buff_size + jac_sf(1)%sf(-j, k, l) = jac_sf(1)%sf(0, k, l) + end do + end select end do + end do $:END_GPU_PARALLEL_LOOP() end if @@ -1509,24 +1509,24 @@ contains call s_mpi_sendrecv_variables_buffers(jac_sf, 1, 1, 1) else $:GPU_PARALLEL_LOOP(private='[l,k]', collapse=2) - do l = 0, p - do k = 0, n - select case (bc_type(1, 1)%sf(0, k, l)) - case (BC_PERIODIC) - do j = 1, buff_size - jac_sf(1)%sf(m + j, k, l) = jac_sf(1)%sf(j - 1, k, l) - end do - case (BC_REFLECTIVE) - do j = 1, buff_size - jac_sf(1)%sf(m + j, k, l) = jac_sf(1)%sf(m - (j - 1), k, l) - end do - case default - do j = 1, buff_size - jac_sf(1)%sf(m + j, k, l) = jac_sf(1)%sf(m, k, l) - end do - end select - end do + do l = 0, p + do k = 0, n + select case (bc_type(1, 1)%sf(0, k, l)) + case (BC_PERIODIC) + do j = 1, buff_size + jac_sf(1)%sf(m + j, k, l) = jac_sf(1)%sf(j - 1, k, l) + end do + case (BC_REFLECTIVE) + do j = 1, buff_size + jac_sf(1)%sf(m + j, k, l) = jac_sf(1)%sf(m - (j - 1), k, l) + end do + case default + do j = 1, buff_size + jac_sf(1)%sf(m + j, k, l) = jac_sf(1)%sf(m, k, l) + end do + end select end do + end do $:END_GPU_PARALLEL_LOOP() end if @@ -1537,24 +1537,24 @@ contains call s_mpi_sendrecv_variables_buffers(jac_sf, 2, -1, 1) else $:GPU_PARALLEL_LOOP(private='[l,k]', collapse=2) - do l = 0, p - do k = idwbuff(1)%beg, idwbuff(1)%end - select case (bc_type(2, -1)%sf(k, 0, l)) - case (BC_PERIODIC) - do j = 1, buff_size - jac_sf(1)%sf(k, -j, l) = jac_sf(1)%sf(k, n - j + 1, l) - end do - case (BC_REFLECTIVE) - do j = 1, buff_size - jac_sf(1)%sf(k, -j, l) = jac_sf(1)%sf(k, j - 1, l) - end do - case default - do j = 1, buff_size - jac_sf(1)%sf(k, -j, l) = jac_sf(1)%sf(k, 0, l) - end do - end select - end do + do l = 0, p + do k = idwbuff(1)%beg, idwbuff(1)%end + select case (bc_type(2, -1)%sf(k, 0, l)) + case (BC_PERIODIC) + do j = 1, buff_size + jac_sf(1)%sf(k, -j, l) = jac_sf(1)%sf(k, n - j + 1, l) + end do + case (BC_REFLECTIVE) + do j = 1, buff_size + jac_sf(1)%sf(k, -j, l) = jac_sf(1)%sf(k, j - 1, l) + end do + case default + do j = 1, buff_size + jac_sf(1)%sf(k, -j, l) = jac_sf(1)%sf(k, 0, l) + end do + end select end do + end do $:END_GPU_PARALLEL_LOOP() end if @@ -1563,24 +1563,24 @@ contains call s_mpi_sendrecv_variables_buffers(jac_sf, 2, 1, 1) else $:GPU_PARALLEL_LOOP(private='[l,k]', collapse=2) - do l = 0, p - do k = idwbuff(1)%beg, idwbuff(1)%end - select case (bc_type(2, 1)%sf(k, 0, l)) - case (BC_PERIODIC) - do j = 1, buff_size - jac_sf(1)%sf(k, n + j, l) = jac_sf(1)%sf(k, j - 1, l) - end do - case (BC_REFLECTIVE) - do j = 1, buff_size - jac_sf(1)%sf(k, n + j, l) = jac_sf(1)%sf(k, n - (j - 1), l) - end do - case default - do j = 1, buff_size - jac_sf(1)%sf(k, n + j, l) = jac_sf(1)%sf(k, n, l) - end do - end select - end do + do l = 0, p + do k = idwbuff(1)%beg, idwbuff(1)%end + select case (bc_type(2, 1)%sf(k, 0, l)) + case (BC_PERIODIC) + do j = 1, buff_size + jac_sf(1)%sf(k, n + j, l) = jac_sf(1)%sf(k, j - 1, l) + end do + case (BC_REFLECTIVE) + do j = 1, buff_size + jac_sf(1)%sf(k, n + j, l) = jac_sf(1)%sf(k, n - (j - 1), l) + end do + case default + do j = 1, buff_size + jac_sf(1)%sf(k, n + j, l) = jac_sf(1)%sf(k, n, l) + end do + end select end do + end do $:END_GPU_PARALLEL_LOOP() end if @@ -1590,24 +1590,24 @@ contains call s_mpi_sendrecv_variables_buffers(jac_sf, 3, -1, 1) else $:GPU_PARALLEL_LOOP(private='[l,k]', collapse=2) - do l = idwbuff(2)%beg, idwbuff(2)%end - do k = idwbuff(1)%beg, idwbuff(1)%end - select case (bc_type(3, -1)%sf(k, l, 0)) - case (BC_PERIODIC) - do j = 1, buff_size - jac_sf(1)%sf(k, l, -j) = jac_sf(1)%sf(k, l, p - j + 1) - end do - case (BC_REFLECTIVE) - do j = 1, buff_size - jac_sf(1)%sf(k, l, -j) = jac_sf(1)%sf(k, l, j - 1) - end do - case default - do j = 1, buff_size - jac_sf(1)%sf(k, l, -j) = jac_sf(1)%sf(k, l, 0) - end do - end select - end do + do l = idwbuff(2)%beg, idwbuff(2)%end + do k = idwbuff(1)%beg, idwbuff(1)%end + select case (bc_type(3, -1)%sf(k, l, 0)) + case (BC_PERIODIC) + do j = 1, buff_size + jac_sf(1)%sf(k, l, -j) = jac_sf(1)%sf(k, l, p - j + 1) + end do + case (BC_REFLECTIVE) + do j = 1, buff_size + jac_sf(1)%sf(k, l, -j) = jac_sf(1)%sf(k, l, j - 1) + end do + case default + do j = 1, buff_size + jac_sf(1)%sf(k, l, -j) = jac_sf(1)%sf(k, l, 0) + end do + end select end do + end do $:END_GPU_PARALLEL_LOOP() end if @@ -1615,24 +1615,24 @@ contains call s_mpi_sendrecv_variables_buffers(jac_sf, 3, 1, 1) else $:GPU_PARALLEL_LOOP(private='[l,k]', collapse=2) - do l = idwbuff(2)%beg, idwbuff(2)%end - do k = idwbuff(1)%beg, idwbuff(1)%end - select case (bc_type(3, 1)%sf(k, l, 0)) - case (BC_PERIODIC) - do j = 1, buff_size - jac_sf(1)%sf(k, l, p + j) = jac_sf(1)%sf(k, l, j - 1) - end do - case (BC_REFLECTIVE) - do j = 1, buff_size - jac_sf(1)%sf(k, l, p + j) = jac_sf(1)%sf(k, l, p - (j - 1)) - end do - case default - do j = 1, buff_size - jac_sf(1)%sf(k, l, p + j) = jac_sf(1)%sf(k, l, p) - end do - end select - end do + do l = idwbuff(2)%beg, idwbuff(2)%end + do k = idwbuff(1)%beg, idwbuff(1)%end + select case (bc_type(3, 1)%sf(k, l, 0)) + case (BC_PERIODIC) + do j = 1, buff_size + jac_sf(1)%sf(k, l, p + j) = jac_sf(1)%sf(k, l, j - 1) + end do + case (BC_REFLECTIVE) + do j = 1, buff_size + jac_sf(1)%sf(k, l, p + j) = jac_sf(1)%sf(k, l, p - (j - 1)) + end do + case default + do j = 1, buff_size + jac_sf(1)%sf(k, l, p + j) = jac_sf(1)%sf(k, l, p) + end do + end select end do + end do $:END_GPU_PARALLEL_LOOP() end if diff --git a/src/common/m_chemistry.fpp b/src/common/m_chemistry.fpp index 220b987f1..2e3d66209 100644 --- a/src/common/m_chemistry.fpp +++ b/src/common/m_chemistry.fpp @@ -130,35 +130,35 @@ contains real(wp), dimension(num_species) :: omega $:GPU_PARALLEL_LOOP(collapse=3, private='[x,y,z,Ys, omega, T]') - do z = bounds(3)%beg, bounds(3)%end - do y = bounds(2)%beg, bounds(2)%end - do x = bounds(1)%beg, bounds(1)%end - - $:GPU_LOOP(parallelism='[seq]') - do eqn = chemxb, chemxe - Ys(eqn - chemxb + 1) = q_prim_qp(eqn)%sf(x, y, z) - end do + do z = bounds(3)%beg, bounds(3)%end + do y = bounds(2)%beg, bounds(2)%end + do x = bounds(1)%beg, bounds(1)%end - rho = q_cons_qp(contxe)%sf(x, y, z) - T = q_T_sf%sf(x, y, z) + $:GPU_LOOP(parallelism='[seq]') + do eqn = chemxb, chemxe + Ys(eqn - chemxb + 1) = q_prim_qp(eqn)%sf(x, y, z) + end do - call get_net_production_rates(rho, T, Ys, omega) + rho = q_cons_qp(contxe)%sf(x, y, z) + T = q_T_sf%sf(x, y, z) - $:GPU_LOOP(parallelism='[seq]') - do eqn = chemxb, chemxe - #:block UNDEF_AMD - omega_m = molecular_weights(eqn - chemxb + 1)*omega(eqn - chemxb + 1) - #:endblock UNDEF_AMD - #:block DEF_AMD - omega_m = molecular_weights_nonparameter(eqn - chemxb + 1)*omega(eqn - chemxb + 1) - #:endblock DEF_AMD - rhs_vf(eqn)%sf(x, y, z) = rhs_vf(eqn)%sf(x, y, z) + omega_m + call get_net_production_rates(rho, T, Ys, omega) - end do + $:GPU_LOOP(parallelism='[seq]') + do eqn = chemxb, chemxe + #:block UNDEF_AMD + omega_m = molecular_weights(eqn - chemxb + 1)*omega(eqn - chemxb + 1) + #:endblock UNDEF_AMD + #:block DEF_AMD + omega_m = molecular_weights_nonparameter(eqn - chemxb + 1)*omega(eqn - chemxb + 1) + #:endblock DEF_AMD + rhs_vf(eqn)%sf(x, y, z) = rhs_vf(eqn)%sf(x, y, z) + omega_m end do + end do end do + end do $:END_GPU_PARALLEL_LOOP() end subroutine s_compute_chemistry_reaction_flux @@ -192,112 +192,112 @@ contains offsets(idir) = 1 $:GPU_PARALLEL_LOOP(collapse=3, private='[x,y,z,Ys_L, Ys_R, Ys_cell, Xs_L, Xs_R, mass_diffusivities_mixavg1, mass_diffusivities_mixavg2, mass_diffusivities_mixavg_Cell, h_l, h_r, Xs_cell, h_k, dXk_dxi,Mass_Diffu_Flux]', copyin='[offsets]') - do z = isc3%beg, isc3%end - do y = isc2%beg, isc2%end - do x = isc1%beg, isc1%end - ! Calculate grid spacing using direction-based indexing - select case (idir) - case (1) - grid_spacing = x_cc(x + 1) - x_cc(x) - case (2) - grid_spacing = y_cc(y + 1) - y_cc(y) - case (3) - grid_spacing = z_cc(z + 1) - z_cc(z) - end select - - ! Extract species mass fractions - $:GPU_LOOP(parallelism='[seq]') - do i = chemxb, chemxe - Ys_L(i - chemxb + 1) = q_prim_qp(i)%sf(x, y, z) - Ys_R(i - chemxb + 1) = q_prim_qp(i)%sf(x + offsets(1), y + offsets(2), z + offsets(3)) - Ys_cell(i - chemxb + 1) = 0.5_wp*(Ys_L(i - chemxb + 1) + Ys_R(i - chemxb + 1)) - end do - - ! Calculate molecular weights and mole fractions - call get_mixture_molecular_weight(Ys_L, MW_L) - call get_mixture_molecular_weight(Ys_R, MW_R) - MW_cell = 0.5_wp*(MW_L + MW_R) - - call get_mole_fractions(MW_L, Ys_L, Xs_L) - call get_mole_fractions(MW_R, Ys_R, Xs_R) - - ! Calculate gas constants and thermodynamic properties - Rgas_L = gas_constant/MW_L - Rgas_R = gas_constant/MW_R - - P_L = q_prim_qp(E_idx)%sf(x, y, z) - P_R = q_prim_qp(E_idx)%sf(x + offsets(1), y + offsets(2), z + offsets(3)) - - rho_L = q_prim_qp(1)%sf(x, y, z) - rho_R = q_prim_qp(1)%sf(x + offsets(1), y + offsets(2), z + offsets(3)) - - T_L = P_L/rho_L/Rgas_L - T_R = P_R/rho_R/Rgas_R - - rho_cell = 0.5_wp*(rho_L + rho_R) - dT_dxi = (T_R - T_L)/grid_spacing - - ! Get transport properties - call get_species_mass_diffusivities_mixavg(P_L, T_L, Ys_L, mass_diffusivities_mixavg1) - call get_species_mass_diffusivities_mixavg(P_R, T_R, Ys_R, mass_diffusivities_mixavg2) - - call get_mixture_thermal_conductivity_mixavg(T_L, Ys_L, lambda_L) - call get_mixture_thermal_conductivity_mixavg(T_R, Ys_R, lambda_R) - - call get_species_enthalpies_rt(T_L, h_l) - call get_species_enthalpies_rt(T_R, h_r) - - ! Calculate species properties and gradients - $:GPU_LOOP(parallelism='[seq]') - do i = chemxb, chemxe - h_l(i - chemxb + 1) = h_l(i - chemxb + 1)*gas_constant*T_L/molecular_weights(i - chemxb + 1) - h_r(i - chemxb + 1) = h_r(i - chemxb + 1)*gas_constant*T_R/molecular_weights(i - chemxb + 1) - Xs_cell(i - chemxb + 1) = 0.5_wp*(Xs_L(i - chemxb + 1) + Xs_R(i - chemxb + 1)) - h_k(i - chemxb + 1) = 0.5_wp*(h_l(i - chemxb + 1) + h_r(i - chemxb + 1)) - dXk_dxi(i - chemxb + 1) = (Xs_R(i - chemxb + 1) - Xs_L(i - chemxb + 1))/grid_spacing - end do - - ! Calculate mixture-averaged diffusivities - $:GPU_LOOP(parallelism='[seq]') - do i = chemxb, chemxe - mass_diffusivities_mixavg_Cell(i - chemxb + 1) = & - (mass_diffusivities_mixavg2(i - chemxb + 1) + mass_diffusivities_mixavg1(i - chemxb + 1))/2.0_wp - end do - - lambda_Cell = 0.5_wp*(lambda_R + lambda_L) - - ! Calculate mass diffusion fluxes - rho_Vic = 0.0_wp - Mass_Diffu_Energy = 0.0_wp - - $:GPU_LOOP(parallelism='[seq]') - do eqn = chemxb, chemxe - Mass_Diffu_Flux(eqn - chemxb + 1) = rho_cell*mass_diffusivities_mixavg_Cell(eqn - chemxb + 1)* & - molecular_weights(eqn - chemxb + 1)/MW_cell*dXk_dxi(eqn - chemxb + 1) - rho_Vic = rho_Vic + Mass_Diffu_Flux(eqn - chemxb + 1) - Mass_Diffu_Energy = Mass_Diffu_Energy + h_k(eqn - chemxb + 1)*Mass_Diffu_Flux(eqn - chemxb + 1) - end do - - ! Apply corrections for mass conservation - $:GPU_LOOP(parallelism='[seq]') - do eqn = chemxb, chemxe - Mass_Diffu_Energy = Mass_Diffu_Energy - h_k(eqn - chemxb + 1)*Ys_cell(eqn - chemxb + 1)*rho_Vic - Mass_Diffu_Flux(eqn - chemxb + 1) = Mass_Diffu_Flux(eqn - chemxb + 1) - rho_Vic*Ys_cell(eqn - chemxb + 1) - end do - - ! Add thermal conduction contribution - Mass_Diffu_Energy = lambda_Cell*dT_dxi + Mass_Diffu_Energy - - ! Update flux arrays - flux_src_vf(E_idx)%sf(x, y, z) = flux_src_vf(E_idx)%sf(x, y, z) - Mass_Diffu_Energy - - $:GPU_LOOP(parallelism='[seq]') - do eqn = chemxb, chemxe - flux_src_vf(eqn)%sf(x, y, z) = flux_src_vf(eqn)%sf(x, y, z) - Mass_diffu_Flux(eqn - chemxb + 1) - end do + do z = isc3%beg, isc3%end + do y = isc2%beg, isc2%end + do x = isc1%beg, isc1%end + ! Calculate grid spacing using direction-based indexing + select case (idir) + case (1) + grid_spacing = x_cc(x + 1) - x_cc(x) + case (2) + grid_spacing = y_cc(y + 1) - y_cc(y) + case (3) + grid_spacing = z_cc(z + 1) - z_cc(z) + end select + + ! Extract species mass fractions + $:GPU_LOOP(parallelism='[seq]') + do i = chemxb, chemxe + Ys_L(i - chemxb + 1) = q_prim_qp(i)%sf(x, y, z) + Ys_R(i - chemxb + 1) = q_prim_qp(i)%sf(x + offsets(1), y + offsets(2), z + offsets(3)) + Ys_cell(i - chemxb + 1) = 0.5_wp*(Ys_L(i - chemxb + 1) + Ys_R(i - chemxb + 1)) + end do + + ! Calculate molecular weights and mole fractions + call get_mixture_molecular_weight(Ys_L, MW_L) + call get_mixture_molecular_weight(Ys_R, MW_R) + MW_cell = 0.5_wp*(MW_L + MW_R) + + call get_mole_fractions(MW_L, Ys_L, Xs_L) + call get_mole_fractions(MW_R, Ys_R, Xs_R) + + ! Calculate gas constants and thermodynamic properties + Rgas_L = gas_constant/MW_L + Rgas_R = gas_constant/MW_R + + P_L = q_prim_qp(E_idx)%sf(x, y, z) + P_R = q_prim_qp(E_idx)%sf(x + offsets(1), y + offsets(2), z + offsets(3)) + + rho_L = q_prim_qp(1)%sf(x, y, z) + rho_R = q_prim_qp(1)%sf(x + offsets(1), y + offsets(2), z + offsets(3)) + + T_L = P_L/rho_L/Rgas_L + T_R = P_R/rho_R/Rgas_R + + rho_cell = 0.5_wp*(rho_L + rho_R) + dT_dxi = (T_R - T_L)/grid_spacing + + ! Get transport properties + call get_species_mass_diffusivities_mixavg(P_L, T_L, Ys_L, mass_diffusivities_mixavg1) + call get_species_mass_diffusivities_mixavg(P_R, T_R, Ys_R, mass_diffusivities_mixavg2) + + call get_mixture_thermal_conductivity_mixavg(T_L, Ys_L, lambda_L) + call get_mixture_thermal_conductivity_mixavg(T_R, Ys_R, lambda_R) + + call get_species_enthalpies_rt(T_L, h_l) + call get_species_enthalpies_rt(T_R, h_r) + + ! Calculate species properties and gradients + $:GPU_LOOP(parallelism='[seq]') + do i = chemxb, chemxe + h_l(i - chemxb + 1) = h_l(i - chemxb + 1)*gas_constant*T_L/molecular_weights(i - chemxb + 1) + h_r(i - chemxb + 1) = h_r(i - chemxb + 1)*gas_constant*T_R/molecular_weights(i - chemxb + 1) + Xs_cell(i - chemxb + 1) = 0.5_wp*(Xs_L(i - chemxb + 1) + Xs_R(i - chemxb + 1)) + h_k(i - chemxb + 1) = 0.5_wp*(h_l(i - chemxb + 1) + h_r(i - chemxb + 1)) + dXk_dxi(i - chemxb + 1) = (Xs_R(i - chemxb + 1) - Xs_L(i - chemxb + 1))/grid_spacing + end do + + ! Calculate mixture-averaged diffusivities + $:GPU_LOOP(parallelism='[seq]') + do i = chemxb, chemxe + mass_diffusivities_mixavg_Cell(i - chemxb + 1) = & + (mass_diffusivities_mixavg2(i - chemxb + 1) + mass_diffusivities_mixavg1(i - chemxb + 1))/2.0_wp + end do + + lambda_Cell = 0.5_wp*(lambda_R + lambda_L) + + ! Calculate mass diffusion fluxes + rho_Vic = 0.0_wp + Mass_Diffu_Energy = 0.0_wp + + $:GPU_LOOP(parallelism='[seq]') + do eqn = chemxb, chemxe + Mass_Diffu_Flux(eqn - chemxb + 1) = rho_cell*mass_diffusivities_mixavg_Cell(eqn - chemxb + 1)* & + molecular_weights(eqn - chemxb + 1)/MW_cell*dXk_dxi(eqn - chemxb + 1) + rho_Vic = rho_Vic + Mass_Diffu_Flux(eqn - chemxb + 1) + Mass_Diffu_Energy = Mass_Diffu_Energy + h_k(eqn - chemxb + 1)*Mass_Diffu_Flux(eqn - chemxb + 1) + end do + + ! Apply corrections for mass conservation + $:GPU_LOOP(parallelism='[seq]') + do eqn = chemxb, chemxe + Mass_Diffu_Energy = Mass_Diffu_Energy - h_k(eqn - chemxb + 1)*Ys_cell(eqn - chemxb + 1)*rho_Vic + Mass_Diffu_Flux(eqn - chemxb + 1) = Mass_Diffu_Flux(eqn - chemxb + 1) - rho_Vic*Ys_cell(eqn - chemxb + 1) + end do + + ! Add thermal conduction contribution + Mass_Diffu_Energy = lambda_Cell*dT_dxi + Mass_Diffu_Energy + + ! Update flux arrays + flux_src_vf(E_idx)%sf(x, y, z) = flux_src_vf(E_idx)%sf(x, y, z) - Mass_Diffu_Energy + + $:GPU_LOOP(parallelism='[seq]') + do eqn = chemxb, chemxe + flux_src_vf(eqn)%sf(x, y, z) = flux_src_vf(eqn)%sf(x, y, z) - Mass_diffu_Flux(eqn - chemxb + 1) end do end do end do + end do $:END_GPU_PARALLEL_LOOP() end if diff --git a/src/common/m_compute_levelset.fpp b/src/common/m_compute_levelset.fpp index 095c542d5..f343c0c47 100644 --- a/src/common/m_compute_levelset.fpp +++ b/src/common/m_compute_levelset.fpp @@ -45,23 +45,23 @@ contains $:GPU_PARALLEL_LOOP(private='[i,j,dist_vec,dist]', & & copyin='[ib_patch_id,center,radius]', collapse=2) - do i = 0, m - do j = 0, n + do i = 0, m + do j = 0, n + + dist_vec(1) = x_cc(i) - center(1) + dist_vec(2) = y_cc(j) - center(2) + dist_vec(3) = 0._wp + dist = sqrt(sum(dist_vec**2)) + levelset%sf(i, j, 0, ib_patch_id) = dist - radius + if (f_approx_equal(dist, 0._wp)) then + levelset_norm%sf(i, j, 0, ib_patch_id, :) = 0 + else + levelset_norm%sf(i, j, 0, ib_patch_id, :) = & + dist_vec(:)/dist + end if - dist_vec(1) = x_cc(i) - center(1) - dist_vec(2) = y_cc(j) - center(2) - dist_vec(3) = 0._wp - dist = sqrt(sum(dist_vec**2)) - levelset%sf(i, j, 0, ib_patch_id) = dist - radius - if (f_approx_equal(dist, 0._wp)) then - levelset_norm%sf(i, j, 0, ib_patch_id, :) = 0 - else - levelset_norm%sf(i, j, 0, ib_patch_id, :) = & - dist_vec(:)/dist - end if - - end do end do + end do $:END_GPU_PARALLEL_LOOP() end subroutine s_circle_levelset @@ -89,65 +89,65 @@ contains $:GPU_PARALLEL_LOOP(private='[i,j,xy_local,k,dist_vec,dist,global_dist,global_id]', & & copyin='[ib_patch_id,center,rotation,inverse_rotation,airfoil_grid_u,airfoil_grid_l]', collapse=2) - do i = 0, m - do j = 0, n - xy_local = [x_cc(i) - center(1), y_cc(j) - center(2), 0._wp] ! get coordinate frame centered on IB - xy_local = matmul(inverse_rotation, xy_local) ! rotate the frame into the IB's coordinate - - if (xy_local(2) >= 0._wp) then - ! finds the location on the airfoil grid with the minimum distance (closest) - do k = 1, Np - dist_vec(1) = xy_local(1) - airfoil_grid_u(k)%x - dist_vec(2) = xy_local(2) - airfoil_grid_u(k)%y - dist_vec(3) = 0._wp - dist = sqrt(sum(dist_vec**2)) - if (k == 1) then + do i = 0, m + do j = 0, n + xy_local = [x_cc(i) - center(1), y_cc(j) - center(2), 0._wp] ! get coordinate frame centered on IB + xy_local = matmul(inverse_rotation, xy_local) ! rotate the frame into the IB's coordinate + + if (xy_local(2) >= 0._wp) then + ! finds the location on the airfoil grid with the minimum distance (closest) + do k = 1, Np + dist_vec(1) = xy_local(1) - airfoil_grid_u(k)%x + dist_vec(2) = xy_local(2) - airfoil_grid_u(k)%y + dist_vec(3) = 0._wp + dist = sqrt(sum(dist_vec**2)) + if (k == 1) then + global_dist = dist + global_id = k + else + if (dist < global_dist) then global_dist = dist global_id = k - else - if (dist < global_dist) then - global_dist = dist - global_id = k - end if end if - end do - dist_vec(1) = xy_local(1) - airfoil_grid_u(global_id)%x - dist_vec(2) = xy_local(2) - airfoil_grid_u(global_id)%y + end if + end do + dist_vec(1) = xy_local(1) - airfoil_grid_u(global_id)%x + dist_vec(2) = xy_local(2) - airfoil_grid_u(global_id)%y + dist_vec(3) = 0 + dist = global_dist + else + ! TODO :: This looks identical to the above code but using the lower array. Refactor this. + do k = 1, Np + dist_vec(1) = xy_local(1) - airfoil_grid_l(k)%x + dist_vec(2) = xy_local(2) - airfoil_grid_l(k)%y dist_vec(3) = 0 - dist = global_dist - else - ! TODO :: This looks identical to the above code but using the lower array. Refactor this. - do k = 1, Np - dist_vec(1) = xy_local(1) - airfoil_grid_l(k)%x - dist_vec(2) = xy_local(2) - airfoil_grid_l(k)%y - dist_vec(3) = 0 - dist = sqrt(sum(dist_vec**2)) - if (k == 1) then + dist = sqrt(sum(dist_vec**2)) + if (k == 1) then + global_dist = dist + global_id = k + else + if (dist < global_dist) then global_dist = dist global_id = k - else - if (dist < global_dist) then - global_dist = dist - global_id = k - end if end if - end do - dist_vec(1) = xy_local(1) - airfoil_grid_l(global_id)%x - dist_vec(2) = xy_local(2) - airfoil_grid_l(global_id)%y - dist_vec(3) = 0 - dist = global_dist - end if - - levelset%sf(i, j, 0, ib_patch_id) = dist - if (f_approx_equal(dist, 0._wp)) then - levelset_norm%sf(i, j, 0, ib_patch_id, :) = 0._wp - else - levelset_norm%sf(i, j, 0, ib_patch_id, :) = & - matmul(rotation, dist_vec(:))/dist ! convert the normal vector back to global grid coordinates - end if + end if + end do + dist_vec(1) = xy_local(1) - airfoil_grid_l(global_id)%x + dist_vec(2) = xy_local(2) - airfoil_grid_l(global_id)%y + dist_vec(3) = 0 + dist = global_dist + end if + + levelset%sf(i, j, 0, ib_patch_id) = dist + if (f_approx_equal(dist, 0._wp)) then + levelset_norm%sf(i, j, 0, ib_patch_id, :) = 0._wp + else + levelset_norm%sf(i, j, 0, ib_patch_id, :) = & + matmul(rotation, dist_vec(:))/dist ! convert the normal vector back to global grid coordinates + end if - end do end do + end do $:END_GPU_PARALLEL_LOOP() end subroutine s_airfoil_levelset @@ -182,79 +182,79 @@ contains $:GPU_PARALLEL_LOOP(private='[i,j,l,xyz_local,k,dist_vec,dist,global_dist,global_id,dist_side,dist_surf]', & & copyin='[ib_patch_id,center,rotation,inverse_rotation,airfoil_grid_u,airfoil_grid_l,z_min,z_max]', collapse=3) - do l = 0, p - do j = 0, n - do i = 0, m - - xyz_local = [x_cc(i) - center(1), y_cc(j) - center(2), z_cc(l) - center(3)] ! get coordinate frame centered on IB - xyz_local = matmul(inverse_rotation, xyz_local) ! rotate the frame into the IB's coordinates - - if (xyz_local(2) >= center(2)) then - do k = 1, Np - dist_vec(1) = xyz_local(1) - airfoil_grid_u(k)%x - dist_vec(2) = xyz_local(2) - airfoil_grid_u(k)%y - dist_vec(3) = 0 - dist_surf = sqrt(sum(dist_vec**2)) - if (k == 1) then + do l = 0, p + do j = 0, n + do i = 0, m + + xyz_local = [x_cc(i) - center(1), y_cc(j) - center(2), z_cc(l) - center(3)] ! get coordinate frame centered on IB + xyz_local = matmul(inverse_rotation, xyz_local) ! rotate the frame into the IB's coordinates + + if (xyz_local(2) >= center(2)) then + do k = 1, Np + dist_vec(1) = xyz_local(1) - airfoil_grid_u(k)%x + dist_vec(2) = xyz_local(2) - airfoil_grid_u(k)%y + dist_vec(3) = 0 + dist_surf = sqrt(sum(dist_vec**2)) + if (k == 1) then + global_dist = dist_surf + global_id = k + else + if (dist_surf < global_dist) then global_dist = dist_surf global_id = k - else - if (dist_surf < global_dist) then - global_dist = dist_surf - global_id = k - end if end if - end do - dist_vec(1) = xyz_local(1) - airfoil_grid_u(global_id)%x - dist_vec(2) = xyz_local(2) - airfoil_grid_u(global_id)%y + end if + end do + dist_vec(1) = xyz_local(1) - airfoil_grid_u(global_id)%x + dist_vec(2) = xyz_local(2) - airfoil_grid_u(global_id)%y + dist_vec(3) = 0 + dist_surf = global_dist + else + do k = 1, Np + dist_vec(1) = xyz_local(1) - airfoil_grid_l(k)%x + dist_vec(2) = xyz_local(2) - airfoil_grid_l(k)%y dist_vec(3) = 0 - dist_surf = global_dist - else - do k = 1, Np - dist_vec(1) = xyz_local(1) - airfoil_grid_l(k)%x - dist_vec(2) = xyz_local(2) - airfoil_grid_l(k)%y - dist_vec(3) = 0 - dist_surf = sqrt(sum(dist_vec**2)) - if (k == 1) then + dist_surf = sqrt(sum(dist_vec**2)) + if (k == 1) then + global_dist = dist_surf + global_id = k + else + if (dist_surf < global_dist) then global_dist = dist_surf global_id = k - else - if (dist_surf < global_dist) then - global_dist = dist_surf - global_id = k - end if end if - end do - dist_vec(1) = xyz_local(1) - airfoil_grid_l(global_id)%x - dist_vec(2) = xyz_local(2) - airfoil_grid_l(global_id)%y - dist_vec(3) = 0 - dist_surf = global_dist - end if + end if + end do + dist_vec(1) = xyz_local(1) - airfoil_grid_l(global_id)%x + dist_vec(2) = xyz_local(2) - airfoil_grid_l(global_id)%y + dist_vec(3) = 0 + dist_surf = global_dist + end if - dist_side = min(abs(z_cc(l) - z_min), abs(z_max - z_cc(l))) + dist_side = min(abs(z_cc(l) - z_min), abs(z_max - z_cc(l))) - if (dist_side < dist_surf) then - levelset%sf(i, j, l, ib_patch_id) = dist_side - if (f_approx_equal(dist_side, abs(z_cc(l) - z_min))) then - levelset_norm%sf(i, j, l, ib_patch_id, :) = (/0, 0, -1/) - else - levelset_norm%sf(i, j, l, ib_patch_id, :) = (/0, 0, 1/) - end if - levelset_norm%sf(i, j, l, ib_patch_id, :) = & - matmul(rotation, levelset_norm%sf(i, j, l, ib_patch_id, :)/dist_surf) + if (dist_side < dist_surf) then + levelset%sf(i, j, l, ib_patch_id) = dist_side + if (f_approx_equal(dist_side, abs(z_cc(l) - z_min))) then + levelset_norm%sf(i, j, l, ib_patch_id, :) = (/0, 0, -1/) else - levelset%sf(i, j, l, ib_patch_id) = dist_surf - if (f_approx_equal(dist_surf, 0._wp)) then - levelset_norm%sf(i, j, l, ib_patch_id, :) = 0 - else - levelset_norm%sf(i, j, l, ib_patch_id, :) = & - matmul(rotation, dist_vec(:)/dist_surf) - end if + levelset_norm%sf(i, j, l, ib_patch_id, :) = (/0, 0, 1/) end if + levelset_norm%sf(i, j, l, ib_patch_id, :) = & + matmul(rotation, levelset_norm%sf(i, j, l, ib_patch_id, :)/dist_surf) + else + levelset%sf(i, j, l, ib_patch_id) = dist_surf + if (f_approx_equal(dist_surf, 0._wp)) then + levelset_norm%sf(i, j, l, ib_patch_id, :) = 0 + else + levelset_norm%sf(i, j, l, ib_patch_id, :) = & + matmul(rotation, dist_vec(:)/dist_surf) + end if + end if - end do end do end do + end do $:END_GPU_PARALLEL_LOOP() end subroutine s_3D_airfoil_levelset @@ -292,46 +292,46 @@ contains $:GPU_PARALLEL_LOOP(private='[i,j,k,min_dist,idx,side_dists,xy_local,dist_vec]', & & copyin='[ib_patch_id,center,bottom_left,top_right,inverse_rotation,rotation]', collapse=2) - do i = 0, m - do j = 0, n - xy_local = [x_cc(i) - center(1), y_cc(j) - center(2), 0._wp] - xy_local = matmul(inverse_rotation, xy_local) - - if ((xy_local(1) > bottom_left(1) .and. xy_local(1) < top_right(1)) .or. & - (xy_local(2) > bottom_left(2) .and. xy_local(2) < top_right(2))) then - - side_dists(1) = bottom_left(1) - xy_local(1) - side_dists(2) = top_right(1) - xy_local(1) - side_dists(3) = bottom_left(2) - xy_local(2) - side_dists(4) = top_right(2) - xy_local(2) - min_dist = side_dists(1) - idx = 1 - - do k = 2, 4 - if (abs(side_dists(k)) < abs(min_dist)) then - idx = k - min_dist = side_dists(idx) - end if - end do + do i = 0, m + do j = 0, n + xy_local = [x_cc(i) - center(1), y_cc(j) - center(2), 0._wp] + xy_local = matmul(inverse_rotation, xy_local) + + if ((xy_local(1) > bottom_left(1) .and. xy_local(1) < top_right(1)) .or. & + (xy_local(2) > bottom_left(2) .and. xy_local(2) < top_right(2))) then + + side_dists(1) = bottom_left(1) - xy_local(1) + side_dists(2) = top_right(1) - xy_local(1) + side_dists(3) = bottom_left(2) - xy_local(2) + side_dists(4) = top_right(2) - xy_local(2) + min_dist = side_dists(1) + idx = 1 + + do k = 2, 4 + if (abs(side_dists(k)) < abs(min_dist)) then + idx = k + min_dist = side_dists(idx) + end if + end do - levelset%sf(i, j, 0, ib_patch_id) = side_dists(idx) - dist_vec = 0._wp - if (.not. f_approx_equal(side_dists(idx), 0._wp)) then - if (idx == 1 .or. idx == 2) then - ! vector points along the x axis - dist_vec(1) = side_dists(idx)/abs(side_dists(idx)) - else - ! vector points along the y axis - dist_vec(2) = side_dists(idx)/abs(side_dists(idx)) - end if - ! convert the normal vector back into the global coordinate system - levelset_norm%sf(i, j, 0, ib_patch_id, :) = matmul(rotation, dist_vec) + levelset%sf(i, j, 0, ib_patch_id) = side_dists(idx) + dist_vec = 0._wp + if (.not. f_approx_equal(side_dists(idx), 0._wp)) then + if (idx == 1 .or. idx == 2) then + ! vector points along the x axis + dist_vec(1) = side_dists(idx)/abs(side_dists(idx)) else - levelset_norm%sf(i, j, 0, ib_patch_id, :) = 0._wp + ! vector points along the y axis + dist_vec(2) = side_dists(idx)/abs(side_dists(idx)) end if + ! convert the normal vector back into the global coordinate system + levelset_norm%sf(i, j, 0, ib_patch_id, :) = matmul(rotation, dist_vec) + else + levelset_norm%sf(i, j, 0, ib_patch_id, :) = 0._wp end if - end do + end if end do + end do $:END_GPU_PARALLEL_LOOP() end subroutine s_rectangle_levelset @@ -373,74 +373,74 @@ contains $:GPU_PARALLEL_LOOP(private='[i,j,k,min_dist,side_dists,xyz_local,dist_vec]', & & copyin='[ib_patch_id,center,inverse_rotation,rotation,Right,Left,Top,Bottom,Front,Back]', collapse=3) - do i = 0, m - do j = 0, n - do k = 0, p - - xyz_local = [x_cc(i), y_cc(j), z_cc(k)] - center ! get coordinate frame centered on IB - xyz_local = matmul(inverse_rotation, xyz_local) ! rotate the frame into the IB's coordinate - - if ((xyz_local(1) > Left .and. xyz_local(1) < Right) .or. & - (xyz_local(2) > Bottom .and. xyz_local(2) < Top) .or. & - (xyz_local(3) > Back .and. xyz_local(3) < Front)) then - - side_dists(1) = Left - xyz_local(1) - side_dists(2) = xyz_local(1) - Right - side_dists(3) = Bottom - xyz_local(2) - side_dists(4) = xyz_local(2) - Top - side_dists(5) = Back - xyz_local(3) - side_dists(6) = xyz_local(3) - Front - min_dist = minval(abs(side_dists)) - - ! TODO :: The way that this is written, it looks like we will - ! trigger at the first size that is close to the minimum distance, - ! meaning corners where side_dists are the same will - ! trigger on what may not actually be the minimum, - ! leading to undesired behavior. This should be resolved - ! and this code should be cleaned up. It also means that - ! rotating the box 90 degrees will cause tests to fail. - dist_vec = 0._wp - if (f_approx_equal(min_dist, abs(side_dists(1)))) then - levelset%sf(i, j, k, ib_patch_id) = side_dists(1) - if (.not. f_approx_equal(side_dists(1), 0._wp)) then - dist_vec(1) = side_dists(1)/abs(side_dists(1)) - end if + do i = 0, m + do j = 0, n + do k = 0, p + + xyz_local = [x_cc(i), y_cc(j), z_cc(k)] - center ! get coordinate frame centered on IB + xyz_local = matmul(inverse_rotation, xyz_local) ! rotate the frame into the IB's coordinate + + if ((xyz_local(1) > Left .and. xyz_local(1) < Right) .or. & + (xyz_local(2) > Bottom .and. xyz_local(2) < Top) .or. & + (xyz_local(3) > Back .and. xyz_local(3) < Front)) then + + side_dists(1) = Left - xyz_local(1) + side_dists(2) = xyz_local(1) - Right + side_dists(3) = Bottom - xyz_local(2) + side_dists(4) = xyz_local(2) - Top + side_dists(5) = Back - xyz_local(3) + side_dists(6) = xyz_local(3) - Front + min_dist = minval(abs(side_dists)) + + ! TODO :: The way that this is written, it looks like we will + ! trigger at the first size that is close to the minimum distance, + ! meaning corners where side_dists are the same will + ! trigger on what may not actually be the minimum, + ! leading to undesired behavior. This should be resolved + ! and this code should be cleaned up. It also means that + ! rotating the box 90 degrees will cause tests to fail. + dist_vec = 0._wp + if (f_approx_equal(min_dist, abs(side_dists(1)))) then + levelset%sf(i, j, k, ib_patch_id) = side_dists(1) + if (.not. f_approx_equal(side_dists(1), 0._wp)) then + dist_vec(1) = side_dists(1)/abs(side_dists(1)) + end if - else if (f_approx_equal(min_dist, abs(side_dists(2)))) then - levelset%sf(i, j, k, ib_patch_id) = side_dists(2) - if (.not. f_approx_equal(side_dists(2), 0._wp)) then - dist_vec(1) = -side_dists(2)/abs(side_dists(2)) - end if + else if (f_approx_equal(min_dist, abs(side_dists(2)))) then + levelset%sf(i, j, k, ib_patch_id) = side_dists(2) + if (.not. f_approx_equal(side_dists(2), 0._wp)) then + dist_vec(1) = -side_dists(2)/abs(side_dists(2)) + end if - else if (f_approx_equal(min_dist, abs(side_dists(3)))) then - levelset%sf(i, j, k, ib_patch_id) = side_dists(3) - if (.not. f_approx_equal(side_dists(3), 0._wp)) then - dist_vec(2) = side_dists(3)/abs(side_dists(3)) - end if + else if (f_approx_equal(min_dist, abs(side_dists(3)))) then + levelset%sf(i, j, k, ib_patch_id) = side_dists(3) + if (.not. f_approx_equal(side_dists(3), 0._wp)) then + dist_vec(2) = side_dists(3)/abs(side_dists(3)) + end if - else if (f_approx_equal(min_dist, abs(side_dists(4)))) then - levelset%sf(i, j, k, ib_patch_id) = side_dists(4) - if (.not. f_approx_equal(side_dists(4), 0._wp)) then - dist_vec(2) = -side_dists(4)/abs(side_dists(4)) - end if + else if (f_approx_equal(min_dist, abs(side_dists(4)))) then + levelset%sf(i, j, k, ib_patch_id) = side_dists(4) + if (.not. f_approx_equal(side_dists(4), 0._wp)) then + dist_vec(2) = -side_dists(4)/abs(side_dists(4)) + end if - else if (f_approx_equal(min_dist, abs(side_dists(5)))) then - levelset%sf(i, j, k, ib_patch_id) = side_dists(5) - if (.not. f_approx_equal(side_dists(5), 0._wp)) then - dist_vec(3) = side_dists(5)/abs(side_dists(5)) - end if + else if (f_approx_equal(min_dist, abs(side_dists(5)))) then + levelset%sf(i, j, k, ib_patch_id) = side_dists(5) + if (.not. f_approx_equal(side_dists(5), 0._wp)) then + dist_vec(3) = side_dists(5)/abs(side_dists(5)) + end if - else if (f_approx_equal(min_dist, abs(side_dists(6)))) then - levelset%sf(i, j, k, ib_patch_id) = side_dists(6) - if (.not. f_approx_equal(side_dists(6), 0._wp)) then - dist_vec(3) = -side_dists(6)/abs(side_dists(6)) - end if + else if (f_approx_equal(min_dist, abs(side_dists(6)))) then + levelset%sf(i, j, k, ib_patch_id) = side_dists(6) + if (.not. f_approx_equal(side_dists(6), 0._wp)) then + dist_vec(3) = -side_dists(6)/abs(side_dists(6)) end if - levelset_norm%sf(i, j, k, ib_patch_id, :) = matmul(rotation, dist_vec) end if - end do + levelset_norm%sf(i, j, k, ib_patch_id, :) = matmul(rotation, dist_vec) + end if end do end do + end do $:END_GPU_PARALLEL_LOOP() end subroutine s_cuboid_levelset @@ -463,22 +463,22 @@ contains $:GPU_PARALLEL_LOOP(private='[i,j,k,dist_vec,dist]', & & copyin='[ib_patch_id,center,radius]', collapse=3) - do i = 0, m - do j = 0, n - do k = 0, p - dist_vec(1) = x_cc(i) - center(1) - dist_vec(2) = y_cc(j) - center(2) - dist_vec(3) = z_cc(k) - center(3) - dist = sqrt(sum(dist_vec**2)) - levelset%sf(i, j, k, ib_patch_id) = dist - radius - if (f_approx_equal(dist, 0._wp)) then - levelset_norm%sf(i, j, k, ib_patch_id, :) = (/1, 0, 0/) - else - levelset_norm%sf(i, j, k, ib_patch_id, :) = dist_vec(:)/dist - end if - end do + do i = 0, m + do j = 0, n + do k = 0, p + dist_vec(1) = x_cc(i) - center(1) + dist_vec(2) = y_cc(j) - center(2) + dist_vec(3) = z_cc(k) - center(3) + dist = sqrt(sum(dist_vec**2)) + levelset%sf(i, j, k, ib_patch_id) = dist - radius + if (f_approx_equal(dist, 0._wp)) then + levelset_norm%sf(i, j, k, ib_patch_id, :) = (/1, 0, 0/) + else + levelset_norm%sf(i, j, k, ib_patch_id, :) = dist_vec(:)/dist + end if end do end do + end do $:END_GPU_PARALLEL_LOOP() end subroutine s_sphere_levelset @@ -528,38 +528,38 @@ contains $:GPU_PARALLEL_LOOP(private='[i,j,k,side_pos,dist_side,dist_surface,xyz_local]', & & copyin='[ib_patch_id,center,radius,inverse_rotation,rotation,dist_sides_vec,dist_surface_vec]', collapse=3) - do i = 0, m - do j = 0, n - do k = 0, p - xyz_local = [x_cc(i), y_cc(j), z_cc(k)] - center ! get coordinate frame centered on IB - xyz_local = matmul(inverse_rotation, xyz_local) ! rotate the frame into the IB's coordinates - - ! get distance to flat edge of cylinder - side_pos = dot_product(xyz_local, dist_sides_vec) - dist_side = min(abs(side_pos - boundary(1)), & - abs(boundary(2) - side_pos)) - ! get distance to curved side of cylinder - dist_surface = norm2(xyz_local*dist_surface_vec) & - - radius - - if (dist_side < abs(dist_surface)) then - ! if the closest edge is flat - levelset%sf(i, j, k, ib_patch_id) = -dist_side - if (f_approx_equal(dist_side, abs(side_pos - boundary(1)))) then - levelset_norm%sf(i, j, k, ib_patch_id, :) = matmul(rotation, -dist_sides_vec) - else - levelset_norm%sf(i, j, k, ib_patch_id, :) = matmul(rotation, dist_sides_vec) - end if + do i = 0, m + do j = 0, n + do k = 0, p + xyz_local = [x_cc(i), y_cc(j), z_cc(k)] - center ! get coordinate frame centered on IB + xyz_local = matmul(inverse_rotation, xyz_local) ! rotate the frame into the IB's coordinates + + ! get distance to flat edge of cylinder + side_pos = dot_product(xyz_local, dist_sides_vec) + dist_side = min(abs(side_pos - boundary(1)), & + abs(boundary(2) - side_pos)) + ! get distance to curved side of cylinder + dist_surface = norm2(xyz_local*dist_surface_vec) & + - radius + + if (dist_side < abs(dist_surface)) then + ! if the closest edge is flat + levelset%sf(i, j, k, ib_patch_id) = -dist_side + if (f_approx_equal(dist_side, abs(side_pos - boundary(1)))) then + levelset_norm%sf(i, j, k, ib_patch_id, :) = matmul(rotation, -dist_sides_vec) else - levelset%sf(i, j, k, ib_patch_id) = dist_surface - - xyz_local = xyz_local*dist_surface_vec - xyz_local = xyz_local/norm2(xyz_local) - levelset_norm%sf(i, j, k, ib_patch_id, :) = matmul(rotation, xyz_local) + levelset_norm%sf(i, j, k, ib_patch_id, :) = matmul(rotation, dist_sides_vec) end if - end do + else + levelset%sf(i, j, k, ib_patch_id) = dist_surface + + xyz_local = xyz_local*dist_surface_vec + xyz_local = xyz_local/norm2(xyz_local) + levelset_norm%sf(i, j, k, ib_patch_id, :) = matmul(rotation, xyz_local) + end if end do end do + end do $:END_GPU_PARALLEL_LOOP() end subroutine s_cylinder_levelset diff --git a/src/common/m_finite_differences.fpp b/src/common/m_finite_differences.fpp index efd20a628..f85d46944 100644 --- a/src/common/m_finite_differences.fpp +++ b/src/common/m_finite_differences.fpp @@ -19,43 +19,43 @@ contains real(wp) :: divergence $:GPU_PARALLEL_LOOP(collapse=3, private='[x,y,z,divergence]') - do x = ix_s%beg, ix_s%end - do y = iy_s%beg, iy_s%end - do z = iz_s%beg, iz_s%end - - if (x == ix_s%beg) then - divergence = (-3._wp*fields(1)%sf(x, y, z) + 4._wp*fields(1)%sf(x + 1, y, z) - fields(1)%sf(x + 2, y, z))/(x_cc(x + 2) - x_cc(x)) - else if (x == ix_s%end) then - divergence = (+3._wp*fields(1)%sf(x, y, z) - 4._wp*fields(1)%sf(x - 1, y, z) + fields(1)%sf(x - 2, y, z))/(x_cc(x) - x_cc(x - 2)) + do x = ix_s%beg, ix_s%end + do y = iy_s%beg, iy_s%end + do z = iz_s%beg, iz_s%end + + if (x == ix_s%beg) then + divergence = (-3._wp*fields(1)%sf(x, y, z) + 4._wp*fields(1)%sf(x + 1, y, z) - fields(1)%sf(x + 2, y, z))/(x_cc(x + 2) - x_cc(x)) + else if (x == ix_s%end) then + divergence = (+3._wp*fields(1)%sf(x, y, z) - 4._wp*fields(1)%sf(x - 1, y, z) + fields(1)%sf(x - 2, y, z))/(x_cc(x) - x_cc(x - 2)) + else + divergence = (fields(1)%sf(x + 1, y, z) - fields(1)%sf(x - 1, y, z))/(x_cc(x + 1) - x_cc(x - 1)) + end if + + if (n > 0) then + if (y == iy_s%beg) then + divergence = divergence + (-3._wp*fields(2)%sf(x, y, z) + 4._wp*fields(2)%sf(x, y + 1, z) - fields(2)%sf(x, y + 2, z))/(y_cc(y + 2) - y_cc(y)) + else if (y == iy_s%end) then + divergence = divergence + (+3._wp*fields(2)%sf(x, y, z) - 4._wp*fields(2)%sf(x, y - 1, z) + fields(2)%sf(x, y - 2, z))/(y_cc(y) - y_cc(y - 2)) else - divergence = (fields(1)%sf(x + 1, y, z) - fields(1)%sf(x - 1, y, z))/(x_cc(x + 1) - x_cc(x - 1)) + divergence = divergence + (fields(2)%sf(x, y + 1, z) - fields(2)%sf(x, y - 1, z))/(y_cc(y + 1) - y_cc(y - 1)) end if + end if - if (n > 0) then - if (y == iy_s%beg) then - divergence = divergence + (-3._wp*fields(2)%sf(x, y, z) + 4._wp*fields(2)%sf(x, y + 1, z) - fields(2)%sf(x, y + 2, z))/(y_cc(y + 2) - y_cc(y)) - else if (y == iy_s%end) then - divergence = divergence + (+3._wp*fields(2)%sf(x, y, z) - 4._wp*fields(2)%sf(x, y - 1, z) + fields(2)%sf(x, y - 2, z))/(y_cc(y) - y_cc(y - 2)) - else - divergence = divergence + (fields(2)%sf(x, y + 1, z) - fields(2)%sf(x, y - 1, z))/(y_cc(y + 1) - y_cc(y - 1)) - end if - end if - - if (p > 0) then - if (z == iz_s%beg) then - divergence = divergence + (-3._wp*fields(3)%sf(x, y, z) + 4._wp*fields(3)%sf(x, y, z + 1) - fields(3)%sf(x, y, z + 2))/(z_cc(z + 2) - z_cc(z)) - else if (z == iz_s%end) then - divergence = divergence + (+3._wp*fields(3)%sf(x, y, z) - 4._wp*fields(3)%sf(x, y, z - 1) + fields(2)%sf(x, y, z - 2))/(z_cc(z) - z_cc(z - 2)) - else - divergence = divergence + (fields(3)%sf(x, y, z + 1) - fields(3)%sf(x, y, z - 1))/(z_cc(z + 1) - z_cc(z - 1)) - end if + if (p > 0) then + if (z == iz_s%beg) then + divergence = divergence + (-3._wp*fields(3)%sf(x, y, z) + 4._wp*fields(3)%sf(x, y, z + 1) - fields(3)%sf(x, y, z + 2))/(z_cc(z + 2) - z_cc(z)) + else if (z == iz_s%end) then + divergence = divergence + (+3._wp*fields(3)%sf(x, y, z) - 4._wp*fields(3)%sf(x, y, z - 1) + fields(2)%sf(x, y, z - 2))/(z_cc(z) - z_cc(z - 2)) + else + divergence = divergence + (fields(3)%sf(x, y, z + 1) - fields(3)%sf(x, y, z - 1))/(z_cc(z + 1) - z_cc(z - 1)) end if + end if - div%sf(x, y, z) = div%sf(x, y, z) + divergence + div%sf(x, y, z) = div%sf(x, y, z) + divergence - end do end do end do + end do $:END_GPU_PARALLEL_LOOP() end subroutine s_compute_fd_divergence diff --git a/src/common/m_ib_patches.fpp b/src/common/m_ib_patches.fpp index 7ec8c8f1e..c0d95e6f1 100644 --- a/src/common/m_ib_patches.fpp +++ b/src/common/m_ib_patches.fpp @@ -164,15 +164,15 @@ contains $:GPU_PARALLEL_LOOP(private='[i,j]', copy='[ib_markers_sf]',& & copyin='[patch_id,center,radius]', collapse=2) - do j = 0, n - do i = 0, m - if ((x_cc(i) - center(1))**2 & - + (y_cc(j) - center(2))**2 <= radius**2) & - then - ib_markers_sf(i, j, 0) = patch_id - end if - end do + do j = 0, n + do i = 0, m + if ((x_cc(i) - center(1))**2 & + + (y_cc(j) - center(2))**2 <= radius**2) & + then + ib_markers_sf(i, j, 0) = patch_id + end if end do + end do $:END_GPU_PARALLEL_LOOP() end subroutine s_ib_circle @@ -272,59 +272,59 @@ contains $:GPU_PARALLEL_LOOP(private='[i,j,xy_local,k,f]', copy='[ib_markers_sf]',& & copyin='[patch_id,center,inverse_rotation,ma,ca_in,airfoil_grid_u,airfoil_grid_l]', collapse=2) - do j = 0, n - do i = 0, m - xy_local = [x_cc(i) - center(1), y_cc(j) - center(2), 0._wp] ! get coordinate frame centered on IB - xy_local = matmul(inverse_rotation, xy_local) ! rotate the frame into the IB's coordinates - - if (xy_local(1) >= 0._wp .and. xy_local(1) <= ca_in) then - xa = xy_local(1)/ca_in - if (xa <= pa) then - yc = (ma/pa**2)*(2*pa*xa - xa**2) - dycdxc = (2*ma/pa**2)*(pa - xa) + do j = 0, n + do i = 0, m + xy_local = [x_cc(i) - center(1), y_cc(j) - center(2), 0._wp] ! get coordinate frame centered on IB + xy_local = matmul(inverse_rotation, xy_local) ! rotate the frame into the IB's coordinates + + if (xy_local(1) >= 0._wp .and. xy_local(1) <= ca_in) then + xa = xy_local(1)/ca_in + if (xa <= pa) then + yc = (ma/pa**2)*(2*pa*xa - xa**2) + dycdxc = (2*ma/pa**2)*(pa - xa) + else + yc = (ma/(1 - pa)**2)*(1 - 2*pa + 2*pa*xa - xa**2) + dycdxc = (2*ma/(1 - pa)**2)*(pa - xa) + end if + if (xy_local(2) >= 0._wp) then + k = 1 + do while (airfoil_grid_u(k)%x < xy_local(1) .and. k <= Np) + k = k + 1 + end do + if (f_approx_equal(airfoil_grid_u(k)%x, xy_local(1))) then + if (xy_local(2) <= airfoil_grid_u(k)%y) then + !!IB + ib_markers_sf(i, j, 0) = patch_id + end if else - yc = (ma/(1 - pa)**2)*(1 - 2*pa + 2*pa*xa - xa**2) - dycdxc = (2*ma/(1 - pa)**2)*(pa - xa) - end if - if (xy_local(2) >= 0._wp) then - k = 1 - do while (airfoil_grid_u(k)%x < xy_local(1) .and. k <= Np) - k = k + 1 - end do - if (f_approx_equal(airfoil_grid_u(k)%x, xy_local(1))) then - if (xy_local(2) <= airfoil_grid_u(k)%y) then + f = (airfoil_grid_u(k)%x - xy_local(1))/(airfoil_grid_u(k)%x - airfoil_grid_u(k - 1)%x) + if (xy_local(2) <= ((1._wp - f)*airfoil_grid_u(k)%y + f*airfoil_grid_u(k - 1)%y)) then !!IB - ib_markers_sf(i, j, 0) = patch_id - end if - else - f = (airfoil_grid_u(k)%x - xy_local(1))/(airfoil_grid_u(k)%x - airfoil_grid_u(k - 1)%x) - if (xy_local(2) <= ((1._wp - f)*airfoil_grid_u(k)%y + f*airfoil_grid_u(k - 1)%y)) then + ib_markers_sf(i, j, 0) = patch_id + end if + end if + else + k = 1 + do while (airfoil_grid_l(k)%x < xy_local(1)) + k = k + 1 + end do + if (f_approx_equal(airfoil_grid_l(k)%x, xy_local(1))) then + if (xy_local(2) >= airfoil_grid_l(k)%y) then !!IB - ib_markers_sf(i, j, 0) = patch_id - end if + ib_markers_sf(i, j, 0) = patch_id end if else - k = 1 - do while (airfoil_grid_l(k)%x < xy_local(1)) - k = k + 1 - end do - if (f_approx_equal(airfoil_grid_l(k)%x, xy_local(1))) then - if (xy_local(2) >= airfoil_grid_l(k)%y) then - !!IB - ib_markers_sf(i, j, 0) = patch_id - end if - else - f = (airfoil_grid_l(k)%x - xy_local(1))/(airfoil_grid_l(k)%x - airfoil_grid_l(k - 1)%x) + f = (airfoil_grid_l(k)%x - xy_local(1))/(airfoil_grid_l(k)%x - airfoil_grid_l(k - 1)%x) - if (xy_local(2) >= ((1._wp - f)*airfoil_grid_l(k)%y + f*airfoil_grid_l(k - 1)%y)) then + if (xy_local(2) >= ((1._wp - f)*airfoil_grid_l(k)%y + f*airfoil_grid_l(k - 1)%y)) then !!IB - ib_markers_sf(i, j, 0) = patch_id - end if + ib_markers_sf(i, j, 0) = patch_id end if end if end if - end do + end if end do + end do $:END_GPU_PARALLEL_LOOP() end subroutine s_ib_airfoil @@ -425,56 +425,56 @@ contains $:GPU_PARALLEL_LOOP(private='[i,j,l,xyz_local,k,f]', copy='[ib_markers_sf]',& & copyin='[patch_id,center,inverse_rotation,ma,ca_in,airfoil_grid_u,airfoil_grid_l]', collapse=3) - do l = 0, p - do j = 0, n - do i = 0, m - xyz_local = [x_cc(i) - center(1), y_cc(j) - center(2), z_cc(l) - center(3)] ! get coordinate frame centered on IB - xyz_local = matmul(inverse_rotation, xyz_local) ! rotate the frame into the IB's coordinates - - if (xyz_local(3) >= z_min .and. xyz_local(3) <= z_max) then - - if (xyz_local(1) >= 0._wp .and. xyz_local(1) <= ca_in) then - if (xyz_local(2) >= 0._wp) then - k = 1 - do while (airfoil_grid_u(k)%x < xyz_local(1)) - k = k + 1 - end do - if (f_approx_equal(airfoil_grid_u(k)%x, xyz_local(1))) then - if (xyz_local(2) <= airfoil_grid_u(k)%y) then - !!IB - ib_markers_sf(i, j, l) = patch_id - end if - else - f = (airfoil_grid_u(k)%x - xyz_local(1))/(airfoil_grid_u(k)%x - airfoil_grid_u(k - 1)%x) - if (xyz_local(2) <= ((1._wp - f)*airfoil_grid_u(k)%y + f*airfoil_grid_u(k - 1)%y)) then + do l = 0, p + do j = 0, n + do i = 0, m + xyz_local = [x_cc(i) - center(1), y_cc(j) - center(2), z_cc(l) - center(3)] ! get coordinate frame centered on IB + xyz_local = matmul(inverse_rotation, xyz_local) ! rotate the frame into the IB's coordinates + + if (xyz_local(3) >= z_min .and. xyz_local(3) <= z_max) then + + if (xyz_local(1) >= 0._wp .and. xyz_local(1) <= ca_in) then + if (xyz_local(2) >= 0._wp) then + k = 1 + do while (airfoil_grid_u(k)%x < xyz_local(1)) + k = k + 1 + end do + if (f_approx_equal(airfoil_grid_u(k)%x, xyz_local(1))) then + if (xyz_local(2) <= airfoil_grid_u(k)%y) then !!IB - ib_markers_sf(i, j, l) = patch_id - end if + ib_markers_sf(i, j, l) = patch_id end if else - k = 1 - do while (airfoil_grid_l(k)%x < xyz_local(1)) - k = k + 1 - end do - if (f_approx_equal(airfoil_grid_l(k)%x, xyz_local(1))) then - if (xyz_local(2) >= airfoil_grid_l(k)%y) then + f = (airfoil_grid_u(k)%x - xyz_local(1))/(airfoil_grid_u(k)%x - airfoil_grid_u(k - 1)%x) + if (xyz_local(2) <= ((1._wp - f)*airfoil_grid_u(k)%y + f*airfoil_grid_u(k - 1)%y)) then !!IB - ib_markers_sf(i, j, l) = patch_id - end if - else - f = (airfoil_grid_l(k)%x - xyz_local(1))/(airfoil_grid_l(k)%x - airfoil_grid_l(k - 1)%x) + ib_markers_sf(i, j, l) = patch_id + end if + end if + else + k = 1 + do while (airfoil_grid_l(k)%x < xyz_local(1)) + k = k + 1 + end do + if (f_approx_equal(airfoil_grid_l(k)%x, xyz_local(1))) then + if (xyz_local(2) >= airfoil_grid_l(k)%y) then + !!IB + ib_markers_sf(i, j, l) = patch_id + end if + else + f = (airfoil_grid_l(k)%x - xyz_local(1))/(airfoil_grid_l(k)%x - airfoil_grid_l(k - 1)%x) - if (xyz_local(2) >= ((1._wp - f)*airfoil_grid_l(k)%y + f*airfoil_grid_l(k - 1)%y)) then + if (xyz_local(2) >= ((1._wp - f)*airfoil_grid_l(k)%y + f*airfoil_grid_l(k - 1)%y)) then !!IB - ib_markers_sf(i, j, l) = patch_id - end if + ib_markers_sf(i, j, l) = patch_id end if end if end if end if - end do + end if end do end do + end do $:END_GPU_PARALLEL_LOOP() end subroutine s_ib_3D_airfoil @@ -524,23 +524,23 @@ contains ! variables of the current patch are assigned to this cell. $:GPU_PARALLEL_LOOP(private='[i,j, xy_local]', copy='[ib_markers_sf]',& & copyin='[patch_id,center,length,inverse_rotation,x_cc,y_cc]', collapse=2) - do j = 0, n - do i = 0, m - ! get the x and y coordinates in the local IB frame - xy_local = [x_cc(i) - center(1), y_cc(j) - center(2), 0._wp] - xy_local = matmul(inverse_rotation, xy_local) + do j = 0, n + do i = 0, m + ! get the x and y coordinates in the local IB frame + xy_local = [x_cc(i) - center(1), y_cc(j) - center(2), 0._wp] + xy_local = matmul(inverse_rotation, xy_local) - if (-0.5_wp*length(1) <= xy_local(1) .and. & - 0.5_wp*length(1) >= xy_local(1) .and. & - -0.5_wp*length(2) <= xy_local(2) .and. & - 0.5_wp*length(2) >= xy_local(2)) then + if (-0.5_wp*length(1) <= xy_local(1) .and. & + 0.5_wp*length(1) >= xy_local(1) .and. & + -0.5_wp*length(2) <= xy_local(2) .and. & + 0.5_wp*length(2) >= xy_local(2)) then - ! Updating the patch identities bookkeeping variable - ib_markers_sf(i, j, 0) = patch_id + ! Updating the patch identities bookkeeping variable + ib_markers_sf(i, j, 0) = patch_id - end if - end do + end if end do + end do $:END_GPU_PARALLEL_LOOP() end subroutine s_ib_rectangle @@ -584,24 +584,24 @@ contains ! the current patch are assigned to this cell. $:GPU_PARALLEL_LOOP(private='[i,j,k,cart_y,cart_z]', copy='[ib_markers_sf]',& & copyin='[patch_id,center,radius]', collapse=3) - do k = 0, p - do j = 0, n - do i = 0, m - if (grid_geometry == 3) then - call s_convert_cylindrical_to_cartesian_coord(y_cc(j), z_cc(k)) - else - cart_y = y_cc(j) - cart_z = z_cc(k) - end if - ! Updating the patch identities bookkeeping variable - if (((x_cc(i) - center(1))**2 & - + (cart_y - center(2))**2 & - + (cart_z - center(3))**2 <= radius**2)) then - ib_markers_sf(i, j, k) = patch_id - end if - end do + do k = 0, p + do j = 0, n + do i = 0, m + if (grid_geometry == 3) then + call s_convert_cylindrical_to_cartesian_coord(y_cc(j), z_cc(k)) + else + cart_y = y_cc(j) + cart_z = z_cc(k) + end if + ! Updating the patch identities bookkeeping variable + if (((x_cc(i) - center(1))**2 & + + (cart_y - center(2))**2 & + + (cart_z - center(3))**2 <= radius**2)) then + ib_markers_sf(i, j, k) = patch_id + end if end do end do + end do $:END_GPU_PARALLEL_LOOP() end subroutine s_ib_sphere @@ -646,33 +646,33 @@ contains ! of the current patch are assigned to this cell. $:GPU_PARALLEL_LOOP(private='[i,j,k,xyz_local,cart_y,cart_z]', copy='[ib_markers_sf]',& & copyin='[patch_id,center,length,inverse_rotation]', collapse=3) - do k = 0, p - do j = 0, n - do i = 0, m + do k = 0, p + do j = 0, n + do i = 0, m - if (grid_geometry == 3) then - ! TODO :: This does not work and is not covered by any tests. This should be fixed - call s_convert_cylindrical_to_cartesian_coord(y_cc(j), z_cc(k)) - else - cart_y = y_cc(j) - cart_z = z_cc(k) - end if - xyz_local = [x_cc(i), cart_y, cart_z] - center ! get coordinate frame centered on IB - xyz_local = matmul(inverse_rotation, xyz_local) ! rotate the frame into the IB's coordinates - - if (-0.5*length(1) <= xyz_local(1) .and. & - 0.5*length(1) >= xyz_local(1) .and. & - -0.5*length(2) <= xyz_local(2) .and. & - 0.5*length(2) >= xyz_local(2) .and. & - -0.5*length(3) <= xyz_local(3) .and. & - 0.5*length(3) >= xyz_local(3)) then - - ! Updating the patch identities bookkeeping variable - ib_markers_sf(i, j, k) = patch_id - end if - end do + if (grid_geometry == 3) then + ! TODO :: This does not work and is not covered by any tests. This should be fixed + call s_convert_cylindrical_to_cartesian_coord(y_cc(j), z_cc(k)) + else + cart_y = y_cc(j) + cart_z = z_cc(k) + end if + xyz_local = [x_cc(i), cart_y, cart_z] - center ! get coordinate frame centered on IB + xyz_local = matmul(inverse_rotation, xyz_local) ! rotate the frame into the IB's coordinates + + if (-0.5*length(1) <= xyz_local(1) .and. & + 0.5*length(1) >= xyz_local(1) .and. & + -0.5*length(2) <= xyz_local(2) .and. & + 0.5*length(2) >= xyz_local(2) .and. & + -0.5*length(3) <= xyz_local(3) .and. & + 0.5*length(3) >= xyz_local(3)) then + + ! Updating the patch identities bookkeeping variable + ib_markers_sf(i, j, k) = patch_id + end if end do end do + end do $:END_GPU_PARALLEL_LOOP() end subroutine s_ib_cuboid @@ -721,43 +721,43 @@ contains ! variables of the current patch are assigned to this cell. $:GPU_PARALLEL_LOOP(private='[i,j,k,xyz_local,cart_y,cart_z]', copy='[ib_markers_sf]',& & copyin='[patch_id,center,length,radius,inverse_rotation]', collapse=3) - do k = 0, p - do j = 0, n - do i = 0, m + do k = 0, p + do j = 0, n + do i = 0, m - if (grid_geometry == 3) then - call s_convert_cylindrical_to_cartesian_coord(y_cc(j), z_cc(k)) - else - cart_y = y_cc(j) - cart_z = z_cc(k) - end if - xyz_local = [x_cc(i), cart_y, cart_z] - center ! get coordinate frame centered on IB - xyz_local = matmul(inverse_rotation, xyz_local) ! rotate the frame into the IB's coordinates - - if (((.not. f_is_default(length(1)) .and. & - xyz_local(2)**2 & - + xyz_local(3)**2 <= radius**2 .and. & - -0.5_wp*length(1) <= xyz_local(1) .and. & - 0.5_wp*length(1) >= xyz_local(1)) & - .or. & - (.not. f_is_default(length(2)) .and. & - xyz_local(1)**2 & - + xyz_local(3)**2 <= radius**2 .and. & - -0.5_wp*length(2) <= xyz_local(2) .and. & - 0.5_wp*length(2) >= xyz_local(2)) & - .or. & - (.not. f_is_default(length(3)) .and. & - xyz_local(1)**2 & - + xyz_local(2)**2 <= radius**2 .and. & - -0.5_wp*length(3) <= xyz_local(3) .and. & - 0.5_wp*length(3) >= xyz_local(3)))) then - - ! Updating the patch identities bookkeeping variable - ib_markers_sf(i, j, k) = patch_id - end if - end do + if (grid_geometry == 3) then + call s_convert_cylindrical_to_cartesian_coord(y_cc(j), z_cc(k)) + else + cart_y = y_cc(j) + cart_z = z_cc(k) + end if + xyz_local = [x_cc(i), cart_y, cart_z] - center ! get coordinate frame centered on IB + xyz_local = matmul(inverse_rotation, xyz_local) ! rotate the frame into the IB's coordinates + + if (((.not. f_is_default(length(1)) .and. & + xyz_local(2)**2 & + + xyz_local(3)**2 <= radius**2 .and. & + -0.5_wp*length(1) <= xyz_local(1) .and. & + 0.5_wp*length(1) >= xyz_local(1)) & + .or. & + (.not. f_is_default(length(2)) .and. & + xyz_local(1)**2 & + + xyz_local(3)**2 <= radius**2 .and. & + -0.5_wp*length(2) <= xyz_local(2) .and. & + 0.5_wp*length(2) >= xyz_local(2)) & + .or. & + (.not. f_is_default(length(3)) .and. & + xyz_local(1)**2 & + + xyz_local(2)**2 <= radius**2 .and. & + -0.5_wp*length(3) <= xyz_local(3) .and. & + 0.5_wp*length(3) >= xyz_local(3)))) then + + ! Updating the patch identities bookkeeping variable + ib_markers_sf(i, j, k) = patch_id + end if end do end do + end do $:END_GPU_PARALLEL_LOOP() end subroutine s_ib_cylinder diff --git a/src/common/m_mpi_common.fpp b/src/common/m_mpi_common.fpp index 21594edce..7852f46de 100644 --- a/src/common/m_mpi_common.fpp +++ b/src/common/m_mpi_common.fpp @@ -758,151 +758,151 @@ contains if (mpi_dir == ${mpi_dir}$) then #:if mpi_dir == 1 $:GPU_PARALLEL_LOOP(collapse=4,private='[r,i,j,k,l]') - do l = 0, p - do k = 0, n - do j = 0, buff_size - 1 - do i = 1, nVar - r = (i - 1) + v_size*(j + buff_size*(k + (n + 1)*l)) - buff_send(r) = q_comm(i)%sf(j + pack_offset, k, l) - end do + do l = 0, p + do k = 0, n + do j = 0, buff_size - 1 + do i = 1, nVar + r = (i - 1) + v_size*(j + buff_size*(k + (n + 1)*l)) + buff_send(r) = q_comm(i)%sf(j + pack_offset, k, l) end do end do end do + end do $:END_GPU_PARALLEL_LOOP() if (qbmm_comm) then $:GPU_PARALLEL_LOOP(collapse=4,private='[r,i,j,k,l]') - do l = 0, p - do k = 0, n - do j = 0, buff_size - 1 - do i = nVar + 1, nVar + 4 - do q = 1, nb - r = (i - 1) + (q - 1)*4 + v_size* & - (j + buff_size*(k + (n + 1)*l)) - buff_send(r) = pb_in(j + pack_offset, k, l, i - nVar, q) - end do + do l = 0, p + do k = 0, n + do j = 0, buff_size - 1 + do i = nVar + 1, nVar + 4 + do q = 1, nb + r = (i - 1) + (q - 1)*4 + v_size* & + (j + buff_size*(k + (n + 1)*l)) + buff_send(r) = pb_in(j + pack_offset, k, l, i - nVar, q) end do end do end do end do + end do $:END_GPU_PARALLEL_LOOP() $:GPU_PARALLEL_LOOP(collapse=5,private='[i,j,k,l,q,r]') - do l = 0, p - do k = 0, n - do j = 0, buff_size - 1 - do i = nVar + 1, nVar + 4 - do q = 1, nb - r = (i - 1) + (q - 1)*4 + nb*4 + v_size* & - (j + buff_size*(k + (n + 1)*l)) - buff_send(r) = mv_in(j + pack_offset, k, l, i - nVar, q) - end do + do l = 0, p + do k = 0, n + do j = 0, buff_size - 1 + do i = nVar + 1, nVar + 4 + do q = 1, nb + r = (i - 1) + (q - 1)*4 + nb*4 + v_size* & + (j + buff_size*(k + (n + 1)*l)) + buff_send(r) = mv_in(j + pack_offset, k, l, i - nVar, q) end do end do end do end do + end do $:END_GPU_PARALLEL_LOOP() end if #:elif mpi_dir == 2 $:GPU_PARALLEL_LOOP(collapse=4,private='[i,j,k,l,r]') - do i = 1, nVar - do l = 0, p - do k = 0, buff_size - 1 - do j = -buff_size, m + buff_size - r = (i - 1) + v_size* & - ((j + buff_size) + (m + 2*buff_size + 1)* & - (k + buff_size*l)) - buff_send(r) = q_comm(i)%sf(j, k + pack_offset, l) - end do + do i = 1, nVar + do l = 0, p + do k = 0, buff_size - 1 + do j = -buff_size, m + buff_size + r = (i - 1) + v_size* & + ((j + buff_size) + (m + 2*buff_size + 1)* & + (k + buff_size*l)) + buff_send(r) = q_comm(i)%sf(j, k + pack_offset, l) end do end do end do + end do $:END_GPU_PARALLEL_LOOP() if (qbmm_comm) then $:GPU_PARALLEL_LOOP(collapse=5,private='[i,j,k,l,q,r]') - do i = nVar + 1, nVar + 4 - do l = 0, p - do k = 0, buff_size - 1 - do j = -buff_size, m + buff_size - do q = 1, nb - r = (i - 1) + (q - 1)*4 + v_size* & - ((j + buff_size) + (m + 2*buff_size + 1)* & - (k + buff_size*l)) - buff_send(r) = pb_in(j, k + pack_offset, l, i - nVar, q) - end do + do i = nVar + 1, nVar + 4 + do l = 0, p + do k = 0, buff_size - 1 + do j = -buff_size, m + buff_size + do q = 1, nb + r = (i - 1) + (q - 1)*4 + v_size* & + ((j + buff_size) + (m + 2*buff_size + 1)* & + (k + buff_size*l)) + buff_send(r) = pb_in(j, k + pack_offset, l, i - nVar, q) end do end do end do end do + end do $:END_GPU_PARALLEL_LOOP() $:GPU_PARALLEL_LOOP(collapse=5,private='[i,j,k,l,q,r]') - do i = nVar + 1, nVar + 4 - do l = 0, p - do k = 0, buff_size - 1 - do j = -buff_size, m + buff_size - do q = 1, nb - r = (i - 1) + (q - 1)*4 + nb*4 + v_size* & - ((j + buff_size) + (m + 2*buff_size + 1)* & - (k + buff_size*l)) - buff_send(r) = mv_in(j, k + pack_offset, l, i - nVar, q) - end do + do i = nVar + 1, nVar + 4 + do l = 0, p + do k = 0, buff_size - 1 + do j = -buff_size, m + buff_size + do q = 1, nb + r = (i - 1) + (q - 1)*4 + nb*4 + v_size* & + ((j + buff_size) + (m + 2*buff_size + 1)* & + (k + buff_size*l)) + buff_send(r) = mv_in(j, k + pack_offset, l, i - nVar, q) end do end do end do end do + end do $:END_GPU_PARALLEL_LOOP() end if #:else $:GPU_PARALLEL_LOOP(collapse=4,private='[i,j,k,l,r]') - do i = 1, nVar - do l = 0, buff_size - 1 - do k = -buff_size, n + buff_size - do j = -buff_size, m + buff_size - r = (i - 1) + v_size* & - ((j + buff_size) + (m + 2*buff_size + 1)* & - ((k + buff_size) + (n + 2*buff_size + 1)*l)) - buff_send(r) = q_comm(i)%sf(j, k, l + pack_offset) - end do + do i = 1, nVar + do l = 0, buff_size - 1 + do k = -buff_size, n + buff_size + do j = -buff_size, m + buff_size + r = (i - 1) + v_size* & + ((j + buff_size) + (m + 2*buff_size + 1)* & + ((k + buff_size) + (n + 2*buff_size + 1)*l)) + buff_send(r) = q_comm(i)%sf(j, k, l + pack_offset) end do end do end do + end do $:END_GPU_PARALLEL_LOOP() if (qbmm_comm) then $:GPU_PARALLEL_LOOP(collapse=5,private='[i,j,k,l,q,r]') - do i = nVar + 1, nVar + 4 - do l = 0, buff_size - 1 - do k = -buff_size, n + buff_size - do j = -buff_size, m + buff_size - do q = 1, nb - r = (i - 1) + (q - 1)*4 + v_size* & - ((j + buff_size) + (m + 2*buff_size + 1)* & - ((k + buff_size) + (n + 2*buff_size + 1)*l)) - buff_send(r) = pb_in(j, k, l + pack_offset, i - nVar, q) - end do + do i = nVar + 1, nVar + 4 + do l = 0, buff_size - 1 + do k = -buff_size, n + buff_size + do j = -buff_size, m + buff_size + do q = 1, nb + r = (i - 1) + (q - 1)*4 + v_size* & + ((j + buff_size) + (m + 2*buff_size + 1)* & + ((k + buff_size) + (n + 2*buff_size + 1)*l)) + buff_send(r) = pb_in(j, k, l + pack_offset, i - nVar, q) end do end do end do end do + end do $:END_GPU_PARALLEL_LOOP() $:GPU_PARALLEL_LOOP(collapse=5,private='[i,j,k,l,q,r]') - do i = nVar + 1, nVar + 4 - do l = 0, buff_size - 1 - do k = -buff_size, n + buff_size - do j = -buff_size, m + buff_size - do q = 1, nb - r = (i - 1) + (q - 1)*4 + nb*4 + v_size* & - ((j + buff_size) + (m + 2*buff_size + 1)* & - ((k + buff_size) + (n + 2*buff_size + 1)*l)) - buff_send(r) = mv_in(j, k, l + pack_offset, i - nVar, q) - end do + do i = nVar + 1, nVar + 4 + do l = 0, buff_size - 1 + do k = -buff_size, n + buff_size + do j = -buff_size, m + buff_size + do q = 1, nb + r = (i - 1) + (q - 1)*4 + nb*4 + v_size* & + ((j + buff_size) + (m + 2*buff_size + 1)* & + ((k + buff_size) + (n + 2*buff_size + 1)*l)) + buff_send(r) = mv_in(j, k, l + pack_offset, i - nVar, q) end do end do end do end do + end do $:END_GPU_PARALLEL_LOOP() end if #:endif @@ -959,174 +959,174 @@ contains if (mpi_dir == ${mpi_dir}$) then #:if mpi_dir == 1 $:GPU_PARALLEL_LOOP(collapse=4,private='[i,j,k,l,r]') - do l = 0, p - do k = 0, n - do j = -buff_size, -1 - do i = 1, nVar - r = (i - 1) + v_size* & - (j + buff_size*((k + 1) + (n + 1)*l)) - q_comm(i)%sf(j + unpack_offset, k, l) = buff_recv(r) + do l = 0, p + do k = 0, n + do j = -buff_size, -1 + do i = 1, nVar + r = (i - 1) + v_size* & + (j + buff_size*((k + 1) + (n + 1)*l)) + q_comm(i)%sf(j + unpack_offset, k, l) = buff_recv(r) #if defined(__INTEL_COMPILER) - if (ieee_is_nan(q_comm(i)%sf(j, k, l))) then - print *, "Error", j, k, l, i - error stop "NaN(s) in recv" - end if + if (ieee_is_nan(q_comm(i)%sf(j, k, l))) then + print *, "Error", j, k, l, i + error stop "NaN(s) in recv" + end if #endif - end do end do end do end do + end do $:END_GPU_PARALLEL_LOOP() if (qbmm_comm) then $:GPU_PARALLEL_LOOP(collapse=5,private='[i,j,k,l,q,r]') - do l = 0, p - do k = 0, n - do j = -buff_size, -1 - do i = nVar + 1, nVar + 4 - do q = 1, nb - r = (i - 1) + (q - 1)*4 + v_size* & - (j + buff_size*((k + 1) + (n + 1)*l)) - pb_in(j + unpack_offset, k, l, i - nVar, q) = buff_recv(r) - end do + do l = 0, p + do k = 0, n + do j = -buff_size, -1 + do i = nVar + 1, nVar + 4 + do q = 1, nb + r = (i - 1) + (q - 1)*4 + v_size* & + (j + buff_size*((k + 1) + (n + 1)*l)) + pb_in(j + unpack_offset, k, l, i - nVar, q) = buff_recv(r) end do end do end do end do + end do $:END_GPU_PARALLEL_LOOP() $:GPU_PARALLEL_LOOP(collapse=5,private='[i,j,k,l,q,r]') - do l = 0, p - do k = 0, n - do j = -buff_size, -1 - do i = nVar + 1, nVar + 4 - do q = 1, nb - r = (i - 1) + (q - 1)*4 + nb*4 + v_size* & - (j + buff_size*((k + 1) + (n + 1)*l)) - mv_in(j + unpack_offset, k, l, i - nVar, q) = buff_recv(r) - end do + do l = 0, p + do k = 0, n + do j = -buff_size, -1 + do i = nVar + 1, nVar + 4 + do q = 1, nb + r = (i - 1) + (q - 1)*4 + nb*4 + v_size* & + (j + buff_size*((k + 1) + (n + 1)*l)) + mv_in(j + unpack_offset, k, l, i - nVar, q) = buff_recv(r) end do end do end do end do + end do $:END_GPU_PARALLEL_LOOP() end if #:elif mpi_dir == 2 $:GPU_PARALLEL_LOOP(collapse=4,private='[i,j,k,l,r]') - do i = 1, nVar - do l = 0, p - do k = -buff_size, -1 - do j = -buff_size, m + buff_size - r = (i - 1) + v_size* & - ((j + buff_size) + (m + 2*buff_size + 1)* & - ((k + buff_size) + buff_size*l)) - q_comm(i)%sf(j, k + unpack_offset, l) = buff_recv(r) + do i = 1, nVar + do l = 0, p + do k = -buff_size, -1 + do j = -buff_size, m + buff_size + r = (i - 1) + v_size* & + ((j + buff_size) + (m + 2*buff_size + 1)* & + ((k + buff_size) + buff_size*l)) + q_comm(i)%sf(j, k + unpack_offset, l) = buff_recv(r) #if defined(__INTEL_COMPILER) - if (ieee_is_nan(q_comm(i)%sf(j, k, l))) then - print *, "Error", j, k, l, i - error stop "NaN(s) in recv" - end if + if (ieee_is_nan(q_comm(i)%sf(j, k, l))) then + print *, "Error", j, k, l, i + error stop "NaN(s) in recv" + end if #endif - end do end do end do end do + end do $:END_GPU_PARALLEL_LOOP() if (qbmm_comm) then $:GPU_PARALLEL_LOOP(collapse=5,private='[i,j,k,l,q,r]') - do i = nVar + 1, nVar + 4 - do l = 0, p - do k = -buff_size, -1 - do j = -buff_size, m + buff_size - do q = 1, nb - r = (i - 1) + (q - 1)*4 + v_size* & - ((j + buff_size) + (m + 2*buff_size + 1)* & - ((k + buff_size) + buff_size*l)) - pb_in(j, k + unpack_offset, l, i - nVar, q) = buff_recv(r) - end do + do i = nVar + 1, nVar + 4 + do l = 0, p + do k = -buff_size, -1 + do j = -buff_size, m + buff_size + do q = 1, nb + r = (i - 1) + (q - 1)*4 + v_size* & + ((j + buff_size) + (m + 2*buff_size + 1)* & + ((k + buff_size) + buff_size*l)) + pb_in(j, k + unpack_offset, l, i - nVar, q) = buff_recv(r) end do end do end do end do + end do $:END_GPU_PARALLEL_LOOP() $:GPU_PARALLEL_LOOP(collapse=5,private='[i,j,k,l,q,r]') - do i = nVar + 1, nVar + 4 - do l = 0, p - do k = -buff_size, -1 - do j = -buff_size, m + buff_size - do q = 1, nb - r = (i - 1) + (q - 1)*4 + nb*4 + v_size* & - ((j + buff_size) + (m + 2*buff_size + 1)* & - ((k + buff_size) + buff_size*l)) - mv_in(j, k + unpack_offset, l, i - nVar, q) = buff_recv(r) - end do + do i = nVar + 1, nVar + 4 + do l = 0, p + do k = -buff_size, -1 + do j = -buff_size, m + buff_size + do q = 1, nb + r = (i - 1) + (q - 1)*4 + nb*4 + v_size* & + ((j + buff_size) + (m + 2*buff_size + 1)* & + ((k + buff_size) + buff_size*l)) + mv_in(j, k + unpack_offset, l, i - nVar, q) = buff_recv(r) end do end do end do end do + end do $:END_GPU_PARALLEL_LOOP() end if #:else ! Unpacking buffer from bc_z%beg $:GPU_PARALLEL_LOOP(collapse=4,private='[i,j,k,l,r]') - do i = 1, nVar - do l = -buff_size, -1 - do k = -buff_size, n + buff_size - do j = -buff_size, m + buff_size - r = (i - 1) + v_size* & - ((j + buff_size) + (m + 2*buff_size + 1)* & - ((k + buff_size) + (n + 2*buff_size + 1)* & - (l + buff_size))) - q_comm(i)%sf(j, k, l + unpack_offset) = buff_recv(r) + do i = 1, nVar + do l = -buff_size, -1 + do k = -buff_size, n + buff_size + do j = -buff_size, m + buff_size + r = (i - 1) + v_size* & + ((j + buff_size) + (m + 2*buff_size + 1)* & + ((k + buff_size) + (n + 2*buff_size + 1)* & + (l + buff_size))) + q_comm(i)%sf(j, k, l + unpack_offset) = buff_recv(r) #if defined(__INTEL_COMPILER) - if (ieee_is_nan(q_comm(i)%sf(j, k, l))) then - print *, "Error", j, k, l, i - error stop "NaN(s) in recv" - end if + if (ieee_is_nan(q_comm(i)%sf(j, k, l))) then + print *, "Error", j, k, l, i + error stop "NaN(s) in recv" + end if #endif - end do end do end do end do + end do $:END_GPU_PARALLEL_LOOP() if (qbmm_comm) then $:GPU_PARALLEL_LOOP(collapse=5,private='[i,j,k,l,q,r]') - do i = nVar + 1, nVar + 4 - do l = -buff_size, -1 - do k = -buff_size, n + buff_size - do j = -buff_size, m + buff_size - do q = 1, nb - r = (i - 1) + (q - 1)*4 + v_size* & - ((j + buff_size) + (m + 2*buff_size + 1)* & - ((k + buff_size) + (n + 2*buff_size + 1)* & - (l + buff_size))) - pb_in(j, k, l + unpack_offset, i - nVar, q) = buff_recv(r) - end do + do i = nVar + 1, nVar + 4 + do l = -buff_size, -1 + do k = -buff_size, n + buff_size + do j = -buff_size, m + buff_size + do q = 1, nb + r = (i - 1) + (q - 1)*4 + v_size* & + ((j + buff_size) + (m + 2*buff_size + 1)* & + ((k + buff_size) + (n + 2*buff_size + 1)* & + (l + buff_size))) + pb_in(j, k, l + unpack_offset, i - nVar, q) = buff_recv(r) end do end do end do end do + end do $:END_GPU_PARALLEL_LOOP() $:GPU_PARALLEL_LOOP(collapse=5,private='[i,j,k,l,q,r]') - do i = nVar + 1, nVar + 4 - do l = -buff_size, -1 - do k = -buff_size, n + buff_size - do j = -buff_size, m + buff_size - do q = 1, nb - r = (i - 1) + (q - 1)*4 + nb*4 + v_size* & - ((j + buff_size) + (m + 2*buff_size + 1)* & - ((k + buff_size) + (n + 2*buff_size + 1)* & - (l + buff_size))) - mv_in(j, k, l + unpack_offset, i - nVar, q) = buff_recv(r) - end do + do i = nVar + 1, nVar + 4 + do l = -buff_size, -1 + do k = -buff_size, n + buff_size + do j = -buff_size, m + buff_size + do q = 1, nb + r = (i - 1) + (q - 1)*4 + nb*4 + v_size* & + ((j + buff_size) + (m + 2*buff_size + 1)* & + ((k + buff_size) + (n + 2*buff_size + 1)* & + (l + buff_size))) + mv_in(j, k, l + unpack_offset, i - nVar, q) = buff_recv(r) end do end do end do end do + end do $:END_GPU_PARALLEL_LOOP() end if #:endif diff --git a/src/common/m_phase_change.fpp b/src/common/m_phase_change.fpp index 235792978..e5f5b0278 100644 --- a/src/common/m_phase_change.fpp +++ b/src/common/m_phase_change.fpp @@ -100,175 +100,175 @@ contains ! starting equilibrium solver $:GPU_PARALLEL_LOOP(collapse=3, private='[j,k,l,p_infOV, p_infpT, p_infSL, sk, hk, gk, ek, rhok,pS, pSOV, pSSL, TS, TSOV, TSatOV, TSatSL, TSSL, rhoe, dynE, rhos, rho, rM, m1, m2, MCT, TvF]') - do j = 0, m - do k = 0, n - do l = 0, p + do j = 0, m + do k = 0, n + do l = 0, p - rho = 0.0_wp; TvF = 0.0_wp - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids + rho = 0.0_wp; TvF = 0.0_wp + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids - ! Mixture density - rho = rho + q_cons_vf(i + contxb - 1)%sf(j, k, l) + ! Mixture density + rho = rho + q_cons_vf(i + contxb - 1)%sf(j, k, l) - ! Total Volume Fraction - TvF = TvF + q_cons_vf(i + advxb - 1)%sf(j, k, l) + ! Total Volume Fraction + TvF = TvF + q_cons_vf(i + advxb - 1)%sf(j, k, l) - end do + end do - ! calculating the total reacting mass for the phase change process. By hypothesis, this should not change - ! throughout the phase-change process. - rM = q_cons_vf(lp + contxb - 1)%sf(j, k, l) + q_cons_vf(vp + contxb - 1)%sf(j, k, l) + ! calculating the total reacting mass for the phase change process. By hypothesis, this should not change + ! throughout the phase-change process. + rM = q_cons_vf(lp + contxb - 1)%sf(j, k, l) + q_cons_vf(vp + contxb - 1)%sf(j, k, l) - ! correcting negative (reacting) mass fraction values in case they happen - call s_correct_partial_densities(MCT, q_cons_vf, rM, j, k, l) + ! correcting negative (reacting) mass fraction values in case they happen + call s_correct_partial_densities(MCT, q_cons_vf, rM, j, k, l) - ! fixing m1 and m2 AFTER correcting the partial densities. Note that these values must be stored for the phase - ! change process that will happen a posteriori - m1 = q_cons_vf(lp + contxb - 1)%sf(j, k, l) + ! fixing m1 and m2 AFTER correcting the partial densities. Note that these values must be stored for the phase + ! change process that will happen a posteriori + m1 = q_cons_vf(lp + contxb - 1)%sf(j, k, l) - m2 = q_cons_vf(vp + contxb - 1)%sf(j, k, l) + m2 = q_cons_vf(vp + contxb - 1)%sf(j, k, l) - ! kinetic energy as an auxiliary variable to the calculation of the total internal energy - dynE = 0.0_wp - $:GPU_LOOP(parallelism='[seq]') - do i = momxb, momxe + ! kinetic energy as an auxiliary variable to the calculation of the total internal energy + dynE = 0.0_wp + $:GPU_LOOP(parallelism='[seq]') + do i = momxb, momxe - dynE = dynE + 5.0e-1_wp*q_cons_vf(i)%sf(j, k, l)**2/rho + dynE = dynE + 5.0e-1_wp*q_cons_vf(i)%sf(j, k, l)**2/rho - end do + end do - ! calculating the total energy that MUST be preserved throughout the pT- and pTg-relaxation procedures - ! at each of the cells. The internal energy is calculated as the total energy minus the kinetic - ! energy to preserved its value at sharp interfaces - rhoe = q_cons_vf(E_idx)%sf(j, k, l) - dynE + ! calculating the total energy that MUST be preserved throughout the pT- and pTg-relaxation procedures + ! at each of the cells. The internal energy is calculated as the total energy minus the kinetic + ! energy to preserved its value at sharp interfaces + rhoe = q_cons_vf(E_idx)%sf(j, k, l) - dynE - ! Calling pT-equilibrium for either finishing phase-change module, or as an IC for the pTg-equilibrium - ! for this case, MFL cannot be either 0 or 1, so I chose it to be 2 - call s_infinite_pt_relaxation_k(j, k, l, 2, pS, p_infpT, q_cons_vf, rhoe, TS) + ! Calling pT-equilibrium for either finishing phase-change module, or as an IC for the pTg-equilibrium + ! for this case, MFL cannot be either 0 or 1, so I chose it to be 2 + call s_infinite_pt_relaxation_k(j, k, l, 2, pS, p_infpT, q_cons_vf, rhoe, TS) - ! check if pTg-equilibrium is required - ! NOTE that NOTHING else needs to be updated OTHER than the individual partial densities - ! given the outputs from the pT- and pTg-equilibrium solvers are just p and one of the partial masses - ! (pTg- case) - if ((relax_model == 6) .and. ((q_cons_vf(lp + contxb - 1)%sf(j, k, l) > mixM*rM) & - .and. (q_cons_vf(vp + contxb - 1)%sf(j, k, l) > mixM*rM)) & - .and. (pS < pCr) .and. (TS < TCr)) then + ! check if pTg-equilibrium is required + ! NOTE that NOTHING else needs to be updated OTHER than the individual partial densities + ! given the outputs from the pT- and pTg-equilibrium solvers are just p and one of the partial masses + ! (pTg- case) + if ((relax_model == 6) .and. ((q_cons_vf(lp + contxb - 1)%sf(j, k, l) > mixM*rM) & + .and. (q_cons_vf(vp + contxb - 1)%sf(j, k, l) > mixM*rM)) & + .and. (pS < pCr) .and. (TS < TCr)) then - ! Checking if phase change is needed, by checking whether the final solution is either subcoooled - ! liquid or overheated vapor. + ! Checking if phase change is needed, by checking whether the final solution is either subcoooled + ! liquid or overheated vapor. - ! overheated vapor case - ! depleting the mass of liquid - q_cons_vf(lp + contxb - 1)%sf(j, k, l) = mixM*rM + ! overheated vapor case + ! depleting the mass of liquid + q_cons_vf(lp + contxb - 1)%sf(j, k, l) = mixM*rM - ! transferring the total mass to vapor - q_cons_vf(vp + contxb - 1)%sf(j, k, l) = (1.0_wp - mixM)*rM + ! transferring the total mass to vapor + q_cons_vf(vp + contxb - 1)%sf(j, k, l) = (1.0_wp - mixM)*rM - ! calling pT-equilibrium for overheated vapor, which is MFL = 0 - call s_infinite_pt_relaxation_k(j, k, l, 0, pSOV, p_infOV, q_cons_vf, rhoe, TSOV) + ! calling pT-equilibrium for overheated vapor, which is MFL = 0 + call s_infinite_pt_relaxation_k(j, k, l, 0, pSOV, p_infOV, q_cons_vf, rhoe, TSOV) - ! calculating Saturation temperature - call s_TSat(pSOV, TSatOV, TSOV) + ! calculating Saturation temperature + call s_TSat(pSOV, TSatOV, TSOV) - ! subcooled liquid case - ! transferring the total mass to liquid - q_cons_vf(lp + contxb - 1)%sf(j, k, l) = (1.0_wp - mixM)*rM - - ! depleting the mass of vapor - q_cons_vf(vp + contxb - 1)%sf(j, k, l) = mixM*rM + ! subcooled liquid case + ! transferring the total mass to liquid + q_cons_vf(lp + contxb - 1)%sf(j, k, l) = (1.0_wp - mixM)*rM - ! calling pT-equilibrium for subcooled liquid, which is MFL = 1 - call s_infinite_pt_relaxation_k(j, k, l, 1, pSSL, p_infSL, q_cons_vf, rhoe, TSSL) + ! depleting the mass of vapor + q_cons_vf(vp + contxb - 1)%sf(j, k, l) = mixM*rM - ! calculating Saturation temperature - call s_TSat(pSSL, TSatSL, TSSL) + ! calling pT-equilibrium for subcooled liquid, which is MFL = 1 + call s_infinite_pt_relaxation_k(j, k, l, 1, pSSL, p_infSL, q_cons_vf, rhoe, TSSL) - ! checking the conditions for overheated vapor and subcooled liquide - if (TSOV > TSatOV) then + ! calculating Saturation temperature + call s_TSat(pSSL, TSatSL, TSSL) - ! Assigning pressure - pS = pSOV + ! checking the conditions for overheated vapor and subcooled liquide + if (TSOV > TSatOV) then - ! Assigning Temperature - TS = TSOV + ! Assigning pressure + pS = pSOV - ! correcting the liquid partial density - q_cons_vf(lp + contxb - 1)%sf(j, k, l) = mixM*rM + ! Assigning Temperature + TS = TSOV - ! correcting the vapor partial density - q_cons_vf(vp + contxb - 1)%sf(j, k, l) = (1.0_wp - mixM)*rM + ! correcting the liquid partial density + q_cons_vf(lp + contxb - 1)%sf(j, k, l) = mixM*rM - elseif (TSSL < TSatSL) then + ! correcting the vapor partial density + q_cons_vf(vp + contxb - 1)%sf(j, k, l) = (1.0_wp - mixM)*rM - ! Assigning pressure - pS = pSSL + elseif (TSSL < TSatSL) then - ! Assigning Temperature - TS = TSSL + ! Assigning pressure + pS = pSSL - ! correcting the liquid partial density - q_cons_vf(lp + contxb - 1)%sf(j, k, l) = (1.0_wp - mixM)*rM + ! Assigning Temperature + TS = TSSL - ! correcting the vapor partial density - q_cons_vf(vp + contxb - 1)%sf(j, k, l) = mixM*rM + ! correcting the liquid partial density + q_cons_vf(lp + contxb - 1)%sf(j, k, l) = (1.0_wp - mixM)*rM - else + ! correcting the vapor partial density + q_cons_vf(vp + contxb - 1)%sf(j, k, l) = mixM*rM - ! returning partial pressures to what they were from the homogeneous solver - ! liquid - q_cons_vf(lp + contxb - 1)%sf(j, k, l) = m1 + else - ! vapor - q_cons_vf(vp + contxb - 1)%sf(j, k, l) = m2 + ! returning partial pressures to what they were from the homogeneous solver + ! liquid + q_cons_vf(lp + contxb - 1)%sf(j, k, l) = m1 - ! calling the pTg-equilibrium solver - call s_infinite_ptg_relaxation_k(j, k, l, pS, p_infpT, rhoe, q_cons_vf, TS) + ! vapor + q_cons_vf(vp + contxb - 1)%sf(j, k, l) = m2 - end if + ! calling the pTg-equilibrium solver + call s_infinite_ptg_relaxation_k(j, k, l, pS, p_infpT, rhoe, q_cons_vf, TS) end if - ! Calculations AFTER equilibrium + end if + + ! Calculations AFTER equilibrium - ! entropy - sk(1:num_fluids) = cvs(1:num_fluids)*log((TS**gs_min(1:num_fluids)) & - /((pS + ps_inf(1:num_fluids))**(gs_min(1:num_fluids) - 1.0_wp))) + qvps(1:num_fluids) + ! entropy + sk(1:num_fluids) = cvs(1:num_fluids)*log((TS**gs_min(1:num_fluids)) & + /((pS + ps_inf(1:num_fluids))**(gs_min(1:num_fluids) - 1.0_wp))) + qvps(1:num_fluids) - ! enthalpy - hk(1:num_fluids) = gs_min(1:num_fluids)*cvs(1:num_fluids)*TS & - + qvs(1:num_fluids) + ! enthalpy + hk(1:num_fluids) = gs_min(1:num_fluids)*cvs(1:num_fluids)*TS & + + qvs(1:num_fluids) - ! Gibbs-free energy - gk(1:num_fluids) = hk(1:num_fluids) - TS*sk(1:num_fluids) + ! Gibbs-free energy + gk(1:num_fluids) = hk(1:num_fluids) - TS*sk(1:num_fluids) - ! densities - rhok(1:num_fluids) = (pS + ps_inf(1:num_fluids)) & - /((gs_min(1:num_fluids) - 1)*cvs(1:num_fluids)*TS) + ! densities + rhok(1:num_fluids) = (pS + ps_inf(1:num_fluids)) & + /((gs_min(1:num_fluids) - 1)*cvs(1:num_fluids)*TS) - ! internal energy - ek(1:num_fluids) = (pS + gs_min(1:num_fluids) & - *ps_inf(1:num_fluids))/(pS + ps_inf(1:num_fluids)) & - *cvs(1:num_fluids)*TS + qvs(1:num_fluids) + ! internal energy + ek(1:num_fluids) = (pS + gs_min(1:num_fluids) & + *ps_inf(1:num_fluids))/(pS + ps_inf(1:num_fluids)) & + *cvs(1:num_fluids)*TS + qvs(1:num_fluids) - ! calculating volume fractions, internal energies, and total entropy - rhos = 0.0_wp - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids + ! calculating volume fractions, internal energies, and total entropy + rhos = 0.0_wp + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids - ! volume fractions - q_cons_vf(i + advxb - 1)%sf(j, k, l) = q_cons_vf(i + contxb - 1)%sf(j, k, l)/rhok(i) + ! volume fractions + q_cons_vf(i + advxb - 1)%sf(j, k, l) = q_cons_vf(i + contxb - 1)%sf(j, k, l)/rhok(i) - ! alpha*rho*e - q_cons_vf(i + intxb - 1)%sf(j, k, l) = q_cons_vf(i + contxb - 1)%sf(j, k, l)*ek(i) + ! alpha*rho*e + q_cons_vf(i + intxb - 1)%sf(j, k, l) = q_cons_vf(i + contxb - 1)%sf(j, k, l)*ek(i) - ! Total entropy - rhos = rhos + q_cons_vf(i + contxb - 1)%sf(j, k, l)*sk(i) + ! Total entropy + rhos = rhos + q_cons_vf(i + contxb - 1)%sf(j, k, l)*sk(i) - end do end do end do end do + end do $:END_GPU_PARALLEL_LOOP() end subroutine s_infinite_relaxation_k diff --git a/src/common/m_variables_conversion.fpp b/src/common/m_variables_conversion.fpp index 94e213608..dd9bedb6c 100644 --- a/src/common/m_variables_conversion.fpp +++ b/src/common/m_variables_conversion.fpp @@ -874,297 +874,297 @@ contains #:endif $:GPU_PARALLEL_LOOP(collapse=3, private='[j,k,l,alpha_K, alpha_rho_K, Re_K, nRtmp, rho_K, gamma_K, pi_inf_K,qv_K, dyn_pres_K, rhoYks, B, T]') - do l = ibounds(3)%beg, ibounds(3)%end - do k = ibounds(2)%beg, ibounds(2)%end - do j = ibounds(1)%beg, ibounds(1)%end - dyn_pres_K = 0._wp - - if (igr) then - if (num_fluids == 1) then - alpha_rho_K(1) = qK_cons_vf(contxb)%sf(j, k, l) - alpha_K(1) = 1._wp - else - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - 1 - alpha_rho_K(i) = qK_cons_vf(i)%sf(j, k, l) - alpha_K(i) = qK_cons_vf(advxb + i - 1)%sf(j, k, l) - end do - - alpha_rho_K(num_fluids) = qK_cons_vf(num_fluids)%sf(j, k, l) - alpha_K(num_fluids) = 1._wp - sum(alpha_K(1:num_fluids - 1)) - end if + do l = ibounds(3)%beg, ibounds(3)%end + do k = ibounds(2)%beg, ibounds(2)%end + do j = ibounds(1)%beg, ibounds(1)%end + dyn_pres_K = 0._wp + + if (igr) then + if (num_fluids == 1) then + alpha_rho_K(1) = qK_cons_vf(contxb)%sf(j, k, l) + alpha_K(1) = 1._wp else $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids + do i = 1, num_fluids - 1 alpha_rho_K(i) = qK_cons_vf(i)%sf(j, k, l) alpha_K(i) = qK_cons_vf(advxb + i - 1)%sf(j, k, l) end do + + alpha_rho_K(num_fluids) = qK_cons_vf(num_fluids)%sf(j, k, l) + alpha_K(num_fluids) = 1._wp - sum(alpha_K(1:num_fluids - 1)) end if + else + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids + alpha_rho_K(i) = qK_cons_vf(i)%sf(j, k, l) + alpha_K(i) = qK_cons_vf(advxb + i - 1)%sf(j, k, l) + end do + end if - if (model_eqns /= 4) then + if (model_eqns /= 4) then #ifdef MFC_SIMULATION - ! If in simulation, use acc mixture subroutines - if (elasticity) then - call s_convert_species_to_mixture_variables_acc(rho_K, gamma_K, pi_inf_K, qv_K, alpha_K, & - alpha_rho_K, Re_K, G_K, Gs_vc) - else if (bubbles_euler) then - call s_convert_species_to_mixture_variables_bubbles_acc(rho_K, gamma_K, pi_inf_K, qv_K, & - alpha_K, alpha_rho_K, Re_K) - else - call s_convert_species_to_mixture_variables_acc(rho_K, gamma_K, pi_inf_K, qv_K, & - alpha_K, alpha_rho_K, Re_K) - end if + ! If in simulation, use acc mixture subroutines + if (elasticity) then + call s_convert_species_to_mixture_variables_acc(rho_K, gamma_K, pi_inf_K, qv_K, alpha_K, & + alpha_rho_K, Re_K, G_K, Gs_vc) + else if (bubbles_euler) then + call s_convert_species_to_mixture_variables_bubbles_acc(rho_K, gamma_K, pi_inf_K, qv_K, & + alpha_K, alpha_rho_K, Re_K) + else + call s_convert_species_to_mixture_variables_acc(rho_K, gamma_K, pi_inf_K, qv_K, & + alpha_K, alpha_rho_K, Re_K) + end if #else - ! If pre-processing, use non acc mixture subroutines - if (elasticity) then - call s_convert_to_mixture_variables(qK_cons_vf, j, k, l, & - rho_K, gamma_K, pi_inf_K, qv_K, Re_K, G_K, fluid_pp(:)%G) - else - call s_convert_to_mixture_variables(qK_cons_vf, j, k, l, & - rho_K, gamma_K, pi_inf_K, qv_K) - end if -#endif + ! If pre-processing, use non acc mixture subroutines + if (elasticity) then + call s_convert_to_mixture_variables(qK_cons_vf, j, k, l, & + rho_K, gamma_K, pi_inf_K, qv_K, Re_K, G_K, fluid_pp(:)%G) + else + call s_convert_to_mixture_variables(qK_cons_vf, j, k, l, & + rho_K, gamma_K, pi_inf_K, qv_K) end if +#endif + end if - if (relativity) then - if (n == 0) then - B(1) = Bx0 - B(2) = qK_cons_vf(B_idx%beg)%sf(j, k, l) - B(3) = qK_cons_vf(B_idx%beg + 1)%sf(j, k, l) - else - B(1) = qK_cons_vf(B_idx%beg)%sf(j, k, l) - B(2) = qK_cons_vf(B_idx%beg + 1)%sf(j, k, l) - B(3) = qK_cons_vf(B_idx%beg + 2)%sf(j, k, l) - end if - B2 = B(1)**2 + B(2)**2 + B(3)**2 - - m2 = 0._wp - $:GPU_LOOP(parallelism='[seq]') - do i = momxb, momxe - m2 = m2 + qK_cons_vf(i)%sf(j, k, l)**2 - end do + if (relativity) then + if (n == 0) then + B(1) = Bx0 + B(2) = qK_cons_vf(B_idx%beg)%sf(j, k, l) + B(3) = qK_cons_vf(B_idx%beg + 1)%sf(j, k, l) + else + B(1) = qK_cons_vf(B_idx%beg)%sf(j, k, l) + B(2) = qK_cons_vf(B_idx%beg + 1)%sf(j, k, l) + B(3) = qK_cons_vf(B_idx%beg + 2)%sf(j, k, l) + end if + B2 = B(1)**2 + B(2)**2 + B(3)**2 - S = 0._wp - $:GPU_LOOP(parallelism='[seq]') - do i = 1, 3 - S = S + qK_cons_vf(momxb + i - 1)%sf(j, k, l)*B(i) - end do + m2 = 0._wp + $:GPU_LOOP(parallelism='[seq]') + do i = momxb, momxe + m2 = m2 + qK_cons_vf(i)%sf(j, k, l)**2 + end do - E = qK_cons_vf(E_idx)%sf(j, k, l) + S = 0._wp + $:GPU_LOOP(parallelism='[seq]') + do i = 1, 3 + S = S + qK_cons_vf(momxb + i - 1)%sf(j, k, l)*B(i) + end do - D = 0._wp - $:GPU_LOOP(parallelism='[seq]') - do i = 1, contxe - D = D + qK_cons_vf(i)%sf(j, k, l) - end do + E = qK_cons_vf(E_idx)%sf(j, k, l) - ! Newton-Raphson - W = E + D - $:GPU_LOOP(parallelism='[seq]') - do iter = 1, relativity_cons_to_prim_max_iter - Ga = (W + B2)*W/sqrt((W + B2)**2*W**2 - (m2*W**2 + S**2*(2*W + B2))) - pres = (W - D*Ga)/((gamma_K + 1)*Ga**2) ! Thermal pressure from EOS - f = W - pres + (1 - 1/(2*Ga**2))*B2 - S**2/(2*W**2) - E - D - - ! The first equation below corrects a typo in (Mignone & Bodo, 2006) - ! m2*W**2 → 2*m2*W**2, which would cancel with the 2* in other terms - ! This corrected version is not used as the second equation empirically converges faster. - ! First equation is kept for further investigation. - ! dGa_dW = -Ga**3 * ( S**2*(3*W**2+3*W*B2+B2**2) + m2*W**2 ) / (W**3 * (W+B2)**3) ! first (corrected) - dGa_dW = -Ga**3*(2*S**2*(3*W**2 + 3*W*B2 + B2**2) + m2*W**2)/(2*W**3*(W + B2)**3) ! second (in paper) - - dp_dW = (Ga*(1 + D*dGa_dW) - 2*W*dGa_dW)/((gamma_K + 1)*Ga**3) - df_dW = 1 - dp_dW + (B2/Ga**3)*dGa_dW + S**2/W**3 - - dW = -f/df_dW - W = W + dW - if (abs(dW) < 1.e-12_wp*W) exit - end do + D = 0._wp + $:GPU_LOOP(parallelism='[seq]') + do i = 1, contxe + D = D + qK_cons_vf(i)%sf(j, k, l) + end do - ! Recalculate pressure using converged W + ! Newton-Raphson + W = E + D + $:GPU_LOOP(parallelism='[seq]') + do iter = 1, relativity_cons_to_prim_max_iter Ga = (W + B2)*W/sqrt((W + B2)**2*W**2 - (m2*W**2 + S**2*(2*W + B2))) - qK_prim_vf(E_idx)%sf(j, k, l) = (W - D*Ga)/((gamma_K + 1)*Ga**2) + pres = (W - D*Ga)/((gamma_K + 1)*Ga**2) ! Thermal pressure from EOS + f = W - pres + (1 - 1/(2*Ga**2))*B2 - S**2/(2*W**2) - E - D + + ! The first equation below corrects a typo in (Mignone & Bodo, 2006) + ! m2*W**2 → 2*m2*W**2, which would cancel with the 2* in other terms + ! This corrected version is not used as the second equation empirically converges faster. + ! First equation is kept for further investigation. + ! dGa_dW = -Ga**3 * ( S**2*(3*W**2+3*W*B2+B2**2) + m2*W**2 ) / (W**3 * (W+B2)**3) ! first (corrected) + dGa_dW = -Ga**3*(2*S**2*(3*W**2 + 3*W*B2 + B2**2) + m2*W**2)/(2*W**3*(W + B2)**3) ! second (in paper) + + dp_dW = (Ga*(1 + D*dGa_dW) - 2*W*dGa_dW)/((gamma_K + 1)*Ga**3) + df_dW = 1 - dp_dW + (B2/Ga**3)*dGa_dW + S**2/W**3 + + dW = -f/df_dW + W = W + dW + if (abs(dW) < 1.e-12_wp*W) exit + end do - ! Recover the other primitive variables - $:GPU_LOOP(parallelism='[seq]') - do i = 1, 3 - qK_prim_vf(momxb + i - 1)%sf(j, k, l) = (qK_cons_vf(momxb + i - 1)%sf(j, k, l) + (S/W)*B(i))/(W + B2) - end do - qK_prim_vf(1)%sf(j, k, l) = D/Ga ! Hard-coded for single-component for now + ! Recalculate pressure using converged W + Ga = (W + B2)*W/sqrt((W + B2)**2*W**2 - (m2*W**2 + S**2*(2*W + B2))) + qK_prim_vf(E_idx)%sf(j, k, l) = (W - D*Ga)/((gamma_K + 1)*Ga**2) - $:GPU_LOOP(parallelism='[seq]') - do i = B_idx%beg, B_idx%end - qK_prim_vf(i)%sf(j, k, l) = qK_cons_vf(i)%sf(j, k, l) - end do + ! Recover the other primitive variables + $:GPU_LOOP(parallelism='[seq]') + do i = 1, 3 + qK_prim_vf(momxb + i - 1)%sf(j, k, l) = (qK_cons_vf(momxb + i - 1)%sf(j, k, l) + (S/W)*B(i))/(W + B2) + end do + qK_prim_vf(1)%sf(j, k, l) = D/Ga ! Hard-coded for single-component for now - cycle ! skip all the non-relativistic conversions below - end if + $:GPU_LOOP(parallelism='[seq]') + do i = B_idx%beg, B_idx%end + qK_prim_vf(i)%sf(j, k, l) = qK_cons_vf(i)%sf(j, k, l) + end do - if (chemistry) then - rho_K = 0._wp - $:GPU_LOOP(parallelism='[seq]') - do i = chemxb, chemxe - rho_K = rho_K + max(0._wp, qK_cons_vf(i)%sf(j, k, l)) - end do + cycle ! skip all the non-relativistic conversions below + end if - $:GPU_LOOP(parallelism='[seq]') - do i = 1, contxe - qK_prim_vf(i)%sf(j, k, l) = rho_K - end do + if (chemistry) then + rho_K = 0._wp + $:GPU_LOOP(parallelism='[seq]') + do i = chemxb, chemxe + rho_K = rho_K + max(0._wp, qK_cons_vf(i)%sf(j, k, l)) + end do - $:GPU_LOOP(parallelism='[seq]') - do i = chemxb, chemxe - qK_prim_vf(i)%sf(j, k, l) = max(0._wp, qK_cons_vf(i)%sf(j, k, l)/rho_K) - end do - else - $:GPU_LOOP(parallelism='[seq]') - do i = 1, contxe - qK_prim_vf(i)%sf(j, k, l) = qK_cons_vf(i)%sf(j, k, l) - end do - end if + $:GPU_LOOP(parallelism='[seq]') + do i = 1, contxe + qK_prim_vf(i)%sf(j, k, l) = rho_K + end do + + $:GPU_LOOP(parallelism='[seq]') + do i = chemxb, chemxe + qK_prim_vf(i)%sf(j, k, l) = max(0._wp, qK_cons_vf(i)%sf(j, k, l)/rho_K) + end do + else + $:GPU_LOOP(parallelism='[seq]') + do i = 1, contxe + qK_prim_vf(i)%sf(j, k, l) = qK_cons_vf(i)%sf(j, k, l) + end do + end if #ifdef MFC_SIMULATION - rho_K = max(rho_K, sgm_eps) + rho_K = max(rho_K, sgm_eps) #endif + $:GPU_LOOP(parallelism='[seq]') + do i = momxb, momxe + if (model_eqns /= 4) then + qK_prim_vf(i)%sf(j, k, l) = qK_cons_vf(i)%sf(j, k, l) & + /rho_K + dyn_pres_K = dyn_pres_K + 5.e-1_wp*qK_cons_vf(i)%sf(j, k, l) & + *qK_prim_vf(i)%sf(j, k, l) + else + qK_prim_vf(i)%sf(j, k, l) = qK_cons_vf(i)%sf(j, k, l) & + /qK_cons_vf(1)%sf(j, k, l) + end if + end do + + if (chemistry) then $:GPU_LOOP(parallelism='[seq]') - do i = momxb, momxe - if (model_eqns /= 4) then - qK_prim_vf(i)%sf(j, k, l) = qK_cons_vf(i)%sf(j, k, l) & - /rho_K - dyn_pres_K = dyn_pres_K + 5.e-1_wp*qK_cons_vf(i)%sf(j, k, l) & - *qK_prim_vf(i)%sf(j, k, l) - else - qK_prim_vf(i)%sf(j, k, l) = qK_cons_vf(i)%sf(j, k, l) & - /qK_cons_vf(1)%sf(j, k, l) - end if + do i = 1, num_species + rhoYks(i) = qK_cons_vf(chemxb + i - 1)%sf(j, k, l) end do - if (chemistry) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_species - rhoYks(i) = qK_cons_vf(chemxb + i - 1)%sf(j, k, l) - end do - - T = q_T_sf%sf(j, k, l) - end if + T = q_T_sf%sf(j, k, l) + end if - if (mhd) then - if (n == 0) then - pres_mag = 0.5_wp*(Bx0**2 + qK_cons_vf(B_idx%beg)%sf(j, k, l)**2 + qK_cons_vf(B_idx%beg + 1)%sf(j, k, l)**2) - else - pres_mag = 0.5_wp*(qK_cons_vf(B_idx%beg)%sf(j, k, l)**2 + qK_cons_vf(B_idx%beg + 1)%sf(j, k, l)**2 + qK_cons_vf(B_idx%beg + 2)%sf(j, k, l)**2) - end if + if (mhd) then + if (n == 0) then + pres_mag = 0.5_wp*(Bx0**2 + qK_cons_vf(B_idx%beg)%sf(j, k, l)**2 + qK_cons_vf(B_idx%beg + 1)%sf(j, k, l)**2) else - pres_mag = 0._wp + pres_mag = 0.5_wp*(qK_cons_vf(B_idx%beg)%sf(j, k, l)**2 + qK_cons_vf(B_idx%beg + 1)%sf(j, k, l)**2 + qK_cons_vf(B_idx%beg + 2)%sf(j, k, l)**2) end if + else + pres_mag = 0._wp + end if - call s_compute_pressure(qK_cons_vf(E_idx)%sf(j, k, l), & - qK_cons_vf(alf_idx)%sf(j, k, l), & - dyn_pres_K, pi_inf_K, gamma_K, rho_K, & - qv_K, rhoYks, pres, T, pres_mag=pres_mag) + call s_compute_pressure(qK_cons_vf(E_idx)%sf(j, k, l), & + qK_cons_vf(alf_idx)%sf(j, k, l), & + dyn_pres_K, pi_inf_K, gamma_K, rho_K, & + qv_K, rhoYks, pres, T, pres_mag=pres_mag) - qK_prim_vf(E_idx)%sf(j, k, l) = pres + qK_prim_vf(E_idx)%sf(j, k, l) = pres - if (chemistry) then - q_T_sf%sf(j, k, l) = T - end if + if (chemistry) then + q_T_sf%sf(j, k, l) = T + end if - if (bubbles_euler) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, nb - nRtmp(i) = qK_cons_vf(bubrs_vc(i))%sf(j, k, l) - end do + if (bubbles_euler) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, nb + nRtmp(i) = qK_cons_vf(bubrs_vc(i))%sf(j, k, l) + end do - vftmp = qK_cons_vf(alf_idx)%sf(j, k, l) + vftmp = qK_cons_vf(alf_idx)%sf(j, k, l) - if (qbmm) then - !Get nb (constant across all R0 bins) - nbub_sc = qK_cons_vf(bubxb)%sf(j, k, l) + if (qbmm) then + !Get nb (constant across all R0 bins) + nbub_sc = qK_cons_vf(bubxb)%sf(j, k, l) - !Convert cons to prim - $:GPU_LOOP(parallelism='[seq]') - do i = bubxb, bubxe - qK_prim_vf(i)%sf(j, k, l) = qK_cons_vf(i)%sf(j, k, l)/nbub_sc - end do - !Need to keep track of nb in the primitive variable list (converted back to true value before output) + !Convert cons to prim + $:GPU_LOOP(parallelism='[seq]') + do i = bubxb, bubxe + qK_prim_vf(i)%sf(j, k, l) = qK_cons_vf(i)%sf(j, k, l)/nbub_sc + end do + !Need to keep track of nb in the primitive variable list (converted back to true value before output) #ifdef MFC_SIMULATION - qK_prim_vf(bubxb)%sf(j, k, l) = qK_cons_vf(bubxb)%sf(j, k, l) + qK_prim_vf(bubxb)%sf(j, k, l) = qK_cons_vf(bubxb)%sf(j, k, l) #endif + else + if (adv_n) then + qK_prim_vf(n_idx)%sf(j, k, l) = qK_cons_vf(n_idx)%sf(j, k, l) + nbub_sc = qK_prim_vf(n_idx)%sf(j, k, l) else - if (adv_n) then - qK_prim_vf(n_idx)%sf(j, k, l) = qK_cons_vf(n_idx)%sf(j, k, l) - nbub_sc = qK_prim_vf(n_idx)%sf(j, k, l) - else - call s_comp_n_from_cons(vftmp, nRtmp, nbub_sc, weight) - end if - - $:GPU_LOOP(parallelism='[seq]') - do i = bubxb, bubxe - qK_prim_vf(i)%sf(j, k, l) = qK_cons_vf(i)%sf(j, k, l)/nbub_sc - end do + call s_comp_n_from_cons(vftmp, nRtmp, nbub_sc, weight) end if - end if - if (mhd) then $:GPU_LOOP(parallelism='[seq]') - do i = B_idx%beg, B_idx%end - qK_prim_vf(i)%sf(j, k, l) = qK_cons_vf(i)%sf(j, k, l) + do i = bubxb, bubxe + qK_prim_vf(i)%sf(j, k, l) = qK_cons_vf(i)%sf(j, k, l)/nbub_sc end do end if + end if - if (elasticity) then - $:GPU_LOOP(parallelism='[seq]') - do i = strxb, strxe - qK_prim_vf(i)%sf(j, k, l) = qK_cons_vf(i)%sf(j, k, l)/rho_K - end do - end if + if (mhd) then + $:GPU_LOOP(parallelism='[seq]') + do i = B_idx%beg, B_idx%end + qK_prim_vf(i)%sf(j, k, l) = qK_cons_vf(i)%sf(j, k, l) + end do + end if - if (hypoelasticity) then - $:GPU_LOOP(parallelism='[seq]') - do i = strxb, strxe - ! subtracting elastic contribution for pressure calculation - if (G_K > verysmall) then - if (cont_damage) G_K = G_K*max((1._wp - qK_cons_vf(damage_idx)%sf(j, k, l)), 0._wp) + if (elasticity) then + $:GPU_LOOP(parallelism='[seq]') + do i = strxb, strxe + qK_prim_vf(i)%sf(j, k, l) = qK_cons_vf(i)%sf(j, k, l)/rho_K + end do + end if + + if (hypoelasticity) then + $:GPU_LOOP(parallelism='[seq]') + do i = strxb, strxe + ! subtracting elastic contribution for pressure calculation + if (G_K > verysmall) then + if (cont_damage) G_K = G_K*max((1._wp - qK_cons_vf(damage_idx)%sf(j, k, l)), 0._wp) + qK_prim_vf(E_idx)%sf(j, k, l) = qK_prim_vf(E_idx)%sf(j, k, l) - & + ((qK_prim_vf(i)%sf(j, k, l)**2._wp)/(4._wp*G_K))/gamma_K + ! Double for shear stresses + if (any(i == shear_indices)) then qK_prim_vf(E_idx)%sf(j, k, l) = qK_prim_vf(E_idx)%sf(j, k, l) - & ((qK_prim_vf(i)%sf(j, k, l)**2._wp)/(4._wp*G_K))/gamma_K - ! Double for shear stresses - if (any(i == shear_indices)) then - qK_prim_vf(E_idx)%sf(j, k, l) = qK_prim_vf(E_idx)%sf(j, k, l) - & - ((qK_prim_vf(i)%sf(j, k, l)**2._wp)/(4._wp*G_K))/gamma_K - end if end if - end do - end if + end if + end do + end if - if (hyperelasticity) then - $:GPU_LOOP(parallelism='[seq]') - do i = xibeg, xiend - qK_prim_vf(i)%sf(j, k, l) = qK_cons_vf(i)%sf(j, k, l)/rho_K - end do - end if + if (hyperelasticity) then + $:GPU_LOOP(parallelism='[seq]') + do i = xibeg, xiend + qK_prim_vf(i)%sf(j, k, l) = qK_cons_vf(i)%sf(j, k, l)/rho_K + end do + end if - if (.not. igr .or. num_fluids > 1) then - $:GPU_LOOP(parallelism='[seq]') - do i = advxb, advxe - qK_prim_vf(i)%sf(j, k, l) = qK_cons_vf(i)%sf(j, k, l) - end do - end if + if (.not. igr .or. num_fluids > 1) then + $:GPU_LOOP(parallelism='[seq]') + do i = advxb, advxe + qK_prim_vf(i)%sf(j, k, l) = qK_cons_vf(i)%sf(j, k, l) + end do + end if - if (surface_tension) then - qK_prim_vf(c_idx)%sf(j, k, l) = qK_cons_vf(c_idx)%sf(j, k, l) - end if + if (surface_tension) then + qK_prim_vf(c_idx)%sf(j, k, l) = qK_cons_vf(c_idx)%sf(j, k, l) + end if - if (cont_damage) qK_prim_vf(damage_idx)%sf(j, k, l) = qK_cons_vf(damage_idx)%sf(j, k, l) + if (cont_damage) qK_prim_vf(damage_idx)%sf(j, k, l) = qK_cons_vf(damage_idx)%sf(j, k, l) #ifdef MFC_POST_PROCESS - if (bubbles_lagrange) qK_prim_vf(beta_idx)%sf(j, k, l) = qK_cons_vf(beta_idx)%sf(j, k, l) + if (bubbles_lagrange) qK_prim_vf(beta_idx)%sf(j, k, l) = qK_cons_vf(beta_idx)%sf(j, k, l) #endif - end do end do end do + end do $:END_GPU_PARALLEL_LOOP() end subroutine s_convert_conservative_to_primitive_variables @@ -1493,111 +1493,111 @@ contains ! accounting for the contribution of either viscosity or capillarity #ifdef MFC_SIMULATION $:GPU_PARALLEL_LOOP(collapse=3, private='[j,k,l,alpha_rho_K, vel_K, alpha_K, Re_K, Y_K]') - do l = is3b, is3e - do k = is2b, is2e - do j = is1b, is1e + do l = is3b, is3e + do k = is2b, is2e + do j = is1b, is1e - $:GPU_LOOP(parallelism='[seq]') - do i = 1, contxe - alpha_rho_K(i) = qK_prim_vf(j, k, l, i) - end do + $:GPU_LOOP(parallelism='[seq]') + do i = 1, contxe + alpha_rho_K(i) = qK_prim_vf(j, k, l, i) + end do - $:GPU_LOOP(parallelism='[seq]') - do i = advxb, advxe - alpha_K(i - E_idx) = qK_prim_vf(j, k, l, i) - end do - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_vels - vel_K(i) = qK_prim_vf(j, k, l, contxe + i) - end do + $:GPU_LOOP(parallelism='[seq]') + do i = advxb, advxe + alpha_K(i - E_idx) = qK_prim_vf(j, k, l, i) + end do + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_vels + vel_K(i) = qK_prim_vf(j, k, l, contxe + i) + end do + + vel_K_sum = 0._wp + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_vels + vel_K_sum = vel_K_sum + vel_K(i)**2._wp + end do - vel_K_sum = 0._wp + pres_K = qK_prim_vf(j, k, l, E_idx) + if (elasticity) then + call s_convert_species_to_mixture_variables_acc(rho_K, gamma_K, pi_inf_K, qv_K, & + alpha_K, alpha_rho_K, Re_K, & + G_K, Gs_vc) + else if (bubbles_euler) then + call s_convert_species_to_mixture_variables_bubbles_acc(rho_K, gamma_K, & + pi_inf_K, qv_K, alpha_K, alpha_rho_K, Re_K) + else + call s_convert_species_to_mixture_variables_acc(rho_K, gamma_K, pi_inf_K, qv_K, & + alpha_K, alpha_rho_K, Re_K) + end if + + ! Computing the energy from the pressure + + if (chemistry) then $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_vels - vel_K_sum = vel_K_sum + vel_K(i)**2._wp + do i = chemxb, chemxe + Y_K(i - chemxb + 1) = qK_prim_vf(j, k, l, i) end do + !Computing the energy from the internal energy of the mixture + call get_mixture_molecular_weight(Y_k, mix_mol_weight) + R_gas = gas_constant/mix_mol_weight + T_K = pres_K/rho_K/R_gas + call get_mixture_energy_mass(T_K, Y_K, E_K) + E_K = rho_K*E_K + 5.e-1_wp*rho_K*vel_K_sum + else + ! Computing the energy from the pressure + E_K = gamma_K*pres_K + pi_inf_K & + + 5.e-1_wp*rho_K*vel_K_sum + qv_K + end if - pres_K = qK_prim_vf(j, k, l, E_idx) - if (elasticity) then - call s_convert_species_to_mixture_variables_acc(rho_K, gamma_K, pi_inf_K, qv_K, & - alpha_K, alpha_rho_K, Re_K, & - G_K, Gs_vc) - else if (bubbles_euler) then - call s_convert_species_to_mixture_variables_bubbles_acc(rho_K, gamma_K, & - pi_inf_K, qv_K, alpha_K, alpha_rho_K, Re_K) - else - call s_convert_species_to_mixture_variables_acc(rho_K, gamma_K, pi_inf_K, qv_K, & - alpha_K, alpha_rho_K, Re_K) - end if + ! mass flux, this should be \alpha_i \rho_i u_i + $:GPU_LOOP(parallelism='[seq]') + do i = 1, contxe + FK_vf(j, k, l, i) = alpha_rho_K(i)*vel_K(dir_idx(1)) + end do - ! Computing the energy from the pressure + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_vels + FK_vf(j, k, l, contxe + dir_idx(i)) = & + rho_K*vel_K(dir_idx(1)) & + *vel_K(dir_idx(i)) & + + pres_K*dir_flg(dir_idx(i)) + end do - if (chemistry) then - $:GPU_LOOP(parallelism='[seq]') - do i = chemxb, chemxe - Y_K(i - chemxb + 1) = qK_prim_vf(j, k, l, i) - end do - !Computing the energy from the internal energy of the mixture - call get_mixture_molecular_weight(Y_k, mix_mol_weight) - R_gas = gas_constant/mix_mol_weight - T_K = pres_K/rho_K/R_gas - call get_mixture_energy_mass(T_K, Y_K, E_K) - E_K = rho_K*E_K + 5.e-1_wp*rho_K*vel_K_sum - else - ! Computing the energy from the pressure - E_K = gamma_K*pres_K + pi_inf_K & - + 5.e-1_wp*rho_K*vel_K_sum + qv_K - end if + ! energy flux, u(E+p) + FK_vf(j, k, l, E_idx) = vel_K(dir_idx(1))*(E_K + pres_K) - ! mass flux, this should be \alpha_i \rho_i u_i + ! Species advection Flux, \rho*u*Y + if (chemistry) then $:GPU_LOOP(parallelism='[seq]') - do i = 1, contxe - FK_vf(j, k, l, i) = alpha_rho_K(i)*vel_K(dir_idx(1)) + do i = 1, num_species + FK_vf(j, k, l, i - 1 + chemxb) = vel_K(dir_idx(1))*(rho_K*Y_K(i)) end do + end if + if (riemann_solver == 1 .or. riemann_solver == 4) then $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_vels - FK_vf(j, k, l, contxe + dir_idx(i)) = & - rho_K*vel_K(dir_idx(1)) & - *vel_K(dir_idx(i)) & - + pres_K*dir_flg(dir_idx(i)) + do i = advxb, advxe + FK_vf(j, k, l, i) = 0._wp + FK_src_vf(j, k, l, i) = alpha_K(i - E_idx) end do - ! energy flux, u(E+p) - FK_vf(j, k, l, E_idx) = vel_K(dir_idx(1))*(E_K + pres_K) - - ! Species advection Flux, \rho*u*Y - if (chemistry) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_species - FK_vf(j, k, l, i - 1 + chemxb) = vel_K(dir_idx(1))*(rho_K*Y_K(i)) - end do - end if - - if (riemann_solver == 1 .or. riemann_solver == 4) then - $:GPU_LOOP(parallelism='[seq]') - do i = advxb, advxe - FK_vf(j, k, l, i) = 0._wp - FK_src_vf(j, k, l, i) = alpha_K(i - E_idx) - end do - - else - ! Could be bubbles_euler! - $:GPU_LOOP(parallelism='[seq]') - do i = advxb, advxe - FK_vf(j, k, l, i) = vel_K(dir_idx(1))*alpha_K(i - E_idx) - end do + else + ! Could be bubbles_euler! + $:GPU_LOOP(parallelism='[seq]') + do i = advxb, advxe + FK_vf(j, k, l, i) = vel_K(dir_idx(1))*alpha_K(i - E_idx) + end do - $:GPU_LOOP(parallelism='[seq]') - do i = advxb, advxe - FK_src_vf(j, k, l, i) = vel_K(dir_idx(1)) - end do + $:GPU_LOOP(parallelism='[seq]') + do i = advxb, advxe + FK_src_vf(j, k, l, i) = vel_K(dir_idx(1)) + end do - end if + end if - end do end do end do + end do $:END_GPU_PARALLEL_LOOP() #endif end subroutine s_convert_primitive_to_flux_variables diff --git a/src/simulation/m_acoustic_src.fpp b/src/simulation/m_acoustic_src.fpp index d17d2d041..dae9e6ad4 100644 --- a/src/simulation/m_acoustic_src.fpp +++ b/src/simulation/m_acoustic_src.fpp @@ -167,17 +167,17 @@ contains sim_time = t_step*dt $:GPU_PARALLEL_LOOP(private='[j,k,l]', collapse=3) - do l = 0, p - do k = 0, n - do j = 0, m - mass_src(j, k, l) = 0._wp - mom_src(1, j, k, l) = 0._wp - e_src(j, k, l) = 0._wp - if (n > 0) mom_src(2, j, k, l) = 0._wp - if (p > 0) mom_src(3, j, k, l) = 0._wp - end do + do l = 0, p + do k = 0, n + do j = 0, m + mass_src(j, k, l) = 0._wp + mom_src(1, j, k, l) = 0._wp + e_src(j, k, l) = 0._wp + if (n > 0) mom_src(2, j, k, l) = 0._wp + if (p > 0) mom_src(3, j, k, l) = 0._wp end do end do + end do $:END_GPU_PARALLEL_LOOP() ! Keep outer loop sequel because different sources can have very different number of points @@ -221,123 +221,123 @@ contains deallocate (phi_rn) $:GPU_PARALLEL_LOOP(private='[i,myalpha,myalpha_rho]') - do i = 1, num_points - j = source_spatials(ai)%coord(1, i) - k = source_spatials(ai)%coord(2, i) - l = source_spatials(ai)%coord(3, i) - - ! Compute speed of sound - myRho = 0._wp - B_tait = 0._wp - small_gamma = 0._wp - - $:GPU_LOOP(parallelism='[seq]') - do q = 1, num_fluids - myalpha_rho(q) = q_cons_vf(q)%sf(j, k, l) - myalpha(q) = q_cons_vf(advxb + q - 1)%sf(j, k, l) - end do - - if (bubbles_euler) then - if (num_fluids > 2) then - $:GPU_LOOP(parallelism='[seq]') - do q = 1, num_fluids - 1 - myRho = myRho + myalpha_rho(q) - B_tait = B_tait + myalpha(q)*pi_infs(q) - small_gamma = small_gamma + myalpha(q)*gammas(q) - end do - else - myRho = myalpha_rho(1) - B_tait = pi_infs(1) - small_gamma = gammas(1) - end if - end if + do i = 1, num_points + j = source_spatials(ai)%coord(1, i) + k = source_spatials(ai)%coord(2, i) + l = source_spatials(ai)%coord(3, i) + + ! Compute speed of sound + myRho = 0._wp + B_tait = 0._wp + small_gamma = 0._wp + + $:GPU_LOOP(parallelism='[seq]') + do q = 1, num_fluids + myalpha_rho(q) = q_cons_vf(q)%sf(j, k, l) + myalpha(q) = q_cons_vf(advxb + q - 1)%sf(j, k, l) + end do - if ((.not. bubbles_euler) .or. (mpp_lim .and. (num_fluids > 2))) then + if (bubbles_euler) then + if (num_fluids > 2) then $:GPU_LOOP(parallelism='[seq]') - do q = 1, num_fluids + do q = 1, num_fluids - 1 myRho = myRho + myalpha_rho(q) B_tait = B_tait + myalpha(q)*pi_infs(q) small_gamma = small_gamma + myalpha(q)*gammas(q) end do + else + myRho = myalpha_rho(1) + B_tait = pi_infs(1) + small_gamma = gammas(1) end if + end if - small_gamma = 1._wp/small_gamma + 1._wp - c = sqrt(small_gamma*(q_prim_vf(E_idx)%sf(j, k, l) + ((small_gamma - 1._wp)/small_gamma)*B_tait)/myRho) + if ((.not. bubbles_euler) .or. (mpp_lim .and. (num_fluids > 2))) then + $:GPU_LOOP(parallelism='[seq]') + do q = 1, num_fluids + myRho = myRho + myalpha_rho(q) + B_tait = B_tait + myalpha(q)*pi_infs(q) + small_gamma = small_gamma + myalpha(q)*gammas(q) + end do + end if - ! Wavelength to frequency conversion - if (pulse(ai) == 1 .or. pulse(ai) == 3) frequency_local = f_frequency_local(freq_conv_flag, ai, c) - if (pulse(ai) == 2) gauss_sigma_time_local = f_gauss_sigma_time_local(gauss_conv_flag, ai, c) + small_gamma = 1._wp/small_gamma + 1._wp + c = sqrt(small_gamma*(q_prim_vf(E_idx)%sf(j, k, l) + ((small_gamma - 1._wp)/small_gamma)*B_tait)/myRho) - ! Update momentum source term - call s_source_temporal(sim_time, c, ai, mom_label, frequency_local, gauss_sigma_time_local, source_temporal, sum_BB) - mom_src_diff = source_temporal*source_spatials(ai)%val(i) + ! Wavelength to frequency conversion + if (pulse(ai) == 1 .or. pulse(ai) == 3) frequency_local = f_frequency_local(freq_conv_flag, ai, c) + if (pulse(ai) == 2) gauss_sigma_time_local = f_gauss_sigma_time_local(gauss_conv_flag, ai, c) - if (dipole(ai)) then ! Double amplitude & No momentum source term (only works for Planar) - mass_src(j, k, l) = mass_src(j, k, l) + 2._wp*mom_src_diff/c - if (model_eqns /= 4) E_src(j, k, l) = E_src(j, k, l) + 2._wp*mom_src_diff*c/(small_gamma - 1._wp) - cycle - end if + ! Update momentum source term + call s_source_temporal(sim_time, c, ai, mom_label, frequency_local, gauss_sigma_time_local, source_temporal, sum_BB) + mom_src_diff = source_temporal*source_spatials(ai)%val(i) - if (n == 0) then ! 1D - mom_src(1, j, k, l) = mom_src(1, j, k, l) + mom_src_diff*sign(1._wp, dir(ai)) ! Left or right-going wave - - elseif (p == 0) then ! 2D - if (support(ai) < 5) then ! Planar - mom_src(1, j, k, l) = mom_src(1, j, k, l) + mom_src_diff*cos(dir(ai)) - mom_src(2, j, k, l) = mom_src(2, j, k, l) + mom_src_diff*sin(dir(ai)) - else - mom_src(1, j, k, l) = mom_src(1, j, k, l) + mom_src_diff*cos(source_spatials(ai)%angle(i)) - mom_src(2, j, k, l) = mom_src(2, j, k, l) + mom_src_diff*sin(source_spatials(ai)%angle(i)) - end if - - else ! 3D - if (support(ai) < 5) then ! Planar - mom_src(1, j, k, l) = mom_src(1, j, k, l) + mom_src_diff*cos(dir(ai)) - mom_src(2, j, k, l) = mom_src(2, j, k, l) + mom_src_diff*sin(dir(ai)) - else - mom_src(1, j, k, l) = mom_src(1, j, k, l) + mom_src_diff*source_spatials(ai)%xyz_to_r_ratios(1, i) - mom_src(2, j, k, l) = mom_src(2, j, k, l) + mom_src_diff*source_spatials(ai)%xyz_to_r_ratios(2, i) - mom_src(3, j, k, l) = mom_src(3, j, k, l) + mom_src_diff*source_spatials(ai)%xyz_to_r_ratios(3, i) - end if - end if + if (dipole(ai)) then ! Double amplitude & No momentum source term (only works for Planar) + mass_src(j, k, l) = mass_src(j, k, l) + 2._wp*mom_src_diff/c + if (model_eqns /= 4) E_src(j, k, l) = E_src(j, k, l) + 2._wp*mom_src_diff*c/(small_gamma - 1._wp) + cycle + end if - ! Update mass source term + if (n == 0) then ! 1D + mom_src(1, j, k, l) = mom_src(1, j, k, l) + mom_src_diff*sign(1._wp, dir(ai)) ! Left or right-going wave + + elseif (p == 0) then ! 2D if (support(ai) < 5) then ! Planar - mass_src_diff = mom_src_diff/c - else ! Spherical or cylindrical support - ! Mass source term must be calculated differently using a correction term for spherical and cylindrical support - call s_source_temporal(sim_time, c, ai, mass_label, frequency_local, gauss_sigma_time_local, source_temporal, sum_BB) - mass_src_diff = source_temporal*source_spatials(ai)%val(i) + mom_src(1, j, k, l) = mom_src(1, j, k, l) + mom_src_diff*cos(dir(ai)) + mom_src(2, j, k, l) = mom_src(2, j, k, l) + mom_src_diff*sin(dir(ai)) + else + mom_src(1, j, k, l) = mom_src(1, j, k, l) + mom_src_diff*cos(source_spatials(ai)%angle(i)) + mom_src(2, j, k, l) = mom_src(2, j, k, l) + mom_src_diff*sin(source_spatials(ai)%angle(i)) end if - mass_src(j, k, l) = mass_src(j, k, l) + mass_src_diff - ! Update energy source term - if (model_eqns /= 4) then - E_src(j, k, l) = E_src(j, k, l) + mass_src_diff*c**2._wp/(small_gamma - 1._wp) + else ! 3D + if (support(ai) < 5) then ! Planar + mom_src(1, j, k, l) = mom_src(1, j, k, l) + mom_src_diff*cos(dir(ai)) + mom_src(2, j, k, l) = mom_src(2, j, k, l) + mom_src_diff*sin(dir(ai)) + else + mom_src(1, j, k, l) = mom_src(1, j, k, l) + mom_src_diff*source_spatials(ai)%xyz_to_r_ratios(1, i) + mom_src(2, j, k, l) = mom_src(2, j, k, l) + mom_src_diff*source_spatials(ai)%xyz_to_r_ratios(2, i) + mom_src(3, j, k, l) = mom_src(3, j, k, l) + mom_src_diff*source_spatials(ai)%xyz_to_r_ratios(3, i) end if + end if + + ! Update mass source term + if (support(ai) < 5) then ! Planar + mass_src_diff = mom_src_diff/c + else ! Spherical or cylindrical support + ! Mass source term must be calculated differently using a correction term for spherical and cylindrical support + call s_source_temporal(sim_time, c, ai, mass_label, frequency_local, gauss_sigma_time_local, source_temporal, sum_BB) + mass_src_diff = source_temporal*source_spatials(ai)%val(i) + end if + mass_src(j, k, l) = mass_src(j, k, l) + mass_src_diff + + ! Update energy source term + if (model_eqns /= 4) then + E_src(j, k, l) = E_src(j, k, l) + mass_src_diff*c**2._wp/(small_gamma - 1._wp) + end if - end do + end do $:END_GPU_PARALLEL_LOOP() end if end do ! Update the rhs variables $:GPU_PARALLEL_LOOP(private='[j,k,l]',collapse=3) - do l = 0, p - do k = 0, n - do j = 0, m - $:GPU_LOOP(parallelism='[seq]') - do q = contxb, contxe - rhs_vf(q)%sf(j, k, l) = rhs_vf(q)%sf(j, k, l) + mass_src(j, k, l) - end do - $:GPU_LOOP(parallelism='[seq]') - do q = momxb, momxe - rhs_vf(q)%sf(j, k, l) = rhs_vf(q)%sf(j, k, l) + mom_src(q - contxe, j, k, l) - end do - rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) + e_src(j, k, l) + do l = 0, p + do k = 0, n + do j = 0, m + $:GPU_LOOP(parallelism='[seq]') + do q = contxb, contxe + rhs_vf(q)%sf(j, k, l) = rhs_vf(q)%sf(j, k, l) + mass_src(j, k, l) + end do + $:GPU_LOOP(parallelism='[seq]') + do q = momxb, momxe + rhs_vf(q)%sf(j, k, l) = rhs_vf(q)%sf(j, k, l) + mom_src(q - contxe, j, k, l) end do + rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) + e_src(j, k, l) end do end do + end do $:END_GPU_PARALLEL_LOOP() end subroutine s_acoustic_src_calculations diff --git a/src/simulation/m_body_forces.fpp b/src/simulation/m_body_forces.fpp index 5eabe3519..fcfafcbe3 100644 --- a/src/simulation/m_body_forces.fpp +++ b/src/simulation/m_body_forces.fpp @@ -74,17 +74,17 @@ contains integer :: i, j, k, l !< standard iterators $:GPU_PARALLEL_LOOP(private='[j,k,l]', collapse=3) - do l = 0, p - do k = 0, n - do j = 0, m - rhoM(j, k, l) = 0._wp - do i = 1, num_fluids - rhoM(j, k, l) = rhoM(j, k, l) + & - q_cons_vf(contxb + i - 1)%sf(j, k, l) - end do + do l = 0, p + do k = 0, n + do j = 0, m + rhoM(j, k, l) = 0._wp + do i = 1, num_fluids + rhoM(j, k, l) = rhoM(j, k, l) + & + q_cons_vf(contxb + i - 1)%sf(j, k, l) end do end do end do + end do $:END_GPU_PARALLEL_LOOP() end subroutine s_compute_mixture_density @@ -105,62 +105,62 @@ contains call s_compute_mixture_density(q_cons_vf) $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=4) - do i = momxb, E_idx - do l = 0, p - do k = 0, n - do j = 0, m - rhs_vf(i)%sf(j, k, l) = 0._wp - end do + do i = momxb, E_idx + do l = 0, p + do k = 0, n + do j = 0, m + rhs_vf(i)%sf(j, k, l) = 0._wp end do end do end do + end do $:END_GPU_PARALLEL_LOOP() if (bf_x) then ! x-direction body forces $:GPU_PARALLEL_LOOP(private='[j,k,l]', collapse=3) - do l = 0, p - do k = 0, n - do j = 0, m - rhs_vf(momxb)%sf(j, k, l) = rhs_vf(momxb)%sf(j, k, l) + & - rhoM(j, k, l)*accel_bf(1) - rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) + & - q_cons_vf(momxb)%sf(j, k, l)*accel_bf(1) - end do + do l = 0, p + do k = 0, n + do j = 0, m + rhs_vf(momxb)%sf(j, k, l) = rhs_vf(momxb)%sf(j, k, l) + & + rhoM(j, k, l)*accel_bf(1) + rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) + & + q_cons_vf(momxb)%sf(j, k, l)*accel_bf(1) end do end do + end do $:END_GPU_PARALLEL_LOOP() end if if (bf_y) then ! y-direction body forces $:GPU_PARALLEL_LOOP(private='[j,k,l]', collapse=3) - do l = 0, p - do k = 0, n - do j = 0, m - rhs_vf(momxb + 1)%sf(j, k, l) = rhs_vf(momxb + 1)%sf(j, k, l) + & - rhoM(j, k, l)*accel_bf(2) - rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) + & - q_cons_vf(momxb + 1)%sf(j, k, l)*accel_bf(2) - end do + do l = 0, p + do k = 0, n + do j = 0, m + rhs_vf(momxb + 1)%sf(j, k, l) = rhs_vf(momxb + 1)%sf(j, k, l) + & + rhoM(j, k, l)*accel_bf(2) + rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) + & + q_cons_vf(momxb + 1)%sf(j, k, l)*accel_bf(2) end do end do + end do $:END_GPU_PARALLEL_LOOP() end if if (bf_z) then ! z-direction body forces $:GPU_PARALLEL_LOOP(private='[j,k,l]', collapse=3) - do l = 0, p - do k = 0, n - do j = 0, m - rhs_vf(momxe)%sf(j, k, l) = rhs_vf(momxe)%sf(j, k, l) + & - (rhoM(j, k, l))*accel_bf(3) - rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) + & - q_cons_vf(momxe)%sf(j, k, l)*accel_bf(3) - end do + do l = 0, p + do k = 0, n + do j = 0, m + rhs_vf(momxe)%sf(j, k, l) = rhs_vf(momxe)%sf(j, k, l) + & + (rhoM(j, k, l))*accel_bf(3) + rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) + & + q_cons_vf(momxe)%sf(j, k, l)*accel_bf(3) end do end do + end do $:END_GPU_PARALLEL_LOOP() end if diff --git a/src/simulation/m_bubbles_EE.fpp b/src/simulation/m_bubbles_EE.fpp index 3a7fc2fc9..5bfaba6e5 100644 --- a/src/simulation/m_bubbles_EE.fpp +++ b/src/simulation/m_bubbles_EE.fpp @@ -77,18 +77,18 @@ contains integer(wp) :: i, j, k, l $:GPU_PARALLEL_LOOP(private='[j,k,l]', collapse=3) - do l = 0, p - do k = 0, n - do j = 0, m - nR3bar = 0._wp - $:GPU_LOOP(parallelism='[seq]') - do i = 1, nb - nR3bar = nR3bar + weight(i)*(q_cons_vf(rs(i))%sf(j, k, l))**3._wp - end do - q_cons_vf(alf_idx)%sf(j, k, l) = (4._wp*pi*nR3bar)/(3._wp*q_cons_vf(n_idx)%sf(j, k, l)**2._wp) + do l = 0, p + do k = 0, n + do j = 0, m + nR3bar = 0._wp + $:GPU_LOOP(parallelism='[seq]') + do i = 1, nb + nR3bar = nR3bar + weight(i)*(q_cons_vf(rs(i))%sf(j, k, l))**3._wp end do + q_cons_vf(alf_idx)%sf(j, k, l) = (4._wp*pi*nR3bar)/(3._wp*q_cons_vf(n_idx)%sf(j, k, l)**2._wp) end do end do + end do $:END_GPU_PARALLEL_LOOP() end subroutine s_comp_alpha_from_n @@ -105,48 +105,48 @@ contains if (.not. qbmm) then $:GPU_PARALLEL_LOOP(private='[j,k,l]', collapse=3) - do l = 0, p - do k = 0, n - do j = 0, m - divu_in%sf(j, k, l) = 0._wp - divu_in%sf(j, k, l) = & - 5.e-1_wp/dx(j)*(q_prim_vf(contxe + idir)%sf(j + 1, k, l) - & - q_prim_vf(contxe + idir)%sf(j - 1, k, l)) + do l = 0, p + do k = 0, n + do j = 0, m + divu_in%sf(j, k, l) = 0._wp + divu_in%sf(j, k, l) = & + 5.e-1_wp/dx(j)*(q_prim_vf(contxe + idir)%sf(j + 1, k, l) - & + q_prim_vf(contxe + idir)%sf(j - 1, k, l)) - end do end do end do + end do $:END_GPU_PARALLEL_LOOP() end if elseif (idir == 2) then $:GPU_PARALLEL_LOOP(private='[j,k,l]', collapse=3) - do l = 0, p - do k = 0, n - do j = 0, m - divu_in%sf(j, k, l) = divu_in%sf(j, k, l) + & - 5.e-1_wp/dy(k)*(q_prim_vf(contxe + idir)%sf(j, k + 1, l) - & - q_prim_vf(contxe + idir)%sf(j, k - 1, l)) + do l = 0, p + do k = 0, n + do j = 0, m + divu_in%sf(j, k, l) = divu_in%sf(j, k, l) + & + 5.e-1_wp/dy(k)*(q_prim_vf(contxe + idir)%sf(j, k + 1, l) - & + q_prim_vf(contxe + idir)%sf(j, k - 1, l)) - end do end do end do + end do $:END_GPU_PARALLEL_LOOP() elseif (idir == 3) then $:GPU_PARALLEL_LOOP(private='[j,k,l]', collapse=3) - do l = 0, p - do k = 0, n - do j = 0, m - divu_in%sf(j, k, l) = divu_in%sf(j, k, l) + & - 5.e-1_wp/dz(l)*(q_prim_vf(contxe + idir)%sf(j, k, l + 1) - & - q_prim_vf(contxe + idir)%sf(j, k, l - 1)) + do l = 0, p + do k = 0, n + do j = 0, m + divu_in%sf(j, k, l) = divu_in%sf(j, k, l) + & + 5.e-1_wp/dz(l)*(q_prim_vf(contxe + idir)%sf(j, k, l + 1) - & + q_prim_vf(contxe + idir)%sf(j, k, l - 1)) - end do end do end do + end do $:END_GPU_PARALLEL_LOOP() end if @@ -178,178 +178,178 @@ contains real(wp) :: dmMass_v, dmMass_n, dmBeta_c, dmBeta_t, dmCson $:GPU_PARALLEL_LOOP(private='[j,k,l]', collapse=3) - do l = 0, p - do k = 0, n - do j = 0, m - bub_adv_src(j, k, l) = 0._wp - - $:GPU_LOOP(parallelism='[seq]') - do q = 1, nb - bub_r_src(j, k, l, q) = 0._wp - bub_v_src(j, k, l, q) = 0._wp - bub_p_src(j, k, l, q) = 0._wp - bub_m_src(j, k, l, q) = 0._wp - end do + do l = 0, p + do k = 0, n + do j = 0, m + bub_adv_src(j, k, l) = 0._wp + + $:GPU_LOOP(parallelism='[seq]') + do q = 1, nb + bub_r_src(j, k, l, q) = 0._wp + bub_v_src(j, k, l, q) = 0._wp + bub_p_src(j, k, l, q) = 0._wp + bub_m_src(j, k, l, q) = 0._wp end do end do end do + end do $:END_GPU_PARALLEL_LOOP() adap_dt_stop_max = 0 $:GPU_PARALLEL_LOOP(private='[j,k,l,Rtmp, Vtmp, myalpha_rho, myalpha]', collapse=3, & & reduction='[[adap_dt_stop_max]]', reductionOp='[MAX]', & & copy='[adap_dt_stop_max]') - do l = 0, p - do k = 0, n - do j = 0, m + do l = 0, p + do k = 0, n + do j = 0, m - if (adv_n) then - nbub = q_prim_vf(n_idx)%sf(j, k, l) - else - $:GPU_LOOP(parallelism='[seq]') - do q = 1, nb - Rtmp(q) = q_prim_vf(rs(q))%sf(j, k, l) - Vtmp(q) = q_prim_vf(vs(q))%sf(j, k, l) - end do + if (adv_n) then + nbub = q_prim_vf(n_idx)%sf(j, k, l) + else + $:GPU_LOOP(parallelism='[seq]') + do q = 1, nb + Rtmp(q) = q_prim_vf(rs(q))%sf(j, k, l) + Vtmp(q) = q_prim_vf(vs(q))%sf(j, k, l) + end do - R3 = 0._wp + R3 = 0._wp - $:GPU_LOOP(parallelism='[seq]') - do q = 1, nb - R3 = R3 + weight(q)*Rtmp(q)**3._wp - end do + $:GPU_LOOP(parallelism='[seq]') + do q = 1, nb + R3 = R3 + weight(q)*Rtmp(q)**3._wp + end do - nbub = (3._wp/(4._wp*pi))*q_prim_vf(alf_idx)%sf(j, k, l)/R3 - end if + nbub = (3._wp/(4._wp*pi))*q_prim_vf(alf_idx)%sf(j, k, l)/R3 + end if - if (.not. adap_dt) then - R2Vav = 0._wp + if (.not. adap_dt) then + R2Vav = 0._wp - $:GPU_LOOP(parallelism='[seq]') - do q = 1, nb - R2Vav = R2Vav + weight(q)*Rtmp(q)**2._wp*Vtmp(q) - end do + $:GPU_LOOP(parallelism='[seq]') + do q = 1, nb + R2Vav = R2Vav + weight(q)*Rtmp(q)**2._wp*Vtmp(q) + end do - bub_adv_src(j, k, l) = 4._wp*pi*nbub*R2Vav - end if + bub_adv_src(j, k, l) = 4._wp*pi*nbub*R2Vav + end if + + $:GPU_LOOP(parallelism='[seq]') + do q = 1, nb $:GPU_LOOP(parallelism='[seq]') - do q = 1, nb + do ii = 1, num_fluids + myalpha_rho(ii) = q_cons_vf(ii)%sf(j, k, l) + myalpha(ii) = q_cons_vf(advxb + ii - 1)%sf(j, k, l) + end do + myRho = 0._wp + n_tait = 0._wp + B_tait = 0._wp + + if (mpp_lim .and. (num_fluids > 2)) then $:GPU_LOOP(parallelism='[seq]') do ii = 1, num_fluids - myalpha_rho(ii) = q_cons_vf(ii)%sf(j, k, l) - myalpha(ii) = q_cons_vf(advxb + ii - 1)%sf(j, k, l) + myRho = myRho + myalpha_rho(ii) + n_tait = n_tait + myalpha(ii)*gammas(ii) + B_tait = B_tait + myalpha(ii)*pi_infs(ii) + end do + else if (num_fluids > 2) then + $:GPU_LOOP(parallelism='[seq]') + do ii = 1, num_fluids - 1 + myRho = myRho + myalpha_rho(ii) + n_tait = n_tait + myalpha(ii)*gammas(ii) + B_tait = B_tait + myalpha(ii)*pi_infs(ii) end do + else + myRho = myalpha_rho(1) + n_tait = gammas(1) + B_tait = pi_infs(1)/pi_fac + end if - myRho = 0._wp - n_tait = 0._wp - B_tait = 0._wp - - if (mpp_lim .and. (num_fluids > 2)) then - $:GPU_LOOP(parallelism='[seq]') - do ii = 1, num_fluids - myRho = myRho + myalpha_rho(ii) - n_tait = n_tait + myalpha(ii)*gammas(ii) - B_tait = B_tait + myalpha(ii)*pi_infs(ii) - end do - else if (num_fluids > 2) then - $:GPU_LOOP(parallelism='[seq]') - do ii = 1, num_fluids - 1 - myRho = myRho + myalpha_rho(ii) - n_tait = n_tait + myalpha(ii)*gammas(ii) - B_tait = B_tait + myalpha(ii)*pi_infs(ii) - end do - else - myRho = myalpha_rho(1) - n_tait = gammas(1) - B_tait = pi_infs(1)/pi_fac - end if + n_tait = 1._wp/n_tait + 1._wp !make this the usual little 'gamma' + B_tait = B_tait*(n_tait - 1)/n_tait ! make this the usual pi_inf - n_tait = 1._wp/n_tait + 1._wp !make this the usual little 'gamma' - B_tait = B_tait*(n_tait - 1)/n_tait ! make this the usual pi_inf + myRho = q_prim_vf(1)%sf(j, k, l) + myP = q_prim_vf(E_idx)%sf(j, k, l) + alf = q_prim_vf(alf_idx)%sf(j, k, l) + myR = q_prim_vf(rs(q))%sf(j, k, l) + myV = q_prim_vf(vs(q))%sf(j, k, l) - myRho = q_prim_vf(1)%sf(j, k, l) - myP = q_prim_vf(E_idx)%sf(j, k, l) - alf = q_prim_vf(alf_idx)%sf(j, k, l) - myR = q_prim_vf(rs(q))%sf(j, k, l) - myV = q_prim_vf(vs(q))%sf(j, k, l) + if (.not. polytropic) then + pb_local = q_prim_vf(ps(q))%sf(j, k, l) + mv_local = q_prim_vf(ms(q))%sf(j, k, l) + call s_bwproperty(pb_local, q, chi_vw, k_mw, rho_mw) + call s_vflux(myR, myV, pb_local, mv_local, q, vflux) + pbdot = f_bpres_dot(vflux, myR, myV, pb_local, mv_local, q) - if (.not. polytropic) then - pb_local = q_prim_vf(ps(q))%sf(j, k, l) - mv_local = q_prim_vf(ms(q))%sf(j, k, l) - call s_bwproperty(pb_local, q, chi_vw, k_mw, rho_mw) - call s_vflux(myR, myV, pb_local, mv_local, q, vflux) - pbdot = f_bpres_dot(vflux, myR, myV, pb_local, mv_local, q) - - bub_p_src(j, k, l, q) = nbub*pbdot - bub_m_src(j, k, l, q) = nbub*vflux*4._wp*pi*(myR**2._wp) - else - pb_local = 0._wp; mv_local = 0._wp; vflux = 0._wp; pbdot = 0._wp - end if - - ! Adaptive time stepping - adap_dt_stop = 0 - - if (adap_dt) then + bub_p_src(j, k, l, q) = nbub*pbdot + bub_m_src(j, k, l, q) = nbub*vflux*4._wp*pi*(myR**2._wp) + else + pb_local = 0._wp; mv_local = 0._wp; vflux = 0._wp; pbdot = 0._wp + end if - call s_advance_step(myRho, myP, myR, myV, R0(q), & - pb_local, pbdot, alf, n_tait, B_tait, & - bub_adv_src(j, k, l), divu_in%sf(j, k, l), & - dmBub_id, dmMass_v, dmMass_n, dmBeta_c, & - dmBeta_t, dmCson, adap_dt_stop) + ! Adaptive time stepping + adap_dt_stop = 0 - q_cons_vf(rs(q))%sf(j, k, l) = nbub*myR - q_cons_vf(vs(q))%sf(j, k, l) = nbub*myV + if (adap_dt) then - else - rddot = f_rddot(myRho, myP, myR, myV, R0(q), & + call s_advance_step(myRho, myP, myR, myV, R0(q), & pb_local, pbdot, alf, n_tait, B_tait, & bub_adv_src(j, k, l), divu_in%sf(j, k, l), & - dmCson) - bub_v_src(j, k, l, q) = nbub*rddot - bub_r_src(j, k, l, q) = q_cons_vf(vs(q))%sf(j, k, l) - end if + dmBub_id, dmMass_v, dmMass_n, dmBeta_c, & + dmBeta_t, dmCson, adap_dt_stop) - adap_dt_stop_max = max(adap_dt_stop_max, adap_dt_stop) + q_cons_vf(rs(q))%sf(j, k, l) = nbub*myR + q_cons_vf(vs(q))%sf(j, k, l) = nbub*myV - if (alf < 1.e-11_wp) then - bub_adv_src(j, k, l) = 0._wp - bub_r_src(j, k, l, q) = 0._wp - bub_v_src(j, k, l, q) = 0._wp - if (.not. polytropic) then - bub_p_src(j, k, l, q) = 0._wp - bub_m_src(j, k, l, q) = 0._wp - end if + else + rddot = f_rddot(myRho, myP, myR, myV, R0(q), & + pb_local, pbdot, alf, n_tait, B_tait, & + bub_adv_src(j, k, l), divu_in%sf(j, k, l), & + dmCson) + bub_v_src(j, k, l, q) = nbub*rddot + bub_r_src(j, k, l, q) = q_cons_vf(vs(q))%sf(j, k, l) + end if + + adap_dt_stop_max = max(adap_dt_stop_max, adap_dt_stop) + + if (alf < 1.e-11_wp) then + bub_adv_src(j, k, l) = 0._wp + bub_r_src(j, k, l, q) = 0._wp + bub_v_src(j, k, l, q) = 0._wp + if (.not. polytropic) then + bub_p_src(j, k, l, q) = 0._wp + bub_m_src(j, k, l, q) = 0._wp end if - end do + end if end do end do end do + end do $:END_GPU_PARALLEL_LOOP() if (adap_dt .and. adap_dt_stop_max > 0) call s_mpi_abort("Adaptive time stepping failed to converge.") if (.not. adap_dt) then $:GPU_PARALLEL_LOOP(private='[i,l,q]', collapse=3) - do l = 0, p - do q = 0, n - do i = 0, m - rhs_vf(alf_idx)%sf(i, q, l) = rhs_vf(alf_idx)%sf(i, q, l) + bub_adv_src(i, q, l) - if (num_fluids > 1) rhs_vf(advxb)%sf(i, q, l) = & - rhs_vf(advxb)%sf(i, q, l) - bub_adv_src(i, q, l) - $:GPU_LOOP(parallelism='[seq]') - do k = 1, nb - rhs_vf(rs(k))%sf(i, q, l) = rhs_vf(rs(k))%sf(i, q, l) + bub_r_src(i, q, l, k) - rhs_vf(vs(k))%sf(i, q, l) = rhs_vf(vs(k))%sf(i, q, l) + bub_v_src(i, q, l, k) - if (polytropic .neqv. .true.) then - rhs_vf(ps(k))%sf(i, q, l) = rhs_vf(ps(k))%sf(i, q, l) + bub_p_src(i, q, l, k) - rhs_vf(ms(k))%sf(i, q, l) = rhs_vf(ms(k))%sf(i, q, l) + bub_m_src(i, q, l, k) - end if - end do + do l = 0, p + do q = 0, n + do i = 0, m + rhs_vf(alf_idx)%sf(i, q, l) = rhs_vf(alf_idx)%sf(i, q, l) + bub_adv_src(i, q, l) + if (num_fluids > 1) rhs_vf(advxb)%sf(i, q, l) = & + rhs_vf(advxb)%sf(i, q, l) - bub_adv_src(i, q, l) + $:GPU_LOOP(parallelism='[seq]') + do k = 1, nb + rhs_vf(rs(k))%sf(i, q, l) = rhs_vf(rs(k))%sf(i, q, l) + bub_r_src(i, q, l, k) + rhs_vf(vs(k))%sf(i, q, l) = rhs_vf(vs(k))%sf(i, q, l) + bub_v_src(i, q, l, k) + if (polytropic .neqv. .true.) then + rhs_vf(ps(k))%sf(i, q, l) = rhs_vf(ps(k))%sf(i, q, l) + bub_p_src(i, q, l, k) + rhs_vf(ms(k))%sf(i, q, l) = rhs_vf(ms(k))%sf(i, q, l) + bub_m_src(i, q, l, k) + end if end do end do end do + end do $:END_GPU_PARALLEL_LOOP() end if end subroutine s_compute_bubble_EE_source diff --git a/src/simulation/m_bubbles_EL.fpp b/src/simulation/m_bubbles_EL.fpp index 2fc41a880..4618ef32b 100644 --- a/src/simulation/m_bubbles_EL.fpp +++ b/src/simulation/m_bubbles_EL.fpp @@ -593,22 +593,22 @@ contains if (lag_params%pressure_corrector) then ! Calculate velocity potentials (valid for one bubble per cell) $:GPU_PARALLEL_LOOP(private='[k,cell]') - do k = 1, nBubs - call s_get_pinf(k, q_prim_vf, 2, paux, cell, preterm1, term2, Romega) - myR0 = bub_R0(k) - myR = intfc_rad(k, 2) - myV = intfc_vel(k, 2) - myPb = gas_p(k, 2) - pint = f_cpbw_KM(myR0, myR, myV, myPb) - pint = pint + 0.5_wp*myV**2._wp - if (lag_params%cluster_type == 2) then - bub_dphidt(k) = (paux - pint) + term2 - ! Accounting for the potential induced by the bubble averaged over the control volume - ! Note that this is based on the incompressible flow assumption near the bubble. - term1_fac = 3._wp/2._wp*(myR*(Romega**2._wp - myR**2._wp))/(Romega**3._wp - myR**3._wp) - bub_dphidt(k) = bub_dphidt(k)/(1._wp - term1_fac) - end if - end do + do k = 1, nBubs + call s_get_pinf(k, q_prim_vf, 2, paux, cell, preterm1, term2, Romega) + myR0 = bub_R0(k) + myR = intfc_rad(k, 2) + myV = intfc_vel(k, 2) + myPb = gas_p(k, 2) + pint = f_cpbw_KM(myR0, myR, myV, myPb) + pint = pint + 0.5_wp*myV**2._wp + if (lag_params%cluster_type == 2) then + bub_dphidt(k) = (paux - pint) + term2 + ! Accounting for the potential induced by the bubble averaged over the control volume + ! Note that this is based on the incompressible flow assumption near the bubble. + term1_fac = 3._wp/2._wp*(myR*(Romega**2._wp - myR**2._wp))/(Romega**3._wp - myR**3._wp) + bub_dphidt(k) = bub_dphidt(k)/(1._wp - term1_fac) + end if + end do $:END_GPU_PARALLEL_LOOP() end if @@ -617,81 +617,81 @@ contains $:GPU_PARALLEL_LOOP(private='[k,myalpha_rho,myalpha,Re,cell]', & & reduction='[[adap_dt_stop_max]]',reductionOp='[MAX]', & & copy='[adap_dt_stop_max]',copyin='[stage]') - do k = 1, nBubs - ! Keller-Miksis model - - ! Current bubble state - myPb = gas_p(k, 2) - myMass_n = gas_mg(k) - myMass_v = gas_mv(k, 2) - myR = intfc_rad(k, 2) - myV = intfc_vel(k, 2) - myBeta_c = gas_betaC(k) - myBeta_t = gas_betaT(k) - myR0 = bub_R0(k) - - ! Vapor and heat fluxes - call s_vflux(myR, myV, myPb, myMass_v, k, myVapFlux, myMass_n, myBeta_c, myR_m, mygamma_m) - myPbdot = f_bpres_dot(myVapFlux, myR, myV, myPb, myMass_v, k, myBeta_t, myR_m, mygamma_m) - myMvdot = 4._wp*pi*myR**2._wp*myVapFlux - - ! Obtaining driving pressure - call s_get_pinf(k, q_prim_vf, 1, myPinf, cell, aux1, aux2) - - ! Obtain liquid density and computing speed of sound from pinf - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - myalpha_rho(i) = q_prim_vf(i)%sf(cell(1), cell(2), cell(3)) - myalpha(i) = q_prim_vf(E_idx + i)%sf(cell(1), cell(2), cell(3)) - end do - call s_convert_species_to_mixture_variables_acc(myRho, gamma, pi_inf, qv, myalpha, & - myalpha_rho, Re) - call s_compute_cson_from_pinf(q_prim_vf, myPinf, cell, myRho, gamma, pi_inf, myCson) + do k = 1, nBubs + ! Keller-Miksis model + + ! Current bubble state + myPb = gas_p(k, 2) + myMass_n = gas_mg(k) + myMass_v = gas_mv(k, 2) + myR = intfc_rad(k, 2) + myV = intfc_vel(k, 2) + myBeta_c = gas_betaC(k) + myBeta_t = gas_betaT(k) + myR0 = bub_R0(k) + + ! Vapor and heat fluxes + call s_vflux(myR, myV, myPb, myMass_v, k, myVapFlux, myMass_n, myBeta_c, myR_m, mygamma_m) + myPbdot = f_bpres_dot(myVapFlux, myR, myV, myPb, myMass_v, k, myBeta_t, myR_m, mygamma_m) + myMvdot = 4._wp*pi*myR**2._wp*myVapFlux + + ! Obtaining driving pressure + call s_get_pinf(k, q_prim_vf, 1, myPinf, cell, aux1, aux2) + + ! Obtain liquid density and computing speed of sound from pinf + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids + myalpha_rho(i) = q_prim_vf(i)%sf(cell(1), cell(2), cell(3)) + myalpha(i) = q_prim_vf(E_idx + i)%sf(cell(1), cell(2), cell(3)) + end do + call s_convert_species_to_mixture_variables_acc(myRho, gamma, pi_inf, qv, myalpha, & + myalpha_rho, Re) + call s_compute_cson_from_pinf(q_prim_vf, myPinf, cell, myRho, gamma, pi_inf, myCson) - ! Adaptive time stepping - adap_dt_stop = 0 + ! Adaptive time stepping + adap_dt_stop = 0 - if (adap_dt) then + if (adap_dt) then - call s_advance_step(myRho, myPinf, myR, myV, myR0, myPb, myPbdot, dmalf, & - dmntait, dmBtait, dm_bub_adv_src, dm_divu, & - k, myMass_v, myMass_n, myBeta_c, & - myBeta_t, myCson, adap_dt_stop) + call s_advance_step(myRho, myPinf, myR, myV, myR0, myPb, myPbdot, dmalf, & + dmntait, dmBtait, dm_bub_adv_src, dm_divu, & + k, myMass_v, myMass_n, myBeta_c, & + myBeta_t, myCson, adap_dt_stop) - ! Update bubble state - intfc_rad(k, 1) = myR - intfc_vel(k, 1) = myV - gas_p(k, 1) = myPb - gas_mv(k, 1) = myMass_v + ! Update bubble state + intfc_rad(k, 1) = myR + intfc_vel(k, 1) = myV + gas_p(k, 1) = myPb + gas_mv(k, 1) = myMass_v - else + else - ! Radial acceleration from bubble models - intfc_dveldt(k, stage) = f_rddot(myRho, myPinf, myR, myV, myR0, & - myPb, myPbdot, dmalf, dmntait, dmBtait, & - dm_bub_adv_src, dm_divu, & - myCson) - intfc_draddt(k, stage) = myV - gas_dmvdt(k, stage) = myMvdot - gas_dpdt(k, stage) = myPbdot + ! Radial acceleration from bubble models + intfc_dveldt(k, stage) = f_rddot(myRho, myPinf, myR, myV, myR0, & + myPb, myPbdot, dmalf, dmntait, dmBtait, & + dm_bub_adv_src, dm_divu, & + myCson) + intfc_draddt(k, stage) = myV + gas_dmvdt(k, stage) = myMvdot + gas_dpdt(k, stage) = myPbdot - end if + end if - adap_dt_stop_max = max(adap_dt_stop_max, adap_dt_stop) + adap_dt_stop_max = max(adap_dt_stop_max, adap_dt_stop) - end do + end do $:END_GPU_PARALLEL_LOOP() if (adap_dt .and. adap_dt_stop_max > 0) call s_mpi_abort("Adaptive time stepping failed to converge.") ! Bubbles remain in a fixed position $:GPU_PARALLEL_LOOP(collapse=2, private='[k,l]', copyin='[stage]') - do k = 1, nBubs - do l = 1, 3 - mtn_dposdt(k, l, stage) = 0._wp - mtn_dveldt(k, l, stage) = 0._wp - end do + do k = 1, nBubs + do l = 1, 3 + mtn_dposdt(k, l, stage) = 0._wp + mtn_dveldt(k, l, stage) = 0._wp end do + end do $:END_GPU_PARALLEL_LOOP() call nvtxEndRange @@ -718,36 +718,36 @@ contains ! (q / (1 - beta)) * d(beta)/dt source if (p == 0) then $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=4) - do k = 0, p - do j = 0, n - do i = 0, m - do l = 1, E_idx - if (q_beta%vf(1)%sf(i, j, k) > (1._wp - lag_params%valmaxvoid)) then - rhs_vf(l)%sf(i, j, k) = rhs_vf(l)%sf(i, j, k) + & - q_cons_vf(l)%sf(i, j, k)*(q_beta%vf(2)%sf(i, j, k) + & - q_beta%vf(5)%sf(i, j, k)) - - end if - end do + do k = 0, p + do j = 0, n + do i = 0, m + do l = 1, E_idx + if (q_beta%vf(1)%sf(i, j, k) > (1._wp - lag_params%valmaxvoid)) then + rhs_vf(l)%sf(i, j, k) = rhs_vf(l)%sf(i, j, k) + & + q_cons_vf(l)%sf(i, j, k)*(q_beta%vf(2)%sf(i, j, k) + & + q_beta%vf(5)%sf(i, j, k)) + + end if end do end do end do + end do $:END_GPU_PARALLEL_LOOP() else $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=4) - do k = 0, p - do j = 0, n - do i = 0, m - do l = 1, E_idx - if (q_beta%vf(1)%sf(i, j, k) > (1._wp - lag_params%valmaxvoid)) then - rhs_vf(l)%sf(i, j, k) = rhs_vf(l)%sf(i, j, k) + & - q_cons_vf(l)%sf(i, j, k)/q_beta%vf(1)%sf(i, j, k)* & - q_beta%vf(2)%sf(i, j, k) - end if - end do + do k = 0, p + do j = 0, n + do i = 0, m + do l = 1, E_idx + if (q_beta%vf(1)%sf(i, j, k) > (1._wp - lag_params%valmaxvoid)) then + rhs_vf(l)%sf(i, j, k) = rhs_vf(l)%sf(i, j, k) + & + q_cons_vf(l)%sf(i, j, k)/q_beta%vf(1)%sf(i, j, k)* & + q_beta%vf(2)%sf(i, j, k) + end if end do end do end do + end do $:END_GPU_PARALLEL_LOOP() end if @@ -757,46 +757,46 @@ contains ! (q / (1 - beta)) * d(beta)/dt source $:GPU_PARALLEL_LOOP(private='[i,j,k]', collapse=3) - do k = 0, p - do j = 0, n - do i = 0, m - if (q_beta%vf(1)%sf(i, j, k) > (1._wp - lag_params%valmaxvoid)) then - rhs_vf(contxe + l)%sf(i, j, k) = rhs_vf(contxe + l)%sf(i, j, k) - & - (1._wp - q_beta%vf(1)%sf(i, j, k))/ & - q_beta%vf(1)%sf(i, j, k)* & - q_beta%vf(3)%sf(i, j, k) - end if - end do + do k = 0, p + do j = 0, n + do i = 0, m + if (q_beta%vf(1)%sf(i, j, k) > (1._wp - lag_params%valmaxvoid)) then + rhs_vf(contxe + l)%sf(i, j, k) = rhs_vf(contxe + l)%sf(i, j, k) - & + (1._wp - q_beta%vf(1)%sf(i, j, k))/ & + q_beta%vf(1)%sf(i, j, k)* & + q_beta%vf(3)%sf(i, j, k) + end if end do end do + end do $:END_GPU_PARALLEL_LOOP() !source in energy $:GPU_PARALLEL_LOOP(private='[i,j,k]', collapse=3) - do k = idwbuff(3)%beg, idwbuff(3)%end - do j = idwbuff(2)%beg, idwbuff(2)%end - do i = idwbuff(1)%beg, idwbuff(1)%end - q_beta%vf(3)%sf(i, j, k) = q_prim_vf(E_idx)%sf(i, j, k)*q_prim_vf(contxe + l)%sf(i, j, k) - end do + do k = idwbuff(3)%beg, idwbuff(3)%end + do j = idwbuff(2)%beg, idwbuff(2)%end + do i = idwbuff(1)%beg, idwbuff(1)%end + q_beta%vf(3)%sf(i, j, k) = q_prim_vf(E_idx)%sf(i, j, k)*q_prim_vf(contxe + l)%sf(i, j, k) end do end do + end do $:END_GPU_PARALLEL_LOOP() call s_gradient_dir(q_beta%vf(3), q_beta%vf(4), l) ! (beta / (1 - beta)) * d(Pu)/dl source $:GPU_PARALLEL_LOOP(private='[i,j,k]', collapse=3) - do k = 0, p - do j = 0, n - do i = 0, m - if (q_beta%vf(1)%sf(i, j, k) > (1._wp - lag_params%valmaxvoid)) then - rhs_vf(E_idx)%sf(i, j, k) = rhs_vf(E_idx)%sf(i, j, k) - & - q_beta%vf(4)%sf(i, j, k)*(1._wp - q_beta%vf(1)%sf(i, j, k))/ & - q_beta%vf(1)%sf(i, j, k) - end if - end do + do k = 0, p + do j = 0, n + do i = 0, m + if (q_beta%vf(1)%sf(i, j, k) > (1._wp - lag_params%valmaxvoid)) then + rhs_vf(E_idx)%sf(i, j, k) = rhs_vf(E_idx)%sf(i, j, k) - & + q_beta%vf(4)%sf(i, j, k)*(1._wp - q_beta%vf(1)%sf(i, j, k))/ & + q_beta%vf(1)%sf(i, j, k) + end if end do end do + end do $:END_GPU_PARALLEL_LOOP() end do @@ -844,15 +844,15 @@ contains call nvtxStartRange("BUBBLES-LAGRANGE-KERNELS") $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=4) - do i = 1, q_beta_idx - do l = idwbuff(3)%beg, idwbuff(3)%end - do k = idwbuff(2)%beg, idwbuff(2)%end - do j = idwbuff(1)%beg, idwbuff(1)%end - q_beta%vf(i)%sf(j, k, l) = 0._wp - end do + do i = 1, q_beta_idx + do l = idwbuff(3)%beg, idwbuff(3)%end + do k = idwbuff(2)%beg, idwbuff(2)%end + do j = idwbuff(1)%beg, idwbuff(1)%end + q_beta%vf(i)%sf(j, k, l) = 0._wp end do end do end do + end do $:END_GPU_PARALLEL_LOOP() call s_smoothfunction(nBubs, intfc_rad, intfc_vel, & @@ -860,16 +860,16 @@ contains !Store 1-beta $:GPU_PARALLEL_LOOP(private='[j,k,l]', collapse=3) - do l = idwbuff(3)%beg, idwbuff(3)%end - do k = idwbuff(2)%beg, idwbuff(2)%end - do j = idwbuff(1)%beg, idwbuff(1)%end - q_beta%vf(1)%sf(j, k, l) = 1._wp - q_beta%vf(1)%sf(j, k, l) - ! Limiting void fraction given max value - q_beta%vf(1)%sf(j, k, l) = max(q_beta%vf(1)%sf(j, k, l), & - 1._wp - lag_params%valmaxvoid) - end do + do l = idwbuff(3)%beg, idwbuff(3)%end + do k = idwbuff(2)%beg, idwbuff(2)%end + do j = idwbuff(1)%beg, idwbuff(1)%end + q_beta%vf(1)%sf(j, k, l) = 1._wp - q_beta%vf(1)%sf(j, k, l) + ! Limiting void fraction given max value + q_beta%vf(1)%sf(j, k, l) = max(q_beta%vf(1)%sf(j, k, l), & + 1._wp - lag_params%valmaxvoid) end do end do + end do $:END_GPU_PARALLEL_LOOP() call nvtxEndRange @@ -1102,15 +1102,15 @@ contains if (time_stepper == 1) then ! 1st order TVD RK $:GPU_PARALLEL_LOOP(private='[k]') - do k = 1, nBubs - !u{1} = u{n} + dt * RHS{n} - intfc_rad(k, 1) = intfc_rad(k, 1) + dt*intfc_draddt(k, 1) - intfc_vel(k, 1) = intfc_vel(k, 1) + dt*intfc_dveldt(k, 1) - mtn_pos(k, 1:3, 1) = mtn_pos(k, 1:3, 1) + dt*mtn_dposdt(k, 1:3, 1) - mtn_vel(k, 1:3, 1) = mtn_vel(k, 1:3, 1) + dt*mtn_dveldt(k, 1:3, 1) - gas_p(k, 1) = gas_p(k, 1) + dt*gas_dpdt(k, 1) - gas_mv(k, 1) = gas_mv(k, 1) + dt*gas_dmvdt(k, 1) - end do + do k = 1, nBubs + !u{1} = u{n} + dt * RHS{n} + intfc_rad(k, 1) = intfc_rad(k, 1) + dt*intfc_draddt(k, 1) + intfc_vel(k, 1) = intfc_vel(k, 1) + dt*intfc_dveldt(k, 1) + mtn_pos(k, 1:3, 1) = mtn_pos(k, 1:3, 1) + dt*mtn_dposdt(k, 1:3, 1) + mtn_vel(k, 1:3, 1) = mtn_vel(k, 1:3, 1) + dt*mtn_dveldt(k, 1:3, 1) + gas_p(k, 1) = gas_p(k, 1) + dt*gas_dpdt(k, 1) + gas_mv(k, 1) = gas_mv(k, 1) + dt*gas_dmvdt(k, 1) + end do $:END_GPU_PARALLEL_LOOP() call s_transfer_data_to_tmp() @@ -1125,28 +1125,28 @@ contains elseif (time_stepper == 2) then ! 2nd order TVD RK if (stage == 1) then $:GPU_PARALLEL_LOOP(private='[k]') - do k = 1, nBubs - !u{1} = u{n} + dt * RHS{n} - intfc_rad(k, 2) = intfc_rad(k, 1) + dt*intfc_draddt(k, 1) - intfc_vel(k, 2) = intfc_vel(k, 1) + dt*intfc_dveldt(k, 1) - mtn_pos(k, 1:3, 2) = mtn_pos(k, 1:3, 1) + dt*mtn_dposdt(k, 1:3, 1) - mtn_vel(k, 1:3, 2) = mtn_vel(k, 1:3, 1) + dt*mtn_dveldt(k, 1:3, 1) - gas_p(k, 2) = gas_p(k, 1) + dt*gas_dpdt(k, 1) - gas_mv(k, 2) = gas_mv(k, 1) + dt*gas_dmvdt(k, 1) - end do + do k = 1, nBubs + !u{1} = u{n} + dt * RHS{n} + intfc_rad(k, 2) = intfc_rad(k, 1) + dt*intfc_draddt(k, 1) + intfc_vel(k, 2) = intfc_vel(k, 1) + dt*intfc_dveldt(k, 1) + mtn_pos(k, 1:3, 2) = mtn_pos(k, 1:3, 1) + dt*mtn_dposdt(k, 1:3, 1) + mtn_vel(k, 1:3, 2) = mtn_vel(k, 1:3, 1) + dt*mtn_dveldt(k, 1:3, 1) + gas_p(k, 2) = gas_p(k, 1) + dt*gas_dpdt(k, 1) + gas_mv(k, 2) = gas_mv(k, 1) + dt*gas_dmvdt(k, 1) + end do $:END_GPU_PARALLEL_LOOP() elseif (stage == 2) then $:GPU_PARALLEL_LOOP(private='[k]') - do k = 1, nBubs - !u{1} = u{n} + (1/2) * dt * (RHS{n} + RHS{1}) - intfc_rad(k, 1) = intfc_rad(k, 1) + dt*(intfc_draddt(k, 1) + intfc_draddt(k, 2))/2._wp - intfc_vel(k, 1) = intfc_vel(k, 1) + dt*(intfc_dveldt(k, 1) + intfc_dveldt(k, 2))/2._wp - mtn_pos(k, 1:3, 1) = mtn_pos(k, 1:3, 1) + dt*(mtn_dposdt(k, 1:3, 1) + mtn_dposdt(k, 1:3, 2))/2._wp - mtn_vel(k, 1:3, 1) = mtn_vel(k, 1:3, 1) + dt*(mtn_dveldt(k, 1:3, 1) + mtn_dveldt(k, 1:3, 2))/2._wp - gas_p(k, 1) = gas_p(k, 1) + dt*(gas_dpdt(k, 1) + gas_dpdt(k, 2))/2._wp - gas_mv(k, 1) = gas_mv(k, 1) + dt*(gas_dmvdt(k, 1) + gas_dmvdt(k, 2))/2._wp - end do + do k = 1, nBubs + !u{1} = u{n} + (1/2) * dt * (RHS{n} + RHS{1}) + intfc_rad(k, 1) = intfc_rad(k, 1) + dt*(intfc_draddt(k, 1) + intfc_draddt(k, 2))/2._wp + intfc_vel(k, 1) = intfc_vel(k, 1) + dt*(intfc_dveldt(k, 1) + intfc_dveldt(k, 2))/2._wp + mtn_pos(k, 1:3, 1) = mtn_pos(k, 1:3, 1) + dt*(mtn_dposdt(k, 1:3, 1) + mtn_dposdt(k, 1:3, 2))/2._wp + mtn_vel(k, 1:3, 1) = mtn_vel(k, 1:3, 1) + dt*(mtn_dveldt(k, 1:3, 1) + mtn_dveldt(k, 1:3, 2))/2._wp + gas_p(k, 1) = gas_p(k, 1) + dt*(gas_dpdt(k, 1) + gas_dpdt(k, 2))/2._wp + gas_mv(k, 1) = gas_mv(k, 1) + dt*(gas_dmvdt(k, 1) + gas_dmvdt(k, 2))/2._wp + end do $:END_GPU_PARALLEL_LOOP() call s_transfer_data_to_tmp() @@ -1163,40 +1163,40 @@ contains elseif (time_stepper == 3) then ! 3rd order TVD RK if (stage == 1) then $:GPU_PARALLEL_LOOP(private='[k]') - do k = 1, nBubs - !u{1} = u{n} + dt * RHS{n} - intfc_rad(k, 2) = intfc_rad(k, 1) + dt*intfc_draddt(k, 1) - intfc_vel(k, 2) = intfc_vel(k, 1) + dt*intfc_dveldt(k, 1) - mtn_pos(k, 1:3, 2) = mtn_pos(k, 1:3, 1) + dt*mtn_dposdt(k, 1:3, 1) - mtn_vel(k, 1:3, 2) = mtn_vel(k, 1:3, 1) + dt*mtn_dveldt(k, 1:3, 1) - gas_p(k, 2) = gas_p(k, 1) + dt*gas_dpdt(k, 1) - gas_mv(k, 2) = gas_mv(k, 1) + dt*gas_dmvdt(k, 1) - end do + do k = 1, nBubs + !u{1} = u{n} + dt * RHS{n} + intfc_rad(k, 2) = intfc_rad(k, 1) + dt*intfc_draddt(k, 1) + intfc_vel(k, 2) = intfc_vel(k, 1) + dt*intfc_dveldt(k, 1) + mtn_pos(k, 1:3, 2) = mtn_pos(k, 1:3, 1) + dt*mtn_dposdt(k, 1:3, 1) + mtn_vel(k, 1:3, 2) = mtn_vel(k, 1:3, 1) + dt*mtn_dveldt(k, 1:3, 1) + gas_p(k, 2) = gas_p(k, 1) + dt*gas_dpdt(k, 1) + gas_mv(k, 2) = gas_mv(k, 1) + dt*gas_dmvdt(k, 1) + end do $:END_GPU_PARALLEL_LOOP() elseif (stage == 2) then $:GPU_PARALLEL_LOOP(private='[k]') - do k = 1, nBubs - !u{2} = u{n} + (1/4) * dt * [RHS{n} + RHS{1}] - intfc_rad(k, 2) = intfc_rad(k, 1) + dt*(intfc_draddt(k, 1) + intfc_draddt(k, 2))/4._wp - intfc_vel(k, 2) = intfc_vel(k, 1) + dt*(intfc_dveldt(k, 1) + intfc_dveldt(k, 2))/4._wp - mtn_pos(k, 1:3, 2) = mtn_pos(k, 1:3, 1) + dt*(mtn_dposdt(k, 1:3, 1) + mtn_dposdt(k, 1:3, 2))/4._wp - mtn_vel(k, 1:3, 2) = mtn_vel(k, 1:3, 1) + dt*(mtn_dveldt(k, 1:3, 1) + mtn_dveldt(k, 1:3, 2))/4._wp - gas_p(k, 2) = gas_p(k, 1) + dt*(gas_dpdt(k, 1) + gas_dpdt(k, 2))/4._wp - gas_mv(k, 2) = gas_mv(k, 1) + dt*(gas_dmvdt(k, 1) + gas_dmvdt(k, 2))/4._wp - end do + do k = 1, nBubs + !u{2} = u{n} + (1/4) * dt * [RHS{n} + RHS{1}] + intfc_rad(k, 2) = intfc_rad(k, 1) + dt*(intfc_draddt(k, 1) + intfc_draddt(k, 2))/4._wp + intfc_vel(k, 2) = intfc_vel(k, 1) + dt*(intfc_dveldt(k, 1) + intfc_dveldt(k, 2))/4._wp + mtn_pos(k, 1:3, 2) = mtn_pos(k, 1:3, 1) + dt*(mtn_dposdt(k, 1:3, 1) + mtn_dposdt(k, 1:3, 2))/4._wp + mtn_vel(k, 1:3, 2) = mtn_vel(k, 1:3, 1) + dt*(mtn_dveldt(k, 1:3, 1) + mtn_dveldt(k, 1:3, 2))/4._wp + gas_p(k, 2) = gas_p(k, 1) + dt*(gas_dpdt(k, 1) + gas_dpdt(k, 2))/4._wp + gas_mv(k, 2) = gas_mv(k, 1) + dt*(gas_dmvdt(k, 1) + gas_dmvdt(k, 2))/4._wp + end do $:END_GPU_PARALLEL_LOOP() elseif (stage == 3) then $:GPU_PARALLEL_LOOP(private='[k]') - do k = 1, nBubs - !u{n+1} = u{n} + (2/3) * dt * [(1/4)* RHS{n} + (1/4)* RHS{1} + RHS{2}] - intfc_rad(k, 1) = intfc_rad(k, 1) + (2._wp/3._wp)*dt*(intfc_draddt(k, 1)/4._wp + intfc_draddt(k, 2)/4._wp + intfc_draddt(k, 3)) - intfc_vel(k, 1) = intfc_vel(k, 1) + (2._wp/3._wp)*dt*(intfc_dveldt(k, 1)/4._wp + intfc_dveldt(k, 2)/4._wp + intfc_dveldt(k, 3)) - mtn_pos(k, 1:3, 1) = mtn_pos(k, 1:3, 1) + (2._wp/3._wp)*dt*(mtn_dposdt(k, 1:3, 1)/4._wp + mtn_dposdt(k, 1:3, 2)/4._wp + mtn_dposdt(k, 1:3, 3)) - mtn_vel(k, 1:3, 1) = mtn_vel(k, 1:3, 1) + (2._wp/3._wp)*dt*(mtn_dveldt(k, 1:3, 1)/4._wp + mtn_dveldt(k, 1:3, 2)/4._wp + mtn_dveldt(k, 1:3, 3)) - gas_p(k, 1) = gas_p(k, 1) + (2._wp/3._wp)*dt*(gas_dpdt(k, 1)/4._wp + gas_dpdt(k, 2)/4._wp + gas_dpdt(k, 3)) - gas_mv(k, 1) = gas_mv(k, 1) + (2._wp/3._wp)*dt*(gas_dmvdt(k, 1)/4._wp + gas_dmvdt(k, 2)/4._wp + gas_dmvdt(k, 3)) - end do + do k = 1, nBubs + !u{n+1} = u{n} + (2/3) * dt * [(1/4)* RHS{n} + (1/4)* RHS{1} + RHS{2}] + intfc_rad(k, 1) = intfc_rad(k, 1) + (2._wp/3._wp)*dt*(intfc_draddt(k, 1)/4._wp + intfc_draddt(k, 2)/4._wp + intfc_draddt(k, 3)) + intfc_vel(k, 1) = intfc_vel(k, 1) + (2._wp/3._wp)*dt*(intfc_dveldt(k, 1)/4._wp + intfc_dveldt(k, 2)/4._wp + intfc_dveldt(k, 3)) + mtn_pos(k, 1:3, 1) = mtn_pos(k, 1:3, 1) + (2._wp/3._wp)*dt*(mtn_dposdt(k, 1:3, 1)/4._wp + mtn_dposdt(k, 1:3, 2)/4._wp + mtn_dposdt(k, 1:3, 3)) + mtn_vel(k, 1:3, 1) = mtn_vel(k, 1:3, 1) + (2._wp/3._wp)*dt*(mtn_dveldt(k, 1:3, 1)/4._wp + mtn_dveldt(k, 1:3, 2)/4._wp + mtn_dveldt(k, 1:3, 3)) + gas_p(k, 1) = gas_p(k, 1) + (2._wp/3._wp)*dt*(gas_dpdt(k, 1)/4._wp + gas_dpdt(k, 2)/4._wp + gas_dpdt(k, 3)) + gas_mv(k, 1) = gas_mv(k, 1) + (2._wp/3._wp)*dt*(gas_dmvdt(k, 1)/4._wp + gas_dmvdt(k, 2)/4._wp + gas_dmvdt(k, 3)) + end do $:END_GPU_PARALLEL_LOOP() call s_transfer_data_to_tmp() @@ -1275,16 +1275,16 @@ contains integer :: k $:GPU_PARALLEL_LOOP(private='[k]') - do k = 1, nBubs - gas_p(k, 2) = gas_p(k, 1) - gas_mv(k, 2) = gas_mv(k, 1) - intfc_rad(k, 2) = intfc_rad(k, 1) - intfc_vel(k, 2) = intfc_vel(k, 1) - mtn_pos(k, 1:3, 2) = mtn_pos(k, 1:3, 1) - mtn_posPrev(k, 1:3, 2) = mtn_posPrev(k, 1:3, 1) - mtn_vel(k, 1:3, 2) = mtn_vel(k, 1:3, 1) - mtn_s(k, 1:3, 2) = mtn_s(k, 1:3, 1) - end do + do k = 1, nBubs + gas_p(k, 2) = gas_p(k, 1) + gas_mv(k, 2) = gas_mv(k, 1) + intfc_rad(k, 2) = intfc_rad(k, 1) + intfc_vel(k, 2) = intfc_vel(k, 1) + mtn_pos(k, 1:3, 2) = mtn_pos(k, 1:3, 1) + mtn_posPrev(k, 1:3, 2) = mtn_posPrev(k, 1:3, 1) + mtn_vel(k, 1:3, 2) = mtn_vel(k, 1:3, 1) + mtn_s(k, 1:3, 2) = mtn_s(k, 1:3, 1) + end do $:END_GPU_PARALLEL_LOOP() end subroutine s_transfer_data_to_tmp @@ -1374,47 +1374,47 @@ contains if (dir == 1) then ! Gradient in x dir. $:GPU_PARALLEL_LOOP(private='[i,j,k]', collapse=3) - do k = 0, p - do j = 0, n - do i = 0, m - dq%sf(i, j, k) = q%sf(i, j, k)*(dx(i + 1) - dx(i - 1)) & - + q%sf(i + 1, j, k)*(dx(i) + dx(i - 1)) & - - q%sf(i - 1, j, k)*(dx(i) + dx(i + 1)) - dq%sf(i, j, k) = dq%sf(i, j, k)/ & - ((dx(i) + dx(i - 1))*(dx(i) + dx(i + 1))) - end do + do k = 0, p + do j = 0, n + do i = 0, m + dq%sf(i, j, k) = q%sf(i, j, k)*(dx(i + 1) - dx(i - 1)) & + + q%sf(i + 1, j, k)*(dx(i) + dx(i - 1)) & + - q%sf(i - 1, j, k)*(dx(i) + dx(i + 1)) + dq%sf(i, j, k) = dq%sf(i, j, k)/ & + ((dx(i) + dx(i - 1))*(dx(i) + dx(i + 1))) end do end do + end do $:END_GPU_PARALLEL_LOOP() elseif (dir == 2) then ! Gradient in y dir. $:GPU_PARALLEL_LOOP(private='[i,j,k]', collapse=3) - do k = 0, p - do j = 0, n - do i = 0, m - dq%sf(i, j, k) = q%sf(i, j, k)*(dy(j + 1) - dy(j - 1)) & - + q%sf(i, j + 1, k)*(dy(j) + dy(j - 1)) & - - q%sf(i, j - 1, k)*(dy(j) + dy(j + 1)) - dq%sf(i, j, k) = dq%sf(i, j, k)/ & - ((dy(j) + dy(j - 1))*(dy(j) + dy(j + 1))) - end do + do k = 0, p + do j = 0, n + do i = 0, m + dq%sf(i, j, k) = q%sf(i, j, k)*(dy(j + 1) - dy(j - 1)) & + + q%sf(i, j + 1, k)*(dy(j) + dy(j - 1)) & + - q%sf(i, j - 1, k)*(dy(j) + dy(j + 1)) + dq%sf(i, j, k) = dq%sf(i, j, k)/ & + ((dy(j) + dy(j - 1))*(dy(j) + dy(j + 1))) end do end do + end do $:END_GPU_PARALLEL_LOOP() elseif (dir == 3) then ! Gradient in z dir. $:GPU_PARALLEL_LOOP(private='[i,j,k]', collapse=3) - do k = 0, p - do j = 0, n - do i = 0, m - dq%sf(i, j, k) = q%sf(i, j, k)*(dz(k + 1) - dz(k - 1)) & - + q%sf(i, j, k + 1)*(dz(k) + dz(k - 1)) & - - q%sf(i, j, k - 1)*(dz(k) + dz(k + 1)) - dq%sf(i, j, k) = dq%sf(i, j, k)/ & - ((dz(k) + dz(k - 1))*(dz(k) + dz(k + 1))) - end do + do k = 0, p + do j = 0, n + do i = 0, m + dq%sf(i, j, k) = q%sf(i, j, k)*(dz(k + 1) - dz(k - 1)) & + + q%sf(i, j, k + 1)*(dz(k) + dz(k - 1)) & + - q%sf(i, j, k - 1)*(dz(k) + dz(k + 1)) + dq%sf(i, j, k) = dq%sf(i, j, k)/ & + ((dz(k) + dz(k - 1))*(dz(k) + dz(k + 1))) end do end do + end do $:END_GPU_PARALLEL_LOOP() end if @@ -1512,18 +1512,18 @@ contains lag_void_avg = 0._wp lag_vol = 0._wp $:GPU_PARALLEL_LOOP(private='[i,j,k]', collapse=3, reduction='[[lag_vol, lag_void_avg], [lag_void_max]]', reductionOp='[+, MAX]', copy='[lag_vol, lag_void_avg, lag_void_max]') - do k = 0, p - do j = 0, n - do i = 0, m - lag_void_max = max(lag_void_max, 1._wp - q_beta%vf(1)%sf(i, j, k)) - call s_get_char_vol(i, j, k, volcell) - if ((1._wp - q_beta%vf(1)%sf(i, j, k)) > 5.0d-11) then - lag_void_avg = lag_void_avg + (1._wp - q_beta%vf(1)%sf(i, j, k))*volcell - lag_vol = lag_vol + volcell - end if - end do + do k = 0, p + do j = 0, n + do i = 0, m + lag_void_max = max(lag_void_max, 1._wp - q_beta%vf(1)%sf(i, j, k)) + call s_get_char_vol(i, j, k, volcell) + if ((1._wp - q_beta%vf(1)%sf(i, j, k)) > 5.0d-11) then + lag_void_avg = lag_void_avg + (1._wp - q_beta%vf(1)%sf(i, j, k))*volcell + lag_vol = lag_vol + volcell + end if end do end do + end do $:END_GPU_PARALLEL_LOOP() #ifdef MFC_MPI @@ -1709,12 +1709,12 @@ contains $:GPU_PARALLEL_LOOP(private='[k]', reduction='[[Rmax_glb], [Rmin_glb]]', & & reductionOp='[MAX, MIN]', copy='[Rmax_glb,Rmin_glb]') - do k = 1, nBubs - Rmax_glb = max(Rmax_glb, intfc_rad(k, 1)/bub_R0(k)) - Rmin_glb = min(Rmin_glb, intfc_rad(k, 1)/bub_R0(k)) - Rmax_stats(k) = max(Rmax_stats(k), intfc_rad(k, 1)/bub_R0(k)) - Rmin_stats(k) = min(Rmin_stats(k), intfc_rad(k, 1)/bub_R0(k)) - end do + do k = 1, nBubs + Rmax_glb = max(Rmax_glb, intfc_rad(k, 1)/bub_R0(k)) + Rmin_glb = min(Rmin_glb, intfc_rad(k, 1)/bub_R0(k)) + Rmax_stats(k) = max(Rmax_stats(k), intfc_rad(k, 1)/bub_R0(k)) + Rmin_stats(k) = min(Rmin_stats(k), intfc_rad(k, 1)/bub_R0(k)) + end do $:END_GPU_PARALLEL_LOOP() end subroutine s_calculate_lag_bubble_stats diff --git a/src/simulation/m_bubbles_EL_kernels.fpp b/src/simulation/m_bubbles_EL_kernels.fpp index 7ee813bed..0607af437 100644 --- a/src/simulation/m_bubbles_EL_kernels.fpp +++ b/src/simulation/m_bubbles_EL_kernels.fpp @@ -56,40 +56,40 @@ contains integer :: l $:GPU_PARALLEL_LOOP(private='[l,s_coord,cell]') - do l = 1, nBubs + do l = 1, nBubs - volpart = 4._wp/3._wp*pi*lbk_rad(l, 2)**3._wp - s_coord(1:3) = lbk_s(l, 1:3, 2) - call s_get_cell(s_coord, cell) + volpart = 4._wp/3._wp*pi*lbk_rad(l, 2)**3._wp + s_coord(1:3) = lbk_s(l, 1:3, 2) + call s_get_cell(s_coord, cell) - strength_vol = volpart - strength_vel = 4._wp*pi*lbk_rad(l, 2)**2._wp*lbk_vel(l, 2) + strength_vol = volpart + strength_vel = 4._wp*pi*lbk_rad(l, 2)**2._wp*lbk_vel(l, 2) - if (num_dims == 2) then - Vol = dx(cell(1))*dy(cell(2))*lag_params%charwidth - if (cyl_coord) Vol = dx(cell(1))*dy(cell(2))*y_cc(cell(2))*2._wp*pi - else - Vol = dx(cell(1))*dy(cell(2))*dz(cell(3)) - end if + if (num_dims == 2) then + Vol = dx(cell(1))*dy(cell(2))*lag_params%charwidth + if (cyl_coord) Vol = dx(cell(1))*dy(cell(2))*y_cc(cell(2))*2._wp*pi + else + Vol = dx(cell(1))*dy(cell(2))*dz(cell(3)) + end if - !Update void fraction field - addFun1 = strength_vol/Vol - $:GPU_ATOMIC(atomic='update') - updatedvar%vf(1)%sf(cell(1), cell(2), cell(3)) = updatedvar%vf(1)%sf(cell(1), cell(2), cell(3)) + addFun1 + !Update void fraction field + addFun1 = strength_vol/Vol + $:GPU_ATOMIC(atomic='update') + updatedvar%vf(1)%sf(cell(1), cell(2), cell(3)) = updatedvar%vf(1)%sf(cell(1), cell(2), cell(3)) + addFun1 + + !Update time derivative of void fraction + addFun2 = strength_vel/Vol + $:GPU_ATOMIC(atomic='update') + updatedvar%vf(2)%sf(cell(1), cell(2), cell(3)) = updatedvar%vf(2)%sf(cell(1), cell(2), cell(3)) + addFun2 - !Update time derivative of void fraction - addFun2 = strength_vel/Vol + !Product of two smeared functions + !Update void fraction * time derivative of void fraction + if (lag_params%cluster_type >= 4) then + addFun3 = (strength_vol*strength_vel)/Vol $:GPU_ATOMIC(atomic='update') - updatedvar%vf(2)%sf(cell(1), cell(2), cell(3)) = updatedvar%vf(2)%sf(cell(1), cell(2), cell(3)) + addFun2 - - !Product of two smeared functions - !Update void fraction * time derivative of void fraction - if (lag_params%cluster_type >= 4) then - addFun3 = (strength_vol*strength_vel)/Vol - $:GPU_ATOMIC(atomic='update') - updatedvar%vf(5)%sf(cell(1), cell(2), cell(3)) = updatedvar%vf(5)%sf(cell(1), cell(2), cell(3)) + addFun3 - end if - end do + updatedvar%vf(5)%sf(cell(1), cell(2), cell(3)) = updatedvar%vf(5)%sf(cell(1), cell(2), cell(3)) + addFun3 + end if + end do $:END_GPU_PARALLEL_LOOP() end subroutine s_deltafunc @@ -122,80 +122,80 @@ contains if (p == 0) smearGridz = 1 $:GPU_PARALLEL_LOOP(private='[nodecoord,l,s_coord,cell,center]', copyin='[smearGrid,smearGridz]') - do l = 1, nBubs - nodecoord(1:3) = 0 - center(1:3) = 0._wp - volpart = 4._wp/3._wp*pi*lbk_rad(l, 2)**3._wp - s_coord(1:3) = lbk_s(l, 1:3, 2) - center(1:2) = lbk_pos(l, 1:2, 2) - if (p > 0) center(3) = lbk_pos(l, 3, 2) - call s_get_cell(s_coord, cell) - call s_compute_stddsv(cell, volpart, stddsv) - - strength_vol = volpart - strength_vel = 4._wp*pi*lbk_rad(l, 2)**2._wp*lbk_vel(l, 2) - - $:GPU_LOOP(collapse=3,private='[cellaux,nodecoord]') - do i = 1, smearGrid - do j = 1, smearGrid - do k = 1, smearGridz - cellaux(1) = cell(1) + i - (mapCells + 1) - cellaux(2) = cell(2) + j - (mapCells + 1) - cellaux(3) = cell(3) + k - (mapCells + 1) - if (p == 0) cellaux(3) = 0 - - !Check if the cells intended to smear the bubbles in are in the computational domain - !and redefine the cells for symmetric boundary - call s_check_celloutside(cellaux, celloutside) - - if (.not. celloutside) then - - nodecoord(1) = x_cc(cellaux(1)) - nodecoord(2) = y_cc(cellaux(2)) - if (p > 0) nodecoord(3) = z_cc(cellaux(3)) - call s_applygaussian(center, cellaux, nodecoord, stddsv, 0._wp, func) - if (lag_params%cluster_type >= 4) call s_applygaussian(center, cellaux, nodecoord, stddsv, 1._wp, func2) - - ! Relocate cells for bubbles intersecting symmetric boundaries - if (any((/bc_x%beg, bc_x%end, bc_y%beg, bc_y%end, bc_z%beg, bc_z%end/) == BC_REFLECTIVE)) then - call s_shift_cell_symmetric_bc(cellaux, cell) - end if - else - func = 0._wp - func2 = 0._wp - cellaux(1) = cell(1) - cellaux(2) = cell(2) - cellaux(3) = cell(3) - if (p == 0) cellaux(3) = 0 + do l = 1, nBubs + nodecoord(1:3) = 0 + center(1:3) = 0._wp + volpart = 4._wp/3._wp*pi*lbk_rad(l, 2)**3._wp + s_coord(1:3) = lbk_s(l, 1:3, 2) + center(1:2) = lbk_pos(l, 1:2, 2) + if (p > 0) center(3) = lbk_pos(l, 3, 2) + call s_get_cell(s_coord, cell) + call s_compute_stddsv(cell, volpart, stddsv) + + strength_vol = volpart + strength_vel = 4._wp*pi*lbk_rad(l, 2)**2._wp*lbk_vel(l, 2) + + $:GPU_LOOP(collapse=3,private='[cellaux,nodecoord]') + do i = 1, smearGrid + do j = 1, smearGrid + do k = 1, smearGridz + cellaux(1) = cell(1) + i - (mapCells + 1) + cellaux(2) = cell(2) + j - (mapCells + 1) + cellaux(3) = cell(3) + k - (mapCells + 1) + if (p == 0) cellaux(3) = 0 + + !Check if the cells intended to smear the bubbles in are in the computational domain + !and redefine the cells for symmetric boundary + call s_check_celloutside(cellaux, celloutside) + + if (.not. celloutside) then + + nodecoord(1) = x_cc(cellaux(1)) + nodecoord(2) = y_cc(cellaux(2)) + if (p > 0) nodecoord(3) = z_cc(cellaux(3)) + call s_applygaussian(center, cellaux, nodecoord, stddsv, 0._wp, func) + if (lag_params%cluster_type >= 4) call s_applygaussian(center, cellaux, nodecoord, stddsv, 1._wp, func2) + + ! Relocate cells for bubbles intersecting symmetric boundaries + if (any((/bc_x%beg, bc_x%end, bc_y%beg, bc_y%end, bc_z%beg, bc_z%end/) == BC_REFLECTIVE)) then + call s_shift_cell_symmetric_bc(cellaux, cell) end if - - !Update void fraction field - addFun1 = func*strength_vol - $:GPU_ATOMIC(atomic='update') - updatedvar%vf(1)%sf(cellaux(1), cellaux(2), cellaux(3)) = & - updatedvar%vf(1)%sf(cellaux(1), cellaux(2), cellaux(3)) & - + addFun1 - - !Update time derivative of void fraction - addFun2 = func*strength_vel + else + func = 0._wp + func2 = 0._wp + cellaux(1) = cell(1) + cellaux(2) = cell(2) + cellaux(3) = cell(3) + if (p == 0) cellaux(3) = 0 + end if + + !Update void fraction field + addFun1 = func*strength_vol + $:GPU_ATOMIC(atomic='update') + updatedvar%vf(1)%sf(cellaux(1), cellaux(2), cellaux(3)) = & + updatedvar%vf(1)%sf(cellaux(1), cellaux(2), cellaux(3)) & + + addFun1 + + !Update time derivative of void fraction + addFun2 = func*strength_vel + $:GPU_ATOMIC(atomic='update') + updatedvar%vf(2)%sf(cellaux(1), cellaux(2), cellaux(3)) = & + updatedvar%vf(2)%sf(cellaux(1), cellaux(2), cellaux(3)) & + + addFun2 + + !Product of two smeared functions + !Update void fraction * time derivative of void fraction + if (lag_params%cluster_type >= 4) then + addFun3 = func2*strength_vol*strength_vel $:GPU_ATOMIC(atomic='update') - updatedvar%vf(2)%sf(cellaux(1), cellaux(2), cellaux(3)) = & - updatedvar%vf(2)%sf(cellaux(1), cellaux(2), cellaux(3)) & - + addFun2 - - !Product of two smeared functions - !Update void fraction * time derivative of void fraction - if (lag_params%cluster_type >= 4) then - addFun3 = func2*strength_vol*strength_vel - $:GPU_ATOMIC(atomic='update') - updatedvar%vf(5)%sf(cellaux(1), cellaux(2), cellaux(3)) = & - updatedvar%vf(5)%sf(cellaux(1), cellaux(2), cellaux(3)) & - + addFun3 - end if - end do + updatedvar%vf(5)%sf(cellaux(1), cellaux(2), cellaux(3)) = & + updatedvar%vf(5)%sf(cellaux(1), cellaux(2), cellaux(3)) & + + addFun3 + end if end do end do end do + end do $:END_GPU_PARALLEL_LOOP() end subroutine s_gaussian diff --git a/src/simulation/m_cbc.fpp b/src/simulation/m_cbc.fpp index 29332f0c5..fec4fc2dc 100644 --- a/src/simulation/m_cbc.fpp +++ b/src/simulation/m_cbc.fpp @@ -711,29 +711,29 @@ contains is1, is2, is3, idwbuff(2)%beg, idwbuff(3)%beg) $:GPU_PARALLEL_LOOP(private='[i,r,k]', collapse=3) - do i = 1, flux_cbc_index - do r = is3%beg, is3%end - do k = is2%beg, is2%end - flux_rs${XYZ}$_vf_l(0, k, r, i) = F_rs${XYZ}$_vf(0, k, r, i) & - + pi_coef_${XYZ}$ (0, 0, cbc_loc)* & - (F_rs${XYZ}$_vf(1, k, r, i) - & - F_rs${XYZ}$_vf(0, k, r, i)) - end do + do i = 1, flux_cbc_index + do r = is3%beg, is3%end + do k = is2%beg, is2%end + flux_rs${XYZ}$_vf_l(0, k, r, i) = F_rs${XYZ}$_vf(0, k, r, i) & + + pi_coef_${XYZ}$ (0, 0, cbc_loc)* & + (F_rs${XYZ}$_vf(1, k, r, i) - & + F_rs${XYZ}$_vf(0, k, r, i)) end do end do + end do $:END_GPU_PARALLEL_LOOP() $:GPU_PARALLEL_LOOP(private='[i,r,k]', collapse=3) - do i = advxb, advxe - do r = is3%beg, is3%end - do k = is2%beg, is2%end - flux_src_rs${XYZ}$_vf_l(0, k, r, i) = F_src_rs${XYZ}$_vf(0, k, r, i) + & - (F_src_rs${XYZ}$_vf(1, k, r, i) - & - F_src_rs${XYZ}$_vf(0, k, r, i)) & - *pi_coef_${XYZ}$ (0, 0, cbc_loc) - end do + do i = advxb, advxe + do r = is3%beg, is3%end + do k = is2%beg, is2%end + flux_src_rs${XYZ}$_vf_l(0, k, r, i) = F_src_rs${XYZ}$_vf(0, k, r, i) + & + (F_src_rs${XYZ}$_vf(1, k, r, i) - & + F_src_rs${XYZ}$_vf(0, k, r, i)) & + *pi_coef_${XYZ}$ (0, 0, cbc_loc) end do end do + end do $:END_GPU_PARALLEL_LOOP() ! PI4 of flux_rs_vf and flux_src_rs_vf at j = 1/2, 3/2 @@ -744,388 +744,388 @@ contains is1, is2, is3, idwbuff(2)%beg, idwbuff(3)%beg) $:GPU_PARALLEL_LOOP(private='[i,j,r,k]', collapse=4) - do i = 1, flux_cbc_index - do j = 0, 1 - do r = is3%beg, is3%end - do k = is2%beg, is2%end - flux_rs${XYZ}$_vf_l(j, k, r, i) = F_rs${XYZ}$_vf(j, k, r, i) & - + pi_coef_${XYZ}$ (j, 0, cbc_loc)* & - (F_rs${XYZ}$_vf(3, k, r, i) - & - F_rs${XYZ}$_vf(2, k, r, i)) & - + pi_coef_${XYZ}$ (j, 1, cbc_loc)* & - (F_rs${XYZ}$_vf(2, k, r, i) - & - F_rs${XYZ}$_vf(1, k, r, i)) & - + pi_coef_${XYZ}$ (j, 2, cbc_loc)* & - (F_rs${XYZ}$_vf(1, k, r, i) - & - F_rs${XYZ}$_vf(0, k, r, i)) - end do + do i = 1, flux_cbc_index + do j = 0, 1 + do r = is3%beg, is3%end + do k = is2%beg, is2%end + flux_rs${XYZ}$_vf_l(j, k, r, i) = F_rs${XYZ}$_vf(j, k, r, i) & + + pi_coef_${XYZ}$ (j, 0, cbc_loc)* & + (F_rs${XYZ}$_vf(3, k, r, i) - & + F_rs${XYZ}$_vf(2, k, r, i)) & + + pi_coef_${XYZ}$ (j, 1, cbc_loc)* & + (F_rs${XYZ}$_vf(2, k, r, i) - & + F_rs${XYZ}$_vf(1, k, r, i)) & + + pi_coef_${XYZ}$ (j, 2, cbc_loc)* & + (F_rs${XYZ}$_vf(1, k, r, i) - & + F_rs${XYZ}$_vf(0, k, r, i)) end do end do end do + end do $:END_GPU_PARALLEL_LOOP() $:GPU_PARALLEL_LOOP(private='[i,j,r,k]', collapse=4) - do i = advxb, advxe - do j = 0, 1 - do r = is3%beg, is3%end - do k = is2%beg, is2%end - flux_src_rs${XYZ}$_vf_l(j, k, r, i) = F_src_rs${XYZ}$_vf(j, k, r, i) + & - (F_src_rs${XYZ}$_vf(3, k, r, i) - & - F_src_rs${XYZ}$_vf(2, k, r, i)) & - *pi_coef_${XYZ}$ (j, 0, cbc_loc) + & - (F_src_rs${XYZ}$_vf(2, k, r, i) - & - F_src_rs${XYZ}$_vf(1, k, r, i)) & - *pi_coef_${XYZ}$ (j, 1, cbc_loc) + & - (F_src_rs${XYZ}$_vf(1, k, r, i) - & - F_src_rs${XYZ}$_vf(0, k, r, i)) & - *pi_coef_${XYZ}$ (j, 2, cbc_loc) - end do + do i = advxb, advxe + do j = 0, 1 + do r = is3%beg, is3%end + do k = is2%beg, is2%end + flux_src_rs${XYZ}$_vf_l(j, k, r, i) = F_src_rs${XYZ}$_vf(j, k, r, i) + & + (F_src_rs${XYZ}$_vf(3, k, r, i) - & + F_src_rs${XYZ}$_vf(2, k, r, i)) & + *pi_coef_${XYZ}$ (j, 0, cbc_loc) + & + (F_src_rs${XYZ}$_vf(2, k, r, i) - & + F_src_rs${XYZ}$_vf(1, k, r, i)) & + *pi_coef_${XYZ}$ (j, 1, cbc_loc) + & + (F_src_rs${XYZ}$_vf(1, k, r, i) - & + F_src_rs${XYZ}$_vf(0, k, r, i)) & + *pi_coef_${XYZ}$ (j, 2, cbc_loc) end do end do end do + end do $:END_GPU_PARALLEL_LOOP() end if ! FD2 or FD4 of RHS at j = 0 $:GPU_PARALLEL_LOOP(collapse=2, private='[r,k,alpha_rho, vel, adv_local, mf, dvel_ds, dadv_ds, Re_cbc, dalpha_rho_ds,dvel_dt, dadv_dt, dalpha_rho_dt, L, lambda, Ys, dYs_dt, dYs_ds, h_k, Cp_i, Gamma_i, Xs]') - do r = is3%beg, is3%end - do k = is2%beg, is2%end + do r = is3%beg, is3%end + do k = is2%beg, is2%end - ! Transferring the Primitive Variables - $:GPU_LOOP(parallelism='[seq]') - do i = 1, contxe - alpha_rho(i) = q_prim_rs${XYZ}$_vf(0, k, r, i) - end do + ! Transferring the Primitive Variables + $:GPU_LOOP(parallelism='[seq]') + do i = 1, contxe + alpha_rho(i) = q_prim_rs${XYZ}$_vf(0, k, r, i) + end do - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_dims - vel(i) = q_prim_rs${XYZ}$_vf(0, k, r, contxe + i) - end do + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_dims + vel(i) = q_prim_rs${XYZ}$_vf(0, k, r, contxe + i) + end do - vel_K_sum = 0._wp - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_dims - vel_K_sum = vel_K_sum + vel(i)**2._wp - end do + vel_K_sum = 0._wp + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_dims + vel_K_sum = vel_K_sum + vel(i)**2._wp + end do - pres = q_prim_rs${XYZ}$_vf(0, k, r, E_idx) + pres = q_prim_rs${XYZ}$_vf(0, k, r, E_idx) + $:GPU_LOOP(parallelism='[seq]') + do i = 1, advxe - E_idx + adv_local(i) = q_prim_rs${XYZ}$_vf(0, k, r, E_idx + i) + end do + + if (bubbles_euler) then + call s_convert_species_to_mixture_variables_bubbles_acc(rho, gamma, pi_inf, qv, adv_local, alpha_rho, Re_cbc) + else + call s_convert_species_to_mixture_variables_acc(rho, gamma, pi_inf, qv, adv_local, alpha_rho, Re_cbc) + end if + + $:GPU_LOOP(parallelism='[seq]') + do i = 1, contxe + mf(i) = alpha_rho(i)/rho + end do + + if (chemistry) then $:GPU_LOOP(parallelism='[seq]') - do i = 1, advxe - E_idx - adv_local(i) = q_prim_rs${XYZ}$_vf(0, k, r, E_idx + i) + do i = chemxb, chemxe + Ys(i - chemxb + 1) = q_prim_rs${XYZ}$_vf(0, k, r, i) end do - if (bubbles_euler) then - call s_convert_species_to_mixture_variables_bubbles_acc(rho, gamma, pi_inf, qv, adv_local, alpha_rho, Re_cbc) - else - call s_convert_species_to_mixture_variables_acc(rho, gamma, pi_inf, qv, adv_local, alpha_rho, Re_cbc) + call get_mixture_molecular_weight(Ys, Mw) + R_gas = gas_constant/Mw + T = pres/rho/R_gas + call get_mixture_specific_heat_cp_mass(T, Ys, Cp) + call get_mixture_energy_mass(T, Ys, e_mix) + E = rho*e_mix + 5.e-1_wp*rho*vel_K_sum + if (chem_params%gamma_method == 1) then + !> gamma_method = 1: Ref. Section 2.3.1 Formulation of doi:10.7907/ZKW8-ES97. + call get_mole_fractions(Mw, Ys, Xs) + call get_species_specific_heats_r(T, Cp_i) + Gamma_i = Cp_i/(Cp_i - 1.0_wp) + gamma = sum(Xs(:)/(Gamma_i(:) - 1.0_wp)) + else if (chem_params%gamma_method == 2) then + !> gamma_method = 2: c_p / c_v where c_p, c_v are specific heats. + call get_mixture_specific_heat_cv_mass(T, Ys, Cv) + gamma = 1.0_wp/(Cp/Cv - 1.0_wp) end if + else + E = gamma*pres + pi_inf + 5.e-1_wp*rho*vel_K_sum + end if - $:GPU_LOOP(parallelism='[seq]') - do i = 1, contxe - mf(i) = alpha_rho(i)/rho - end do + H = (E + pres)/rho - if (chemistry) then - $:GPU_LOOP(parallelism='[seq]') - do i = chemxb, chemxe - Ys(i - chemxb + 1) = q_prim_rs${XYZ}$_vf(0, k, r, i) - end do + ! Compute mixture sound speed + call s_compute_speed_of_sound(pres, rho, gamma, pi_inf, H, adv_local, vel_K_sum, 0._wp, c) - call get_mixture_molecular_weight(Ys, Mw) - R_gas = gas_constant/Mw - T = pres/rho/R_gas - call get_mixture_specific_heat_cp_mass(T, Ys, Cp) - call get_mixture_energy_mass(T, Ys, e_mix) - E = rho*e_mix + 5.e-1_wp*rho*vel_K_sum - if (chem_params%gamma_method == 1) then - !> gamma_method = 1: Ref. Section 2.3.1 Formulation of doi:10.7907/ZKW8-ES97. - call get_mole_fractions(Mw, Ys, Xs) - call get_species_specific_heats_r(T, Cp_i) - Gamma_i = Cp_i/(Cp_i - 1.0_wp) - gamma = sum(Xs(:)/(Gamma_i(:) - 1.0_wp)) - else if (chem_params%gamma_method == 2) then - !> gamma_method = 2: c_p / c_v where c_p, c_v are specific heats. - call get_mixture_specific_heat_cv_mass(T, Ys, Cv) - gamma = 1.0_wp/(Cp/Cv - 1.0_wp) - end if - else - E = gamma*pres + pi_inf + 5.e-1_wp*rho*vel_K_sum - end if + ! First-Order Spatial Derivatives of Primitive Variables - H = (E + pres)/rho + $:GPU_LOOP(parallelism='[seq]') + do i = 1, contxe + dalpha_rho_ds(i) = 0._wp + end do - ! Compute mixture sound speed - call s_compute_speed_of_sound(pres, rho, gamma, pi_inf, H, adv_local, vel_K_sum, 0._wp, c) + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_dims + dvel_ds(i) = 0._wp + end do - ! First-Order Spatial Derivatives of Primitive Variables + dpres_ds = 0._wp + $:GPU_LOOP(parallelism='[seq]') + do i = 1, advxe - E_idx + dadv_ds(i) = 0._wp + end do + if (chemistry) then $:GPU_LOOP(parallelism='[seq]') - do i = 1, contxe - dalpha_rho_ds(i) = 0._wp + do i = 1, num_species + dYs_ds(i) = 0._wp end do + end if + + $:GPU_LOOP(parallelism='[seq]') + do j = 0, buff_size + $:GPU_LOOP(parallelism='[seq]') + do i = 1, contxe + dalpha_rho_ds(i) = q_prim_rs${XYZ}$_vf(j, k, r, i)* & + fd_coef_${XYZ}$ (j, cbc_loc) + & + dalpha_rho_ds(i) + end do $:GPU_LOOP(parallelism='[seq]') do i = 1, num_dims - dvel_ds(i) = 0._wp + dvel_ds(i) = q_prim_rs${XYZ}$_vf(j, k, r, contxe + i)* & + fd_coef_${XYZ}$ (j, cbc_loc) + & + dvel_ds(i) end do - dpres_ds = 0._wp + dpres_ds = q_prim_rs${XYZ}$_vf(j, k, r, E_idx)* & + fd_coef_${XYZ}$ (j, cbc_loc) + & + dpres_ds $:GPU_LOOP(parallelism='[seq]') do i = 1, advxe - E_idx - dadv_ds(i) = 0._wp + dadv_ds(i) = q_prim_rs${XYZ}$_vf(j, k, r, E_idx + i)* & + fd_coef_${XYZ}$ (j, cbc_loc) + & + dadv_ds(i) end do if (chemistry) then $:GPU_LOOP(parallelism='[seq]') do i = 1, num_species - dYs_ds(i) = 0._wp + dYs_ds(i) = q_prim_rs${XYZ}$_vf(j, k, r, chemxb - 1 + i)* & + fd_coef_${XYZ}$ (j, cbc_loc) + & + dYs_ds(i) end do end if + end do - $:GPU_LOOP(parallelism='[seq]') - do j = 0, buff_size - - $:GPU_LOOP(parallelism='[seq]') - do i = 1, contxe - dalpha_rho_ds(i) = q_prim_rs${XYZ}$_vf(j, k, r, i)* & - fd_coef_${XYZ}$ (j, cbc_loc) + & - dalpha_rho_ds(i) - end do - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_dims - dvel_ds(i) = q_prim_rs${XYZ}$_vf(j, k, r, contxe + i)* & - fd_coef_${XYZ}$ (j, cbc_loc) + & - dvel_ds(i) - end do - - dpres_ds = q_prim_rs${XYZ}$_vf(j, k, r, E_idx)* & - fd_coef_${XYZ}$ (j, cbc_loc) + & - dpres_ds + ! First-Order Temporal Derivatives of Primitive Variables + lambda(1) = vel(dir_idx(1)) - c + lambda(2) = vel(dir_idx(1)) + lambda(3) = vel(dir_idx(1)) + c + + Ma = vel(dir_idx(1))/c + + if ((cbc_loc == -1 .and. bc${XYZ}$b == BC_CHAR_SLIP_WALL) .or. & + (cbc_loc == 1 .and. bc${XYZ}$e == BC_CHAR_SLIP_WALL)) then + call s_compute_slip_wall_L(lambda, L, rho, c, dpres_ds, dvel_ds) + else if ((cbc_loc == -1 .and. bc${XYZ}$b == BC_CHAR_NR_SUB_BUFFER) .or. & + (cbc_loc == 1 .and. bc${XYZ}$e == BC_CHAR_NR_SUB_BUFFER)) then + call s_compute_nonreflecting_subsonic_buffer_L(lambda, L, rho, c, mf, dalpha_rho_ds, dpres_ds, dvel_ds, dadv_ds, dYs_ds) + else if ((cbc_loc == -1 .and. bc${XYZ}$b == BC_CHAR_NR_SUB_INFLOW) .or. & + (cbc_loc == 1 .and. bc${XYZ}$e == BC_CHAR_NR_SUB_INFLOW)) then + call s_compute_nonreflecting_subsonic_inflow_L(lambda, L, rho, c, dpres_ds, dvel_ds) + ! Add GRCBC for Subsonic Inflow + if (bc_${XYZ}$%grcbc_in) then $:GPU_LOOP(parallelism='[seq]') - do i = 1, advxe - E_idx - dadv_ds(i) = q_prim_rs${XYZ}$_vf(j, k, r, E_idx + i)* & - fd_coef_${XYZ}$ (j, cbc_loc) + & - dadv_ds(i) + do i = 2, momxb + L(2) = c**3._wp*Ma*(alpha_rho(i - 1) - alpha_rho_in(i - 1, ${CBC_DIR}$))/Del_in(${CBC_DIR}$) - c*Ma*(pres - pres_in(${CBC_DIR}$))/Del_in(${CBC_DIR}$) end do - - if (chemistry) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_species - dYs_ds(i) = q_prim_rs${XYZ}$_vf(j, k, r, chemxb - 1 + i)* & - fd_coef_${XYZ}$ (j, cbc_loc) + & - dYs_ds(i) - end do - end if - end do - - ! First-Order Temporal Derivatives of Primitive Variables - lambda(1) = vel(dir_idx(1)) - c - lambda(2) = vel(dir_idx(1)) - lambda(3) = vel(dir_idx(1)) + c - - Ma = vel(dir_idx(1))/c - - if ((cbc_loc == -1 .and. bc${XYZ}$b == BC_CHAR_SLIP_WALL) .or. & - (cbc_loc == 1 .and. bc${XYZ}$e == BC_CHAR_SLIP_WALL)) then - call s_compute_slip_wall_L(lambda, L, rho, c, dpres_ds, dvel_ds) - else if ((cbc_loc == -1 .and. bc${XYZ}$b == BC_CHAR_NR_SUB_BUFFER) .or. & - (cbc_loc == 1 .and. bc${XYZ}$e == BC_CHAR_NR_SUB_BUFFER)) then - call s_compute_nonreflecting_subsonic_buffer_L(lambda, L, rho, c, mf, dalpha_rho_ds, dpres_ds, dvel_ds, dadv_ds, dYs_ds) - else if ((cbc_loc == -1 .and. bc${XYZ}$b == BC_CHAR_NR_SUB_INFLOW) .or. & - (cbc_loc == 1 .and. bc${XYZ}$e == BC_CHAR_NR_SUB_INFLOW)) then - call s_compute_nonreflecting_subsonic_inflow_L(lambda, L, rho, c, dpres_ds, dvel_ds) - ! Add GRCBC for Subsonic Inflow - if (bc_${XYZ}$%grcbc_in) then - $:GPU_LOOP(parallelism='[seq]') - do i = 2, momxb - L(2) = c**3._wp*Ma*(alpha_rho(i - 1) - alpha_rho_in(i - 1, ${CBC_DIR}$))/Del_in(${CBC_DIR}$) - c*Ma*(pres - pres_in(${CBC_DIR}$))/Del_in(${CBC_DIR}$) - end do - if (n > 0) then - L(momxb + 1) = c*Ma*(vel(dir_idx(2)) - vel_in(${CBC_DIR}$, dir_idx(2)))/Del_in(${CBC_DIR}$) - if (p > 0) then - L(momxb + 2) = c*Ma*(vel(dir_idx(3)) - vel_in(${CBC_DIR}$, dir_idx(3)))/Del_in(${CBC_DIR}$) - end if + if (n > 0) then + L(momxb + 1) = c*Ma*(vel(dir_idx(2)) - vel_in(${CBC_DIR}$, dir_idx(2)))/Del_in(${CBC_DIR}$) + if (p > 0) then + L(momxb + 2) = c*Ma*(vel(dir_idx(3)) - vel_in(${CBC_DIR}$, dir_idx(3)))/Del_in(${CBC_DIR}$) end if - $:GPU_LOOP(parallelism='[seq]') - do i = E_idx, advxe - 1 - L(i) = c*Ma*(adv_local(i + 1 - E_idx) - alpha_in(i + 1 - E_idx, ${CBC_DIR}$))/Del_in(${CBC_DIR}$) - end do - L(advxe) = rho*c**2._wp*(1._wp + Ma)*(vel(dir_idx(1)) + vel_in(${CBC_DIR}$, dir_idx(1))*sign(1, cbc_loc))/Del_in(${CBC_DIR}$) + c*(1._wp + Ma)*(pres - pres_in(${CBC_DIR}$))/Del_in(${CBC_DIR}$) end if - else if ((cbc_loc == -1 .and. bc${XYZ}$b == BC_CHAR_NR_SUB_OUTFLOW) .or. & - (cbc_loc == 1 .and. bc${XYZ}$e == BC_CHAR_NR_SUB_OUTFLOW)) then - call s_compute_nonreflecting_subsonic_outflow_L(lambda, L, rho, c, mf, dalpha_rho_ds, dpres_ds, dvel_ds, dadv_ds, dYs_ds) - ! Add GRCBC for Subsonic Outflow (Pressure) - if (bc_${XYZ}$%grcbc_out) then - L(advxe) = c*(1._wp - Ma)*(pres - pres_out(${CBC_DIR}$))/Del_out(${CBC_DIR}$) - - ! Add GRCBC for Subsonic Outflow (Normal Velocity) - if (bc_${XYZ}$%grcbc_vel_out) then - L(advxe) = L(advxe) + rho*c**2._wp*(1._wp - Ma)*(vel(dir_idx(1)) + vel_out(${CBC_DIR}$, dir_idx(1))*sign(1, cbc_loc))/Del_out(${CBC_DIR}$) - end if + $:GPU_LOOP(parallelism='[seq]') + do i = E_idx, advxe - 1 + L(i) = c*Ma*(adv_local(i + 1 - E_idx) - alpha_in(i + 1 - E_idx, ${CBC_DIR}$))/Del_in(${CBC_DIR}$) + end do + L(advxe) = rho*c**2._wp*(1._wp + Ma)*(vel(dir_idx(1)) + vel_in(${CBC_DIR}$, dir_idx(1))*sign(1, cbc_loc))/Del_in(${CBC_DIR}$) + c*(1._wp + Ma)*(pres - pres_in(${CBC_DIR}$))/Del_in(${CBC_DIR}$) + end if + else if ((cbc_loc == -1 .and. bc${XYZ}$b == BC_CHAR_NR_SUB_OUTFLOW) .or. & + (cbc_loc == 1 .and. bc${XYZ}$e == BC_CHAR_NR_SUB_OUTFLOW)) then + call s_compute_nonreflecting_subsonic_outflow_L(lambda, L, rho, c, mf, dalpha_rho_ds, dpres_ds, dvel_ds, dadv_ds, dYs_ds) + ! Add GRCBC for Subsonic Outflow (Pressure) + if (bc_${XYZ}$%grcbc_out) then + L(advxe) = c*(1._wp - Ma)*(pres - pres_out(${CBC_DIR}$))/Del_out(${CBC_DIR}$) + + ! Add GRCBC for Subsonic Outflow (Normal Velocity) + if (bc_${XYZ}$%grcbc_vel_out) then + L(advxe) = L(advxe) + rho*c**2._wp*(1._wp - Ma)*(vel(dir_idx(1)) + vel_out(${CBC_DIR}$, dir_idx(1))*sign(1, cbc_loc))/Del_out(${CBC_DIR}$) end if - else if ((cbc_loc == -1 .and. bc${XYZ}$b == BC_CHAR_FF_SUB_OUTFLOW) .or. & - (cbc_loc == 1 .and. bc${XYZ}$e == BC_CHAR_FF_SUB_OUTFLOW)) then - call s_compute_force_free_subsonic_outflow_L(lambda, L, rho, c, mf, dalpha_rho_ds, dpres_ds, dvel_ds, dadv_ds) - else if ((cbc_loc == -1 .and. bc${XYZ}$b == BC_CHAR_CP_SUB_OUTFLOW) .or. & - (cbc_loc == 1 .and. bc${XYZ}$e == BC_CHAR_CP_SUB_OUTFLOW)) then - call s_compute_constant_pressure_subsonic_outflow_L(lambda, L, rho, c, mf, dalpha_rho_ds, dpres_ds, dvel_ds, dadv_ds) - else if ((cbc_loc == -1 .and. bc${XYZ}$b == BC_CHAR_SUP_INFLOW) .or. & - (cbc_loc == 1 .and. bc${XYZ}$e == BC_CHAR_SUP_INFLOW)) then - call s_compute_supersonic_inflow_L(L) - else if ((cbc_loc == -1 .and. bc${XYZ}$b == BC_CHAR_SUP_OUTFLOW) .or. & - (cbc_loc == 1 .and. bc${XYZ}$e == BC_CHAR_SUP_OUTFLOW)) then - call s_compute_supersonic_outflow_L(lambda, L, rho, c, mf, dalpha_rho_ds, dpres_ds, dvel_ds, dadv_ds, dYs_ds) end if + else if ((cbc_loc == -1 .and. bc${XYZ}$b == BC_CHAR_FF_SUB_OUTFLOW) .or. & + (cbc_loc == 1 .and. bc${XYZ}$e == BC_CHAR_FF_SUB_OUTFLOW)) then + call s_compute_force_free_subsonic_outflow_L(lambda, L, rho, c, mf, dalpha_rho_ds, dpres_ds, dvel_ds, dadv_ds) + else if ((cbc_loc == -1 .and. bc${XYZ}$b == BC_CHAR_CP_SUB_OUTFLOW) .or. & + (cbc_loc == 1 .and. bc${XYZ}$e == BC_CHAR_CP_SUB_OUTFLOW)) then + call s_compute_constant_pressure_subsonic_outflow_L(lambda, L, rho, c, mf, dalpha_rho_ds, dpres_ds, dvel_ds, dadv_ds) + else if ((cbc_loc == -1 .and. bc${XYZ}$b == BC_CHAR_SUP_INFLOW) .or. & + (cbc_loc == 1 .and. bc${XYZ}$e == BC_CHAR_SUP_INFLOW)) then + call s_compute_supersonic_inflow_L(L) + else if ((cbc_loc == -1 .and. bc${XYZ}$b == BC_CHAR_SUP_OUTFLOW) .or. & + (cbc_loc == 1 .and. bc${XYZ}$e == BC_CHAR_SUP_OUTFLOW)) then + call s_compute_supersonic_outflow_L(lambda, L, rho, c, mf, dalpha_rho_ds, dpres_ds, dvel_ds, dadv_ds, dYs_ds) + end if + + ! Be careful about the cylindrical coordinate! + if (cyl_coord .and. cbc_dir == 2 .and. cbc_loc == 1) then + dpres_dt = -5.e-1_wp*(L(advxe) + L(1)) + rho*c*c*vel(dir_idx(1)) & + /y_cc(n) + else + dpres_dt = -5.e-1_wp*(L(advxe) + L(1)) + end if + + $:GPU_LOOP(parallelism='[seq]') + do i = 1, contxe + dalpha_rho_dt(i) = & + -(L(i + 1) - mf(i)*dpres_dt)/(c*c) + end do - ! Be careful about the cylindrical coordinate! - if (cyl_coord .and. cbc_dir == 2 .and. cbc_loc == 1) then - dpres_dt = -5.e-1_wp*(L(advxe) + L(1)) + rho*c*c*vel(dir_idx(1)) & - /y_cc(n) - else - dpres_dt = -5.e-1_wp*(L(advxe) + L(1)) - end if + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_dims + dvel_dt(dir_idx(i)) = dir_flg(dir_idx(i))* & + (L(1) - L(advxe))/(2._wp*rho*c) + & + (dir_flg(dir_idx(i)) - 1._wp)* & + L(momxb + i - 1) + end do + + vel_dv_dt_sum = 0._wp + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_dims + vel_dv_dt_sum = vel_dv_dt_sum + vel(i)*dvel_dt(i) + end do + if (chemistry) then $:GPU_LOOP(parallelism='[seq]') - do i = 1, contxe - dalpha_rho_dt(i) = & - -(L(i + 1) - mf(i)*dpres_dt)/(c*c) + do i = 1, num_species + dYs_dt(i) = -1._wp*L(chemxb + i - 1) end do + end if + ! The treatment of void fraction source is unclear + if (cyl_coord .and. cbc_dir == 2 .and. cbc_loc == 1) then $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_dims - dvel_dt(dir_idx(i)) = dir_flg(dir_idx(i))* & - (L(1) - L(advxe))/(2._wp*rho*c) + & - (dir_flg(dir_idx(i)) - 1._wp)* & - L(momxb + i - 1) + do i = 1, advxe - E_idx + dadv_dt(i) = -L(momxe + i) !+ adv_local(i) * vel(dir_idx(1))/y_cc(n) end do - - vel_dv_dt_sum = 0._wp + else $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_dims - vel_dv_dt_sum = vel_dv_dt_sum + vel(i)*dvel_dt(i) + do i = 1, advxe - E_idx + dadv_dt(i) = -L(momxe + i) end do + end if - if (chemistry) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_species - dYs_dt(i) = -1._wp*L(chemxb + i - 1) - end do - end if + drho_dt = 0._wp; dgamma_dt = 0._wp; dpi_inf_dt = 0._wp; dqv_dt = 0._wp - ! The treatment of void fraction source is unclear - if (cyl_coord .and. cbc_dir == 2 .and. cbc_loc == 1) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, advxe - E_idx - dadv_dt(i) = -L(momxe + i) !+ adv_local(i) * vel(dir_idx(1))/y_cc(n) - end do - else - $:GPU_LOOP(parallelism='[seq]') - do i = 1, advxe - E_idx - dadv_dt(i) = -L(momxe + i) - end do - end if + if (model_eqns == 1) then + drho_dt = dalpha_rho_dt(1) + dgamma_dt = dadv_dt(1) + dpi_inf_dt = dadv_dt(2) + else + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids + drho_dt = drho_dt + dalpha_rho_dt(i) + dgamma_dt = dgamma_dt + dadv_dt(i)*gammas(i) + dpi_inf_dt = dpi_inf_dt + dadv_dt(i)*pi_infs(i) + dqv_dt = dqv_dt + dalpha_rho_dt(i)*qvs(i) + end do + end if - drho_dt = 0._wp; dgamma_dt = 0._wp; dpi_inf_dt = 0._wp; dqv_dt = 0._wp + ! flux_rs_vf_l and flux_src_rs_vf_l at j = -1/2 + $:GPU_LOOP(parallelism='[seq]') + do i = 1, contxe + flux_rs${XYZ}$_vf_l(-1, k, r, i) = flux_rs${XYZ}$_vf_l(0, k, r, i) & + + ds(0)*dalpha_rho_dt(i) + end do - if (model_eqns == 1) then - drho_dt = dalpha_rho_dt(1) - dgamma_dt = dadv_dt(1) - dpi_inf_dt = dadv_dt(2) - else - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - drho_dt = drho_dt + dalpha_rho_dt(i) - dgamma_dt = dgamma_dt + dadv_dt(i)*gammas(i) - dpi_inf_dt = dpi_inf_dt + dadv_dt(i)*pi_infs(i) - dqv_dt = dqv_dt + dalpha_rho_dt(i)*qvs(i) - end do - end if + $:GPU_LOOP(parallelism='[seq]') + do i = momxb, momxe + flux_rs${XYZ}$_vf_l(-1, k, r, i) = flux_rs${XYZ}$_vf_l(0, k, r, i) & + + ds(0)*(vel(i - contxe)*drho_dt & + + rho*dvel_dt(i - contxe)) + end do - ! flux_rs_vf_l and flux_src_rs_vf_l at j = -1/2 + if (chemistry) then + ! Evolution of LODI equation of energy for real gases adjusted to perfect gas, doi:10.1006/jcph.2002.6990 + call get_species_enthalpies_rt(T, h_k) + sum_Enthalpies = 0._wp $:GPU_LOOP(parallelism='[seq]') - do i = 1, contxe - flux_rs${XYZ}$_vf_l(-1, k, r, i) = flux_rs${XYZ}$_vf_l(0, k, r, i) & - + ds(0)*dalpha_rho_dt(i) + do i = 1, num_species + #:block UNDEF_AMD + h_k(i) = h_k(i)*gas_constant/molecular_weights(i)*T + sum_Enthalpies = sum_Enthalpies + (rho*h_k(i) - pres*Mw/molecular_weights(i)*Cp/R_gas)*dYs_dt(i) + #:endblock UNDEF_AMD + + #:block DEF_AMD + h_k(i) = h_k(i)*gas_constant/molecular_weights_nonparameter(i)*T + sum_Enthalpies = sum_Enthalpies + (rho*h_k(i) - pres*Mw/molecular_weights_nonparameter(i)*Cp/R_gas)*dYs_dt(i) + #:endblock DEF_AMD end do - + flux_rs${XYZ}$_vf_l(-1, k, r, E_idx) = flux_rs${XYZ}$_vf_l(0, k, r, E_idx) & + + ds(0)*((E/rho + pres/rho)*drho_dt + rho*vel_dv_dt_sum + Cp*T*L(2)/(c*c) + sum_Enthalpies) $:GPU_LOOP(parallelism='[seq]') - do i = momxb, momxe - flux_rs${XYZ}$_vf_l(-1, k, r, i) = flux_rs${XYZ}$_vf_l(0, k, r, i) & - + ds(0)*(vel(i - contxe)*drho_dt & - + rho*dvel_dt(i - contxe)) + do i = 1, num_species + flux_rs${XYZ}$_vf_l(-1, k, r, i - 1 + chemxb) = flux_rs${XYZ}$_vf_l(0, k, r, chemxb + i - 1) & + + ds(0)*(drho_dt*Ys(i) + rho*dYs_dt(i)) + end do + else + flux_rs${XYZ}$_vf_l(-1, k, r, E_idx) = flux_rs${XYZ}$_vf_l(0, k, r, E_idx) & + + ds(0)*(pres*dgamma_dt & + + gamma*dpres_dt & + + dpi_inf_dt & + + dqv_dt & + + rho*vel_dv_dt_sum & + + 5.e-1_wp*drho_dt*vel_K_sum) + end if + + if (riemann_solver == 1) then + $:GPU_LOOP(parallelism='[seq]') + do i = advxb, advxe + flux_rs${XYZ}$_vf_l(-1, k, r, i) = 0._wp end do - if (chemistry) then - ! Evolution of LODI equation of energy for real gases adjusted to perfect gas, doi:10.1006/jcph.2002.6990 - call get_species_enthalpies_rt(T, h_k) - sum_Enthalpies = 0._wp - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_species - #:block UNDEF_AMD - h_k(i) = h_k(i)*gas_constant/molecular_weights(i)*T - sum_Enthalpies = sum_Enthalpies + (rho*h_k(i) - pres*Mw/molecular_weights(i)*Cp/R_gas)*dYs_dt(i) - #:endblock UNDEF_AMD - - #:block DEF_AMD - h_k(i) = h_k(i)*gas_constant/molecular_weights_nonparameter(i)*T - sum_Enthalpies = sum_Enthalpies + (rho*h_k(i) - pres*Mw/molecular_weights_nonparameter(i)*Cp/R_gas)*dYs_dt(i) - #:endblock DEF_AMD - end do - flux_rs${XYZ}$_vf_l(-1, k, r, E_idx) = flux_rs${XYZ}$_vf_l(0, k, r, E_idx) & - + ds(0)*((E/rho + pres/rho)*drho_dt + rho*vel_dv_dt_sum + Cp*T*L(2)/(c*c) + sum_Enthalpies) - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_species - flux_rs${XYZ}$_vf_l(-1, k, r, i - 1 + chemxb) = flux_rs${XYZ}$_vf_l(0, k, r, chemxb + i - 1) & - + ds(0)*(drho_dt*Ys(i) + rho*dYs_dt(i)) - end do - else - flux_rs${XYZ}$_vf_l(-1, k, r, E_idx) = flux_rs${XYZ}$_vf_l(0, k, r, E_idx) & - + ds(0)*(pres*dgamma_dt & - + gamma*dpres_dt & - + dpi_inf_dt & - + dqv_dt & - + rho*vel_dv_dt_sum & - + 5.e-1_wp*drho_dt*vel_K_sum) - end if - - if (riemann_solver == 1) then - $:GPU_LOOP(parallelism='[seq]') - do i = advxb, advxe - flux_rs${XYZ}$_vf_l(-1, k, r, i) = 0._wp - end do - - $:GPU_LOOP(parallelism='[seq]') - do i = advxb, advxe - flux_src_rs${XYZ}$_vf_l(-1, k, r, i) = & - 1._wp/max(abs(vel(dir_idx(1))), sgm_eps) & - *sign(1._wp, vel(dir_idx(1))) & - *(flux_rs${XYZ}$_vf_l(0, k, r, i) & - + vel(dir_idx(1)) & - *flux_src_rs${XYZ}$_vf_l(0, k, r, i) & - + ds(0)*dadv_dt(i - E_idx)) - end do + $:GPU_LOOP(parallelism='[seq]') + do i = advxb, advxe + flux_src_rs${XYZ}$_vf_l(-1, k, r, i) = & + 1._wp/max(abs(vel(dir_idx(1))), sgm_eps) & + *sign(1._wp, vel(dir_idx(1))) & + *(flux_rs${XYZ}$_vf_l(0, k, r, i) & + + vel(dir_idx(1)) & + *flux_src_rs${XYZ}$_vf_l(0, k, r, i) & + + ds(0)*dadv_dt(i - E_idx)) + end do - else + else - $:GPU_LOOP(parallelism='[seq]') - do i = advxb, advxe - flux_rs${XYZ}$_vf_l(-1, k, r, i) = flux_rs${XYZ}$_vf_l(0, k, r, i) + & - ds(0)*dadv_dt(i - E_idx) - end do + $:GPU_LOOP(parallelism='[seq]') + do i = advxb, advxe + flux_rs${XYZ}$_vf_l(-1, k, r, i) = flux_rs${XYZ}$_vf_l(0, k, r, i) + & + ds(0)*dadv_dt(i - E_idx) + end do - $:GPU_LOOP(parallelism='[seq]') - do i = advxb, advxe - flux_src_rs${XYZ}$_vf_l(-1, k, r, i) = flux_src_rs${XYZ}$_vf_l(0, k, r, i) - end do + $:GPU_LOOP(parallelism='[seq]') + do i = advxb, advxe + flux_src_rs${XYZ}$_vf_l(-1, k, r, i) = flux_src_rs${XYZ}$_vf_l(0, k, r, i) + end do - end if - ! END: flux_rs_vf_l and flux_src_rs_vf_l at j = -1/2 + end if + ! END: flux_rs_vf_l and flux_src_rs_vf_l at j = -1/2 - end do end do + end do $:END_GPU_PARALLEL_LOOP() end if #:endfor @@ -1188,79 +1188,79 @@ contains if (cbc_dir == 1) then $:GPU_PARALLEL_LOOP(private='[i,j,k,r]', collapse=4) - do i = 1, sys_size - do r = is3%beg, is3%end - do k = is2%beg, is2%end - do j = 0, buff_size - q_prim_rsx_vf(j, k, r, i) = & - q_prim_vf(i)%sf(dj*(m - 2*j) + j, k, r) - end do + do i = 1, sys_size + do r = is3%beg, is3%end + do k = is2%beg, is2%end + do j = 0, buff_size + q_prim_rsx_vf(j, k, r, i) = & + q_prim_vf(i)%sf(dj*(m - 2*j) + j, k, r) end do end do end do + end do $:END_GPU_PARALLEL_LOOP() $:GPU_PARALLEL_LOOP(private='[j,k,r]', collapse=3) - do r = is3%beg, is3%end - do k = is2%beg, is2%end - do j = 0, buff_size - q_prim_rsx_vf(j, k, r, momxb) = & - q_prim_vf(momxb)%sf(dj*(m - 2*j) + j, k, r)* & - sign(1._wp, -1._wp*cbc_loc) - end do + do r = is3%beg, is3%end + do k = is2%beg, is2%end + do j = 0, buff_size + q_prim_rsx_vf(j, k, r, momxb) = & + q_prim_vf(momxb)%sf(dj*(m - 2*j) + j, k, r)* & + sign(1._wp, -1._wp*cbc_loc) end do end do + end do $:END_GPU_PARALLEL_LOOP() $:GPU_PARALLEL_LOOP(private='[i,j,k,r]', collapse=4) - do i = 1, flux_cbc_index - do r = is3%beg, is3%end - do k = is2%beg, is2%end - do j = -1, buff_size - flux_rsx_vf_l(j, k, r, i) = & - flux_vf(i)%sf(dj*((m - 1) - 2*j) + j, k, r)* & - sign(1._wp, -1._wp*cbc_loc) - end do + do i = 1, flux_cbc_index + do r = is3%beg, is3%end + do k = is2%beg, is2%end + do j = -1, buff_size + flux_rsx_vf_l(j, k, r, i) = & + flux_vf(i)%sf(dj*((m - 1) - 2*j) + j, k, r)* & + sign(1._wp, -1._wp*cbc_loc) end do end do end do + end do $:END_GPU_PARALLEL_LOOP() $:GPU_PARALLEL_LOOP(private='[j,k,r]', collapse=3) - do r = is3%beg, is3%end - do k = is2%beg, is2%end - do j = -1, buff_size - flux_rsx_vf_l(j, k, r, momxb) = & - flux_vf(momxb)%sf(dj*((m - 1) - 2*j) + j, k, r) - end do + do r = is3%beg, is3%end + do k = is2%beg, is2%end + do j = -1, buff_size + flux_rsx_vf_l(j, k, r, momxb) = & + flux_vf(momxb)%sf(dj*((m - 1) - 2*j) + j, k, r) end do end do + end do $:END_GPU_PARALLEL_LOOP() if (riemann_solver == 1) then $:GPU_PARALLEL_LOOP(private='[i,j,k,r]', collapse=4) - do i = advxb, advxe - do r = is3%beg, is3%end - do k = is2%beg, is2%end - do j = -1, buff_size - flux_src_rsx_vf_l(j, k, r, i) = & - flux_src_vf(i)%sf(dj*((m - 1) - 2*j) + j, k, r) - end do + do i = advxb, advxe + do r = is3%beg, is3%end + do k = is2%beg, is2%end + do j = -1, buff_size + flux_src_rsx_vf_l(j, k, r, i) = & + flux_src_vf(i)%sf(dj*((m - 1) - 2*j) + j, k, r) end do end do end do + end do $:END_GPU_PARALLEL_LOOP() else $:GPU_PARALLEL_LOOP(private='[j,k,r]', collapse=3) - do r = is3%beg, is3%end - do k = is2%beg, is2%end - do j = -1, buff_size - flux_src_rsx_vf_l(j, k, r, advxb) = & - flux_src_vf(advxb)%sf(dj*((m - 1) - 2*j) + j, k, r)* & - sign(1._wp, -1._wp*cbc_loc) - end do + do r = is3%beg, is3%end + do k = is2%beg, is2%end + do j = -1, buff_size + flux_src_rsx_vf_l(j, k, r, advxb) = & + flux_src_vf(advxb)%sf(dj*((m - 1) - 2*j) + j, k, r)* & + sign(1._wp, -1._wp*cbc_loc) end do end do + end do $:END_GPU_PARALLEL_LOOP() end if @@ -1270,79 +1270,79 @@ contains elseif (cbc_dir == 2) then $:GPU_PARALLEL_LOOP(private='[i,j,k,r]', collapse=4) - do i = 1, sys_size - do r = is3%beg, is3%end - do k = is2%beg, is2%end - do j = 0, buff_size - q_prim_rsy_vf(j, k, r, i) = & - q_prim_vf(i)%sf(k, dj*(n - 2*j) + j, r) - end do + do i = 1, sys_size + do r = is3%beg, is3%end + do k = is2%beg, is2%end + do j = 0, buff_size + q_prim_rsy_vf(j, k, r, i) = & + q_prim_vf(i)%sf(k, dj*(n - 2*j) + j, r) end do end do end do + end do $:END_GPU_PARALLEL_LOOP() $:GPU_PARALLEL_LOOP(private='[j,k,r]', collapse=3) - do r = is3%beg, is3%end - do k = is2%beg, is2%end - do j = 0, buff_size - q_prim_rsy_vf(j, k, r, momxb + 1) = & - q_prim_vf(momxb + 1)%sf(k, dj*(n - 2*j) + j, r)* & - sign(1._wp, -1._wp*cbc_loc) - end do + do r = is3%beg, is3%end + do k = is2%beg, is2%end + do j = 0, buff_size + q_prim_rsy_vf(j, k, r, momxb + 1) = & + q_prim_vf(momxb + 1)%sf(k, dj*(n - 2*j) + j, r)* & + sign(1._wp, -1._wp*cbc_loc) end do end do + end do $:END_GPU_PARALLEL_LOOP() $:GPU_PARALLEL_LOOP(private='[i,j,k,r]', collapse=4) - do i = 1, flux_cbc_index - do r = is3%beg, is3%end - do k = is2%beg, is2%end - do j = -1, buff_size - flux_rsy_vf_l(j, k, r, i) = & - flux_vf(i)%sf(k, dj*((n - 1) - 2*j) + j, r)* & - sign(1._wp, -1._wp*cbc_loc) - end do + do i = 1, flux_cbc_index + do r = is3%beg, is3%end + do k = is2%beg, is2%end + do j = -1, buff_size + flux_rsy_vf_l(j, k, r, i) = & + flux_vf(i)%sf(k, dj*((n - 1) - 2*j) + j, r)* & + sign(1._wp, -1._wp*cbc_loc) end do end do end do + end do $:END_GPU_PARALLEL_LOOP() $:GPU_PARALLEL_LOOP(private='[j,k,r]', collapse=3) - do r = is3%beg, is3%end - do k = is2%beg, is2%end - do j = -1, buff_size - flux_rsy_vf_l(j, k, r, momxb + 1) = & - flux_vf(momxb + 1)%sf(k, dj*((n - 1) - 2*j) + j, r) - end do + do r = is3%beg, is3%end + do k = is2%beg, is2%end + do j = -1, buff_size + flux_rsy_vf_l(j, k, r, momxb + 1) = & + flux_vf(momxb + 1)%sf(k, dj*((n - 1) - 2*j) + j, r) end do end do + end do $:END_GPU_PARALLEL_LOOP() if (riemann_solver == 1) then $:GPU_PARALLEL_LOOP(private='[i,j,k,r]', collapse=4) - do i = advxb, advxe - do r = is3%beg, is3%end - do k = is2%beg, is2%end - do j = -1, buff_size - flux_src_rsy_vf_l(j, k, r, i) = & - flux_src_vf(i)%sf(k, dj*((n - 1) - 2*j) + j, r) - end do + do i = advxb, advxe + do r = is3%beg, is3%end + do k = is2%beg, is2%end + do j = -1, buff_size + flux_src_rsy_vf_l(j, k, r, i) = & + flux_src_vf(i)%sf(k, dj*((n - 1) - 2*j) + j, r) end do end do end do + end do $:END_GPU_PARALLEL_LOOP() else $:GPU_PARALLEL_LOOP(private='[j,k,r]', collapse=3) - do r = is3%beg, is3%end - do k = is2%beg, is2%end - do j = -1, buff_size - flux_src_rsy_vf_l(j, k, r, advxb) = & - flux_src_vf(advxb)%sf(k, dj*((n - 1) - 2*j) + j, r)* & - sign(1._wp, -1._wp*cbc_loc) - end do + do r = is3%beg, is3%end + do k = is2%beg, is2%end + do j = -1, buff_size + flux_src_rsy_vf_l(j, k, r, advxb) = & + flux_src_vf(advxb)%sf(k, dj*((n - 1) - 2*j) + j, r)* & + sign(1._wp, -1._wp*cbc_loc) end do end do + end do $:END_GPU_PARALLEL_LOOP() end if @@ -1352,79 +1352,79 @@ contains else $:GPU_PARALLEL_LOOP(private='[i,j,k,r]', collapse=4) - do i = 1, sys_size - do r = is3%beg, is3%end - do k = is2%beg, is2%end - do j = 0, buff_size - q_prim_rsz_vf(j, k, r, i) = & - q_prim_vf(i)%sf(r, k, dj*(p - 2*j) + j) - end do + do i = 1, sys_size + do r = is3%beg, is3%end + do k = is2%beg, is2%end + do j = 0, buff_size + q_prim_rsz_vf(j, k, r, i) = & + q_prim_vf(i)%sf(r, k, dj*(p - 2*j) + j) end do end do end do + end do $:END_GPU_PARALLEL_LOOP() $:GPU_PARALLEL_LOOP(private='[j,k,r]', collapse=3) - do r = is3%beg, is3%end - do k = is2%beg, is2%end - do j = 0, buff_size - q_prim_rsz_vf(j, k, r, momxe) = & - q_prim_vf(momxe)%sf(r, k, dj*(p - 2*j) + j)* & - sign(1._wp, -1._wp*cbc_loc) - end do + do r = is3%beg, is3%end + do k = is2%beg, is2%end + do j = 0, buff_size + q_prim_rsz_vf(j, k, r, momxe) = & + q_prim_vf(momxe)%sf(r, k, dj*(p - 2*j) + j)* & + sign(1._wp, -1._wp*cbc_loc) end do end do + end do $:END_GPU_PARALLEL_LOOP() $:GPU_PARALLEL_LOOP(private='[i,j,k,r]', collapse=4) - do i = 1, flux_cbc_index - do r = is3%beg, is3%end - do k = is2%beg, is2%end - do j = -1, buff_size - flux_rsz_vf_l(j, k, r, i) = & - flux_vf(i)%sf(r, k, dj*((p - 1) - 2*j) + j)* & - sign(1._wp, -1._wp*cbc_loc) - end do + do i = 1, flux_cbc_index + do r = is3%beg, is3%end + do k = is2%beg, is2%end + do j = -1, buff_size + flux_rsz_vf_l(j, k, r, i) = & + flux_vf(i)%sf(r, k, dj*((p - 1) - 2*j) + j)* & + sign(1._wp, -1._wp*cbc_loc) end do end do end do + end do $:END_GPU_PARALLEL_LOOP() $:GPU_PARALLEL_LOOP(private='[j,k,r]', collapse=3) - do r = is3%beg, is3%end - do k = is2%beg, is2%end - do j = -1, buff_size - flux_rsz_vf_l(j, k, r, momxe) = & - flux_vf(momxe)%sf(r, k, dj*((p - 1) - 2*j) + j) - end do + do r = is3%beg, is3%end + do k = is2%beg, is2%end + do j = -1, buff_size + flux_rsz_vf_l(j, k, r, momxe) = & + flux_vf(momxe)%sf(r, k, dj*((p - 1) - 2*j) + j) end do end do + end do $:END_GPU_PARALLEL_LOOP() if (riemann_solver == 1) then $:GPU_PARALLEL_LOOP(private='[i,j,k,r]', collapse=4) - do i = advxb, advxe - do r = is3%beg, is3%end - do k = is2%beg, is2%end - do j = -1, buff_size - flux_src_rsz_vf_l(j, k, r, i) = & - flux_src_vf(i)%sf(r, k, dj*((p - 1) - 2*j) + j) - end do + do i = advxb, advxe + do r = is3%beg, is3%end + do k = is2%beg, is2%end + do j = -1, buff_size + flux_src_rsz_vf_l(j, k, r, i) = & + flux_src_vf(i)%sf(r, k, dj*((p - 1) - 2*j) + j) end do end do end do + end do $:END_GPU_PARALLEL_LOOP() else $:GPU_PARALLEL_LOOP(private='[j,k,r]', collapse=3) - do r = is3%beg, is3%end - do k = is2%beg, is2%end - do j = -1, buff_size - flux_src_rsz_vf_l(j, k, r, advxb) = & - flux_src_vf(advxb)%sf(r, k, dj*((p - 1) - 2*j) + j)* & - sign(1._wp, -1._wp*cbc_loc) - end do + do r = is3%beg, is3%end + do k = is2%beg, is2%end + do j = -1, buff_size + flux_src_rsz_vf_l(j, k, r, advxb) = & + flux_src_vf(advxb)%sf(r, k, dj*((p - 1) - 2*j) + j)* & + sign(1._wp, -1._wp*cbc_loc) end do end do + end do $:END_GPU_PARALLEL_LOOP() end if @@ -1456,53 +1456,53 @@ contains if (cbc_dir == 1) then $:GPU_PARALLEL_LOOP(private='[i,j,k,r]', collapse=4) - do i = 1, flux_cbc_index - do r = is3%beg, is3%end - do k = is2%beg, is2%end - do j = -1, buff_size - flux_vf(i)%sf(dj*((m - 1) - 2*j) + j, k, r) = & - flux_rsx_vf_l(j, k, r, i)* & - sign(1._wp, -1._wp*cbc_loc) - end do + do i = 1, flux_cbc_index + do r = is3%beg, is3%end + do k = is2%beg, is2%end + do j = -1, buff_size + flux_vf(i)%sf(dj*((m - 1) - 2*j) + j, k, r) = & + flux_rsx_vf_l(j, k, r, i)* & + sign(1._wp, -1._wp*cbc_loc) end do end do end do + end do $:END_GPU_PARALLEL_LOOP() $:GPU_PARALLEL_LOOP(private='[j,k,r]', collapse=3) - do r = is3%beg, is3%end - do k = is2%beg, is2%end - do j = -1, buff_size - flux_vf(momxb)%sf(dj*((m - 1) - 2*j) + j, k, r) = & - flux_rsx_vf_l(j, k, r, momxb) - end do + do r = is3%beg, is3%end + do k = is2%beg, is2%end + do j = -1, buff_size + flux_vf(momxb)%sf(dj*((m - 1) - 2*j) + j, k, r) = & + flux_rsx_vf_l(j, k, r, momxb) end do end do + end do $:END_GPU_PARALLEL_LOOP() if (riemann_solver == 1) then $:GPU_PARALLEL_LOOP(private='[i,j,k,r]', collapse=4) - do i = advxb, advxe - do r = is3%beg, is3%end - do k = is2%beg, is2%end - do j = -1, buff_size - flux_src_vf(i)%sf(dj*((m - 1) - 2*j) + j, k, r) = & - flux_src_rsx_vf_l(j, k, r, i) - end do + do i = advxb, advxe + do r = is3%beg, is3%end + do k = is2%beg, is2%end + do j = -1, buff_size + flux_src_vf(i)%sf(dj*((m - 1) - 2*j) + j, k, r) = & + flux_src_rsx_vf_l(j, k, r, i) end do end do end do + end do $:END_GPU_PARALLEL_LOOP() else $:GPU_PARALLEL_LOOP(private='[j,k,r]', collapse=3) - do r = is3%beg, is3%end - do k = is2%beg, is2%end - do j = -1, buff_size - flux_src_vf(advxb)%sf(dj*((m - 1) - 2*j) + j, k, r) = & - flux_src_rsx_vf_l(j, k, r, advxb)* & - sign(1._wp, -1._wp*cbc_loc) - end do + do r = is3%beg, is3%end + do k = is2%beg, is2%end + do j = -1, buff_size + flux_src_vf(advxb)%sf(dj*((m - 1) - 2*j) + j, k, r) = & + flux_src_rsx_vf_l(j, k, r, advxb)* & + sign(1._wp, -1._wp*cbc_loc) end do end do + end do $:END_GPU_PARALLEL_LOOP() end if ! END: Reshaping Outputted Data in x-direction @@ -1511,54 +1511,54 @@ contains elseif (cbc_dir == 2) then $:GPU_PARALLEL_LOOP(private='[i,j,k,r]', collapse=4) - do i = 1, flux_cbc_index - do r = is3%beg, is3%end - do k = is2%beg, is2%end - do j = -1, buff_size - flux_vf(i)%sf(k, dj*((n - 1) - 2*j) + j, r) = & - flux_rsy_vf_l(j, k, r, i)* & - sign(1._wp, -1._wp*cbc_loc) - end do + do i = 1, flux_cbc_index + do r = is3%beg, is3%end + do k = is2%beg, is2%end + do j = -1, buff_size + flux_vf(i)%sf(k, dj*((n - 1) - 2*j) + j, r) = & + flux_rsy_vf_l(j, k, r, i)* & + sign(1._wp, -1._wp*cbc_loc) end do end do end do + end do $:END_GPU_PARALLEL_LOOP() $:GPU_PARALLEL_LOOP(private='[j,k,r]', collapse=3) - do r = is3%beg, is3%end - do k = is2%beg, is2%end - do j = -1, buff_size - flux_vf(momxb + 1)%sf(k, dj*((n - 1) - 2*j) + j, r) = & - flux_rsy_vf_l(j, k, r, momxb + 1) - end do + do r = is3%beg, is3%end + do k = is2%beg, is2%end + do j = -1, buff_size + flux_vf(momxb + 1)%sf(k, dj*((n - 1) - 2*j) + j, r) = & + flux_rsy_vf_l(j, k, r, momxb + 1) end do end do + end do $:END_GPU_PARALLEL_LOOP() if (riemann_solver == 1) then $:GPU_PARALLEL_LOOP(private='[i,j,k,r]', collapse=4) - do i = advxb, advxe - do r = is3%beg, is3%end - do k = is2%beg, is2%end - do j = -1, buff_size - flux_src_vf(i)%sf(k, dj*((n - 1) - 2*j) + j, r) = & - flux_src_rsy_vf_l(j, k, r, i) - end do + do i = advxb, advxe + do r = is3%beg, is3%end + do k = is2%beg, is2%end + do j = -1, buff_size + flux_src_vf(i)%sf(k, dj*((n - 1) - 2*j) + j, r) = & + flux_src_rsy_vf_l(j, k, r, i) end do end do end do + end do $:END_GPU_PARALLEL_LOOP() else $:GPU_PARALLEL_LOOP(private='[j,k,r]', collapse=3) - do r = is3%beg, is3%end - do k = is2%beg, is2%end - do j = -1, buff_size - flux_src_vf(advxb)%sf(k, dj*((n - 1) - 2*j) + j, r) = & - flux_src_rsy_vf_l(j, k, r, advxb)* & - sign(1._wp, -1._wp*cbc_loc) - end do + do r = is3%beg, is3%end + do k = is2%beg, is2%end + do j = -1, buff_size + flux_src_vf(advxb)%sf(k, dj*((n - 1) - 2*j) + j, r) = & + flux_src_rsy_vf_l(j, k, r, advxb)* & + sign(1._wp, -1._wp*cbc_loc) end do end do + end do $:END_GPU_PARALLEL_LOOP() end if @@ -1568,54 +1568,54 @@ contains else $:GPU_PARALLEL_LOOP(private='[i,j,k,r]', collapse=4) - do i = 1, flux_cbc_index - do r = is3%beg, is3%end - do k = is2%beg, is2%end - do j = -1, buff_size - flux_vf(i)%sf(r, k, dj*((p - 1) - 2*j) + j) = & - flux_rsz_vf_l(j, k, r, i)* & - sign(1._wp, -1._wp*cbc_loc) - end do + do i = 1, flux_cbc_index + do r = is3%beg, is3%end + do k = is2%beg, is2%end + do j = -1, buff_size + flux_vf(i)%sf(r, k, dj*((p - 1) - 2*j) + j) = & + flux_rsz_vf_l(j, k, r, i)* & + sign(1._wp, -1._wp*cbc_loc) end do end do end do + end do $:END_GPU_PARALLEL_LOOP() $:GPU_PARALLEL_LOOP(private='[j,k,r]', collapse=3) - do r = is3%beg, is3%end - do k = is2%beg, is2%end - do j = -1, buff_size - flux_vf(momxe)%sf(r, k, dj*((p - 1) - 2*j) + j) = & - flux_rsz_vf_l(j, k, r, momxe) - end do + do r = is3%beg, is3%end + do k = is2%beg, is2%end + do j = -1, buff_size + flux_vf(momxe)%sf(r, k, dj*((p - 1) - 2*j) + j) = & + flux_rsz_vf_l(j, k, r, momxe) end do end do + end do $:END_GPU_PARALLEL_LOOP() if (riemann_solver == 1) then $:GPU_PARALLEL_LOOP(private='[i,j,k,r]', collapse=4) - do i = advxb, advxe - do r = is3%beg, is3%end - do k = is2%beg, is2%end - do j = -1, buff_size - flux_src_vf(i)%sf(r, k, dj*((p - 1) - 2*j) + j) = & - flux_src_rsz_vf_l(j, k, r, i) - end do + do i = advxb, advxe + do r = is3%beg, is3%end + do k = is2%beg, is2%end + do j = -1, buff_size + flux_src_vf(i)%sf(r, k, dj*((p - 1) - 2*j) + j) = & + flux_src_rsz_vf_l(j, k, r, i) end do end do end do + end do $:END_GPU_PARALLEL_LOOP() else $:GPU_PARALLEL_LOOP(private='[j,k,r]', collapse=3) - do r = is3%beg, is3%end - do k = is2%beg, is2%end - do j = -1, buff_size - flux_src_vf(advxb)%sf(r, k, dj*((p - 1) - 2*j) + j) = & - flux_src_rsz_vf_l(j, k, r, advxb)* & - sign(1._wp, -1._wp*cbc_loc) - end do + do r = is3%beg, is3%end + do k = is2%beg, is2%end + do j = -1, buff_size + flux_src_vf(advxb)%sf(r, k, dj*((p - 1) - 2*j) + j) = & + flux_src_rsz_vf_l(j, k, r, advxb)* & + sign(1._wp, -1._wp*cbc_loc) end do end do + end do $:END_GPU_PARALLEL_LOOP() end if diff --git a/src/simulation/m_data_output.fpp b/src/simulation/m_data_output.fpp index aabf1f1c8..e333a86f8 100644 --- a/src/simulation/m_data_output.fpp +++ b/src/simulation/m_data_output.fpp @@ -280,22 +280,22 @@ contains ! Computing Stability Criteria at Current Time-step $:GPU_PARALLEL_LOOP(collapse=3, private='[j,k,l,vel, alpha, Re]') - do l = 0, p - do k = 0, n - do j = 0, m - call s_compute_enthalpy(q_prim_vf, pres, rho, gamma, pi_inf, Re, H, alpha, vel, vel_sum, j, k, l) + do l = 0, p + do k = 0, n + do j = 0, m + call s_compute_enthalpy(q_prim_vf, pres, rho, gamma, pi_inf, Re, H, alpha, vel, vel_sum, j, k, l) - call s_compute_speed_of_sound(pres, rho, gamma, pi_inf, H, alpha, vel_sum, 0._wp, c) + call s_compute_speed_of_sound(pres, rho, gamma, pi_inf, H, alpha, vel_sum, 0._wp, c) - if (viscous) then - call s_compute_stability_from_dt(vel, c, rho, Re, j, k, l, icfl_sf, vcfl_sf, Rc_sf) - else - call s_compute_stability_from_dt(vel, c, rho, Re, j, k, l, icfl_sf) - end if + if (viscous) then + call s_compute_stability_from_dt(vel, c, rho, Re, j, k, l, icfl_sf, vcfl_sf, Rc_sf) + else + call s_compute_stability_from_dt(vel, c, rho, Re, j, k, l, icfl_sf) + end if - end do end do end do + end do $:END_GPU_PARALLEL_LOOP() ! end: Computing Stability Criteria at Current Time-step diff --git a/src/simulation/m_derived_variables.fpp b/src/simulation/m_derived_variables.fpp index 93151024c..c49929e92 100644 --- a/src/simulation/m_derived_variables.fpp +++ b/src/simulation/m_derived_variables.fpp @@ -147,22 +147,22 @@ contains end if $:GPU_PARALLEL_LOOP(private='[i,j,k]', collapse=3) - do k = 0, p - do j = 0, n - do i = 0, m - if (p > 0) then - accel_mag(i, j, k) = sqrt(x_accel(i, j, k)**2._wp + & - y_accel(i, j, k)**2._wp + & - z_accel(i, j, k)**2._wp) - elseif (n > 0) then - accel_mag(i, j, k) = sqrt(x_accel(i, j, k)**2._wp + & - y_accel(i, j, k)**2._wp) - else - accel_mag(i, j, k) = x_accel(i, j, k) - end if - end do + do k = 0, p + do j = 0, n + do i = 0, m + if (p > 0) then + accel_mag(i, j, k) = sqrt(x_accel(i, j, k)**2._wp + & + y_accel(i, j, k)**2._wp + & + z_accel(i, j, k)**2._wp) + elseif (n > 0) then + accel_mag(i, j, k) = sqrt(x_accel(i, j, k)**2._wp + & + y_accel(i, j, k)**2._wp) + else + accel_mag(i, j, k) = x_accel(i, j, k) + end if end do end do + end do $:END_GPU_PARALLEL_LOOP() $:GPU_UPDATE(host='[accel_mag]') @@ -205,34 +205,51 @@ contains ! Computing the acceleration component in the x-coordinate direction if (i == 1) then $:GPU_PARALLEL_LOOP(private='[j,k,l]', collapse=3) - do l = 0, p - do k = 0, n - do j = 0, m - q_sf(j, k, l) = (11._wp*q_prim_vf0(momxb)%sf(j, k, l) & - - 18._wp*q_prim_vf1(momxb)%sf(j, k, l) & - + 9._wp*q_prim_vf2(momxb)%sf(j, k, l) & - - 2._wp*q_prim_vf3(momxb)%sf(j, k, l))/(6._wp*dt) - end do + do l = 0, p + do k = 0, n + do j = 0, m + q_sf(j, k, l) = (11._wp*q_prim_vf0(momxb)%sf(j, k, l) & + - 18._wp*q_prim_vf1(momxb)%sf(j, k, l) & + + 9._wp*q_prim_vf2(momxb)%sf(j, k, l) & + - 2._wp*q_prim_vf3(momxb)%sf(j, k, l))/(6._wp*dt) end do end do + end do $:END_GPU_PARALLEL_LOOP() if (n == 0) then $:GPU_PARALLEL_LOOP(private='[j,k,l,r]', collapse=4) - do l = 0, p - do k = 0, n - do j = 0, m - do r = -fd_number, fd_number - q_sf(j, k, l) = q_sf(j, k, l) & - + q_prim_vf0(momxb)%sf(j, k, l)*fd_coeff_x(r, j)* & - q_prim_vf0(momxb)%sf(r + j, k, l) - end do + do l = 0, p + do k = 0, n + do j = 0, m + do r = -fd_number, fd_number + q_sf(j, k, l) = q_sf(j, k, l) & + + q_prim_vf0(momxb)%sf(j, k, l)*fd_coeff_x(r, j)* & + q_prim_vf0(momxb)%sf(r + j, k, l) end do end do end do + end do $:END_GPU_PARALLEL_LOOP() elseif (p == 0) then $:GPU_PARALLEL_LOOP(private='[j,k,l,r]', collapse=4) + do l = 0, p + do k = 0, n + do j = 0, m + do r = -fd_number, fd_number + q_sf(j, k, l) = q_sf(j, k, l) & + + q_prim_vf0(momxb)%sf(j, k, l)*fd_coeff_x(r, j)* & + q_prim_vf0(momxb)%sf(r + j, k, l) & + + q_prim_vf0(momxb + 1)%sf(j, k, l)*fd_coeff_y(r, k)* & + q_prim_vf0(momxb)%sf(j, r + k, l) + end do + end do + end do + end do + $:END_GPU_PARALLEL_LOOP() + else + if (grid_geometry == 3) then + $:GPU_PARALLEL_LOOP(private='[j,k,l,r]', collapse=4) do l = 0, p do k = 0, n do j = 0, m @@ -241,68 +258,68 @@ contains + q_prim_vf0(momxb)%sf(j, k, l)*fd_coeff_x(r, j)* & q_prim_vf0(momxb)%sf(r + j, k, l) & + q_prim_vf0(momxb + 1)%sf(j, k, l)*fd_coeff_y(r, k)* & - q_prim_vf0(momxb)%sf(j, r + k, l) + q_prim_vf0(momxb)%sf(j, r + k, l) & + + q_prim_vf0(momxe)%sf(j, k, l)*fd_coeff_z(r, l)* & + q_prim_vf0(momxb)%sf(j, k, r + l)/y_cc(k) end do end do end do end do - $:END_GPU_PARALLEL_LOOP() - else - if (grid_geometry == 3) then - $:GPU_PARALLEL_LOOP(private='[j,k,l,r]', collapse=4) - do l = 0, p - do k = 0, n - do j = 0, m - do r = -fd_number, fd_number - q_sf(j, k, l) = q_sf(j, k, l) & - + q_prim_vf0(momxb)%sf(j, k, l)*fd_coeff_x(r, j)* & - q_prim_vf0(momxb)%sf(r + j, k, l) & - + q_prim_vf0(momxb + 1)%sf(j, k, l)*fd_coeff_y(r, k)* & - q_prim_vf0(momxb)%sf(j, r + k, l) & - + q_prim_vf0(momxe)%sf(j, k, l)*fd_coeff_z(r, l)* & - q_prim_vf0(momxb)%sf(j, k, r + l)/y_cc(k) - end do - end do - end do - end do $:END_GPU_PARALLEL_LOOP() else $:GPU_PARALLEL_LOOP(private='[j,k,l,r]', collapse=4) - do l = 0, p - do k = 0, n - do j = 0, m - do r = -fd_number, fd_number - q_sf(j, k, l) = q_sf(j, k, l) & - + q_prim_vf0(momxb)%sf(j, k, l)*fd_coeff_x(r, j)* & - q_prim_vf0(momxb)%sf(r + j, k, l) & - + q_prim_vf0(momxb + 1)%sf(j, k, l)*fd_coeff_y(r, k)* & - q_prim_vf0(momxb)%sf(j, r + k, l) & - + q_prim_vf0(momxe)%sf(j, k, l)*fd_coeff_z(r, l)* & - q_prim_vf0(momxb)%sf(j, k, r + l) - end do + do l = 0, p + do k = 0, n + do j = 0, m + do r = -fd_number, fd_number + q_sf(j, k, l) = q_sf(j, k, l) & + + q_prim_vf0(momxb)%sf(j, k, l)*fd_coeff_x(r, j)* & + q_prim_vf0(momxb)%sf(r + j, k, l) & + + q_prim_vf0(momxb + 1)%sf(j, k, l)*fd_coeff_y(r, k)* & + q_prim_vf0(momxb)%sf(j, r + k, l) & + + q_prim_vf0(momxe)%sf(j, k, l)*fd_coeff_z(r, l)* & + q_prim_vf0(momxb)%sf(j, k, r + l) end do end do end do + end do $:END_GPU_PARALLEL_LOOP() end if end if ! Computing the acceleration component in the y-coordinate direction elseif (i == 2) then $:GPU_PARALLEL_LOOP(private='[j,k,l]', collapse=3) - do l = 0, p - do k = 0, n - do j = 0, m - q_sf(j, k, l) = (11._wp*q_prim_vf0(momxb + 1)%sf(j, k, l) & - - 18._wp*q_prim_vf1(momxb + 1)%sf(j, k, l) & - + 9._wp*q_prim_vf2(momxb + 1)%sf(j, k, l) & - - 2._wp*q_prim_vf3(momxb + 1)%sf(j, k, l))/(6._wp*dt) - end do + do l = 0, p + do k = 0, n + do j = 0, m + q_sf(j, k, l) = (11._wp*q_prim_vf0(momxb + 1)%sf(j, k, l) & + - 18._wp*q_prim_vf1(momxb + 1)%sf(j, k, l) & + + 9._wp*q_prim_vf2(momxb + 1)%sf(j, k, l) & + - 2._wp*q_prim_vf3(momxb + 1)%sf(j, k, l))/(6._wp*dt) end do end do + end do $:END_GPU_PARALLEL_LOOP() if (p == 0) then $:GPU_PARALLEL_LOOP(private='[j,k,l,r]', collapse=4) + do l = 0, p + do k = 0, n + do j = 0, m + do r = -fd_number, fd_number + q_sf(j, k, l) = q_sf(j, k, l) & + + q_prim_vf0(momxb)%sf(j, k, l)*fd_coeff_x(r, j)* & + q_prim_vf0(momxb + 1)%sf(r + j, k, l) & + + q_prim_vf0(momxb + 1)%sf(j, k, l)*fd_coeff_y(r, k)* & + q_prim_vf0(momxb + 1)%sf(j, r + k, l) + end do + end do + end do + end do + $:END_GPU_PARALLEL_LOOP() + else + if (grid_geometry == 3) then + $:GPU_PARALLEL_LOOP(private='[j,k,l,r]', collapse=4) do l = 0, p do k = 0, n do j = 0, m @@ -311,104 +328,87 @@ contains + q_prim_vf0(momxb)%sf(j, k, l)*fd_coeff_x(r, j)* & q_prim_vf0(momxb + 1)%sf(r + j, k, l) & + q_prim_vf0(momxb + 1)%sf(j, k, l)*fd_coeff_y(r, k)* & - q_prim_vf0(momxb + 1)%sf(j, r + k, l) + q_prim_vf0(momxb + 1)%sf(j, r + k, l) & + + q_prim_vf0(momxe)%sf(j, k, l)*fd_coeff_z(r, l)* & + q_prim_vf0(momxb + 1)%sf(j, k, r + l)/y_cc(k) & + - (q_prim_vf0(momxe)%sf(j, k, l)**2._wp)/y_cc(k) end do end do end do end do - $:END_GPU_PARALLEL_LOOP() - else - if (grid_geometry == 3) then - $:GPU_PARALLEL_LOOP(private='[j,k,l,r]', collapse=4) - do l = 0, p - do k = 0, n - do j = 0, m - do r = -fd_number, fd_number - q_sf(j, k, l) = q_sf(j, k, l) & - + q_prim_vf0(momxb)%sf(j, k, l)*fd_coeff_x(r, j)* & - q_prim_vf0(momxb + 1)%sf(r + j, k, l) & - + q_prim_vf0(momxb + 1)%sf(j, k, l)*fd_coeff_y(r, k)* & - q_prim_vf0(momxb + 1)%sf(j, r + k, l) & - + q_prim_vf0(momxe)%sf(j, k, l)*fd_coeff_z(r, l)* & - q_prim_vf0(momxb + 1)%sf(j, k, r + l)/y_cc(k) & - - (q_prim_vf0(momxe)%sf(j, k, l)**2._wp)/y_cc(k) - end do - end do - end do - end do $:END_GPU_PARALLEL_LOOP() else $:GPU_PARALLEL_LOOP(private='[j,k,l,r]', collapse=4) - do l = 0, p - do k = 0, n - do j = 0, m - do r = -fd_number, fd_number - q_sf(j, k, l) = q_sf(j, k, l) & - + q_prim_vf0(momxb)%sf(j, k, l)*fd_coeff_x(r, j)* & - q_prim_vf0(momxb + 1)%sf(r + j, k, l) & - + q_prim_vf0(momxb + 1)%sf(j, k, l)*fd_coeff_y(r, k)* & - q_prim_vf0(momxb + 1)%sf(j, r + k, l) & - + q_prim_vf0(momxe)%sf(j, k, l)*fd_coeff_z(r, l)* & - q_prim_vf0(momxb + 1)%sf(j, k, r + l) - end do + do l = 0, p + do k = 0, n + do j = 0, m + do r = -fd_number, fd_number + q_sf(j, k, l) = q_sf(j, k, l) & + + q_prim_vf0(momxb)%sf(j, k, l)*fd_coeff_x(r, j)* & + q_prim_vf0(momxb + 1)%sf(r + j, k, l) & + + q_prim_vf0(momxb + 1)%sf(j, k, l)*fd_coeff_y(r, k)* & + q_prim_vf0(momxb + 1)%sf(j, r + k, l) & + + q_prim_vf0(momxe)%sf(j, k, l)*fd_coeff_z(r, l)* & + q_prim_vf0(momxb + 1)%sf(j, k, r + l) end do end do end do + end do $:END_GPU_PARALLEL_LOOP() end if end if ! Computing the acceleration component in the z-coordinate direction else $:GPU_PARALLEL_LOOP(private='[j,k,l]', collapse=3) - do l = 0, p - do k = 0, n - do j = 0, m - q_sf(j, k, l) = (11._wp*q_prim_vf0(momxe)%sf(j, k, l) & - - 18._wp*q_prim_vf1(momxe)%sf(j, k, l) & - + 9._wp*q_prim_vf2(momxe)%sf(j, k, l) & - - 2._wp*q_prim_vf3(momxe)%sf(j, k, l))/(6._wp*dt) - end do + do l = 0, p + do k = 0, n + do j = 0, m + q_sf(j, k, l) = (11._wp*q_prim_vf0(momxe)%sf(j, k, l) & + - 18._wp*q_prim_vf1(momxe)%sf(j, k, l) & + + 9._wp*q_prim_vf2(momxe)%sf(j, k, l) & + - 2._wp*q_prim_vf3(momxe)%sf(j, k, l))/(6._wp*dt) end do end do + end do $:END_GPU_PARALLEL_LOOP() if (grid_geometry == 3) then $:GPU_PARALLEL_LOOP(private='[j,k,l,r]', collapse=4) - do l = 0, p - do k = 0, n - do j = 0, m - do r = -fd_number, fd_number - q_sf(j, k, l) = q_sf(j, k, l) & - + q_prim_vf0(momxb)%sf(j, k, l)*fd_coeff_x(r, j)* & - q_prim_vf0(momxe)%sf(r + j, k, l) & - + q_prim_vf0(momxb + 1)%sf(j, k, l)*fd_coeff_y(r, k)* & - q_prim_vf0(momxe)%sf(j, r + k, l) & - + q_prim_vf0(momxe)%sf(j, k, l)*fd_coeff_z(r, l)* & - q_prim_vf0(momxe)%sf(j, k, r + l)/y_cc(k) & - + (q_prim_vf0(momxe)%sf(j, k, l)* & - q_prim_vf0(momxb + 1)%sf(j, k, l))/y_cc(k) - end do + do l = 0, p + do k = 0, n + do j = 0, m + do r = -fd_number, fd_number + q_sf(j, k, l) = q_sf(j, k, l) & + + q_prim_vf0(momxb)%sf(j, k, l)*fd_coeff_x(r, j)* & + q_prim_vf0(momxe)%sf(r + j, k, l) & + + q_prim_vf0(momxb + 1)%sf(j, k, l)*fd_coeff_y(r, k)* & + q_prim_vf0(momxe)%sf(j, r + k, l) & + + q_prim_vf0(momxe)%sf(j, k, l)*fd_coeff_z(r, l)* & + q_prim_vf0(momxe)%sf(j, k, r + l)/y_cc(k) & + + (q_prim_vf0(momxe)%sf(j, k, l)* & + q_prim_vf0(momxb + 1)%sf(j, k, l))/y_cc(k) end do end do end do + end do $:END_GPU_PARALLEL_LOOP() else $:GPU_PARALLEL_LOOP(private='[j,k,l,r]', collapse=4) - do l = 0, p - do k = 0, n - do j = 0, m - do r = -fd_number, fd_number - q_sf(j, k, l) = q_sf(j, k, l) & - + q_prim_vf0(momxb)%sf(j, k, l)*fd_coeff_x(r, j)* & - q_prim_vf0(momxe)%sf(r + j, k, l) & - + q_prim_vf0(momxb + 1)%sf(j, k, l)*fd_coeff_y(r, k)* & - q_prim_vf0(momxe)%sf(j, r + k, l) & - + q_prim_vf0(momxe)%sf(j, k, l)*fd_coeff_z(r, l)* & - q_prim_vf0(momxe)%sf(j, k, r + l) - end do + do l = 0, p + do k = 0, n + do j = 0, m + do r = -fd_number, fd_number + q_sf(j, k, l) = q_sf(j, k, l) & + + q_prim_vf0(momxb)%sf(j, k, l)*fd_coeff_x(r, j)* & + q_prim_vf0(momxe)%sf(r + j, k, l) & + + q_prim_vf0(momxb + 1)%sf(j, k, l)*fd_coeff_y(r, k)* & + q_prim_vf0(momxe)%sf(j, r + k, l) & + + q_prim_vf0(momxe)%sf(j, k, l)*fd_coeff_z(r, l)* & + q_prim_vf0(momxe)%sf(j, k, r + l) end do end do end do + end do $:END_GPU_PARALLEL_LOOP() end if end if @@ -439,79 +439,79 @@ contains if (n == 0) then !1D simulation $:GPU_PARALLEL_LOOP(collapse=3,private='[j,k,l,dV]') - do l = 0, p !Loop over grid - do k = 0, n - do j = 0, m - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids !Loop over individual fluids - dV = dx(j) - ! Mass - $:GPU_ATOMIC(atomic='update') - c_m(i, 1) = c_m(i, 1) + q_vf(i)%sf(j, k, l)*dV - ! x-location weighted - $:GPU_ATOMIC(atomic='update') - c_m(i, 2) = c_m(i, 2) + q_vf(i)%sf(j, k, l)*dV*x_cc(j) - ! Volume fraction - $:GPU_ATOMIC(atomic='update') - c_m(i, 5) = c_m(i, 5) + q_vf(i + advxb - 1)%sf(j, k, l)*dV - end do + do l = 0, p !Loop over grid + do k = 0, n + do j = 0, m + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids !Loop over individual fluids + dV = dx(j) + ! Mass + $:GPU_ATOMIC(atomic='update') + c_m(i, 1) = c_m(i, 1) + q_vf(i)%sf(j, k, l)*dV + ! x-location weighted + $:GPU_ATOMIC(atomic='update') + c_m(i, 2) = c_m(i, 2) + q_vf(i)%sf(j, k, l)*dV*x_cc(j) + ! Volume fraction + $:GPU_ATOMIC(atomic='update') + c_m(i, 5) = c_m(i, 5) + q_vf(i + advxb - 1)%sf(j, k, l)*dV end do end do end do + end do $:END_GPU_PARALLEL_LOOP() elseif (p == 0) then !2D simulation $:GPU_PARALLEL_LOOP(collapse=3,private='[j,k,l,dV]') - do l = 0, p !Loop over grid - do k = 0, n - do j = 0, m - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids !Loop over individual fluids - dV = dx(j)*dy(k) - ! Mass - $:GPU_ATOMIC(atomic='update') - c_m(i, 1) = c_m(i, 1) + q_vf(i)%sf(j, k, l)*dV - ! x-location weighted - $:GPU_ATOMIC(atomic='update') - c_m(i, 2) = c_m(i, 2) + q_vf(i)%sf(j, k, l)*dV*x_cc(j) - ! y-location weighted - $:GPU_ATOMIC(atomic='update') - c_m(i, 3) = c_m(i, 3) + q_vf(i)%sf(j, k, l)*dV*y_cc(k) - ! Volume fraction - $:GPU_ATOMIC(atomic='update') - c_m(i, 5) = c_m(i, 5) + q_vf(i + advxb - 1)%sf(j, k, l)*dV - end do + do l = 0, p !Loop over grid + do k = 0, n + do j = 0, m + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids !Loop over individual fluids + dV = dx(j)*dy(k) + ! Mass + $:GPU_ATOMIC(atomic='update') + c_m(i, 1) = c_m(i, 1) + q_vf(i)%sf(j, k, l)*dV + ! x-location weighted + $:GPU_ATOMIC(atomic='update') + c_m(i, 2) = c_m(i, 2) + q_vf(i)%sf(j, k, l)*dV*x_cc(j) + ! y-location weighted + $:GPU_ATOMIC(atomic='update') + c_m(i, 3) = c_m(i, 3) + q_vf(i)%sf(j, k, l)*dV*y_cc(k) + ! Volume fraction + $:GPU_ATOMIC(atomic='update') + c_m(i, 5) = c_m(i, 5) + q_vf(i + advxb - 1)%sf(j, k, l)*dV end do end do end do + end do $:END_GPU_PARALLEL_LOOP() else !3D simulation $:GPU_PARALLEL_LOOP(collapse=3,private='[j,k,l,dV]') - do l = 0, p !Loop over grid - do k = 0, n - do j = 0, m - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids !Loop over individual fluids - - dV = dx(j)*dy(k)*dz(l) - ! Mass - $:GPU_ATOMIC(atomic='update') - c_m(i, 1) = c_m(i, 1) + q_vf(i)%sf(j, k, l)*dV - ! x-location weighted - $:GPU_ATOMIC(atomic='update') - c_m(i, 2) = c_m(i, 2) + q_vf(i)%sf(j, k, l)*dV*x_cc(j) - ! y-location weighted - $:GPU_ATOMIC(atomic='update') - c_m(i, 3) = c_m(i, 3) + q_vf(i)%sf(j, k, l)*dV*y_cc(k) - ! z-location weighted - $:GPU_ATOMIC(atomic='update') - c_m(i, 4) = c_m(i, 4) + q_vf(i)%sf(j, k, l)*dV*z_cc(l) - ! Volume fraction - $:GPU_ATOMIC(atomic='update') - c_m(i, 5) = c_m(i, 5) + q_vf(i + advxb - 1)%sf(j, k, l)*dV - end do + do l = 0, p !Loop over grid + do k = 0, n + do j = 0, m + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids !Loop over individual fluids + + dV = dx(j)*dy(k)*dz(l) + ! Mass + $:GPU_ATOMIC(atomic='update') + c_m(i, 1) = c_m(i, 1) + q_vf(i)%sf(j, k, l)*dV + ! x-location weighted + $:GPU_ATOMIC(atomic='update') + c_m(i, 2) = c_m(i, 2) + q_vf(i)%sf(j, k, l)*dV*x_cc(j) + ! y-location weighted + $:GPU_ATOMIC(atomic='update') + c_m(i, 3) = c_m(i, 3) + q_vf(i)%sf(j, k, l)*dV*y_cc(k) + ! z-location weighted + $:GPU_ATOMIC(atomic='update') + c_m(i, 4) = c_m(i, 4) + q_vf(i)%sf(j, k, l)*dV*z_cc(l) + ! Volume fraction + $:GPU_ATOMIC(atomic='update') + c_m(i, 5) = c_m(i, 5) + q_vf(i + advxb - 1)%sf(j, k, l)*dV end do end do end do + end do $:END_GPU_PARALLEL_LOOP() end if diff --git a/src/simulation/m_fftw.fpp b/src/simulation/m_fftw.fpp index 975e32f89..f381e800d 100644 --- a/src/simulation/m_fftw.fpp +++ b/src/simulation/m_fftw.fpp @@ -142,23 +142,23 @@ contains #if defined(MFC_GPU) $:GPU_PARALLEL_LOOP(private='[j,k,l]', collapse=3) - do k = 1, sys_size - do j = 0, m - do l = 1, cmplx_size - data_fltr_cmplx_gpu(l + j*cmplx_size + (k - 1)*cmplx_size*x_size) = (0_dp, 0_dp) - end do + do k = 1, sys_size + do j = 0, m + do l = 1, cmplx_size + data_fltr_cmplx_gpu(l + j*cmplx_size + (k - 1)*cmplx_size*x_size) = (0_dp, 0_dp) end do end do + end do $:END_GPU_PARALLEL_LOOP() $:GPU_PARALLEL_LOOP(private='[j,k,l]', collapse=3) - do k = 1, sys_size - do j = 0, m - do l = 0, p - data_real_gpu(l + j*real_size + 1 + (k - 1)*real_size*x_size) = q_cons_vf(k)%sf(j, 0, l) - end do + do k = 1, sys_size + do j = 0, m + do l = 0, p + data_real_gpu(l + j*real_size + 1 + (k - 1)*real_size*x_size) = q_cons_vf(k)%sf(j, 0, l) end do end do + end do $:END_GPU_PARALLEL_LOOP() #:if not USING_NVHPC @@ -180,13 +180,13 @@ contains $:GPU_UPDATE(device='[Nfq]') $:GPU_PARALLEL_LOOP(private='[j,k,l]', collapse=3) - do k = 1, sys_size - do j = 0, m - do l = 1, Nfq - data_fltr_cmplx_gpu(l + j*cmplx_size + (k - 1)*cmplx_size*x_size) = data_cmplx_gpu(l + j*cmplx_size + (k - 1)*cmplx_size*x_size) - end do + do k = 1, sys_size + do j = 0, m + do l = 1, Nfq + data_fltr_cmplx_gpu(l + j*cmplx_size + (k - 1)*cmplx_size*x_size) = data_cmplx_gpu(l + j*cmplx_size + (k - 1)*cmplx_size*x_size) end do end do + end do $:END_GPU_PARALLEL_LOOP() #:call GPU_HOST_DATA(use_device_ptr='[p_real, p_fltr_cmplx]') @@ -199,36 +199,36 @@ contains #:endcall GPU_HOST_DATA $:GPU_PARALLEL_LOOP(private='[j,k,l]', collapse=3) - do k = 1, sys_size - do j = 0, m - do l = 0, p - data_real_gpu(l + j*real_size + 1 + (k - 1)*real_size*x_size) = data_real_gpu(l + j*real_size + 1 + (k - 1)*real_size*x_size)/real(real_size, dp) - q_cons_vf(k)%sf(j, 0, l) = data_real_gpu(l + j*real_size + 1 + (k - 1)*real_size*x_size) - end do + do k = 1, sys_size + do j = 0, m + do l = 0, p + data_real_gpu(l + j*real_size + 1 + (k - 1)*real_size*x_size) = data_real_gpu(l + j*real_size + 1 + (k - 1)*real_size*x_size)/real(real_size, dp) + q_cons_vf(k)%sf(j, 0, l) = data_real_gpu(l + j*real_size + 1 + (k - 1)*real_size*x_size) end do end do + end do $:END_GPU_PARALLEL_LOOP() do i = 1, fourier_rings $:GPU_PARALLEL_LOOP(private='[j,k,l]', collapse=3) - do k = 1, sys_size - do j = 0, m - do l = 1, cmplx_size - data_fltr_cmplx_gpu(l + j*cmplx_size + (k - 1)*cmplx_size*x_size) = (0_dp, 0_dp) - end do + do k = 1, sys_size + do j = 0, m + do l = 1, cmplx_size + data_fltr_cmplx_gpu(l + j*cmplx_size + (k - 1)*cmplx_size*x_size) = (0_dp, 0_dp) end do end do + end do $:END_GPU_PARALLEL_LOOP() $:GPU_PARALLEL_LOOP(private='[j,k,l]', collapse=3, firstprivate='[i]') - do k = 1, sys_size - do j = 0, m - do l = 0, p - data_real_gpu(l + j*real_size + 1 + (k - 1)*real_size*x_size) = q_cons_vf(k)%sf(j, i, l) - end do + do k = 1, sys_size + do j = 0, m + do l = 0, p + data_real_gpu(l + j*real_size + 1 + (k - 1)*real_size*x_size) = q_cons_vf(k)%sf(j, i, l) end do end do + end do $:END_GPU_PARALLEL_LOOP() #:call GPU_HOST_DATA(use_device_ptr='[p_real, p_cmplx]') @@ -244,13 +244,13 @@ contains $:GPU_UPDATE(device='[Nfq]') $:GPU_PARALLEL_LOOP(private='[j,k,l]', collapse=3) - do k = 1, sys_size - do j = 0, m - do l = 1, Nfq - data_fltr_cmplx_gpu(l + j*cmplx_size + (k - 1)*cmplx_size*x_size) = data_cmplx_gpu(l + j*cmplx_size + (k - 1)*cmplx_size*x_size) - end do + do k = 1, sys_size + do j = 0, m + do l = 1, Nfq + data_fltr_cmplx_gpu(l + j*cmplx_size + (k - 1)*cmplx_size*x_size) = data_cmplx_gpu(l + j*cmplx_size + (k - 1)*cmplx_size*x_size) end do end do + end do $:END_GPU_PARALLEL_LOOP() #:call GPU_HOST_DATA(use_device_ptr='[p_real, p_fltr_cmplx]') @@ -263,14 +263,14 @@ contains #:endcall GPU_HOST_DATA $:GPU_PARALLEL_LOOP(private='[j,k,l]', collapse=3, firstprivate='[i]') - do k = 1, sys_size - do j = 0, m - do l = 0, p - data_real_gpu(l + j*real_size + 1 + (k - 1)*real_size*x_size) = data_real_gpu(l + j*real_size + 1 + (k - 1)*real_size*x_size)/real(real_size, dp) - q_cons_vf(k)%sf(j, i, l) = data_real_gpu(l + j*real_size + 1 + (k - 1)*real_size*x_size) - end do + do k = 1, sys_size + do j = 0, m + do l = 0, p + data_real_gpu(l + j*real_size + 1 + (k - 1)*real_size*x_size) = data_real_gpu(l + j*real_size + 1 + (k - 1)*real_size*x_size)/real(real_size, dp) + q_cons_vf(k)%sf(j, i, l) = data_real_gpu(l + j*real_size + 1 + (k - 1)*real_size*x_size) end do end do + end do $:END_GPU_PARALLEL_LOOP() end do diff --git a/src/simulation/m_hyperelastic.fpp b/src/simulation/m_hyperelastic.fpp index ef85f272e..d6646958b 100644 --- a/src/simulation/m_hyperelastic.fpp +++ b/src/simulation/m_hyperelastic.fpp @@ -107,106 +107,106 @@ contains integer :: j, k, l, i, r $:GPU_PARALLEL_LOOP(collapse=3, private='[j,k,l,alpha_K, alpha_rho_K, rho, gamma, pi_inf, qv, G_local, Re, tensora, tensorb]') - do l = 0, p - do k = 0, n - do j = 0, m + do l = 0, p + do k = 0, n + do j = 0, m + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids + alpha_rho_k(i) = q_cons_vf(i)%sf(j, k, l) + alpha_k(i) = q_cons_vf(advxb + i - 1)%sf(j, k, l) + end do + ! If in simulation, use acc mixture subroutines + call s_convert_species_to_mixture_variables_acc(rho, gamma, pi_inf, qv, alpha_k, & + alpha_rho_k, Re, G_local, Gs_hyper) + rho = max(rho, sgm_eps) + G_local = max(G_local, sgm_eps) + !if ( G_local <= verysmall ) G_K = 0._wp + + if (G_local > verysmall) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, tensor_size + tensora(i) = 0._wp + end do + ! STEP 1: computing the grad_xi tensor using finite differences + ! grad_xi definition / organization + ! number for the tensor 1-3: dxix_dx, dxiy_dx, dxiz_dx + ! 4-6 : dxix_dy, dxiy_dy, dxiz_dy + ! 7-9 : dxix_dz, dxiy_dz, dxiz_dz $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - alpha_rho_k(i) = q_cons_vf(i)%sf(j, k, l) - alpha_k(i) = q_cons_vf(advxb + i - 1)%sf(j, k, l) + do r = -fd_number, fd_number + ! derivatives in the x-direction + tensora(1) = tensora(1) + q_prim_vf(xibeg)%sf(j + r, k, l)*fd_coeff_x_hyper(r, j) + tensora(2) = tensora(2) + q_prim_vf(xibeg + 1)%sf(j + r, k, l)*fd_coeff_x_hyper(r, j) + tensora(3) = tensora(3) + q_prim_vf(xiend)%sf(j + r, k, l)*fd_coeff_x_hyper(r, j) + ! derivatives in the y-direction + tensora(4) = tensora(4) + q_prim_vf(xibeg)%sf(j, k + r, l)*fd_coeff_y_hyper(r, k) + tensora(5) = tensora(5) + q_prim_vf(xibeg + 1)%sf(j, k + r, l)*fd_coeff_y_hyper(r, k) + tensora(6) = tensora(6) + q_prim_vf(xiend)%sf(j, k + r, l)*fd_coeff_y_hyper(r, k) + ! derivatives in the z-direction + tensora(7) = tensora(7) + q_prim_vf(xibeg)%sf(j, k, l + r)*fd_coeff_z_hyper(r, l) + tensora(8) = tensora(8) + q_prim_vf(xibeg + 1)%sf(j, k, l + r)*fd_coeff_z_hyper(r, l) + tensora(9) = tensora(9) + q_prim_vf(xiend)%sf(j, k, l + r)*fd_coeff_z_hyper(r, l) end do - ! If in simulation, use acc mixture subroutines - call s_convert_species_to_mixture_variables_acc(rho, gamma, pi_inf, qv, alpha_k, & - alpha_rho_k, Re, G_local, Gs_hyper) - rho = max(rho, sgm_eps) - G_local = max(G_local, sgm_eps) - !if ( G_local <= verysmall ) G_K = 0._wp - - if (G_local > verysmall) then + ! STEP 2a: computing the adjoint of the grad_xi tensor for the inverse + tensorb(1) = tensora(5)*tensora(9) - tensora(6)*tensora(8) + tensorb(2) = -(tensora(2)*tensora(9) - tensora(3)*tensora(8)) + tensorb(3) = tensora(2)*tensora(6) - tensora(3)*tensora(5) + tensorb(4) = -(tensora(4)*tensora(9) - tensora(6)*tensora(7)) + tensorb(5) = tensora(1)*tensora(9) - tensora(3)*tensora(7) + tensorb(6) = -(tensora(1)*tensora(6) - tensora(4)*tensora(3)) + tensorb(7) = tensora(4)*tensora(8) - tensora(5)*tensora(7) + tensorb(8) = -(tensora(1)*tensora(8) - tensora(2)*tensora(7)) + tensorb(9) = tensora(1)*tensora(5) - tensora(2)*tensora(4) + + ! STEP 2b: computing the determinant of the grad_xi tensor + tensorb(tensor_size) = tensora(1)*(tensora(5)*tensora(9) - tensora(6)*tensora(8)) & + - tensora(2)*(tensora(4)*tensora(9) - tensora(6)*tensora(7)) & + + tensora(3)*(tensora(4)*tensora(8) - tensora(5)*tensora(7)) + + if (tensorb(tensor_size) > verysmall) then + ! STEP 2c: computing the inverse of grad_xi tensor = F + ! tensorb is the adjoint, tensora becomes F $:GPU_LOOP(parallelism='[seq]') - do i = 1, tensor_size - tensora(i) = 0._wp + do i = 1, tensor_size - 1 + tensora(i) = tensorb(i)/tensorb(tensor_size) end do - ! STEP 1: computing the grad_xi tensor using finite differences - ! grad_xi definition / organization - ! number for the tensor 1-3: dxix_dx, dxiy_dx, dxiz_dx - ! 4-6 : dxix_dy, dxiy_dy, dxiz_dy - ! 7-9 : dxix_dz, dxiy_dz, dxiz_dz + + ! STEP 2d: computing the J = det(F) = 1/det(\grad{\xi}) + tensorb(tensor_size) = 1._wp/tensorb(tensor_size) + + ! STEP 3: computing F transpose F + tensorb(1) = tensora(1)**2 + tensora(2)**2 + tensora(3)**2 + tensorb(5) = tensora(4)**2 + tensora(5)**2 + tensora(6)**2 + tensorb(9) = tensora(7)**2 + tensora(8)**2 + tensora(9)**2 + tensorb(2) = tensora(1)*tensora(4) + tensora(2)*tensora(5) + tensora(3)*tensora(6) + tensorb(3) = tensora(1)*tensora(7) + tensora(2)*tensora(8) + tensora(3)*tensora(9) + tensorb(6) = tensora(4)*tensora(7) + tensora(5)*tensora(8) + tensora(6)*tensora(9) + ! STEP 4: update the btensor, this is consistent with Riemann solvers + #:for BIJ, TXY in [(1,1),(2,2),(3,5),(4,3),(5,6),(6,9)] + btensor%vf(${BIJ}$)%sf(j, k, l) = tensorb(${TXY}$) + #:endfor + ! store the determinant at the last entry of the btensor + btensor%vf(b_size)%sf(j, k, l) = tensorb(tensor_size) + ! STEP 5a: updating the Cauchy stress primitive scalar field + if (hyper_model == 1) then + call s_neoHookean_cauchy_solver(btensor%vf, q_prim_vf, G_local, j, k, l) + elseif (hyper_model == 2) then + call s_Mooney_Rivlin_cauchy_solver(btensor%vf, q_prim_vf, G_local, j, k, l) + end if + ! STEP 5b: updating the pressure field + q_prim_vf(E_idx)%sf(j, k, l) = q_prim_vf(E_idx)%sf(j, k, l) - & + G_local*q_prim_vf(xiend + 1)%sf(j, k, l)/gamma + ! STEP 5c: updating the Cauchy stress conservative scalar field $:GPU_LOOP(parallelism='[seq]') - do r = -fd_number, fd_number - ! derivatives in the x-direction - tensora(1) = tensora(1) + q_prim_vf(xibeg)%sf(j + r, k, l)*fd_coeff_x_hyper(r, j) - tensora(2) = tensora(2) + q_prim_vf(xibeg + 1)%sf(j + r, k, l)*fd_coeff_x_hyper(r, j) - tensora(3) = tensora(3) + q_prim_vf(xiend)%sf(j + r, k, l)*fd_coeff_x_hyper(r, j) - ! derivatives in the y-direction - tensora(4) = tensora(4) + q_prim_vf(xibeg)%sf(j, k + r, l)*fd_coeff_y_hyper(r, k) - tensora(5) = tensora(5) + q_prim_vf(xibeg + 1)%sf(j, k + r, l)*fd_coeff_y_hyper(r, k) - tensora(6) = tensora(6) + q_prim_vf(xiend)%sf(j, k + r, l)*fd_coeff_y_hyper(r, k) - ! derivatives in the z-direction - tensora(7) = tensora(7) + q_prim_vf(xibeg)%sf(j, k, l + r)*fd_coeff_z_hyper(r, l) - tensora(8) = tensora(8) + q_prim_vf(xibeg + 1)%sf(j, k, l + r)*fd_coeff_z_hyper(r, l) - tensora(9) = tensora(9) + q_prim_vf(xiend)%sf(j, k, l + r)*fd_coeff_z_hyper(r, l) + do i = 1, b_size - 1 + q_cons_vf(strxb + i - 1)%sf(j, k, l) = & + rho*q_prim_vf(strxb + i - 1)%sf(j, k, l) end do - ! STEP 2a: computing the adjoint of the grad_xi tensor for the inverse - tensorb(1) = tensora(5)*tensora(9) - tensora(6)*tensora(8) - tensorb(2) = -(tensora(2)*tensora(9) - tensora(3)*tensora(8)) - tensorb(3) = tensora(2)*tensora(6) - tensora(3)*tensora(5) - tensorb(4) = -(tensora(4)*tensora(9) - tensora(6)*tensora(7)) - tensorb(5) = tensora(1)*tensora(9) - tensora(3)*tensora(7) - tensorb(6) = -(tensora(1)*tensora(6) - tensora(4)*tensora(3)) - tensorb(7) = tensora(4)*tensora(8) - tensora(5)*tensora(7) - tensorb(8) = -(tensora(1)*tensora(8) - tensora(2)*tensora(7)) - tensorb(9) = tensora(1)*tensora(5) - tensora(2)*tensora(4) - - ! STEP 2b: computing the determinant of the grad_xi tensor - tensorb(tensor_size) = tensora(1)*(tensora(5)*tensora(9) - tensora(6)*tensora(8)) & - - tensora(2)*(tensora(4)*tensora(9) - tensora(6)*tensora(7)) & - + tensora(3)*(tensora(4)*tensora(8) - tensora(5)*tensora(7)) - - if (tensorb(tensor_size) > verysmall) then - ! STEP 2c: computing the inverse of grad_xi tensor = F - ! tensorb is the adjoint, tensora becomes F - $:GPU_LOOP(parallelism='[seq]') - do i = 1, tensor_size - 1 - tensora(i) = tensorb(i)/tensorb(tensor_size) - end do - - ! STEP 2d: computing the J = det(F) = 1/det(\grad{\xi}) - tensorb(tensor_size) = 1._wp/tensorb(tensor_size) - - ! STEP 3: computing F transpose F - tensorb(1) = tensora(1)**2 + tensora(2)**2 + tensora(3)**2 - tensorb(5) = tensora(4)**2 + tensora(5)**2 + tensora(6)**2 - tensorb(9) = tensora(7)**2 + tensora(8)**2 + tensora(9)**2 - tensorb(2) = tensora(1)*tensora(4) + tensora(2)*tensora(5) + tensora(3)*tensora(6) - tensorb(3) = tensora(1)*tensora(7) + tensora(2)*tensora(8) + tensora(3)*tensora(9) - tensorb(6) = tensora(4)*tensora(7) + tensora(5)*tensora(8) + tensora(6)*tensora(9) - ! STEP 4: update the btensor, this is consistent with Riemann solvers - #:for BIJ, TXY in [(1,1),(2,2),(3,5),(4,3),(5,6),(6,9)] - btensor%vf(${BIJ}$)%sf(j, k, l) = tensorb(${TXY}$) - #:endfor - ! store the determinant at the last entry of the btensor - btensor%vf(b_size)%sf(j, k, l) = tensorb(tensor_size) - ! STEP 5a: updating the Cauchy stress primitive scalar field - if (hyper_model == 1) then - call s_neoHookean_cauchy_solver(btensor%vf, q_prim_vf, G_local, j, k, l) - elseif (hyper_model == 2) then - call s_Mooney_Rivlin_cauchy_solver(btensor%vf, q_prim_vf, G_local, j, k, l) - end if - ! STEP 5b: updating the pressure field - q_prim_vf(E_idx)%sf(j, k, l) = q_prim_vf(E_idx)%sf(j, k, l) - & - G_local*q_prim_vf(xiend + 1)%sf(j, k, l)/gamma - ! STEP 5c: updating the Cauchy stress conservative scalar field - $:GPU_LOOP(parallelism='[seq]') - do i = 1, b_size - 1 - q_cons_vf(strxb + i - 1)%sf(j, k, l) = & - rho*q_prim_vf(strxb + i - 1)%sf(j, k, l) - end do - end if end if - end do + end if end do end do + end do $:END_GPU_PARALLEL_LOOP() end subroutine s_hyperelastic_rmt_stress_update diff --git a/src/simulation/m_hypoelastic.fpp b/src/simulation/m_hypoelastic.fpp index 36d47213a..aed818fda 100644 --- a/src/simulation/m_hypoelastic.fpp +++ b/src/simulation/m_hypoelastic.fpp @@ -105,270 +105,270 @@ contains ! TODO: re-organize these loops one by one for GPU efficiency if possible? $:GPU_PARALLEL_LOOP(private='[k,l,q]', collapse=3) - do q = 0, p - do l = 0, n - do k = 0, m - du_dx_hypo(k, l, q) = 0._wp - end do + do q = 0, p + do l = 0, n + do k = 0, m + du_dx_hypo(k, l, q) = 0._wp end do end do + end do $:END_GPU_PARALLEL_LOOP() $:GPU_PARALLEL_LOOP(private='[k,l,q]', collapse=3) - do q = 0, p - do l = 0, n - do k = 0, m - $:GPU_LOOP(parallelism='[seq]') - do r = -fd_number, fd_number - du_dx_hypo(k, l, q) = du_dx_hypo(k, l, q) & - + q_prim_vf(momxb)%sf(k + r, l, q)*fd_coeff_x_hypo(r, k) - end do - + do q = 0, p + do l = 0, n + do k = 0, m + $:GPU_LOOP(parallelism='[seq]') + do r = -fd_number, fd_number + du_dx_hypo(k, l, q) = du_dx_hypo(k, l, q) & + + q_prim_vf(momxb)%sf(k + r, l, q)*fd_coeff_x_hypo(r, k) end do + end do end do + end do $:END_GPU_PARALLEL_LOOP() if (ndirs > 1) then $:GPU_PARALLEL_LOOP(private='[k,l,q]', collapse=3) - do q = 0, p - do l = 0, n - do k = 0, m - du_dy_hypo(k, l, q) = 0._wp; dv_dx_hypo(k, l, q) = 0._wp; dv_dy_hypo(k, l, q) = 0._wp - end do + do q = 0, p + do l = 0, n + do k = 0, m + du_dy_hypo(k, l, q) = 0._wp; dv_dx_hypo(k, l, q) = 0._wp; dv_dy_hypo(k, l, q) = 0._wp end do end do + end do $:END_GPU_PARALLEL_LOOP() $:GPU_PARALLEL_LOOP(private='[k,l,q]', collapse=3) - do q = 0, p - do l = 0, n - do k = 0, m - $:GPU_LOOP(parallelism='[seq]') - do r = -fd_number, fd_number - du_dy_hypo(k, l, q) = du_dy_hypo(k, l, q) & - + q_prim_vf(momxb)%sf(k, l + r, q)*fd_coeff_y_hypo(r, l) - dv_dx_hypo(k, l, q) = dv_dx_hypo(k, l, q) & - + q_prim_vf(momxb + 1)%sf(k + r, l, q)*fd_coeff_x_hypo(r, k) - dv_dy_hypo(k, l, q) = dv_dy_hypo(k, l, q) & - + q_prim_vf(momxb + 1)%sf(k, l + r, q)*fd_coeff_y_hypo(r, l) - end do + do q = 0, p + do l = 0, n + do k = 0, m + $:GPU_LOOP(parallelism='[seq]') + do r = -fd_number, fd_number + du_dy_hypo(k, l, q) = du_dy_hypo(k, l, q) & + + q_prim_vf(momxb)%sf(k, l + r, q)*fd_coeff_y_hypo(r, l) + dv_dx_hypo(k, l, q) = dv_dx_hypo(k, l, q) & + + q_prim_vf(momxb + 1)%sf(k + r, l, q)*fd_coeff_x_hypo(r, k) + dv_dy_hypo(k, l, q) = dv_dy_hypo(k, l, q) & + + q_prim_vf(momxb + 1)%sf(k, l + r, q)*fd_coeff_y_hypo(r, l) end do end do end do + end do $:END_GPU_PARALLEL_LOOP() ! 3D if (ndirs == 3) then $:GPU_PARALLEL_LOOP(private='[k,l,q]', collapse=3) - do q = 0, p - do l = 0, n - do k = 0, m - du_dz_hypo(k, l, q) = 0._wp; dv_dz_hypo(k, l, q) = 0._wp; dw_dx_hypo(k, l, q) = 0._wp; - dw_dy_hypo(k, l, q) = 0._wp; dw_dz_hypo(k, l, q) = 0._wp; - end do + do q = 0, p + do l = 0, n + do k = 0, m + du_dz_hypo(k, l, q) = 0._wp; dv_dz_hypo(k, l, q) = 0._wp; dw_dx_hypo(k, l, q) = 0._wp; + dw_dy_hypo(k, l, q) = 0._wp; dw_dz_hypo(k, l, q) = 0._wp; end do end do + end do $:END_GPU_PARALLEL_LOOP() $:GPU_PARALLEL_LOOP(private='[k,l,q]', collapse=3) - do q = 0, p - do l = 0, n - do k = 0, m - $:GPU_LOOP(parallelism='[seq]') - do r = -fd_number, fd_number - du_dz_hypo(k, l, q) = du_dz_hypo(k, l, q) & - + q_prim_vf(momxb)%sf(k, l, q + r)*fd_coeff_z_hypo(r, q) - dv_dz_hypo(k, l, q) = dv_dz_hypo(k, l, q) & - + q_prim_vf(momxb + 1)%sf(k, l, q + r)*fd_coeff_z_hypo(r, q) - dw_dx_hypo(k, l, q) = dw_dx_hypo(k, l, q) & - + q_prim_vf(momxe)%sf(k + r, l, q)*fd_coeff_x_hypo(r, k) - dw_dy_hypo(k, l, q) = dw_dy_hypo(k, l, q) & - + q_prim_vf(momxe)%sf(k, l + r, q)*fd_coeff_y_hypo(r, l) - dw_dz_hypo(k, l, q) = dw_dz_hypo(k, l, q) & - + q_prim_vf(momxe)%sf(k, l, q + r)*fd_coeff_z_hypo(r, q) - end do + do q = 0, p + do l = 0, n + do k = 0, m + $:GPU_LOOP(parallelism='[seq]') + do r = -fd_number, fd_number + du_dz_hypo(k, l, q) = du_dz_hypo(k, l, q) & + + q_prim_vf(momxb)%sf(k, l, q + r)*fd_coeff_z_hypo(r, q) + dv_dz_hypo(k, l, q) = dv_dz_hypo(k, l, q) & + + q_prim_vf(momxb + 1)%sf(k, l, q + r)*fd_coeff_z_hypo(r, q) + dw_dx_hypo(k, l, q) = dw_dx_hypo(k, l, q) & + + q_prim_vf(momxe)%sf(k + r, l, q)*fd_coeff_x_hypo(r, k) + dw_dy_hypo(k, l, q) = dw_dy_hypo(k, l, q) & + + q_prim_vf(momxe)%sf(k, l + r, q)*fd_coeff_y_hypo(r, l) + dw_dz_hypo(k, l, q) = dw_dz_hypo(k, l, q) & + + q_prim_vf(momxe)%sf(k, l, q + r)*fd_coeff_z_hypo(r, q) end do end do end do + end do $:END_GPU_PARALLEL_LOOP() end if end if $:GPU_PARALLEL_LOOP(private='[k,l,q]', collapse=3) - do q = 0, p - do l = 0, n - do k = 0, m - rho_K = 0._wp; G_K = 0._wp - do i = 1, num_fluids - rho_K = rho_K + q_prim_vf(i)%sf(k, l, q) !alpha_rho_K(1) - G_K = G_K + q_prim_vf(advxb - 1 + i)%sf(k, l, q)*Gs_hypo(i) !alpha_K(1) * Gs_hypo(1) - end do + do q = 0, p + do l = 0, n + do k = 0, m + rho_K = 0._wp; G_K = 0._wp + do i = 1, num_fluids + rho_K = rho_K + q_prim_vf(i)%sf(k, l, q) !alpha_rho_K(1) + G_K = G_K + q_prim_vf(advxb - 1 + i)%sf(k, l, q)*Gs_hypo(i) !alpha_K(1) * Gs_hypo(1) + end do - if (cont_damage) G_K = G_K*max((1._wp - q_prim_vf(damage_idx)%sf(k, l, q)), 0._wp) + if (cont_damage) G_K = G_K*max((1._wp - q_prim_vf(damage_idx)%sf(k, l, q)), 0._wp) - rho_K_field(k, l, q) = rho_K - G_K_field(k, l, q) = G_K + rho_K_field(k, l, q) = rho_K + G_K_field(k, l, q) = G_K - !TODO: take this out if not needed - if (G_K < verysmall) then - G_K_field(k, l, q) = 0 - end if - end do + !TODO: take this out if not needed + if (G_K < verysmall) then + G_K_field(k, l, q) = 0 + end if end do end do + end do $:END_GPU_PARALLEL_LOOP() ! apply rhs source term to elastic stress equation $:GPU_PARALLEL_LOOP(private='[k,l,q]', collapse=3) - do q = 0, p - do l = 0, n - do k = 0, m - rhs_vf(strxb)%sf(k, l, q) = & - rhs_vf(strxb)%sf(k, l, q) + rho_K_field(k, l, q)* & - ((4._wp*G_K_field(k, l, q)/3._wp) + & - q_prim_vf(strxb)%sf(k, l, q))* & - du_dx_hypo(k, l, q) - end do + do q = 0, p + do l = 0, n + do k = 0, m + rhs_vf(strxb)%sf(k, l, q) = & + rhs_vf(strxb)%sf(k, l, q) + rho_K_field(k, l, q)* & + ((4._wp*G_K_field(k, l, q)/3._wp) + & + q_prim_vf(strxb)%sf(k, l, q))* & + du_dx_hypo(k, l, q) end do end do + end do $:END_GPU_PARALLEL_LOOP() elseif (idir == 2) then $:GPU_PARALLEL_LOOP(private='[k,l,q]', collapse=3) - do q = 0, p - do l = 0, n - do k = 0, m - rhs_vf(strxb)%sf(k, l, q) = rhs_vf(strxb)%sf(k, l, q) + rho_K_field(k, l, q)* & - (q_prim_vf(strxb + 1)%sf(k, l, q)*du_dy_hypo(k, l, q) + & - q_prim_vf(strxb + 1)%sf(k, l, q)*du_dy_hypo(k, l, q) - & - q_prim_vf(strxb)%sf(k, l, q)*dv_dy_hypo(k, l, q) - & - 2._wp*G_K_field(k, l, q)*(1._wp/3._wp)*dv_dy_hypo(k, l, q)) - - rhs_vf(strxb + 1)%sf(k, l, q) = rhs_vf(strxb + 1)%sf(k, l, q) + rho_K_field(k, l, q)* & - (q_prim_vf(strxb + 1)%sf(k, l, q)*du_dx_hypo(k, l, q) + & - q_prim_vf(strxb)%sf(k, l, q)*dv_dx_hypo(k, l, q) - & - q_prim_vf(strxb + 1)%sf(k, l, q)*du_dx_hypo(k, l, q) + & - q_prim_vf(strxb + 2)%sf(k, l, q)*du_dy_hypo(k, l, q) + & - q_prim_vf(strxb + 1)%sf(k, l, q)*dv_dy_hypo(k, l, q) - & - q_prim_vf(strxb + 1)%sf(k, l, q)*dv_dy_hypo(k, l, q) + & - 2._wp*G_K_field(k, l, q)*(1._wp/2._wp)*(du_dy_hypo(k, l, q) + & - dv_dx_hypo(k, l, q))) - - rhs_vf(strxb + 2)%sf(k, l, q) = rhs_vf(strxb + 2)%sf(k, l, q) + rho_K_field(k, l, q)* & - (q_prim_vf(strxb + 1)%sf(k, l, q)*dv_dx_hypo(k, l, q) + & - q_prim_vf(strxb + 1)%sf(k, l, q)*dv_dx_hypo(k, l, q) - & - q_prim_vf(strxb + 2)%sf(k, l, q)*du_dx_hypo(k, l, q) + & - q_prim_vf(strxb + 2)%sf(k, l, q)*dv_dy_hypo(k, l, q) + & - q_prim_vf(strxb + 2)%sf(k, l, q)*dv_dy_hypo(k, l, q) - & - q_prim_vf(strxb + 2)%sf(k, l, q)*dv_dy_hypo(k, l, q) + & - 2._wp*G_K_field(k, l, q)*(dv_dy_hypo(k, l, q) - (1._wp/3._wp)* & - (du_dx_hypo(k, l, q) + & - dv_dy_hypo(k, l, q)))) - end do + do q = 0, p + do l = 0, n + do k = 0, m + rhs_vf(strxb)%sf(k, l, q) = rhs_vf(strxb)%sf(k, l, q) + rho_K_field(k, l, q)* & + (q_prim_vf(strxb + 1)%sf(k, l, q)*du_dy_hypo(k, l, q) + & + q_prim_vf(strxb + 1)%sf(k, l, q)*du_dy_hypo(k, l, q) - & + q_prim_vf(strxb)%sf(k, l, q)*dv_dy_hypo(k, l, q) - & + 2._wp*G_K_field(k, l, q)*(1._wp/3._wp)*dv_dy_hypo(k, l, q)) + + rhs_vf(strxb + 1)%sf(k, l, q) = rhs_vf(strxb + 1)%sf(k, l, q) + rho_K_field(k, l, q)* & + (q_prim_vf(strxb + 1)%sf(k, l, q)*du_dx_hypo(k, l, q) + & + q_prim_vf(strxb)%sf(k, l, q)*dv_dx_hypo(k, l, q) - & + q_prim_vf(strxb + 1)%sf(k, l, q)*du_dx_hypo(k, l, q) + & + q_prim_vf(strxb + 2)%sf(k, l, q)*du_dy_hypo(k, l, q) + & + q_prim_vf(strxb + 1)%sf(k, l, q)*dv_dy_hypo(k, l, q) - & + q_prim_vf(strxb + 1)%sf(k, l, q)*dv_dy_hypo(k, l, q) + & + 2._wp*G_K_field(k, l, q)*(1._wp/2._wp)*(du_dy_hypo(k, l, q) + & + dv_dx_hypo(k, l, q))) + + rhs_vf(strxb + 2)%sf(k, l, q) = rhs_vf(strxb + 2)%sf(k, l, q) + rho_K_field(k, l, q)* & + (q_prim_vf(strxb + 1)%sf(k, l, q)*dv_dx_hypo(k, l, q) + & + q_prim_vf(strxb + 1)%sf(k, l, q)*dv_dx_hypo(k, l, q) - & + q_prim_vf(strxb + 2)%sf(k, l, q)*du_dx_hypo(k, l, q) + & + q_prim_vf(strxb + 2)%sf(k, l, q)*dv_dy_hypo(k, l, q) + & + q_prim_vf(strxb + 2)%sf(k, l, q)*dv_dy_hypo(k, l, q) - & + q_prim_vf(strxb + 2)%sf(k, l, q)*dv_dy_hypo(k, l, q) + & + 2._wp*G_K_field(k, l, q)*(dv_dy_hypo(k, l, q) - (1._wp/3._wp)* & + (du_dx_hypo(k, l, q) + & + dv_dy_hypo(k, l, q)))) end do end do + end do $:END_GPU_PARALLEL_LOOP() elseif (idir == 3) then $:GPU_PARALLEL_LOOP(private='[k,l,q]', collapse=3) - do q = 0, p - do l = 0, n - do k = 0, m - rhs_vf(strxb)%sf(k, l, q) = rhs_vf(strxb)%sf(k, l, q) + rho_K_field(k, l, q)* & - (q_prim_vf(strxb + 3)%sf(k, l, q)*du_dz_hypo(k, l, q) + & - q_prim_vf(strxb + 3)%sf(k, l, q)*du_dz_hypo(k, l, q) - & - q_prim_vf(strxb)%sf(k, l, q)*dw_dz_hypo(k, l, q) - & + do q = 0, p + do l = 0, n + do k = 0, m + rhs_vf(strxb)%sf(k, l, q) = rhs_vf(strxb)%sf(k, l, q) + rho_K_field(k, l, q)* & + (q_prim_vf(strxb + 3)%sf(k, l, q)*du_dz_hypo(k, l, q) + & + q_prim_vf(strxb + 3)%sf(k, l, q)*du_dz_hypo(k, l, q) - & + q_prim_vf(strxb)%sf(k, l, q)*dw_dz_hypo(k, l, q) - & + 2._wp*G_K_field(k, l, q)*(1._wp/3._wp)*dw_dz_hypo(k, l, q)) + + rhs_vf(strxb + 1)%sf(k, l, q) = rhs_vf(strxb + 1)%sf(k, l, q) + rho_K_field(k, l, q)* & + (q_prim_vf(strxb + 4)%sf(k, l, q)*du_dz_hypo(k, l, q) + & + q_prim_vf(strxb + 3)%sf(k, l, q)*dv_dz_hypo(k, l, q) - & + q_prim_vf(strxb + 1)%sf(k, l, q)*dw_dz_hypo(k, l, q)) + + rhs_vf(strxb + 2)%sf(k, l, q) = rhs_vf(strxb + 2)%sf(k, l, q) + rho_K_field(k, l, q)* & + (q_prim_vf(strxb + 4)%sf(k, l, q)*dv_dz_hypo(k, l, q) + & + q_prim_vf(strxb + 4)%sf(k, l, q)*dv_dz_hypo(k, l, q) - & + q_prim_vf(strxb + 2)%sf(k, l, q)*dw_dz_hypo(k, l, q) - & 2._wp*G_K_field(k, l, q)*(1._wp/3._wp)*dw_dz_hypo(k, l, q)) - rhs_vf(strxb + 1)%sf(k, l, q) = rhs_vf(strxb + 1)%sf(k, l, q) + rho_K_field(k, l, q)* & - (q_prim_vf(strxb + 4)%sf(k, l, q)*du_dz_hypo(k, l, q) + & - q_prim_vf(strxb + 3)%sf(k, l, q)*dv_dz_hypo(k, l, q) - & - q_prim_vf(strxb + 1)%sf(k, l, q)*dw_dz_hypo(k, l, q)) - - rhs_vf(strxb + 2)%sf(k, l, q) = rhs_vf(strxb + 2)%sf(k, l, q) + rho_K_field(k, l, q)* & - (q_prim_vf(strxb + 4)%sf(k, l, q)*dv_dz_hypo(k, l, q) + & - q_prim_vf(strxb + 4)%sf(k, l, q)*dv_dz_hypo(k, l, q) - & - q_prim_vf(strxb + 2)%sf(k, l, q)*dw_dz_hypo(k, l, q) - & - 2._wp*G_K_field(k, l, q)*(1._wp/3._wp)*dw_dz_hypo(k, l, q)) - - rhs_vf(strxb + 3)%sf(k, l, q) = rhs_vf(strxb + 3)%sf(k, l, q) + rho_K_field(k, l, q)* & - (q_prim_vf(strxb + 3)%sf(k, l, q)*du_dx_hypo(k, l, q) + & - q_prim_vf(strxb)%sf(k, l, q)*dw_dx_hypo(k, l, q) - & - q_prim_vf(strxb + 3)%sf(k, l, q)*du_dx_hypo(k, l, q) + & - q_prim_vf(strxb + 4)%sf(k, l, q)*du_dy_hypo(k, l, q) + & - q_prim_vf(strxb + 1)%sf(k, l, q)*dw_dy_hypo(k, l, q) - & - q_prim_vf(strxb + 3)%sf(k, l, q)*dv_dy_hypo(k, l, q) + & - q_prim_vf(strxb + 5)%sf(k, l, q)*du_dz_hypo(k, l, q) + & - q_prim_vf(strxb + 3)%sf(k, l, q)*dw_dz_hypo(k, l, q) - & - q_prim_vf(strxb + 3)%sf(k, l, q)*dw_dz_hypo(k, l, q) + & - 2._wp*G_K_field(k, l, q)*(1._wp/2._wp)*(du_dz_hypo(k, l, q) + & - dw_dx_hypo(k, l, q))) - - rhs_vf(strxb + 4)%sf(k, l, q) = rhs_vf(strxb + 4)%sf(k, l, q) + rho_K_field(k, l, q)* & - (q_prim_vf(strxb + 3)%sf(k, l, q)*dv_dx_hypo(k, l, q) + & - q_prim_vf(strxb + 1)%sf(k, l, q)*dw_dx_hypo(k, l, q) - & - q_prim_vf(strxb + 4)%sf(k, l, q)*du_dx_hypo(k, l, q) + & - q_prim_vf(strxb + 4)%sf(k, l, q)*dv_dy_hypo(k, l, q) + & - q_prim_vf(strxb + 2)%sf(k, l, q)*dw_dy_hypo(k, l, q) - & - q_prim_vf(strxb + 4)%sf(k, l, q)*dv_dy_hypo(k, l, q) + & - q_prim_vf(strxb + 5)%sf(k, l, q)*dv_dz_hypo(k, l, q) + & - q_prim_vf(strxb + 4)%sf(k, l, q)*dw_dz_hypo(k, l, q) - & - q_prim_vf(strxb + 4)%sf(k, l, q)*dw_dz_hypo(k, l, q) + & - 2._wp*G_K_field(k, l, q)*(1._wp/2._wp)*(dv_dz_hypo(k, l, q) + & - dw_dy_hypo(k, l, q))) - - rhs_vf(strxe)%sf(k, l, q) = rhs_vf(strxe)%sf(k, l, q) + rho_K_field(k, l, q)* & - (q_prim_vf(strxe - 2)%sf(k, l, q)*dw_dx_hypo(k, l, q) + & - q_prim_vf(strxe - 2)%sf(k, l, q)*dw_dx_hypo(k, l, q) - & - q_prim_vf(strxe)%sf(k, l, q)*du_dx_hypo(k, l, q) + & - q_prim_vf(strxe - 1)%sf(k, l, q)*dw_dy_hypo(k, l, q) + & - q_prim_vf(strxe - 1)%sf(k, l, q)*dw_dy_hypo(k, l, q) - & - q_prim_vf(strxe)%sf(k, l, q)*dv_dy_hypo(k, l, q) + & - q_prim_vf(strxe)%sf(k, l, q)*dw_dz_hypo(k, l, q) + & - q_prim_vf(strxe)%sf(k, l, q)*dw_dz_hypo(k, l, q) - & - q_prim_vf(strxe)%sf(k, l, q)*dw_dz_hypo(k, l, q) + & - 2._wp*G_K_field(k, l, q)*(dw_dz_hypo(k, l, q) - (1._wp/3._wp)* & - (du_dx_hypo(k, l, q) + & - dv_dy_hypo(k, l, q) + & - dw_dz_hypo(k, l, q)))) - end do + rhs_vf(strxb + 3)%sf(k, l, q) = rhs_vf(strxb + 3)%sf(k, l, q) + rho_K_field(k, l, q)* & + (q_prim_vf(strxb + 3)%sf(k, l, q)*du_dx_hypo(k, l, q) + & + q_prim_vf(strxb)%sf(k, l, q)*dw_dx_hypo(k, l, q) - & + q_prim_vf(strxb + 3)%sf(k, l, q)*du_dx_hypo(k, l, q) + & + q_prim_vf(strxb + 4)%sf(k, l, q)*du_dy_hypo(k, l, q) + & + q_prim_vf(strxb + 1)%sf(k, l, q)*dw_dy_hypo(k, l, q) - & + q_prim_vf(strxb + 3)%sf(k, l, q)*dv_dy_hypo(k, l, q) + & + q_prim_vf(strxb + 5)%sf(k, l, q)*du_dz_hypo(k, l, q) + & + q_prim_vf(strxb + 3)%sf(k, l, q)*dw_dz_hypo(k, l, q) - & + q_prim_vf(strxb + 3)%sf(k, l, q)*dw_dz_hypo(k, l, q) + & + 2._wp*G_K_field(k, l, q)*(1._wp/2._wp)*(du_dz_hypo(k, l, q) + & + dw_dx_hypo(k, l, q))) + + rhs_vf(strxb + 4)%sf(k, l, q) = rhs_vf(strxb + 4)%sf(k, l, q) + rho_K_field(k, l, q)* & + (q_prim_vf(strxb + 3)%sf(k, l, q)*dv_dx_hypo(k, l, q) + & + q_prim_vf(strxb + 1)%sf(k, l, q)*dw_dx_hypo(k, l, q) - & + q_prim_vf(strxb + 4)%sf(k, l, q)*du_dx_hypo(k, l, q) + & + q_prim_vf(strxb + 4)%sf(k, l, q)*dv_dy_hypo(k, l, q) + & + q_prim_vf(strxb + 2)%sf(k, l, q)*dw_dy_hypo(k, l, q) - & + q_prim_vf(strxb + 4)%sf(k, l, q)*dv_dy_hypo(k, l, q) + & + q_prim_vf(strxb + 5)%sf(k, l, q)*dv_dz_hypo(k, l, q) + & + q_prim_vf(strxb + 4)%sf(k, l, q)*dw_dz_hypo(k, l, q) - & + q_prim_vf(strxb + 4)%sf(k, l, q)*dw_dz_hypo(k, l, q) + & + 2._wp*G_K_field(k, l, q)*(1._wp/2._wp)*(dv_dz_hypo(k, l, q) + & + dw_dy_hypo(k, l, q))) + + rhs_vf(strxe)%sf(k, l, q) = rhs_vf(strxe)%sf(k, l, q) + rho_K_field(k, l, q)* & + (q_prim_vf(strxe - 2)%sf(k, l, q)*dw_dx_hypo(k, l, q) + & + q_prim_vf(strxe - 2)%sf(k, l, q)*dw_dx_hypo(k, l, q) - & + q_prim_vf(strxe)%sf(k, l, q)*du_dx_hypo(k, l, q) + & + q_prim_vf(strxe - 1)%sf(k, l, q)*dw_dy_hypo(k, l, q) + & + q_prim_vf(strxe - 1)%sf(k, l, q)*dw_dy_hypo(k, l, q) - & + q_prim_vf(strxe)%sf(k, l, q)*dv_dy_hypo(k, l, q) + & + q_prim_vf(strxe)%sf(k, l, q)*dw_dz_hypo(k, l, q) + & + q_prim_vf(strxe)%sf(k, l, q)*dw_dz_hypo(k, l, q) - & + q_prim_vf(strxe)%sf(k, l, q)*dw_dz_hypo(k, l, q) + & + 2._wp*G_K_field(k, l, q)*(dw_dz_hypo(k, l, q) - (1._wp/3._wp)* & + (du_dx_hypo(k, l, q) + & + dv_dy_hypo(k, l, q) + & + dw_dz_hypo(k, l, q)))) end do end do + end do $:END_GPU_PARALLEL_LOOP() end if if (cyl_coord .and. idir == 2) then $:GPU_PARALLEL_LOOP(private='[k,l,q]', collapse=3) - do q = 0, p - do l = 0, n - do k = 0, m - ! S_xx -= rho * v/r * (tau_xx + 2/3*G) - rhs_vf(strxb)%sf(k, l, q) = rhs_vf(strxb)%sf(k, l, q) - & + do q = 0, p + do l = 0, n + do k = 0, m + ! S_xx -= rho * v/r * (tau_xx + 2/3*G) + rhs_vf(strxb)%sf(k, l, q) = rhs_vf(strxb)%sf(k, l, q) - & + rho_K_field(k, l, q)*q_prim_vf(momxb + 1)%sf(k, l, q)/y_cc(l)* & + (q_prim_vf(strxb)%sf(k, l, q) + (2._wp/3._wp)*G_K_field(k, l, q)) ! tau_xx + 2/3*G + + ! S_xr -= rho * v/r * tau_xr + rhs_vf(strxb + 1)%sf(k, l, q) = rhs_vf(strxb + 1)%sf(k, l, q) - & rho_K_field(k, l, q)*q_prim_vf(momxb + 1)%sf(k, l, q)/y_cc(l)* & - (q_prim_vf(strxb)%sf(k, l, q) + (2._wp/3._wp)*G_K_field(k, l, q)) ! tau_xx + 2/3*G - - ! S_xr -= rho * v/r * tau_xr - rhs_vf(strxb + 1)%sf(k, l, q) = rhs_vf(strxb + 1)%sf(k, l, q) - & - rho_K_field(k, l, q)*q_prim_vf(momxb + 1)%sf(k, l, q)/y_cc(l)* & - q_prim_vf(strxb + 1)%sf(k, l, q) ! tau_xx - - ! S_rr -= rho * v/r * (tau_rr + 2/3*G) - rhs_vf(strxb + 2)%sf(k, l, q) = rhs_vf(strxb + 2)%sf(k, l, q) - & - rho_K_field(k, l, q)*q_prim_vf(momxb + 1)%sf(k, l, q)/y_cc(l)* & - (q_prim_vf(strxb + 2)%sf(k, l, q) + (2._wp/3._wp)*G_K_field(k, l, q)) ! tau_rr + 2/3*G - - ! S_thetatheta += rho * ( -(tau_thetatheta + 2/3*G)*(du/dx + dv/dr + v/r) + 2*(tau_thetatheta + G)*v/r ) - rhs_vf(strxb + 3)%sf(k, l, q) = rhs_vf(strxb + 3)%sf(k, l, q) + & - rho_K_field(k, l, q)*( & - -(q_prim_vf(strxb + 3)%sf(k, l, q) + (2._wp/3._wp)*G_K_field(k, l, q))* & - (du_dx_hypo(k, l, q) + dv_dy_hypo(k, l, q) + q_prim_vf(momxb + 1)%sf(k, l, q)/y_cc(l)) & - + 2._wp*(q_prim_vf(strxb + 3)%sf(k, l, q) + G_K_field(k, l, q))*q_prim_vf(momxb + 1)%sf(k, l, q)/y_cc(l)) - end do + q_prim_vf(strxb + 1)%sf(k, l, q) ! tau_xx + + ! S_rr -= rho * v/r * (tau_rr + 2/3*G) + rhs_vf(strxb + 2)%sf(k, l, q) = rhs_vf(strxb + 2)%sf(k, l, q) - & + rho_K_field(k, l, q)*q_prim_vf(momxb + 1)%sf(k, l, q)/y_cc(l)* & + (q_prim_vf(strxb + 2)%sf(k, l, q) + (2._wp/3._wp)*G_K_field(k, l, q)) ! tau_rr + 2/3*G + + ! S_thetatheta += rho * ( -(tau_thetatheta + 2/3*G)*(du/dx + dv/dr + v/r) + 2*(tau_thetatheta + G)*v/r ) + rhs_vf(strxb + 3)%sf(k, l, q) = rhs_vf(strxb + 3)%sf(k, l, q) + & + rho_K_field(k, l, q)*( & + -(q_prim_vf(strxb + 3)%sf(k, l, q) + (2._wp/3._wp)*G_K_field(k, l, q))* & + (du_dx_hypo(k, l, q) + dv_dy_hypo(k, l, q) + q_prim_vf(momxb + 1)%sf(k, l, q)/y_cc(l)) & + + 2._wp*(q_prim_vf(strxb + 3)%sf(k, l, q) + G_K_field(k, l, q))*q_prim_vf(momxb + 1)%sf(k, l, q)/y_cc(l)) end do end do + end do $:END_GPU_PARALLEL_LOOP() end if @@ -405,64 +405,64 @@ contains if (n == 0) then l = 0; q = 0 $:GPU_PARALLEL_LOOP(private='[k]', copyin='[l,q]') - do k = 0, m - rhs_vf(damage_idx)%sf(k, l, q) = (alpha_bar*max(abs(q_cons_vf(stress_idx%beg)%sf(k, l, q)) - tau_star, 0._wp))**cont_damage_s - end do + do k = 0, m + rhs_vf(damage_idx)%sf(k, l, q) = (alpha_bar*max(abs(q_cons_vf(stress_idx%beg)%sf(k, l, q)) - tau_star, 0._wp))**cont_damage_s + end do $:END_GPU_PARALLEL_LOOP() elseif (p == 0) then q = 0 $:GPU_PARALLEL_LOOP(private='[k,l]', copyin='[q]', collapse=2) + do l = 0, n + do k = 0, m + ! Maximum principal stress + tau_p = 0.5_wp*(q_cons_vf(stress_idx%beg)%sf(k, l, q) + & + q_cons_vf(stress_idx%beg + 2)%sf(k, l, q)) + & + sqrt((q_cons_vf(stress_idx%beg)%sf(k, l, q) - & + q_cons_vf(stress_idx%beg + 2)%sf(k, l, q))**2.0_wp + & + 4._wp*q_cons_vf(stress_idx%beg + 1)%sf(k, l, q)**2.0_wp)/2._wp + + rhs_vf(damage_idx)%sf(k, l, q) = (alpha_bar*max(tau_p - tau_star, 0._wp))**cont_damage_s + end do + end do + $:END_GPU_PARALLEL_LOOP() + else + $:GPU_PARALLEL_LOOP(private='[k,l,q]', collapse=3) + do q = 0, p do l = 0, n do k = 0, m + tau_xx = q_cons_vf(stress_idx%beg)%sf(k, l, q) + tau_xy = q_cons_vf(stress_idx%beg + 1)%sf(k, l, q) + tau_yy = q_cons_vf(stress_idx%beg + 2)%sf(k, l, q) + tau_xz = q_cons_vf(stress_idx%beg + 3)%sf(k, l, q) + tau_yz = q_cons_vf(stress_idx%beg + 4)%sf(k, l, q) + tau_zz = q_cons_vf(stress_idx%beg + 5)%sf(k, l, q) + + ! Invariants of the stress tensor + I1 = tau_xx + tau_yy + tau_zz + I2 = tau_xx*tau_yy + tau_xx*tau_zz + tau_yy*tau_zz - & + (tau_xy**2.0_wp + tau_xz**2.0_wp + tau_yz**2.0_wp) + I3 = tau_xx*tau_yy*tau_zz + 2.0_wp*tau_xy*tau_xz*tau_yz - & + tau_xx*tau_yz**2.0_wp - tau_yy*tau_xz**2.0_wp - tau_zz*tau_xy**2.0_wp + ! Maximum principal stress - tau_p = 0.5_wp*(q_cons_vf(stress_idx%beg)%sf(k, l, q) + & - q_cons_vf(stress_idx%beg + 2)%sf(k, l, q)) + & - sqrt((q_cons_vf(stress_idx%beg)%sf(k, l, q) - & - q_cons_vf(stress_idx%beg + 2)%sf(k, l, q))**2.0_wp + & - 4._wp*q_cons_vf(stress_idx%beg + 1)%sf(k, l, q)**2.0_wp)/2._wp + temp = I1**2.0_wp - 3.0_wp*I2 + sqrt_term_1 = sqrt(max(temp, 0.0_wp)) + if (sqrt_term_1 > verysmall) then ! Avoid 0/0 + argument = (2.0_wp*I1*I1*I1 - 9.0_wp*I1*I2 + 27.0_wp*I3)/ & + (2.0_wp*sqrt_term_1*sqrt_term_1*sqrt_term_1) + if (argument > 1.0_wp) argument = 1.0_wp + if (argument < -1.0_wp) argument = -1.0_wp + phi = acos(argument) + sqrt_term_2 = sqrt(max(I1**2.0_wp - 3.0_wp*I2, 0.0_wp)) + tau_p = I1/3.0_wp + 2.0_wp/sqrt(3.0_wp)*sqrt_term_2*cos(phi/3.0_wp) + else + tau_p = I1/3.0_wp + end if rhs_vf(damage_idx)%sf(k, l, q) = (alpha_bar*max(tau_p - tau_star, 0._wp))**cont_damage_s end do end do - $:END_GPU_PARALLEL_LOOP() - else - $:GPU_PARALLEL_LOOP(private='[k,l,q]', collapse=3) - do q = 0, p - do l = 0, n - do k = 0, m - tau_xx = q_cons_vf(stress_idx%beg)%sf(k, l, q) - tau_xy = q_cons_vf(stress_idx%beg + 1)%sf(k, l, q) - tau_yy = q_cons_vf(stress_idx%beg + 2)%sf(k, l, q) - tau_xz = q_cons_vf(stress_idx%beg + 3)%sf(k, l, q) - tau_yz = q_cons_vf(stress_idx%beg + 4)%sf(k, l, q) - tau_zz = q_cons_vf(stress_idx%beg + 5)%sf(k, l, q) - - ! Invariants of the stress tensor - I1 = tau_xx + tau_yy + tau_zz - I2 = tau_xx*tau_yy + tau_xx*tau_zz + tau_yy*tau_zz - & - (tau_xy**2.0_wp + tau_xz**2.0_wp + tau_yz**2.0_wp) - I3 = tau_xx*tau_yy*tau_zz + 2.0_wp*tau_xy*tau_xz*tau_yz - & - tau_xx*tau_yz**2.0_wp - tau_yy*tau_xz**2.0_wp - tau_zz*tau_xy**2.0_wp - - ! Maximum principal stress - temp = I1**2.0_wp - 3.0_wp*I2 - sqrt_term_1 = sqrt(max(temp, 0.0_wp)) - if (sqrt_term_1 > verysmall) then ! Avoid 0/0 - argument = (2.0_wp*I1*I1*I1 - 9.0_wp*I1*I2 + 27.0_wp*I3)/ & - (2.0_wp*sqrt_term_1*sqrt_term_1*sqrt_term_1) - if (argument > 1.0_wp) argument = 1.0_wp - if (argument < -1.0_wp) argument = -1.0_wp - phi = acos(argument) - sqrt_term_2 = sqrt(max(I1**2.0_wp - 3.0_wp*I2, 0.0_wp)) - tau_p = I1/3.0_wp + 2.0_wp/sqrt(3.0_wp)*sqrt_term_2*cos(phi/3.0_wp) - else - tau_p = I1/3.0_wp - end if - - rhs_vf(damage_idx)%sf(k, l, q) = (alpha_bar*max(tau_p - tau_star, 0._wp))**cont_damage_s - end do - end do - end do + end do $:END_GPU_PARALLEL_LOOP() end if diff --git a/src/simulation/m_ibm.fpp b/src/simulation/m_ibm.fpp index 4501b52f5..3fc11a74b 100644 --- a/src/simulation/m_ibm.fpp +++ b/src/simulation/m_ibm.fpp @@ -198,191 +198,191 @@ contains type(ghost_point) :: innerp if (num_gps > 0) then $:GPU_PARALLEL_LOOP(private='[i,physical_loc,dyn_pres,alpha_rho_IP, alpha_IP,pres_IP,vel_IP,vel_g,vel_norm_IP,r_IP, v_IP,pb_IP,mv_IP,nmom_IP,presb_IP,massv_IP,rho, gamma,pi_inf,Re_K,G_K,Gs,gp,innerp,norm,buf, radial_vector, rotation_velocity, j,k,l,q]') - do i = 1, num_gps + do i = 1, num_gps + + gp = ghost_points(i) + j = gp%loc(1) + k = gp%loc(2) + l = gp%loc(3) + patch_id = ghost_points(i)%ib_patch_id - gp = ghost_points(i) - j = gp%loc(1) - k = gp%loc(2) - l = gp%loc(3) - patch_id = ghost_points(i)%ib_patch_id + ! Calculate physical location of GP + if (p > 0) then + physical_loc = [x_cc(j), y_cc(k), z_cc(l)] + else + physical_loc = [x_cc(j), y_cc(k), 0._wp] + end if - ! Calculate physical location of GP - if (p > 0) then - physical_loc = [x_cc(j), y_cc(k), z_cc(l)] + !Interpolate primitive variables at image point associated w/ GP + if (bubbles_euler .and. .not. qbmm) then + call s_interpolate_image_point(q_prim_vf, gp, & + alpha_rho_IP, alpha_IP, pres_IP, vel_IP, c_IP, & + r_IP, v_IP, pb_IP, mv_IP) + else if (qbmm .and. polytropic) then + call s_interpolate_image_point(q_prim_vf, gp, & + alpha_rho_IP, alpha_IP, pres_IP, vel_IP, c_IP, & + r_IP, v_IP, pb_IP, mv_IP, nmom_IP) + else if (qbmm .and. .not. polytropic) then + call s_interpolate_image_point(q_prim_vf, gp, & + alpha_rho_IP, alpha_IP, pres_IP, vel_IP, c_IP, & + r_IP, v_IP, pb_IP, mv_IP, nmom_IP, pb_in, mv_in, presb_IP, massv_IP) + else + call s_interpolate_image_point(q_prim_vf, gp, & + alpha_rho_IP, alpha_IP, pres_IP, vel_IP, c_IP) + end if + dyn_pres = 0._wp + + ! Set q_prim_vf params at GP so that mixture vars calculated properly + $:GPU_LOOP(parallelism='[seq]') + do q = 1, num_fluids + q_prim_vf(q)%sf(j, k, l) = alpha_rho_IP(q) + q_prim_vf(advxb + q - 1)%sf(j, k, l) = alpha_IP(q) + end do + + if (surface_tension) then + q_prim_vf(c_idx)%sf(j, k, l) = c_IP + end if + if (model_eqns /= 4) then + ! If in simulation, use acc mixture subroutines + if (elasticity) then + call s_convert_species_to_mixture_variables_acc(rho, gamma, pi_inf, qv_K, alpha_IP, & + alpha_rho_IP, Re_K, G_K, Gs) + else if (bubbles_euler) then + call s_convert_species_to_mixture_variables_bubbles_acc(rho, gamma, pi_inf, qv_K, alpha_IP, & + alpha_rho_IP, Re_K) else - physical_loc = [x_cc(j), y_cc(k), 0._wp] + call s_convert_species_to_mixture_variables_acc(rho, gamma, pi_inf, qv_K, alpha_IP, & + alpha_rho_IP, Re_K) end if + end if - !Interpolate primitive variables at image point associated w/ GP - if (bubbles_euler .and. .not. qbmm) then - call s_interpolate_image_point(q_prim_vf, gp, & - alpha_rho_IP, alpha_IP, pres_IP, vel_IP, c_IP, & - r_IP, v_IP, pb_IP, mv_IP) - else if (qbmm .and. polytropic) then - call s_interpolate_image_point(q_prim_vf, gp, & - alpha_rho_IP, alpha_IP, pres_IP, vel_IP, c_IP, & - r_IP, v_IP, pb_IP, mv_IP, nmom_IP) - else if (qbmm .and. .not. polytropic) then - call s_interpolate_image_point(q_prim_vf, gp, & - alpha_rho_IP, alpha_IP, pres_IP, vel_IP, c_IP, & - r_IP, v_IP, pb_IP, mv_IP, nmom_IP, pb_in, mv_in, presb_IP, massv_IP) + ! Calculate velocity of ghost cell + if (gp%slip) then + norm(1:3) = levelset_norm%sf(gp%loc(1), gp%loc(2), gp%loc(3), gp%ib_patch_id, 1:3) + buf = sqrt(sum(norm**2)) + norm = norm/buf + vel_norm_IP = sum(vel_IP*norm)*norm + vel_g = vel_IP - vel_norm_IP + if (patch_ib(patch_id)%moving_ibm /= 0) then + ! compute the linear velocity of the ghost point due to rotation + radial_vector = physical_loc - [patch_ib(patch_id)%x_centroid, & + patch_ib(patch_id)%y_centroid, patch_ib(patch_id)%z_centroid] + rotation_velocity = cross_product(matmul(patch_ib(patch_id)%rotation_matrix, patch_ib(patch_id)%angular_vel), radial_vector) + + ! add only the component of the IB's motion that is normal to the surface + vel_g = vel_g + sum((patch_ib(patch_id)%vel + rotation_velocity)*norm)*norm + end if + else + if (patch_ib(patch_id)%moving_ibm == 0) then + ! we know the object is not moving if moving_ibm is 0 (false) + vel_g = 0._wp else - call s_interpolate_image_point(q_prim_vf, gp, & - alpha_rho_IP, alpha_IP, pres_IP, vel_IP, c_IP) + ! get the vector that points from the centroid to the ghost + radial_vector = physical_loc - [patch_ib(patch_id)%x_centroid, & + patch_ib(patch_id)%y_centroid, patch_ib(patch_id)%z_centroid] + ! convert the angular velocity from the inertial reference frame to the fluids frame, then convert to linear velocity + rotation_velocity = cross_product(matmul(patch_ib(patch_id)%rotation_matrix, patch_ib(patch_id)%angular_vel), radial_vector) + do q = 1, 3 + ! if mibm is 1 or 2, then the boundary may be moving + vel_g(q) = patch_ib(patch_id)%vel(q) ! add the linear velocity + vel_g(q) = vel_g(q) + rotation_velocity(q) ! add the rotational velocity + end do end if - dyn_pres = 0._wp + end if - ! Set q_prim_vf params at GP so that mixture vars calculated properly - $:GPU_LOOP(parallelism='[seq]') - do q = 1, num_fluids - q_prim_vf(q)%sf(j, k, l) = alpha_rho_IP(q) - q_prim_vf(advxb + q - 1)%sf(j, k, l) = alpha_IP(q) - end do + ! Set momentum + $:GPU_LOOP(parallelism='[seq]') + do q = momxb, momxe + q_cons_vf(q)%sf(j, k, l) = rho*vel_g(q - momxb + 1) + dyn_pres = dyn_pres + q_cons_vf(q)%sf(j, k, l)* & + vel_g(q - momxb + 1)/2._wp + end do - if (surface_tension) then - q_prim_vf(c_idx)%sf(j, k, l) = c_IP - end if - if (model_eqns /= 4) then - ! If in simulation, use acc mixture subroutines - if (elasticity) then - call s_convert_species_to_mixture_variables_acc(rho, gamma, pi_inf, qv_K, alpha_IP, & - alpha_rho_IP, Re_K, G_K, Gs) - else if (bubbles_euler) then - call s_convert_species_to_mixture_variables_bubbles_acc(rho, gamma, pi_inf, qv_K, alpha_IP, & - alpha_rho_IP, Re_K) - else - call s_convert_species_to_mixture_variables_acc(rho, gamma, pi_inf, qv_K, alpha_IP, & - alpha_rho_IP, Re_K) - end if - end if + ! Set continuity and adv vars + $:GPU_LOOP(parallelism='[seq]') + do q = 1, num_fluids + q_cons_vf(q)%sf(j, k, l) = alpha_rho_IP(q) + q_cons_vf(advxb + q - 1)%sf(j, k, l) = alpha_IP(q) + end do - ! Calculate velocity of ghost cell - if (gp%slip) then - norm(1:3) = levelset_norm%sf(gp%loc(1), gp%loc(2), gp%loc(3), gp%ib_patch_id, 1:3) - buf = sqrt(sum(norm**2)) - norm = norm/buf - vel_norm_IP = sum(vel_IP*norm)*norm - vel_g = vel_IP - vel_norm_IP - if (patch_ib(patch_id)%moving_ibm /= 0) then - ! compute the linear velocity of the ghost point due to rotation - radial_vector = physical_loc - [patch_ib(patch_id)%x_centroid, & - patch_ib(patch_id)%y_centroid, patch_ib(patch_id)%z_centroid] - rotation_velocity = cross_product(matmul(patch_ib(patch_id)%rotation_matrix, patch_ib(patch_id)%angular_vel), radial_vector) - - ! add only the component of the IB's motion that is normal to the surface - vel_g = vel_g + sum((patch_ib(patch_id)%vel + rotation_velocity)*norm)*norm - end if - else - if (patch_ib(patch_id)%moving_ibm == 0) then - ! we know the object is not moving if moving_ibm is 0 (false) - vel_g = 0._wp - else - ! get the vector that points from the centroid to the ghost - radial_vector = physical_loc - [patch_ib(patch_id)%x_centroid, & - patch_ib(patch_id)%y_centroid, patch_ib(patch_id)%z_centroid] - ! convert the angular velocity from the inertial reference frame to the fluids frame, then convert to linear velocity - rotation_velocity = cross_product(matmul(patch_ib(patch_id)%rotation_matrix, patch_ib(patch_id)%angular_vel), radial_vector) - do q = 1, 3 - ! if mibm is 1 or 2, then the boundary may be moving - vel_g(q) = patch_ib(patch_id)%vel(q) ! add the linear velocity - vel_g(q) = vel_g(q) + rotation_velocity(q) ! add the rotational velocity - end do - end if - end if + ! Set color function + if (surface_tension) then + q_cons_vf(c_idx)%sf(j, k, l) = c_IP + end if - ! Set momentum + ! Set Energy + if (bubbles_euler) then + q_cons_vf(E_idx)%sf(j, k, l) = (1 - alpha_IP(1))*(gamma*pres_IP + pi_inf + dyn_pres) + else + q_cons_vf(E_idx)%sf(j, k, l) = gamma*pres_IP + pi_inf + dyn_pres + end if + ! Set bubble vars + if (bubbles_euler .and. .not. qbmm) then + call s_comp_n_from_prim(alpha_IP(1), r_IP, nbub, weight) $:GPU_LOOP(parallelism='[seq]') - do q = momxb, momxe - q_cons_vf(q)%sf(j, k, l) = rho*vel_g(q - momxb + 1) - dyn_pres = dyn_pres + q_cons_vf(q)%sf(j, k, l)* & - vel_g(q - momxb + 1)/2._wp + do q = 1, nb + q_cons_vf(bubxb + (q - 1)*2)%sf(j, k, l) = nbub*r_IP(q) + q_cons_vf(bubxb + (q - 1)*2 + 1)%sf(j, k, l) = nbub*v_IP(q) + if (.not. polytropic) then + q_cons_vf(bubxb + (q - 1)*4)%sf(j, k, l) = nbub*r_IP(q) + q_cons_vf(bubxb + (q - 1)*4 + 1)%sf(j, k, l) = nbub*v_IP(q) + q_cons_vf(bubxb + (q - 1)*4 + 2)%sf(j, k, l) = nbub*pb_IP(q) + q_cons_vf(bubxb + (q - 1)*4 + 3)%sf(j, k, l) = nbub*mv_IP(q) + end if end do + end if - ! Set continuity and adv vars + if (qbmm) then + + nbub = nmom_IP(1) $:GPU_LOOP(parallelism='[seq]') - do q = 1, num_fluids - q_cons_vf(q)%sf(j, k, l) = alpha_rho_IP(q) - q_cons_vf(advxb + q - 1)%sf(j, k, l) = alpha_IP(q) + do q = 1, nb*nmom + q_cons_vf(bubxb + q - 1)%sf(j, k, l) = nbub*nmom_IP(q) end do - ! Set color function - if (surface_tension) then - q_cons_vf(c_idx)%sf(j, k, l) = c_IP - end if - - ! Set Energy - if (bubbles_euler) then - q_cons_vf(E_idx)%sf(j, k, l) = (1 - alpha_IP(1))*(gamma*pres_IP + pi_inf + dyn_pres) - else - q_cons_vf(E_idx)%sf(j, k, l) = gamma*pres_IP + pi_inf + dyn_pres - end if - ! Set bubble vars - if (bubbles_euler .and. .not. qbmm) then - call s_comp_n_from_prim(alpha_IP(1), r_IP, nbub, weight) - $:GPU_LOOP(parallelism='[seq]') - do q = 1, nb - q_cons_vf(bubxb + (q - 1)*2)%sf(j, k, l) = nbub*r_IP(q) - q_cons_vf(bubxb + (q - 1)*2 + 1)%sf(j, k, l) = nbub*v_IP(q) - if (.not. polytropic) then - q_cons_vf(bubxb + (q - 1)*4)%sf(j, k, l) = nbub*r_IP(q) - q_cons_vf(bubxb + (q - 1)*4 + 1)%sf(j, k, l) = nbub*v_IP(q) - q_cons_vf(bubxb + (q - 1)*4 + 2)%sf(j, k, l) = nbub*pb_IP(q) - q_cons_vf(bubxb + (q - 1)*4 + 3)%sf(j, k, l) = nbub*mv_IP(q) - end if - end do - end if - - if (qbmm) then - - nbub = nmom_IP(1) - $:GPU_LOOP(parallelism='[seq]') - do q = 1, nb*nmom - q_cons_vf(bubxb + q - 1)%sf(j, k, l) = nbub*nmom_IP(q) - end do + $:GPU_LOOP(parallelism='[seq]') + do q = 1, nb + q_cons_vf(bubxb + (q - 1)*nmom)%sf(j, k, l) = nbub + end do + if (.not. polytropic) then $:GPU_LOOP(parallelism='[seq]') do q = 1, nb - q_cons_vf(bubxb + (q - 1)*nmom)%sf(j, k, l) = nbub - end do - - if (.not. polytropic) then $:GPU_LOOP(parallelism='[seq]') - do q = 1, nb - $:GPU_LOOP(parallelism='[seq]') - do r = 1, nnode - pb_in(j, k, l, r, q) = presb_IP((q - 1)*nnode + r) - mv_in(j, k, l, r, q) = massv_IP((q - 1)*nnode + r) - end do + do r = 1, nnode + pb_in(j, k, l, r, q) = presb_IP((q - 1)*nnode + r) + mv_in(j, k, l, r, q) = massv_IP((q - 1)*nnode + r) end do - end if - end if - - if (model_eqns == 3) then - $:GPU_LOOP(parallelism='[seq]') - do q = intxb, intxe - q_cons_vf(q)%sf(j, k, l) = alpha_IP(q - intxb + 1)*(gammas(q - intxb + 1)*pres_IP & - + pi_infs(q - intxb + 1)) end do end if - end do + end if + + if (model_eqns == 3) then + $:GPU_LOOP(parallelism='[seq]') + do q = intxb, intxe + q_cons_vf(q)%sf(j, k, l) = alpha_IP(q - intxb + 1)*(gammas(q - intxb + 1)*pres_IP & + + pi_infs(q - intxb + 1)) + end do + end if + end do $:END_GPU_PARALLEL_LOOP() end if !Correct the state of the inner points in IBs if (num_inner_gps > 0) then $:GPU_PARALLEL_LOOP(private='[i,physical_loc,dyn_pres,alpha_rho_IP, alpha_IP,vel_g,rho,gamma,pi_inf,Re_K,innerp,j,k,l,q]') - do i = 1, num_inner_gps + do i = 1, num_inner_gps - innerp = inner_points(i) - j = innerp%loc(1) - k = innerp%loc(2) - l = innerp%loc(3) + innerp = inner_points(i) + j = innerp%loc(1) + k = innerp%loc(2) + l = innerp%loc(3) - $:GPU_LOOP(parallelism='[seq]') - do q = momxb, momxe - q_cons_vf(q)%sf(j, k, l) = 0._wp - end do + $:GPU_LOOP(parallelism='[seq]') + do q = momxb, momxe + q_cons_vf(q)%sf(j, k, l) = 0._wp end do + end do $:END_GPU_PARALLEL_LOOP() end if diff --git a/src/simulation/m_igr.fpp b/src/simulation/m_igr.fpp index a5e757f06..9126d1cf7 100644 --- a/src/simulation/m_igr.fpp +++ b/src/simulation/m_igr.fpp @@ -163,14 +163,14 @@ contains #endif $:GPU_PARALLEL_LOOP(private='[j,k,l]', collapse=3) - do l = idwbuff(3)%beg, idwbuff(3)%end - do k = idwbuff(2)%beg, idwbuff(2)%end - do j = idwbuff(1)%beg, idwbuff(1)%end - jac(j, k, l) = 0._wp - if (igr_iter_solver == 1) jac_old(j, k, l) = 0._wp - end do + do l = idwbuff(3)%beg, idwbuff(3)%end + do k = idwbuff(2)%beg, idwbuff(2)%end + do j = idwbuff(1)%beg, idwbuff(1)%end + jac(j, k, l) = 0._wp + if (igr_iter_solver == 1) jac_old(j, k, l) = 0._wp end do end do + end do $:END_GPU_PARALLEL_LOOP() if (p == 0) then @@ -245,81 +245,81 @@ contains do q = 1, num_iters $:GPU_PARALLEL_LOOP(collapse=3, private='[j,k,l,rho_lx, rho_rx, rho_ly, rho_ry, rho_lz, rho_rz, fd_coeff]') - do l = 0, p - do k = 0, n - do j = 0, m - rho_lx = 0._wp - rho_rx = 0._wp - rho_ly = 0._wp - rho_ry = 0._wp - rho_lz = 0._wp - rho_rz = 0._wp - fd_coeff = 0._wp + do l = 0, p + do k = 0, n + do j = 0, m + rho_lx = 0._wp + rho_rx = 0._wp + rho_ly = 0._wp + rho_ry = 0._wp + rho_lz = 0._wp + rho_rz = 0._wp + fd_coeff = 0._wp - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - rho_lx = rho_lx + (q_cons_vf(i)%sf(j, k, l) + q_cons_vf(i)%sf(j - 1, k, l))/2._wp - rho_rx = rho_rx + (q_cons_vf(i)%sf(j, k, l) + q_cons_vf(i)%sf(j + 1, k, l))/2._wp - rho_ly = rho_ly + (q_cons_vf(i)%sf(j, k, l) + q_cons_vf(i)%sf(j, k - 1, l))/2._wp - rho_ry = rho_ry + (q_cons_vf(i)%sf(j, k, l) + q_cons_vf(i)%sf(j, k + 1, l))/2._wp - if (p > 0) then - rho_lz = rho_lz + (q_cons_vf(i)%sf(j, k, l) + q_cons_vf(i)%sf(j, k, l - 1))/2._wp - rho_rz = rho_rz + (q_cons_vf(i)%sf(j, k, l) + q_cons_vf(i)%sf(j, k, l + 1))/2._wp - end if - fd_coeff = fd_coeff + q_cons_vf(i)%sf(j, k, l) - end do + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids + rho_lx = rho_lx + (q_cons_vf(i)%sf(j, k, l) + q_cons_vf(i)%sf(j - 1, k, l))/2._wp + rho_rx = rho_rx + (q_cons_vf(i)%sf(j, k, l) + q_cons_vf(i)%sf(j + 1, k, l))/2._wp + rho_ly = rho_ly + (q_cons_vf(i)%sf(j, k, l) + q_cons_vf(i)%sf(j, k - 1, l))/2._wp + rho_ry = rho_ry + (q_cons_vf(i)%sf(j, k, l) + q_cons_vf(i)%sf(j, k + 1, l))/2._wp + if (p > 0) then + rho_lz = rho_lz + (q_cons_vf(i)%sf(j, k, l) + q_cons_vf(i)%sf(j, k, l - 1))/2._wp + rho_rz = rho_rz + (q_cons_vf(i)%sf(j, k, l) + q_cons_vf(i)%sf(j, k, l + 1))/2._wp + end if + fd_coeff = fd_coeff + q_cons_vf(i)%sf(j, k, l) + end do - fd_coeff = 1._wp/fd_coeff + alf_igr* & - ((1._wp/dx(j)**2._wp)*(1._wp/rho_lx + 1._wp/rho_rx) + & - (1._wp/dy(k)**2._wp)*(1._wp/rho_ly + 1._wp/rho_ry)) + fd_coeff = 1._wp/fd_coeff + alf_igr* & + ((1._wp/dx(j)**2._wp)*(1._wp/rho_lx + 1._wp/rho_rx) + & + (1._wp/dy(k)**2._wp)*(1._wp/rho_ly + 1._wp/rho_ry)) + if (num_dims == 3) then + fd_coeff = fd_coeff + alf_igr*(1._wp/dz(l)**2._wp)*(1._wp/rho_lz + 1._wp/rho_rz) + end if + + if (igr_iter_solver == 1) then ! Jacobi iteration if (num_dims == 3) then - fd_coeff = fd_coeff + alf_igr*(1._wp/dz(l)**2._wp)*(1._wp/rho_lz + 1._wp/rho_rz) + jac(j, k, l) = (alf_igr/fd_coeff)* & + ((1._wp/dx(j)**2._wp)*(jac_old(j - 1, k, l)/rho_lx + jac_old(j + 1, k, l)/rho_rx) + & + (1._wp/dy(k)**2._wp)*(jac_old(j, k - 1, l)/rho_ly + jac_old(j, k + 1, l)/rho_ry) + & + (1._wp/dz(l)**2._wp)*(jac_old(j, k, l - 1)/rho_lz + jac_old(j, k, l + 1)/rho_rz)) + & + jac_rhs(j, k, l)/fd_coeff + else + jac(j, k, l) = (alf_igr/fd_coeff)* & + ((1._wp/dx(j)**2._wp)*(jac_old(j - 1, k, l)/rho_lx + jac_old(j + 1, k, l)/rho_rx) + & + (1._wp/dy(k)**2._wp)*(jac_old(j, k - 1, l)/rho_ly + jac_old(j, k + 1, l)/rho_ry)) + & + jac_rhs(j, k, l)/fd_coeff end if - - if (igr_iter_solver == 1) then ! Jacobi iteration - if (num_dims == 3) then - jac(j, k, l) = (alf_igr/fd_coeff)* & - ((1._wp/dx(j)**2._wp)*(jac_old(j - 1, k, l)/rho_lx + jac_old(j + 1, k, l)/rho_rx) + & - (1._wp/dy(k)**2._wp)*(jac_old(j, k - 1, l)/rho_ly + jac_old(j, k + 1, l)/rho_ry) + & - (1._wp/dz(l)**2._wp)*(jac_old(j, k, l - 1)/rho_lz + jac_old(j, k, l + 1)/rho_rz)) + & - jac_rhs(j, k, l)/fd_coeff - else - jac(j, k, l) = (alf_igr/fd_coeff)* & - ((1._wp/dx(j)**2._wp)*(jac_old(j - 1, k, l)/rho_lx + jac_old(j + 1, k, l)/rho_rx) + & - (1._wp/dy(k)**2._wp)*(jac_old(j, k - 1, l)/rho_ly + jac_old(j, k + 1, l)/rho_ry)) + & - jac_rhs(j, k, l)/fd_coeff - end if - else ! Gauss Seidel iteration - if (num_dims == 3) then - jac(j, k, l) = (alf_igr/fd_coeff)* & - ((1._wp/dx(j)**2._wp)*(jac(j - 1, k, l)/rho_lx + jac(j + 1, k, l)/rho_rx) + & - (1._wp/dy(k)**2._wp)*(jac(j, k - 1, l)/rho_ly + jac(j, k + 1, l)/rho_ry) + & - (1._wp/dz(l)**2._wp)*(jac(j, k, l - 1)/rho_lz + jac(j, k, l + 1)/rho_rz)) + & - jac_rhs(j, k, l)/fd_coeff - else - jac(j, k, l) = (alf_igr/fd_coeff)* & - ((1._wp/dx(j)**2._wp)*(jac(j - 1, k, l)/rho_lx + jac(j + 1, k, l)/rho_rx) + & - (1._wp/dy(k)**2._wp)*(jac(j, k - 1, l)/rho_ly + jac(j, k + 1, l)/rho_ry)) + & - jac_rhs(j, k, l)/fd_coeff - end if + else ! Gauss Seidel iteration + if (num_dims == 3) then + jac(j, k, l) = (alf_igr/fd_coeff)* & + ((1._wp/dx(j)**2._wp)*(jac(j - 1, k, l)/rho_lx + jac(j + 1, k, l)/rho_rx) + & + (1._wp/dy(k)**2._wp)*(jac(j, k - 1, l)/rho_ly + jac(j, k + 1, l)/rho_ry) + & + (1._wp/dz(l)**2._wp)*(jac(j, k, l - 1)/rho_lz + jac(j, k, l + 1)/rho_rz)) + & + jac_rhs(j, k, l)/fd_coeff + else + jac(j, k, l) = (alf_igr/fd_coeff)* & + ((1._wp/dx(j)**2._wp)*(jac(j - 1, k, l)/rho_lx + jac(j + 1, k, l)/rho_rx) + & + (1._wp/dy(k)**2._wp)*(jac(j, k - 1, l)/rho_ly + jac(j, k + 1, l)/rho_ry)) + & + jac_rhs(j, k, l)/fd_coeff end if - end do + end if end do end do + end do $:END_GPU_PARALLEL_LOOP() call s_populate_F_igr_buffers(bc_type, jac_sf) if (igr_iter_solver == 1) then ! Jacobi iteration $:GPU_PARALLEL_LOOP(private='[j,k,l]', collapse=3) - do l = idwbuff(3)%beg, idwbuff(3)%end - do k = idwbuff(2)%beg, idwbuff(2)%end - do j = idwbuff(1)%beg, idwbuff(1)%end - jac_old(j, k, l) = jac(j, k, l) - end do + do l = idwbuff(3)%beg, idwbuff(3)%end + do k = idwbuff(2)%beg, idwbuff(2)%end + do j = idwbuff(1)%beg, idwbuff(1)%end + jac_old(j, k, l) = jac(j, k, l) end do end do + end do $:END_GPU_PARALLEL_LOOP() end if end do @@ -341,56 +341,56 @@ contains real(wp), dimension(num_fluids) :: alpha_rho_L, alpha_rho_R $:GPU_PARALLEL_LOOP(collapse=3, private='[j,k,l,F_L, vel_L, alpha_rho_L, F_R, vel_R, alpha_rho_R]') - do l = 0, p - do k = 0, n - do j = -1, m + do l = 0, p + do k = 0, n + do j = -1, m - F_L = 0._wp; F_R = 0._wp - alpha_rho_L = 0._wp; alpha_rho_R = 0._wp - vel_L = 0._wp; vel_R = 0._wp + F_L = 0._wp; F_R = 0._wp + alpha_rho_L = 0._wp; alpha_rho_R = 0._wp + vel_L = 0._wp; vel_R = 0._wp + $:GPU_LOOP(parallelism='[seq]') + do q = vidxb + 1, vidxe $:GPU_LOOP(parallelism='[seq]') - do q = vidxb + 1, vidxe - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - alpha_rho_L(i) = alpha_rho_L(i) + coeff_L(q)*q_cons_vf(i)%sf(j + q, k, l) - end do - - vel_L = vel_L + coeff_L(q)*q_cons_vf(momxb)%sf(j + q, k, l) - F_L = F_L + coeff_L(q)*jac(j + q, k, l) + do i = 1, num_fluids + alpha_rho_L(i) = alpha_rho_L(i) + coeff_L(q)*q_cons_vf(i)%sf(j + q, k, l) end do - $:GPU_LOOP(parallelism='[seq]') - do q = vidxb, vidxe - 1 - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - alpha_rho_R(i) = alpha_rho_R(i) + coeff_R(q)*q_cons_vf(i)%sf(j + q, k, l) - end do + vel_L = vel_L + coeff_L(q)*q_cons_vf(momxb)%sf(j + q, k, l) + F_L = F_L + coeff_L(q)*jac(j + q, k, l) + end do - vel_R = vel_R + coeff_R(q)*q_cons_vf(momxb)%sf(j + q, k, l) - F_R = F_R + coeff_R(q)*jac(j + q, k, l) + $:GPU_LOOP(parallelism='[seq]') + do q = vidxb, vidxe - 1 + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids + alpha_rho_R(i) = alpha_rho_R(i) + coeff_R(q)*q_cons_vf(i)%sf(j + q, k, l) end do - vel_L = vel_L/sum(alpha_rho_L) - vel_R = vel_R/sum(alpha_rho_R) - - #:for LR in ['L', 'R'] - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb)%sf(j + 1, k, l) = rhs_vf(momxb)%sf(j + 1, k, l) + & - 0.5_wp*F_${LR}$*(1._wp/dx(j + 1)) - $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j + 1, k, l) = rhs_vf(E_idx)%sf(j + 1, k, l) + & - 0.5_wp*vel_${LR}$*F_${LR}$*(1._wp/dx(j + 1)) - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb)%sf(j, k, l) = rhs_vf(momxb)%sf(j, k, l) - & - 0.5_wp*F_${LR}$*(1._wp/dx(j)) - $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) - & - 0.5_wp*vel_${LR}$*F_${LR}$*(1._wp/dx(j)) - #:endfor + vel_R = vel_R + coeff_R(q)*q_cons_vf(momxb)%sf(j + q, k, l) + F_R = F_R + coeff_R(q)*jac(j + q, k, l) end do + + vel_L = vel_L/sum(alpha_rho_L) + vel_R = vel_R/sum(alpha_rho_R) + + #:for LR in ['L', 'R'] + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb)%sf(j + 1, k, l) = rhs_vf(momxb)%sf(j + 1, k, l) + & + 0.5_wp*F_${LR}$*(1._wp/dx(j + 1)) + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j + 1, k, l) = rhs_vf(E_idx)%sf(j + 1, k, l) + & + 0.5_wp*vel_${LR}$*F_${LR}$*(1._wp/dx(j + 1)) + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb)%sf(j, k, l) = rhs_vf(momxb)%sf(j, k, l) - & + 0.5_wp*F_${LR}$*(1._wp/dx(j)) + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) - & + 0.5_wp*vel_${LR}$*F_${LR}$*(1._wp/dx(j)) + #:endfor end do end do + end do $:END_GPU_PARALLEL_LOOP() end subroutine s_igr_sigma_x @@ -420,1687 +420,1230 @@ contains if (idir == 1) then if (p == 0) then $:GPU_PARALLEL_LOOP(collapse=3, private='[j,k,l,rho_L, rho_R, gamma_L, gamma_R, pi_inf_L, pi_inf_R, mu_L, mu_R, vel_L, vel_R, pres_L, pres_R, alpha_L, alpha_R, alpha_rho_L, alpha_rho_R, F_L, F_R, E_L, E_R, cfl, dvel, dvel_small, rho_sf_small, vflux_L_arr, vflux_R_arr]') - do l = 0, p - do k = 0, n - do j = -1, m + do l = 0, p + do k = 0, n + do j = -1, m - dvel = 0._wp - vflux_L_arr = 0._wp - vflux_R_arr = 0._wp + dvel = 0._wp + vflux_L_arr = 0._wp + vflux_R_arr = 0._wp - #:if MFC_CASE_OPTIMIZATION - #:if igr_order == 5 - !DIR$ unroll 6 - #:elif igr_order == 3 - !DIR$ unroll 4 - #:endif + #:if MFC_CASE_OPTIMIZATION + #:if igr_order == 5 + !DIR$ unroll 6 + #:elif igr_order == 3 + !DIR$ unroll 4 #:endif + #:endif + $:GPU_LOOP(parallelism='[seq]') + do q = vidxb, vidxe + dvel_small = 0._wp + !x-direction contributions $:GPU_LOOP(parallelism='[seq]') - do q = vidxb, vidxe - dvel_small = 0._wp - !x-direction contributions - $:GPU_LOOP(parallelism='[seq]') - do i = -1, 1 - rho_L = 0._wp - $:GPU_LOOP(parallelism='[seq]') - do r = 1, num_fluids - rho_L = rho_L + q_cons_vf(r)%sf(j + i + q, k, l) - end do - rho_sf_small(i) = rho_L - end do - - dvel_small(1) = (1/(2._wp*dx(j)))*( & - 1._wp*q_cons_vf(momxb)%sf(j + 1 + q, k, l)/rho_sf_small(1) - & - 1._wp*q_cons_vf(momxb)%sf(j - 1 + q, k, l)/rho_sf_small(-1)) - dvel_small(2) = (1/(2._wp*dx(j)))*( & - q_cons_vf(momxb + 1)%sf(j + 1 + q, k, l)/rho_sf_small(1) - & - q_cons_vf(momxb + 1)%sf(j - 1 + q, k, l)/rho_sf_small(-1)) - - if (q == 0) dvel(:, 1) = dvel_small - if (q > vidxb) then - vflux_L_arr(1) = vflux_L_arr(1) + coeff_L(q)*(dvel_small(2)) - vflux_L_arr(3) = vflux_L_arr(3) + coeff_L(q)*(4._wp*dvel_small(1))/3._wp - end if - if (q < vidxe) then - vflux_R_arr(1) = vflux_R_arr(1) + coeff_R(q)*(dvel_small(2)) - vflux_R_arr(3) = vflux_R_arr(3) + coeff_R(q)*(4._wp*dvel_small(1))/3._wp - end if - - !y-direction contributions + do i = -1, 1 + rho_L = 0._wp $:GPU_LOOP(parallelism='[seq]') - do i = -1, 1 - rho_L = 0._wp - $:GPU_LOOP(parallelism='[seq]') - do r = 1, num_fluids - rho_L = rho_L + q_cons_vf(r)%sf(j + q, k + i, l) - end do - rho_sf_small(i) = rho_L + do r = 1, num_fluids + rho_L = rho_L + q_cons_vf(r)%sf(j + i + q, k, l) end do - - dvel_small(1) = (1/(2._wp*dy(k)))*( & - q_cons_vf(momxb)%sf(j + q, k + 1, l)/rho_sf_small(1) - & - q_cons_vf(momxb)%sf(j + q, k - 1, l)/rho_sf_small(-1)) - dvel_small(2) = (1/(2._wp*dy(k)))*( & - q_cons_vf(momxb + 1)%sf(j + q, k + 1, l)/rho_sf_small(1) - & - q_cons_vf(momxb + 1)%sf(j + q, k - 1, l)/rho_sf_small(-1)) - - if (q == 0) dvel(:, 2) = dvel_small - - if (q > vidxb) then - vflux_L_arr(1) = vflux_L_arr(1) + coeff_L(q)*(dvel_small(1)) - vflux_L_arr(3) = vflux_L_arr(3) + coeff_L(q)*(-2._wp*dvel_small(2))/3._wp - end if - if (q < vidxe) then - vflux_R_arr(1) = vflux_R_arr(1) + coeff_R(q)*(dvel_small(1)) - vflux_R_arr(3) = vflux_R_arr(3) + coeff_R(q)*(-2._wp*dvel_small(2))/3._wp - end if - - if (q == 0) then - jac_rhs(j, k, l) = alf_igr*(2._wp*(dvel(1, 2)*dvel(2, 1)) & - + dvel(1, 1)**2._wp + dvel(2, 2)**2._wp & - + (dvel(1, 1) + dvel(2, 2))**2._wp) - end if + rho_sf_small(i) = rho_L end do - alpha_rho_L = 0._wp; alpha_rho_R = 0._wp - alpha_L = 0._wp; alpha_R = 0._wp - vel_L = 0._wp; vel_R = 0._wp + dvel_small(1) = (1/(2._wp*dx(j)))*( & + 1._wp*q_cons_vf(momxb)%sf(j + 1 + q, k, l)/rho_sf_small(1) - & + 1._wp*q_cons_vf(momxb)%sf(j - 1 + q, k, l)/rho_sf_small(-1)) + dvel_small(2) = (1/(2._wp*dx(j)))*( & + q_cons_vf(momxb + 1)%sf(j + 1 + q, k, l)/rho_sf_small(1) - & + q_cons_vf(momxb + 1)%sf(j - 1 + q, k, l)/rho_sf_small(-1)) + + if (q == 0) dvel(:, 1) = dvel_small + if (q > vidxb) then + vflux_L_arr(1) = vflux_L_arr(1) + coeff_L(q)*(dvel_small(2)) + vflux_L_arr(3) = vflux_L_arr(3) + coeff_L(q)*(4._wp*dvel_small(1))/3._wp + end if + if (q < vidxe) then + vflux_R_arr(1) = vflux_R_arr(1) + coeff_R(q)*(dvel_small(2)) + vflux_R_arr(3) = vflux_R_arr(3) + coeff_R(q)*(4._wp*dvel_small(1))/3._wp + end if + !y-direction contributions $:GPU_LOOP(parallelism='[seq]') - do q = vidxb + 1, vidxe - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - alpha_rho_L(i) = alpha_rho_L(i) + coeff_L(q)*q_cons_vf(i)%sf(j + q, k, l) - end do - - if (num_fluids > 1) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - 1 - alpha_L(i) = alpha_L(i) + coeff_L(q)*q_cons_vf(E_idx + i)%sf(j + q, k, l) - end do - else - alpha_L(1) = 1._wp - end if - + do i = -1, 1 + rho_L = 0._wp $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_dims - vel_L(i) = vel_L(i) + coeff_L(q)*q_cons_vf(momxb + i - 1)%sf(j + q, k, l) + do r = 1, num_fluids + rho_L = rho_L + q_cons_vf(r)%sf(j + q, k + i, l) end do + rho_sf_small(i) = rho_L end do - $:GPU_LOOP(parallelism='[seq]') - do q = vidxb, vidxe - 1 - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - alpha_rho_R(i) = alpha_rho_R(i) + coeff_R(q)*q_cons_vf(i)%sf(j + q, k, l) - end do - - if (num_fluids > 1) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - 1 - alpha_R(i) = alpha_R(i) + coeff_R(q)*q_cons_vf(E_idx + i)%sf(j + q, k, l) - end do - else - alpha_R(1) = 1._wp - end if + dvel_small(1) = (1/(2._wp*dy(k)))*( & + q_cons_vf(momxb)%sf(j + q, k + 1, l)/rho_sf_small(1) - & + q_cons_vf(momxb)%sf(j + q, k - 1, l)/rho_sf_small(-1)) + dvel_small(2) = (1/(2._wp*dy(k)))*( & + q_cons_vf(momxb + 1)%sf(j + q, k + 1, l)/rho_sf_small(1) - & + q_cons_vf(momxb + 1)%sf(j + q, k - 1, l)/rho_sf_small(-1)) - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_dims - vel_R(i) = vel_R(i) + coeff_R(q)*q_cons_vf(momxb + i - 1)%sf(j + q, k, l) - end do - end do + if (q == 0) dvel(:, 2) = dvel_small - if (num_fluids > 1) then - alpha_L(num_fluids) = 1._wp - sum(alpha_L(1:num_fluids - 1)) - alpha_R(num_fluids) = 1._wp - sum(alpha_R(1:num_fluids - 1)) + if (q > vidxb) then + vflux_L_arr(1) = vflux_L_arr(1) + coeff_L(q)*(dvel_small(1)) + vflux_L_arr(3) = vflux_L_arr(3) + coeff_L(q)*(-2._wp*dvel_small(2))/3._wp + end if + if (q < vidxe) then + vflux_R_arr(1) = vflux_R_arr(1) + coeff_R(q)*(dvel_small(1)) + vflux_R_arr(3) = vflux_R_arr(3) + coeff_R(q)*(-2._wp*dvel_small(2))/3._wp end if - rho_L = sum(alpha_rho_L) - gamma_L = sum(alpha_L*gammas) - pi_inf_L = sum(alpha_L*pi_infs) + if (q == 0) then + jac_rhs(j, k, l) = alf_igr*(2._wp*(dvel(1, 2)*dvel(2, 1)) & + + dvel(1, 1)**2._wp + dvel(2, 2)**2._wp & + + (dvel(1, 1) + dvel(2, 2))**2._wp) + end if + end do - rho_R = sum(alpha_rho_R) - gamma_R = sum(alpha_R*gammas) - pi_inf_R = sum(alpha_R*pi_infs) + alpha_rho_L = 0._wp; alpha_rho_R = 0._wp + alpha_L = 0._wp; alpha_R = 0._wp + vel_L = 0._wp; vel_R = 0._wp - vel_L = vel_L/rho_L - vel_R = vel_R/rho_R + $:GPU_LOOP(parallelism='[seq]') + do q = vidxb + 1, vidxe + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids + alpha_rho_L(i) = alpha_rho_L(i) + coeff_L(q)*q_cons_vf(i)%sf(j + q, k, l) + end do - if (viscous) then - mu_L = 0._wp; mu_R = 0._wp + if (num_fluids > 1) then $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - mu_L = alpha_L(i)/Res_igr(1, i) + mu_L - mu_R = alpha_R(i)/Res_igr(1, i) + mu_R + do i = 1, num_fluids - 1 + alpha_L(i) = alpha_L(i) + coeff_L(q)*q_cons_vf(E_idx + i)%sf(j + q, k, l) end do - - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 1)%sf(j + 1, k, l) = rhs_vf(momxb + 1)%sf(j + 1, k, l) - & - 0.5_wp*mu_L*vflux_L_arr(1)*(1._wp/dx(j + 1)) - $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j + 1, k, l) = rhs_vf(E_idx)%sf(j + 1, k, l) - & - 0.5_wp*mu_L*vflux_L_arr(1)*vel_L(2)*(1._wp/dx(j + 1)) - - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 1)%sf(j, k, l) = rhs_vf(momxb + 1)%sf(j, k, l) + & - 0.5_wp*mu_L*vflux_L_arr(1)*(1._wp/dx(j)) - $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) + & - 0.5_wp*mu_L*vflux_L_arr(1)*vel_L(2)*(1._wp/dx(j)) - - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 1)%sf(j + 1, k, l) = rhs_vf(momxb + 1)%sf(j + 1, k, l) - & - 0.5_wp*mu_R*vflux_R_arr(1)*(1._wp/dx(j + 1)) - $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j + 1, k, l) = rhs_vf(E_idx)%sf(j + 1, k, l) - & - 0.5_wp*mu_R*vflux_R_arr(1)*vel_R(2)*(1._wp/dx(j + 1)) - - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 1)%sf(j, k, l) = rhs_vf(momxb + 1)%sf(j, k, l) + & - 0.5_wp*mu_R*vflux_R_arr(1)*(1._wp/dx(j)) - $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) + & - 0.5_wp*mu_R*vflux_R_arr(1)*vel_R(2)*(1._wp/dx(j)) - - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb)%sf(j + 1, k, l) = rhs_vf(momxb)%sf(j + 1, k, l) - & - 0.5_wp*mu_L*vflux_L_arr(3)*(1._wp/dx(j + 1)) - $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j + 1, k, l) = rhs_vf(E_idx)%sf(j + 1, k, l) - & - 0.5_wp*mu_L*vflux_L_arr(3)*vel_L(1)*(1._wp/dx(j + 1)) - - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb)%sf(j, k, l) = rhs_vf(momxb)%sf(j, k, l) + & - 0.5_wp*mu_L*vflux_L_arr(3)*(1._wp/dx(j)) - $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) + & - 0.5_wp*mu_L*vflux_L_arr(3)*vel_L(1)*(1._wp/dx(j)) - - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb)%sf(j + 1, k, l) = rhs_vf(momxb)%sf(j + 1, k, l) - & - 0.5_wp*mu_R*vflux_R_arr(3)*(1._wp/dx(j + 1)) - $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j + 1, k, l) = rhs_vf(E_idx)%sf(j + 1, k, l) - & - 0.5_wp*mu_R*vflux_R_arr(3)*vel_R(1)*(1._wp/dx(j + 1)) - - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb)%sf(j, k, l) = rhs_vf(momxb)%sf(j, k, l) + & - 0.5_wp*mu_R*vflux_R_arr(3)*(1._wp/dx(j)) - $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) + & - 0.5_wp*mu_R*vflux_R_arr(3)*vel_R(1)*(1._wp/dx(j)) + else + alpha_L(1) = 1._wp end if - E_L = 0._wp; E_R = 0._wp - $:GPU_LOOP(parallelism='[seq]') - do q = vidxb + 1, vidxe - E_L = E_L + coeff_L(q)*q_cons_vf(E_idx)%sf(j + q, k, l) + do i = 1, num_dims + vel_L(i) = vel_L(i) + coeff_L(q)*q_cons_vf(momxb + i - 1)%sf(j + q, k, l) end do + end do + $:GPU_LOOP(parallelism='[seq]') + do q = vidxb, vidxe - 1 $:GPU_LOOP(parallelism='[seq]') - do q = vidxb, vidxe - 1 - E_R = E_R + coeff_R(q)*q_cons_vf(E_idx)%sf(j + q, k, l) - end do - - call s_get_derived_states(E_L, gamma_L, pi_inf_L, rho_L, vel_L, & - E_R, gamma_R, pi_inf_R, rho_R, vel_R, & - pres_L, pres_R, cfl) - do i = 1, num_fluids - $:GPU_ATOMIC(atomic='update') - rhs_vf(i)%sf(j + 1, k, l) = rhs_vf(i)%sf(j + 1, k, l) + & - (0.5_wp*(alpha_rho_L(i)* & - vel_L(1))*(1._wp/dx(j + 1)) - & - 0.5_wp*cfl*(alpha_rho_L(i))*(1._wp/dx(j + 1))) - - $:GPU_ATOMIC(atomic='update') - rhs_vf(i)%sf(j, k, l) = rhs_vf(i)%sf(j, k, l) - & - (0.5_wp*(alpha_rho_L(i)* & - vel_L(1))*(1._wp/dx(j)) - & - 0.5_wp*cfl*(alpha_rho_L(i))*(1._wp/dx(j))) + alpha_rho_R(i) = alpha_rho_R(i) + coeff_R(q)*q_cons_vf(i)%sf(j + q, k, l) end do if (num_fluids > 1) then $:GPU_LOOP(parallelism='[seq]') do i = 1, num_fluids - 1 - $:GPU_ATOMIC(atomic='update') - rhs_vf(advxb + i - 1)%sf(j + 1, k, l) = rhs_vf(advxb + i - 1)%sf(j + 1, k, l) + & - (0.5_wp*(alpha_L(i)* & - vel_L(1))*(1._wp/dx(j + 1)) - & - 0.5_wp*cfl*(alpha_L(i))*(1._wp/dx(j + 1))) - - $:GPU_ATOMIC(atomic='update') - rhs_vf(advxb + i - 1)%sf(j + 1, k, l) = rhs_vf(advxb + i - 1)%sf(j + 1, k, l) & - - (0.5_wp*q_cons_vf(advxb + i - 1)%sf(j + 1, k, l)*vel_L(1)*(1._wp/dx(j + 1))) - - $:GPU_ATOMIC(atomic='update') - rhs_vf(advxb + i - 1)%sf(j, k, l) = rhs_vf(advxb + i - 1)%sf(j, k, l) - & - (0.5_wp*(alpha_L(i)* & - vel_L(1))*(1._wp/dx(j)) - & - 0.5_wp*cfl*(alpha_L(i))*(1._wp/dx(j))) - - $:GPU_ATOMIC(atomic='update') - rhs_vf(advxb + i - 1)%sf(j, k, l) = rhs_vf(advxb + i - 1)%sf(j, k, l) & - + (0.5_wp*q_cons_vf(advxb + i - 1)%sf(j, k, l)*vel_L(1)*(1._wp/dx(j))) + alpha_R(i) = alpha_R(i) + coeff_R(q)*q_cons_vf(E_idx + i)%sf(j + q, k, l) end do + else + alpha_R(1) = 1._wp end if - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb)%sf(j + 1, k, l) = rhs_vf(momxb)%sf(j + 1, k, l) + & - (0.5_wp*(rho_L*(vel_L(1))**2.0 + & - pres_L)*(1._wp/dx(j + 1)) - & - 0.5_wp*cfl*(rho_L*vel_L(1))*(1._wp/dx(j + 1))) - - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 1)%sf(j + 1, k, l) = rhs_vf(momxb + 1)%sf(j + 1, k, l) + & - (0.5_wp*rho_L*vel_L(1)*vel_L(2)*(1._wp/dx(j + 1)) - & - 0.5_wp*cfl*(rho_L*vel_L(2))*(1._wp/dx(j + 1))) + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_dims + vel_R(i) = vel_R(i) + coeff_R(q)*q_cons_vf(momxb + i - 1)%sf(j + q, k, l) + end do + end do - $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j + 1, k, l) = rhs_vf(E_idx)%sf(j + 1, k, l) + & - (0.5_wp*(vel_L(1)*(E_L + & - pres_L))*(1._wp/dx(j + 1)) - & - 0.5_wp*cfl*(E_L)*(1._wp/dx(j + 1))) + if (num_fluids > 1) then + alpha_L(num_fluids) = 1._wp - sum(alpha_L(1:num_fluids - 1)) + alpha_R(num_fluids) = 1._wp - sum(alpha_R(1:num_fluids - 1)) + end if - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb)%sf(j, k, l) = rhs_vf(momxb)%sf(j, k, l) - & - (0.5_wp*(rho_L*(vel_L(1))**2.0 + & - pres_L)*(1._wp/dx(j)) - & - 0.5_wp*cfl*(rho_L*vel_L(1))*(1._wp/dx(j))) + rho_L = sum(alpha_rho_L) + gamma_L = sum(alpha_L*gammas) + pi_inf_L = sum(alpha_L*pi_infs) - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 1)%sf(j, k, l) = rhs_vf(momxb + 1)%sf(j, k, l) - & - (0.5_wp*rho_L*vel_L(1)*vel_L(2)*(1._wp/dx(j)) - & - 0.5_wp*cfl*(rho_L*vel_L(2))*(1._wp/dx(j))) + rho_R = sum(alpha_rho_R) + gamma_R = sum(alpha_R*gammas) + pi_inf_R = sum(alpha_R*pi_infs) - $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) - & - (0.5_wp*(vel_L(1)*(E_L + & - pres_L))*(1._wp/dx(j)) - & - 0.5_wp*cfl*(E_L)*(1._wp/dx(j))) + vel_L = vel_L/rho_L + vel_R = vel_R/rho_R + if (viscous) then + mu_L = 0._wp; mu_R = 0._wp $:GPU_LOOP(parallelism='[seq]') do i = 1, num_fluids - $:GPU_ATOMIC(atomic='update') - rhs_vf(i)%sf(j + 1, k, l) = rhs_vf(i)%sf(j + 1, k, l) + & - (0.5_wp*(alpha_rho_R(i)* & - vel_R(1))*(1._wp/dx(j + 1)) + & - 0.5_wp*cfl*(alpha_rho_R(i))*(1._wp/dx(j + 1))) - - $:GPU_ATOMIC(atomic='update') - rhs_vf(i)%sf(j, k, l) = rhs_vf(i)%sf(j, k, l) - & - (0.5_wp*(alpha_rho_R(i)* & - vel_R(1))*(1._wp/dx(j)) + & - 0.5_wp*cfl*(alpha_rho_R(i))*(1._wp/dx(j))) + mu_L = alpha_L(i)/Res_igr(1, i) + mu_L + mu_R = alpha_R(i)/Res_igr(1, i) + mu_R end do - if (num_fluids > 1) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - 1 - $:GPU_ATOMIC(atomic='update') - rhs_vf(advxb + i - 1)%sf(j + 1, k, l) = rhs_vf(advxb + i - 1)%sf(j + 1, k, l) + & - (0.5_wp*(alpha_R(i)* & - vel_R(1))*(1._wp/dx(j + 1)) + & - 0.5_wp*cfl*(alpha_R(i))*(1._wp/dx(j + 1))) - - $:GPU_ATOMIC(atomic='update') - rhs_vf(advxb + i - 1)%sf(j + 1, k, l) = rhs_vf(advxb + i - 1)%sf(j + 1, k, l) & - - (0.5_wp*q_cons_vf(advxb + i - 1)%sf(j + 1, k, l)*vel_R(1)*(1._wp/dx(j + 1))) - - $:GPU_ATOMIC(atomic='update') - rhs_vf(advxb + i - 1)%sf(j, k, l) = rhs_vf(advxb + i - 1)%sf(j, k, l) - & - (0.5_wp*(alpha_R(i)* & - vel_R(1))*(1._wp/dx(j)) + & - 0.5_wp*cfl*(alpha_R(i))*(1._wp/dx(j))) + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb + 1)%sf(j + 1, k, l) = rhs_vf(momxb + 1)%sf(j + 1, k, l) - & + 0.5_wp*mu_L*vflux_L_arr(1)*(1._wp/dx(j + 1)) + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j + 1, k, l) = rhs_vf(E_idx)%sf(j + 1, k, l) - & + 0.5_wp*mu_L*vflux_L_arr(1)*vel_L(2)*(1._wp/dx(j + 1)) - $:GPU_ATOMIC(atomic='update') - rhs_vf(advxb + i - 1)%sf(j, k, l) = rhs_vf(advxb + i - 1)%sf(j, k, l) & - + (0.5_wp*q_cons_vf(advxb + i - 1)%sf(j, k, l)*vel_R(1)*(1._wp/dx(j))) - end do - end if + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb + 1)%sf(j, k, l) = rhs_vf(momxb + 1)%sf(j, k, l) + & + 0.5_wp*mu_L*vflux_L_arr(1)*(1._wp/dx(j)) + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) + & + 0.5_wp*mu_L*vflux_L_arr(1)*vel_L(2)*(1._wp/dx(j)) $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb)%sf(j + 1, k, l) = rhs_vf(momxb)%sf(j + 1, k, l) + & - (0.5_wp*(rho_R*(vel_R(1))**2.0 + & - pres_R)*(1._wp/dx(j + 1)) + & - 0.5_wp*cfl*(rho_R*vel_R(1))*(1._wp/dx(j + 1))) + rhs_vf(momxb + 1)%sf(j + 1, k, l) = rhs_vf(momxb + 1)%sf(j + 1, k, l) - & + 0.5_wp*mu_R*vflux_R_arr(1)*(1._wp/dx(j + 1)) + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j + 1, k, l) = rhs_vf(E_idx)%sf(j + 1, k, l) - & + 0.5_wp*mu_R*vflux_R_arr(1)*vel_R(2)*(1._wp/dx(j + 1)) $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 1)%sf(j + 1, k, l) = rhs_vf(momxb + 1)%sf(j + 1, k, l) + & - (0.5_wp*rho_R*vel_R(1)*vel_R(2)*(1._wp/dx(j + 1)) + & - 0.5_wp*cfl*(rho_R*vel_R(2))*(1._wp/dx(j + 1))) + rhs_vf(momxb + 1)%sf(j, k, l) = rhs_vf(momxb + 1)%sf(j, k, l) + & + 0.5_wp*mu_R*vflux_R_arr(1)*(1._wp/dx(j)) + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) + & + 0.5_wp*mu_R*vflux_R_arr(1)*vel_R(2)*(1._wp/dx(j)) $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j + 1, k, l) = rhs_vf(E_idx)%sf(j + 1, k, l) + & - (0.5_wp*(vel_R(1)*(E_R + & - pres_R))*(1._wp/dx(j + 1)) + & - 0.5_wp*cfl*(E_R)*(1._wp/dx(j + 1))) + rhs_vf(momxb)%sf(j + 1, k, l) = rhs_vf(momxb)%sf(j + 1, k, l) - & + 0.5_wp*mu_L*vflux_L_arr(3)*(1._wp/dx(j + 1)) + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j + 1, k, l) = rhs_vf(E_idx)%sf(j + 1, k, l) - & + 0.5_wp*mu_L*vflux_L_arr(3)*vel_L(1)*(1._wp/dx(j + 1)) $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb)%sf(j, k, l) = rhs_vf(momxb)%sf(j, k, l) - & - (0.5_wp*(rho_R*(vel_R(1))**2.0 + & - pres_R)*(1._wp/dx(j)) + & - 0.5_wp*cfl*(rho_R*vel_R(1))*(1._wp/dx(j))) + rhs_vf(momxb)%sf(j, k, l) = rhs_vf(momxb)%sf(j, k, l) + & + 0.5_wp*mu_L*vflux_L_arr(3)*(1._wp/dx(j)) + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) + & + 0.5_wp*mu_L*vflux_L_arr(3)*vel_L(1)*(1._wp/dx(j)) $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 1)%sf(j, k, l) = rhs_vf(momxb + 1)%sf(j, k, l) - & - (0.5_wp*rho_R*vel_R(1)*vel_R(2)*(1._wp/dx(j)) + & - 0.5_wp*cfl*(rho_R*vel_R(2))*(1._wp/dx(j))) + rhs_vf(momxb)%sf(j + 1, k, l) = rhs_vf(momxb)%sf(j + 1, k, l) - & + 0.5_wp*mu_R*vflux_R_arr(3)*(1._wp/dx(j + 1)) + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j + 1, k, l) = rhs_vf(E_idx)%sf(j + 1, k, l) - & + 0.5_wp*mu_R*vflux_R_arr(3)*vel_R(1)*(1._wp/dx(j + 1)) $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) - & - (0.5_wp*(vel_R(1)*(E_R + & - pres_R))*(1._wp/dx(j)) + & - 0.5_wp*cfl*(E_R)*(1._wp/dx(j))) + rhs_vf(momxb)%sf(j, k, l) = rhs_vf(momxb)%sf(j, k, l) + & + 0.5_wp*mu_R*vflux_R_arr(3)*(1._wp/dx(j)) + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) + & + 0.5_wp*mu_R*vflux_R_arr(3)*vel_R(1)*(1._wp/dx(j)) + end if + E_L = 0._wp; E_R = 0._wp + + $:GPU_LOOP(parallelism='[seq]') + do q = vidxb + 1, vidxe + E_L = E_L + coeff_L(q)*q_cons_vf(E_idx)%sf(j + q, k, l) end do - end do - end do - $:END_GPU_PARALLEL_LOOP() - else - $:GPU_PARALLEL_LOOP(collapse=3, private='[j,k,l,rho_L, rho_R, gamma_L, gamma_R, pi_inf_L, pi_inf_R, mu_L, mu_R, vel_L, vel_R, pres_L, pres_R, alpha_L, alpha_R, alpha_rho_L, alpha_rho_R, F_L, F_R, E_L, E_R, cfl, dvel, dvel_small, rho_sf_small, vflux_L_arr, vflux_R_arr]') - do l = 0, p - do k = 0, n - do j = -1, m - dvel = 0._wp - vflux_L_arr = 0._wp - vflux_R_arr = 0._wp + $:GPU_LOOP(parallelism='[seq]') + do q = vidxb, vidxe - 1 + E_R = E_R + coeff_R(q)*q_cons_vf(E_idx)%sf(j + q, k, l) + end do - #:if MFC_CASE_OPTIMIZATION - #:if igr_order == 5 - !DIR$ unroll 6 - #:elif igr_order == 3 - !DIR$ unroll 4 - #:endif - #:endif - $:GPU_LOOP(parallelism='[seq]') - do q = vidxb, vidxe - dvel_small = 0._wp - !x-direction contributions - $:GPU_LOOP(parallelism='[seq]') - do i = -1, 1 - rho_L = 0._wp - $:GPU_LOOP(parallelism='[seq]') - do r = 1, num_fluids - rho_L = rho_L + q_cons_vf(r)%sf(j + i + q, k, l) - end do - rho_sf_small(i) = rho_L - end do - - dvel_small(1) = (1/(2._wp*dx(j)))*( & - q_cons_vf(momxb)%sf(j + 1 + q, k, l)/rho_sf_small(1) - & - q_cons_vf(momxb)%sf(j - 1 + q, k, l)/rho_sf_small(-1)) - dvel_small(2) = (1/(2._wp*dx(j)))*( & - q_cons_vf(momxb + 1)%sf(j + 1 + q, k, l)/rho_sf_small(1) - & - q_cons_vf(momxb + 1)%sf(j - 1 + q, k, l)/rho_sf_small(-1)) - dvel_small(3) = (1/(2._wp*dx(j)))*( & - q_cons_vf(momxb + 2)%sf(j + 1 + q, k, l)/rho_sf_small(1) - & - q_cons_vf(momxb + 2)%sf(j - 1 + q, k, l)/rho_sf_small(-1)) - - if (q == 0) dvel(:, 1) = dvel_small - if (q > vidxb) then - vflux_L_arr(1) = vflux_L_arr(1) + coeff_L(q)*(dvel_small(2)) - vflux_L_arr(2) = vflux_L_arr(2) + coeff_L(q)*(dvel_small(3)) - vflux_L_arr(3) = vflux_L_arr(3) + coeff_L(q)*(4._wp*dvel_small(1))/3._wp - end if - if (q < vidxe) then - vflux_R_arr(1) = vflux_R_arr(1) + coeff_R(q)*(dvel_small(2)) - vflux_R_arr(2) = vflux_R_arr(2) + coeff_R(q)*(dvel_small(3)) - vflux_R_arr(3) = vflux_R_arr(3) + coeff_R(q)*(4._wp*dvel_small(1))/3._wp - end if - - !y-direction contributions - $:GPU_LOOP(parallelism='[seq]') - do i = -1, 1 - rho_L = 0._wp - $:GPU_LOOP(parallelism='[seq]') - do r = 1, num_fluids - rho_L = rho_L + q_cons_vf(r)%sf(j + q, k + i, l) - end do - rho_sf_small(i) = rho_L - end do - - dvel_small(1) = (1/(2._wp*dy(k)))*( & - q_cons_vf(momxb)%sf(j + q, k + 1, l)/rho_sf_small(1) - & - q_cons_vf(momxb)%sf(j + q, k - 1, l)/rho_sf_small(-1)) - dvel_small(2) = (1/(2._wp*dy(k)))*( & - q_cons_vf(momxb + 1)%sf(j + q, k + 1, l)/rho_sf_small(1) - & - q_cons_vf(momxb + 1)%sf(j + q, k - 1, l)/rho_sf_small(-1)) - if (q == 0) dvel_small(3) = (1/(2._wp*dy(k)))*( & - q_cons_vf(momxb + 2)%sf(j + q, k + 1, l)/rho_sf_small(1) - & - q_cons_vf(momxb + 2)%sf(j + q, k - 1, l)/rho_sf_small(-1)) - if (q == 0) dvel(:, 2) = dvel_small - - if (q > vidxb) then - vflux_L_arr(1) = vflux_L_arr(1) + coeff_L(q)*(dvel_small(1)) - vflux_L_arr(3) = vflux_L_arr(3) + coeff_L(q)*(-2._wp*dvel_small(2))/3._wp - end if - if (q < vidxe) then - vflux_R_arr(1) = vflux_R_arr(1) + coeff_R(q)*(dvel_small(1)) - vflux_R_arr(3) = vflux_R_arr(3) + coeff_R(q)*(-2._wp*dvel_small(2))/3._wp - end if - - !z-direction contributions - $:GPU_LOOP(parallelism='[seq]') - do i = -1, 1 - rho_L = 0._wp - $:GPU_LOOP(parallelism='[seq]') - do r = 1, num_fluids - rho_L = rho_L + q_cons_vf(r)%sf(j + q, k, l + i) - end do - rho_sf_small(i) = rho_L - end do - - dvel_small(1) = (1/(2._wp*dz(l)))*( & - q_cons_vf(momxb)%sf(j + q, k, l + 1)/rho_sf_small(1) - & - q_cons_vf(momxb)%sf(j + q, k, l - 1)/rho_sf_small(-1)) - if (q == 0) dvel_small(2) = (1/(2._wp*dz(l)))*( & - q_cons_vf(momxb + 1)%sf(j + q, k, l + 1)/rho_sf_small(1) - & - q_cons_vf(momxb + 1)%sf(j + q, k, l - 1)/rho_sf_small(-1)) - dvel_small(3) = (1/(2._wp*dz(l)))*( & - q_cons_vf(momxb + 2)%sf(j + q, k, l + 1)/rho_sf_small(1) - & - q_cons_vf(momxb + 2)%sf(j + q, k, l - 1)/rho_sf_small(-1)) - if (q == 0) dvel(:, 3) = dvel_small - - if (q > vidxb) then - vflux_L_arr(2) = vflux_L_arr(2) + coeff_L(q)*(dvel_small(1)) - vflux_L_arr(3) = vflux_L_arr(3) + coeff_L(q)*(-2._wp*dvel_small(3))/3._wp - end if - if (q < vidxe) then - vflux_R_arr(2) = vflux_R_arr(2) + coeff_R(q)*(dvel_small(1)) - vflux_R_arr(3) = vflux_R_arr(3) + coeff_R(q)*(-2._wp*dvel_small(3))/3._wp - end if + call s_get_derived_states(E_L, gamma_L, pi_inf_L, rho_L, vel_L, & + E_R, gamma_R, pi_inf_R, rho_R, vel_R, & + pres_L, pres_R, cfl) - if (q == 0) then - jac_rhs(j, k, l) = alf_igr*(2._wp*(dvel(1, 2)*dvel(2, 1) & - + dvel(1, 3)*dvel(3, 1) & - + dvel(2, 3)*dvel(3, 2)) & - + dvel(1, 1)**2._wp + dvel(2, 2)**2._wp & - + dvel(3, 3)**2._wp & - + (dvel(1, 1) + dvel(2, 2) + dvel(3, 3))**2._wp) - end if - end do + do i = 1, num_fluids + $:GPU_ATOMIC(atomic='update') + rhs_vf(i)%sf(j + 1, k, l) = rhs_vf(i)%sf(j + 1, k, l) + & + (0.5_wp*(alpha_rho_L(i)* & + vel_L(1))*(1._wp/dx(j + 1)) - & + 0.5_wp*cfl*(alpha_rho_L(i))*(1._wp/dx(j + 1))) - alpha_rho_L = 0._wp; alpha_rho_R = 0._wp - alpha_L = 0._wp; alpha_R = 0._wp - vel_L = 0._wp; vel_R = 0._wp + $:GPU_ATOMIC(atomic='update') + rhs_vf(i)%sf(j, k, l) = rhs_vf(i)%sf(j, k, l) - & + (0.5_wp*(alpha_rho_L(i)* & + vel_L(1))*(1._wp/dx(j)) - & + 0.5_wp*cfl*(alpha_rho_L(i))*(1._wp/dx(j))) + end do + if (num_fluids > 1) then $:GPU_LOOP(parallelism='[seq]') - do q = vidxb + 1, vidxe - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - alpha_rho_L(i) = alpha_rho_L(i) + coeff_L(q)*q_cons_vf(i)%sf(j + q, k, l) - end do + do i = 1, num_fluids - 1 + $:GPU_ATOMIC(atomic='update') + rhs_vf(advxb + i - 1)%sf(j + 1, k, l) = rhs_vf(advxb + i - 1)%sf(j + 1, k, l) + & + (0.5_wp*(alpha_L(i)* & + vel_L(1))*(1._wp/dx(j + 1)) - & + 0.5_wp*cfl*(alpha_L(i))*(1._wp/dx(j + 1))) - if (num_fluids > 1) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - 1 - alpha_L(i) = alpha_L(i) + coeff_L(q)*q_cons_vf(E_idx + i)%sf(j + q, k, l) - end do - else - alpha_L(1) = 1._wp - end if + $:GPU_ATOMIC(atomic='update') + rhs_vf(advxb + i - 1)%sf(j + 1, k, l) = rhs_vf(advxb + i - 1)%sf(j + 1, k, l) & + - (0.5_wp*q_cons_vf(advxb + i - 1)%sf(j + 1, k, l)*vel_L(1)*(1._wp/dx(j + 1))) - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_dims - vel_L(i) = vel_L(i) + coeff_L(q)*q_cons_vf(momxb + i - 1)%sf(j + q, k, l) - end do + $:GPU_ATOMIC(atomic='update') + rhs_vf(advxb + i - 1)%sf(j, k, l) = rhs_vf(advxb + i - 1)%sf(j, k, l) - & + (0.5_wp*(alpha_L(i)* & + vel_L(1))*(1._wp/dx(j)) - & + 0.5_wp*cfl*(alpha_L(i))*(1._wp/dx(j))) + + $:GPU_ATOMIC(atomic='update') + rhs_vf(advxb + i - 1)%sf(j, k, l) = rhs_vf(advxb + i - 1)%sf(j, k, l) & + + (0.5_wp*q_cons_vf(advxb + i - 1)%sf(j, k, l)*vel_L(1)*(1._wp/dx(j))) end do + end if - $:GPU_LOOP(parallelism='[seq]') - do q = vidxb, vidxe - 1 - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - alpha_rho_R(i) = alpha_rho_R(i) + coeff_R(q)*q_cons_vf(i)%sf(j + q, k, l) - end do + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb)%sf(j + 1, k, l) = rhs_vf(momxb)%sf(j + 1, k, l) + & + (0.5_wp*(rho_L*(vel_L(1))**2.0 + & + pres_L)*(1._wp/dx(j + 1)) - & + 0.5_wp*cfl*(rho_L*vel_L(1))*(1._wp/dx(j + 1))) - if (num_fluids > 1) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - 1 - alpha_R(i) = alpha_R(i) + coeff_R(q)*q_cons_vf(E_idx + i)%sf(j + q, k, l) - end do - else - alpha_R(1) = 1._wp - end if + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb + 1)%sf(j + 1, k, l) = rhs_vf(momxb + 1)%sf(j + 1, k, l) + & + (0.5_wp*rho_L*vel_L(1)*vel_L(2)*(1._wp/dx(j + 1)) - & + 0.5_wp*cfl*(rho_L*vel_L(2))*(1._wp/dx(j + 1))) - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_dims - vel_R(i) = vel_R(i) + coeff_R(q)*q_cons_vf(momxb + i - 1)%sf(j + q, k, l) - end do - end do + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j + 1, k, l) = rhs_vf(E_idx)%sf(j + 1, k, l) + & + (0.5_wp*(vel_L(1)*(E_L + & + pres_L))*(1._wp/dx(j + 1)) - & + 0.5_wp*cfl*(E_L)*(1._wp/dx(j + 1))) - if (num_fluids > 1) then - alpha_L(num_fluids) = 1._wp - sum(alpha_L(1:num_fluids - 1)) - alpha_R(num_fluids) = 1._wp - sum(alpha_R(1:num_fluids - 1)) - end if + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb)%sf(j, k, l) = rhs_vf(momxb)%sf(j, k, l) - & + (0.5_wp*(rho_L*(vel_L(1))**2.0 + & + pres_L)*(1._wp/dx(j)) - & + 0.5_wp*cfl*(rho_L*vel_L(1))*(1._wp/dx(j))) - rho_L = sum(alpha_rho_L) - gamma_L = sum(alpha_L*gammas) - pi_inf_L = sum(alpha_L*pi_infs) + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb + 1)%sf(j, k, l) = rhs_vf(momxb + 1)%sf(j, k, l) - & + (0.5_wp*rho_L*vel_L(1)*vel_L(2)*(1._wp/dx(j)) - & + 0.5_wp*cfl*(rho_L*vel_L(2))*(1._wp/dx(j))) - rho_R = sum(alpha_rho_R) - gamma_R = sum(alpha_R*gammas) - pi_inf_R = sum(alpha_R*pi_infs) + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) - & + (0.5_wp*(vel_L(1)*(E_L + & + pres_L))*(1._wp/dx(j)) - & + 0.5_wp*cfl*(E_L)*(1._wp/dx(j))) - vel_L = vel_L/rho_L - vel_R = vel_R/rho_R + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids + $:GPU_ATOMIC(atomic='update') + rhs_vf(i)%sf(j + 1, k, l) = rhs_vf(i)%sf(j + 1, k, l) + & + (0.5_wp*(alpha_rho_R(i)* & + vel_R(1))*(1._wp/dx(j + 1)) + & + 0.5_wp*cfl*(alpha_rho_R(i))*(1._wp/dx(j + 1))) - if (viscous) then - mu_L = 0._wp - mu_R = 0._wp - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - mu_L = alpha_L(i)/Res_igr(1, i) + mu_L - mu_R = alpha_R(i)/Res_igr(1, i) + mu_R - end do + $:GPU_ATOMIC(atomic='update') + rhs_vf(i)%sf(j, k, l) = rhs_vf(i)%sf(j, k, l) - & + (0.5_wp*(alpha_rho_R(i)* & + vel_R(1))*(1._wp/dx(j)) + & + 0.5_wp*cfl*(alpha_rho_R(i))*(1._wp/dx(j))) + end do + if (num_fluids > 1) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids - 1 $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 1)%sf(j + 1, k, l) = rhs_vf(momxb + 1)%sf(j + 1, k, l) - & - 0.5_wp*mu_L*vflux_L_arr(1)*(1._wp/dx(j + 1)) - $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j + 1, k, l) = rhs_vf(E_idx)%sf(j + 1, k, l) - & - 0.5_wp*mu_L*vflux_L_arr(1)*vel_L(2)*(1._wp/dx(j + 1)) + rhs_vf(advxb + i - 1)%sf(j + 1, k, l) = rhs_vf(advxb + i - 1)%sf(j + 1, k, l) + & + (0.5_wp*(alpha_R(i)* & + vel_R(1))*(1._wp/dx(j + 1)) + & + 0.5_wp*cfl*(alpha_R(i))*(1._wp/dx(j + 1))) $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 1)%sf(j, k, l) = rhs_vf(momxb + 1)%sf(j, k, l) + & - 0.5_wp*mu_L*vflux_L_arr(1)*(1._wp/dx(j)) - $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) + & - 0.5_wp*mu_L*vflux_L_arr(1)*vel_L(2)*(1._wp/dx(j)) + rhs_vf(advxb + i - 1)%sf(j + 1, k, l) = rhs_vf(advxb + i - 1)%sf(j + 1, k, l) & + - (0.5_wp*q_cons_vf(advxb + i - 1)%sf(j + 1, k, l)*vel_R(1)*(1._wp/dx(j + 1))) $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 1)%sf(j + 1, k, l) = rhs_vf(momxb + 1)%sf(j + 1, k, l) - & - 0.5_wp*mu_R*vflux_R_arr(1)*(1._wp/dx(j + 1)) - $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j + 1, k, l) = rhs_vf(E_idx)%sf(j + 1, k, l) - & - 0.5_wp*mu_R*vflux_R_arr(1)*vel_R(2)*(1._wp/dx(j + 1)) + rhs_vf(advxb + i - 1)%sf(j, k, l) = rhs_vf(advxb + i - 1)%sf(j, k, l) - & + (0.5_wp*(alpha_R(i)* & + vel_R(1))*(1._wp/dx(j)) + & + 0.5_wp*cfl*(alpha_R(i))*(1._wp/dx(j))) $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 1)%sf(j, k, l) = rhs_vf(momxb + 1)%sf(j, k, l) + & - 0.5_wp*mu_R*vflux_R_arr(1)*(1._wp/dx(j)) - $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) + & - 0.5_wp*mu_R*vflux_R_arr(1)*vel_R(2)*(1._wp/dx(j)) + rhs_vf(advxb + i - 1)%sf(j, k, l) = rhs_vf(advxb + i - 1)%sf(j, k, l) & + + (0.5_wp*q_cons_vf(advxb + i - 1)%sf(j, k, l)*vel_R(1)*(1._wp/dx(j))) + end do + end if - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 2)%sf(j + 1, k, l) = rhs_vf(momxb + 2)%sf(j + 1, k, l) - & - 0.5_wp*mu_L*vflux_L_arr(2)*(1._wp/dx(j + 1)) - $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j + 1, k, l) = rhs_vf(E_idx)%sf(j + 1, k, l) - & - 0.5_wp*mu_L*vflux_L_arr(2)*vel_L(3)*(1._wp/dx(j + 1)) + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb)%sf(j + 1, k, l) = rhs_vf(momxb)%sf(j + 1, k, l) + & + (0.5_wp*(rho_R*(vel_R(1))**2.0 + & + pres_R)*(1._wp/dx(j + 1)) + & + 0.5_wp*cfl*(rho_R*vel_R(1))*(1._wp/dx(j + 1))) - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 2)%sf(j, k, l) = rhs_vf(momxb + 2)%sf(j, k, l) + & - 0.5_wp*mu_L*vflux_L_arr(2)*(1._wp/dx(j)) - $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) + & - 0.5_wp*mu_L*vflux_L_arr(2)*vel_L(3)*(1._wp/dx(j)) + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb + 1)%sf(j + 1, k, l) = rhs_vf(momxb + 1)%sf(j + 1, k, l) + & + (0.5_wp*rho_R*vel_R(1)*vel_R(2)*(1._wp/dx(j + 1)) + & + 0.5_wp*cfl*(rho_R*vel_R(2))*(1._wp/dx(j + 1))) - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 2)%sf(j + 1, k, l) = rhs_vf(momxb + 2)%sf(j + 1, k, l) - & - 0.5_wp*mu_R*vflux_R_arr(2)*(1._wp/dx(j + 1)) - $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j + 1, k, l) = rhs_vf(E_idx)%sf(j + 1, k, l) - & - 0.5_wp*mu_R*vflux_R_arr(2)*vel_R(3)*(1._wp/dx(j + 1)) + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j + 1, k, l) = rhs_vf(E_idx)%sf(j + 1, k, l) + & + (0.5_wp*(vel_R(1)*(E_R + & + pres_R))*(1._wp/dx(j + 1)) + & + 0.5_wp*cfl*(E_R)*(1._wp/dx(j + 1))) - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 2)%sf(j, k, l) = rhs_vf(momxb + 2)%sf(j, k, l) + & - 0.5_wp*mu_R*vflux_R_arr(2)*(1._wp/dx(j)) - $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) + & - 0.5_wp*mu_R*vflux_R_arr(2)*vel_R(3)*(1._wp/dx(j)) + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb)%sf(j, k, l) = rhs_vf(momxb)%sf(j, k, l) - & + (0.5_wp*(rho_R*(vel_R(1))**2.0 + & + pres_R)*(1._wp/dx(j)) + & + 0.5_wp*cfl*(rho_R*vel_R(1))*(1._wp/dx(j))) - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb)%sf(j + 1, k, l) = rhs_vf(momxb)%sf(j + 1, k, l) - & - 0.5_wp*mu_L*vflux_L_arr(3)*(1._wp/dx(j + 1)) - $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j + 1, k, l) = rhs_vf(E_idx)%sf(j + 1, k, l) - & - 0.5_wp*mu_L*vflux_L_arr(3)*vel_L(1)*(1._wp/dx(j + 1)) + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb + 1)%sf(j, k, l) = rhs_vf(momxb + 1)%sf(j, k, l) - & + (0.5_wp*rho_R*vel_R(1)*vel_R(2)*(1._wp/dx(j)) + & + 0.5_wp*cfl*(rho_R*vel_R(2))*(1._wp/dx(j))) - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb)%sf(j, k, l) = rhs_vf(momxb)%sf(j, k, l) + & - 0.5_wp*mu_L*vflux_L_arr(3)*(1._wp/dx(j)) - $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) + & - 0.5_wp*mu_L*vflux_L_arr(3)*vel_L(1)*(1._wp/dx(j)) + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) - & + (0.5_wp*(vel_R(1)*(E_R + & + pres_R))*(1._wp/dx(j)) + & + 0.5_wp*cfl*(E_R)*(1._wp/dx(j))) - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb)%sf(j + 1, k, l) = rhs_vf(momxb)%sf(j + 1, k, l) - & - 0.5_wp*mu_R*vflux_R_arr(3)*(1._wp/dx(j + 1)) - $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j + 1, k, l) = rhs_vf(E_idx)%sf(j + 1, k, l) - & - 0.5_wp*mu_R*vflux_R_arr(3)*vel_R(1)*(1._wp/dx(j + 1)) + end do + end do + end do + $:END_GPU_PARALLEL_LOOP() + else + $:GPU_PARALLEL_LOOP(collapse=3, private='[j,k,l,rho_L, rho_R, gamma_L, gamma_R, pi_inf_L, pi_inf_R, mu_L, mu_R, vel_L, vel_R, pres_L, pres_R, alpha_L, alpha_R, alpha_rho_L, alpha_rho_R, F_L, F_R, E_L, E_R, cfl, dvel, dvel_small, rho_sf_small, vflux_L_arr, vflux_R_arr]') + do l = 0, p + do k = 0, n + do j = -1, m - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb)%sf(j, k, l) = rhs_vf(momxb)%sf(j, k, l) + & - 0.5_wp*mu_R*vflux_R_arr(3)*(1._wp/dx(j)) - $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) + & - 0.5_wp*mu_R*vflux_R_arr(3)*vel_R(1)*(1._wp/dx(j)) - end if + dvel = 0._wp + vflux_L_arr = 0._wp + vflux_R_arr = 0._wp - E_L = 0._wp; E_R = 0._wp + #:if MFC_CASE_OPTIMIZATION + #:if igr_order == 5 + !DIR$ unroll 6 + #:elif igr_order == 3 + !DIR$ unroll 4 + #:endif + #:endif + $:GPU_LOOP(parallelism='[seq]') + do q = vidxb, vidxe + dvel_small = 0._wp + !x-direction contributions + $:GPU_LOOP(parallelism='[seq]') + do i = -1, 1 + rho_L = 0._wp + $:GPU_LOOP(parallelism='[seq]') + do r = 1, num_fluids + rho_L = rho_L + q_cons_vf(r)%sf(j + i + q, k, l) + end do + rho_sf_small(i) = rho_L + end do + + dvel_small(1) = (1/(2._wp*dx(j)))*( & + q_cons_vf(momxb)%sf(j + 1 + q, k, l)/rho_sf_small(1) - & + q_cons_vf(momxb)%sf(j - 1 + q, k, l)/rho_sf_small(-1)) + dvel_small(2) = (1/(2._wp*dx(j)))*( & + q_cons_vf(momxb + 1)%sf(j + 1 + q, k, l)/rho_sf_small(1) - & + q_cons_vf(momxb + 1)%sf(j - 1 + q, k, l)/rho_sf_small(-1)) + dvel_small(3) = (1/(2._wp*dx(j)))*( & + q_cons_vf(momxb + 2)%sf(j + 1 + q, k, l)/rho_sf_small(1) - & + q_cons_vf(momxb + 2)%sf(j - 1 + q, k, l)/rho_sf_small(-1)) + + if (q == 0) dvel(:, 1) = dvel_small + if (q > vidxb) then + vflux_L_arr(1) = vflux_L_arr(1) + coeff_L(q)*(dvel_small(2)) + vflux_L_arr(2) = vflux_L_arr(2) + coeff_L(q)*(dvel_small(3)) + vflux_L_arr(3) = vflux_L_arr(3) + coeff_L(q)*(4._wp*dvel_small(1))/3._wp + end if + if (q < vidxe) then + vflux_R_arr(1) = vflux_R_arr(1) + coeff_R(q)*(dvel_small(2)) + vflux_R_arr(2) = vflux_R_arr(2) + coeff_R(q)*(dvel_small(3)) + vflux_R_arr(3) = vflux_R_arr(3) + coeff_R(q)*(4._wp*dvel_small(1))/3._wp + end if + !y-direction contributions $:GPU_LOOP(parallelism='[seq]') - do q = vidxb + 1, vidxe - E_L = E_L + coeff_L(q)*q_cons_vf(E_idx)%sf(j + q, k, l) + do i = -1, 1 + rho_L = 0._wp + $:GPU_LOOP(parallelism='[seq]') + do r = 1, num_fluids + rho_L = rho_L + q_cons_vf(r)%sf(j + q, k + i, l) + end do + rho_sf_small(i) = rho_L end do + dvel_small(1) = (1/(2._wp*dy(k)))*( & + q_cons_vf(momxb)%sf(j + q, k + 1, l)/rho_sf_small(1) - & + q_cons_vf(momxb)%sf(j + q, k - 1, l)/rho_sf_small(-1)) + dvel_small(2) = (1/(2._wp*dy(k)))*( & + q_cons_vf(momxb + 1)%sf(j + q, k + 1, l)/rho_sf_small(1) - & + q_cons_vf(momxb + 1)%sf(j + q, k - 1, l)/rho_sf_small(-1)) + if (q == 0) dvel_small(3) = (1/(2._wp*dy(k)))*( & + q_cons_vf(momxb + 2)%sf(j + q, k + 1, l)/rho_sf_small(1) - & + q_cons_vf(momxb + 2)%sf(j + q, k - 1, l)/rho_sf_small(-1)) + if (q == 0) dvel(:, 2) = dvel_small + + if (q > vidxb) then + vflux_L_arr(1) = vflux_L_arr(1) + coeff_L(q)*(dvel_small(1)) + vflux_L_arr(3) = vflux_L_arr(3) + coeff_L(q)*(-2._wp*dvel_small(2))/3._wp + end if + if (q < vidxe) then + vflux_R_arr(1) = vflux_R_arr(1) + coeff_R(q)*(dvel_small(1)) + vflux_R_arr(3) = vflux_R_arr(3) + coeff_R(q)*(-2._wp*dvel_small(2))/3._wp + end if + + !z-direction contributions $:GPU_LOOP(parallelism='[seq]') - do q = vidxb, vidxe - 1 - E_R = E_R + coeff_R(q)*q_cons_vf(E_idx)%sf(j + q, k, l) + do i = -1, 1 + rho_L = 0._wp + $:GPU_LOOP(parallelism='[seq]') + do r = 1, num_fluids + rho_L = rho_L + q_cons_vf(r)%sf(j + q, k, l + i) + end do + rho_sf_small(i) = rho_L end do - call s_get_derived_states(E_L, gamma_L, pi_inf_L, rho_L, vel_L, & - E_R, gamma_R, pi_inf_R, rho_R, vel_R, & - pres_L, pres_R, cfl) + dvel_small(1) = (1/(2._wp*dz(l)))*( & + q_cons_vf(momxb)%sf(j + q, k, l + 1)/rho_sf_small(1) - & + q_cons_vf(momxb)%sf(j + q, k, l - 1)/rho_sf_small(-1)) + if (q == 0) dvel_small(2) = (1/(2._wp*dz(l)))*( & + q_cons_vf(momxb + 1)%sf(j + q, k, l + 1)/rho_sf_small(1) - & + q_cons_vf(momxb + 1)%sf(j + q, k, l - 1)/rho_sf_small(-1)) + dvel_small(3) = (1/(2._wp*dz(l)))*( & + q_cons_vf(momxb + 2)%sf(j + q, k, l + 1)/rho_sf_small(1) - & + q_cons_vf(momxb + 2)%sf(j + q, k, l - 1)/rho_sf_small(-1)) + if (q == 0) dvel(:, 3) = dvel_small + + if (q > vidxb) then + vflux_L_arr(2) = vflux_L_arr(2) + coeff_L(q)*(dvel_small(1)) + vflux_L_arr(3) = vflux_L_arr(3) + coeff_L(q)*(-2._wp*dvel_small(3))/3._wp + end if + if (q < vidxe) then + vflux_R_arr(2) = vflux_R_arr(2) + coeff_R(q)*(dvel_small(1)) + vflux_R_arr(3) = vflux_R_arr(3) + coeff_R(q)*(-2._wp*dvel_small(3))/3._wp + end if + + if (q == 0) then + jac_rhs(j, k, l) = alf_igr*(2._wp*(dvel(1, 2)*dvel(2, 1) & + + dvel(1, 3)*dvel(3, 1) & + + dvel(2, 3)*dvel(3, 2)) & + + dvel(1, 1)**2._wp + dvel(2, 2)**2._wp & + + dvel(3, 3)**2._wp & + + (dvel(1, 1) + dvel(2, 2) + dvel(3, 3))**2._wp) + end if + end do + + alpha_rho_L = 0._wp; alpha_rho_R = 0._wp + alpha_L = 0._wp; alpha_R = 0._wp + vel_L = 0._wp; vel_R = 0._wp + $:GPU_LOOP(parallelism='[seq]') + do q = vidxb + 1, vidxe $:GPU_LOOP(parallelism='[seq]') do i = 1, num_fluids - $:GPU_ATOMIC(atomic='update') - rhs_vf(i)%sf(j + 1, k, l) = rhs_vf(i)%sf(j + 1, k, l) + & - (0.5_wp*(alpha_rho_L(i)* & - vel_L(1))*(1._wp/dx(j + 1)) - & - 0.5_wp*cfl*(alpha_rho_L(i))*(1._wp/dx(j + 1))) - - $:GPU_ATOMIC(atomic='update') - rhs_vf(i)%sf(j, k, l) = rhs_vf(i)%sf(j, k, l) - & - (0.5_wp*(alpha_rho_L(i)* & - vel_L(1))*(1._wp/dx(j)) - & - 0.5_wp*cfl*(alpha_rho_L(i))*(1._wp/dx(j))) + alpha_rho_L(i) = alpha_rho_L(i) + coeff_L(q)*q_cons_vf(i)%sf(j + q, k, l) end do if (num_fluids > 1) then $:GPU_LOOP(parallelism='[seq]') do i = 1, num_fluids - 1 - $:GPU_ATOMIC(atomic='update') - rhs_vf(advxb + i - 1)%sf(j + 1, k, l) = rhs_vf(advxb + i - 1)%sf(j + 1, k, l) + & - (0.5_wp*(alpha_L(i)* & - vel_L(1))*(1._wp/dx(j + 1)) - & - 0.5_wp*cfl*(alpha_L(i))*(1._wp/dx(j + 1))) - - $:GPU_ATOMIC(atomic='update') - rhs_vf(advxb + i - 1)%sf(j + 1, k, l) = rhs_vf(advxb + i - 1)%sf(j + 1, k, l) & - - (0.5_wp*q_cons_vf(advxb + i - 1)%sf(j + 1, k, l)*vel_L(1)*(1._wp/dx(j + 1))) - - $:GPU_ATOMIC(atomic='update') - rhs_vf(advxb + i - 1)%sf(j, k, l) = rhs_vf(advxb + i - 1)%sf(j, k, l) - & - (0.5_wp*(alpha_L(i)* & - vel_L(1))*(1._wp/dx(j)) - & - 0.5_wp*cfl*(alpha_L(i))*(1._wp/dx(j))) - - $:GPU_ATOMIC(atomic='update') - rhs_vf(advxb + i - 1)%sf(j, k, l) = rhs_vf(advxb + i - 1)%sf(j, k, l) & - + (0.5_wp*q_cons_vf(advxb + i - 1)%sf(j, k, l)*vel_L(1)*(1._wp/dx(j))) + alpha_L(i) = alpha_L(i) + coeff_L(q)*q_cons_vf(E_idx + i)%sf(j + q, k, l) end do + else + alpha_L(1) = 1._wp end if - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb)%sf(j + 1, k, l) = rhs_vf(momxb)%sf(j + 1, k, l) + & - (0.5_wp*(rho_L*(vel_L(1))**2.0 + & - pres_L)*(1._wp/dx(j + 1)) - & - 0.5_wp*cfl*(rho_L*vel_L(1))*(1._wp/dx(j + 1))) + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_dims + vel_L(i) = vel_L(i) + coeff_L(q)*q_cons_vf(momxb + i - 1)%sf(j + q, k, l) + end do + end do - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 1)%sf(j + 1, k, l) = rhs_vf(momxb + 1)%sf(j + 1, k, l) + & - (0.5_wp*rho_L*vel_L(1)*vel_L(2)*(1._wp/dx(j + 1)) - & - 0.5_wp*cfl*(rho_L*vel_L(2))*(1._wp/dx(j + 1))) + $:GPU_LOOP(parallelism='[seq]') + do q = vidxb, vidxe - 1 + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids + alpha_rho_R(i) = alpha_rho_R(i) + coeff_R(q)*q_cons_vf(i)%sf(j + q, k, l) + end do - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 2)%sf(j + 1, k, l) = rhs_vf(momxb + 2)%sf(j + 1, k, l) + & - (0.5_wp*rho_L*vel_L(1)*vel_L(3)*(1._wp/dx(j + 1)) - & - 0.5_wp*cfl*(rho_L*vel_L(3))*(1._wp/dx(j + 1))) + if (num_fluids > 1) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids - 1 + alpha_R(i) = alpha_R(i) + coeff_R(q)*q_cons_vf(E_idx + i)%sf(j + q, k, l) + end do + else + alpha_R(1) = 1._wp + end if - $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j + 1, k, l) = rhs_vf(E_idx)%sf(j + 1, k, l) + & - (0.5_wp*(vel_L(1)*(E_L + & - pres_L))*(1._wp/dx(j + 1)) - & - 0.5_wp*cfl*(E_L)*(1._wp/dx(j + 1))) + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_dims + vel_R(i) = vel_R(i) + coeff_R(q)*q_cons_vf(momxb + i - 1)%sf(j + q, k, l) + end do + end do - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb)%sf(j, k, l) = rhs_vf(momxb)%sf(j, k, l) - & - (0.5_wp*(rho_L*(vel_L(1))**2.0 + & - pres_L)*(1._wp/dx(j)) - & - 0.5_wp*cfl*(rho_L*vel_L(1))*(1._wp/dx(j))) + if (num_fluids > 1) then + alpha_L(num_fluids) = 1._wp - sum(alpha_L(1:num_fluids - 1)) + alpha_R(num_fluids) = 1._wp - sum(alpha_R(1:num_fluids - 1)) + end if - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 1)%sf(j, k, l) = rhs_vf(momxb + 1)%sf(j, k, l) - & - (0.5_wp*rho_L*vel_L(1)*vel_L(2)*(1._wp/dx(j)) - & - 0.5_wp*cfl*(rho_L*vel_L(2))*(1._wp/dx(j))) + rho_L = sum(alpha_rho_L) + gamma_L = sum(alpha_L*gammas) + pi_inf_L = sum(alpha_L*pi_infs) - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 2)%sf(j, k, l) = rhs_vf(momxb + 2)%sf(j, k, l) - & - (0.5_wp*rho_L*vel_L(1)*vel_L(3)*(1._wp/dx(j)) - & - 0.5_wp*cfl*(rho_L*vel_L(3))*(1._wp/dx(j))) + rho_R = sum(alpha_rho_R) + gamma_R = sum(alpha_R*gammas) + pi_inf_R = sum(alpha_R*pi_infs) - $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) - & - (0.5_wp*(vel_L(1)*(E_L + & - pres_L))*(1._wp/dx(j)) - & - 0.5_wp*cfl*(E_L)*(1._wp/dx(j))) + vel_L = vel_L/rho_L + vel_R = vel_R/rho_R + if (viscous) then + mu_L = 0._wp + mu_R = 0._wp $:GPU_LOOP(parallelism='[seq]') do i = 1, num_fluids - $:GPU_ATOMIC(atomic='update') - rhs_vf(i)%sf(j + 1, k, l) = rhs_vf(i)%sf(j + 1, k, l) + & - (0.5_wp*(alpha_rho_R(i)* & - vel_R(1))*(1._wp/dx(j + 1)) + & - 0.5_wp*cfl*(alpha_rho_R(i))*(1._wp/dx(j + 1))) - - $:GPU_ATOMIC(atomic='update') - rhs_vf(i)%sf(j, k, l) = rhs_vf(i)%sf(j, k, l) - & - (0.5_wp*(alpha_rho_R(i)* & - vel_R(1))*(1._wp/dx(j)) + & - 0.5_wp*cfl*(alpha_rho_R(i))*(1._wp/dx(j))) + mu_L = alpha_L(i)/Res_igr(1, i) + mu_L + mu_R = alpha_R(i)/Res_igr(1, i) + mu_R end do - if (num_fluids > 1) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - 1 - $:GPU_ATOMIC(atomic='update') - rhs_vf(advxb + i - 1)%sf(j + 1, k, l) = rhs_vf(advxb + i - 1)%sf(j + 1, k, l) + & - (0.5_wp*(alpha_R(i)* & - vel_R(1))*(1._wp/dx(j + 1)) + & - 0.5_wp*cfl*(alpha_R(i))*(1._wp/dx(j + 1))) - - $:GPU_ATOMIC(atomic='update') - rhs_vf(advxb + i - 1)%sf(j + 1, k, l) = rhs_vf(advxb + i - 1)%sf(j + 1, k, l) & - - (0.5_wp*q_cons_vf(advxb + i - 1)%sf(j + 1, k, l)*vel_R(1)*(1._wp/dx(j + 1))) - - $:GPU_ATOMIC(atomic='update') - rhs_vf(advxb + i - 1)%sf(j, k, l) = rhs_vf(advxb + i - 1)%sf(j, k, l) - & - (0.5_wp*(alpha_R(i)* & - vel_R(1))*(1._wp/dx(j)) + & - 0.5_wp*cfl*(alpha_R(i))*(1._wp/dx(j))) - - $:GPU_ATOMIC(atomic='update') - rhs_vf(advxb + i - 1)%sf(j, k, l) = rhs_vf(advxb + i - 1)%sf(j, k, l) & - + (0.5_wp*q_cons_vf(advxb + i - 1)%sf(j, k, l)*vel_R(1)*(1._wp/dx(j))) - end do - end if - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb)%sf(j + 1, k, l) = rhs_vf(momxb)%sf(j + 1, k, l) + & - (0.5_wp*(rho_R*(vel_R(1))**2.0 + & - pres_R)*(1._wp/dx(j + 1)) + & - 0.5_wp*cfl*(rho_R*vel_R(1))*(1._wp/dx(j + 1))) - + rhs_vf(momxb + 1)%sf(j + 1, k, l) = rhs_vf(momxb + 1)%sf(j + 1, k, l) - & + 0.5_wp*mu_L*vflux_L_arr(1)*(1._wp/dx(j + 1)) $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 1)%sf(j + 1, k, l) = rhs_vf(momxb + 1)%sf(j + 1, k, l) + & - (0.5_wp*rho_R*vel_R(1)*vel_R(2)*(1._wp/dx(j + 1)) + & - 0.5_wp*cfl*(rho_R*vel_R(2))*(1._wp/dx(j + 1))) + rhs_vf(E_idx)%sf(j + 1, k, l) = rhs_vf(E_idx)%sf(j + 1, k, l) - & + 0.5_wp*mu_L*vflux_L_arr(1)*vel_L(2)*(1._wp/dx(j + 1)) $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 2)%sf(j + 1, k, l) = rhs_vf(momxb + 2)%sf(j + 1, k, l) + & - (0.5_wp*rho_R*vel_R(1)*vel_R(3)*(1._wp/dx(j + 1)) + & - 0.5_wp*cfl*(rho_R*vel_R(3))*(1._wp/dx(j + 1))) - + rhs_vf(momxb + 1)%sf(j, k, l) = rhs_vf(momxb + 1)%sf(j, k, l) + & + 0.5_wp*mu_L*vflux_L_arr(1)*(1._wp/dx(j)) $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j + 1, k, l) = rhs_vf(E_idx)%sf(j + 1, k, l) + & - (0.5_wp*(vel_R(1)*(E_R + & - pres_R))*(1._wp/dx(j + 1)) + & - 0.5_wp*cfl*(E_R)*(1._wp/dx(j + 1))) + rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) + & + 0.5_wp*mu_L*vflux_L_arr(1)*vel_L(2)*(1._wp/dx(j)) $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb)%sf(j, k, l) = rhs_vf(momxb)%sf(j, k, l) - & - (0.5_wp*(rho_R*(vel_R(1))**2.0 + & - pres_R)*(1._wp/dx(j)) + & - 0.5_wp*cfl*(rho_R*vel_R(1))*(1._wp/dx(j))) - + rhs_vf(momxb + 1)%sf(j + 1, k, l) = rhs_vf(momxb + 1)%sf(j + 1, k, l) - & + 0.5_wp*mu_R*vflux_R_arr(1)*(1._wp/dx(j + 1)) $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 1)%sf(j, k, l) = rhs_vf(momxb + 1)%sf(j, k, l) - & - (0.5_wp*rho_R*vel_R(1)*vel_R(2)*(1._wp/dx(j)) + & - 0.5_wp*cfl*(rho_R*vel_R(2))*(1._wp/dx(j))) + rhs_vf(E_idx)%sf(j + 1, k, l) = rhs_vf(E_idx)%sf(j + 1, k, l) - & + 0.5_wp*mu_R*vflux_R_arr(1)*vel_R(2)*(1._wp/dx(j + 1)) $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 2)%sf(j, k, l) = rhs_vf(momxb + 2)%sf(j, k, l) - & - (0.5_wp*rho_R*vel_R(1)*vel_R(3)*(1._wp/dx(j)) + & - 0.5_wp*cfl*(rho_R*vel_R(3))*(1._wp/dx(j))) - + rhs_vf(momxb + 1)%sf(j, k, l) = rhs_vf(momxb + 1)%sf(j, k, l) + & + 0.5_wp*mu_R*vflux_R_arr(1)*(1._wp/dx(j)) $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) - & - (0.5_wp*(vel_R(1)*(E_R + & - pres_R))*(1._wp/dx(j)) + & - 0.5_wp*cfl*(E_R)*(1._wp/dx(j))) - - end do - end do - end do - $:END_GPU_PARALLEL_LOOP() - end if - else if (idir == 2) then - if (p == 0) then - $:GPU_PARALLEL_LOOP(collapse=3, private='[j,k,l,rho_L, rho_R, gamma_L, gamma_R, pi_inf_L, pi_inf_R, mu_L, mu_R, vel_L, vel_R, pres_L, pres_R, alpha_L, alpha_R, alpha_rho_L, alpha_rho_R, F_L, F_R, E_L, E_R, cfl, dvel_small, rho_sf_small, vflux_L_arr, vflux_R_arr]') - do l = 0, p - do k = -1, n - do j = 0, m - - if (viscous) then - vflux_L_arr = 0._wp - vflux_R_arr = 0._wp - - #:if MFC_CASE_OPTIMIZATION - #:if igr_order == 5 - !DIR$ unroll 6 - #:elif igr_order == 3 - !DIR$ unroll 4 - #:endif - #:endif - $:GPU_LOOP(parallelism='[seq]') - do q = vidxb, vidxe - dvel_small = 0._wp - !x-direction contributions - $:GPU_LOOP(parallelism='[seq]') - do i = -1, 1 - rho_L = 0._wp - $:GPU_LOOP(parallelism='[seq]') - do r = 1, num_fluids - rho_L = rho_L + q_cons_vf(r)%sf(j + i, k + q, l) - end do - rho_sf_small(i) = rho_L - end do + rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) + & + 0.5_wp*mu_R*vflux_R_arr(1)*vel_R(2)*(1._wp/dx(j)) - dvel_small(1) = (1/(2._wp*dx(j)))*( & - q_cons_vf(momxb)%sf(j + 1, k + q, l)/rho_sf_small(1) - & - q_cons_vf(momxb)%sf(j - 1, k + q, l)/rho_sf_small(-1)) - dvel_small(2) = (1/(2._wp*dx(j)))*( & - q_cons_vf(momxb + 1)%sf(j + 1, k + q, l)/rho_sf_small(1) - & - q_cons_vf(momxb + 1)%sf(j - 1, k + q, l)/rho_sf_small(-1)) - - if (q > vidxb) then - vflux_L_arr(1) = vflux_L_arr(1) + coeff_L(q)*(dvel_small(2)) - vflux_L_arr(3) = vflux_L_arr(3) + coeff_L(q)*(-2._wp*dvel_small(1))/3._wp - end if - if (q < vidxe) then - vflux_R_arr(1) = vflux_R_arr(1) + coeff_R(q)*(dvel_small(2)) - vflux_R_arr(3) = vflux_R_arr(3) + coeff_R(q)*(-2._wp*dvel_small(1))/3._wp - end if - - !y-direction contributions - $:GPU_LOOP(parallelism='[seq]') - do i = -1, 1 - rho_L = 0._wp - $:GPU_LOOP(parallelism='[seq]') - do r = 1, num_fluids - rho_L = rho_L + q_cons_vf(r)%sf(j, k + i + q, l) - end do - rho_sf_small(i) = rho_L - end do + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb + 2)%sf(j + 1, k, l) = rhs_vf(momxb + 2)%sf(j + 1, k, l) - & + 0.5_wp*mu_L*vflux_L_arr(2)*(1._wp/dx(j + 1)) + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j + 1, k, l) = rhs_vf(E_idx)%sf(j + 1, k, l) - & + 0.5_wp*mu_L*vflux_L_arr(2)*vel_L(3)*(1._wp/dx(j + 1)) - dvel_small(1) = (1/(2._wp*dy(k)))*( & - q_cons_vf(momxb)%sf(j, k + 1 + q, l)/rho_sf_small(1) - & - q_cons_vf(momxb)%sf(j, k - 1 + q, l)/rho_sf_small(-1)) - dvel_small(2) = (1/(2._wp*dy(k)))*( & - q_cons_vf(momxb + 1)%sf(j, k + 1 + q, l)/rho_sf_small(1) - & - q_cons_vf(momxb + 1)%sf(j, k - 1 + q, l)/rho_sf_small(-1)) - - if (q > vidxb) then - vflux_L_arr(1) = vflux_L_arr(1) + coeff_L(q)*(dvel_small(1)) - vflux_L_arr(3) = vflux_L_arr(3) + coeff_L(q)*(4._wp*dvel_small(2))/3._wp - end if - if (q < vidxe) then - vflux_R_arr(1) = vflux_R_arr(1) + coeff_R(q)*(dvel_small(1)) - vflux_R_arr(3) = vflux_R_arr(3) + coeff_R(q)*(4._wp*dvel_small(2))/3._wp - end if - end do - end if + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb + 2)%sf(j, k, l) = rhs_vf(momxb + 2)%sf(j, k, l) + & + 0.5_wp*mu_L*vflux_L_arr(2)*(1._wp/dx(j)) + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) + & + 0.5_wp*mu_L*vflux_L_arr(2)*vel_L(3)*(1._wp/dx(j)) - alpha_rho_L = 0._wp; alpha_rho_R = 0._wp - alpha_L = 0._wp; alpha_R = 0._wp - vel_L = 0._wp; vel_R = 0._wp + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb + 2)%sf(j + 1, k, l) = rhs_vf(momxb + 2)%sf(j + 1, k, l) - & + 0.5_wp*mu_R*vflux_R_arr(2)*(1._wp/dx(j + 1)) + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j + 1, k, l) = rhs_vf(E_idx)%sf(j + 1, k, l) - & + 0.5_wp*mu_R*vflux_R_arr(2)*vel_R(3)*(1._wp/dx(j + 1)) - $:GPU_LOOP(parallelism='[seq]') - do q = vidxb + 1, vidxe - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - alpha_rho_L(i) = alpha_rho_L(i) + coeff_L(q)*q_cons_vf(i)%sf(j, k + q, l) - end do + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb + 2)%sf(j, k, l) = rhs_vf(momxb + 2)%sf(j, k, l) + & + 0.5_wp*mu_R*vflux_R_arr(2)*(1._wp/dx(j)) + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) + & + 0.5_wp*mu_R*vflux_R_arr(2)*vel_R(3)*(1._wp/dx(j)) - if (num_fluids > 1) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - 1 - alpha_L(i) = alpha_L(i) + coeff_L(q)*q_cons_vf(E_idx + i)%sf(j, k + q, l) - end do - else - alpha_L(1) = 1._wp - end if + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb)%sf(j + 1, k, l) = rhs_vf(momxb)%sf(j + 1, k, l) - & + 0.5_wp*mu_L*vflux_L_arr(3)*(1._wp/dx(j + 1)) + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j + 1, k, l) = rhs_vf(E_idx)%sf(j + 1, k, l) - & + 0.5_wp*mu_L*vflux_L_arr(3)*vel_L(1)*(1._wp/dx(j + 1)) - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_dims - vel_L(i) = vel_L(i) + coeff_L(q)*q_cons_vf(momxb + i - 1)%sf(j, k + q, l) - end do - end do + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb)%sf(j, k, l) = rhs_vf(momxb)%sf(j, k, l) + & + 0.5_wp*mu_L*vflux_L_arr(3)*(1._wp/dx(j)) + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) + & + 0.5_wp*mu_L*vflux_L_arr(3)*vel_L(1)*(1._wp/dx(j)) - $:GPU_LOOP(parallelism='[seq]') - do q = vidxb, vidxe - 1 - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - alpha_rho_R(i) = alpha_rho_R(i) + coeff_R(q)*q_cons_vf(i)%sf(j, k + q, l) - end do + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb)%sf(j + 1, k, l) = rhs_vf(momxb)%sf(j + 1, k, l) - & + 0.5_wp*mu_R*vflux_R_arr(3)*(1._wp/dx(j + 1)) + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j + 1, k, l) = rhs_vf(E_idx)%sf(j + 1, k, l) - & + 0.5_wp*mu_R*vflux_R_arr(3)*vel_R(1)*(1._wp/dx(j + 1)) - if (num_fluids > 1) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - 1 - alpha_R(i) = alpha_R(i) + coeff_R(q)*q_cons_vf(E_idx + i)%sf(j, k + q, l) - end do - else - alpha_R(1) = 1._wp - end if + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb)%sf(j, k, l) = rhs_vf(momxb)%sf(j, k, l) + & + 0.5_wp*mu_R*vflux_R_arr(3)*(1._wp/dx(j)) + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) + & + 0.5_wp*mu_R*vflux_R_arr(3)*vel_R(1)*(1._wp/dx(j)) + end if - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_dims - vel_R(i) = vel_R(i) + coeff_R(q)*q_cons_vf(momxb + i - 1)%sf(j, k + q, l) - end do - end do + E_L = 0._wp; E_R = 0._wp - if (num_fluids > 1) then - alpha_L(num_fluids) = 1._wp - sum(alpha_L(1:num_fluids - 1)) - alpha_R(num_fluids) = 1._wp - sum(alpha_R(1:num_fluids - 1)) - end if + $:GPU_LOOP(parallelism='[seq]') + do q = vidxb + 1, vidxe + E_L = E_L + coeff_L(q)*q_cons_vf(E_idx)%sf(j + q, k, l) + end do - rho_L = sum(alpha_rho_L) - gamma_L = sum(alpha_L*gammas) - pi_inf_L = sum(alpha_L*pi_infs) + $:GPU_LOOP(parallelism='[seq]') + do q = vidxb, vidxe - 1 + E_R = E_R + coeff_R(q)*q_cons_vf(E_idx)%sf(j + q, k, l) + end do - rho_R = sum(alpha_rho_R) - gamma_R = sum(alpha_R*gammas) - pi_inf_R = sum(alpha_R*pi_infs) + call s_get_derived_states(E_L, gamma_L, pi_inf_L, rho_L, vel_L, & + E_R, gamma_R, pi_inf_R, rho_R, vel_R, & + pres_L, pres_R, cfl) - vel_L = vel_L/rho_L - vel_R = vel_R/rho_R + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids + $:GPU_ATOMIC(atomic='update') + rhs_vf(i)%sf(j + 1, k, l) = rhs_vf(i)%sf(j + 1, k, l) + & + (0.5_wp*(alpha_rho_L(i)* & + vel_L(1))*(1._wp/dx(j + 1)) - & + 0.5_wp*cfl*(alpha_rho_L(i))*(1._wp/dx(j + 1))) - if (viscous) then - mu_L = 0._wp - mu_R = 0._wp - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - mu_L = alpha_L(i)/Res_igr(1, i) + mu_L - mu_R = alpha_R(i)/Res_igr(1, i) + mu_R - end do + $:GPU_ATOMIC(atomic='update') + rhs_vf(i)%sf(j, k, l) = rhs_vf(i)%sf(j, k, l) - & + (0.5_wp*(alpha_rho_L(i)* & + vel_L(1))*(1._wp/dx(j)) - & + 0.5_wp*cfl*(alpha_rho_L(i))*(1._wp/dx(j))) + end do + if (num_fluids > 1) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids - 1 $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb)%sf(j, k + 1, l) = rhs_vf(momxb)%sf(j, k + 1, l) - & - 0.5_wp*mu_L*vflux_L_arr(1)*(1._wp/dy(k + 1)) - $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j, k + 1, l) = rhs_vf(E_idx)%sf(j, k + 1, l) - & - 0.5_wp*mu_L*vflux_L_arr(1)*vel_L(1)*(1._wp/dy(k + 1)) + rhs_vf(advxb + i - 1)%sf(j + 1, k, l) = rhs_vf(advxb + i - 1)%sf(j + 1, k, l) + & + (0.5_wp*(alpha_L(i)* & + vel_L(1))*(1._wp/dx(j + 1)) - & + 0.5_wp*cfl*(alpha_L(i))*(1._wp/dx(j + 1))) $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb)%sf(j, k, l) = rhs_vf(momxb)%sf(j, k, l) + & - 0.5_wp*mu_L*vflux_L_arr(1)*(1._wp/dy(k)) - $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) + & - 0.5_wp*mu_L*vflux_L_arr(1)*vel_L(1)*(1._wp/dy(k)) + rhs_vf(advxb + i - 1)%sf(j + 1, k, l) = rhs_vf(advxb + i - 1)%sf(j + 1, k, l) & + - (0.5_wp*q_cons_vf(advxb + i - 1)%sf(j + 1, k, l)*vel_L(1)*(1._wp/dx(j + 1))) $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb)%sf(j, k + 1, l) = rhs_vf(momxb)%sf(j, k + 1, l) - & - 0.5_wp*mu_R*vflux_R_arr(1)*(1._wp/dy(k + 1)) - $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j, k + 1, l) = rhs_vf(E_idx)%sf(j, k + 1, l) - & - 0.5_wp*mu_R*vflux_R_arr(1)*vel_R(1)*(1._wp/dy(k + 1)) + rhs_vf(advxb + i - 1)%sf(j, k, l) = rhs_vf(advxb + i - 1)%sf(j, k, l) - & + (0.5_wp*(alpha_L(i)* & + vel_L(1))*(1._wp/dx(j)) - & + 0.5_wp*cfl*(alpha_L(i))*(1._wp/dx(j))) $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb)%sf(j, k, l) = rhs_vf(momxb)%sf(j, k, l) + & - 0.5_wp*mu_R*vflux_R_arr(1)*(1._wp/dy(k)) - $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) + & - 0.5_wp*mu_R*vflux_R_arr(1)*vel_R(1)*(1._wp/dy(k)) + rhs_vf(advxb + i - 1)%sf(j, k, l) = rhs_vf(advxb + i - 1)%sf(j, k, l) & + + (0.5_wp*q_cons_vf(advxb + i - 1)%sf(j, k, l)*vel_L(1)*(1._wp/dx(j))) + end do + end if - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 1)%sf(j, k + 1, l) = rhs_vf(momxb + 1)%sf(j, k + 1, l) - & - 0.5_wp*mu_L*vflux_L_arr(3)*(1._wp/dy(k + 1)) - $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j, k + 1, l) = rhs_vf(E_idx)%sf(j, k + 1, l) - & - 0.5_wp*mu_L*vflux_L_arr(3)*vel_L(2)*(1._wp/dy(k + 1)) + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb)%sf(j + 1, k, l) = rhs_vf(momxb)%sf(j + 1, k, l) + & + (0.5_wp*(rho_L*(vel_L(1))**2.0 + & + pres_L)*(1._wp/dx(j + 1)) - & + 0.5_wp*cfl*(rho_L*vel_L(1))*(1._wp/dx(j + 1))) - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 1)%sf(j, k, l) = rhs_vf(momxb + 1)%sf(j, k, l) + & - 0.5_wp*mu_L*vflux_L_arr(3)*(1._wp/dy(k)) - $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) + & - 0.5_wp*mu_L*vflux_L_arr(3)*vel_L(2)*(1._wp/dy(k)) + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb + 1)%sf(j + 1, k, l) = rhs_vf(momxb + 1)%sf(j + 1, k, l) + & + (0.5_wp*rho_L*vel_L(1)*vel_L(2)*(1._wp/dx(j + 1)) - & + 0.5_wp*cfl*(rho_L*vel_L(2))*(1._wp/dx(j + 1))) - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 1)%sf(j, k + 1, l) = rhs_vf(momxb + 1)%sf(j, k + 1, l) - & - 0.5_wp*mu_R*vflux_R_arr(3)*(1._wp/dy(k + 1)) - $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j, k + 1, l) = rhs_vf(E_idx)%sf(j, k + 1, l) - & - 0.5_wp*mu_R*vflux_R_arr(3)*vel_R(2)*(1._wp/dy(k + 1)) + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb + 2)%sf(j + 1, k, l) = rhs_vf(momxb + 2)%sf(j + 1, k, l) + & + (0.5_wp*rho_L*vel_L(1)*vel_L(3)*(1._wp/dx(j + 1)) - & + 0.5_wp*cfl*(rho_L*vel_L(3))*(1._wp/dx(j + 1))) - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 1)%sf(j, k, l) = rhs_vf(momxb + 1)%sf(j, k, l) + & - 0.5_wp*mu_R*vflux_R_arr(3)*(1._wp/dy(k)) - $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) + & - 0.5_wp*mu_R*vflux_R_arr(3)*vel_R(2)*(1._wp/dy(k)) - end if + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j + 1, k, l) = rhs_vf(E_idx)%sf(j + 1, k, l) + & + (0.5_wp*(vel_L(1)*(E_L + & + pres_L))*(1._wp/dx(j + 1)) - & + 0.5_wp*cfl*(E_L)*(1._wp/dx(j + 1))) - E_L = 0._wp; E_R = 0._wp - F_L = 0._wp; F_R = 0._wp + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb)%sf(j, k, l) = rhs_vf(momxb)%sf(j, k, l) - & + (0.5_wp*(rho_L*(vel_L(1))**2.0 + & + pres_L)*(1._wp/dx(j)) - & + 0.5_wp*cfl*(rho_L*vel_L(1))*(1._wp/dx(j))) - $:GPU_LOOP(parallelism='[seq]') - do q = vidxb + 1, vidxe - E_L = E_L + coeff_L(q)*q_cons_vf(E_idx)%sf(j, k + q, l) - F_L = F_L + coeff_L(q)*jac(j, k + q, l) - end do + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb + 1)%sf(j, k, l) = rhs_vf(momxb + 1)%sf(j, k, l) - & + (0.5_wp*rho_L*vel_L(1)*vel_L(2)*(1._wp/dx(j)) - & + 0.5_wp*cfl*(rho_L*vel_L(2))*(1._wp/dx(j))) - $:GPU_LOOP(parallelism='[seq]') - do q = vidxb, vidxe - 1 - E_R = E_R + coeff_R(q)*q_cons_vf(E_idx)%sf(j, k + q, l) - F_R = F_R + coeff_R(q)*jac(j, k + q, l) - end do + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb + 2)%sf(j, k, l) = rhs_vf(momxb + 2)%sf(j, k, l) - & + (0.5_wp*rho_L*vel_L(1)*vel_L(3)*(1._wp/dx(j)) - & + 0.5_wp*cfl*(rho_L*vel_L(3))*(1._wp/dx(j))) + + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) - & + (0.5_wp*(vel_L(1)*(E_L + & + pres_L))*(1._wp/dx(j)) - & + 0.5_wp*cfl*(E_L)*(1._wp/dx(j))) + + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids + $:GPU_ATOMIC(atomic='update') + rhs_vf(i)%sf(j + 1, k, l) = rhs_vf(i)%sf(j + 1, k, l) + & + (0.5_wp*(alpha_rho_R(i)* & + vel_R(1))*(1._wp/dx(j + 1)) + & + 0.5_wp*cfl*(alpha_rho_R(i))*(1._wp/dx(j + 1))) - call s_get_derived_states(E_L, gamma_L, pi_inf_L, rho_L, vel_L, & - E_R, gamma_R, pi_inf_R, rho_R, vel_R, & - pres_L, pres_R, cfl) + $:GPU_ATOMIC(atomic='update') + rhs_vf(i)%sf(j, k, l) = rhs_vf(i)%sf(j, k, l) - & + (0.5_wp*(alpha_rho_R(i)* & + vel_R(1))*(1._wp/dx(j)) + & + 0.5_wp*cfl*(alpha_rho_R(i))*(1._wp/dx(j))) + end do + if (num_fluids > 1) then $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids + do i = 1, num_fluids - 1 $:GPU_ATOMIC(atomic='update') - rhs_vf(i)%sf(j, k + 1, l) = rhs_vf(i)%sf(j, k + 1, l) + & - (0.5_wp*(alpha_rho_L(i)* & - vel_L(2))*(1._wp/dy(k + 1)) - & - 0.5_wp*cfl*(alpha_rho_L(i))*(1._wp/dy(k + 1))) + rhs_vf(advxb + i - 1)%sf(j + 1, k, l) = rhs_vf(advxb + i - 1)%sf(j + 1, k, l) + & + (0.5_wp*(alpha_R(i)* & + vel_R(1))*(1._wp/dx(j + 1)) + & + 0.5_wp*cfl*(alpha_R(i))*(1._wp/dx(j + 1))) $:GPU_ATOMIC(atomic='update') - rhs_vf(i)%sf(j, k, l) = rhs_vf(i)%sf(j, k, l) - & - (0.5_wp*(alpha_rho_L(i)* & - vel_L(2))*(1._wp/dy(k)) - & - 0.5_wp*cfl*(alpha_rho_L(i))*(1._wp/dy(k))) - end do + rhs_vf(advxb + i - 1)%sf(j + 1, k, l) = rhs_vf(advxb + i - 1)%sf(j + 1, k, l) & + - (0.5_wp*q_cons_vf(advxb + i - 1)%sf(j + 1, k, l)*vel_R(1)*(1._wp/dx(j + 1))) - if (num_fluids > 1) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - 1 - $:GPU_ATOMIC(atomic='update') - rhs_vf(advxb + i - 1)%sf(j, k + 1, l) = rhs_vf(advxb + i - 1)%sf(j, k + 1, l) + & - (0.5_wp*(alpha_L(i)* & - vel_L(2))*(1._wp/dy(k + 1)) - & - 0.5_wp*cfl*(alpha_L(i))*(1._wp/dy(k + 1))) - - $:GPU_ATOMIC(atomic='update') - rhs_vf(advxb + i - 1)%sf(j, k + 1, l) = rhs_vf(advxb + i - 1)%sf(j, k + 1, l) & - - (0.5_wp*q_cons_vf(advxb + i - 1)%sf(j, k + 1, l)*vel_L(2)*(1._wp/dy(k + 1))) - - $:GPU_ATOMIC(atomic='update') - rhs_vf(advxb + i - 1)%sf(j, k, l) = rhs_vf(advxb + i - 1)%sf(j, k, l) - & - (0.5_wp*(alpha_L(i)* & - vel_L(2))*(1._wp/dy(k)) - & - 0.5_wp*cfl*(alpha_L(i))*(1._wp/dy(k))) + $:GPU_ATOMIC(atomic='update') + rhs_vf(advxb + i - 1)%sf(j, k, l) = rhs_vf(advxb + i - 1)%sf(j, k, l) - & + (0.5_wp*(alpha_R(i)* & + vel_R(1))*(1._wp/dx(j)) + & + 0.5_wp*cfl*(alpha_R(i))*(1._wp/dx(j))) - $:GPU_ATOMIC(atomic='update') - rhs_vf(advxb + i - 1)%sf(j, k, l) = rhs_vf(advxb + i - 1)%sf(j, k, l) & - + (0.5_wp*q_cons_vf(advxb + i - 1)%sf(j, k, l)*vel_L(2)*(1._wp/dy(k))) - end do - end if + $:GPU_ATOMIC(atomic='update') + rhs_vf(advxb + i - 1)%sf(j, k, l) = rhs_vf(advxb + i - 1)%sf(j, k, l) & + + (0.5_wp*q_cons_vf(advxb + i - 1)%sf(j, k, l)*vel_R(1)*(1._wp/dx(j))) + end do + end if - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 1)%sf(j, k + 1, l) = rhs_vf(momxb + 1)%sf(j, k + 1, l) + & - (0.5_wp*(rho_L*(vel_L(2))**2.0 + & - pres_L + F_L)*(1._wp/dy(k + 1)) - & - 0.5_wp*cfl*(rho_L*vel_L(2))*(1._wp/dy(k + 1))) + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb)%sf(j + 1, k, l) = rhs_vf(momxb)%sf(j + 1, k, l) + & + (0.5_wp*(rho_R*(vel_R(1))**2.0 + & + pres_R)*(1._wp/dx(j + 1)) + & + 0.5_wp*cfl*(rho_R*vel_R(1))*(1._wp/dx(j + 1))) - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb)%sf(j, k + 1, l) = rhs_vf(momxb)%sf(j, k + 1, l) + & - (0.5_wp*rho_L*vel_L(1)*vel_L(2)*(1._wp/dy(k + 1)) - & - 0.5_wp*cfl*(rho_L*vel_L(1))*(1._wp/dy(k + 1))) + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb + 1)%sf(j + 1, k, l) = rhs_vf(momxb + 1)%sf(j + 1, k, l) + & + (0.5_wp*rho_R*vel_R(1)*vel_R(2)*(1._wp/dx(j + 1)) + & + 0.5_wp*cfl*(rho_R*vel_R(2))*(1._wp/dx(j + 1))) - $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j, k + 1, l) = rhs_vf(E_idx)%sf(j, k + 1, l) + & - (0.5_wp*(vel_L(2)*(E_L + & - pres_L + F_L))*(1._wp/dy(k + 1)) - & - 0.5_wp*cfl*(E_L)*(1._wp/dy(k + 1))) + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb + 2)%sf(j + 1, k, l) = rhs_vf(momxb + 2)%sf(j + 1, k, l) + & + (0.5_wp*rho_R*vel_R(1)*vel_R(3)*(1._wp/dx(j + 1)) + & + 0.5_wp*cfl*(rho_R*vel_R(3))*(1._wp/dx(j + 1))) - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 1)%sf(j, k, l) = rhs_vf(momxb + 1)%sf(j, k, l) - & - (0.5_wp*(rho_L*(vel_L(2))**2.0 + & - pres_L + F_L)*(1._wp/dy(k)) - & - 0.5_wp*cfl*(rho_L*vel_L(2))*(1._wp/dy(k))) + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j + 1, k, l) = rhs_vf(E_idx)%sf(j + 1, k, l) + & + (0.5_wp*(vel_R(1)*(E_R + & + pres_R))*(1._wp/dx(j + 1)) + & + 0.5_wp*cfl*(E_R)*(1._wp/dx(j + 1))) - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb)%sf(j, k, l) = rhs_vf(momxb)%sf(j, k, l) - & - (0.5_wp*rho_L*vel_L(1)*vel_L(2)*(1._wp/dy(k)) - & - 0.5_wp*cfl*(rho_L*vel_L(1))*(1._wp/dy(k))) + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb)%sf(j, k, l) = rhs_vf(momxb)%sf(j, k, l) - & + (0.5_wp*(rho_R*(vel_R(1))**2.0 + & + pres_R)*(1._wp/dx(j)) + & + 0.5_wp*cfl*(rho_R*vel_R(1))*(1._wp/dx(j))) - $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) - & - (0.5_wp*(vel_L(2)*(E_L + & - pres_L + F_L))*(1._wp/dy(k)) - & - 0.5_wp*cfl*(E_L)*(1._wp/dy(k))) + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb + 1)%sf(j, k, l) = rhs_vf(momxb + 1)%sf(j, k, l) - & + (0.5_wp*rho_R*vel_R(1)*vel_R(2)*(1._wp/dx(j)) + & + 0.5_wp*cfl*(rho_R*vel_R(2))*(1._wp/dx(j))) - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - $:GPU_ATOMIC(atomic='update') - rhs_vf(i)%sf(j, k + 1, l) = rhs_vf(i)%sf(j, k + 1, l) + & - (0.5_wp*(alpha_rho_R(i)* & - vel_R(2))*(1._wp/dy(k + 1)) + & - 0.5_wp*cfl*(alpha_rho_R(i))*(1._wp/dy(k + 1))) - $:GPU_ATOMIC(atomic='update') - rhs_vf(i)%sf(j, k, l) = rhs_vf(i)%sf(j, k, l) - & - (0.5_wp*(alpha_rho_R(i)* & - vel_R(2))*(1._wp/dy(k)) + & - 0.5_wp*cfl*(alpha_rho_R(i))*(1._wp/dy(k))) - end do + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb + 2)%sf(j, k, l) = rhs_vf(momxb + 2)%sf(j, k, l) - & + (0.5_wp*rho_R*vel_R(1)*vel_R(3)*(1._wp/dx(j)) + & + 0.5_wp*cfl*(rho_R*vel_R(3))*(1._wp/dx(j))) - if (num_fluids > 1) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - 1 - $:GPU_ATOMIC(atomic='update') - rhs_vf(advxb + i - 1)%sf(j, k + 1, l) = rhs_vf(advxb + i - 1)%sf(j, k + 1, l) + & - (0.5_wp*(alpha_R(i)* & - vel_R(2))*(1._wp/dy(k + 1)) + & - 0.5_wp*cfl*(alpha_R(i))*(1._wp/dy(k + 1))) - - $:GPU_ATOMIC(atomic='update') - rhs_vf(advxb + i - 1)%sf(j, k + 1, l) = rhs_vf(advxb + i - 1)%sf(j, k + 1, l) & - - (0.5_wp*q_cons_vf(advxb + i - 1)%sf(j, k + 1, l)*vel_R(2)*(1._wp/dy(k + 1))) - - $:GPU_ATOMIC(atomic='update') - rhs_vf(advxb + i - 1)%sf(j, k, l) = rhs_vf(advxb + i - 1)%sf(j, k, l) - & - (0.5_wp*(alpha_R(i)* & - vel_R(2))*(1._wp/dy(k)) + & - 0.5_wp*cfl*(alpha_R(i))*(1._wp/dy(k))) + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) - & + (0.5_wp*(vel_R(1)*(E_R + & + pres_R))*(1._wp/dx(j)) + & + 0.5_wp*cfl*(E_R)*(1._wp/dx(j))) - $:GPU_ATOMIC(atomic='update') - rhs_vf(advxb + i - 1)%sf(j, k, l) = rhs_vf(advxb + i - 1)%sf(j, k, l) & - + (0.5_wp*q_cons_vf(advxb + i - 1)%sf(j, k, l)*vel_R(2)*(1._wp/dy(k))) - end do - end if - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 1)%sf(j, k + 1, l) = rhs_vf(momxb + 1)%sf(j, k + 1, l) + & - (0.5_wp*(rho_R*(vel_R(2))**2.0 + & - pres_R + F_R)*(1._wp/dy(k + 1)) + & - 0.5_wp*cfl*(rho_R*vel_R(2))*(1._wp/dy(k + 1))) - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb)%sf(j, k + 1, l) = rhs_vf(momxb)%sf(j, k + 1, l) + & - (0.5_wp*rho_R*vel_R(2)*vel_R(1)*(1._wp/dy(k + 1)) + & - 0.5_wp*cfl*(rho_R*vel_R(1))*(1._wp/dy(k + 1))) - $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j, k + 1, l) = rhs_vf(E_idx)%sf(j, k + 1, l) + & - (0.5_wp*(vel_R(2)*(E_R + & - pres_R + F_R))*(1._wp/dy(k + 1)) + & - 0.5_wp*cfl*(E_R)*(1._wp/dy(k + 1))) - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 1)%sf(j, k, l) = rhs_vf(momxb + 1)%sf(j, k, l) - & - (0.5_wp*(rho_R*(vel_R(2))**2.0 + & - pres_R + F_R)*(1._wp/dy(k)) + & - 0.5_wp*cfl*(rho_R*vel_R(2))*(1._wp/dy(k))) - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb)%sf(j, k, l) = rhs_vf(momxb)%sf(j, k, l) - & - (0.5_wp*rho_R*vel_R(2)*vel_R(1)*(1._wp/dy(k)) + & - 0.5_wp*cfl*(rho_R*vel_R(1))*(1._wp/dy(k))) - $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) - & - (0.5_wp*(vel_R(2)*(E_R + & - pres_R + F_R))*(1._wp/dy(k)) + & - 0.5_wp*cfl*(E_R)*(1._wp/dy(k))) - end do end do end do + end do $:END_GPU_PARALLEL_LOOP() - else + end if + else if (idir == 2) then + if (p == 0) then $:GPU_PARALLEL_LOOP(collapse=3, private='[j,k,l,rho_L, rho_R, gamma_L, gamma_R, pi_inf_L, pi_inf_R, mu_L, mu_R, vel_L, vel_R, pres_L, pres_R, alpha_L, alpha_R, alpha_rho_L, alpha_rho_R, F_L, F_R, E_L, E_R, cfl, dvel_small, rho_sf_small, vflux_L_arr, vflux_R_arr]') - do l = 0, p - do k = -1, n - do j = 0, m - - if (viscous) then - vflux_L_arr = 0._wp - vflux_R_arr = 0._wp - - #:if MFC_CASE_OPTIMIZATION - #:if igr_order == 5 - !DIR$ unroll 6 - #:elif igr_order == 3 - !DIR$ unroll 4 - #:endif - #:endif - $:GPU_LOOP(parallelism='[seq]') - do q = vidxb, vidxe - dvel_small = 0._wp - !x-direction contributions - $:GPU_LOOP(parallelism='[seq]') - do i = -1, 1 - rho_L = 0._wp - $:GPU_LOOP(parallelism='[seq]') - do r = 1, num_fluids - rho_L = rho_L + q_cons_vf(r)%sf(j + i, k + q, l) - end do - rho_sf_small(i) = rho_L - end do + do l = 0, p + do k = -1, n + do j = 0, m - dvel_small(1) = (1/(2._wp*dx(j)))*( & - q_cons_vf(momxb)%sf(j + 1, k + q, l)/rho_sf_small(1) - & - q_cons_vf(momxb)%sf(j - 1, k + q, l)/rho_sf_small(-1)) - dvel_small(2) = (1/(2._wp*dx(j)))*( & - q_cons_vf(momxb + 1)%sf(j + 1, k + q, l)/rho_sf_small(1) - & - q_cons_vf(momxb + 1)%sf(j - 1, k + q, l)/rho_sf_small(-1)) - - if (q > vidxb) then - vflux_L_arr(1) = vflux_L_arr(1) + coeff_L(q)*(dvel_small(2)) - vflux_L_arr(3) = vflux_L_arr(3) + coeff_L(q)*(-2._wp*dvel_small(1))/3._wp - end if - if (q < vidxe) then - vflux_R_arr(1) = vflux_R_arr(1) + coeff_R(q)*(dvel_small(2)) - vflux_R_arr(3) = vflux_R_arr(3) + coeff_R(q)*(-2._wp*dvel_small(1))/3._wp - end if - - !y-direction contributions - $:GPU_LOOP(parallelism='[seq]') - do i = -1, 1 - rho_L = 0._wp - $:GPU_LOOP(parallelism='[seq]') - do r = 1, num_fluids - rho_L = rho_L + q_cons_vf(r)%sf(j, k + i + q, l) - end do - rho_sf_small(i) = rho_L - end do + if (viscous) then + vflux_L_arr = 0._wp + vflux_R_arr = 0._wp - dvel_small(1) = (1/(2._wp*dy(k)))*( & - q_cons_vf(momxb)%sf(j, k + 1 + q, l)/rho_sf_small(1) - & - q_cons_vf(momxb)%sf(j, k - 1 + q, l)/rho_sf_small(-1)) - dvel_small(2) = (1/(2._wp*dy(k)))*( & - q_cons_vf(momxb + 1)%sf(j, k + 1 + q, l)/rho_sf_small(1) - & - q_cons_vf(momxb + 1)%sf(j, k - 1 + q, l)/rho_sf_small(-1)) - dvel_small(3) = (1/(2._wp*dy(k)))*( & - q_cons_vf(momxb + 2)%sf(j, k + 1 + q, l)/rho_sf_small(1) - & - q_cons_vf(momxb + 2)%sf(j, k - 1 + q, l)/rho_sf_small(-1)) - - if (q > vidxb) then - vflux_L_arr(1) = vflux_L_arr(1) + coeff_L(q)*(dvel_small(1)) - vflux_L_arr(2) = vflux_L_arr(2) + coeff_L(q)*(dvel_small(3)) - vflux_L_arr(3) = vflux_L_arr(3) + coeff_L(q)*(4._wp*dvel_small(2))/3._wp - end if - if (q < vidxe) then - vflux_R_arr(1) = vflux_R_arr(1) + coeff_R(q)*(dvel_small(1)) - vflux_R_arr(2) = vflux_R_arr(2) + coeff_R(q)*(dvel_small(3)) - vflux_R_arr(3) = vflux_R_arr(3) + coeff_R(q)*(4._wp*dvel_small(2))/3._wp - end if - - !z-direction contributions + #:if MFC_CASE_OPTIMIZATION + #:if igr_order == 5 + !DIR$ unroll 6 + #:elif igr_order == 3 + !DIR$ unroll 4 + #:endif + #:endif + $:GPU_LOOP(parallelism='[seq]') + do q = vidxb, vidxe + dvel_small = 0._wp + !x-direction contributions + $:GPU_LOOP(parallelism='[seq]') + do i = -1, 1 + rho_L = 0._wp $:GPU_LOOP(parallelism='[seq]') - do i = -1, 1 - rho_L = 0._wp - $:GPU_LOOP(parallelism='[seq]') - do r = 1, num_fluids - rho_L = rho_L + q_cons_vf(r)%sf(j, k + q, l + i) - end do - rho_sf_small(i) = rho_L + do r = 1, num_fluids + rho_L = rho_L + q_cons_vf(r)%sf(j + i, k + q, l) end do - - dvel_small(2) = (1/(2._wp*dz(l)))*( & - q_cons_vf(momxb + 1)%sf(j, k + q, l + 1)/rho_sf_small(1) - & - q_cons_vf(momxb + 1)%sf(j, k + q, l - 1)/rho_sf_small(-1)) - dvel_small(3) = (1/(2._wp*dz(l)))*( & - q_cons_vf(momxb + 2)%sf(j, k + q, l + 1)/rho_sf_small(1) - & - q_cons_vf(momxb + 2)%sf(j, k + q, l - 1)/rho_sf_small(-1)) - if (q > vidxb) then - vflux_L_arr(2) = vflux_L_arr(2) + coeff_L(q)*(dvel_small(2)) - vflux_L_arr(3) = vflux_L_arr(3) + coeff_L(q)*(-2._wp*dvel_small(3))/3._wp - end if - if (q < vidxe) then - vflux_R_arr(2) = vflux_R_arr(2) + coeff_R(q)*(dvel_small(2)) - vflux_R_arr(3) = vflux_R_arr(3) + coeff_R(q)*(-2._wp*dvel_small(3))/3._wp - end if + rho_sf_small(i) = rho_L end do - end if - - alpha_rho_L = 0._wp; alpha_rho_R = 0._wp - alpha_L = 0._wp; alpha_R = 0._wp - vel_L = 0._wp; vel_R = 0._wp - $:GPU_LOOP(parallelism='[seq]') - do q = vidxb + 1, vidxe - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - alpha_rho_L(i) = alpha_rho_L(i) + coeff_L(q)*q_cons_vf(i)%sf(j, k + q, l) - end do + dvel_small(1) = (1/(2._wp*dx(j)))*( & + q_cons_vf(momxb)%sf(j + 1, k + q, l)/rho_sf_small(1) - & + q_cons_vf(momxb)%sf(j - 1, k + q, l)/rho_sf_small(-1)) + dvel_small(2) = (1/(2._wp*dx(j)))*( & + q_cons_vf(momxb + 1)%sf(j + 1, k + q, l)/rho_sf_small(1) - & + q_cons_vf(momxb + 1)%sf(j - 1, k + q, l)/rho_sf_small(-1)) - if (num_fluids > 1) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - 1 - alpha_L(i) = alpha_L(i) + coeff_L(q)*q_cons_vf(E_idx + i)%sf(j, k + q, l) - end do - else - alpha_L(1) = 1._wp + if (q > vidxb) then + vflux_L_arr(1) = vflux_L_arr(1) + coeff_L(q)*(dvel_small(2)) + vflux_L_arr(3) = vflux_L_arr(3) + coeff_L(q)*(-2._wp*dvel_small(1))/3._wp + end if + if (q < vidxe) then + vflux_R_arr(1) = vflux_R_arr(1) + coeff_R(q)*(dvel_small(2)) + vflux_R_arr(3) = vflux_R_arr(3) + coeff_R(q)*(-2._wp*dvel_small(1))/3._wp end if + !y-direction contributions $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_dims - vel_L(i) = vel_L(i) + coeff_L(q)*q_cons_vf(momxb + i - 1)%sf(j, k + q, l) + do i = -1, 1 + rho_L = 0._wp + $:GPU_LOOP(parallelism='[seq]') + do r = 1, num_fluids + rho_L = rho_L + q_cons_vf(r)%sf(j, k + i + q, l) + end do + rho_sf_small(i) = rho_L end do - end do - $:GPU_LOOP(parallelism='[seq]') - do q = vidxb, vidxe - 1 - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - alpha_rho_R(i) = alpha_rho_R(i) + coeff_R(q)*q_cons_vf(i)%sf(j, k + q, l) - end do + dvel_small(1) = (1/(2._wp*dy(k)))*( & + q_cons_vf(momxb)%sf(j, k + 1 + q, l)/rho_sf_small(1) - & + q_cons_vf(momxb)%sf(j, k - 1 + q, l)/rho_sf_small(-1)) + dvel_small(2) = (1/(2._wp*dy(k)))*( & + q_cons_vf(momxb + 1)%sf(j, k + 1 + q, l)/rho_sf_small(1) - & + q_cons_vf(momxb + 1)%sf(j, k - 1 + q, l)/rho_sf_small(-1)) - if (num_fluids > 1) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - 1 - alpha_R(i) = alpha_R(i) + coeff_R(q)*q_cons_vf(E_idx + i)%sf(j, k + q, l) - end do - else - alpha_R(1) = 1._wp + if (q > vidxb) then + vflux_L_arr(1) = vflux_L_arr(1) + coeff_L(q)*(dvel_small(1)) + vflux_L_arr(3) = vflux_L_arr(3) + coeff_L(q)*(4._wp*dvel_small(2))/3._wp + end if + if (q < vidxe) then + vflux_R_arr(1) = vflux_R_arr(1) + coeff_R(q)*(dvel_small(1)) + vflux_R_arr(3) = vflux_R_arr(3) + coeff_R(q)*(4._wp*dvel_small(2))/3._wp end if + end do + end if - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_dims - vel_R(i) = vel_R(i) + coeff_R(q)*q_cons_vf(momxb + i - 1)%sf(j, k + q, l) - end do + alpha_rho_L = 0._wp; alpha_rho_R = 0._wp + alpha_L = 0._wp; alpha_R = 0._wp + vel_L = 0._wp; vel_R = 0._wp + + $:GPU_LOOP(parallelism='[seq]') + do q = vidxb + 1, vidxe + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids + alpha_rho_L(i) = alpha_rho_L(i) + coeff_L(q)*q_cons_vf(i)%sf(j, k + q, l) end do if (num_fluids > 1) then - alpha_L(num_fluids) = 1._wp - sum(alpha_L(1:num_fluids - 1)) - alpha_R(num_fluids) = 1._wp - sum(alpha_R(1:num_fluids - 1)) + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids - 1 + alpha_L(i) = alpha_L(i) + coeff_L(q)*q_cons_vf(E_idx + i)%sf(j, k + q, l) + end do + else + alpha_L(1) = 1._wp end if - rho_L = sum(alpha_rho_L) - gamma_L = sum(alpha_L*gammas) - pi_inf_L = sum(alpha_L*pi_infs) - - rho_R = sum(alpha_rho_R) - gamma_R = sum(alpha_R*gammas) - pi_inf_R = sum(alpha_R*pi_infs) + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_dims + vel_L(i) = vel_L(i) + coeff_L(q)*q_cons_vf(momxb + i - 1)%sf(j, k + q, l) + end do + end do - vel_L = vel_L/rho_L - vel_R = vel_R/rho_R + $:GPU_LOOP(parallelism='[seq]') + do q = vidxb, vidxe - 1 + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids + alpha_rho_R(i) = alpha_rho_R(i) + coeff_R(q)*q_cons_vf(i)%sf(j, k + q, l) + end do - if (viscous) then - mu_L = 0._wp - mu_R = 0._wp + if (num_fluids > 1) then $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - mu_L = alpha_L(i)/Res_igr(1, i) + mu_L - mu_R = alpha_R(i)/Res_igr(1, i) + mu_R + do i = 1, num_fluids - 1 + alpha_R(i) = alpha_R(i) + coeff_R(q)*q_cons_vf(E_idx + i)%sf(j, k + q, l) end do - - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb)%sf(j, k + 1, l) = rhs_vf(momxb)%sf(j, k + 1, l) - & - 0.5_wp*mu_L*vflux_L_arr(1)*(1._wp/dy(k + 1)) - $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j, k + 1, l) = rhs_vf(E_idx)%sf(j, k + 1, l) - & - 0.5_wp*mu_L*vflux_L_arr(1)*vel_L(1)*(1._wp/dy(k + 1)) - - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb)%sf(j, k, l) = rhs_vf(momxb)%sf(j, k, l) + & - 0.5_wp*mu_L*vflux_L_arr(1)*(1._wp/dy(k)) - $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) + & - 0.5_wp*mu_L*vflux_L_arr(1)*vel_L(1)*(1._wp/dy(k)) - - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb)%sf(j, k + 1, l) = rhs_vf(momxb)%sf(j, k + 1, l) - & - 0.5_wp*mu_R*vflux_R_arr(1)*(1._wp/dy(k + 1)) - $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j, k + 1, l) = rhs_vf(E_idx)%sf(j, k + 1, l) - & - 0.5_wp*mu_R*vflux_R_arr(1)*vel_R(1)*(1._wp/dy(k + 1)) - - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb)%sf(j, k, l) = rhs_vf(momxb)%sf(j, k, l) + & - 0.5_wp*mu_R*vflux_R_arr(1)*(1._wp/dy(k)) - $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) + & - 0.5_wp*mu_R*vflux_R_arr(1)*vel_R(1)*(1._wp/dy(k)) - - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 2)%sf(j, k + 1, l) = rhs_vf(momxb + 2)%sf(j, k + 1, l) - & - 0.5_wp*mu_L*vflux_L_arr(2)*(1._wp/dy(k + 1)) - $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j, k + 1, l) = rhs_vf(E_idx)%sf(j, k + 1, l) - & - 0.5_wp*mu_L*vflux_L_arr(2)*vel_L(3)*(1._wp/dy(k + 1)) - - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 2)%sf(j, k, l) = rhs_vf(momxb + 2)%sf(j, k, l) + & - 0.5_wp*mu_L*vflux_L_arr(2)*(1._wp/dy(k)) - $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) + & - 0.5_wp*mu_L*vflux_L_arr(2)*vel_L(3)*(1._wp/dy(k)) - - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 2)%sf(j, k + 1, l) = rhs_vf(momxb + 2)%sf(j, k + 1, l) - & - 0.5_wp*mu_R*vflux_R_arr(2)*(1._wp/dy(k + 1)) - $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j, k + 1, l) = rhs_vf(E_idx)%sf(j, k + 1, l) - & - 0.5_wp*mu_R*vflux_R_arr(2)*vel_R(3)*(1._wp/dy(k + 1)) - - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 2)%sf(j, k, l) = rhs_vf(momxb + 2)%sf(j, k, l) + & - 0.5_wp*mu_R*vflux_R_arr(2)*(1._wp/dy(k)) - $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) + & - 0.5_wp*mu_R*vflux_R_arr(2)*vel_R(3)*(1._wp/dy(k)) - - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 1)%sf(j, k + 1, l) = rhs_vf(momxb + 1)%sf(j, k + 1, l) - & - 0.5_wp*mu_L*vflux_L_arr(3)*(1._wp/dy(k + 1)) - $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j, k + 1, l) = rhs_vf(E_idx)%sf(j, k + 1, l) - & - 0.5_wp*mu_L*vflux_L_arr(3)*vel_L(2)*(1._wp/dy(k + 1)) - - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 1)%sf(j, k, l) = rhs_vf(momxb + 1)%sf(j, k, l) + & - 0.5_wp*mu_L*vflux_L_arr(3)*(1._wp/dy(k)) - $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) + & - 0.5_wp*mu_L*vflux_L_arr(3)*vel_L(2)*(1._wp/dy(k)) - - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 1)%sf(j, k + 1, l) = rhs_vf(momxb + 1)%sf(j, k + 1, l) - & - 0.5_wp*mu_R*vflux_R_arr(3)*(1._wp/dy(k + 1)) - $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j, k + 1, l) = rhs_vf(E_idx)%sf(j, k + 1, l) - & - 0.5_wp*mu_R*vflux_R_arr(3)*vel_R(2)*(1._wp/dy(k + 1)) - - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 1)%sf(j, k, l) = rhs_vf(momxb + 1)%sf(j, k, l) + & - 0.5_wp*mu_R*vflux_R_arr(3)*(1._wp/dy(k)) - $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) + & - 0.5_wp*mu_R*vflux_R_arr(3)*vel_R(2)*(1._wp/dy(k)) + else + alpha_R(1) = 1._wp end if - E_L = 0._wp; E_R = 0._wp - F_L = 0._wp; F_R = 0._wp - $:GPU_LOOP(parallelism='[seq]') - do q = vidxb + 1, vidxe - E_L = E_L + coeff_L(q)*q_cons_vf(E_idx)%sf(j, k + q, l) - F_L = F_L + coeff_L(q)*jac(j, k + q, l) + do i = 1, num_dims + vel_R(i) = vel_R(i) + coeff_R(q)*q_cons_vf(momxb + i - 1)%sf(j, k + q, l) end do + end do - $:GPU_LOOP(parallelism='[seq]') - do q = vidxb, vidxe - 1 - E_R = E_R + coeff_R(q)*q_cons_vf(E_idx)%sf(j, k + q, l) - F_R = F_R + coeff_R(q)*jac(j, k + q, l) - end do + if (num_fluids > 1) then + alpha_L(num_fluids) = 1._wp - sum(alpha_L(1:num_fluids - 1)) + alpha_R(num_fluids) = 1._wp - sum(alpha_R(1:num_fluids - 1)) + end if + + rho_L = sum(alpha_rho_L) + gamma_L = sum(alpha_L*gammas) + pi_inf_L = sum(alpha_L*pi_infs) - call s_get_derived_states(E_L, gamma_L, pi_inf_L, rho_L, vel_L, & - E_R, gamma_R, pi_inf_R, rho_R, vel_R, & - pres_L, pres_R, cfl) + rho_R = sum(alpha_rho_R) + gamma_R = sum(alpha_R*gammas) + pi_inf_R = sum(alpha_R*pi_infs) + + vel_L = vel_L/rho_L + vel_R = vel_R/rho_R + if (viscous) then + mu_L = 0._wp + mu_R = 0._wp $:GPU_LOOP(parallelism='[seq]') do i = 1, num_fluids - $:GPU_ATOMIC(atomic='update') - rhs_vf(i)%sf(j, k + 1, l) = rhs_vf(i)%sf(j, k + 1, l) + & - (0.5_wp*(alpha_rho_L(i)* & - vel_L(2))*(1._wp/dy(k + 1)) - & - 0.5_wp*cfl*(alpha_rho_L(i))*(1._wp/dy(k + 1))) - - $:GPU_ATOMIC(atomic='update') - rhs_vf(i)%sf(j, k, l) = rhs_vf(i)%sf(j, k, l) - & - (0.5_wp*(alpha_rho_L(i)* & - vel_L(2))*(1._wp/dy(k)) - & - 0.5_wp*cfl*(alpha_rho_L(i))*(1._wp/dy(k))) + mu_L = alpha_L(i)/Res_igr(1, i) + mu_L + mu_R = alpha_R(i)/Res_igr(1, i) + mu_R end do - if (num_fluids > 1) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - 1 - $:GPU_ATOMIC(atomic='update') - rhs_vf(advxb + i - 1)%sf(j, k + 1, l) = rhs_vf(advxb + i - 1)%sf(j, k + 1, l) + & - (0.5_wp*(alpha_L(i)* & - vel_L(2))*(1._wp/dy(k + 1)) - & - 0.5_wp*cfl*(alpha_L(i))*(1._wp/dy(k + 1))) - - $:GPU_ATOMIC(atomic='update') - rhs_vf(advxb + i - 1)%sf(j, k + 1, l) = rhs_vf(advxb + i - 1)%sf(j, k + 1, l) & - - (0.5_wp*q_cons_vf(advxb + i - 1)%sf(j, k + 1, l)*vel_L(2)*(1._wp/dy(k + 1))) - - $:GPU_ATOMIC(atomic='update') - rhs_vf(advxb + i - 1)%sf(j, k, l) = rhs_vf(advxb + i - 1)%sf(j, k, l) - & - (0.5_wp*(alpha_L(i)* & - vel_L(2))*(1._wp/dy(k)) - & - 0.5_wp*cfl*(alpha_L(i))*(1._wp/dy(k))) + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb)%sf(j, k + 1, l) = rhs_vf(momxb)%sf(j, k + 1, l) - & + 0.5_wp*mu_L*vflux_L_arr(1)*(1._wp/dy(k + 1)) + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j, k + 1, l) = rhs_vf(E_idx)%sf(j, k + 1, l) - & + 0.5_wp*mu_L*vflux_L_arr(1)*vel_L(1)*(1._wp/dy(k + 1)) - $:GPU_ATOMIC(atomic='update') - rhs_vf(advxb + i - 1)%sf(j, k, l) = rhs_vf(advxb + i - 1)%sf(j, k, l) & - + (0.5_wp*q_cons_vf(advxb + i - 1)%sf(j, k, l)*vel_L(2)*(1._wp/dy(k))) - end do - end if + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb)%sf(j, k, l) = rhs_vf(momxb)%sf(j, k, l) + & + 0.5_wp*mu_L*vflux_L_arr(1)*(1._wp/dy(k)) + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) + & + 0.5_wp*mu_L*vflux_L_arr(1)*vel_L(1)*(1._wp/dy(k)) $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 1)%sf(j, k + 1, l) = rhs_vf(momxb + 1)%sf(j, k + 1, l) + & - (0.5_wp*(rho_L*(vel_L(2))**2.0 + & - pres_L + F_L)*(1._wp/dy(k + 1)) - & - 0.5_wp*cfl*(rho_L*vel_L(2))*(1._wp/dy(k + 1))) + rhs_vf(momxb)%sf(j, k + 1, l) = rhs_vf(momxb)%sf(j, k + 1, l) - & + 0.5_wp*mu_R*vflux_R_arr(1)*(1._wp/dy(k + 1)) + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j, k + 1, l) = rhs_vf(E_idx)%sf(j, k + 1, l) - & + 0.5_wp*mu_R*vflux_R_arr(1)*vel_R(1)*(1._wp/dy(k + 1)) $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb)%sf(j, k + 1, l) = rhs_vf(momxb)%sf(j, k + 1, l) + & - (0.5_wp*rho_L*vel_L(1)*vel_L(2)*(1._wp/dy(k + 1)) - & - 0.5_wp*cfl*(rho_L*vel_L(1))*(1._wp/dy(k + 1))) + rhs_vf(momxb)%sf(j, k, l) = rhs_vf(momxb)%sf(j, k, l) + & + 0.5_wp*mu_R*vflux_R_arr(1)*(1._wp/dy(k)) + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) + & + 0.5_wp*mu_R*vflux_R_arr(1)*vel_R(1)*(1._wp/dy(k)) $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 2)%sf(j, k + 1, l) = rhs_vf(momxb + 2)%sf(j, k + 1, l) + & - (0.5_wp*rho_L*vel_L(3)*vel_L(2)*(1._wp/dy(k + 1)) - & - 0.5_wp*cfl*(rho_L*vel_L(3))*(1._wp/dy(k + 1))) + rhs_vf(momxb + 1)%sf(j, k + 1, l) = rhs_vf(momxb + 1)%sf(j, k + 1, l) - & + 0.5_wp*mu_L*vflux_L_arr(3)*(1._wp/dy(k + 1)) + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j, k + 1, l) = rhs_vf(E_idx)%sf(j, k + 1, l) - & + 0.5_wp*mu_L*vflux_L_arr(3)*vel_L(2)*(1._wp/dy(k + 1)) $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j, k + 1, l) = rhs_vf(E_idx)%sf(j, k + 1, l) + & - (0.5_wp*(vel_L(2)*(E_L + & - pres_L + F_L))*(1._wp/dy(k + 1)) - & - 0.5_wp*cfl*(E_L)*(1._wp/dy(k + 1))) + rhs_vf(momxb + 1)%sf(j, k, l) = rhs_vf(momxb + 1)%sf(j, k, l) + & + 0.5_wp*mu_L*vflux_L_arr(3)*(1._wp/dy(k)) + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) + & + 0.5_wp*mu_L*vflux_L_arr(3)*vel_L(2)*(1._wp/dy(k)) $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 1)%sf(j, k, l) = rhs_vf(momxb + 1)%sf(j, k, l) - & - (0.5_wp*(rho_L*(vel_L(2))**2.0 + & - pres_L + F_L)*(1._wp/dy(k)) - & - 0.5_wp*cfl*(rho_L*vel_L(2))*(1._wp/dy(k))) + rhs_vf(momxb + 1)%sf(j, k + 1, l) = rhs_vf(momxb + 1)%sf(j, k + 1, l) - & + 0.5_wp*mu_R*vflux_R_arr(3)*(1._wp/dy(k + 1)) + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j, k + 1, l) = rhs_vf(E_idx)%sf(j, k + 1, l) - & + 0.5_wp*mu_R*vflux_R_arr(3)*vel_R(2)*(1._wp/dy(k + 1)) $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb)%sf(j, k, l) = rhs_vf(momxb)%sf(j, k, l) - & - (0.5_wp*rho_L*vel_L(1)*vel_L(2)*(1._wp/dy(k)) - & - 0.5_wp*cfl*(rho_L*vel_L(1))*(1._wp/dy(k))) + rhs_vf(momxb + 1)%sf(j, k, l) = rhs_vf(momxb + 1)%sf(j, k, l) + & + 0.5_wp*mu_R*vflux_R_arr(3)*(1._wp/dy(k)) + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) + & + 0.5_wp*mu_R*vflux_R_arr(3)*vel_R(2)*(1._wp/dy(k)) + end if + + E_L = 0._wp; E_R = 0._wp + F_L = 0._wp; F_R = 0._wp + + $:GPU_LOOP(parallelism='[seq]') + do q = vidxb + 1, vidxe + E_L = E_L + coeff_L(q)*q_cons_vf(E_idx)%sf(j, k + q, l) + F_L = F_L + coeff_L(q)*jac(j, k + q, l) + end do + $:GPU_LOOP(parallelism='[seq]') + do q = vidxb, vidxe - 1 + E_R = E_R + coeff_R(q)*q_cons_vf(E_idx)%sf(j, k + q, l) + F_R = F_R + coeff_R(q)*jac(j, k + q, l) + end do + + call s_get_derived_states(E_L, gamma_L, pi_inf_L, rho_L, vel_L, & + E_R, gamma_R, pi_inf_R, rho_R, vel_R, & + pres_L, pres_R, cfl) + + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 2)%sf(j, k, l) = rhs_vf(momxb + 2)%sf(j, k, l) - & - (0.5_wp*rho_L*vel_L(3)*vel_L(2)*(1._wp/dy(k)) - & - 0.5_wp*cfl*(rho_L*vel_L(3))*(1._wp/dy(k))) + rhs_vf(i)%sf(j, k + 1, l) = rhs_vf(i)%sf(j, k + 1, l) + & + (0.5_wp*(alpha_rho_L(i)* & + vel_L(2))*(1._wp/dy(k + 1)) - & + 0.5_wp*cfl*(alpha_rho_L(i))*(1._wp/dy(k + 1))) $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) - & - (0.5_wp*(vel_L(2)*(E_L + & - pres_L + F_L))*(1._wp/dy(k)) - & - 0.5_wp*cfl*(E_L)*(1._wp/dy(k))) + rhs_vf(i)%sf(j, k, l) = rhs_vf(i)%sf(j, k, l) - & + (0.5_wp*(alpha_rho_L(i)* & + vel_L(2))*(1._wp/dy(k)) - & + 0.5_wp*cfl*(alpha_rho_L(i))*(1._wp/dy(k))) + end do + if (num_fluids > 1) then $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids + do i = 1, num_fluids - 1 $:GPU_ATOMIC(atomic='update') - rhs_vf(i)%sf(j, k + 1, l) = rhs_vf(i)%sf(j, k + 1, l) + & - (0.5_wp*(alpha_rho_R(i)* & - vel_R(2))*(1._wp/dy(k + 1)) + & - 0.5_wp*cfl*(alpha_rho_R(i))*(1._wp/dy(k + 1))) + rhs_vf(advxb + i - 1)%sf(j, k + 1, l) = rhs_vf(advxb + i - 1)%sf(j, k + 1, l) + & + (0.5_wp*(alpha_L(i)* & + vel_L(2))*(1._wp/dy(k + 1)) - & + 0.5_wp*cfl*(alpha_L(i))*(1._wp/dy(k + 1))) $:GPU_ATOMIC(atomic='update') - rhs_vf(i)%sf(j, k, l) = rhs_vf(i)%sf(j, k, l) - & - (0.5_wp*(alpha_rho_R(i)* & - vel_R(2))*(1._wp/dy(k)) + & - 0.5_wp*cfl*(alpha_rho_R(i))*(1._wp/dy(k))) + rhs_vf(advxb + i - 1)%sf(j, k + 1, l) = rhs_vf(advxb + i - 1)%sf(j, k + 1, l) & + - (0.5_wp*q_cons_vf(advxb + i - 1)%sf(j, k + 1, l)*vel_L(2)*(1._wp/dy(k + 1))) + + $:GPU_ATOMIC(atomic='update') + rhs_vf(advxb + i - 1)%sf(j, k, l) = rhs_vf(advxb + i - 1)%sf(j, k, l) - & + (0.5_wp*(alpha_L(i)* & + vel_L(2))*(1._wp/dy(k)) - & + 0.5_wp*cfl*(alpha_L(i))*(1._wp/dy(k))) + + $:GPU_ATOMIC(atomic='update') + rhs_vf(advxb + i - 1)%sf(j, k, l) = rhs_vf(advxb + i - 1)%sf(j, k, l) & + + (0.5_wp*q_cons_vf(advxb + i - 1)%sf(j, k, l)*vel_L(2)*(1._wp/dy(k))) end do + end if - if (num_fluids > 1) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - 1 - $:GPU_ATOMIC(atomic='update') - rhs_vf(advxb + i - 1)%sf(j, k + 1, l) = rhs_vf(advxb + i - 1)%sf(j, k + 1, l) + & - (0.5_wp*(alpha_R(i)* & - vel_R(2))*(1._wp/dy(k + 1)) + & - 0.5_wp*cfl*(alpha_R(i))*(1._wp/dy(k + 1))) - - $:GPU_ATOMIC(atomic='update') - rhs_vf(advxb + i - 1)%sf(j, k + 1, l) = rhs_vf(advxb + i - 1)%sf(j, k + 1, l) & - - (0.5_wp*q_cons_vf(advxb + i - 1)%sf(j, k + 1, l)*vel_R(2)*(1._wp/dy(k + 1))) - - $:GPU_ATOMIC(atomic='update') - rhs_vf(advxb + i - 1)%sf(j, k, l) = rhs_vf(advxb + i - 1)%sf(j, k, l) - & - (0.5_wp*(alpha_R(i)* & - vel_R(2))*(1._wp/dy(k)) + & - 0.5_wp*cfl*(alpha_R(i))*(1._wp/dy(k))) + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb + 1)%sf(j, k + 1, l) = rhs_vf(momxb + 1)%sf(j, k + 1, l) + & + (0.5_wp*(rho_L*(vel_L(2))**2.0 + & + pres_L + F_L)*(1._wp/dy(k + 1)) - & + 0.5_wp*cfl*(rho_L*vel_L(2))*(1._wp/dy(k + 1))) - $:GPU_ATOMIC(atomic='update') - rhs_vf(advxb + i - 1)%sf(j, k, l) = rhs_vf(advxb + i - 1)%sf(j, k, l) & - + (0.5_wp*q_cons_vf(advxb + i - 1)%sf(j, k, l)*vel_R(2)*(1._wp/dy(k))) - end do - end if + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb)%sf(j, k + 1, l) = rhs_vf(momxb)%sf(j, k + 1, l) + & + (0.5_wp*rho_L*vel_L(1)*vel_L(2)*(1._wp/dy(k + 1)) - & + 0.5_wp*cfl*(rho_L*vel_L(1))*(1._wp/dy(k + 1))) - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 1)%sf(j, k + 1, l) = rhs_vf(momxb + 1)%sf(j, k + 1, l) + & - (0.5_wp*(rho_R*(vel_R(2))**2.0 + & - pres_R + F_R)*(1._wp/dy(k + 1)) + & - 0.5_wp*cfl*(rho_R*vel_R(2))*(1._wp/dy(k + 1))) + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j, k + 1, l) = rhs_vf(E_idx)%sf(j, k + 1, l) + & + (0.5_wp*(vel_L(2)*(E_L + & + pres_L + F_L))*(1._wp/dy(k + 1)) - & + 0.5_wp*cfl*(E_L)*(1._wp/dy(k + 1))) - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb)%sf(j, k + 1, l) = rhs_vf(momxb)%sf(j, k + 1, l) + & - (0.5_wp*rho_R*vel_R(2)*vel_R(1)*(1._wp/dy(k + 1)) + & - 0.5_wp*cfl*(rho_R*vel_R(1))*(1._wp/dy(k + 1))) + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb + 1)%sf(j, k, l) = rhs_vf(momxb + 1)%sf(j, k, l) - & + (0.5_wp*(rho_L*(vel_L(2))**2.0 + & + pres_L + F_L)*(1._wp/dy(k)) - & + 0.5_wp*cfl*(rho_L*vel_L(2))*(1._wp/dy(k))) - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 2)%sf(j, k + 1, l) = rhs_vf(momxb + 2)%sf(j, k + 1, l) + & - (0.5_wp*rho_R*vel_R(2)*vel_R(3)*(1._wp/dy(k + 1)) + & - 0.5_wp*cfl*(rho_R*vel_R(3))*(1._wp/dy(k + 1))) + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb)%sf(j, k, l) = rhs_vf(momxb)%sf(j, k, l) - & + (0.5_wp*rho_L*vel_L(1)*vel_L(2)*(1._wp/dy(k)) - & + 0.5_wp*cfl*(rho_L*vel_L(1))*(1._wp/dy(k))) - $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j, k + 1, l) = rhs_vf(E_idx)%sf(j, k + 1, l) + & - (0.5_wp*(vel_R(2)*(E_R + & - pres_R + F_R))*(1._wp/dy(k + 1)) + & - 0.5_wp*cfl*(E_R)*(1._wp/dy(k + 1))) + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) - & + (0.5_wp*(vel_L(2)*(E_L + & + pres_L + F_L))*(1._wp/dy(k)) - & + 0.5_wp*cfl*(E_L)*(1._wp/dy(k))) + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 1)%sf(j, k, l) = rhs_vf(momxb + 1)%sf(j, k, l) - & - (0.5_wp*(rho_R*(vel_R(2))**2.0 + & - pres_R + F_R)*(1._wp/dy(k)) + & - 0.5_wp*cfl*(rho_R*vel_R(2))*(1._wp/dy(k))) - + rhs_vf(i)%sf(j, k + 1, l) = rhs_vf(i)%sf(j, k + 1, l) + & + (0.5_wp*(alpha_rho_R(i)* & + vel_R(2))*(1._wp/dy(k + 1)) + & + 0.5_wp*cfl*(alpha_rho_R(i))*(1._wp/dy(k + 1))) $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb)%sf(j, k, l) = rhs_vf(momxb)%sf(j, k, l) - & - (0.5_wp*rho_R*vel_R(2)*vel_R(1)*(1._wp/dy(k)) + & - 0.5_wp*cfl*(rho_R*vel_R(1))*(1._wp/dy(k))) + rhs_vf(i)%sf(j, k, l) = rhs_vf(i)%sf(j, k, l) - & + (0.5_wp*(alpha_rho_R(i)* & + vel_R(2))*(1._wp/dy(k)) + & + 0.5_wp*cfl*(alpha_rho_R(i))*(1._wp/dy(k))) + end do + + if (num_fluids > 1) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids - 1 + $:GPU_ATOMIC(atomic='update') + rhs_vf(advxb + i - 1)%sf(j, k + 1, l) = rhs_vf(advxb + i - 1)%sf(j, k + 1, l) + & + (0.5_wp*(alpha_R(i)* & + vel_R(2))*(1._wp/dy(k + 1)) + & + 0.5_wp*cfl*(alpha_R(i))*(1._wp/dy(k + 1))) + + $:GPU_ATOMIC(atomic='update') + rhs_vf(advxb + i - 1)%sf(j, k + 1, l) = rhs_vf(advxb + i - 1)%sf(j, k + 1, l) & + - (0.5_wp*q_cons_vf(advxb + i - 1)%sf(j, k + 1, l)*vel_R(2)*(1._wp/dy(k + 1))) - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 2)%sf(j, k, l) = rhs_vf(momxb + 2)%sf(j, k, l) - & - (0.5_wp*rho_R*vel_R(2)*vel_R(3)*(1._wp/dy(k)) + & - 0.5_wp*cfl*(rho_R*vel_R(3))*(1._wp/dy(k))) + $:GPU_ATOMIC(atomic='update') + rhs_vf(advxb + i - 1)%sf(j, k, l) = rhs_vf(advxb + i - 1)%sf(j, k, l) - & + (0.5_wp*(alpha_R(i)* & + vel_R(2))*(1._wp/dy(k)) + & + 0.5_wp*cfl*(alpha_R(i))*(1._wp/dy(k))) - $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) - & + $:GPU_ATOMIC(atomic='update') + rhs_vf(advxb + i - 1)%sf(j, k, l) = rhs_vf(advxb + i - 1)%sf(j, k, l) & + + (0.5_wp*q_cons_vf(advxb + i - 1)%sf(j, k, l)*vel_R(2)*(1._wp/dy(k))) + end do + end if + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb + 1)%sf(j, k + 1, l) = rhs_vf(momxb + 1)%sf(j, k + 1, l) + & + (0.5_wp*(rho_R*(vel_R(2))**2.0 + & + pres_R + F_R)*(1._wp/dy(k + 1)) + & + 0.5_wp*cfl*(rho_R*vel_R(2))*(1._wp/dy(k + 1))) + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb)%sf(j, k + 1, l) = rhs_vf(momxb)%sf(j, k + 1, l) + & + (0.5_wp*rho_R*vel_R(2)*vel_R(1)*(1._wp/dy(k + 1)) + & + 0.5_wp*cfl*(rho_R*vel_R(1))*(1._wp/dy(k + 1))) + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j, k + 1, l) = rhs_vf(E_idx)%sf(j, k + 1, l) + & (0.5_wp*(vel_R(2)*(E_R + & - pres_R + F_R))*(1._wp/dy(k)) + & - 0.5_wp*cfl*(E_R)*(1._wp/dy(k))) - - end do + pres_R + F_R))*(1._wp/dy(k + 1)) + & + 0.5_wp*cfl*(E_R)*(1._wp/dy(k + 1))) + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb + 1)%sf(j, k, l) = rhs_vf(momxb + 1)%sf(j, k, l) - & + (0.5_wp*(rho_R*(vel_R(2))**2.0 + & + pres_R + F_R)*(1._wp/dy(k)) + & + 0.5_wp*cfl*(rho_R*vel_R(2))*(1._wp/dy(k))) + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb)%sf(j, k, l) = rhs_vf(momxb)%sf(j, k, l) - & + (0.5_wp*rho_R*vel_R(2)*vel_R(1)*(1._wp/dy(k)) + & + 0.5_wp*cfl*(rho_R*vel_R(1))*(1._wp/dy(k))) + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) - & + (0.5_wp*(vel_R(2)*(E_R + & + pres_R + F_R))*(1._wp/dy(k)) + & + 0.5_wp*cfl*(E_R)*(1._wp/dy(k))) end do end do + end do $:END_GPU_PARALLEL_LOOP() - end if - elseif (idir == 3) then - $:GPU_PARALLEL_LOOP(collapse=3, private='[j,k,l,rho_L, rho_R, gamma_L, gamma_R, pi_inf_L, pi_inf_R, mu_L, mu_R, vel_L, vel_R, pres_L, pres_R, alpha_L, alpha_R, alpha_rho_L, alpha_rho_R, F_L, F_R, E_L, E_R, cfl, dvel_small, rho_sf_small, vflux_L_arr, vflux_R_arr]') - do l = -1, p - do k = 0, n + else + $:GPU_PARALLEL_LOOP(collapse=3, private='[j,k,l,rho_L, rho_R, gamma_L, gamma_R, pi_inf_L, pi_inf_R, mu_L, mu_R, vel_L, vel_R, pres_L, pres_R, alpha_L, alpha_R, alpha_rho_L, alpha_rho_R, F_L, F_R, E_L, E_R, cfl, dvel_small, rho_sf_small, vflux_L_arr, vflux_R_arr]') + do l = 0, p + do k = -1, n do j = 0, m if (viscous) then @@ -2123,24 +1666,24 @@ contains rho_L = 0._wp $:GPU_LOOP(parallelism='[seq]') do r = 1, num_fluids - rho_L = rho_L + q_cons_vf(r)%sf(j + i, k, l + q) + rho_L = rho_L + q_cons_vf(r)%sf(j + i, k + q, l) end do rho_sf_small(i) = rho_L end do dvel_small(1) = (1/(2._wp*dx(j)))*( & - q_cons_vf(momxb)%sf(j + 1, k, l + q)/rho_sf_small(1) - & - q_cons_vf(momxb)%sf(j - 1, k, l + q)/rho_sf_small(-1)) - dvel_small(3) = (1/(2._wp*dx(j)))*( & - q_cons_vf(momxb + 2)%sf(j + 1, k, l + q)/rho_sf_small(1) - & - q_cons_vf(momxb + 2)%sf(j - 1, k, l + q)/rho_sf_small(-1)) + q_cons_vf(momxb)%sf(j + 1, k + q, l)/rho_sf_small(1) - & + q_cons_vf(momxb)%sf(j - 1, k + q, l)/rho_sf_small(-1)) + dvel_small(2) = (1/(2._wp*dx(j)))*( & + q_cons_vf(momxb + 1)%sf(j + 1, k + q, l)/rho_sf_small(1) - & + q_cons_vf(momxb + 1)%sf(j - 1, k + q, l)/rho_sf_small(-1)) if (q > vidxb) then - vflux_L_arr(1) = vflux_L_arr(1) + coeff_L(q)*(dvel_small(3)) + vflux_L_arr(1) = vflux_L_arr(1) + coeff_L(q)*(dvel_small(2)) vflux_L_arr(3) = vflux_L_arr(3) + coeff_L(q)*(-2._wp*dvel_small(1))/3._wp end if if (q < vidxe) then - vflux_R_arr(1) = vflux_R_arr(1) + coeff_R(q)*(dvel_small(3)) + vflux_R_arr(1) = vflux_R_arr(1) + coeff_R(q)*(dvel_small(2)) vflux_R_arr(3) = vflux_R_arr(3) + coeff_R(q)*(-2._wp*dvel_small(1))/3._wp end if @@ -2150,25 +1693,30 @@ contains rho_L = 0._wp $:GPU_LOOP(parallelism='[seq]') do r = 1, num_fluids - rho_L = rho_L + q_cons_vf(r)%sf(j, k + i, l + q) + rho_L = rho_L + q_cons_vf(r)%sf(j, k + i + q, l) end do rho_sf_small(i) = rho_L end do + dvel_small(1) = (1/(2._wp*dy(k)))*( & + q_cons_vf(momxb)%sf(j, k + 1 + q, l)/rho_sf_small(1) - & + q_cons_vf(momxb)%sf(j, k - 1 + q, l)/rho_sf_small(-1)) dvel_small(2) = (1/(2._wp*dy(k)))*( & - q_cons_vf(momxb + 1)%sf(j, k + 1, l + q)/rho_sf_small(1) - & - q_cons_vf(momxb + 1)%sf(j, k - 1, l + q)/rho_sf_small(-1)) + q_cons_vf(momxb + 1)%sf(j, k + 1 + q, l)/rho_sf_small(1) - & + q_cons_vf(momxb + 1)%sf(j, k - 1 + q, l)/rho_sf_small(-1)) dvel_small(3) = (1/(2._wp*dy(k)))*( & - q_cons_vf(momxb + 2)%sf(j, k + 1, l + q)/rho_sf_small(1) - & - q_cons_vf(momxb + 2)%sf(j, k - 1, l + q)/rho_sf_small(-1)) + q_cons_vf(momxb + 2)%sf(j, k + 1 + q, l)/rho_sf_small(1) - & + q_cons_vf(momxb + 2)%sf(j, k - 1 + q, l)/rho_sf_small(-1)) if (q > vidxb) then + vflux_L_arr(1) = vflux_L_arr(1) + coeff_L(q)*(dvel_small(1)) vflux_L_arr(2) = vflux_L_arr(2) + coeff_L(q)*(dvel_small(3)) - vflux_L_arr(3) = vflux_L_arr(3) + coeff_L(q)*(-2._wp*dvel_small(2))/3._wp + vflux_L_arr(3) = vflux_L_arr(3) + coeff_L(q)*(4._wp*dvel_small(2))/3._wp end if if (q < vidxe) then + vflux_R_arr(1) = vflux_R_arr(1) + coeff_R(q)*(dvel_small(1)) vflux_R_arr(2) = vflux_R_arr(2) + coeff_R(q)*(dvel_small(3)) - vflux_R_arr(3) = vflux_R_arr(3) + coeff_R(q)*(-2._wp*dvel_small(2))/3._wp + vflux_R_arr(3) = vflux_R_arr(3) + coeff_R(q)*(4._wp*dvel_small(2))/3._wp end if !z-direction contributions @@ -2177,28 +1725,24 @@ contains rho_L = 0._wp $:GPU_LOOP(parallelism='[seq]') do r = 1, num_fluids - rho_L = rho_L + q_cons_vf(r)%sf(j, k, l + i + q) + rho_L = rho_L + q_cons_vf(r)%sf(j, k + q, l + i) end do rho_sf_small(i) = rho_L end do - dvel_small(1) = (1/(2._wp*dz(l)))*( & - q_cons_vf(momxb)%sf(j, k, l + 1 + q)/rho_sf_small(1) - & - q_cons_vf(momxb)%sf(j, k, l - 1 + q)/rho_sf_small(-1)) + dvel_small(2) = (1/(2._wp*dz(l)))*( & - q_cons_vf(momxb + 1)%sf(j, k, l + 1 + q)/rho_sf_small(1) - & - q_cons_vf(momxb + 1)%sf(j, k, l - 1 + q)/rho_sf_small(-1)) + q_cons_vf(momxb + 1)%sf(j, k + q, l + 1)/rho_sf_small(1) - & + q_cons_vf(momxb + 1)%sf(j, k + q, l - 1)/rho_sf_small(-1)) dvel_small(3) = (1/(2._wp*dz(l)))*( & - q_cons_vf(momxb + 2)%sf(j, k, l + 1 + q)/rho_sf_small(1) - & - q_cons_vf(momxb + 2)%sf(j, k, l - 1 + q)/rho_sf_small(-1)) + q_cons_vf(momxb + 2)%sf(j, k + q, l + 1)/rho_sf_small(1) - & + q_cons_vf(momxb + 2)%sf(j, k + q, l - 1)/rho_sf_small(-1)) if (q > vidxb) then - vflux_L_arr(1) = vflux_L_arr(1) + coeff_L(q)*(dvel_small(1)) vflux_L_arr(2) = vflux_L_arr(2) + coeff_L(q)*(dvel_small(2)) - vflux_L_arr(3) = vflux_L_arr(3) + coeff_L(q)*(4._wp*dvel_small(3))/3._wp + vflux_L_arr(3) = vflux_L_arr(3) + coeff_L(q)*(-2._wp*dvel_small(3))/3._wp end if if (q < vidxe) then - vflux_R_arr(1) = vflux_R_arr(1) + coeff_R(q)*(dvel_small(1)) vflux_R_arr(2) = vflux_R_arr(2) + coeff_R(q)*(dvel_small(2)) - vflux_R_arr(3) = vflux_R_arr(3) + coeff_R(q)*(4._wp*dvel_small(3))/3._wp + vflux_R_arr(3) = vflux_R_arr(3) + coeff_R(q)*(-2._wp*dvel_small(3))/3._wp end if end do end if @@ -2211,13 +1755,13 @@ contains do q = vidxb + 1, vidxe $:GPU_LOOP(parallelism='[seq]') do i = 1, num_fluids - alpha_rho_L(i) = alpha_rho_L(i) + coeff_L(q)*q_cons_vf(i)%sf(j, k, l + q) + alpha_rho_L(i) = alpha_rho_L(i) + coeff_L(q)*q_cons_vf(i)%sf(j, k + q, l) end do if (num_fluids > 1) then $:GPU_LOOP(parallelism='[seq]') do i = 1, num_fluids - 1 - alpha_L(i) = alpha_L(i) + coeff_L(q)*q_cons_vf(E_idx + i)%sf(j, k, l + q) + alpha_L(i) = alpha_L(i) + coeff_L(q)*q_cons_vf(E_idx + i)%sf(j, k + q, l) end do else alpha_L(1) = 1._wp @@ -2225,7 +1769,7 @@ contains $:GPU_LOOP(parallelism='[seq]') do i = 1, num_dims - vel_L(i) = vel_L(i) + coeff_L(q)*q_cons_vf(momxb + i - 1)%sf(j, k, l + q) + vel_L(i) = vel_L(i) + coeff_L(q)*q_cons_vf(momxb + i - 1)%sf(j, k + q, l) end do end do @@ -2233,13 +1777,13 @@ contains do q = vidxb, vidxe - 1 $:GPU_LOOP(parallelism='[seq]') do i = 1, num_fluids - alpha_rho_R(i) = alpha_rho_R(i) + coeff_R(q)*q_cons_vf(i)%sf(j, k, l + q) + alpha_rho_R(i) = alpha_rho_R(i) + coeff_R(q)*q_cons_vf(i)%sf(j, k + q, l) end do if (num_fluids > 1) then $:GPU_LOOP(parallelism='[seq]') do i = 1, num_fluids - 1 - alpha_R(i) = alpha_R(i) + coeff_R(q)*q_cons_vf(E_idx + i)%sf(j, k, l + q) + alpha_R(i) = alpha_R(i) + coeff_R(q)*q_cons_vf(E_idx + i)%sf(j, k + q, l) end do else alpha_R(1) = 1._wp @@ -2247,7 +1791,7 @@ contains $:GPU_LOOP(parallelism='[seq]') do i = 1, num_dims - vel_R(i) = vel_R(i) + coeff_R(q)*q_cons_vf(momxb + i - 1)%sf(j, k, l + q) + vel_R(i) = vel_R(i) + coeff_R(q)*q_cons_vf(momxb + i - 1)%sf(j, k + q, l) end do end do @@ -2277,88 +1821,88 @@ contains end do $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb)%sf(j, k, l + 1) = rhs_vf(momxb)%sf(j, k, l + 1) - & - 0.5_wp*mu_L*vflux_L_arr(1)*(1._wp/dz(l + 1)) + rhs_vf(momxb)%sf(j, k + 1, l) = rhs_vf(momxb)%sf(j, k + 1, l) - & + 0.5_wp*mu_L*vflux_L_arr(1)*(1._wp/dy(k + 1)) $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j, k, l + 1) = rhs_vf(E_idx)%sf(j, k, l + 1) - & - 0.5_wp*mu_L*vflux_L_arr(1)*vel_L(1)*(1._wp/dz(l + 1)) + rhs_vf(E_idx)%sf(j, k + 1, l) = rhs_vf(E_idx)%sf(j, k + 1, l) - & + 0.5_wp*mu_L*vflux_L_arr(1)*vel_L(1)*(1._wp/dy(k + 1)) $:GPU_ATOMIC(atomic='update') rhs_vf(momxb)%sf(j, k, l) = rhs_vf(momxb)%sf(j, k, l) + & - 0.5_wp*mu_L*vflux_L_arr(1)*(1._wp/dz(l)) + 0.5_wp*mu_L*vflux_L_arr(1)*(1._wp/dy(k)) $:GPU_ATOMIC(atomic='update') rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) + & - 0.5_wp*mu_L*vflux_L_arr(1)*vel_L(1)*(1._wp/dz(l)) + 0.5_wp*mu_L*vflux_L_arr(1)*vel_L(1)*(1._wp/dy(k)) $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb)%sf(j, k, l + 1) = rhs_vf(momxb)%sf(j, k, l + 1) - & - 0.5_wp*mu_R*vflux_R_arr(1)*(1._wp/dz(l + 1)) + rhs_vf(momxb)%sf(j, k + 1, l) = rhs_vf(momxb)%sf(j, k + 1, l) - & + 0.5_wp*mu_R*vflux_R_arr(1)*(1._wp/dy(k + 1)) $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j, k, l + 1) = rhs_vf(E_idx)%sf(j, k, l + 1) - & - 0.5_wp*mu_R*vflux_R_arr(1)*vel_R(1)*(1._wp/dz(l + 1)) + rhs_vf(E_idx)%sf(j, k + 1, l) = rhs_vf(E_idx)%sf(j, k + 1, l) - & + 0.5_wp*mu_R*vflux_R_arr(1)*vel_R(1)*(1._wp/dy(k + 1)) $:GPU_ATOMIC(atomic='update') rhs_vf(momxb)%sf(j, k, l) = rhs_vf(momxb)%sf(j, k, l) + & - 0.5_wp*mu_R*vflux_R_arr(1)*(1._wp/dz(l)) + 0.5_wp*mu_R*vflux_R_arr(1)*(1._wp/dy(k)) $:GPU_ATOMIC(atomic='update') rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) + & - 0.5_wp*mu_R*vflux_R_arr(1)*vel_R(1)*(1._wp/dz(l)) + 0.5_wp*mu_R*vflux_R_arr(1)*vel_R(1)*(1._wp/dy(k)) $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 1)%sf(j, k, l + 1) = rhs_vf(momxb + 1)%sf(j, k, l + 1) - & - 0.5_wp*mu_L*vflux_L_arr(2)*(1._wp/dz(l + 1)) + rhs_vf(momxb + 2)%sf(j, k + 1, l) = rhs_vf(momxb + 2)%sf(j, k + 1, l) - & + 0.5_wp*mu_L*vflux_L_arr(2)*(1._wp/dy(k + 1)) $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j, k, l + 1) = rhs_vf(E_idx)%sf(j, k, l + 1) - & - 0.5_wp*mu_L*vflux_L_arr(2)*vel_L(2)*(1._wp/dz(l + 1)) + rhs_vf(E_idx)%sf(j, k + 1, l) = rhs_vf(E_idx)%sf(j, k + 1, l) - & + 0.5_wp*mu_L*vflux_L_arr(2)*vel_L(3)*(1._wp/dy(k + 1)) $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 1)%sf(j, k, l) = rhs_vf(momxb + 1)%sf(j, k, l) + & - 0.5_wp*mu_L*vflux_L_arr(2)*(1._wp/dz(l)) + rhs_vf(momxb + 2)%sf(j, k, l) = rhs_vf(momxb + 2)%sf(j, k, l) + & + 0.5_wp*mu_L*vflux_L_arr(2)*(1._wp/dy(k)) $:GPU_ATOMIC(atomic='update') rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) + & - 0.5_wp*mu_L*vflux_L_arr(2)*vel_L(2)*(1._wp/dz(l)) + 0.5_wp*mu_L*vflux_L_arr(2)*vel_L(3)*(1._wp/dy(k)) $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 1)%sf(j, k, l + 1) = rhs_vf(momxb + 1)%sf(j, k, l + 1) - & - 0.5_wp*mu_R*vflux_R_arr(2)*(1._wp/dz(l + 1)) + rhs_vf(momxb + 2)%sf(j, k + 1, l) = rhs_vf(momxb + 2)%sf(j, k + 1, l) - & + 0.5_wp*mu_R*vflux_R_arr(2)*(1._wp/dy(k + 1)) $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j, k, l + 1) = rhs_vf(E_idx)%sf(j, k, l + 1) - & - 0.5_wp*mu_R*vflux_R_arr(2)*vel_R(2)*(1._wp/dz(l + 1)) + rhs_vf(E_idx)%sf(j, k + 1, l) = rhs_vf(E_idx)%sf(j, k + 1, l) - & + 0.5_wp*mu_R*vflux_R_arr(2)*vel_R(3)*(1._wp/dy(k + 1)) $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 1)%sf(j, k, l) = rhs_vf(momxb + 1)%sf(j, k, l) + & - 0.5_wp*mu_R*vflux_R_arr(2)*(1._wp/dz(l)) + rhs_vf(momxb + 2)%sf(j, k, l) = rhs_vf(momxb + 2)%sf(j, k, l) + & + 0.5_wp*mu_R*vflux_R_arr(2)*(1._wp/dy(k)) $:GPU_ATOMIC(atomic='update') rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) + & - 0.5_wp*mu_R*vflux_R_arr(2)*vel_R(2)*(1._wp/dz(l)) + 0.5_wp*mu_R*vflux_R_arr(2)*vel_R(3)*(1._wp/dy(k)) $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 2)%sf(j, k, l + 1) = rhs_vf(momxb + 2)%sf(j, k, l + 1) - & - 0.5_wp*mu_L*vflux_L_arr(3)*(1._wp/dz(l + 1)) + rhs_vf(momxb + 1)%sf(j, k + 1, l) = rhs_vf(momxb + 1)%sf(j, k + 1, l) - & + 0.5_wp*mu_L*vflux_L_arr(3)*(1._wp/dy(k + 1)) $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j, k, l + 1) = rhs_vf(E_idx)%sf(j, k, l + 1) - & - 0.5_wp*mu_L*vflux_L_arr(3)*vel_L(3)*(1._wp/dz(l + 1)) + rhs_vf(E_idx)%sf(j, k + 1, l) = rhs_vf(E_idx)%sf(j, k + 1, l) - & + 0.5_wp*mu_L*vflux_L_arr(3)*vel_L(2)*(1._wp/dy(k + 1)) $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 2)%sf(j, k, l) = rhs_vf(momxb + 2)%sf(j, k, l) + & - 0.5_wp*mu_L*vflux_L_arr(3)*(1._wp/dz(l)) + rhs_vf(momxb + 1)%sf(j, k, l) = rhs_vf(momxb + 1)%sf(j, k, l) + & + 0.5_wp*mu_L*vflux_L_arr(3)*(1._wp/dy(k)) $:GPU_ATOMIC(atomic='update') rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) + & - 0.5_wp*mu_L*vflux_L_arr(3)*vel_L(3)*(1._wp/dz(l)) + 0.5_wp*mu_L*vflux_L_arr(3)*vel_L(2)*(1._wp/dy(k)) $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 2)%sf(j, k, l + 1) = rhs_vf(momxb + 2)%sf(j, k, l + 1) - & - 0.5_wp*mu_R*vflux_R_arr(3)*(1._wp/dz(l + 1)) + rhs_vf(momxb + 1)%sf(j, k + 1, l) = rhs_vf(momxb + 1)%sf(j, k + 1, l) - & + 0.5_wp*mu_R*vflux_R_arr(3)*(1._wp/dy(k + 1)) $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j, k, l + 1) = rhs_vf(E_idx)%sf(j, k, l + 1) - & - 0.5_wp*mu_R*vflux_R_arr(3)*vel_R(3)*(1._wp/dz(l + 1)) + rhs_vf(E_idx)%sf(j, k + 1, l) = rhs_vf(E_idx)%sf(j, k + 1, l) - & + 0.5_wp*mu_R*vflux_R_arr(3)*vel_R(2)*(1._wp/dy(k + 1)) $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 2)%sf(j, k, l) = rhs_vf(momxb + 2)%sf(j, k, l) + & - 0.5_wp*mu_R*vflux_R_arr(3)*(1._wp/dz(l)) + rhs_vf(momxb + 1)%sf(j, k, l) = rhs_vf(momxb + 1)%sf(j, k, l) + & + 0.5_wp*mu_R*vflux_R_arr(3)*(1._wp/dy(k)) $:GPU_ATOMIC(atomic='update') rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) + & - 0.5_wp*mu_R*vflux_R_arr(3)*vel_R(3)*(1._wp/dz(l)) + 0.5_wp*mu_R*vflux_R_arr(3)*vel_R(2)*(1._wp/dy(k)) end if E_L = 0._wp; E_R = 0._wp @@ -2366,14 +1910,14 @@ contains $:GPU_LOOP(parallelism='[seq]') do q = vidxb + 1, vidxe - E_L = E_L + coeff_L(q)*q_cons_vf(E_idx)%sf(j, k, l + q) - F_L = F_L + coeff_L(q)*jac(j, k, l + q) + E_L = E_L + coeff_L(q)*q_cons_vf(E_idx)%sf(j, k + q, l) + F_L = F_L + coeff_L(q)*jac(j, k + q, l) end do $:GPU_LOOP(parallelism='[seq]') do q = vidxb, vidxe - 1 - E_R = E_R + coeff_R(q)*q_cons_vf(E_idx)%sf(j, k, l + q) - F_R = F_R + coeff_R(q)*jac(j, k, l + q) + E_R = E_R + coeff_R(q)*q_cons_vf(E_idx)%sf(j, k + q, l) + F_R = F_R + coeff_R(q)*jac(j, k + q, l) end do call s_get_derived_states(E_L, gamma_L, pi_inf_L, rho_L, vel_L, & @@ -2383,174 +1927,630 @@ contains $:GPU_LOOP(parallelism='[seq]') do i = 1, num_fluids $:GPU_ATOMIC(atomic='update') - rhs_vf(i)%sf(j, k, l + 1) = rhs_vf(i)%sf(j, k, l + 1) + & + rhs_vf(i)%sf(j, k + 1, l) = rhs_vf(i)%sf(j, k + 1, l) + & (0.5_wp*(alpha_rho_L(i)* & - vel_L(3))*(1._wp/dz(l + 1)) - & - 0.5_wp*cfl*(alpha_rho_L(i))*(1._wp/dz(l + 1))) + vel_L(2))*(1._wp/dy(k + 1)) - & + 0.5_wp*cfl*(alpha_rho_L(i))*(1._wp/dy(k + 1))) $:GPU_ATOMIC(atomic='update') rhs_vf(i)%sf(j, k, l) = rhs_vf(i)%sf(j, k, l) - & (0.5_wp*(alpha_rho_L(i)* & - vel_L(3))*(1._wp/dz(l)) - & - 0.5_wp*cfl*(alpha_rho_L(i))*(1._wp/dz(l))) + vel_L(2))*(1._wp/dy(k)) - & + 0.5_wp*cfl*(alpha_rho_L(i))*(1._wp/dy(k))) end do if (num_fluids > 1) then $:GPU_LOOP(parallelism='[seq]') do i = 1, num_fluids - 1 $:GPU_ATOMIC(atomic='update') - rhs_vf(advxb + i - 1)%sf(j, k, l + 1) = rhs_vf(advxb + i - 1)%sf(j, k, l + 1) + & + rhs_vf(advxb + i - 1)%sf(j, k + 1, l) = rhs_vf(advxb + i - 1)%sf(j, k + 1, l) + & (0.5_wp*(alpha_L(i)* & - vel_L(3))*(1._wp/dz(l + 1)) - & - 0.5_wp*cfl*(alpha_L(i))*(1._wp/dz(l + 1))) + vel_L(2))*(1._wp/dy(k + 1)) - & + 0.5_wp*cfl*(alpha_L(i))*(1._wp/dy(k + 1))) $:GPU_ATOMIC(atomic='update') - rhs_vf(advxb + i - 1)%sf(j, k, l + 1) = rhs_vf(advxb + i - 1)%sf(j, k, l + 1) & - - (0.5_wp*q_cons_vf(advxb + i - 1)%sf(j, k, l + 1)*vel_L(3)*(1._wp/dz(l + 1))) + rhs_vf(advxb + i - 1)%sf(j, k + 1, l) = rhs_vf(advxb + i - 1)%sf(j, k + 1, l) & + - (0.5_wp*q_cons_vf(advxb + i - 1)%sf(j, k + 1, l)*vel_L(2)*(1._wp/dy(k + 1))) $:GPU_ATOMIC(atomic='update') rhs_vf(advxb + i - 1)%sf(j, k, l) = rhs_vf(advxb + i - 1)%sf(j, k, l) - & (0.5_wp*(alpha_L(i)* & - vel_L(3))*(1._wp/dz(l)) - & - 0.5_wp*cfl*(alpha_L(i))*(1._wp/dz(l))) + vel_L(2))*(1._wp/dy(k)) - & + 0.5_wp*cfl*(alpha_L(i))*(1._wp/dy(k))) $:GPU_ATOMIC(atomic='update') rhs_vf(advxb + i - 1)%sf(j, k, l) = rhs_vf(advxb + i - 1)%sf(j, k, l) & - + (0.5_wp*q_cons_vf(advxb + i - 1)%sf(j, k, l)*vel_L(3)*(1._wp/dz(l))) + + (0.5_wp*q_cons_vf(advxb + i - 1)%sf(j, k, l)*vel_L(2)*(1._wp/dy(k))) end do end if $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 2)%sf(j, k, l + 1) = rhs_vf(momxb + 2)%sf(j, k, l + 1) + & - (0.5_wp*(rho_L*(vel_L(3))**2.0 + & - pres_L + F_L)*(1._wp/dz(l + 1)) - & - 0.5_wp*cfl*(rho_L*vel_L(3))*(1._wp/dz(l + 1))) + rhs_vf(momxb + 1)%sf(j, k + 1, l) = rhs_vf(momxb + 1)%sf(j, k + 1, l) + & + (0.5_wp*(rho_L*(vel_L(2))**2.0 + & + pres_L + F_L)*(1._wp/dy(k + 1)) - & + 0.5_wp*cfl*(rho_L*vel_L(2))*(1._wp/dy(k + 1))) $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb)%sf(j, k, l + 1) = rhs_vf(momxb)%sf(j, k, l + 1) + & - (0.5_wp*rho_L*vel_L(1)*vel_L(3)*(1._wp/dz(l + 1)) - & - 0.5_wp*cfl*(rho_L*vel_L(1))*(1._wp/dz(l + 1))) + rhs_vf(momxb)%sf(j, k + 1, l) = rhs_vf(momxb)%sf(j, k + 1, l) + & + (0.5_wp*rho_L*vel_L(1)*vel_L(2)*(1._wp/dy(k + 1)) - & + 0.5_wp*cfl*(rho_L*vel_L(1))*(1._wp/dy(k + 1))) $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 1)%sf(j, k, l + 1) = rhs_vf(momxb + 1)%sf(j, k, l + 1) + & - (0.5_wp*rho_L*vel_L(2)*vel_L(3)*(1._wp/dz(l + 1)) - & - 0.5_wp*cfl*(rho_L*vel_L(2))*(1._wp/dz(l + 1))) + rhs_vf(momxb + 2)%sf(j, k + 1, l) = rhs_vf(momxb + 2)%sf(j, k + 1, l) + & + (0.5_wp*rho_L*vel_L(3)*vel_L(2)*(1._wp/dy(k + 1)) - & + 0.5_wp*cfl*(rho_L*vel_L(3))*(1._wp/dy(k + 1))) $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j, k, l + 1) = rhs_vf(E_idx)%sf(j, k, l + 1) + & - (0.5_wp*(vel_L(3)*(E_L + & - pres_L + F_L))*(1._wp/dz(l + 1)) - & - 0.5_wp*cfl*(E_L)*(1._wp/dz(l + 1))) + rhs_vf(E_idx)%sf(j, k + 1, l) = rhs_vf(E_idx)%sf(j, k + 1, l) + & + (0.5_wp*(vel_L(2)*(E_L + & + pres_L + F_L))*(1._wp/dy(k + 1)) - & + 0.5_wp*cfl*(E_L)*(1._wp/dy(k + 1))) $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 2)%sf(j, k, l) = rhs_vf(momxb + 2)%sf(j, k, l) - & - (0.5_wp*(rho_L*(vel_L(3))**2.0 + & - pres_L + F_L)*(1._wp/dz(l)) - & - 0.5_wp*cfl*(rho_L*vel_L(3))*(1._wp/dz(l))) + rhs_vf(momxb + 1)%sf(j, k, l) = rhs_vf(momxb + 1)%sf(j, k, l) - & + (0.5_wp*(rho_L*(vel_L(2))**2.0 + & + pres_L + F_L)*(1._wp/dy(k)) - & + 0.5_wp*cfl*(rho_L*vel_L(2))*(1._wp/dy(k))) $:GPU_ATOMIC(atomic='update') rhs_vf(momxb)%sf(j, k, l) = rhs_vf(momxb)%sf(j, k, l) - & - (0.5_wp*rho_L*vel_L(1)*vel_L(3)*(1._wp/dz(l)) - & - 0.5_wp*cfl*(rho_L*vel_L(1))*(1._wp/dz(l))) + (0.5_wp*rho_L*vel_L(1)*vel_L(2)*(1._wp/dy(k)) - & + 0.5_wp*cfl*(rho_L*vel_L(1))*(1._wp/dy(k))) $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 1)%sf(j, k, l) = rhs_vf(momxb + 1)%sf(j, k, l) - & - (0.5_wp*rho_L*vel_L(2)*vel_L(3)*(1._wp/dz(l)) - & - 0.5_wp*cfl*(rho_L*vel_L(2))*(1._wp/dz(l))) + rhs_vf(momxb + 2)%sf(j, k, l) = rhs_vf(momxb + 2)%sf(j, k, l) - & + (0.5_wp*rho_L*vel_L(3)*vel_L(2)*(1._wp/dy(k)) - & + 0.5_wp*cfl*(rho_L*vel_L(3))*(1._wp/dy(k))) $:GPU_ATOMIC(atomic='update') rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) - & - (0.5_wp*(vel_L(3)*(E_L + & - pres_L + F_L))*(1._wp/dz(l)) - & - 0.5_wp*cfl*(E_L)*(1._wp/dz(l))) + (0.5_wp*(vel_L(2)*(E_L + & + pres_L + F_L))*(1._wp/dy(k)) - & + 0.5_wp*cfl*(E_L)*(1._wp/dy(k))) $:GPU_LOOP(parallelism='[seq]') do i = 1, num_fluids $:GPU_ATOMIC(atomic='update') - rhs_vf(i)%sf(j, k, l + 1) = rhs_vf(i)%sf(j, k, l + 1) + & + rhs_vf(i)%sf(j, k + 1, l) = rhs_vf(i)%sf(j, k + 1, l) + & (0.5_wp*(alpha_rho_R(i)* & - vel_R(3))*(1._wp/dz(l + 1)) + & - 0.5_wp*cfl*(alpha_rho_R(i))*(1._wp/dz(l + 1))) + vel_R(2))*(1._wp/dy(k + 1)) + & + 0.5_wp*cfl*(alpha_rho_R(i))*(1._wp/dy(k + 1))) $:GPU_ATOMIC(atomic='update') rhs_vf(i)%sf(j, k, l) = rhs_vf(i)%sf(j, k, l) - & (0.5_wp*(alpha_rho_R(i)* & - vel_R(3))*(1._wp/dz(l)) + & - 0.5_wp*cfl*(alpha_rho_R(i))*(1._wp/dz(l))) + vel_R(2))*(1._wp/dy(k)) + & + 0.5_wp*cfl*(alpha_rho_R(i))*(1._wp/dy(k))) end do if (num_fluids > 1) then $:GPU_LOOP(parallelism='[seq]') do i = 1, num_fluids - 1 $:GPU_ATOMIC(atomic='update') - rhs_vf(advxb + i - 1)%sf(j, k, l + 1) = rhs_vf(advxb + i - 1)%sf(j, k, l + 1) + & + rhs_vf(advxb + i - 1)%sf(j, k + 1, l) = rhs_vf(advxb + i - 1)%sf(j, k + 1, l) + & (0.5_wp*(alpha_R(i)* & - vel_R(3))*(1._wp/dz(l + 1)) + & - 0.5_wp*cfl*(alpha_R(i))*(1._wp/dz(l + 1))) + vel_R(2))*(1._wp/dy(k + 1)) + & + 0.5_wp*cfl*(alpha_R(i))*(1._wp/dy(k + 1))) $:GPU_ATOMIC(atomic='update') - rhs_vf(advxb + i - 1)%sf(j, k, l + 1) = rhs_vf(advxb + i - 1)%sf(j, k, l + 1) & - - (0.5_wp*q_cons_vf(advxb + i - 1)%sf(j, k, l + 1)*vel_R(3)*(1._wp/dz(l + 1))) + rhs_vf(advxb + i - 1)%sf(j, k + 1, l) = rhs_vf(advxb + i - 1)%sf(j, k + 1, l) & + - (0.5_wp*q_cons_vf(advxb + i - 1)%sf(j, k + 1, l)*vel_R(2)*(1._wp/dy(k + 1))) $:GPU_ATOMIC(atomic='update') rhs_vf(advxb + i - 1)%sf(j, k, l) = rhs_vf(advxb + i - 1)%sf(j, k, l) - & (0.5_wp*(alpha_R(i)* & - vel_R(3))*(1._wp/dz(l)) + & - 0.5_wp*cfl*(alpha_R(i))*(1._wp/dz(l))) + vel_R(2))*(1._wp/dy(k)) + & + 0.5_wp*cfl*(alpha_R(i))*(1._wp/dy(k))) $:GPU_ATOMIC(atomic='update') rhs_vf(advxb + i - 1)%sf(j, k, l) = rhs_vf(advxb + i - 1)%sf(j, k, l) & - + (0.5_wp*q_cons_vf(advxb + i - 1)%sf(j, k, l)*vel_R(3)*(1._wp/dz(l))) + + (0.5_wp*q_cons_vf(advxb + i - 1)%sf(j, k, l)*vel_R(2)*(1._wp/dy(k))) end do end if $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 2)%sf(j, k, l + 1) = rhs_vf(momxb + 2)%sf(j, k, l + 1) + & - (0.5_wp*(rho_R*(vel_R(3))**2.0 + & - pres_R + F_R)*(1._wp/dz(l + 1)) + & - 0.5_wp*cfl*(rho_R*vel_R(3))*(1._wp/dz(l + 1))) + rhs_vf(momxb + 1)%sf(j, k + 1, l) = rhs_vf(momxb + 1)%sf(j, k + 1, l) + & + (0.5_wp*(rho_R*(vel_R(2))**2.0 + & + pres_R + F_R)*(1._wp/dy(k + 1)) + & + 0.5_wp*cfl*(rho_R*vel_R(2))*(1._wp/dy(k + 1))) $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb)%sf(j, k, l + 1) = rhs_vf(momxb)%sf(j, k, l + 1) + & - (0.5_wp*rho_R*vel_R(1)*vel_R(3)*(1._wp/dz(l + 1)) + & - 0.5_wp*cfl*(rho_R*vel_R(1))*(1._wp/dz(l + 1))) + rhs_vf(momxb)%sf(j, k + 1, l) = rhs_vf(momxb)%sf(j, k + 1, l) + & + (0.5_wp*rho_R*vel_R(2)*vel_R(1)*(1._wp/dy(k + 1)) + & + 0.5_wp*cfl*(rho_R*vel_R(1))*(1._wp/dy(k + 1))) $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 1)%sf(j, k, l + 1) = rhs_vf(momxb + 1)%sf(j, k, l + 1) + & - (0.5_wp*rho_R*vel_R(2)*vel_R(3)*(1._wp/dz(l + 1)) + & - 0.5_wp*cfl*(rho_R*vel_R(2))*(1._wp/dz(l + 1))) + rhs_vf(momxb + 2)%sf(j, k + 1, l) = rhs_vf(momxb + 2)%sf(j, k + 1, l) + & + (0.5_wp*rho_R*vel_R(2)*vel_R(3)*(1._wp/dy(k + 1)) + & + 0.5_wp*cfl*(rho_R*vel_R(3))*(1._wp/dy(k + 1))) $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j, k, l + 1) = rhs_vf(E_idx)%sf(j, k, l + 1) + & - (0.5_wp*(vel_R(3)*(E_R + & - pres_R + F_R))*(1._wp/dz(l + 1)) + & - 0.5_wp*cfl*(E_R)*(1._wp/dz(l + 1))) + rhs_vf(E_idx)%sf(j, k + 1, l) = rhs_vf(E_idx)%sf(j, k + 1, l) + & + (0.5_wp*(vel_R(2)*(E_R + & + pres_R + F_R))*(1._wp/dy(k + 1)) + & + 0.5_wp*cfl*(E_R)*(1._wp/dy(k + 1))) $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 2)%sf(j, k, l) = rhs_vf(momxb + 2)%sf(j, k, l) - & - (0.5_wp*(rho_R*(vel_R(3))**2.0 + & - pres_R + F_R)*(1._wp/dz(l)) + & - 0.5_wp*cfl*(rho_R*vel_R(3))*(1._wp/dz(l))) + rhs_vf(momxb + 1)%sf(j, k, l) = rhs_vf(momxb + 1)%sf(j, k, l) - & + (0.5_wp*(rho_R*(vel_R(2))**2.0 + & + pres_R + F_R)*(1._wp/dy(k)) + & + 0.5_wp*cfl*(rho_R*vel_R(2))*(1._wp/dy(k))) $:GPU_ATOMIC(atomic='update') rhs_vf(momxb)%sf(j, k, l) = rhs_vf(momxb)%sf(j, k, l) - & - (0.5_wp*rho_R*vel_R(1)*vel_R(3)*(1._wp/dz(l)) + & - 0.5_wp*cfl*(rho_R*vel_R(1))*(1._wp/dz(l))) + (0.5_wp*rho_R*vel_R(2)*vel_R(1)*(1._wp/dy(k)) + & + 0.5_wp*cfl*(rho_R*vel_R(1))*(1._wp/dy(k))) $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 1)%sf(j, k, l) = rhs_vf(momxb + 1)%sf(j, k, l) - & - (0.5_wp*rho_R*vel_R(2)*vel_R(3)*(1._wp/dz(l)) + & - 0.5_wp*cfl*(rho_R*vel_R(2))*(1._wp/dz(l))) + rhs_vf(momxb + 2)%sf(j, k, l) = rhs_vf(momxb + 2)%sf(j, k, l) - & + (0.5_wp*rho_R*vel_R(2)*vel_R(3)*(1._wp/dy(k)) + & + 0.5_wp*cfl*(rho_R*vel_R(3))*(1._wp/dy(k))) $:GPU_ATOMIC(atomic='update') rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) - & - (0.5_wp*(vel_R(3)*(E_R + & - pres_R + F_R))*(1._wp/dz(l)) + & - 0.5_wp*cfl*(E_R)*(1._wp/dz(l))) + (0.5_wp*(vel_R(2)*(E_R + & + pres_R + F_R))*(1._wp/dy(k)) + & + 0.5_wp*cfl*(E_R)*(1._wp/dy(k))) + + end do + end do + end do + $:END_GPU_PARALLEL_LOOP() + end if + elseif (idir == 3) then + $:GPU_PARALLEL_LOOP(collapse=3, private='[j,k,l,rho_L, rho_R, gamma_L, gamma_R, pi_inf_L, pi_inf_R, mu_L, mu_R, vel_L, vel_R, pres_L, pres_R, alpha_L, alpha_R, alpha_rho_L, alpha_rho_R, F_L, F_R, E_L, E_R, cfl, dvel_small, rho_sf_small, vflux_L_arr, vflux_R_arr]') + do l = -1, p + do k = 0, n + do j = 0, m + + if (viscous) then + vflux_L_arr = 0._wp + vflux_R_arr = 0._wp + + #:if MFC_CASE_OPTIMIZATION + #:if igr_order == 5 + !DIR$ unroll 6 + #:elif igr_order == 3 + !DIR$ unroll 4 + #:endif + #:endif + $:GPU_LOOP(parallelism='[seq]') + do q = vidxb, vidxe + dvel_small = 0._wp + !x-direction contributions + $:GPU_LOOP(parallelism='[seq]') + do i = -1, 1 + rho_L = 0._wp + $:GPU_LOOP(parallelism='[seq]') + do r = 1, num_fluids + rho_L = rho_L + q_cons_vf(r)%sf(j + i, k, l + q) + end do + rho_sf_small(i) = rho_L + end do + + dvel_small(1) = (1/(2._wp*dx(j)))*( & + q_cons_vf(momxb)%sf(j + 1, k, l + q)/rho_sf_small(1) - & + q_cons_vf(momxb)%sf(j - 1, k, l + q)/rho_sf_small(-1)) + dvel_small(3) = (1/(2._wp*dx(j)))*( & + q_cons_vf(momxb + 2)%sf(j + 1, k, l + q)/rho_sf_small(1) - & + q_cons_vf(momxb + 2)%sf(j - 1, k, l + q)/rho_sf_small(-1)) + + if (q > vidxb) then + vflux_L_arr(1) = vflux_L_arr(1) + coeff_L(q)*(dvel_small(3)) + vflux_L_arr(3) = vflux_L_arr(3) + coeff_L(q)*(-2._wp*dvel_small(1))/3._wp + end if + if (q < vidxe) then + vflux_R_arr(1) = vflux_R_arr(1) + coeff_R(q)*(dvel_small(3)) + vflux_R_arr(3) = vflux_R_arr(3) + coeff_R(q)*(-2._wp*dvel_small(1))/3._wp + end if + + !y-direction contributions + $:GPU_LOOP(parallelism='[seq]') + do i = -1, 1 + rho_L = 0._wp + $:GPU_LOOP(parallelism='[seq]') + do r = 1, num_fluids + rho_L = rho_L + q_cons_vf(r)%sf(j, k + i, l + q) + end do + rho_sf_small(i) = rho_L + end do + + dvel_small(2) = (1/(2._wp*dy(k)))*( & + q_cons_vf(momxb + 1)%sf(j, k + 1, l + q)/rho_sf_small(1) - & + q_cons_vf(momxb + 1)%sf(j, k - 1, l + q)/rho_sf_small(-1)) + dvel_small(3) = (1/(2._wp*dy(k)))*( & + q_cons_vf(momxb + 2)%sf(j, k + 1, l + q)/rho_sf_small(1) - & + q_cons_vf(momxb + 2)%sf(j, k - 1, l + q)/rho_sf_small(-1)) + + if (q > vidxb) then + vflux_L_arr(2) = vflux_L_arr(2) + coeff_L(q)*(dvel_small(3)) + vflux_L_arr(3) = vflux_L_arr(3) + coeff_L(q)*(-2._wp*dvel_small(2))/3._wp + end if + if (q < vidxe) then + vflux_R_arr(2) = vflux_R_arr(2) + coeff_R(q)*(dvel_small(3)) + vflux_R_arr(3) = vflux_R_arr(3) + coeff_R(q)*(-2._wp*dvel_small(2))/3._wp + end if + + !z-direction contributions + $:GPU_LOOP(parallelism='[seq]') + do i = -1, 1 + rho_L = 0._wp + $:GPU_LOOP(parallelism='[seq]') + do r = 1, num_fluids + rho_L = rho_L + q_cons_vf(r)%sf(j, k, l + i + q) + end do + rho_sf_small(i) = rho_L + end do + dvel_small(1) = (1/(2._wp*dz(l)))*( & + q_cons_vf(momxb)%sf(j, k, l + 1 + q)/rho_sf_small(1) - & + q_cons_vf(momxb)%sf(j, k, l - 1 + q)/rho_sf_small(-1)) + dvel_small(2) = (1/(2._wp*dz(l)))*( & + q_cons_vf(momxb + 1)%sf(j, k, l + 1 + q)/rho_sf_small(1) - & + q_cons_vf(momxb + 1)%sf(j, k, l - 1 + q)/rho_sf_small(-1)) + dvel_small(3) = (1/(2._wp*dz(l)))*( & + q_cons_vf(momxb + 2)%sf(j, k, l + 1 + q)/rho_sf_small(1) - & + q_cons_vf(momxb + 2)%sf(j, k, l - 1 + q)/rho_sf_small(-1)) + if (q > vidxb) then + vflux_L_arr(1) = vflux_L_arr(1) + coeff_L(q)*(dvel_small(1)) + vflux_L_arr(2) = vflux_L_arr(2) + coeff_L(q)*(dvel_small(2)) + vflux_L_arr(3) = vflux_L_arr(3) + coeff_L(q)*(4._wp*dvel_small(3))/3._wp + end if + if (q < vidxe) then + vflux_R_arr(1) = vflux_R_arr(1) + coeff_R(q)*(dvel_small(1)) + vflux_R_arr(2) = vflux_R_arr(2) + coeff_R(q)*(dvel_small(2)) + vflux_R_arr(3) = vflux_R_arr(3) + coeff_R(q)*(4._wp*dvel_small(3))/3._wp + end if + end do + end if + + alpha_rho_L = 0._wp; alpha_rho_R = 0._wp + alpha_L = 0._wp; alpha_R = 0._wp + vel_L = 0._wp; vel_R = 0._wp + + $:GPU_LOOP(parallelism='[seq]') + do q = vidxb + 1, vidxe + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids + alpha_rho_L(i) = alpha_rho_L(i) + coeff_L(q)*q_cons_vf(i)%sf(j, k, l + q) + end do + + if (num_fluids > 1) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids - 1 + alpha_L(i) = alpha_L(i) + coeff_L(q)*q_cons_vf(E_idx + i)%sf(j, k, l + q) + end do + else + alpha_L(1) = 1._wp + end if + + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_dims + vel_L(i) = vel_L(i) + coeff_L(q)*q_cons_vf(momxb + i - 1)%sf(j, k, l + q) + end do + end do + + $:GPU_LOOP(parallelism='[seq]') + do q = vidxb, vidxe - 1 + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids + alpha_rho_R(i) = alpha_rho_R(i) + coeff_R(q)*q_cons_vf(i)%sf(j, k, l + q) + end do + if (num_fluids > 1) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids - 1 + alpha_R(i) = alpha_R(i) + coeff_R(q)*q_cons_vf(E_idx + i)%sf(j, k, l + q) + end do + else + alpha_R(1) = 1._wp + end if + + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_dims + vel_R(i) = vel_R(i) + coeff_R(q)*q_cons_vf(momxb + i - 1)%sf(j, k, l + q) + end do + end do + + if (num_fluids > 1) then + alpha_L(num_fluids) = 1._wp - sum(alpha_L(1:num_fluids - 1)) + alpha_R(num_fluids) = 1._wp - sum(alpha_R(1:num_fluids - 1)) + end if + + rho_L = sum(alpha_rho_L) + gamma_L = sum(alpha_L*gammas) + pi_inf_L = sum(alpha_L*pi_infs) + + rho_R = sum(alpha_rho_R) + gamma_R = sum(alpha_R*gammas) + pi_inf_R = sum(alpha_R*pi_infs) + + vel_L = vel_L/rho_L + vel_R = vel_R/rho_R + + if (viscous) then + mu_L = 0._wp + mu_R = 0._wp + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids + mu_L = alpha_L(i)/Res_igr(1, i) + mu_L + mu_R = alpha_R(i)/Res_igr(1, i) + mu_R + end do + + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb)%sf(j, k, l + 1) = rhs_vf(momxb)%sf(j, k, l + 1) - & + 0.5_wp*mu_L*vflux_L_arr(1)*(1._wp/dz(l + 1)) + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j, k, l + 1) = rhs_vf(E_idx)%sf(j, k, l + 1) - & + 0.5_wp*mu_L*vflux_L_arr(1)*vel_L(1)*(1._wp/dz(l + 1)) + + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb)%sf(j, k, l) = rhs_vf(momxb)%sf(j, k, l) + & + 0.5_wp*mu_L*vflux_L_arr(1)*(1._wp/dz(l)) + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) + & + 0.5_wp*mu_L*vflux_L_arr(1)*vel_L(1)*(1._wp/dz(l)) + + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb)%sf(j, k, l + 1) = rhs_vf(momxb)%sf(j, k, l + 1) - & + 0.5_wp*mu_R*vflux_R_arr(1)*(1._wp/dz(l + 1)) + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j, k, l + 1) = rhs_vf(E_idx)%sf(j, k, l + 1) - & + 0.5_wp*mu_R*vflux_R_arr(1)*vel_R(1)*(1._wp/dz(l + 1)) + + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb)%sf(j, k, l) = rhs_vf(momxb)%sf(j, k, l) + & + 0.5_wp*mu_R*vflux_R_arr(1)*(1._wp/dz(l)) + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) + & + 0.5_wp*mu_R*vflux_R_arr(1)*vel_R(1)*(1._wp/dz(l)) + + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb + 1)%sf(j, k, l + 1) = rhs_vf(momxb + 1)%sf(j, k, l + 1) - & + 0.5_wp*mu_L*vflux_L_arr(2)*(1._wp/dz(l + 1)) + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j, k, l + 1) = rhs_vf(E_idx)%sf(j, k, l + 1) - & + 0.5_wp*mu_L*vflux_L_arr(2)*vel_L(2)*(1._wp/dz(l + 1)) + + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb + 1)%sf(j, k, l) = rhs_vf(momxb + 1)%sf(j, k, l) + & + 0.5_wp*mu_L*vflux_L_arr(2)*(1._wp/dz(l)) + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) + & + 0.5_wp*mu_L*vflux_L_arr(2)*vel_L(2)*(1._wp/dz(l)) + + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb + 1)%sf(j, k, l + 1) = rhs_vf(momxb + 1)%sf(j, k, l + 1) - & + 0.5_wp*mu_R*vflux_R_arr(2)*(1._wp/dz(l + 1)) + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j, k, l + 1) = rhs_vf(E_idx)%sf(j, k, l + 1) - & + 0.5_wp*mu_R*vflux_R_arr(2)*vel_R(2)*(1._wp/dz(l + 1)) + + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb + 1)%sf(j, k, l) = rhs_vf(momxb + 1)%sf(j, k, l) + & + 0.5_wp*mu_R*vflux_R_arr(2)*(1._wp/dz(l)) + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) + & + 0.5_wp*mu_R*vflux_R_arr(2)*vel_R(2)*(1._wp/dz(l)) + + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb + 2)%sf(j, k, l + 1) = rhs_vf(momxb + 2)%sf(j, k, l + 1) - & + 0.5_wp*mu_L*vflux_L_arr(3)*(1._wp/dz(l + 1)) + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j, k, l + 1) = rhs_vf(E_idx)%sf(j, k, l + 1) - & + 0.5_wp*mu_L*vflux_L_arr(3)*vel_L(3)*(1._wp/dz(l + 1)) + + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb + 2)%sf(j, k, l) = rhs_vf(momxb + 2)%sf(j, k, l) + & + 0.5_wp*mu_L*vflux_L_arr(3)*(1._wp/dz(l)) + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) + & + 0.5_wp*mu_L*vflux_L_arr(3)*vel_L(3)*(1._wp/dz(l)) + + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb + 2)%sf(j, k, l + 1) = rhs_vf(momxb + 2)%sf(j, k, l + 1) - & + 0.5_wp*mu_R*vflux_R_arr(3)*(1._wp/dz(l + 1)) + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j, k, l + 1) = rhs_vf(E_idx)%sf(j, k, l + 1) - & + 0.5_wp*mu_R*vflux_R_arr(3)*vel_R(3)*(1._wp/dz(l + 1)) + + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb + 2)%sf(j, k, l) = rhs_vf(momxb + 2)%sf(j, k, l) + & + 0.5_wp*mu_R*vflux_R_arr(3)*(1._wp/dz(l)) + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) + & + 0.5_wp*mu_R*vflux_R_arr(3)*vel_R(3)*(1._wp/dz(l)) + end if + + E_L = 0._wp; E_R = 0._wp + F_L = 0._wp; F_R = 0._wp + + $:GPU_LOOP(parallelism='[seq]') + do q = vidxb + 1, vidxe + E_L = E_L + coeff_L(q)*q_cons_vf(E_idx)%sf(j, k, l + q) + F_L = F_L + coeff_L(q)*jac(j, k, l + q) + end do + + $:GPU_LOOP(parallelism='[seq]') + do q = vidxb, vidxe - 1 + E_R = E_R + coeff_R(q)*q_cons_vf(E_idx)%sf(j, k, l + q) + F_R = F_R + coeff_R(q)*jac(j, k, l + q) + end do + + call s_get_derived_states(E_L, gamma_L, pi_inf_L, rho_L, vel_L, & + E_R, gamma_R, pi_inf_R, rho_R, vel_R, & + pres_L, pres_R, cfl) + + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids + $:GPU_ATOMIC(atomic='update') + rhs_vf(i)%sf(j, k, l + 1) = rhs_vf(i)%sf(j, k, l + 1) + & + (0.5_wp*(alpha_rho_L(i)* & + vel_L(3))*(1._wp/dz(l + 1)) - & + 0.5_wp*cfl*(alpha_rho_L(i))*(1._wp/dz(l + 1))) + + $:GPU_ATOMIC(atomic='update') + rhs_vf(i)%sf(j, k, l) = rhs_vf(i)%sf(j, k, l) - & + (0.5_wp*(alpha_rho_L(i)* & + vel_L(3))*(1._wp/dz(l)) - & + 0.5_wp*cfl*(alpha_rho_L(i))*(1._wp/dz(l))) + end do + + if (num_fluids > 1) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids - 1 + $:GPU_ATOMIC(atomic='update') + rhs_vf(advxb + i - 1)%sf(j, k, l + 1) = rhs_vf(advxb + i - 1)%sf(j, k, l + 1) + & + (0.5_wp*(alpha_L(i)* & + vel_L(3))*(1._wp/dz(l + 1)) - & + 0.5_wp*cfl*(alpha_L(i))*(1._wp/dz(l + 1))) + + $:GPU_ATOMIC(atomic='update') + rhs_vf(advxb + i - 1)%sf(j, k, l + 1) = rhs_vf(advxb + i - 1)%sf(j, k, l + 1) & + - (0.5_wp*q_cons_vf(advxb + i - 1)%sf(j, k, l + 1)*vel_L(3)*(1._wp/dz(l + 1))) + + $:GPU_ATOMIC(atomic='update') + rhs_vf(advxb + i - 1)%sf(j, k, l) = rhs_vf(advxb + i - 1)%sf(j, k, l) - & + (0.5_wp*(alpha_L(i)* & + vel_L(3))*(1._wp/dz(l)) - & + 0.5_wp*cfl*(alpha_L(i))*(1._wp/dz(l))) + + $:GPU_ATOMIC(atomic='update') + rhs_vf(advxb + i - 1)%sf(j, k, l) = rhs_vf(advxb + i - 1)%sf(j, k, l) & + + (0.5_wp*q_cons_vf(advxb + i - 1)%sf(j, k, l)*vel_L(3)*(1._wp/dz(l))) + end do + end if + + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb + 2)%sf(j, k, l + 1) = rhs_vf(momxb + 2)%sf(j, k, l + 1) + & + (0.5_wp*(rho_L*(vel_L(3))**2.0 + & + pres_L + F_L)*(1._wp/dz(l + 1)) - & + 0.5_wp*cfl*(rho_L*vel_L(3))*(1._wp/dz(l + 1))) + + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb)%sf(j, k, l + 1) = rhs_vf(momxb)%sf(j, k, l + 1) + & + (0.5_wp*rho_L*vel_L(1)*vel_L(3)*(1._wp/dz(l + 1)) - & + 0.5_wp*cfl*(rho_L*vel_L(1))*(1._wp/dz(l + 1))) + + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb + 1)%sf(j, k, l + 1) = rhs_vf(momxb + 1)%sf(j, k, l + 1) + & + (0.5_wp*rho_L*vel_L(2)*vel_L(3)*(1._wp/dz(l + 1)) - & + 0.5_wp*cfl*(rho_L*vel_L(2))*(1._wp/dz(l + 1))) + + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j, k, l + 1) = rhs_vf(E_idx)%sf(j, k, l + 1) + & + (0.5_wp*(vel_L(3)*(E_L + & + pres_L + F_L))*(1._wp/dz(l + 1)) - & + 0.5_wp*cfl*(E_L)*(1._wp/dz(l + 1))) + + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb + 2)%sf(j, k, l) = rhs_vf(momxb + 2)%sf(j, k, l) - & + (0.5_wp*(rho_L*(vel_L(3))**2.0 + & + pres_L + F_L)*(1._wp/dz(l)) - & + 0.5_wp*cfl*(rho_L*vel_L(3))*(1._wp/dz(l))) + + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb)%sf(j, k, l) = rhs_vf(momxb)%sf(j, k, l) - & + (0.5_wp*rho_L*vel_L(1)*vel_L(3)*(1._wp/dz(l)) - & + 0.5_wp*cfl*(rho_L*vel_L(1))*(1._wp/dz(l))) + + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb + 1)%sf(j, k, l) = rhs_vf(momxb + 1)%sf(j, k, l) - & + (0.5_wp*rho_L*vel_L(2)*vel_L(3)*(1._wp/dz(l)) - & + 0.5_wp*cfl*(rho_L*vel_L(2))*(1._wp/dz(l))) + + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) - & + (0.5_wp*(vel_L(3)*(E_L + & + pres_L + F_L))*(1._wp/dz(l)) - & + 0.5_wp*cfl*(E_L)*(1._wp/dz(l))) + + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids + $:GPU_ATOMIC(atomic='update') + rhs_vf(i)%sf(j, k, l + 1) = rhs_vf(i)%sf(j, k, l + 1) + & + (0.5_wp*(alpha_rho_R(i)* & + vel_R(3))*(1._wp/dz(l + 1)) + & + 0.5_wp*cfl*(alpha_rho_R(i))*(1._wp/dz(l + 1))) + + $:GPU_ATOMIC(atomic='update') + rhs_vf(i)%sf(j, k, l) = rhs_vf(i)%sf(j, k, l) - & + (0.5_wp*(alpha_rho_R(i)* & + vel_R(3))*(1._wp/dz(l)) + & + 0.5_wp*cfl*(alpha_rho_R(i))*(1._wp/dz(l))) end do + + if (num_fluids > 1) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids - 1 + $:GPU_ATOMIC(atomic='update') + rhs_vf(advxb + i - 1)%sf(j, k, l + 1) = rhs_vf(advxb + i - 1)%sf(j, k, l + 1) + & + (0.5_wp*(alpha_R(i)* & + vel_R(3))*(1._wp/dz(l + 1)) + & + 0.5_wp*cfl*(alpha_R(i))*(1._wp/dz(l + 1))) + + $:GPU_ATOMIC(atomic='update') + rhs_vf(advxb + i - 1)%sf(j, k, l + 1) = rhs_vf(advxb + i - 1)%sf(j, k, l + 1) & + - (0.5_wp*q_cons_vf(advxb + i - 1)%sf(j, k, l + 1)*vel_R(3)*(1._wp/dz(l + 1))) + + $:GPU_ATOMIC(atomic='update') + rhs_vf(advxb + i - 1)%sf(j, k, l) = rhs_vf(advxb + i - 1)%sf(j, k, l) - & + (0.5_wp*(alpha_R(i)* & + vel_R(3))*(1._wp/dz(l)) + & + 0.5_wp*cfl*(alpha_R(i))*(1._wp/dz(l))) + + $:GPU_ATOMIC(atomic='update') + rhs_vf(advxb + i - 1)%sf(j, k, l) = rhs_vf(advxb + i - 1)%sf(j, k, l) & + + (0.5_wp*q_cons_vf(advxb + i - 1)%sf(j, k, l)*vel_R(3)*(1._wp/dz(l))) + end do + end if + + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb + 2)%sf(j, k, l + 1) = rhs_vf(momxb + 2)%sf(j, k, l + 1) + & + (0.5_wp*(rho_R*(vel_R(3))**2.0 + & + pres_R + F_R)*(1._wp/dz(l + 1)) + & + 0.5_wp*cfl*(rho_R*vel_R(3))*(1._wp/dz(l + 1))) + + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb)%sf(j, k, l + 1) = rhs_vf(momxb)%sf(j, k, l + 1) + & + (0.5_wp*rho_R*vel_R(1)*vel_R(3)*(1._wp/dz(l + 1)) + & + 0.5_wp*cfl*(rho_R*vel_R(1))*(1._wp/dz(l + 1))) + + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb + 1)%sf(j, k, l + 1) = rhs_vf(momxb + 1)%sf(j, k, l + 1) + & + (0.5_wp*rho_R*vel_R(2)*vel_R(3)*(1._wp/dz(l + 1)) + & + 0.5_wp*cfl*(rho_R*vel_R(2))*(1._wp/dz(l + 1))) + + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j, k, l + 1) = rhs_vf(E_idx)%sf(j, k, l + 1) + & + (0.5_wp*(vel_R(3)*(E_R + & + pres_R + F_R))*(1._wp/dz(l + 1)) + & + 0.5_wp*cfl*(E_R)*(1._wp/dz(l + 1))) + + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb + 2)%sf(j, k, l) = rhs_vf(momxb + 2)%sf(j, k, l) - & + (0.5_wp*(rho_R*(vel_R(3))**2.0 + & + pres_R + F_R)*(1._wp/dz(l)) + & + 0.5_wp*cfl*(rho_R*vel_R(3))*(1._wp/dz(l))) + + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb)%sf(j, k, l) = rhs_vf(momxb)%sf(j, k, l) - & + (0.5_wp*rho_R*vel_R(1)*vel_R(3)*(1._wp/dz(l)) + & + 0.5_wp*cfl*(rho_R*vel_R(1))*(1._wp/dz(l))) + + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb + 1)%sf(j, k, l) = rhs_vf(momxb + 1)%sf(j, k, l) - & + (0.5_wp*rho_R*vel_R(2)*vel_R(3)*(1._wp/dz(l)) + & + 0.5_wp*cfl*(rho_R*vel_R(2))*(1._wp/dz(l))) + + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) - & + (0.5_wp*(vel_R(3)*(E_R + & + pres_R + F_R))*(1._wp/dz(l)) + & + 0.5_wp*cfl*(E_R)*(1._wp/dz(l))) + end do end do + end do $:END_GPU_PARALLEL_LOOP() end if @@ -2612,47 +2612,47 @@ contains if (idir == 1) then $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=4) - do i = 1, sys_size - do l = 0, p - do k = 0, n - do j = 0, m - rhs_vf(i)%sf(j, k, l) = 1._wp/dx(j)* & - (flux_vf(i)%sf(j - 1, k, l) & - - flux_vf(i)%sf(j, k, l)) - end do + do i = 1, sys_size + do l = 0, p + do k = 0, n + do j = 0, m + rhs_vf(i)%sf(j, k, l) = 1._wp/dx(j)* & + (flux_vf(i)%sf(j - 1, k, l) & + - flux_vf(i)%sf(j, k, l)) end do end do end do + end do $:END_GPU_PARALLEL_LOOP() elseif (idir == 2) then $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=4) - do i = 1, sys_size - do l = 0, p - do k = 0, n - do j = 0, m - rhs_vf(i)%sf(j, k, l) = & - rhs_vf(i)%sf(j, k, l) + 1._wp/dy(k)* & - (flux_vf(i)%sf(j, k - 1, l) & - - flux_vf(i)%sf(j, k, l)) - end do + do i = 1, sys_size + do l = 0, p + do k = 0, n + do j = 0, m + rhs_vf(i)%sf(j, k, l) = & + rhs_vf(i)%sf(j, k, l) + 1._wp/dy(k)* & + (flux_vf(i)%sf(j, k - 1, l) & + - flux_vf(i)%sf(j, k, l)) end do end do end do + end do $:END_GPU_PARALLEL_LOOP() elseif (idir == 3) then $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=4) - do i = 1, sys_size - do l = 0, p - do k = 0, n - do j = 0, m - rhs_vf(i)%sf(j, k, l) = & - rhs_vf(i)%sf(j, k, l) + 1._wp/dz(l)* & - (flux_vf(i)%sf(j, k, l - 1) & - - flux_vf(i)%sf(j, k, l)) - end do + do i = 1, sys_size + do l = 0, p + do k = 0, n + do j = 0, m + rhs_vf(i)%sf(j, k, l) = & + rhs_vf(i)%sf(j, k, l) + 1._wp/dz(l)* & + (flux_vf(i)%sf(j, k, l - 1) & + - flux_vf(i)%sf(j, k, l)) end do end do end do + end do $:END_GPU_PARALLEL_LOOP() end if diff --git a/src/simulation/m_mhd.fpp b/src/simulation/m_mhd.fpp index a4abbd9ef..98149d493 100644 --- a/src/simulation/m_mhd.fpp +++ b/src/simulation/m_mhd.fpp @@ -77,58 +77,58 @@ contains real(wp) :: divB, vdotB $:GPU_PARALLEL_LOOP(collapse=3, private='[k,l,q,v, B]') - do q = 0, p - do l = 0, n - do k = 0, m - - divB = 0._wp - $:GPU_LOOP(parallelism='[seq]') - do r = -fd_number, fd_number - divB = divB + q_prim_vf(B_idx%beg)%sf(k + r, l, q)*fd_coeff_x_h(r, k) - end do + do q = 0, p + do l = 0, n + do k = 0, m + + divB = 0._wp + $:GPU_LOOP(parallelism='[seq]') + do r = -fd_number, fd_number + divB = divB + q_prim_vf(B_idx%beg)%sf(k + r, l, q)*fd_coeff_x_h(r, k) + end do + $:GPU_LOOP(parallelism='[seq]') + do r = -fd_number, fd_number + divB = divB + q_prim_vf(B_idx%beg + 1)%sf(k, l + r, q)*fd_coeff_y_h(r, l) + end do + if (p > 0) then $:GPU_LOOP(parallelism='[seq]') do r = -fd_number, fd_number - divB = divB + q_prim_vf(B_idx%beg + 1)%sf(k, l + r, q)*fd_coeff_y_h(r, l) + divB = divB + q_prim_vf(B_idx%beg + 2)%sf(k, l, q + r)*fd_coeff_z_h(r, q) end do - if (p > 0) then - $:GPU_LOOP(parallelism='[seq]') - do r = -fd_number, fd_number - divB = divB + q_prim_vf(B_idx%beg + 2)%sf(k, l, q + r)*fd_coeff_z_h(r, q) - end do - end if - - v(1) = q_prim_vf(momxb)%sf(k, l, q) - v(2) = q_prim_vf(momxb + 1)%sf(k, l, q) - v(3) = q_prim_vf(momxb + 2)%sf(k, l, q) - - B(1) = q_prim_vf(B_idx%beg)%sf(k, l, q) - B(2) = q_prim_vf(B_idx%beg + 1)%sf(k, l, q) - B(3) = q_prim_vf(B_idx%beg + 2)%sf(k, l, q) - - vdotB = sum(v*B) - - ! 1: rho -> unchanged - ! 2: vx -> - (divB) * Bx - ! 3: vy -> - (divB) * By - ! 4: vz -> - (divB) * Bz - ! 5: E -> - (divB) * (vdotB) - ! 6: Bx -> - (divB) * vx - ! 7: By -> - (divB) * vy - ! 8: Bz -> - (divB) * vz - - rhs_vf(momxb)%sf(k, l, q) = rhs_vf(momxb)%sf(k, l, q) - divB*B(1) - rhs_vf(momxb + 1)%sf(k, l, q) = rhs_vf(momxb + 1)%sf(k, l, q) - divB*B(2) - rhs_vf(momxb + 2)%sf(k, l, q) = rhs_vf(momxb + 2)%sf(k, l, q) - divB*B(3) - - rhs_vf(E_idx)%sf(k, l, q) = rhs_vf(E_idx)%sf(k, l, q) - divB*vdotB - - rhs_vf(B_idx%beg)%sf(k, l, q) = rhs_vf(B_idx%beg)%sf(k, l, q) - divB*v(1) - rhs_vf(B_idx%beg + 1)%sf(k, l, q) = rhs_vf(B_idx%beg + 1)%sf(k, l, q) - divB*v(2) - rhs_vf(B_idx%beg + 2)%sf(k, l, q) = rhs_vf(B_idx%beg + 2)%sf(k, l, q) - divB*v(3) + end if + + v(1) = q_prim_vf(momxb)%sf(k, l, q) + v(2) = q_prim_vf(momxb + 1)%sf(k, l, q) + v(3) = q_prim_vf(momxb + 2)%sf(k, l, q) + + B(1) = q_prim_vf(B_idx%beg)%sf(k, l, q) + B(2) = q_prim_vf(B_idx%beg + 1)%sf(k, l, q) + B(3) = q_prim_vf(B_idx%beg + 2)%sf(k, l, q) + + vdotB = sum(v*B) + + ! 1: rho -> unchanged + ! 2: vx -> - (divB) * Bx + ! 3: vy -> - (divB) * By + ! 4: vz -> - (divB) * Bz + ! 5: E -> - (divB) * (vdotB) + ! 6: Bx -> - (divB) * vx + ! 7: By -> - (divB) * vy + ! 8: Bz -> - (divB) * vz + + rhs_vf(momxb)%sf(k, l, q) = rhs_vf(momxb)%sf(k, l, q) - divB*B(1) + rhs_vf(momxb + 1)%sf(k, l, q) = rhs_vf(momxb + 1)%sf(k, l, q) - divB*B(2) + rhs_vf(momxb + 2)%sf(k, l, q) = rhs_vf(momxb + 2)%sf(k, l, q) - divB*B(3) + + rhs_vf(E_idx)%sf(k, l, q) = rhs_vf(E_idx)%sf(k, l, q) - divB*vdotB + + rhs_vf(B_idx%beg)%sf(k, l, q) = rhs_vf(B_idx%beg)%sf(k, l, q) - divB*v(1) + rhs_vf(B_idx%beg + 1)%sf(k, l, q) = rhs_vf(B_idx%beg + 1)%sf(k, l, q) - divB*v(2) + rhs_vf(B_idx%beg + 2)%sf(k, l, q) = rhs_vf(B_idx%beg + 2)%sf(k, l, q) - divB*v(3) - end do end do end do + end do $:END_GPU_PARALLEL_LOOP() end subroutine s_compute_mhd_powell_rhs diff --git a/src/simulation/m_mpi_proxy.fpp b/src/simulation/m_mpi_proxy.fpp index 6dfddcc90..d0a2ffbfa 100644 --- a/src/simulation/m_mpi_proxy.fpp +++ b/src/simulation/m_mpi_proxy.fpp @@ -314,38 +314,38 @@ contains if (mpi_dir == ${mpi_dir}$) then #:if mpi_dir == 1 $:GPU_PARALLEL_LOOP(collapse=3,private='[j,k,l,r]') - do l = 0, p - do k = 0, n - do j = 0, buff_size - 1 - r = (j + buff_size*(k + (n + 1)*l)) - ib_buff_send(r) = ib_markers%sf(j + pack_offset, k, l) - end do + do l = 0, p + do k = 0, n + do j = 0, buff_size - 1 + r = (j + buff_size*(k + (n + 1)*l)) + ib_buff_send(r) = ib_markers%sf(j + pack_offset, k, l) end do end do + end do $:END_GPU_PARALLEL_LOOP() #:elif mpi_dir == 2 $:GPU_PARALLEL_LOOP(collapse=3,private='[j,k,l,r]') - do l = 0, p - do k = 0, buff_size - 1 - do j = -buff_size, m + buff_size - r = ((j + buff_size) + (m + 2*buff_size + 1)* & - (k + buff_size*l)) - ib_buff_send(r) = ib_markers%sf(j, k + pack_offset, l) - end do + do l = 0, p + do k = 0, buff_size - 1 + do j = -buff_size, m + buff_size + r = ((j + buff_size) + (m + 2*buff_size + 1)* & + (k + buff_size*l)) + ib_buff_send(r) = ib_markers%sf(j, k + pack_offset, l) end do end do + end do $:END_GPU_PARALLEL_LOOP() #:else $:GPU_PARALLEL_LOOP(collapse=3,private='[j,k,l,r]') - do l = 0, buff_size - 1 - do k = -buff_size, n + buff_size - do j = -buff_size, m + buff_size - r = ((j + buff_size) + (m + 2*buff_size + 1)* & - ((k + buff_size) + (n + 2*buff_size + 1)*l)) - ib_buff_send(r) = ib_markers%sf(j, k, l + pack_offset) - end do + do l = 0, buff_size - 1 + do k = -buff_size, n + buff_size + do j = -buff_size, m + buff_size + r = ((j + buff_size) + (m + 2*buff_size + 1)* & + ((k + buff_size) + (n + 2*buff_size + 1)*l)) + ib_buff_send(r) = ib_markers%sf(j, k, l + pack_offset) end do end do + end do $:END_GPU_PARALLEL_LOOP() #:endif end if @@ -391,40 +391,40 @@ contains if (mpi_dir == ${mpi_dir}$) then #:if mpi_dir == 1 $:GPU_PARALLEL_LOOP(collapse=3,private='[j,k,l,r]') - do l = 0, p - do k = 0, n - do j = -buff_size, -1 - r = (j + buff_size*((k + 1) + (n + 1)*l)) - ib_markers%sf(j + unpack_offset, k, l) = ib_buff_recv(r) - end do + do l = 0, p + do k = 0, n + do j = -buff_size, -1 + r = (j + buff_size*((k + 1) + (n + 1)*l)) + ib_markers%sf(j + unpack_offset, k, l) = ib_buff_recv(r) end do end do + end do $:END_GPU_PARALLEL_LOOP() #:elif mpi_dir == 2 $:GPU_PARALLEL_LOOP(collapse=3,private='[j,k,l,r]') - do l = 0, p - do k = -buff_size, -1 - do j = -buff_size, m + buff_size - r = ((j + buff_size) + (m + 2*buff_size + 1)* & - ((k + buff_size) + buff_size*l)) - ib_markers%sf(j, k + unpack_offset, l) = ib_buff_recv(r) - end do + do l = 0, p + do k = -buff_size, -1 + do j = -buff_size, m + buff_size + r = ((j + buff_size) + (m + 2*buff_size + 1)* & + ((k + buff_size) + buff_size*l)) + ib_markers%sf(j, k + unpack_offset, l) = ib_buff_recv(r) end do end do + end do $:END_GPU_PARALLEL_LOOP() #:else ! Unpacking buffer from bc_z%beg $:GPU_PARALLEL_LOOP(collapse=3,private='[j,k,l,r]') - do l = -buff_size, -1 - do k = -buff_size, n + buff_size - do j = -buff_size, m + buff_size - r = ((j + buff_size) + (m + 2*buff_size + 1)* & - ((k + buff_size) + (n + 2*buff_size + 1)* & - (l + buff_size))) - ib_markers%sf(j, k, l + unpack_offset) = ib_buff_recv(r) - end do + do l = -buff_size, -1 + do k = -buff_size, n + buff_size + do j = -buff_size, m + buff_size + r = ((j + buff_size) + (m + 2*buff_size + 1)* & + ((k + buff_size) + (n + 2*buff_size + 1)* & + (l + buff_size))) + ib_markers%sf(j, k, l + unpack_offset) = ib_buff_recv(r) end do end do + end do $:END_GPU_PARALLEL_LOOP() #:endif end if diff --git a/src/simulation/m_muscl.fpp b/src/simulation/m_muscl.fpp index b0e046fa9..2c2b7a014 100644 --- a/src/simulation/m_muscl.fpp +++ b/src/simulation/m_muscl.fpp @@ -119,42 +119,42 @@ contains if (muscl_order == 1) then if (muscl_dir == 1) then $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=4) - do i = 1, ubound(v_vf, 1) - do l = is3_muscl%beg, is3_muscl%end - do k = is2_muscl%beg, is2_muscl%end - do j = is1_muscl%beg, is1_muscl%end - vL_rs_vf_x(j, k, l, i) = v_vf(i)%sf(j, k, l) - vR_rs_vf_x(j, k, l, i) = v_vf(i)%sf(j, k, l) - end do + do i = 1, ubound(v_vf, 1) + do l = is3_muscl%beg, is3_muscl%end + do k = is2_muscl%beg, is2_muscl%end + do j = is1_muscl%beg, is1_muscl%end + vL_rs_vf_x(j, k, l, i) = v_vf(i)%sf(j, k, l) + vR_rs_vf_x(j, k, l, i) = v_vf(i)%sf(j, k, l) end do end do end do + end do $:END_OMP_PARALLEL_LOOP() else if (muscl_dir == 2) then $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=4) - do i = 1, ubound(v_vf, 1) - do l = is3_muscl%beg, is3_muscl%end - do k = is2_muscl%beg, is2_muscl%end - do j = is1_muscl%beg, is1_muscl%end - vL_rs_vf_y(j, k, l, i) = v_vf(i)%sf(k, j, l) - vR_rs_vf_y(j, k, l, i) = v_vf(i)%sf(k, j, l) - end do + do i = 1, ubound(v_vf, 1) + do l = is3_muscl%beg, is3_muscl%end + do k = is2_muscl%beg, is2_muscl%end + do j = is1_muscl%beg, is1_muscl%end + vL_rs_vf_y(j, k, l, i) = v_vf(i)%sf(k, j, l) + vR_rs_vf_y(j, k, l, i) = v_vf(i)%sf(k, j, l) end do end do end do + end do $:END_OMP_PARALLEL_LOOP() else if (muscl_dir == 3) then $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=4) - do i = 1, ubound(v_vf, 1) - do l = is3_muscl%beg, is3_muscl%end - do k = is2_muscl%beg, is2_muscl%end - do j = is1_muscl%beg, is1_muscl%end - vL_rs_vf_z(j, k, l, i) = v_vf(i)%sf(l, k, j) - vR_rs_vf_z(j, k, l, i) = v_vf(i)%sf(l, k, j) - end do + do i = 1, ubound(v_vf, 1) + do l = is3_muscl%beg, is3_muscl%end + do k = is2_muscl%beg, is2_muscl%end + do j = is1_muscl%beg, is1_muscl%end + vL_rs_vf_z(j, k, l, i) = v_vf(i)%sf(l, k, j) + vR_rs_vf_z(j, k, l, i) = v_vf(i)%sf(l, k, j) end do end do end do + end do $:END_OMP_PARALLEL_LOOP() end if @@ -163,55 +163,55 @@ contains #:for MUSCL_DIR, XYZ in [(1, 'x'), (2, 'y'), (3, 'z')] if (muscl_dir == ${MUSCL_DIR}$) then $:GPU_PARALLEL_LOOP(collapse=4,private='[i,j,k,l,slopeL,slopeR,slope]') - do l = is3_muscl%beg, is3_muscl%end - do k = is2_muscl%beg, is2_muscl%end - do j = is1_muscl%beg, is1_muscl%end - do i = 1, v_size - - slopeL = v_rs_ws_${XYZ}$ (j + 1, k, l, i) - & - v_rs_ws_${XYZ}$ (j, k, l, i) - slopeR = v_rs_ws_${XYZ}$ (j, k, l, i) - & - v_rs_ws_${XYZ}$ (j - 1, k, l, i) - slope = 0._wp - - if (muscl_lim == 1) then ! minmod - if (slopeL*slopeR > 1e-9_wp) then - slope = min(abs(slopeL), abs(slopeR)) - end if - if (slopeL < 0._wp) slope = -slope - elseif (muscl_lim == 2) then ! MC - if (slopeL*slopeR > 1e-9_wp) then - slope = min(2._wp*abs(slopeL), 2._wp*abs(slopeR)) - slope = min(slope, 5e-1_wp*(abs(slopeL) + abs(slopeR))) - end if - if (slopeL < 0._wp) slope = -slope - elseif (muscl_lim == 3) then ! Van Albada - if (abs(slopeL) > 1e-6_wp .and. abs(slopeR) > 1e-6_wp .and. & - abs(slopeL + slopeR) > 1e-6_wp .and. slopeL*slopeR > 1e-6_wp) then - slope = ((slopeL + slopeR)*slopeL*slopeR)/(slopeL**2._wp + slopeR**2._wp) - end if - elseif (muscl_lim == 4) then ! Van Leer - if (abs(slopeL + slopeR) > 1.e-6_wp .and. slopeL*slopeR > 1.e-6_wp) then - slope = 2._wp*slopeL*slopeR/(slopeL + slopeR) - end if - elseif (muscl_lim == 5) then ! SUPERBEE - if (slopeL*slopeR > 1e-6_wp) then - slope = -1._wp*min(-min(2._wp*abs(slopeL), abs(slopeR)), -min(abs(slopeL), 2._wp*abs(slopeR))) - end if + do l = is3_muscl%beg, is3_muscl%end + do k = is2_muscl%beg, is2_muscl%end + do j = is1_muscl%beg, is1_muscl%end + do i = 1, v_size + + slopeL = v_rs_ws_${XYZ}$ (j + 1, k, l, i) - & + v_rs_ws_${XYZ}$ (j, k, l, i) + slopeR = v_rs_ws_${XYZ}$ (j, k, l, i) - & + v_rs_ws_${XYZ}$ (j - 1, k, l, i) + slope = 0._wp + + if (muscl_lim == 1) then ! minmod + if (slopeL*slopeR > 1e-9_wp) then + slope = min(abs(slopeL), abs(slopeR)) + end if + if (slopeL < 0._wp) slope = -slope + elseif (muscl_lim == 2) then ! MC + if (slopeL*slopeR > 1e-9_wp) then + slope = min(2._wp*abs(slopeL), 2._wp*abs(slopeR)) + slope = min(slope, 5e-1_wp*(abs(slopeL) + abs(slopeR))) + end if + if (slopeL < 0._wp) slope = -slope + elseif (muscl_lim == 3) then ! Van Albada + if (abs(slopeL) > 1e-6_wp .and. abs(slopeR) > 1e-6_wp .and. & + abs(slopeL + slopeR) > 1e-6_wp .and. slopeL*slopeR > 1e-6_wp) then + slope = ((slopeL + slopeR)*slopeL*slopeR)/(slopeL**2._wp + slopeR**2._wp) + end if + elseif (muscl_lim == 4) then ! Van Leer + if (abs(slopeL + slopeR) > 1.e-6_wp .and. slopeL*slopeR > 1.e-6_wp) then + slope = 2._wp*slopeL*slopeR/(slopeL + slopeR) + end if + elseif (muscl_lim == 5) then ! SUPERBEE + if (slopeL*slopeR > 1e-6_wp) then + slope = -1._wp*min(-min(2._wp*abs(slopeL), abs(slopeR)), -min(abs(slopeL), 2._wp*abs(slopeR))) end if + end if - ! reconstruct from left side - vL_rs_vf_${XYZ}$ (j, k, l, i) = & - v_rs_ws_${XYZ}$ (j, k, l, i) - (5.e-1_wp*slope) + ! reconstruct from left side + vL_rs_vf_${XYZ}$ (j, k, l, i) = & + v_rs_ws_${XYZ}$ (j, k, l, i) - (5.e-1_wp*slope) - ! reconstruct from the right side - vR_rs_vf_${XYZ}$ (j, k, l, i) = & - v_rs_ws_${XYZ}$ (j, k, l, i) + (5.e-1_wp*slope) + ! reconstruct from the right side + vR_rs_vf_${XYZ}$ (j, k, l, i) = & + v_rs_ws_${XYZ}$ (j, k, l, i) + (5.e-1_wp*slope) - end do end do end do end do + end do $:END_OMP_PARALLEL_LOOP() end if #:endfor @@ -244,58 +244,58 @@ contains if (muscl_dir == ${MUSCL_DIR}$) then $:GPU_PARALLEL_LOOP(collapse=3,private='[j,k,l,aCL,aC,aCR,aTHINC,moncon,sign,qmin,qmax]') - do l = is3_muscl%beg, is3_muscl%end - do k = is2_muscl%beg, is2_muscl%end - do j = is1_muscl%beg, is1_muscl%end + do l = is3_muscl%beg, is3_muscl%end + do k = is2_muscl%beg, is2_muscl%end + do j = is1_muscl%beg, is1_muscl%end - aCL = v_rs_ws_${XYZ}$ (j - 1, k, l, advxb) - aC = v_rs_ws_${XYZ}$ (j, k, l, advxb) - aCR = v_rs_ws_${XYZ}$ (j + 1, k, l, advxb) + aCL = v_rs_ws_${XYZ}$ (j - 1, k, l, advxb) + aC = v_rs_ws_${XYZ}$ (j, k, l, advxb) + aCR = v_rs_ws_${XYZ}$ (j + 1, k, l, advxb) - moncon = (aCR - aC)*(aC - aCL) + moncon = (aCR - aC)*(aC - aCL) - if (aC >= ic_eps .and. aC <= 1._wp - ic_eps .and. moncon > moncon_cutoff) then ! Interface cell - - if (aCR - aCL > 0._wp) then - sign = 1._wp - else - sign = -1._wp - end if - - qmin = min(aCR, aCL) - qmax = max(aCR, aCL) - qmin - - C = (aC - qmin + sgm_eps)/(qmax + sgm_eps) - B = exp(sign*ic_beta*(2._wp*C - 1._wp)) - A = (B/cosh(ic_beta) - 1._wp)/tanh(ic_beta) - - ! Left reconstruction - aTHINC = qmin + 5e-1_wp*qmax*(1._wp + sign*A) - if (aTHINC < ic_eps) aTHINC = ic_eps - if (aTHINC > 1 - ic_eps) aTHINC = 1 - ic_eps - vL_rs_vf_${XYZ}$ (j, k, l, contxb) = vL_rs_vf_${XYZ}$ (j, k, l, contxb)/ & - vL_rs_vf_${XYZ}$ (j, k, l, advxb)*aTHINC - vL_rs_vf_${XYZ}$ (j, k, l, contxe) = vL_rs_vf_${XYZ}$ (j, k, l, contxe)/ & - (1._wp - vL_rs_vf_${XYZ}$ (j, k, l, advxb))*(1._wp - aTHINC) - vL_rs_vf_${XYZ}$ (j, k, l, advxb) = aTHINC - vL_rs_vf_${XYZ}$ (j, k, l, advxe) = 1 - aTHINC - - ! Right reconstruction - aTHINC = qmin + 5e-1_wp*qmax*(1._wp + sign*(tanh(ic_beta) + A)/(1._wp + A*tanh(ic_beta))) - if (aTHINC < ic_eps) aTHINC = ic_eps - if (aTHINC > 1 - ic_eps) aTHINC = 1 - ic_eps - vR_rs_vf_${XYZ}$ (j, k, l, contxb) = vL_rs_vf_${XYZ}$ (j, k, l, contxb)/ & - vL_rs_vf_${XYZ}$ (j, k, l, advxb)*aTHINC - vR_rs_vf_${XYZ}$ (j, k, l, contxe) = vL_rs_vf_${XYZ}$ (j, k, l, contxe)/ & - (1._wp - vL_rs_vf_${XYZ}$ (j, k, l, advxb))*(1._wp - aTHINC) - vR_rs_vf_${XYZ}$ (j, k, l, advxb) = aTHINC - vR_rs_vf_${XYZ}$ (j, k, l, advxe) = 1 - aTHINC + if (aC >= ic_eps .and. aC <= 1._wp - ic_eps .and. moncon > moncon_cutoff) then ! Interface cell + if (aCR - aCL > 0._wp) then + sign = 1._wp + else + sign = -1._wp end if - end do + qmin = min(aCR, aCL) + qmax = max(aCR, aCL) - qmin + + C = (aC - qmin + sgm_eps)/(qmax + sgm_eps) + B = exp(sign*ic_beta*(2._wp*C - 1._wp)) + A = (B/cosh(ic_beta) - 1._wp)/tanh(ic_beta) + + ! Left reconstruction + aTHINC = qmin + 5e-1_wp*qmax*(1._wp + sign*A) + if (aTHINC < ic_eps) aTHINC = ic_eps + if (aTHINC > 1 - ic_eps) aTHINC = 1 - ic_eps + vL_rs_vf_${XYZ}$ (j, k, l, contxb) = vL_rs_vf_${XYZ}$ (j, k, l, contxb)/ & + vL_rs_vf_${XYZ}$ (j, k, l, advxb)*aTHINC + vL_rs_vf_${XYZ}$ (j, k, l, contxe) = vL_rs_vf_${XYZ}$ (j, k, l, contxe)/ & + (1._wp - vL_rs_vf_${XYZ}$ (j, k, l, advxb))*(1._wp - aTHINC) + vL_rs_vf_${XYZ}$ (j, k, l, advxb) = aTHINC + vL_rs_vf_${XYZ}$ (j, k, l, advxe) = 1 - aTHINC + + ! Right reconstruction + aTHINC = qmin + 5e-1_wp*qmax*(1._wp + sign*(tanh(ic_beta) + A)/(1._wp + A*tanh(ic_beta))) + if (aTHINC < ic_eps) aTHINC = ic_eps + if (aTHINC > 1 - ic_eps) aTHINC = 1 - ic_eps + vR_rs_vf_${XYZ}$ (j, k, l, contxb) = vL_rs_vf_${XYZ}$ (j, k, l, contxb)/ & + vL_rs_vf_${XYZ}$ (j, k, l, advxb)*aTHINC + vR_rs_vf_${XYZ}$ (j, k, l, contxe) = vL_rs_vf_${XYZ}$ (j, k, l, contxe)/ & + (1._wp - vL_rs_vf_${XYZ}$ (j, k, l, advxb))*(1._wp - aTHINC) + vR_rs_vf_${XYZ}$ (j, k, l, advxb) = aTHINC + vR_rs_vf_${XYZ}$ (j, k, l, advxe) = 1 - aTHINC + + end if + end do end do + end do $:END_OMP_PARALLEL_LOOP() end if #:endfor @@ -319,15 +319,15 @@ contains if (muscl_dir == 1) then $:GPU_PARALLEL_LOOP(private='[j,k,l,q]', collapse=4) - do j = 1, v_size - do q = is3_muscl%beg, is3_muscl%end - do l = is2_muscl%beg, is2_muscl%end - do k = is1_muscl%beg - muscl_polyn, is1_muscl%end + muscl_polyn - v_rs_ws_x(k, l, q, j) = v_vf(j)%sf(k, l, q) - end do + do j = 1, v_size + do q = is3_muscl%beg, is3_muscl%end + do l = is2_muscl%beg, is2_muscl%end + do k = is1_muscl%beg - muscl_polyn, is1_muscl%end + muscl_polyn + v_rs_ws_x(k, l, q, j) = v_vf(j)%sf(k, l, q) end do end do end do + end do $:END_OMP_PARALLEL_LOOP() end if @@ -336,15 +336,15 @@ contains if (muscl_dir == 2) then $:GPU_PARALLEL_LOOP(private='[j,k,l,q]', collapse=4) - do j = 1, v_size - do q = is3_muscl%beg, is3_muscl%end - do l = is2_muscl%beg, is2_muscl%end - do k = is1_muscl%beg - muscl_polyn, is1_muscl%end + muscl_polyn - v_rs_ws_y(k, l, q, j) = v_vf(j)%sf(l, k, q) - end do + do j = 1, v_size + do q = is3_muscl%beg, is3_muscl%end + do l = is2_muscl%beg, is2_muscl%end + do k = is1_muscl%beg - muscl_polyn, is1_muscl%end + muscl_polyn + v_rs_ws_y(k, l, q, j) = v_vf(j)%sf(l, k, q) end do end do end do + end do $:END_OMP_PARALLEL_LOOP() end if @@ -352,15 +352,15 @@ contains if (p == 0) return if (muscl_dir == 3) then $:GPU_PARALLEL_LOOP(private='[j,k,l,q]', collapse=4) - do j = 1, v_size - do q = is3_muscl%beg, is3_muscl%end - do l = is2_muscl%beg, is2_muscl%end - do k = is1_muscl%beg - muscl_polyn, is1_muscl%end + muscl_polyn - v_rs_ws_z(k, l, q, j) = v_vf(j)%sf(q, l, k) - end do + do j = 1, v_size + do q = is3_muscl%beg, is3_muscl%end + do l = is2_muscl%beg, is2_muscl%end + do k = is1_muscl%beg - muscl_polyn, is1_muscl%end + muscl_polyn + v_rs_ws_z(k, l, q, j) = v_vf(j)%sf(q, l, k) end do end do end do + end do $:END_OMP_PARALLEL_LOOP() end if diff --git a/src/simulation/m_pressure_relaxation.fpp b/src/simulation/m_pressure_relaxation.fpp index 5e404e7dd..4c6618dd9 100644 --- a/src/simulation/m_pressure_relaxation.fpp +++ b/src/simulation/m_pressure_relaxation.fpp @@ -71,13 +71,13 @@ contains integer :: j, k, l $:GPU_PARALLEL_LOOP(private='[j,k,l]', collapse=3) - do l = 0, p - do k = 0, n - do j = 0, m - call s_relax_cell_pressure(q_cons_vf, j, k, l) - end do + do l = 0, p + do k = 0, n + do j = 0, m + call s_relax_cell_pressure(q_cons_vf, j, k, l) end do end do + end do $:END_GPU_PARALLEL_LOOP() end subroutine s_pressure_relaxation_procedure diff --git a/src/simulation/m_qbmm.fpp b/src/simulation/m_qbmm.fpp index 5e16b7c64..67d29cc2c 100644 --- a/src/simulation/m_qbmm.fpp +++ b/src/simulation/m_qbmm.fpp @@ -434,130 +434,130 @@ contains if (.not. polytropic) then $:GPU_PARALLEL_LOOP(collapse=5,private='[i,j,k,l,q,nb_q,nR,nR2,R,R2,nb_dot,nR_dot,nR2_dot,var,AX]') - do i = 1, nb - do q = 1, nnode - do l = 0, p - do k = 0, n - do j = 0, m - nb_q = q_cons_vf(bubxb + (i - 1)*nmom)%sf(j, k, l) - nR = q_cons_vf(bubxb + 1 + (i - 1)*nmom)%sf(j, k, l) - nR2 = q_cons_vf(bubxb + 3 + (i - 1)*nmom)%sf(j, k, l) - R = q_prim_vf(bubxb + 1 + (i - 1)*nmom)%sf(j, k, l) - R2 = q_prim_vf(bubxb + 3 + (i - 1)*nmom)%sf(j, k, l) - var = max(R2 - R**2._wp, verysmall) - if (q <= 2) then - AX = R - sqrt(var) + do i = 1, nb + do q = 1, nnode + do l = 0, p + do k = 0, n + do j = 0, m + nb_q = q_cons_vf(bubxb + (i - 1)*nmom)%sf(j, k, l) + nR = q_cons_vf(bubxb + 1 + (i - 1)*nmom)%sf(j, k, l) + nR2 = q_cons_vf(bubxb + 3 + (i - 1)*nmom)%sf(j, k, l) + R = q_prim_vf(bubxb + 1 + (i - 1)*nmom)%sf(j, k, l) + R2 = q_prim_vf(bubxb + 3 + (i - 1)*nmom)%sf(j, k, l) + var = max(R2 - R**2._wp, verysmall) + if (q <= 2) then + AX = R - sqrt(var) + else + AX = R + sqrt(var) + end if + select case (idir) + case (1) + nb_dot = flux_n_vf(bubxb + (i - 1)*nmom)%sf(j - 1, k, l) - flux_n_vf(bubxb + (i - 1)*nmom)%sf(j, k, l) + nR_dot = flux_n_vf(bubxb + 1 + (i - 1)*nmom)%sf(j - 1, k, l) - flux_n_vf(bubxb + 1 + (i - 1)*nmom)%sf(j, k, l) + nR2_dot = flux_n_vf(bubxb + 3 + (i - 1)*nmom)%sf(j - 1, k, l) - flux_n_vf(bubxb + 3 + (i - 1)*nmom)%sf(j, k, l) + rhs_pb(j, k, l, q, i) = rhs_pb(j, k, l, q, i) - 3._wp*gam/(dx(j)*AX*nb_q**2)* & + (nR_dot*nb_q - nR*nb_dot)*(pb(j, k, l, q, i)) + case (2) + nb_dot = flux_n_vf(bubxb + (i - 1)*nmom)%sf(j, k - 1, l) - flux_n_vf(bubxb + (i - 1)*nmom)%sf(j, k, l) + nR_dot = flux_n_vf(bubxb + 1 + (i - 1)*nmom)%sf(j, k - 1, l) - flux_n_vf(bubxb + 1 + (i - 1)*nmom)%sf(j, k, l) + nR2_dot = flux_n_vf(bubxb + 3 + (i - 1)*nmom)%sf(j, k - 1, l) - flux_n_vf(bubxb + 3 + (i - 1)*nmom)%sf(j, k, l) + rhs_pb(j, k, l, q, i) = rhs_pb(j, k, l, q, i) - 3._wp*gam/(dy(k)*AX*nb_q**2)* & + (nR_dot*nb_q - nR*nb_dot)*(pb(j, k, l, q, i)) + case (3) + if (is_axisym) then + nb_dot = q_prim_vf(contxe + idir)%sf(j, k, l)*(flux_n_vf(bubxb + (i - 1)*nmom)%sf(j, k, l - 1) - flux_n_vf(bubxb + (i - 1)*nmom)%sf(j, k, l)) + nR_dot = q_prim_vf(contxe + idir)%sf(j, k, l)*(flux_n_vf(bubxb + 1 + (i - 1)*nmom)%sf(j, k, l - 1) - flux_n_vf(bubxb + 1 + (i - 1)*nmom)%sf(j, k, l)) + nR2_dot = q_prim_vf(contxe + idir)%sf(j, k, l)*(flux_n_vf(bubxb + 3 + (i - 1)*nmom)%sf(j, k, l - 1) - flux_n_vf(bubxb + 3 + (i - 1)*nmom)%sf(j, k, l)) + rhs_pb(j, k, l, q, i) = rhs_pb(j, k, l, q, i) - 3._wp*gam/(dz(l)*y_cc(k)*AX*nb_q**2)* & + (nR_dot*nb_q - nR*nb_dot)*(pb(j, k, l, q, i)) else - AX = R + sqrt(var) + nb_dot = flux_n_vf(bubxb + (i - 1)*nmom)%sf(j, k, l - 1) - flux_n_vf(bubxb + (i - 1)*nmom)%sf(j, k, l) + nR_dot = flux_n_vf(bubxb + 1 + (i - 1)*nmom)%sf(j, k, l - 1) - flux_n_vf(bubxb + 1 + (i - 1)*nmom)%sf(j, k, l) + nR2_dot = flux_n_vf(bubxb + 3 + (i - 1)*nmom)%sf(j, k, l - 1) - flux_n_vf(bubxb + 3 + (i - 1)*nmom)%sf(j, k, l) + rhs_pb(j, k, l, q, i) = rhs_pb(j, k, l, q, i) - 3._wp*gam/(dz(l)*AX*nb_q**2)* & + (nR_dot*nb_q - nR*nb_dot)*(pb(j, k, l, q, i)) end if + end select + if (q <= 2) then select case (idir) case (1) - nb_dot = flux_n_vf(bubxb + (i - 1)*nmom)%sf(j - 1, k, l) - flux_n_vf(bubxb + (i - 1)*nmom)%sf(j, k, l) - nR_dot = flux_n_vf(bubxb + 1 + (i - 1)*nmom)%sf(j - 1, k, l) - flux_n_vf(bubxb + 1 + (i - 1)*nmom)%sf(j, k, l) - nR2_dot = flux_n_vf(bubxb + 3 + (i - 1)*nmom)%sf(j - 1, k, l) - flux_n_vf(bubxb + 3 + (i - 1)*nmom)%sf(j, k, l) - rhs_pb(j, k, l, q, i) = rhs_pb(j, k, l, q, i) - 3._wp*gam/(dx(j)*AX*nb_q**2)* & - (nR_dot*nb_q - nR*nb_dot)*(pb(j, k, l, q, i)) + rhs_pb(j, k, l, q, i) = rhs_pb(j, k, l, q, i) + 3._wp*gam/(dx(j)*AX*nb_q**2*sqrt(var)*2._wp)* & + (nR2_dot*nb_q - nR2*nb_dot)*(pb(j, k, l, q, i)) + rhs_pb(j, k, l, q, i) = rhs_pb(j, k, l, q, i) + 3._wp*gam/(dx(j)*AX*nb_q**2*sqrt(var)*2._wp)* & + (-2._wp*(nR/nb_q)*(nR_dot*nb_q - nR*nb_dot))*(pb(j, k, l, q, i)) case (2) - nb_dot = flux_n_vf(bubxb + (i - 1)*nmom)%sf(j, k - 1, l) - flux_n_vf(bubxb + (i - 1)*nmom)%sf(j, k, l) - nR_dot = flux_n_vf(bubxb + 1 + (i - 1)*nmom)%sf(j, k - 1, l) - flux_n_vf(bubxb + 1 + (i - 1)*nmom)%sf(j, k, l) - nR2_dot = flux_n_vf(bubxb + 3 + (i - 1)*nmom)%sf(j, k - 1, l) - flux_n_vf(bubxb + 3 + (i - 1)*nmom)%sf(j, k, l) - rhs_pb(j, k, l, q, i) = rhs_pb(j, k, l, q, i) - 3._wp*gam/(dy(k)*AX*nb_q**2)* & - (nR_dot*nb_q - nR*nb_dot)*(pb(j, k, l, q, i)) + rhs_pb(j, k, l, q, i) = rhs_pb(j, k, l, q, i) + 3._wp*gam/(dy(k)*AX*nb_q**2*sqrt(var)*2._wp)* & + (nR2_dot*nb_q - nR2*nb_dot)*(pb(j, k, l, q, i)) + rhs_pb(j, k, l, q, i) = rhs_pb(j, k, l, q, i) + 3._wp*gam/(dy(k)*AX*nb_q**2*sqrt(var)*2._wp)* & + (-2._wp*(nR/nb_q)*(nR_dot*nb_q - nR*nb_dot))*(pb(j, k, l, q, i)) case (3) if (is_axisym) then - nb_dot = q_prim_vf(contxe + idir)%sf(j, k, l)*(flux_n_vf(bubxb + (i - 1)*nmom)%sf(j, k, l - 1) - flux_n_vf(bubxb + (i - 1)*nmom)%sf(j, k, l)) - nR_dot = q_prim_vf(contxe + idir)%sf(j, k, l)*(flux_n_vf(bubxb + 1 + (i - 1)*nmom)%sf(j, k, l - 1) - flux_n_vf(bubxb + 1 + (i - 1)*nmom)%sf(j, k, l)) - nR2_dot = q_prim_vf(contxe + idir)%sf(j, k, l)*(flux_n_vf(bubxb + 3 + (i - 1)*nmom)%sf(j, k, l - 1) - flux_n_vf(bubxb + 3 + (i - 1)*nmom)%sf(j, k, l)) - rhs_pb(j, k, l, q, i) = rhs_pb(j, k, l, q, i) - 3._wp*gam/(dz(l)*y_cc(k)*AX*nb_q**2)* & - (nR_dot*nb_q - nR*nb_dot)*(pb(j, k, l, q, i)) - else - nb_dot = flux_n_vf(bubxb + (i - 1)*nmom)%sf(j, k, l - 1) - flux_n_vf(bubxb + (i - 1)*nmom)%sf(j, k, l) - nR_dot = flux_n_vf(bubxb + 1 + (i - 1)*nmom)%sf(j, k, l - 1) - flux_n_vf(bubxb + 1 + (i - 1)*nmom)%sf(j, k, l) - nR2_dot = flux_n_vf(bubxb + 3 + (i - 1)*nmom)%sf(j, k, l - 1) - flux_n_vf(bubxb + 3 + (i - 1)*nmom)%sf(j, k, l) - rhs_pb(j, k, l, q, i) = rhs_pb(j, k, l, q, i) - 3._wp*gam/(dz(l)*AX*nb_q**2)* & - (nR_dot*nb_q - nR*nb_dot)*(pb(j, k, l, q, i)) - end if - end select - if (q <= 2) then - select case (idir) - case (1) - rhs_pb(j, k, l, q, i) = rhs_pb(j, k, l, q, i) + 3._wp*gam/(dx(j)*AX*nb_q**2*sqrt(var)*2._wp)* & + rhs_pb(j, k, l, q, i) = rhs_pb(j, k, l, q, i) + 3._wp*gam/(dz(l)*y_cc(k)*AX*nb_q**2*sqrt(var)*2._wp)* & (nR2_dot*nb_q - nR2*nb_dot)*(pb(j, k, l, q, i)) - rhs_pb(j, k, l, q, i) = rhs_pb(j, k, l, q, i) + 3._wp*gam/(dx(j)*AX*nb_q**2*sqrt(var)*2._wp)* & + rhs_pb(j, k, l, q, i) = rhs_pb(j, k, l, q, i) + 3._wp*gam/(dz(l)*y_cc(k)*AX*nb_q**2*sqrt(var)*2._wp)* & (-2._wp*(nR/nb_q)*(nR_dot*nb_q - nR*nb_dot))*(pb(j, k, l, q, i)) - case (2) - rhs_pb(j, k, l, q, i) = rhs_pb(j, k, l, q, i) + 3._wp*gam/(dy(k)*AX*nb_q**2*sqrt(var)*2._wp)* & + else + rhs_pb(j, k, l, q, i) = rhs_pb(j, k, l, q, i) + 3._wp*gam/(dz(l)*AX*nb_q**2*sqrt(var)*2._wp)* & (nR2_dot*nb_q - nR2*nb_dot)*(pb(j, k, l, q, i)) - rhs_pb(j, k, l, q, i) = rhs_pb(j, k, l, q, i) + 3._wp*gam/(dy(k)*AX*nb_q**2*sqrt(var)*2._wp)* & + rhs_pb(j, k, l, q, i) = rhs_pb(j, k, l, q, i) + 3._wp*gam/(dz(l)*AX*nb_q**2*sqrt(var)*2._wp)* & (-2._wp*(nR/nb_q)*(nR_dot*nb_q - nR*nb_dot))*(pb(j, k, l, q, i)) - case (3) - if (is_axisym) then - rhs_pb(j, k, l, q, i) = rhs_pb(j, k, l, q, i) + 3._wp*gam/(dz(l)*y_cc(k)*AX*nb_q**2*sqrt(var)*2._wp)* & - (nR2_dot*nb_q - nR2*nb_dot)*(pb(j, k, l, q, i)) - rhs_pb(j, k, l, q, i) = rhs_pb(j, k, l, q, i) + 3._wp*gam/(dz(l)*y_cc(k)*AX*nb_q**2*sqrt(var)*2._wp)* & - (-2._wp*(nR/nb_q)*(nR_dot*nb_q - nR*nb_dot))*(pb(j, k, l, q, i)) - else - rhs_pb(j, k, l, q, i) = rhs_pb(j, k, l, q, i) + 3._wp*gam/(dz(l)*AX*nb_q**2*sqrt(var)*2._wp)* & - (nR2_dot*nb_q - nR2*nb_dot)*(pb(j, k, l, q, i)) - rhs_pb(j, k, l, q, i) = rhs_pb(j, k, l, q, i) + 3._wp*gam/(dz(l)*AX*nb_q**2*sqrt(var)*2._wp)* & - (-2._wp*(nR/nb_q)*(nR_dot*nb_q - nR*nb_dot))*(pb(j, k, l, q, i)) - end if - end select - else - select case (idir) - case (1) - rhs_pb(j, k, l, q, i) = rhs_pb(j, k, l, q, i) - 3._wp*gam/(dx(j)*AX*nb_q**2*sqrt(var)*2._wp)* & + end if + end select + else + select case (idir) + case (1) + rhs_pb(j, k, l, q, i) = rhs_pb(j, k, l, q, i) - 3._wp*gam/(dx(j)*AX*nb_q**2*sqrt(var)*2._wp)* & + (nR2_dot*nb_q - nR2*nb_dot)*(pb(j, k, l, q, i)) + rhs_pb(j, k, l, q, i) = rhs_pb(j, k, l, q, i) - 3._wp*gam/(dx(j)*AX*nb_q**2*sqrt(var)*2._wp)* & + (-2._wp*(nR/nb_q)*(nR_dot*nb_q - nR*nb_dot))*(pb(j, k, l, q, i)) + case (2) + rhs_pb(j, k, l, q, i) = rhs_pb(j, k, l, q, i) - 3._wp*gam/(dy(k)*AX*nb_q**2*sqrt(var)*2._wp)* & + (nR2_dot*nb_q - nR2*nb_dot)*(pb(j, k, l, q, i)) + rhs_pb(j, k, l, q, i) = rhs_pb(j, k, l, q, i) - 3._wp*gam/(dy(k)*AX*nb_q**2*sqrt(var)*2._wp)* & + (-2._wp*(nR/nb_q)*(nR_dot*nb_q - nR*nb_dot))*(pb(j, k, l, q, i)) + case (3) + if (is_axisym) then + rhs_pb(j, k, l, q, i) = rhs_pb(j, k, l, q, i) - 3._wp*gam/(dz(l)*y_cc(k)*AX*nb_q**2*sqrt(var)*2._wp)* & (nR2_dot*nb_q - nR2*nb_dot)*(pb(j, k, l, q, i)) - rhs_pb(j, k, l, q, i) = rhs_pb(j, k, l, q, i) - 3._wp*gam/(dx(j)*AX*nb_q**2*sqrt(var)*2._wp)* & + rhs_pb(j, k, l, q, i) = rhs_pb(j, k, l, q, i) - 3._wp*gam/(dz(l)*y_cc(k)*AX*nb_q**2*sqrt(var)*2._wp)* & (-2._wp*(nR/nb_q)*(nR_dot*nb_q - nR*nb_dot))*(pb(j, k, l, q, i)) - case (2) - rhs_pb(j, k, l, q, i) = rhs_pb(j, k, l, q, i) - 3._wp*gam/(dy(k)*AX*nb_q**2*sqrt(var)*2._wp)* & + else + rhs_pb(j, k, l, q, i) = rhs_pb(j, k, l, q, i) - 3._wp*gam/(dz(l)*AX*nb_q**2*sqrt(var)*2._wp)* & (nR2_dot*nb_q - nR2*nb_dot)*(pb(j, k, l, q, i)) - rhs_pb(j, k, l, q, i) = rhs_pb(j, k, l, q, i) - 3._wp*gam/(dy(k)*AX*nb_q**2*sqrt(var)*2._wp)* & + rhs_pb(j, k, l, q, i) = rhs_pb(j, k, l, q, i) - 3._wp*gam/(dz(l)*AX*nb_q**2*sqrt(var)*2._wp)* & (-2._wp*(nR/nb_q)*(nR_dot*nb_q - nR*nb_dot))*(pb(j, k, l, q, i)) - case (3) - if (is_axisym) then - rhs_pb(j, k, l, q, i) = rhs_pb(j, k, l, q, i) - 3._wp*gam/(dz(l)*y_cc(k)*AX*nb_q**2*sqrt(var)*2._wp)* & - (nR2_dot*nb_q - nR2*nb_dot)*(pb(j, k, l, q, i)) - rhs_pb(j, k, l, q, i) = rhs_pb(j, k, l, q, i) - 3._wp*gam/(dz(l)*y_cc(k)*AX*nb_q**2*sqrt(var)*2._wp)* & - (-2._wp*(nR/nb_q)*(nR_dot*nb_q - nR*nb_dot))*(pb(j, k, l, q, i)) - else - rhs_pb(j, k, l, q, i) = rhs_pb(j, k, l, q, i) - 3._wp*gam/(dz(l)*AX*nb_q**2*sqrt(var)*2._wp)* & - (nR2_dot*nb_q - nR2*nb_dot)*(pb(j, k, l, q, i)) - rhs_pb(j, k, l, q, i) = rhs_pb(j, k, l, q, i) - 3._wp*gam/(dz(l)*AX*nb_q**2*sqrt(var)*2._wp)* & - (-2._wp*(nR/nb_q)*(nR_dot*nb_q - nR*nb_dot))*(pb(j, k, l, q, i)) - end if - end select - end if - end do + end if + end select + end if end do end do end do end do + end do $:END_GPU_PARALLEL_LOOP() end if ! The following block is not repeated and is left as is if (idir == 1) then $:GPU_PARALLEL_LOOP(private='[i,l,q]', collapse=3) - do l = 0, p - do q = 0, n - do i = 0, m - rhs_vf(alf_idx)%sf(i, q, l) = rhs_vf(alf_idx)%sf(i, q, l) + mom_sp(2)%sf(i, q, l) - j = bubxb - $:GPU_LOOP(parallelism='[seq]') - do k = 1, nb - rhs_vf(j)%sf(i, q, l) = rhs_vf(j)%sf(i, q, l) + mom_3d(0, 0, k)%sf(i, q, l) - rhs_vf(j + 1)%sf(i, q, l) = rhs_vf(j + 1)%sf(i, q, l) + mom_3d(1, 0, k)%sf(i, q, l) - rhs_vf(j + 2)%sf(i, q, l) = rhs_vf(j + 2)%sf(i, q, l) + mom_3d(0, 1, k)%sf(i, q, l) - rhs_vf(j + 3)%sf(i, q, l) = rhs_vf(j + 3)%sf(i, q, l) + mom_3d(2, 0, k)%sf(i, q, l) - rhs_vf(j + 4)%sf(i, q, l) = rhs_vf(j + 4)%sf(i, q, l) + mom_3d(1, 1, k)%sf(i, q, l) - rhs_vf(j + 5)%sf(i, q, l) = rhs_vf(j + 5)%sf(i, q, l) + mom_3d(0, 2, k)%sf(i, q, l) - j = j + 6 - end do + do l = 0, p + do q = 0, n + do i = 0, m + rhs_vf(alf_idx)%sf(i, q, l) = rhs_vf(alf_idx)%sf(i, q, l) + mom_sp(2)%sf(i, q, l) + j = bubxb + $:GPU_LOOP(parallelism='[seq]') + do k = 1, nb + rhs_vf(j)%sf(i, q, l) = rhs_vf(j)%sf(i, q, l) + mom_3d(0, 0, k)%sf(i, q, l) + rhs_vf(j + 1)%sf(i, q, l) = rhs_vf(j + 1)%sf(i, q, l) + mom_3d(1, 0, k)%sf(i, q, l) + rhs_vf(j + 2)%sf(i, q, l) = rhs_vf(j + 2)%sf(i, q, l) + mom_3d(0, 1, k)%sf(i, q, l) + rhs_vf(j + 3)%sf(i, q, l) = rhs_vf(j + 3)%sf(i, q, l) + mom_3d(2, 0, k)%sf(i, q, l) + rhs_vf(j + 4)%sf(i, q, l) = rhs_vf(j + 4)%sf(i, q, l) + mom_3d(1, 1, k)%sf(i, q, l) + rhs_vf(j + 5)%sf(i, q, l) = rhs_vf(j + 5)%sf(i, q, l) + mom_3d(0, 2, k)%sf(i, q, l) + j = j + 6 end do end do end do + end do $:END_GPU_PARALLEL_LOOP() end if @@ -715,142 +715,142 @@ contains $:GPU_UPDATE(device='[is1_qbmm,is2_qbmm,is3_qbmm]') $:GPU_PARALLEL_LOOP(collapse=3, private='[id1,id2,id3,moms, msum, wght, abscX, abscY, wght_pb, wght_mv, wght_ht, coeff, ht, r, q, n_tait, B_tait, pres, rho, nbub, c, alf, momsum, drdt, drdt2, chi_vw, x_vw, rho_mw, k_mw, T_bar, grad_T]') - do id3 = is3_qbmm%beg, is3_qbmm%end - do id2 = is2_qbmm%beg, is2_qbmm%end - do id1 = is1_qbmm%beg, is1_qbmm%end - - alf = q_prim_vf(alf_idx)%sf(id1, id2, id3) - pres = q_prim_vf(E_idx)%sf(id1, id2, id3) - rho = q_prim_vf(contxb)%sf(id1, id2, id3) - - if (bubble_model == 2) then - n_tait = 1._wp/gammas(1) + 1._wp - B_tait = pi_infs(1)*(n_tait - 1)/n_tait - c = n_tait*(pres + B_tait)*(1._wp - alf)/(rho) - c = merge(sqrt(c), sgm_eps, c > 0._wp) - end if + do id3 = is3_qbmm%beg, is3_qbmm%end + do id2 = is2_qbmm%beg, is2_qbmm%end + do id1 = is1_qbmm%beg, is1_qbmm%end + + alf = q_prim_vf(alf_idx)%sf(id1, id2, id3) + pres = q_prim_vf(E_idx)%sf(id1, id2, id3) + rho = q_prim_vf(contxb)%sf(id1, id2, id3) + + if (bubble_model == 2) then + n_tait = 1._wp/gammas(1) + 1._wp + B_tait = pi_infs(1)*(n_tait - 1)/n_tait + c = n_tait*(pres + B_tait)*(1._wp - alf)/(rho) + c = merge(sqrt(c), sgm_eps, c > 0._wp) + end if - call s_coeff_selector(pres, rho, c, coeff, polytropic) + call s_coeff_selector(pres, rho, c, coeff, polytropic) - if (alf > small_alf) then - nbub = q_cons_vf(bubxb)%sf(id1, id2, id3) + if (alf > small_alf) then + nbub = q_cons_vf(bubxb)%sf(id1, id2, id3) + $:GPU_LOOP(parallelism='[seq]') + do q = 1, nb + ! Gather moments for this bubble bin $:GPU_LOOP(parallelism='[seq]') - do q = 1, nb - ! Gather moments for this bubble bin + do r = 2, nmom + moms(r) = q_prim_vf(bubmoms(q, r))%sf(id1, id2, id3) + end do + moms(1) = 1._wp + call s_chyqmom(moms, wght(:, q), abscX(:, q), abscY(:, q)) + + if (polytropic) then $:GPU_LOOP(parallelism='[seq]') - do r = 2, nmom - moms(r) = q_prim_vf(bubmoms(q, r))%sf(id1, id2, id3) + do j = 1, nnode + wght_pb(j, q) = wght(j, q)*(pb0(q) - pv) end do - moms(1) = 1._wp - call s_chyqmom(moms, wght(:, q), abscX(:, q), abscY(:, q)) - - if (polytropic) then - $:GPU_LOOP(parallelism='[seq]') - do j = 1, nnode - wght_pb(j, q) = wght(j, q)*(pb0(q) - pv) - end do - else - $:GPU_LOOP(parallelism='[seq]') - do j = 1, nnode - chi_vw = 1._wp/(1._wp + R_v/R_n*(pb(id1, id2, id3, j, q)/pv - 1._wp)) - x_vw = M_n*chi_vw/(M_v + (M_n - M_v)*chi_vw) - k_mw = x_vw*k_v(q)/(x_vw + (1._wp - x_vw)*phi_vn) + (1._wp - x_vw)*k_n(q)/(x_vw*phi_nv + 1._wp - x_vw) - rho_mw = pv/(chi_vw*R_v*Tw) - rhs_mv(id1, id2, id3, j, q) = -Re_trans_c(q)*((mv(id1, id2, id3, j, q)/(mv(id1, id2, id3, j, q) + mass_n0(q))) - chi_vw) - rhs_mv(id1, id2, id3, j, q) = rho_mw*rhs_mv(id1, id2, id3, j, q)/Pe_c/(1._wp - chi_vw)/abscX(j, q) - T_bar = Tw*(pb(id1, id2, id3, j, q)/pb0(q))*(abscX(j, q)/R0(q))**3*(mass_n0(q) + mass_v0(q))/(mass_n0(q) + mv(id1, id2, id3, j, q)) - grad_T = -Re_trans_T(q)*(T_bar - Tw) - ht(j, q) = pb0(q)*k_mw*grad_T/Pe_T(q)/abscX(j, q) - wght_pb(j, q) = wght(j, q)*(pb(id1, id2, id3, j, q)) - wght_mv(j, q) = wght(j, q)*(rhs_mv(id1, id2, id3, j, q)) - wght_ht(j, q) = wght(j, q)*ht(j, q) - end do - end if - - ! Compute change in moments due to bubble dynamics - r = 1 + else $:GPU_LOOP(parallelism='[seq]') - do i2 = 0, 2 - $:GPU_LOOP(parallelism='[seq]') - do i1 = 0, 2 - if ((i1 + i2) <= 2) then - momsum = 0._wp - $:GPU_LOOP(parallelism='[seq]') - do j = 1, nterms - select case (bubble_model) - case (3) - if (j == 3) then - momsum = momsum + coeff(j, i1, i2)*(R0(q)**momrhs(3, i1, i2, j, q))*f_quad2D(abscX(:, q), abscY(:, q), wght_pb(:, q), momrhs(:, i1, i2, j, q)) - else - momsum = momsum + coeff(j, i1, i2)*(R0(q)**momrhs(3, i1, i2, j, q))*f_quad2D(abscX(:, q), abscY(:, q), wght(:, q), momrhs(:, i1, i2, j, q)) - end if - case (2) - if ((j >= 7 .and. j <= 9) .or. (j >= 22 .and. j <= 23) .or. (j >= 10 .and. j <= 11) .or. (j == 26)) then - momsum = momsum + coeff(j, i1, i2)*(R0(q)**momrhs(3, i1, i2, j, q))*f_quad2D(abscX(:, q), abscY(:, q), wght_pb(:, q), momrhs(:, i1, i2, j, q)) - else if ((j >= 27 .and. j <= 29) .and. (.not. polytropic)) then - momsum = momsum + coeff(j, i1, i2)*(R0(q)**momrhs(3, i1, i2, j, q))*f_quad2D(abscX(:, q), abscY(:, q), wght_mv(:, q), momrhs(:, i1, i2, j, q)) - else if ((j >= 30 .and. j <= 32) .and. (.not. polytropic)) then - momsum = momsum + coeff(j, i1, i2)*(R0(q)**momrhs(3, i1, i2, j, q))*f_quad2D(abscX(:, q), abscY(:, q), wght_ht(:, q), momrhs(:, i1, i2, j, q)) - else - momsum = momsum + coeff(j, i1, i2)*(R0(q)**momrhs(3, i1, i2, j, q))*f_quad2D(abscX(:, q), abscY(:, q), wght(:, q), momrhs(:, i1, i2, j, q)) - end if - end select - end do - moms3d(i1, i2, q)%sf(id1, id2, id3) = nbub*momsum - msum(r) = momsum - r = r + 1 - end if - end do + do j = 1, nnode + chi_vw = 1._wp/(1._wp + R_v/R_n*(pb(id1, id2, id3, j, q)/pv - 1._wp)) + x_vw = M_n*chi_vw/(M_v + (M_n - M_v)*chi_vw) + k_mw = x_vw*k_v(q)/(x_vw + (1._wp - x_vw)*phi_vn) + (1._wp - x_vw)*k_n(q)/(x_vw*phi_nv + 1._wp - x_vw) + rho_mw = pv/(chi_vw*R_v*Tw) + rhs_mv(id1, id2, id3, j, q) = -Re_trans_c(q)*((mv(id1, id2, id3, j, q)/(mv(id1, id2, id3, j, q) + mass_n0(q))) - chi_vw) + rhs_mv(id1, id2, id3, j, q) = rho_mw*rhs_mv(id1, id2, id3, j, q)/Pe_c/(1._wp - chi_vw)/abscX(j, q) + T_bar = Tw*(pb(id1, id2, id3, j, q)/pb0(q))*(abscX(j, q)/R0(q))**3*(mass_n0(q) + mass_v0(q))/(mass_n0(q) + mv(id1, id2, id3, j, q)) + grad_T = -Re_trans_T(q)*(T_bar - Tw) + ht(j, q) = pb0(q)*k_mw*grad_T/Pe_T(q)/abscX(j, q) + wght_pb(j, q) = wght(j, q)*(pb(id1, id2, id3, j, q)) + wght_mv(j, q) = wght(j, q)*(rhs_mv(id1, id2, id3, j, q)) + wght_ht(j, q) = wght(j, q)*ht(j, q) end do + end if - ! Compute change in pb and mv for non-polytropic model - if (.not. polytropic) then - $:GPU_LOOP(parallelism='[seq]') - do j = 1, nnode - drdt = msum(2) - drdt2 = merge(-1._wp, 1._wp, j == 1 .or. j == 2)/(2._wp*sqrt(merge(moms(4) - moms(2)**2._wp, verysmall, moms(4) - moms(2)**2._wp > 0._wp))) - drdt2 = drdt2*(msum(3) - 2._wp*moms(2)*msum(2)) - drdt = drdt + drdt2 - rhs_pb(id1, id2, id3, j, q) = (-3._wp*gam*drdt/abscX(j, q))*(pb(id1, id2, id3, j, q)) - rhs_pb(id1, id2, id3, j, q) = rhs_pb(id1, id2, id3, j, q) + (3._wp*gam/abscX(j, q))*rhs_mv(id1, id2, id3, j, q)*R_v*Tw - rhs_pb(id1, id2, id3, j, q) = rhs_pb(id1, id2, id3, j, q) + (3._wp*gam/abscX(j, q))*ht(j, q) - rhs_mv(id1, id2, id3, j, q) = rhs_mv(id1, id2, id3, j, q)*(4._wp*pi*abscX(j, q)**2._wp) - end do - end if + ! Compute change in moments due to bubble dynamics + r = 1 + $:GPU_LOOP(parallelism='[seq]') + do i2 = 0, 2 + $:GPU_LOOP(parallelism='[seq]') + do i1 = 0, 2 + if ((i1 + i2) <= 2) then + momsum = 0._wp + $:GPU_LOOP(parallelism='[seq]') + do j = 1, nterms + select case (bubble_model) + case (3) + if (j == 3) then + momsum = momsum + coeff(j, i1, i2)*(R0(q)**momrhs(3, i1, i2, j, q))*f_quad2D(abscX(:, q), abscY(:, q), wght_pb(:, q), momrhs(:, i1, i2, j, q)) + else + momsum = momsum + coeff(j, i1, i2)*(R0(q)**momrhs(3, i1, i2, j, q))*f_quad2D(abscX(:, q), abscY(:, q), wght(:, q), momrhs(:, i1, i2, j, q)) + end if + case (2) + if ((j >= 7 .and. j <= 9) .or. (j >= 22 .and. j <= 23) .or. (j >= 10 .and. j <= 11) .or. (j == 26)) then + momsum = momsum + coeff(j, i1, i2)*(R0(q)**momrhs(3, i1, i2, j, q))*f_quad2D(abscX(:, q), abscY(:, q), wght_pb(:, q), momrhs(:, i1, i2, j, q)) + else if ((j >= 27 .and. j <= 29) .and. (.not. polytropic)) then + momsum = momsum + coeff(j, i1, i2)*(R0(q)**momrhs(3, i1, i2, j, q))*f_quad2D(abscX(:, q), abscY(:, q), wght_mv(:, q), momrhs(:, i1, i2, j, q)) + else if ((j >= 30 .and. j <= 32) .and. (.not. polytropic)) then + momsum = momsum + coeff(j, i1, i2)*(R0(q)**momrhs(3, i1, i2, j, q))*f_quad2D(abscX(:, q), abscY(:, q), wght_ht(:, q), momrhs(:, i1, i2, j, q)) + else + momsum = momsum + coeff(j, i1, i2)*(R0(q)**momrhs(3, i1, i2, j, q))*f_quad2D(abscX(:, q), abscY(:, q), wght(:, q), momrhs(:, i1, i2, j, q)) + end if + end select + end do + moms3d(i1, i2, q)%sf(id1, id2, id3) = nbub*momsum + msum(r) = momsum + r = r + 1 + end if + end do end do - ! Compute special high-order moments - momsp(1)%sf(id1, id2, id3) = f_quad(abscX, abscY, wght, 3._wp, 0._wp, 0._wp) - momsp(2)%sf(id1, id2, id3) = 4._wp*pi*nbub*f_quad(abscX, abscY, wght, 2._wp, 1._wp, 0._wp) - momsp(3)%sf(id1, id2, id3) = f_quad(abscX, abscY, wght, 3._wp, 2._wp, 0._wp) - if (abs(gam - 1._wp) <= 1.e-4_wp) then - momsp(4)%sf(id1, id2, id3) = 1._wp - else - if (polytropic) then - momsp(4)%sf(id1, id2, id3) = f_quad(abscX, abscY, wght_pb, 3._wp*(1._wp - gam), 0._wp, 3._wp*gam) + pv*f_quad(abscX, abscY, wght, 3._wp, 0._wp, 0._wp) - 4._wp*Re_inv*f_quad(abscX, abscY, wght, 2._wp, 1._wp, 0._wp) - (2._wp/Web)*f_quad(abscX, abscY, wght, 2._wp, 0._wp, 0._wp) - else - momsp(4)%sf(id1, id2, id3) = f_quad(abscX, abscY, wght_pb, 3._wp, 0._wp, 0._wp) - 4._wp*Re_inv*f_quad(abscX, abscY, wght, 2._wp, 1._wp, 0._wp) - (2._wp/Web)*f_quad(abscX, abscY, wght, 2._wp, 0._wp, 0._wp) - end if + ! Compute change in pb and mv for non-polytropic model + if (.not. polytropic) then + $:GPU_LOOP(parallelism='[seq]') + do j = 1, nnode + drdt = msum(2) + drdt2 = merge(-1._wp, 1._wp, j == 1 .or. j == 2)/(2._wp*sqrt(merge(moms(4) - moms(2)**2._wp, verysmall, moms(4) - moms(2)**2._wp > 0._wp))) + drdt2 = drdt2*(msum(3) - 2._wp*moms(2)*msum(2)) + drdt = drdt + drdt2 + rhs_pb(id1, id2, id3, j, q) = (-3._wp*gam*drdt/abscX(j, q))*(pb(id1, id2, id3, j, q)) + rhs_pb(id1, id2, id3, j, q) = rhs_pb(id1, id2, id3, j, q) + (3._wp*gam/abscX(j, q))*rhs_mv(id1, id2, id3, j, q)*R_v*Tw + rhs_pb(id1, id2, id3, j, q) = rhs_pb(id1, id2, id3, j, q) + (3._wp*gam/abscX(j, q))*ht(j, q) + rhs_mv(id1, id2, id3, j, q) = rhs_mv(id1, id2, id3, j, q)*(4._wp*pi*abscX(j, q)**2._wp) + end do end if + end do + + ! Compute special high-order moments + momsp(1)%sf(id1, id2, id3) = f_quad(abscX, abscY, wght, 3._wp, 0._wp, 0._wp) + momsp(2)%sf(id1, id2, id3) = 4._wp*pi*nbub*f_quad(abscX, abscY, wght, 2._wp, 1._wp, 0._wp) + momsp(3)%sf(id1, id2, id3) = f_quad(abscX, abscY, wght, 3._wp, 2._wp, 0._wp) + if (abs(gam - 1._wp) <= 1.e-4_wp) then + momsp(4)%sf(id1, id2, id3) = 1._wp else + if (polytropic) then + momsp(4)%sf(id1, id2, id3) = f_quad(abscX, abscY, wght_pb, 3._wp*(1._wp - gam), 0._wp, 3._wp*gam) + pv*f_quad(abscX, abscY, wght, 3._wp, 0._wp, 0._wp) - 4._wp*Re_inv*f_quad(abscX, abscY, wght, 2._wp, 1._wp, 0._wp) - (2._wp/Web)*f_quad(abscX, abscY, wght, 2._wp, 0._wp, 0._wp) + else + momsp(4)%sf(id1, id2, id3) = f_quad(abscX, abscY, wght_pb, 3._wp, 0._wp, 0._wp) - 4._wp*Re_inv*f_quad(abscX, abscY, wght, 2._wp, 1._wp, 0._wp) - (2._wp/Web)*f_quad(abscX, abscY, wght, 2._wp, 0._wp, 0._wp) + end if + end if + else + $:GPU_LOOP(parallelism='[seq]') + do q = 1, nb $:GPU_LOOP(parallelism='[seq]') - do q = 1, nb + do i1 = 0, 2 $:GPU_LOOP(parallelism='[seq]') - do i1 = 0, 2 - $:GPU_LOOP(parallelism='[seq]') - do i2 = 0, 2 - moms3d(i1, i2, q)%sf(id1, id2, id3) = 0._wp - end do + do i2 = 0, 2 + moms3d(i1, i2, q)%sf(id1, id2, id3) = 0._wp end do end do - momsp(1)%sf(id1, id2, id3) = 0._wp - momsp(2)%sf(id1, id2, id3) = 0._wp - momsp(3)%sf(id1, id2, id3) = 0._wp - momsp(4)%sf(id1, id2, id3) = 0._wp - end if - end do + end do + momsp(1)%sf(id1, id2, id3) = 0._wp + momsp(2)%sf(id1, id2, id3) = 0._wp + momsp(3)%sf(id1, id2, id3) = 0._wp + momsp(4)%sf(id1, id2, id3) = 0._wp + end if end do end do + end do $:END_GPU_PARALLEL_LOOP() contains diff --git a/src/simulation/m_rhs.fpp b/src/simulation/m_rhs.fpp index 20abde824..06797bb16 100644 --- a/src/simulation/m_rhs.fpp +++ b/src/simulation/m_rhs.fpp @@ -565,17 +565,17 @@ contains end if ! end allocation of viscous variables $:GPU_PARALLEL_LOOP(private='[i,j,k,l,id]', collapse=4) - do id = 1, num_dims - do i = 1, sys_size - do l = idwbuff(3)%beg, idwbuff(3)%end - do k = idwbuff(2)%beg, idwbuff(2)%end - do j = idwbuff(1)%beg, idwbuff(1)%end - flux_gsrc_n(id)%vf(i)%sf(j, k, l) = 0._wp - end do + do id = 1, num_dims + do i = 1, sys_size + do l = idwbuff(3)%beg, idwbuff(3)%end + do k = idwbuff(2)%beg, idwbuff(2)%end + do j = idwbuff(1)%beg, idwbuff(1)%end + flux_gsrc_n(id)%vf(i)%sf(j, k, l) = 0._wp end do end do end do end do + end do $:END_GPU_PARALLEL_LOOP() end if ! end allocation for .not. igr @@ -648,37 +648,37 @@ contains if (.not. igr) then ! Association/Population of Working Variables $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=4) - do i = 1, sys_size - do l = idwbuff(3)%beg, idwbuff(3)%end - do k = idwbuff(2)%beg, idwbuff(2)%end - do j = idwbuff(1)%beg, idwbuff(1)%end - q_cons_qp%vf(i)%sf(j, k, l) = q_cons_vf(i)%sf(j, k, l) - end do + do i = 1, sys_size + do l = idwbuff(3)%beg, idwbuff(3)%end + do k = idwbuff(2)%beg, idwbuff(2)%end + do j = idwbuff(1)%beg, idwbuff(1)%end + q_cons_qp%vf(i)%sf(j, k, l) = q_cons_vf(i)%sf(j, k, l) end do end do end do + end do $:END_GPU_PARALLEL_LOOP() ! Converting Conservative to Primitive Variables if (mpp_lim .and. bubbles_euler) then $:GPU_PARALLEL_LOOP(private='[j,k,l]', collapse=3) - do l = idwbuff(3)%beg, idwbuff(3)%end - do k = idwbuff(2)%beg, idwbuff(2)%end - do j = idwbuff(1)%beg, idwbuff(1)%end - alf_sum%sf(j, k, l) = 0._wp - $:GPU_LOOP(parallelism='[seq]') - do i = advxb, advxe - 1 - alf_sum%sf(j, k, l) = alf_sum%sf(j, k, l) + q_cons_qp%vf(i)%sf(j, k, l) - end do - $:GPU_LOOP(parallelism='[seq]') - do i = advxb, advxe - 1 - q_cons_qp%vf(i)%sf(j, k, l) = q_cons_qp%vf(i)%sf(j, k, l)*(1._wp - q_cons_qp%vf(alf_idx)%sf(j, k, l)) & - /alf_sum%sf(j, k, l) - end do + do l = idwbuff(3)%beg, idwbuff(3)%end + do k = idwbuff(2)%beg, idwbuff(2)%end + do j = idwbuff(1)%beg, idwbuff(1)%end + alf_sum%sf(j, k, l) = 0._wp + $:GPU_LOOP(parallelism='[seq]') + do i = advxb, advxe - 1 + alf_sum%sf(j, k, l) = alf_sum%sf(j, k, l) + q_cons_qp%vf(i)%sf(j, k, l) + end do + $:GPU_LOOP(parallelism='[seq]') + do i = advxb, advxe - 1 + q_cons_qp%vf(i)%sf(j, k, l) = q_cons_qp%vf(i)%sf(j, k, l)*(1._wp - q_cons_qp%vf(alf_idx)%sf(j, k, l)) & + /alf_sum%sf(j, k, l) end do end do end do + end do $:END_GPU_PARALLEL_LOOP() end if end if @@ -740,15 +740,15 @@ contains if (id == 1) then $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=4) - do l = -1, p + 1 - do k = -1, n + 1 - do j = -1, m + 1 - do i = 1, sys_size - rhs_vf(i)%sf(j, k, l) = 0._wp - end do + do l = -1, p + 1 + do k = -1, n + 1 + do j = -1, m + 1 + do i = 1, sys_size + rhs_vf(i)%sf(j, k, l) = 0._wp end do end do end do + end do $:END_GPU_PARALLEL_LOOP() end if @@ -971,17 +971,17 @@ contains if (ib) then $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=3) - do l = 0, p - do k = 0, n - do j = 0, m - if (ib_markers%sf(j, k, l) /= 0) then - do i = 1, sys_size - rhs_vf(i)%sf(j, k, l) = 0._wp - end do - end if - end do + do l = 0, p + do k = 0, n + do j = 0, m + if (ib_markers%sf(j, k, l) /= 0) then + do i = 1, sys_size + rhs_vf(i)%sf(j, k, l) = 0._wp + end do + end if end do end do + end do $:END_GPU_PARALLEL_LOOP() end if @@ -1038,15 +1038,15 @@ contains if (run_time_info .or. probe_wrt .or. ib .or. bubbles_lagrange) then if (.not. igr) then $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=4) - do i = 1, sys_size - do l = idwbuff(3)%beg, idwbuff(3)%end - do k = idwbuff(2)%beg, idwbuff(2)%end - do j = idwbuff(1)%beg, idwbuff(1)%end - q_prim_vf(i)%sf(j, k, l) = q_prim_qp%vf(i)%sf(j, k, l) - end do + do i = 1, sys_size + do l = idwbuff(3)%beg, idwbuff(3)%end + do k = idwbuff(2)%beg, idwbuff(2)%end + do j = idwbuff(1)%beg, idwbuff(1)%end + q_prim_vf(i)%sf(j, k, l) = q_prim_qp%vf(i)%sf(j, k, l) end do end do end do + end do $:END_GPU_PARALLEL_LOOP() end if end if @@ -1080,28 +1080,28 @@ contains if (alt_soundspeed) then $:GPU_PARALLEL_LOOP(private='[k_loop,l_loop,q_loop]', collapse=3) - do q_loop = 0, p - do l_loop = 0, n - do k_loop = 0, m - blkmod1(k_loop, l_loop, q_loop) = ((gammas(1) + 1._wp)*q_prim_vf%vf(E_idx)%sf(k_loop, l_loop, q_loop) + & - pi_infs(1))/gammas(1) - blkmod2(k_loop, l_loop, q_loop) = ((gammas(2) + 1._wp)*q_prim_vf%vf(E_idx)%sf(k_loop, l_loop, q_loop) + & - pi_infs(2))/gammas(2) - alpha1(k_loop, l_loop, q_loop) = q_cons_vf%vf(advxb)%sf(k_loop, l_loop, q_loop) - - if (bubbles_euler) then - alpha2(k_loop, l_loop, q_loop) = q_cons_vf%vf(alf_idx - 1)%sf(k_loop, l_loop, q_loop) - else - alpha2(k_loop, l_loop, q_loop) = q_cons_vf%vf(advxe)%sf(k_loop, l_loop, q_loop) - end if + do q_loop = 0, p + do l_loop = 0, n + do k_loop = 0, m + blkmod1(k_loop, l_loop, q_loop) = ((gammas(1) + 1._wp)*q_prim_vf%vf(E_idx)%sf(k_loop, l_loop, q_loop) + & + pi_infs(1))/gammas(1) + blkmod2(k_loop, l_loop, q_loop) = ((gammas(2) + 1._wp)*q_prim_vf%vf(E_idx)%sf(k_loop, l_loop, q_loop) + & + pi_infs(2))/gammas(2) + alpha1(k_loop, l_loop, q_loop) = q_cons_vf%vf(advxb)%sf(k_loop, l_loop, q_loop) + + if (bubbles_euler) then + alpha2(k_loop, l_loop, q_loop) = q_cons_vf%vf(alf_idx - 1)%sf(k_loop, l_loop, q_loop) + else + alpha2(k_loop, l_loop, q_loop) = q_cons_vf%vf(advxe)%sf(k_loop, l_loop, q_loop) + end if - Kterm(k_loop, l_loop, q_loop) = alpha1(k_loop, l_loop, q_loop)*alpha2(k_loop, l_loop, q_loop)* & - (blkmod2(k_loop, l_loop, q_loop) - blkmod1(k_loop, l_loop, q_loop))/ & - (alpha1(k_loop, l_loop, q_loop)*blkmod2(k_loop, l_loop, q_loop) + & - alpha2(k_loop, l_loop, q_loop)*blkmod1(k_loop, l_loop, q_loop)) - end do + Kterm(k_loop, l_loop, q_loop) = alpha1(k_loop, l_loop, q_loop)*alpha2(k_loop, l_loop, q_loop)* & + (blkmod2(k_loop, l_loop, q_loop) - blkmod1(k_loop, l_loop, q_loop))/ & + (alpha1(k_loop, l_loop, q_loop)*blkmod2(k_loop, l_loop, q_loop) + & + alpha2(k_loop, l_loop, q_loop)*blkmod1(k_loop, l_loop, q_loop)) end do end do + end do $:END_GPU_PARALLEL_LOOP() end if @@ -1115,38 +1115,38 @@ contains end if $:GPU_PARALLEL_LOOP(collapse=4,private='[j,k_loop,l_loop,q_loop,inv_ds,flux_face1,flux_face2]') - do j = 1, sys_size - do q_loop = 0, p - do l_loop = 0, n - do k_loop = 0, m - inv_ds = 1._wp/dx(k_loop) - flux_face1 = flux_n(1)%vf(j)%sf(k_loop - 1, l_loop, q_loop) - flux_face2 = flux_n(1)%vf(j)%sf(k_loop, l_loop, q_loop) - rhs_vf(j)%sf(k_loop, l_loop, q_loop) = inv_ds*(flux_face1 - flux_face2) - end do + do j = 1, sys_size + do q_loop = 0, p + do l_loop = 0, n + do k_loop = 0, m + inv_ds = 1._wp/dx(k_loop) + flux_face1 = flux_n(1)%vf(j)%sf(k_loop - 1, l_loop, q_loop) + flux_face2 = flux_n(1)%vf(j)%sf(k_loop, l_loop, q_loop) + rhs_vf(j)%sf(k_loop, l_loop, q_loop) = inv_ds*(flux_face1 - flux_face2) end do end do end do + end do $:END_GPU_PARALLEL_LOOP() if (model_eqns == 3) then $:GPU_PARALLEL_LOOP(collapse=4,private='[i_fluid_loop,k_loop,l_loop,q_loop,inv_ds,advected_qty_val, pressure_val,flux_face1,flux_face2]') - do q_loop = 0, p - do l_loop = 0, n - do k_loop = 0, m - do i_fluid_loop = 1, num_fluids - inv_ds = 1._wp/dx(k_loop) - advected_qty_val = q_cons_vf%vf(i_fluid_loop + advxb - 1)%sf(k_loop, l_loop, q_loop) - pressure_val = q_prim_vf%vf(E_idx)%sf(k_loop, l_loop, q_loop) - flux_face1 = flux_src_n_vf%vf(advxb)%sf(k_loop, l_loop, q_loop) - flux_face2 = flux_src_n_vf%vf(advxb)%sf(k_loop - 1, l_loop, q_loop) - rhs_vf(i_fluid_loop + intxb - 1)%sf(k_loop, l_loop, q_loop) = & - rhs_vf(i_fluid_loop + intxb - 1)%sf(k_loop, l_loop, q_loop) - & - inv_ds*advected_qty_val*pressure_val*(flux_face1 - flux_face2) - end do + do q_loop = 0, p + do l_loop = 0, n + do k_loop = 0, m + do i_fluid_loop = 1, num_fluids + inv_ds = 1._wp/dx(k_loop) + advected_qty_val = q_cons_vf%vf(i_fluid_loop + advxb - 1)%sf(k_loop, l_loop, q_loop) + pressure_val = q_prim_vf%vf(E_idx)%sf(k_loop, l_loop, q_loop) + flux_face1 = flux_src_n_vf%vf(advxb)%sf(k_loop, l_loop, q_loop) + flux_face2 = flux_src_n_vf%vf(advxb)%sf(k_loop - 1, l_loop, q_loop) + rhs_vf(i_fluid_loop + intxb - 1)%sf(k_loop, l_loop, q_loop) = & + rhs_vf(i_fluid_loop + intxb - 1)%sf(k_loop, l_loop, q_loop) - & + inv_ds*advected_qty_val*pressure_val*(flux_face1 - flux_face2) end do end do end do + end do $:END_GPU_PARALLEL_LOOP() end if @@ -1161,60 +1161,60 @@ contains end if $:GPU_PARALLEL_LOOP(collapse=4,private='[j,k,l,q,inv_ds,flux_face1,flux_face2]') - do j = 1, sys_size - do l = 0, p - do k = 0, n - do q = 0, m - inv_ds = 1._wp/dy(k) - flux_face1 = flux_n(2)%vf(j)%sf(q, k - 1, l) - flux_face2 = flux_n(2)%vf(j)%sf(q, k, l) - rhs_vf(j)%sf(q, k, l) = rhs_vf(j)%sf(q, k, l) + inv_ds*(flux_face1 - flux_face2) - end do + do j = 1, sys_size + do l = 0, p + do k = 0, n + do q = 0, m + inv_ds = 1._wp/dy(k) + flux_face1 = flux_n(2)%vf(j)%sf(q, k - 1, l) + flux_face2 = flux_n(2)%vf(j)%sf(q, k, l) + rhs_vf(j)%sf(q, k, l) = rhs_vf(j)%sf(q, k, l) + inv_ds*(flux_face1 - flux_face2) end do end do end do + end do $:END_GPU_PARALLEL_LOOP() if (model_eqns == 3) then $:GPU_PARALLEL_LOOP(collapse=4,private='[i_fluid_loop,k,l,q,inv_ds,advected_qty_val, pressure_val,flux_face1,flux_face2]') - do l = 0, p - do k = 0, n - do q = 0, m - do i_fluid_loop = 1, num_fluids - inv_ds = 1._wp/dy(k) - advected_qty_val = q_cons_vf%vf(i_fluid_loop + advxb - 1)%sf(q, k, l) - pressure_val = q_prim_vf%vf(E_idx)%sf(q, k, l) - flux_face1 = flux_src_n_vf%vf(advxb)%sf(q, k, l) - flux_face2 = flux_src_n_vf%vf(advxb)%sf(q, k - 1, l) + do l = 0, p + do k = 0, n + do q = 0, m + do i_fluid_loop = 1, num_fluids + inv_ds = 1._wp/dy(k) + advected_qty_val = q_cons_vf%vf(i_fluid_loop + advxb - 1)%sf(q, k, l) + pressure_val = q_prim_vf%vf(E_idx)%sf(q, k, l) + flux_face1 = flux_src_n_vf%vf(advxb)%sf(q, k, l) + flux_face2 = flux_src_n_vf%vf(advxb)%sf(q, k - 1, l) + rhs_vf(i_fluid_loop + intxb - 1)%sf(q, k, l) = & + rhs_vf(i_fluid_loop + intxb - 1)%sf(q, k, l) - & + inv_ds*advected_qty_val*pressure_val*(flux_face1 - flux_face2) + if (cyl_coord) then rhs_vf(i_fluid_loop + intxb - 1)%sf(q, k, l) = & rhs_vf(i_fluid_loop + intxb - 1)%sf(q, k, l) - & - inv_ds*advected_qty_val*pressure_val*(flux_face1 - flux_face2) - if (cyl_coord) then - rhs_vf(i_fluid_loop + intxb - 1)%sf(q, k, l) = & - rhs_vf(i_fluid_loop + intxb - 1)%sf(q, k, l) - & - 5.e-1_wp/y_cc(k)*advected_qty_val*pressure_val*(flux_face1 + flux_face2) - end if - end do + 5.e-1_wp/y_cc(k)*advected_qty_val*pressure_val*(flux_face1 + flux_face2) + end if end do end do end do + end do $:END_GPU_PARALLEL_LOOP() end if if (cyl_coord) then $:GPU_PARALLEL_LOOP(collapse=4,private='[j,k,l,q,flux_face1,flux_face2]') - do j = 1, sys_size - do l = 0, p - do k = 0, n - do q = 0, m - flux_face1 = flux_gsrc_n(2)%vf(j)%sf(q, k - 1, l) - flux_face2 = flux_gsrc_n(2)%vf(j)%sf(q, k, l) - rhs_vf(j)%sf(q, k, l) = rhs_vf(j)%sf(q, k, l) - & - 5.e-1_wp/y_cc(k)*(flux_face1 + flux_face2) - end do + do j = 1, sys_size + do l = 0, p + do k = 0, n + do q = 0, m + flux_face1 = flux_gsrc_n(2)%vf(j)%sf(q, k - 1, l) + flux_face2 = flux_gsrc_n(2)%vf(j)%sf(q, k, l) + rhs_vf(j)%sf(q, k, l) = rhs_vf(j)%sf(q, k, l) - & + 5.e-1_wp/y_cc(k)*(flux_face1 + flux_face2) end do end do end do + end do $:END_GPU_PARALLEL_LOOP() end if @@ -1230,70 +1230,70 @@ contains if (grid_geometry == 3) then ! Cylindrical Coordinates $:GPU_PARALLEL_LOOP(collapse=4,private='[j,k,l,q,inv_ds,velocity_val,flux_face1,flux_face2]') - do j = 1, sys_size - do k = 0, p - do q = 0, n - do l = 0, m - inv_ds = 1._wp/(dz(k)*y_cc(q)) - velocity_val = q_prim_vf%vf(contxe + idir)%sf(l, q, k) - flux_face1 = flux_n(3)%vf(j)%sf(l, q, k - 1) - flux_face2 = flux_n(3)%vf(j)%sf(l, q, k) - rhs_vf(j)%sf(l, q, k) = rhs_vf(j)%sf(l, q, k) + & - inv_ds*velocity_val*(flux_face1 - flux_face2) - end do + do j = 1, sys_size + do k = 0, p + do q = 0, n + do l = 0, m + inv_ds = 1._wp/(dz(k)*y_cc(q)) + velocity_val = q_prim_vf%vf(contxe + idir)%sf(l, q, k) + flux_face1 = flux_n(3)%vf(j)%sf(l, q, k - 1) + flux_face2 = flux_n(3)%vf(j)%sf(l, q, k) + rhs_vf(j)%sf(l, q, k) = rhs_vf(j)%sf(l, q, k) + & + inv_ds*velocity_val*(flux_face1 - flux_face2) end do end do end do + end do $:END_GPU_PARALLEL_LOOP() $:GPU_PARALLEL_LOOP(collapse=4,private='[j,k,l,q,flux_face1,flux_face2]') - do j = 1, sys_size - do k = 0, p - do q = 0, n - do l = 0, m - flux_face1 = flux_gsrc_n(3)%vf(j)%sf(l, q, k - 1) - flux_face2 = flux_gsrc_n(3)%vf(j)%sf(l, q, k) - rhs_vf(j)%sf(l, q, k) = rhs_vf(j)%sf(l, q, k) - & - 5.e-1_wp/y_cc(q)*(flux_face1 + flux_face2) - end do + do j = 1, sys_size + do k = 0, p + do q = 0, n + do l = 0, m + flux_face1 = flux_gsrc_n(3)%vf(j)%sf(l, q, k - 1) + flux_face2 = flux_gsrc_n(3)%vf(j)%sf(l, q, k) + rhs_vf(j)%sf(l, q, k) = rhs_vf(j)%sf(l, q, k) - & + 5.e-1_wp/y_cc(q)*(flux_face1 + flux_face2) end do end do end do + end do $:END_GPU_PARALLEL_LOOP() else ! Cartesian Coordinates $:GPU_PARALLEL_LOOP(collapse=4,private='[j,k,l,q,inv_ds,flux_face1,flux_face2]') - do j = 1, sys_size - do k = 0, p - do q = 0, n - do l = 0, m - inv_ds = 1._wp/dz(k) - flux_face1 = flux_n(3)%vf(j)%sf(l, q, k - 1) - flux_face2 = flux_n(3)%vf(j)%sf(l, q, k) - rhs_vf(j)%sf(l, q, k) = rhs_vf(j)%sf(l, q, k) + inv_ds*(flux_face1 - flux_face2) - end do + do j = 1, sys_size + do k = 0, p + do q = 0, n + do l = 0, m + inv_ds = 1._wp/dz(k) + flux_face1 = flux_n(3)%vf(j)%sf(l, q, k - 1) + flux_face2 = flux_n(3)%vf(j)%sf(l, q, k) + rhs_vf(j)%sf(l, q, k) = rhs_vf(j)%sf(l, q, k) + inv_ds*(flux_face1 - flux_face2) end do end do end do + end do $:END_GPU_PARALLEL_LOOP() end if if (model_eqns == 3) then $:GPU_PARALLEL_LOOP(collapse=4,private='[i_fluid_loop,k,l,q,inv_ds,advected_qty_val, pressure_val,flux_face1,flux_face2]') - do k = 0, p - do q = 0, n - do l = 0, m - do i_fluid_loop = 1, num_fluids - inv_ds = 1._wp/dz(k) - advected_qty_val = q_cons_vf%vf(i_fluid_loop + advxb - 1)%sf(l, q, k) - pressure_val = q_prim_vf%vf(E_idx)%sf(l, q, k) - flux_face1 = flux_src_n_vf%vf(advxb)%sf(l, q, k) - flux_face2 = flux_src_n_vf%vf(advxb)%sf(l, q, k - 1) - rhs_vf(i_fluid_loop + intxb - 1)%sf(l, q, k) = & - rhs_vf(i_fluid_loop + intxb - 1)%sf(l, q, k) - & - inv_ds*advected_qty_val*pressure_val*(flux_face1 - flux_face2) - end do + do k = 0, p + do q = 0, n + do l = 0, m + do i_fluid_loop = 1, num_fluids + inv_ds = 1._wp/dz(k) + advected_qty_val = q_cons_vf%vf(i_fluid_loop + advxb - 1)%sf(l, q, k) + pressure_val = q_prim_vf%vf(E_idx)%sf(l, q, k) + flux_face1 = flux_src_n_vf%vf(advxb)%sf(l, q, k) + flux_face2 = flux_src_n_vf%vf(advxb)%sf(l, q, k - 1) + rhs_vf(i_fluid_loop + intxb - 1)%sf(l, q, k) = & + rhs_vf(i_fluid_loop + intxb - 1)%sf(l, q, k) - & + inv_ds*advected_qty_val*pressure_val*(flux_face1 - flux_face2) end do end do end do + end do $:END_GPU_PARALLEL_LOOP() end if @@ -1323,62 +1323,62 @@ contains use_standard_riemann = (riemann_solver == 1 .or. riemann_solver == 4) if (use_standard_riemann) then $:GPU_PARALLEL_LOOP(collapse=4,private='[j_adv,k_idx,l_idx,q_idx,local_inv_ds, local_term_coeff,local_flux1,local_flux2]') - do j_adv = advxb, advxe - do q_idx = 0, p ! z_extent - do l_idx = 0, n ! y_extent - do k_idx = 0, m ! x_extent - local_inv_ds = 1._wp/dx(k_idx) - local_term_coeff = q_prim_vf_arg%vf(contxe + current_idir)%sf(k_idx, l_idx, q_idx) - local_flux1 = flux_src_n_vf_arg%vf(j_adv)%sf(k_idx - 1, l_idx, q_idx) - local_flux2 = flux_src_n_vf_arg%vf(j_adv)%sf(k_idx, l_idx, q_idx) - rhs_vf_arg(j_adv)%sf(k_idx, l_idx, q_idx) = rhs_vf_arg(j_adv)%sf(k_idx, l_idx, q_idx) + & - local_inv_ds*local_term_coeff*(local_flux1 - local_flux2) - end do + do j_adv = advxb, advxe + do q_idx = 0, p ! z_extent + do l_idx = 0, n ! y_extent + do k_idx = 0, m ! x_extent + local_inv_ds = 1._wp/dx(k_idx) + local_term_coeff = q_prim_vf_arg%vf(contxe + current_idir)%sf(k_idx, l_idx, q_idx) + local_flux1 = flux_src_n_vf_arg%vf(j_adv)%sf(k_idx - 1, l_idx, q_idx) + local_flux2 = flux_src_n_vf_arg%vf(j_adv)%sf(k_idx, l_idx, q_idx) + rhs_vf_arg(j_adv)%sf(k_idx, l_idx, q_idx) = rhs_vf_arg(j_adv)%sf(k_idx, l_idx, q_idx) + & + local_inv_ds*local_term_coeff*(local_flux1 - local_flux2) end do end do end do + end do $:END_GPU_PARALLEL_LOOP() else ! Other Riemann solvers if (alt_soundspeed) then if (bubbles_euler .neqv. .true.) then $:GPU_PARALLEL_LOOP(collapse=3, private='[k_idx,l_idx,q_idx,local_inv_ds, local_q_cons_val, local_k_term_val, local_term_coeff, local_flux1, local_flux2]') - do q_idx = 0, p; do l_idx = 0, n; do k_idx = 0, m - local_inv_ds = 1._wp/dx(k_idx) - local_q_cons_val = q_cons_vf_arg%vf(advxe)%sf(k_idx, l_idx, q_idx) - local_k_term_val = Kterm_arg(k_idx, l_idx, q_idx) ! Access is safe due to outer alt_soundspeed check - local_term_coeff = local_q_cons_val - local_k_term_val - local_flux1 = flux_src_n_vf_arg%vf(advxe)%sf(k_idx, l_idx, q_idx) - local_flux2 = flux_src_n_vf_arg%vf(advxe)%sf(k_idx - 1, l_idx, q_idx) - rhs_vf_arg(advxe)%sf(k_idx, l_idx, q_idx) = rhs_vf_arg(advxe)%sf(k_idx, l_idx, q_idx) + & - local_inv_ds*local_term_coeff*(local_flux1 - local_flux2) - end do; end do; end do + do q_idx = 0, p; do l_idx = 0, n; do k_idx = 0, m + local_inv_ds = 1._wp/dx(k_idx) + local_q_cons_val = q_cons_vf_arg%vf(advxe)%sf(k_idx, l_idx, q_idx) + local_k_term_val = Kterm_arg(k_idx, l_idx, q_idx) ! Access is safe due to outer alt_soundspeed check + local_term_coeff = local_q_cons_val - local_k_term_val + local_flux1 = flux_src_n_vf_arg%vf(advxe)%sf(k_idx, l_idx, q_idx) + local_flux2 = flux_src_n_vf_arg%vf(advxe)%sf(k_idx - 1, l_idx, q_idx) + rhs_vf_arg(advxe)%sf(k_idx, l_idx, q_idx) = rhs_vf_arg(advxe)%sf(k_idx, l_idx, q_idx) + & + local_inv_ds*local_term_coeff*(local_flux1 - local_flux2) + end do; end do; end do $:END_GPU_PARALLEL_LOOP() $:GPU_PARALLEL_LOOP(collapse=3, private='[k_idx,l_idx,q_idx,local_inv_ds,local_q_cons_val, local_k_term_val,local_term_coeff, local_flux1, local_flux2]') - do q_idx = 0, p; do l_idx = 0, n; do k_idx = 0, m - local_inv_ds = 1._wp/dx(k_idx) - local_q_cons_val = q_cons_vf_arg%vf(advxb)%sf(k_idx, l_idx, q_idx) - local_k_term_val = Kterm_arg(k_idx, l_idx, q_idx) ! Access is safe - local_term_coeff = local_q_cons_val + local_k_term_val - local_flux1 = flux_src_n_vf_arg%vf(advxb)%sf(k_idx, l_idx, q_idx) - local_flux2 = flux_src_n_vf_arg%vf(advxb)%sf(k_idx - 1, l_idx, q_idx) - rhs_vf_arg(advxb)%sf(k_idx, l_idx, q_idx) = rhs_vf_arg(advxb)%sf(k_idx, l_idx, q_idx) + & - local_inv_ds*local_term_coeff*(local_flux1 - local_flux2) - end do; end do; end do + do q_idx = 0, p; do l_idx = 0, n; do k_idx = 0, m + local_inv_ds = 1._wp/dx(k_idx) + local_q_cons_val = q_cons_vf_arg%vf(advxb)%sf(k_idx, l_idx, q_idx) + local_k_term_val = Kterm_arg(k_idx, l_idx, q_idx) ! Access is safe + local_term_coeff = local_q_cons_val + local_k_term_val + local_flux1 = flux_src_n_vf_arg%vf(advxb)%sf(k_idx, l_idx, q_idx) + local_flux2 = flux_src_n_vf_arg%vf(advxb)%sf(k_idx - 1, l_idx, q_idx) + rhs_vf_arg(advxb)%sf(k_idx, l_idx, q_idx) = rhs_vf_arg(advxb)%sf(k_idx, l_idx, q_idx) + & + local_inv_ds*local_term_coeff*(local_flux1 - local_flux2) + end do; end do; end do $:END_GPU_PARALLEL_LOOP() end if else ! NOT alt_soundspeed $:GPU_PARALLEL_LOOP(collapse=4,private='[j_adv,k_idx,l_idx,q_idx,local_inv_ds, local_term_coeff,local_flux1,local_flux2]') - do j_adv = advxb, advxe - do q_idx = 0, p; do l_idx = 0, n; do k_idx = 0, m - local_inv_ds = 1._wp/dx(k_idx) - local_term_coeff = q_cons_vf_arg%vf(j_adv)%sf(k_idx, l_idx, q_idx) - local_flux1 = flux_src_n_vf_arg%vf(j_adv)%sf(k_idx, l_idx, q_idx) - local_flux2 = flux_src_n_vf_arg%vf(j_adv)%sf(k_idx - 1, l_idx, q_idx) - rhs_vf_arg(j_adv)%sf(k_idx, l_idx, q_idx) = rhs_vf_arg(j_adv)%sf(k_idx, l_idx, q_idx) + & - local_inv_ds*local_term_coeff*(local_flux1 - local_flux2) - end do; end do; end do - end do + do j_adv = advxb, advxe + do q_idx = 0, p; do l_idx = 0, n; do k_idx = 0, m + local_inv_ds = 1._wp/dx(k_idx) + local_term_coeff = q_cons_vf_arg%vf(j_adv)%sf(k_idx, l_idx, q_idx) + local_flux1 = flux_src_n_vf_arg%vf(j_adv)%sf(k_idx, l_idx, q_idx) + local_flux2 = flux_src_n_vf_arg%vf(j_adv)%sf(k_idx - 1, l_idx, q_idx) + rhs_vf_arg(j_adv)%sf(k_idx, l_idx, q_idx) = rhs_vf_arg(j_adv)%sf(k_idx, l_idx, q_idx) + & + local_inv_ds*local_term_coeff*(local_flux1 - local_flux2) + end do; end do; end do + end do $:END_GPU_PARALLEL_LOOP() end if end if @@ -1387,70 +1387,70 @@ contains use_standard_riemann = (riemann_solver == 1 .or. riemann_solver == 4) if (use_standard_riemann) then $:GPU_PARALLEL_LOOP(collapse=4,private='[j_adv,k_idx,l_idx,q_idx,local_inv_ds, local_term_coeff,local_flux1,local_flux2]') - do j_adv = advxb, advxe - do l_idx = 0, p ! z_extent - do k_idx = 0, n ! y_extent - do q_idx = 0, m ! x_extent - local_inv_ds = 1._wp/dy(k_idx) - local_term_coeff = q_prim_vf_arg%vf(contxe + current_idir)%sf(q_idx, k_idx, l_idx) - local_flux1 = flux_src_n_vf_arg%vf(j_adv)%sf(q_idx, k_idx - 1, l_idx) - local_flux2 = flux_src_n_vf_arg%vf(j_adv)%sf(q_idx, k_idx, l_idx) - rhs_vf_arg(j_adv)%sf(q_idx, k_idx, l_idx) = rhs_vf_arg(j_adv)%sf(q_idx, k_idx, l_idx) + & - local_inv_ds*local_term_coeff*(local_flux1 - local_flux2) - end do + do j_adv = advxb, advxe + do l_idx = 0, p ! z_extent + do k_idx = 0, n ! y_extent + do q_idx = 0, m ! x_extent + local_inv_ds = 1._wp/dy(k_idx) + local_term_coeff = q_prim_vf_arg%vf(contxe + current_idir)%sf(q_idx, k_idx, l_idx) + local_flux1 = flux_src_n_vf_arg%vf(j_adv)%sf(q_idx, k_idx - 1, l_idx) + local_flux2 = flux_src_n_vf_arg%vf(j_adv)%sf(q_idx, k_idx, l_idx) + rhs_vf_arg(j_adv)%sf(q_idx, k_idx, l_idx) = rhs_vf_arg(j_adv)%sf(q_idx, k_idx, l_idx) + & + local_inv_ds*local_term_coeff*(local_flux1 - local_flux2) end do end do end do + end do $:END_GPU_PARALLEL_LOOP() else ! Other Riemann solvers if (alt_soundspeed) then if (bubbles_euler .neqv. .true.) then $:GPU_PARALLEL_LOOP(collapse=3, private='[k_idx,l_idx,q_idx,local_inv_ds, local_q_cons_val, local_k_term_val, local_term_coeff, local_flux1, local_flux2]') - do l_idx = 0, p; do k_idx = 0, n; do q_idx = 0, m - local_inv_ds = 1._wp/dy(k_idx) - local_q_cons_val = q_cons_vf_arg%vf(advxe)%sf(q_idx, k_idx, l_idx) - local_k_term_val = Kterm_arg(q_idx, k_idx, l_idx) ! Access is safe - local_term_coeff = local_q_cons_val - local_k_term_val - local_flux1 = flux_src_n_vf_arg%vf(advxe)%sf(q_idx, k_idx, l_idx) - local_flux2 = flux_src_n_vf_arg%vf(advxe)%sf(q_idx, k_idx - 1, l_idx) - rhs_vf_arg(advxe)%sf(q_idx, k_idx, l_idx) = rhs_vf_arg(advxe)%sf(q_idx, k_idx, l_idx) + & - local_inv_ds*local_term_coeff*(local_flux1 - local_flux2) - if (cyl_coord) then - rhs_vf_arg(advxe)%sf(q_idx, k_idx, l_idx) = rhs_vf_arg(advxe)%sf(q_idx, k_idx, l_idx) - & - (local_k_term_val/(2._wp*y_cc(k_idx)))*(local_flux1 + local_flux2) - end if - end do; end do; end do + do l_idx = 0, p; do k_idx = 0, n; do q_idx = 0, m + local_inv_ds = 1._wp/dy(k_idx) + local_q_cons_val = q_cons_vf_arg%vf(advxe)%sf(q_idx, k_idx, l_idx) + local_k_term_val = Kterm_arg(q_idx, k_idx, l_idx) ! Access is safe + local_term_coeff = local_q_cons_val - local_k_term_val + local_flux1 = flux_src_n_vf_arg%vf(advxe)%sf(q_idx, k_idx, l_idx) + local_flux2 = flux_src_n_vf_arg%vf(advxe)%sf(q_idx, k_idx - 1, l_idx) + rhs_vf_arg(advxe)%sf(q_idx, k_idx, l_idx) = rhs_vf_arg(advxe)%sf(q_idx, k_idx, l_idx) + & + local_inv_ds*local_term_coeff*(local_flux1 - local_flux2) + if (cyl_coord) then + rhs_vf_arg(advxe)%sf(q_idx, k_idx, l_idx) = rhs_vf_arg(advxe)%sf(q_idx, k_idx, l_idx) - & + (local_k_term_val/(2._wp*y_cc(k_idx)))*(local_flux1 + local_flux2) + end if + end do; end do; end do $:END_GPU_PARALLEL_LOOP() $:GPU_PARALLEL_LOOP(collapse=3, private='[k_idx,l_idx,q_idx,local_inv_ds, local_q_cons_val, local_k_term_val,local_term_coeff, local_flux1, local_flux2]') - do l_idx = 0, p; do k_idx = 0, n; do q_idx = 0, m - local_inv_ds = 1._wp/dy(k_idx) - local_q_cons_val = q_cons_vf_arg%vf(advxb)%sf(q_idx, k_idx, l_idx) - local_k_term_val = Kterm_arg(q_idx, k_idx, l_idx) ! Access is safe - local_term_coeff = local_q_cons_val + local_k_term_val - local_flux1 = flux_src_n_vf_arg%vf(advxb)%sf(q_idx, k_idx, l_idx) - local_flux2 = flux_src_n_vf_arg%vf(advxb)%sf(q_idx, k_idx - 1, l_idx) + do l_idx = 0, p; do k_idx = 0, n; do q_idx = 0, m + local_inv_ds = 1._wp/dy(k_idx) + local_q_cons_val = q_cons_vf_arg%vf(advxb)%sf(q_idx, k_idx, l_idx) + local_k_term_val = Kterm_arg(q_idx, k_idx, l_idx) ! Access is safe + local_term_coeff = local_q_cons_val + local_k_term_val + local_flux1 = flux_src_n_vf_arg%vf(advxb)%sf(q_idx, k_idx, l_idx) + local_flux2 = flux_src_n_vf_arg%vf(advxb)%sf(q_idx, k_idx - 1, l_idx) + rhs_vf_arg(advxb)%sf(q_idx, k_idx, l_idx) = rhs_vf_arg(advxb)%sf(q_idx, k_idx, l_idx) + & + local_inv_ds*local_term_coeff*(local_flux1 - local_flux2) + if (cyl_coord) then rhs_vf_arg(advxb)%sf(q_idx, k_idx, l_idx) = rhs_vf_arg(advxb)%sf(q_idx, k_idx, l_idx) + & - local_inv_ds*local_term_coeff*(local_flux1 - local_flux2) - if (cyl_coord) then - rhs_vf_arg(advxb)%sf(q_idx, k_idx, l_idx) = rhs_vf_arg(advxb)%sf(q_idx, k_idx, l_idx) + & - (local_k_term_val/(2._wp*y_cc(k_idx)))*(local_flux1 + local_flux2) - end if - end do; end do; end do + (local_k_term_val/(2._wp*y_cc(k_idx)))*(local_flux1 + local_flux2) + end if + end do; end do; end do $:END_GPU_PARALLEL_LOOP() end if else ! NOT alt_soundspeed $:GPU_PARALLEL_LOOP(collapse=4,private='[j_adv,k_idx,l_idx,q_idx,local_inv_ds, local_term_coeff,local_flux1,local_flux2]') - do j_adv = advxb, advxe - do l_idx = 0, p; do k_idx = 0, n; do q_idx = 0, m - local_inv_ds = 1._wp/dy(k_idx) - local_term_coeff = q_cons_vf_arg%vf(j_adv)%sf(q_idx, k_idx, l_idx) - local_flux1 = flux_src_n_vf_arg%vf(j_adv)%sf(q_idx, k_idx, l_idx) - local_flux2 = flux_src_n_vf_arg%vf(j_adv)%sf(q_idx, k_idx - 1, l_idx) - rhs_vf_arg(j_adv)%sf(q_idx, k_idx, l_idx) = rhs_vf_arg(j_adv)%sf(q_idx, k_idx, l_idx) + & - local_inv_ds*local_term_coeff*(local_flux1 - local_flux2) - end do; end do; end do - end do + do j_adv = advxb, advxe + do l_idx = 0, p; do k_idx = 0, n; do q_idx = 0, m + local_inv_ds = 1._wp/dy(k_idx) + local_term_coeff = q_cons_vf_arg%vf(j_adv)%sf(q_idx, k_idx, l_idx) + local_flux1 = flux_src_n_vf_arg%vf(j_adv)%sf(q_idx, k_idx, l_idx) + local_flux2 = flux_src_n_vf_arg%vf(j_adv)%sf(q_idx, k_idx - 1, l_idx) + rhs_vf_arg(j_adv)%sf(q_idx, k_idx, l_idx) = rhs_vf_arg(j_adv)%sf(q_idx, k_idx, l_idx) + & + local_inv_ds*local_term_coeff*(local_flux1 - local_flux2) + end do; end do; end do + end do $:END_GPU_PARALLEL_LOOP() end if end if @@ -1464,62 +1464,62 @@ contains if (use_standard_riemann) then $:GPU_PARALLEL_LOOP(collapse=4,private='[j_adv,k_idx,l_idx,q_idx,local_inv_ds, local_term_coeff,local_flux1,local_flux2]') - do j_adv = advxb, advxe - do k_idx = 0, p ! z_extent - do q_idx = 0, n ! y_extent - do l_idx = 0, m ! x_extent - local_inv_ds = 1._wp/dz(k_idx) - local_term_coeff = q_prim_vf_arg%vf(contxe + current_idir)%sf(l_idx, q_idx, k_idx) - local_flux1 = flux_src_n_vf_arg%vf(j_adv)%sf(l_idx, q_idx, k_idx - 1) - local_flux2 = flux_src_n_vf_arg%vf(j_adv)%sf(l_idx, q_idx, k_idx) - rhs_vf_arg(j_adv)%sf(l_idx, q_idx, k_idx) = rhs_vf_arg(j_adv)%sf(l_idx, q_idx, k_idx) + & - local_inv_ds*local_term_coeff*(local_flux1 - local_flux2) - end do + do j_adv = advxb, advxe + do k_idx = 0, p ! z_extent + do q_idx = 0, n ! y_extent + do l_idx = 0, m ! x_extent + local_inv_ds = 1._wp/dz(k_idx) + local_term_coeff = q_prim_vf_arg%vf(contxe + current_idir)%sf(l_idx, q_idx, k_idx) + local_flux1 = flux_src_n_vf_arg%vf(j_adv)%sf(l_idx, q_idx, k_idx - 1) + local_flux2 = flux_src_n_vf_arg%vf(j_adv)%sf(l_idx, q_idx, k_idx) + rhs_vf_arg(j_adv)%sf(l_idx, q_idx, k_idx) = rhs_vf_arg(j_adv)%sf(l_idx, q_idx, k_idx) + & + local_inv_ds*local_term_coeff*(local_flux1 - local_flux2) end do end do end do + end do $:END_GPU_PARALLEL_LOOP() else ! Other Riemann solvers if (alt_soundspeed) then if (bubbles_euler .neqv. .true.) then $:GPU_PARALLEL_LOOP(collapse=3, private='[k_idx,l_idx,q_idx,local_inv_ds,local_q_cons_val, local_k_term_val, local_term_coeff, local_flux1, local_flux2]') - do k_idx = 0, p; do q_idx = 0, n; do l_idx = 0, m - local_inv_ds = 1._wp/dz(k_idx) - local_q_cons_val = q_cons_vf_arg%vf(advxe)%sf(l_idx, q_idx, k_idx) - local_k_term_val = Kterm_arg(l_idx, q_idx, k_idx) ! Access is safe - local_term_coeff = local_q_cons_val - local_k_term_val - local_flux1 = flux_src_n_vf_arg%vf(advxe)%sf(l_idx, q_idx, k_idx) - local_flux2 = flux_src_n_vf_arg%vf(advxe)%sf(l_idx, q_idx, k_idx - 1) - rhs_vf_arg(advxe)%sf(l_idx, q_idx, k_idx) = rhs_vf_arg(advxe)%sf(l_idx, q_idx, k_idx) + & - local_inv_ds*local_term_coeff*(local_flux1 - local_flux2) - end do; end do; end do + do k_idx = 0, p; do q_idx = 0, n; do l_idx = 0, m + local_inv_ds = 1._wp/dz(k_idx) + local_q_cons_val = q_cons_vf_arg%vf(advxe)%sf(l_idx, q_idx, k_idx) + local_k_term_val = Kterm_arg(l_idx, q_idx, k_idx) ! Access is safe + local_term_coeff = local_q_cons_val - local_k_term_val + local_flux1 = flux_src_n_vf_arg%vf(advxe)%sf(l_idx, q_idx, k_idx) + local_flux2 = flux_src_n_vf_arg%vf(advxe)%sf(l_idx, q_idx, k_idx - 1) + rhs_vf_arg(advxe)%sf(l_idx, q_idx, k_idx) = rhs_vf_arg(advxe)%sf(l_idx, q_idx, k_idx) + & + local_inv_ds*local_term_coeff*(local_flux1 - local_flux2) + end do; end do; end do $:END_GPU_PARALLEL_LOOP() $:GPU_PARALLEL_LOOP(collapse=3, private='[k_idx,l_idx,q_idx,local_inv_ds, local_q_cons_val, local_k_term_val, local_term_coeff, local_flux1, local_flux2]') - do k_idx = 0, p; do q_idx = 0, n; do l_idx = 0, m - local_inv_ds = 1._wp/dz(k_idx) - local_q_cons_val = q_cons_vf_arg%vf(advxb)%sf(l_idx, q_idx, k_idx) - local_k_term_val = Kterm_arg(l_idx, q_idx, k_idx) ! Access is safe - local_term_coeff = local_q_cons_val + local_k_term_val - local_flux1 = flux_src_n_vf_arg%vf(advxb)%sf(l_idx, q_idx, k_idx) - local_flux2 = flux_src_n_vf_arg%vf(advxb)%sf(l_idx, q_idx, k_idx - 1) - rhs_vf_arg(advxb)%sf(l_idx, q_idx, k_idx) = rhs_vf_arg(advxb)%sf(l_idx, q_idx, k_idx) + & - local_inv_ds*local_term_coeff*(local_flux1 - local_flux2) - end do; end do; end do + do k_idx = 0, p; do q_idx = 0, n; do l_idx = 0, m + local_inv_ds = 1._wp/dz(k_idx) + local_q_cons_val = q_cons_vf_arg%vf(advxb)%sf(l_idx, q_idx, k_idx) + local_k_term_val = Kterm_arg(l_idx, q_idx, k_idx) ! Access is safe + local_term_coeff = local_q_cons_val + local_k_term_val + local_flux1 = flux_src_n_vf_arg%vf(advxb)%sf(l_idx, q_idx, k_idx) + local_flux2 = flux_src_n_vf_arg%vf(advxb)%sf(l_idx, q_idx, k_idx - 1) + rhs_vf_arg(advxb)%sf(l_idx, q_idx, k_idx) = rhs_vf_arg(advxb)%sf(l_idx, q_idx, k_idx) + & + local_inv_ds*local_term_coeff*(local_flux1 - local_flux2) + end do; end do; end do $:END_GPU_PARALLEL_LOOP() end if else ! NOT alt_soundspeed $:GPU_PARALLEL_LOOP(collapse=4, private='[j_adv,k_idx,l_idx,q_idx,local_inv_ds, local_term_coeff,local_flux1,local_flux2]') - do j_adv = advxb, advxe - do k_idx = 0, p; do q_idx = 0, n; do l_idx = 0, m - local_inv_ds = 1._wp/dz(k_idx) - local_term_coeff = q_cons_vf_arg%vf(j_adv)%sf(l_idx, q_idx, k_idx) - local_flux1 = flux_src_n_vf_arg%vf(j_adv)%sf(l_idx, q_idx, k_idx) - local_flux2 = flux_src_n_vf_arg%vf(j_adv)%sf(l_idx, q_idx, k_idx - 1) - rhs_vf_arg(j_adv)%sf(l_idx, q_idx, k_idx) = rhs_vf_arg(j_adv)%sf(l_idx, q_idx, k_idx) + & - local_inv_ds*local_term_coeff*(local_flux1 - local_flux2) - end do; end do; end do - end do + do j_adv = advxb, advxe + do k_idx = 0, p; do q_idx = 0, n; do l_idx = 0, m + local_inv_ds = 1._wp/dz(k_idx) + local_term_coeff = q_cons_vf_arg%vf(j_adv)%sf(l_idx, q_idx, k_idx) + local_flux1 = flux_src_n_vf_arg%vf(j_adv)%sf(l_idx, q_idx, k_idx) + local_flux2 = flux_src_n_vf_arg%vf(j_adv)%sf(l_idx, q_idx, k_idx - 1) + rhs_vf_arg(j_adv)%sf(l_idx, q_idx, k_idx) = rhs_vf_arg(j_adv)%sf(l_idx, q_idx, k_idx) + & + local_inv_ds*local_term_coeff*(local_flux1 - local_flux2) + end do; end do; end do + end do $:END_GPU_PARALLEL_LOOP() end if end if @@ -1543,54 +1543,54 @@ contains if (surface_tension) then $:GPU_PARALLEL_LOOP(private='[j,k,l]', collapse=3) - do l = 0, p - do k = 0, n - do j = 0, m - rhs_vf(c_idx)%sf(j, k, l) = & - rhs_vf(c_idx)%sf(j, k, l) + 1._wp/dx(j)* & - q_prim_vf(c_idx)%sf(j, k, l)* & - (flux_src_n_in(advxb)%sf(j, k, l) - & - flux_src_n_in(advxb)%sf(j - 1, k, l)) - end do + do l = 0, p + do k = 0, n + do j = 0, m + rhs_vf(c_idx)%sf(j, k, l) = & + rhs_vf(c_idx)%sf(j, k, l) + 1._wp/dx(j)* & + q_prim_vf(c_idx)%sf(j, k, l)* & + (flux_src_n_in(advxb)%sf(j, k, l) - & + flux_src_n_in(advxb)%sf(j - 1, k, l)) end do end do + end do $:END_GPU_PARALLEL_LOOP() end if if ((surface_tension .or. viscous) .or. chem_params%diffusion) then $:GPU_PARALLEL_LOOP(private='[j,k,l]', collapse=3) - do l = 0, p - do k = 0, n - do j = 0, m - if (surface_tension .or. viscous) then - $:GPU_LOOP(parallelism='[seq]') - do i = momxb, E_idx - rhs_vf(i)%sf(j, k, l) = & - rhs_vf(i)%sf(j, k, l) + 1._wp/dx(j)* & - (flux_src_n_in(i)%sf(j - 1, k, l) & - - flux_src_n_in(i)%sf(j, k, l)) - end do - end if + do l = 0, p + do k = 0, n + do j = 0, m + if (surface_tension .or. viscous) then + $:GPU_LOOP(parallelism='[seq]') + do i = momxb, E_idx + rhs_vf(i)%sf(j, k, l) = & + rhs_vf(i)%sf(j, k, l) + 1._wp/dx(j)* & + (flux_src_n_in(i)%sf(j - 1, k, l) & + - flux_src_n_in(i)%sf(j, k, l)) + end do + end if - if (chem_params%diffusion) then - $:GPU_LOOP(parallelism='[seq]') - do i = chemxb, chemxe - rhs_vf(i)%sf(j, k, l) = & - rhs_vf(i)%sf(j, k, l) + 1._wp/dx(j)* & - (flux_src_n_in(i)%sf(j - 1, k, l) & - - flux_src_n_in(i)%sf(j, k, l)) - end do + if (chem_params%diffusion) then + $:GPU_LOOP(parallelism='[seq]') + do i = chemxb, chemxe + rhs_vf(i)%sf(j, k, l) = & + rhs_vf(i)%sf(j, k, l) + 1._wp/dx(j)* & + (flux_src_n_in(i)%sf(j - 1, k, l) & + - flux_src_n_in(i)%sf(j, k, l)) + end do - if (.not. viscous) then - rhs_vf(E_idx)%sf(j, k, l) = & - rhs_vf(E_idx)%sf(j, k, l) + 1._wp/dx(j)* & - (flux_src_n_in(E_idx)%sf(j - 1, k, l) & - - flux_src_n_in(E_idx)%sf(j, k, l)) - end if + if (.not. viscous) then + rhs_vf(E_idx)%sf(j, k, l) = & + rhs_vf(E_idx)%sf(j, k, l) + 1._wp/dx(j)* & + (flux_src_n_in(E_idx)%sf(j - 1, k, l) & + - flux_src_n_in(E_idx)%sf(j, k, l)) end if - end do + end if end do end do + end do $:END_GPU_PARALLEL_LOOP() end if @@ -1598,17 +1598,17 @@ contains if (surface_tension) then $:GPU_PARALLEL_LOOP(private='[j,k,l]', collapse=3) - do l = 0, p - do k = 0, n - do j = 0, m - rhs_vf(c_idx)%sf(j, k, l) = & - rhs_vf(c_idx)%sf(j, k, l) + 1._wp/dy(k)* & - q_prim_vf(c_idx)%sf(j, k, l)* & - (flux_src_n_in(advxb)%sf(j, k, l) - & - flux_src_n_in(advxb)%sf(j, k - 1, l)) - end do + do l = 0, p + do k = 0, n + do j = 0, m + rhs_vf(c_idx)%sf(j, k, l) = & + rhs_vf(c_idx)%sf(j, k, l) + 1._wp/dy(k)* & + q_prim_vf(c_idx)%sf(j, k, l)* & + (flux_src_n_in(advxb)%sf(j, k, l) - & + flux_src_n_in(advxb)%sf(j, k - 1, l)) end do end do + end do $:END_GPU_PARALLEL_LOOP() end if @@ -1631,72 +1631,72 @@ contains end if $:GPU_PARALLEL_LOOP(private='[i,j,l]', collapse=2) - do l = 0, p - do j = 0, m - $:GPU_LOOP(parallelism='[seq]') - do i = momxb, E_idx - rhs_vf(i)%sf(j, 0, l) = & - rhs_vf(i)%sf(j, 0, l) + 1._wp/(y_cc(1) - y_cc(-1))* & - (tau_Re_vf(i)%sf(j, -1, l) & - - tau_Re_vf(i)%sf(j, 1, l)) - end do + do l = 0, p + do j = 0, m + $:GPU_LOOP(parallelism='[seq]') + do i = momxb, E_idx + rhs_vf(i)%sf(j, 0, l) = & + rhs_vf(i)%sf(j, 0, l) + 1._wp/(y_cc(1) - y_cc(-1))* & + (tau_Re_vf(i)%sf(j, -1, l) & + - tau_Re_vf(i)%sf(j, 1, l)) end do end do + end do $:END_GPU_PARALLEL_LOOP() end if $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=3) - do l = 0, p - do k = 1, n - do j = 0, m - $:GPU_LOOP(parallelism='[seq]') - do i = momxb, E_idx - rhs_vf(i)%sf(j, k, l) = & - rhs_vf(i)%sf(j, k, l) + 1._wp/dy(k)* & - (flux_src_n_in(i)%sf(j, k - 1, l) & - - flux_src_n_in(i)%sf(j, k, l)) - end do + do l = 0, p + do k = 1, n + do j = 0, m + $:GPU_LOOP(parallelism='[seq]') + do i = momxb, E_idx + rhs_vf(i)%sf(j, k, l) = & + rhs_vf(i)%sf(j, k, l) + 1._wp/dy(k)* & + (flux_src_n_in(i)%sf(j, k - 1, l) & + - flux_src_n_in(i)%sf(j, k, l)) end do end do end do + end do $:END_GPU_PARALLEL_LOOP() else if ((surface_tension .or. viscous) .or. chem_params%diffusion) then $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=3) - do l = 0, p - do k = 0, n - do j = 0, m - if (surface_tension .or. viscous) then - $:GPU_LOOP(parallelism='[seq]') - do i = momxb, E_idx - rhs_vf(i)%sf(j, k, l) = & - rhs_vf(i)%sf(j, k, l) + 1._wp/dy(k)* & - (flux_src_n_in(i)%sf(j, k - 1, l) & - - flux_src_n_in(i)%sf(j, k, l)) - end do - end if + do l = 0, p + do k = 0, n + do j = 0, m + if (surface_tension .or. viscous) then + $:GPU_LOOP(parallelism='[seq]') + do i = momxb, E_idx + rhs_vf(i)%sf(j, k, l) = & + rhs_vf(i)%sf(j, k, l) + 1._wp/dy(k)* & + (flux_src_n_in(i)%sf(j, k - 1, l) & + - flux_src_n_in(i)%sf(j, k, l)) + end do + end if - if (chem_params%diffusion) then - $:GPU_LOOP(parallelism='[seq]') - do i = chemxb, chemxe - rhs_vf(i)%sf(j, k, l) = & - rhs_vf(i)%sf(j, k, l) + 1._wp/dy(k)* & - (flux_src_n_in(i)%sf(j, k - 1, l) & - - flux_src_n_in(i)%sf(j, k, l)) - end do - if (.not. viscous) then - rhs_vf(E_idx)%sf(j, k, l) = & - rhs_vf(E_idx)%sf(j, k, l) + 1._wp/dy(k)* & - (flux_src_n_in(E_idx)%sf(j, k - 1, l) & - - flux_src_n_in(E_idx)%sf(j, k, l)) - end if + if (chem_params%diffusion) then + $:GPU_LOOP(parallelism='[seq]') + do i = chemxb, chemxe + rhs_vf(i)%sf(j, k, l) = & + rhs_vf(i)%sf(j, k, l) + 1._wp/dy(k)* & + (flux_src_n_in(i)%sf(j, k - 1, l) & + - flux_src_n_in(i)%sf(j, k, l)) + end do + if (.not. viscous) then + rhs_vf(E_idx)%sf(j, k, l) = & + rhs_vf(E_idx)%sf(j, k, l) + 1._wp/dy(k)* & + (flux_src_n_in(E_idx)%sf(j, k - 1, l) & + - flux_src_n_in(E_idx)%sf(j, k, l)) end if - end do + end if end do end do + end do $:END_GPU_PARALLEL_LOOP() end if end if @@ -1707,51 +1707,51 @@ contains if ((bc_y%beg == -2) .or. (bc_y%beg == -14)) then $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=3) - do l = 0, p - do k = 1, n - do j = 0, m - $:GPU_LOOP(parallelism='[seq]') - do i = momxb, E_idx - rhs_vf(i)%sf(j, k, l) = & - rhs_vf(i)%sf(j, k, l) - 5.e-1_wp/y_cc(k)* & - (flux_src_n_in(i)%sf(j, k - 1, l) & - + flux_src_n_in(i)%sf(j, k, l)) - end do + do l = 0, p + do k = 1, n + do j = 0, m + $:GPU_LOOP(parallelism='[seq]') + do i = momxb, E_idx + rhs_vf(i)%sf(j, k, l) = & + rhs_vf(i)%sf(j, k, l) - 5.e-1_wp/y_cc(k)* & + (flux_src_n_in(i)%sf(j, k - 1, l) & + + flux_src_n_in(i)%sf(j, k, l)) end do end do end do + end do $:END_GPU_PARALLEL_LOOP() if (viscous) then $:GPU_PARALLEL_LOOP(private='[i,j,l]', collapse=2) - do l = 0, p - do j = 0, m - $:GPU_LOOP(parallelism='[seq]') - do i = momxb, E_idx - rhs_vf(i)%sf(j, 0, l) = & - rhs_vf(i)%sf(j, 0, l) - 1._wp/y_cc(0)* & - tau_Re_vf(i)%sf(j, 0, l) - end do + do l = 0, p + do j = 0, m + $:GPU_LOOP(parallelism='[seq]') + do i = momxb, E_idx + rhs_vf(i)%sf(j, 0, l) = & + rhs_vf(i)%sf(j, 0, l) - 1._wp/y_cc(0)* & + tau_Re_vf(i)%sf(j, 0, l) end do end do + end do $:END_GPU_PARALLEL_LOOP() end if else $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=3) - do l = 0, p - do k = 0, n - do j = 0, m - $:GPU_LOOP(parallelism='[seq]') - do i = momxb, E_idx - rhs_vf(i)%sf(j, k, l) = & - rhs_vf(i)%sf(j, k, l) - 5.e-1_wp/y_cc(k)* & - (flux_src_n_in(i)%sf(j, k - 1, l) & - + flux_src_n_in(i)%sf(j, k, l)) - end do + do l = 0, p + do k = 0, n + do j = 0, m + $:GPU_LOOP(parallelism='[seq]') + do i = momxb, E_idx + rhs_vf(i)%sf(j, k, l) = & + rhs_vf(i)%sf(j, k, l) - 5.e-1_wp/y_cc(k)* & + (flux_src_n_in(i)%sf(j, k - 1, l) & + + flux_src_n_in(i)%sf(j, k, l)) end do end do end do + end do $:END_GPU_PARALLEL_LOOP() end if end if @@ -1760,73 +1760,73 @@ contains if (surface_tension) then $:GPU_PARALLEL_LOOP(private='[j,k,l]', collapse=3) - do l = 0, p - do k = 0, n - do j = 0, m - rhs_vf(c_idx)%sf(j, k, l) = & - rhs_vf(c_idx)%sf(j, k, l) + 1._wp/dz(l)* & - q_prim_vf(c_idx)%sf(j, k, l)* & - (flux_src_n_in(advxb)%sf(j, k, l) - & - flux_src_n_in(advxb)%sf(j, k, l - 1)) - end do + do l = 0, p + do k = 0, n + do j = 0, m + rhs_vf(c_idx)%sf(j, k, l) = & + rhs_vf(c_idx)%sf(j, k, l) + 1._wp/dz(l)* & + q_prim_vf(c_idx)%sf(j, k, l)* & + (flux_src_n_in(advxb)%sf(j, k, l) - & + flux_src_n_in(advxb)%sf(j, k, l - 1)) end do end do + end do $:END_GPU_PARALLEL_LOOP() end if if ((surface_tension .or. viscous) .or. chem_params%diffusion) then $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=3) - do l = 0, p - do k = 0, n - do j = 0, m - if (surface_tension .or. viscous) then - $:GPU_LOOP(parallelism='[seq]') - do i = momxb, E_idx - rhs_vf(i)%sf(j, k, l) = & - rhs_vf(i)%sf(j, k, l) + 1._wp/dz(l)* & - (flux_src_n_in(i)%sf(j, k, l - 1) & - - flux_src_n_in(i)%sf(j, k, l)) - end do - end if + do l = 0, p + do k = 0, n + do j = 0, m + if (surface_tension .or. viscous) then + $:GPU_LOOP(parallelism='[seq]') + do i = momxb, E_idx + rhs_vf(i)%sf(j, k, l) = & + rhs_vf(i)%sf(j, k, l) + 1._wp/dz(l)* & + (flux_src_n_in(i)%sf(j, k, l - 1) & + - flux_src_n_in(i)%sf(j, k, l)) + end do + end if - if (chem_params%diffusion) then - $:GPU_LOOP(parallelism='[seq]') - do i = chemxb, chemxe - rhs_vf(i)%sf(j, k, l) = & - rhs_vf(i)%sf(j, k, l) + 1._wp/dz(l)* & - (flux_src_n_in(i)%sf(j, k, l - 1) & - - flux_src_n_in(i)%sf(j, k, l)) - end do - if (.not. viscous) then - rhs_vf(E_idx)%sf(j, k, l) = & - rhs_vf(E_idx)%sf(j, k, l) + 1._wp/dz(l)* & - (flux_src_n_in(E_idx)%sf(j, k, l - 1) & - - flux_src_n_in(E_idx)%sf(j, k, l)) - end if + if (chem_params%diffusion) then + $:GPU_LOOP(parallelism='[seq]') + do i = chemxb, chemxe + rhs_vf(i)%sf(j, k, l) = & + rhs_vf(i)%sf(j, k, l) + 1._wp/dz(l)* & + (flux_src_n_in(i)%sf(j, k, l - 1) & + - flux_src_n_in(i)%sf(j, k, l)) + end do + if (.not. viscous) then + rhs_vf(E_idx)%sf(j, k, l) = & + rhs_vf(E_idx)%sf(j, k, l) + 1._wp/dz(l)* & + (flux_src_n_in(E_idx)%sf(j, k, l - 1) & + - flux_src_n_in(E_idx)%sf(j, k, l)) end if - end do + end if end do end do + end do $:END_GPU_PARALLEL_LOOP() end if if (grid_geometry == 3) then $:GPU_PARALLEL_LOOP(private='[j,k,l]', collapse=3) - do l = 0, p - do k = 0, n - do j = 0, m - rhs_vf(momxb + 1)%sf(j, k, l) = & - rhs_vf(momxb + 1)%sf(j, k, l) + 5.e-1_wp* & - (flux_src_n_in(momxe)%sf(j, k, l - 1) & - + flux_src_n_in(momxe)%sf(j, k, l)) - - rhs_vf(momxe)%sf(j, k, l) = & - rhs_vf(momxe)%sf(j, k, l) - 5.e-1_wp* & - (flux_src_n_in(momxb + 1)%sf(j, k, l - 1) & - + flux_src_n_in(momxb + 1)%sf(j, k, l)) - end do + do l = 0, p + do k = 0, n + do j = 0, m + rhs_vf(momxb + 1)%sf(j, k, l) = & + rhs_vf(momxb + 1)%sf(j, k, l) + 5.e-1_wp* & + (flux_src_n_in(momxe)%sf(j, k, l - 1) & + + flux_src_n_in(momxe)%sf(j, k, l)) + + rhs_vf(momxe)%sf(j, k, l) = & + rhs_vf(momxe)%sf(j, k, l) - 5.e-1_wp* & + (flux_src_n_in(momxb + 1)%sf(j, k, l - 1) & + + flux_src_n_in(momxb + 1)%sf(j, k, l)) end do end do + end do $:END_GPU_PARALLEL_LOOP() end if end if @@ -1940,42 +1940,42 @@ contains if (recon_dir == 1) then $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=4) - do i = iv%beg, iv%end - do l = is3%beg, is3%end - do k = is2%beg, is2%end - do j = is1%beg, is1%end - vL_x(j, k, l, i) = v_vf(i)%sf(j, k, l) - vR_x(j, k, l, i) = v_vf(i)%sf(j, k, l) - end do + do i = iv%beg, iv%end + do l = is3%beg, is3%end + do k = is2%beg, is2%end + do j = is1%beg, is1%end + vL_x(j, k, l, i) = v_vf(i)%sf(j, k, l) + vR_x(j, k, l, i) = v_vf(i)%sf(j, k, l) end do end do end do + end do $:END_GPU_PARALLEL_LOOP() else if (recon_dir == 2) then $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=4) - do i = iv%beg, iv%end - do l = is3%beg, is3%end - do k = is2%beg, is2%end - do j = is1%beg, is1%end - vL_y(j, k, l, i) = v_vf(i)%sf(k, j, l) - vR_y(j, k, l, i) = v_vf(i)%sf(k, j, l) - end do + do i = iv%beg, iv%end + do l = is3%beg, is3%end + do k = is2%beg, is2%end + do j = is1%beg, is1%end + vL_y(j, k, l, i) = v_vf(i)%sf(k, j, l) + vR_y(j, k, l, i) = v_vf(i)%sf(k, j, l) end do end do end do + end do $:END_GPU_PARALLEL_LOOP() else if (recon_dir == 3) then $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=4) - do i = iv%beg, iv%end - do l = is3%beg, is3%end - do k = is2%beg, is2%end - do j = is1%beg, is1%end - vL_z(j, k, l, i) = v_vf(i)%sf(l, k, j) - vR_z(j, k, l, i) = v_vf(i)%sf(l, k, j) - end do + do i = iv%beg, iv%end + do l = is3%beg, is3%end + do k = is2%beg, is2%end + do j = is1%beg, is1%end + vL_z(j, k, l, i) = v_vf(i)%sf(l, k, j) + vR_z(j, k, l, i) = v_vf(i)%sf(l, k, j) end do end do end do + end do $:END_GPU_PARALLEL_LOOP() end if end if diff --git a/src/simulation/m_riemann_solvers.fpp b/src/simulation/m_riemann_solvers.fpp index d2cce0820..6796966d6 100644 --- a/src/simulation/m_riemann_solvers.fpp +++ b/src/simulation/m_riemann_solvers.fpp @@ -359,621 +359,621 @@ contains if (norm_dir == ${NORM_DIR}$) then $:GPU_PARALLEL_LOOP(collapse=3, private='[i,j,k,l,alpha_rho_L, alpha_rho_R, vel_L, vel_R, alpha_L, alpha_R, tau_e_L, tau_e_R,G_L, G_R, Re_L, Re_R, rho_avg, h_avg, gamma_avg, s_L, s_R, s_S, Ys_L, Ys_R, xi_field_L, xi_field_R, Cp_iL, Cp_iR, Xs_L, Xs_R, Gamma_iL, Gamma_iR, Yi_avg, Phi_avg, h_iL, h_iR, h_avg_2, c_fast, pres_mag, B, Ga, vdotB, B2, b4, cm, pcorr, zcoef, vel_L_tmp, vel_R_tmp]') - do l = is3%beg, is3%end - do k = is2%beg, is2%end - do j = is1%beg, is1%end - $:GPU_LOOP(parallelism='[seq]') - do i = 1, contxe - alpha_rho_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, i) - alpha_rho_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) - end do - - vel_L_rms = 0._wp; vel_R_rms = 0._wp + do l = is3%beg, is3%end + do k = is2%beg, is2%end + do j = is1%beg, is1%end + $:GPU_LOOP(parallelism='[seq]') + do i = 1, contxe + alpha_rho_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, i) + alpha_rho_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) + end do - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_vels - vel_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, contxe + i) - vel_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, contxe + i) - vel_L_rms = vel_L_rms + vel_L(i)**2._wp - vel_R_rms = vel_R_rms + vel_R(i)**2._wp - end do + vel_L_rms = 0._wp; vel_R_rms = 0._wp - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - alpha_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i) - alpha_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i) - end do + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_vels + vel_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, contxe + i) + vel_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, contxe + i) + vel_L_rms = vel_L_rms + vel_L(i)**2._wp + vel_R_rms = vel_R_rms + vel_R(i)**2._wp + end do - pres_L = qL_prim_rs${XYZ}$_vf(j, k, l, E_idx) - pres_R = qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx) + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids + alpha_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i) + alpha_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i) + end do - if (mhd) then - if (n == 0) then ! 1D: constant Bx; By, Bz as variables - B%L(1) = Bx0 - B%R(1) = Bx0 - B%L(2) = qL_prim_rs${XYZ}$_vf(j, k, l, B_idx%beg) - B%R(2) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, B_idx%beg) - B%L(3) = qL_prim_rs${XYZ}$_vf(j, k, l, B_idx%beg + 1) - B%R(3) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, B_idx%beg + 1) - else ! 2D/3D: Bx, By, Bz as variables - B%L(1) = qL_prim_rs${XYZ}$_vf(j, k, l, B_idx%beg) - B%R(1) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, B_idx%beg) - B%L(2) = qL_prim_rs${XYZ}$_vf(j, k, l, B_idx%beg + 1) - B%R(2) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, B_idx%beg + 1) - B%L(3) = qL_prim_rs${XYZ}$_vf(j, k, l, B_idx%beg + 2) - B%R(3) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, B_idx%beg + 2) - end if + pres_L = qL_prim_rs${XYZ}$_vf(j, k, l, E_idx) + pres_R = qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx) + + if (mhd) then + if (n == 0) then ! 1D: constant Bx; By, Bz as variables + B%L(1) = Bx0 + B%R(1) = Bx0 + B%L(2) = qL_prim_rs${XYZ}$_vf(j, k, l, B_idx%beg) + B%R(2) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, B_idx%beg) + B%L(3) = qL_prim_rs${XYZ}$_vf(j, k, l, B_idx%beg + 1) + B%R(3) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, B_idx%beg + 1) + else ! 2D/3D: Bx, By, Bz as variables + B%L(1) = qL_prim_rs${XYZ}$_vf(j, k, l, B_idx%beg) + B%R(1) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, B_idx%beg) + B%L(2) = qL_prim_rs${XYZ}$_vf(j, k, l, B_idx%beg + 1) + B%R(2) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, B_idx%beg + 1) + B%L(3) = qL_prim_rs${XYZ}$_vf(j, k, l, B_idx%beg + 2) + B%R(3) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, B_idx%beg + 2) end if + end if - rho_L = 0._wp - gamma_L = 0._wp - pi_inf_L = 0._wp - qv_L = 0._wp - - rho_R = 0._wp - gamma_R = 0._wp - pi_inf_R = 0._wp - qv_R = 0._wp + rho_L = 0._wp + gamma_L = 0._wp + pi_inf_L = 0._wp + qv_L = 0._wp - alpha_L_sum = 0._wp - alpha_R_sum = 0._wp + rho_R = 0._wp + gamma_R = 0._wp + pi_inf_R = 0._wp + qv_R = 0._wp - pres_mag%L = 0._wp - pres_mag%R = 0._wp + alpha_L_sum = 0._wp + alpha_R_sum = 0._wp - if (mpp_lim) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - alpha_rho_L(i) = max(0._wp, alpha_rho_L(i)) - alpha_L(i) = min(max(0._wp, alpha_L(i)), 1._wp) - alpha_L_sum = alpha_L_sum + alpha_L(i) - alpha_rho_R(i) = max(0._wp, alpha_rho_R(i)) - alpha_R(i) = min(max(0._wp, alpha_R(i)), 1._wp) - alpha_R_sum = alpha_R_sum + alpha_R(i) - end do - - alpha_L = alpha_L/max(alpha_L_sum, sgm_eps) - alpha_R = alpha_R/max(alpha_R_sum, sgm_eps) - end if + pres_mag%L = 0._wp + pres_mag%R = 0._wp + if (mpp_lim) then $:GPU_LOOP(parallelism='[seq]') do i = 1, num_fluids - rho_L = rho_L + alpha_rho_L(i) - gamma_L = gamma_L + alpha_L(i)*gammas(i) - pi_inf_L = pi_inf_L + alpha_L(i)*pi_infs(i) - qv_L = qv_L + alpha_rho_L(i)*qvs(i) - - rho_R = rho_R + alpha_rho_R(i) - gamma_R = gamma_R + alpha_R(i)*gammas(i) - pi_inf_R = pi_inf_R + alpha_R(i)*pi_infs(i) - qv_R = qv_R + alpha_rho_R(i)*qvs(i) + alpha_rho_L(i) = max(0._wp, alpha_rho_L(i)) + alpha_L(i) = min(max(0._wp, alpha_L(i)), 1._wp) + alpha_L_sum = alpha_L_sum + alpha_L(i) + alpha_rho_R(i) = max(0._wp, alpha_rho_R(i)) + alpha_R(i) = min(max(0._wp, alpha_R(i)), 1._wp) + alpha_R_sum = alpha_R_sum + alpha_R(i) end do - if (viscous) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, 2 - Re_L(i) = dflt_real - Re_R(i) = dflt_real + alpha_L = alpha_L/max(alpha_L_sum, sgm_eps) + alpha_R = alpha_R/max(alpha_R_sum, sgm_eps) + end if - if (Re_size(i) > 0) Re_L(i) = 0._wp - if (Re_size(i) > 0) Re_R(i) = 0._wp + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids + rho_L = rho_L + alpha_rho_L(i) + gamma_L = gamma_L + alpha_L(i)*gammas(i) + pi_inf_L = pi_inf_L + alpha_L(i)*pi_infs(i) + qv_L = qv_L + alpha_rho_L(i)*qvs(i) + + rho_R = rho_R + alpha_rho_R(i) + gamma_R = gamma_R + alpha_R(i)*gammas(i) + pi_inf_R = pi_inf_R + alpha_R(i)*pi_infs(i) + qv_R = qv_R + alpha_rho_R(i)*qvs(i) + end do - $:GPU_LOOP(parallelism='[seq]') - do q = 1, Re_size(i) - Re_L(i) = alpha_L(Re_idx(i, q))/Res_gs(i, q) & - + Re_L(i) - Re_R(i) = alpha_R(Re_idx(i, q))/Res_gs(i, q) & - + Re_R(i) - end do + if (viscous) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, 2 + Re_L(i) = dflt_real + Re_R(i) = dflt_real - Re_L(i) = 1._wp/max(Re_L(i), sgm_eps) - Re_R(i) = 1._wp/max(Re_R(i), sgm_eps) - end do - end if + if (Re_size(i) > 0) Re_L(i) = 0._wp + if (Re_size(i) > 0) Re_R(i) = 0._wp - if (chemistry) then $:GPU_LOOP(parallelism='[seq]') - do i = chemxb, chemxe - Ys_L(i - chemxb + 1) = qL_prim_rs${XYZ}$_vf(j, k, l, i) - Ys_R(i - chemxb + 1) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) + do q = 1, Re_size(i) + Re_L(i) = alpha_L(Re_idx(i, q))/Res_gs(i, q) & + + Re_L(i) + Re_R(i) = alpha_R(Re_idx(i, q))/Res_gs(i, q) & + + Re_R(i) end do - call get_mixture_molecular_weight(Ys_L, MW_L) - call get_mixture_molecular_weight(Ys_R, MW_R) - - Xs_L(:) = Ys_L(:)*MW_L/molecular_weights(:) - Xs_R(:) = Ys_R(:)*MW_R/molecular_weights(:) - - R_gas_L = gas_constant/MW_L - R_gas_R = gas_constant/MW_R - T_L = pres_L/rho_L/R_gas_L - T_R = pres_R/rho_R/R_gas_R + Re_L(i) = 1._wp/max(Re_L(i), sgm_eps) + Re_R(i) = 1._wp/max(Re_R(i), sgm_eps) + end do + end if - call get_species_specific_heats_r(T_L, Cp_iL) - call get_species_specific_heats_r(T_R, Cp_iR) + if (chemistry) then + $:GPU_LOOP(parallelism='[seq]') + do i = chemxb, chemxe + Ys_L(i - chemxb + 1) = qL_prim_rs${XYZ}$_vf(j, k, l, i) + Ys_R(i - chemxb + 1) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) + end do - if (chem_params%gamma_method == 1) then - ! gamma_method = 1: Ref. Section 2.3.1 Formulation of doi:10.7907/ZKW8-ES97. - Gamma_iL = Cp_iL/(Cp_iL - 1.0_wp) - Gamma_iR = Cp_iR/(Cp_iR - 1.0_wp) + call get_mixture_molecular_weight(Ys_L, MW_L) + call get_mixture_molecular_weight(Ys_R, MW_R) + + Xs_L(:) = Ys_L(:)*MW_L/molecular_weights(:) + Xs_R(:) = Ys_R(:)*MW_R/molecular_weights(:) + + R_gas_L = gas_constant/MW_L + R_gas_R = gas_constant/MW_R + T_L = pres_L/rho_L/R_gas_L + T_R = pres_R/rho_R/R_gas_R + + call get_species_specific_heats_r(T_L, Cp_iL) + call get_species_specific_heats_r(T_R, Cp_iR) + + if (chem_params%gamma_method == 1) then + ! gamma_method = 1: Ref. Section 2.3.1 Formulation of doi:10.7907/ZKW8-ES97. + Gamma_iL = Cp_iL/(Cp_iL - 1.0_wp) + Gamma_iR = Cp_iR/(Cp_iR - 1.0_wp) + + gamma_L = sum(Xs_L(:)/(Gamma_iL(:) - 1.0_wp)) + gamma_R = sum(Xs_R(:)/(Gamma_iR(:) - 1.0_wp)) + else if (chem_params%gamma_method == 2) then + ! gamma_method = 2: c_p / c_v where c_p, c_v are specific heats. + call get_mixture_specific_heat_cp_mass(T_L, Ys_L, Cp_L) + call get_mixture_specific_heat_cp_mass(T_R, Ys_R, Cp_R) + call get_mixture_specific_heat_cv_mass(T_L, Ys_L, Cv_L) + call get_mixture_specific_heat_cv_mass(T_R, Ys_R, Cv_R) + + Gamm_L = Cp_L/Cv_L + gamma_L = 1.0_wp/(Gamm_L - 1.0_wp) + Gamm_R = Cp_R/Cv_R + gamma_R = 1.0_wp/(Gamm_R - 1.0_wp) + end if - gamma_L = sum(Xs_L(:)/(Gamma_iL(:) - 1.0_wp)) - gamma_R = sum(Xs_R(:)/(Gamma_iR(:) - 1.0_wp)) - else if (chem_params%gamma_method == 2) then - ! gamma_method = 2: c_p / c_v where c_p, c_v are specific heats. - call get_mixture_specific_heat_cp_mass(T_L, Ys_L, Cp_L) - call get_mixture_specific_heat_cp_mass(T_R, Ys_R, Cp_R) - call get_mixture_specific_heat_cv_mass(T_L, Ys_L, Cv_L) - call get_mixture_specific_heat_cv_mass(T_R, Ys_R, Cv_R) + call get_mixture_energy_mass(T_L, Ys_L, E_L) + call get_mixture_energy_mass(T_R, Ys_R, E_R) + + E_L = rho_L*E_L + 5.e-1*rho_L*vel_L_rms + E_R = rho_R*E_R + 5.e-1*rho_R*vel_R_rms + H_L = (E_L + pres_L)/rho_L + H_R = (E_R + pres_R)/rho_R + elseif (mhd .and. relativity) then + Ga%L = 1._wp/sqrt(1._wp - vel_L_rms) + Ga%R = 1._wp/sqrt(1._wp - vel_R_rms) + vdotB%L = vel_L(1)*B%L(1) + vel_L(2)*B%L(2) + vel_L(3)*B%L(3) + vdotB%R = vel_R(1)*B%R(1) + vel_R(2)*B%R(2) + vel_R(3)*B%R(3) + + b4%L(1:3) = B%L(1:3)/Ga%L + Ga%L*vel_L(1:3)*vdotB%L + b4%R(1:3) = B%R(1:3)/Ga%R + Ga%R*vel_R(1:3)*vdotB%R + B2%L = B%L(1)**2._wp + B%L(2)**2._wp + B%L(3)**2._wp + B2%R = B%R(1)**2._wp + B%R(2)**2._wp + B%R(3)**2._wp + + pres_mag%L = 0.5_wp*(B2%L/Ga%L**2._wp + vdotB%L**2._wp) + pres_mag%R = 0.5_wp*(B2%R/Ga%R**2._wp + vdotB%R**2._wp) + + ! Hard-coded EOS + H_L = 1._wp + (gamma_L + 1)*pres_L/rho_L + H_R = 1._wp + (gamma_R + 1)*pres_R/rho_R + + cm%L(1:3) = (rho_L*H_L*Ga%L**2 + B2%L)*vel_L(1:3) - vdotB%L*B%L(1:3) + cm%R(1:3) = (rho_R*H_R*Ga%R**2 + B2%R)*vel_R(1:3) - vdotB%R*B%R(1:3) + + E_L = rho_L*H_L*Ga%L**2 - pres_L + 0.5_wp*(B2%L + vel_L_rms*B2%L - vdotB%L**2._wp) - rho_L*Ga%L + E_R = rho_R*H_R*Ga%R**2 - pres_R + 0.5_wp*(B2%R + vel_R_rms*B2%R - vdotB%R**2._wp) - rho_R*Ga%R + elseif (mhd .and. .not. relativity) then + pres_mag%L = 0.5_wp*(B%L(1)**2._wp + B%L(2)**2._wp + B%L(3)**2._wp) + pres_mag%R = 0.5_wp*(B%R(1)**2._wp + B%R(2)**2._wp + B%R(3)**2._wp) + E_L = gamma_L*pres_L + pi_inf_L + 0.5_wp*rho_L*vel_L_rms + qv_L + pres_mag%L + E_R = gamma_R*pres_R + pi_inf_R + 0.5_wp*rho_R*vel_R_rms + qv_R + pres_mag%R ! includes magnetic energy + H_L = (E_L + pres_L - pres_mag%L)/rho_L + H_R = (E_R + pres_R - pres_mag%R)/rho_R ! stagnation enthalpy here excludes magnetic energy (only used to find speed of sound) + else + E_L = gamma_L*pres_L + pi_inf_L + 5.e-1*rho_L*vel_L_rms + qv_L + E_R = gamma_R*pres_R + pi_inf_R + 5.e-1*rho_R*vel_R_rms + qv_R + H_L = (E_L + pres_L)/rho_L + H_R = (E_R + pres_R)/rho_R + end if - Gamm_L = Cp_L/Cv_L - gamma_L = 1.0_wp/(Gamm_L - 1.0_wp) - Gamm_R = Cp_R/Cv_R - gamma_R = 1.0_wp/(Gamm_R - 1.0_wp) - end if + ! elastic energy update + if (hypoelasticity) then + G_L = 0._wp; G_R = 0._wp - call get_mixture_energy_mass(T_L, Ys_L, E_L) - call get_mixture_energy_mass(T_R, Ys_R, E_R) + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids + G_L = G_L + alpha_L(i)*Gs_rs(i) + G_R = G_R + alpha_R(i)*Gs_rs(i) + end do - E_L = rho_L*E_L + 5.e-1*rho_L*vel_L_rms - E_R = rho_R*E_R + 5.e-1*rho_R*vel_R_rms - H_L = (E_L + pres_L)/rho_L - H_R = (E_R + pres_R)/rho_R - elseif (mhd .and. relativity) then - Ga%L = 1._wp/sqrt(1._wp - vel_L_rms) - Ga%R = 1._wp/sqrt(1._wp - vel_R_rms) - vdotB%L = vel_L(1)*B%L(1) + vel_L(2)*B%L(2) + vel_L(3)*B%L(3) - vdotB%R = vel_R(1)*B%R(1) + vel_R(2)*B%R(2) + vel_R(3)*B%R(3) - - b4%L(1:3) = B%L(1:3)/Ga%L + Ga%L*vel_L(1:3)*vdotB%L - b4%R(1:3) = B%R(1:3)/Ga%R + Ga%R*vel_R(1:3)*vdotB%R - B2%L = B%L(1)**2._wp + B%L(2)**2._wp + B%L(3)**2._wp - B2%R = B%R(1)**2._wp + B%R(2)**2._wp + B%R(3)**2._wp - - pres_mag%L = 0.5_wp*(B2%L/Ga%L**2._wp + vdotB%L**2._wp) - pres_mag%R = 0.5_wp*(B2%R/Ga%R**2._wp + vdotB%R**2._wp) - - ! Hard-coded EOS - H_L = 1._wp + (gamma_L + 1)*pres_L/rho_L - H_R = 1._wp + (gamma_R + 1)*pres_R/rho_R - - cm%L(1:3) = (rho_L*H_L*Ga%L**2 + B2%L)*vel_L(1:3) - vdotB%L*B%L(1:3) - cm%R(1:3) = (rho_R*H_R*Ga%R**2 + B2%R)*vel_R(1:3) - vdotB%R*B%R(1:3) - - E_L = rho_L*H_L*Ga%L**2 - pres_L + 0.5_wp*(B2%L + vel_L_rms*B2%L - vdotB%L**2._wp) - rho_L*Ga%L - E_R = rho_R*H_R*Ga%R**2 - pres_R + 0.5_wp*(B2%R + vel_R_rms*B2%R - vdotB%R**2._wp) - rho_R*Ga%R - elseif (mhd .and. .not. relativity) then - pres_mag%L = 0.5_wp*(B%L(1)**2._wp + B%L(2)**2._wp + B%L(3)**2._wp) - pres_mag%R = 0.5_wp*(B%R(1)**2._wp + B%R(2)**2._wp + B%R(3)**2._wp) - E_L = gamma_L*pres_L + pi_inf_L + 0.5_wp*rho_L*vel_L_rms + qv_L + pres_mag%L - E_R = gamma_R*pres_R + pi_inf_R + 0.5_wp*rho_R*vel_R_rms + qv_R + pres_mag%R ! includes magnetic energy - H_L = (E_L + pres_L - pres_mag%L)/rho_L - H_R = (E_R + pres_R - pres_mag%R)/rho_R ! stagnation enthalpy here excludes magnetic energy (only used to find speed of sound) - else - E_L = gamma_L*pres_L + pi_inf_L + 5.e-1*rho_L*vel_L_rms + qv_L - E_R = gamma_R*pres_R + pi_inf_R + 5.e-1*rho_R*vel_R_rms + qv_R - H_L = (E_L + pres_L)/rho_L - H_R = (E_R + pres_R)/rho_R + if (cont_damage) then + G_L = G_L*max((1._wp - qL_prim_rs${XYZ}$_vf(j, k, l, damage_idx)), 0._wp) + G_R = G_R*max((1._wp - qR_prim_rs${XYZ}$_vf(j, k, l, damage_idx)), 0._wp) end if - ! elastic energy update - if (hypoelasticity) then - G_L = 0._wp; G_R = 0._wp - - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - G_L = G_L + alpha_L(i)*Gs_rs(i) - G_R = G_R + alpha_R(i)*Gs_rs(i) - end do - - if (cont_damage) then - G_L = G_L*max((1._wp - qL_prim_rs${XYZ}$_vf(j, k, l, damage_idx)), 0._wp) - G_R = G_R*max((1._wp - qR_prim_rs${XYZ}$_vf(j, k, l, damage_idx)), 0._wp) - end if - - $:GPU_LOOP(parallelism='[seq]') - do i = 1, strxe - strxb + 1 - tau_e_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, strxb - 1 + i) - tau_e_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, strxb - 1 + i) - ! Elastic contribution to energy if G large enough - !TODO take out if statement if stable without - if ((G_L > 1000) .and. (G_R > 1000)) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, strxe - strxb + 1 + tau_e_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, strxb - 1 + i) + tau_e_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, strxb - 1 + i) + ! Elastic contribution to energy if G large enough + !TODO take out if statement if stable without + if ((G_L > 1000) .and. (G_R > 1000)) then + E_L = E_L + (tau_e_L(i)*tau_e_L(i))/(4._wp*G_L) + E_R = E_R + (tau_e_R(i)*tau_e_R(i))/(4._wp*G_R) + ! Double for shear stresses + if (any(strxb - 1 + i == shear_indices)) then E_L = E_L + (tau_e_L(i)*tau_e_L(i))/(4._wp*G_L) E_R = E_R + (tau_e_R(i)*tau_e_R(i))/(4._wp*G_R) - ! Double for shear stresses - if (any(strxb - 1 + i == shear_indices)) then - E_L = E_L + (tau_e_L(i)*tau_e_L(i))/(4._wp*G_L) - E_R = E_R + (tau_e_R(i)*tau_e_R(i))/(4._wp*G_R) - end if end if - end do - end if - - ! elastic energy update - !if ( hyperelasticity ) then - ! G_L = 0._wp - ! G_R = 0._wp - ! - ! $:GPU_LOOP(parallelism='[seq]') - ! do i = 1, num_fluids - ! G_L = G_L + alpha_L(i)*Gs_rs(i) - ! G_R = G_R + alpha_R(i)*Gs_rs(i) - ! end do - ! ! Elastic contribution to energy if G large enough - ! if ((G_L > 1.e-3_wp) .and. (G_R > 1.e-3_wp)) then - ! E_L = E_L + G_L*qL_prim_rs${XYZ}$_vf(j, k, l, xiend + 1) - ! E_R = E_R + G_R*qR_prim_rs${XYZ}$_vf(j + 1, k, l, xiend + 1) - ! $:GPU_LOOP(parallelism='[seq]') - ! do i = 1, b_size-1 - ! tau_e_L(i) = G_L*qL_prim_rs${XYZ}$_vf(j, k, l, strxb - 1 + i) - ! tau_e_R(i) = G_R*qR_prim_rs${XYZ}$_vf(j + 1, k, l, strxb - 1 + i) - ! end do - ! $:GPU_LOOP(parallelism='[seq]') - ! do i = 1, b_size-1 - ! tau_e_L(i) = 0._wp - ! tau_e_R(i) = 0._wp - ! end do - ! $:GPU_LOOP(parallelism='[seq]') - ! do i = 1, num_dims - ! xi_field_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, xibeg - 1 + i) - ! xi_field_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, xibeg - 1 + i) - ! end do - ! end if - !end if - - @:compute_average_state() - - call s_compute_speed_of_sound(pres_L, rho_L, gamma_L, pi_inf_L, H_L, alpha_L, & - vel_L_rms, 0._wp, c_L) - - call s_compute_speed_of_sound(pres_R, rho_R, gamma_R, pi_inf_R, H_R, alpha_R, & - vel_R_rms, 0._wp, c_R) + end if + end do + end if - !> The computation of c_avg does not require all the variables, and therefore the non '_avg' - ! variables are placeholders to call the subroutine. + ! elastic energy update + !if ( hyperelasticity ) then + ! G_L = 0._wp + ! G_R = 0._wp + ! + ! $:GPU_LOOP(parallelism='[seq]') + ! do i = 1, num_fluids + ! G_L = G_L + alpha_L(i)*Gs_rs(i) + ! G_R = G_R + alpha_R(i)*Gs_rs(i) + ! end do + ! ! Elastic contribution to energy if G large enough + ! if ((G_L > 1.e-3_wp) .and. (G_R > 1.e-3_wp)) then + ! E_L = E_L + G_L*qL_prim_rs${XYZ}$_vf(j, k, l, xiend + 1) + ! E_R = E_R + G_R*qR_prim_rs${XYZ}$_vf(j + 1, k, l, xiend + 1) + ! $:GPU_LOOP(parallelism='[seq]') + ! do i = 1, b_size-1 + ! tau_e_L(i) = G_L*qL_prim_rs${XYZ}$_vf(j, k, l, strxb - 1 + i) + ! tau_e_R(i) = G_R*qR_prim_rs${XYZ}$_vf(j + 1, k, l, strxb - 1 + i) + ! end do + ! $:GPU_LOOP(parallelism='[seq]') + ! do i = 1, b_size-1 + ! tau_e_L(i) = 0._wp + ! tau_e_R(i) = 0._wp + ! end do + ! $:GPU_LOOP(parallelism='[seq]') + ! do i = 1, num_dims + ! xi_field_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, xibeg - 1 + i) + ! xi_field_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, xibeg - 1 + i) + ! end do + ! end if + !end if + + @:compute_average_state() + + call s_compute_speed_of_sound(pres_L, rho_L, gamma_L, pi_inf_L, H_L, alpha_L, & + vel_L_rms, 0._wp, c_L) + + call s_compute_speed_of_sound(pres_R, rho_R, gamma_R, pi_inf_R, H_R, alpha_R, & + vel_R_rms, 0._wp, c_R) + + !> The computation of c_avg does not require all the variables, and therefore the non '_avg' + ! variables are placeholders to call the subroutine. + + call s_compute_speed_of_sound(pres_R, rho_avg, gamma_avg, pi_inf_R, H_avg, alpha_R, & + vel_avg_rms, c_sum_Yi_Phi, c_avg) + + if (mhd) then + call s_compute_fast_magnetosonic_speed(rho_L, c_L, B%L, norm_dir, c_fast%L, H_L) + call s_compute_fast_magnetosonic_speed(rho_R, c_R, B%R, norm_dir, c_fast%R, H_R) + end if - call s_compute_speed_of_sound(pres_R, rho_avg, gamma_avg, pi_inf_R, H_avg, alpha_R, & - vel_avg_rms, c_sum_Yi_Phi, c_avg) + if (viscous) then + if (chemistry) then + call compute_viscosity_and_inversion(T_L, Ys_L, T_R, Ys_R, Re_L(1), Re_R(1)) + end if + $:GPU_LOOP(parallelism='[seq]') + do i = 1, 2 + Re_avg_rs${XYZ}$_vf(j, k, l, i) = 2._wp/(1._wp/Re_L(i) + 1._wp/Re_R(i)) + end do + end if + if (wave_speeds == 1) then if (mhd) then - call s_compute_fast_magnetosonic_speed(rho_L, c_L, B%L, norm_dir, c_fast%L, H_L) - call s_compute_fast_magnetosonic_speed(rho_R, c_R, B%R, norm_dir, c_fast%R, H_R) + s_L = min(vel_L(dir_idx(1)) - c_fast%L, vel_R(dir_idx(1)) - c_fast%R) + s_R = max(vel_R(dir_idx(1)) + c_fast%R, vel_L(dir_idx(1)) + c_fast%L) + elseif (hypoelasticity) then + s_L = min(vel_L(dir_idx(1)) - sqrt(c_L*c_L + & + (((4._wp*G_L)/3._wp) + & + tau_e_L(dir_idx_tau(1)))/rho_L) & + , vel_R(dir_idx(1)) - sqrt(c_R*c_R + & + (((4._wp*G_R)/3._wp) + & + tau_e_R(dir_idx_tau(1)))/rho_R)) + s_R = max(vel_R(dir_idx(1)) + sqrt(c_R*c_R + & + (((4._wp*G_R)/3._wp) + & + tau_e_R(dir_idx_tau(1)))/rho_R) & + , vel_L(dir_idx(1)) + sqrt(c_L*c_L + & + (((4._wp*G_L)/3._wp) + & + tau_e_L(dir_idx_tau(1)))/rho_L)) + else if (hyperelasticity) then + s_L = min(vel_L(dir_idx(1)) - sqrt(c_L*c_L + (4._wp*G_L/3._wp)/rho_L) & + , vel_R(dir_idx(1)) - sqrt(c_R*c_R + (4._wp*G_R/3._wp)/rho_R)) + s_R = max(vel_R(dir_idx(1)) + sqrt(c_R*c_R + (4._wp*G_R/3._wp)/rho_R) & + , vel_L(dir_idx(1)) + sqrt(c_L*c_L + (4._wp*G_L/3._wp)/rho_L)) + else + s_L = min(vel_L(dir_idx(1)) - c_L, vel_R(dir_idx(1)) - c_R) + s_R = max(vel_R(dir_idx(1)) + c_R, vel_L(dir_idx(1)) + c_L) end if - if (viscous) then - if (chemistry) then - call compute_viscosity_and_inversion(T_L, Ys_L, T_R, Ys_R, Re_L(1), Re_R(1)) - end if - $:GPU_LOOP(parallelism='[seq]') - do i = 1, 2 - Re_avg_rs${XYZ}$_vf(j, k, l, i) = 2._wp/(1._wp/Re_L(i) + 1._wp/Re_R(i)) - end do - end if + s_S = (pres_R - pres_L + rho_L*vel_L(dir_idx(1))* & + (s_L - vel_L(dir_idx(1))) - & + rho_R*vel_R(dir_idx(1))* & + (s_R - vel_R(dir_idx(1)))) & + /(rho_L*(s_L - vel_L(dir_idx(1))) - & + rho_R*(s_R - vel_R(dir_idx(1)))) + elseif (wave_speeds == 2) then + pres_SL = 5.e-1_wp*(pres_L + pres_R + rho_avg*c_avg* & + (vel_L(dir_idx(1)) - & + vel_R(dir_idx(1)))) + + pres_SR = pres_SL + + Ms_L = max(1._wp, sqrt(1._wp + ((5.e-1_wp + gamma_L)/(1._wp + gamma_L))* & + (pres_SL/pres_L - 1._wp)*pres_L/ & + ((pres_L + pi_inf_L/(1._wp + gamma_L))))) + Ms_R = max(1._wp, sqrt(1._wp + ((5.e-1_wp + gamma_R)/(1._wp + gamma_R))* & + (pres_SR/pres_R - 1._wp)*pres_R/ & + ((pres_R + pi_inf_R/(1._wp + gamma_R))))) + + s_L = vel_L(dir_idx(1)) - c_L*Ms_L + s_R = vel_R(dir_idx(1)) + c_R*Ms_R + + s_S = 5.e-1_wp*((vel_L(dir_idx(1)) + vel_R(dir_idx(1))) + & + (pres_L - pres_R)/ & + (rho_avg*c_avg)) + end if - if (wave_speeds == 1) then - if (mhd) then - s_L = min(vel_L(dir_idx(1)) - c_fast%L, vel_R(dir_idx(1)) - c_fast%R) - s_R = max(vel_R(dir_idx(1)) + c_fast%R, vel_L(dir_idx(1)) + c_fast%L) - elseif (hypoelasticity) then - s_L = min(vel_L(dir_idx(1)) - sqrt(c_L*c_L + & - (((4._wp*G_L)/3._wp) + & - tau_e_L(dir_idx_tau(1)))/rho_L) & - , vel_R(dir_idx(1)) - sqrt(c_R*c_R + & - (((4._wp*G_R)/3._wp) + & - tau_e_R(dir_idx_tau(1)))/rho_R)) - s_R = max(vel_R(dir_idx(1)) + sqrt(c_R*c_R + & - (((4._wp*G_R)/3._wp) + & - tau_e_R(dir_idx_tau(1)))/rho_R) & - , vel_L(dir_idx(1)) + sqrt(c_L*c_L + & - (((4._wp*G_L)/3._wp) + & - tau_e_L(dir_idx_tau(1)))/rho_L)) - else if (hyperelasticity) then - s_L = min(vel_L(dir_idx(1)) - sqrt(c_L*c_L + (4._wp*G_L/3._wp)/rho_L) & - , vel_R(dir_idx(1)) - sqrt(c_R*c_R + (4._wp*G_R/3._wp)/rho_R)) - s_R = max(vel_R(dir_idx(1)) + sqrt(c_R*c_R + (4._wp*G_R/3._wp)/rho_R) & - , vel_L(dir_idx(1)) + sqrt(c_L*c_L + (4._wp*G_L/3._wp)/rho_L)) - else - s_L = min(vel_L(dir_idx(1)) - c_L, vel_R(dir_idx(1)) - c_R) - s_R = max(vel_R(dir_idx(1)) + c_R, vel_L(dir_idx(1)) + c_L) - end if + s_M = min(0._wp, s_L); s_P = max(0._wp, s_R) - s_S = (pres_R - pres_L + rho_L*vel_L(dir_idx(1))* & - (s_L - vel_L(dir_idx(1))) - & - rho_R*vel_R(dir_idx(1))* & - (s_R - vel_R(dir_idx(1)))) & - /(rho_L*(s_L - vel_L(dir_idx(1))) - & - rho_R*(s_R - vel_R(dir_idx(1)))) - elseif (wave_speeds == 2) then - pres_SL = 5.e-1_wp*(pres_L + pres_R + rho_avg*c_avg* & - (vel_L(dir_idx(1)) - & - vel_R(dir_idx(1)))) + xi_M = (5.e-1_wp + sign(5.e-1_wp, s_L)) & + + (5.e-1_wp - sign(5.e-1_wp, s_L)) & + *(5.e-1_wp + sign(5.e-1_wp, s_R)) + xi_P = (5.e-1_wp - sign(5.e-1_wp, s_R)) & + + (5.e-1_wp - sign(5.e-1_wp, s_L)) & + *(5.e-1_wp + sign(5.e-1_wp, s_R)) - pres_SR = pres_SL + ! Low Mach correction + if (low_Mach == 1) then + @:compute_low_Mach_correction() + else + pcorr = 0._wp + end if - Ms_L = max(1._wp, sqrt(1._wp + ((5.e-1_wp + gamma_L)/(1._wp + gamma_L))* & - (pres_SL/pres_L - 1._wp)*pres_L/ & - ((pres_L + pi_inf_L/(1._wp + gamma_L))))) - Ms_R = max(1._wp, sqrt(1._wp + ((5.e-1_wp + gamma_R)/(1._wp + gamma_R))* & - (pres_SR/pres_R - 1._wp)*pres_R/ & - ((pres_R + pi_inf_R/(1._wp + gamma_R))))) + ! Mass + if (.not. relativity) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, contxe + flux_rs${XYZ}$_vf(j, k, l, i) = & + (s_M*alpha_rho_R(i)*vel_R(norm_dir) & + - s_P*alpha_rho_L(i)*vel_L(norm_dir) & + + s_M*s_P*(alpha_rho_L(i) & + - alpha_rho_R(i))) & + /(s_M - s_P) + end do + elseif (relativity) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, contxe + flux_rs${XYZ}$_vf(j, k, l, i) = & + (s_M*Ga%R*alpha_rho_R(i)*vel_R(norm_dir) & + - s_P*Ga%L*alpha_rho_L(i)*vel_L(norm_dir) & + + s_M*s_P*(Ga%L*alpha_rho_L(i) & + - Ga%R*alpha_rho_R(i))) & + /(s_M - s_P) + end do + end if - s_L = vel_L(dir_idx(1)) - c_L*Ms_L - s_R = vel_R(dir_idx(1)) + c_R*Ms_R + ! Momentum + if (mhd .and. (.not. relativity)) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, 3 + ! Flux of rho*v_i in the ${XYZ}$ direction + ! = rho * v_i * v_${XYZ}$ - B_i * B_${XYZ}$ + delta_(${XYZ}$,i) * p_tot + flux_rs${XYZ}$_vf(j, k, l, contxe + i) = & + (s_M*(rho_R*vel_R(i)*vel_R(norm_dir) & + - B%R(i)*B%R(norm_dir) & + + dir_flg(i)*(pres_R + pres_mag%R)) & + - s_P*(rho_L*vel_L(i)*vel_L(norm_dir) & + - B%L(i)*B%L(norm_dir) & + + dir_flg(i)*(pres_L + pres_mag%L)) & + + s_M*s_P*(rho_L*vel_L(i) - rho_R*vel_R(i))) & + /(s_M - s_P) + end do + elseif (mhd .and. relativity) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, 3 + ! Flux of m_i in the ${XYZ}$ direction + ! = m_i * v_${XYZ}$ - b_i/Gamma * B_${XYZ}$ + delta_(${XYZ}$,i) * p_tot + flux_rs${XYZ}$_vf(j, k, l, contxe + i) = & + (s_M*(cm%R(i)*vel_R(norm_dir) & + - b4%R(i)/Ga%R*B%R(norm_dir) & + + dir_flg(i)*(pres_R + pres_mag%R)) & + - s_P*(cm%L(i)*vel_L(norm_dir) & + - b4%L(i)/Ga%L*B%L(norm_dir) & + + dir_flg(i)*(pres_L + pres_mag%L)) & + + s_M*s_P*(cm%L(i) - cm%R(i))) & + /(s_M - s_P) + end do + elseif (bubbles_euler) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_vels + flux_rs${XYZ}$_vf(j, k, l, contxe + dir_idx(i)) = & + (s_M*(rho_R*vel_R(dir_idx(1)) & + *vel_R(dir_idx(i)) & + + dir_flg(dir_idx(i))*(pres_R - ptilde_R)) & + - s_P*(rho_L*vel_L(dir_idx(1)) & + *vel_L(dir_idx(i)) & + + dir_flg(dir_idx(i))*(pres_L - ptilde_L)) & + + s_M*s_P*(rho_L*vel_L(dir_idx(i)) & + - rho_R*vel_R(dir_idx(i)))) & + /(s_M - s_P) & + + (s_M/s_L)*(s_P/s_R)*pcorr*(vel_R(dir_idx(i)) - vel_L(dir_idx(i))) + end do + else if (hypoelasticity) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_vels + flux_rs${XYZ}$_vf(j, k, l, contxe + dir_idx(i)) = & + (s_M*(rho_R*vel_R(dir_idx(1)) & + *vel_R(dir_idx(i)) & + + dir_flg(dir_idx(i))*pres_R & + - tau_e_R(dir_idx_tau(i))) & + - s_P*(rho_L*vel_L(dir_idx(1)) & + *vel_L(dir_idx(i)) & + + dir_flg(dir_idx(i))*pres_L & + - tau_e_L(dir_idx_tau(i))) & + + s_M*s_P*(rho_L*vel_L(dir_idx(i)) & + - rho_R*vel_R(dir_idx(i)))) & + /(s_M - s_P) + end do + else + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_vels + flux_rs${XYZ}$_vf(j, k, l, contxe + dir_idx(i)) = & + (s_M*(rho_R*vel_R(dir_idx(1)) & + *vel_R(dir_idx(i)) & + + dir_flg(dir_idx(i))*pres_R) & + - s_P*(rho_L*vel_L(dir_idx(1)) & + *vel_L(dir_idx(i)) & + + dir_flg(dir_idx(i))*pres_L) & + + s_M*s_P*(rho_L*vel_L(dir_idx(i)) & + - rho_R*vel_R(dir_idx(i)))) & + /(s_M - s_P) & + + (s_M/s_L)*(s_P/s_R)*pcorr*(vel_R(dir_idx(i)) - vel_L(dir_idx(i))) + end do + end if - s_S = 5.e-1_wp*((vel_L(dir_idx(1)) + vel_R(dir_idx(1))) + & - (pres_L - pres_R)/ & - (rho_avg*c_avg)) - end if + ! Energy + if (mhd .and. (.not. relativity)) then + ! energy flux = (E + p + p_mag) * v_${XYZ}$ - B_${XYZ}$ * (v_x*B_x + v_y*B_y + v_z*B_z) + flux_rs${XYZ}$_vf(j, k, l, E_idx) = & + (s_M*(vel_R(norm_dir)*(E_R + pres_R + pres_mag%R) - B%R(norm_dir)*(vel_R(1)*B%R(1) + vel_R(2)*B%R(2) + vel_R(3)*B%R(3))) & + - s_P*(vel_L(norm_dir)*(E_L + pres_L + pres_mag%L) - B%L(norm_dir)*(vel_L(1)*B%L(1) + vel_L(2)*B%L(2) + vel_L(3)*B%L(3))) & + + s_M*s_P*(E_L - E_R)) & + /(s_M - s_P) + elseif (mhd .and. relativity) then + ! energy flux = m_${XYZ}$ - mass flux + ! Hard-coded for single-component for now + flux_rs${XYZ}$_vf(j, k, l, E_idx) = & + (s_M*(cm%R(norm_dir) - Ga%R*alpha_rho_R(1)*vel_R(norm_dir)) & + - s_P*(cm%L(norm_dir) - Ga%L*alpha_rho_L(1)*vel_L(norm_dir)) & + + s_M*s_P*(E_L - E_R)) & + /(s_M - s_P) + else if (bubbles_euler) then + flux_rs${XYZ}$_vf(j, k, l, E_idx) = & + (s_M*vel_R(dir_idx(1))*(E_R + pres_R - ptilde_R) & + - s_P*vel_L(dir_idx(1))*(E_L + pres_L - ptilde_L) & + + s_M*s_P*(E_L - E_R)) & + /(s_M - s_P) & + + (s_M/s_L)*(s_P/s_R)*pcorr*(vel_R_rms - vel_L_rms)/2._wp + else if (hypoelasticity) then + flux_tau_L = 0._wp; flux_tau_R = 0._wp + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_dims + flux_tau_L = flux_tau_L + tau_e_L(dir_idx_tau(i))*vel_L(dir_idx(i)) + flux_tau_R = flux_tau_R + tau_e_R(dir_idx_tau(i))*vel_R(dir_idx(i)) + end do + flux_rs${XYZ}$_vf(j, k, l, E_idx) = & + (s_M*(vel_R(dir_idx(1))*(E_R + pres_R) - flux_tau_R) & + - s_P*(vel_L(dir_idx(1))*(E_L + pres_L) - flux_tau_L) & + + s_M*s_P*(E_L - E_R))/(s_M - s_P) + else + flux_rs${XYZ}$_vf(j, k, l, E_idx) = & + (s_M*vel_R(dir_idx(1))*(E_R + pres_R) & + - s_P*vel_L(dir_idx(1))*(E_L + pres_L) & + + s_M*s_P*(E_L - E_R)) & + /(s_M - s_P) & + + (s_M/s_L)*(s_P/s_R)*pcorr*(vel_R_rms - vel_L_rms)/2._wp + end if - s_M = min(0._wp, s_L); s_P = max(0._wp, s_R) + ! Elastic Stresses + if (hypoelasticity) then + do i = 1, strxe - strxb + 1 !TODO: this indexing may be slow + flux_rs${XYZ}$_vf(j, k, l, strxb - 1 + i) = & + (s_M*(rho_R*vel_R(dir_idx(1)) & + *tau_e_R(i)) & + - s_P*(rho_L*vel_L(dir_idx(1)) & + *tau_e_L(i)) & + + s_M*s_P*(rho_L*tau_e_L(i) & + - rho_R*tau_e_R(i))) & + /(s_M - s_P) + end do + end if - xi_M = (5.e-1_wp + sign(5.e-1_wp, s_L)) & - + (5.e-1_wp - sign(5.e-1_wp, s_L)) & - *(5.e-1_wp + sign(5.e-1_wp, s_R)) - xi_P = (5.e-1_wp - sign(5.e-1_wp, s_R)) & - + (5.e-1_wp - sign(5.e-1_wp, s_L)) & - *(5.e-1_wp + sign(5.e-1_wp, s_R)) + ! Advection + $:GPU_LOOP(parallelism='[seq]') + do i = advxb, advxe + flux_rs${XYZ}$_vf(j, k, l, i) = & + (qL_prim_rs${XYZ}$_vf(j, k, l, i) & + - qR_prim_rs${XYZ}$_vf(j + 1, k, l, i)) & + *s_M*s_P/(s_M - s_P) + flux_src_rs${XYZ}$_vf(j, k, l, i) = & + (s_M*qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) & + - s_P*qL_prim_rs${XYZ}$_vf(j, k, l, i)) & + /(s_M - s_P) + end do - ! Low Mach correction - if (low_Mach == 1) then - @:compute_low_Mach_correction() - else - pcorr = 0._wp + if (bubbles_euler) then + ! From HLLC: Kills mass transport @ bubble gas density + if (num_fluids > 1) then + flux_rs${XYZ}$_vf(j, k, l, contxe) = 0._wp end if + end if - ! Mass - if (.not. relativity) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, contxe - flux_rs${XYZ}$_vf(j, k, l, i) = & - (s_M*alpha_rho_R(i)*vel_R(norm_dir) & - - s_P*alpha_rho_L(i)*vel_L(norm_dir) & - + s_M*s_P*(alpha_rho_L(i) & - - alpha_rho_R(i))) & - /(s_M - s_P) - end do - elseif (relativity) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, contxe - flux_rs${XYZ}$_vf(j, k, l, i) = & - (s_M*Ga%R*alpha_rho_R(i)*vel_R(norm_dir) & - - s_P*Ga%L*alpha_rho_L(i)*vel_L(norm_dir) & - + s_M*s_P*(Ga%L*alpha_rho_L(i) & - - Ga%R*alpha_rho_R(i))) & - /(s_M - s_P) - end do - end if + if (chemistry) then + $:GPU_LOOP(parallelism='[seq]') + do i = chemxb, chemxe + Y_L = qL_prim_rs${XYZ}$_vf(j, k, l, i) + Y_R = qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) + + flux_rs${XYZ}$_vf(j, k, l, i) = (s_M*Y_R*rho_R*vel_R(dir_idx(1)) & + - s_P*Y_L*rho_L*vel_L(dir_idx(1)) & + + s_M*s_P*(Y_L*rho_L - Y_R*rho_R)) & + /(s_M - s_P) + flux_src_rs${XYZ}$_vf(j, k, l, i) = 0._wp + end do + end if - ! Momentum - if (mhd .and. (.not. relativity)) then + if (mhd) then + if (n == 0) then ! 1D: d/dx flux only & Bx = Bx0 = const. + ! B_y flux = v_x * B_y - v_y * Bx0 + ! B_z flux = v_x * B_z - v_z * Bx0 $:GPU_LOOP(parallelism='[seq]') - do i = 1, 3 - ! Flux of rho*v_i in the ${XYZ}$ direction - ! = rho * v_i * v_${XYZ}$ - B_i * B_${XYZ}$ + delta_(${XYZ}$,i) * p_tot - flux_rs${XYZ}$_vf(j, k, l, contxe + i) = & - (s_M*(rho_R*vel_R(i)*vel_R(norm_dir) & - - B%R(i)*B%R(norm_dir) & - + dir_flg(i)*(pres_R + pres_mag%R)) & - - s_P*(rho_L*vel_L(i)*vel_L(norm_dir) & - - B%L(i)*B%L(norm_dir) & - + dir_flg(i)*(pres_L + pres_mag%L)) & - + s_M*s_P*(rho_L*vel_L(i) - rho_R*vel_R(i))) & - /(s_M - s_P) + do i = 0, 1 + flux_rsx_vf(j, k, l, B_idx%beg + i) = (s_M*(vel_R(1)*B%R(2 + i) - vel_R(2 + i)*Bx0) & + - s_P*(vel_L(1)*B%L(2 + i) - vel_L(2 + i)*Bx0) & + + s_M*s_P*(B%L(2 + i) - B%R(2 + i)))/(s_M - s_P) end do - elseif (mhd .and. relativity) then + else ! 2D/3D: Bx, By, Bz /= const. but zero flux component in the same direction + ! B_x d/d${XYZ}$ flux = (1 - delta(x,${XYZ}$)) * (v_${XYZ}$ * B_x - v_x * B_${XYZ}$) + ! B_y d/d${XYZ}$ flux = (1 - delta(y,${XYZ}$)) * (v_${XYZ}$ * B_y - v_y * B_${XYZ}$) + ! B_z d/d${XYZ}$ flux = (1 - delta(z,${XYZ}$)) * (v_${XYZ}$ * B_z - v_z * B_${XYZ}$) $:GPU_LOOP(parallelism='[seq]') - do i = 1, 3 - ! Flux of m_i in the ${XYZ}$ direction - ! = m_i * v_${XYZ}$ - b_i/Gamma * B_${XYZ}$ + delta_(${XYZ}$,i) * p_tot - flux_rs${XYZ}$_vf(j, k, l, contxe + i) = & - (s_M*(cm%R(i)*vel_R(norm_dir) & - - b4%R(i)/Ga%R*B%R(norm_dir) & - + dir_flg(i)*(pres_R + pres_mag%R)) & - - s_P*(cm%L(i)*vel_L(norm_dir) & - - b4%L(i)/Ga%L*B%L(norm_dir) & - + dir_flg(i)*(pres_L + pres_mag%L)) & - + s_M*s_P*(cm%L(i) - cm%R(i))) & - /(s_M - s_P) - end do - elseif (bubbles_euler) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_vels - flux_rs${XYZ}$_vf(j, k, l, contxe + dir_idx(i)) = & - (s_M*(rho_R*vel_R(dir_idx(1)) & - *vel_R(dir_idx(i)) & - + dir_flg(dir_idx(i))*(pres_R - ptilde_R)) & - - s_P*(rho_L*vel_L(dir_idx(1)) & - *vel_L(dir_idx(i)) & - + dir_flg(dir_idx(i))*(pres_L - ptilde_L)) & - + s_M*s_P*(rho_L*vel_L(dir_idx(i)) & - - rho_R*vel_R(dir_idx(i)))) & - /(s_M - s_P) & - + (s_M/s_L)*(s_P/s_R)*pcorr*(vel_R(dir_idx(i)) - vel_L(dir_idx(i))) - end do - else if (hypoelasticity) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_vels - flux_rs${XYZ}$_vf(j, k, l, contxe + dir_idx(i)) = & - (s_M*(rho_R*vel_R(dir_idx(1)) & - *vel_R(dir_idx(i)) & - + dir_flg(dir_idx(i))*pres_R & - - tau_e_R(dir_idx_tau(i))) & - - s_P*(rho_L*vel_L(dir_idx(1)) & - *vel_L(dir_idx(i)) & - + dir_flg(dir_idx(i))*pres_L & - - tau_e_L(dir_idx_tau(i))) & - + s_M*s_P*(rho_L*vel_L(dir_idx(i)) & - - rho_R*vel_R(dir_idx(i)))) & - /(s_M - s_P) - end do - else - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_vels - flux_rs${XYZ}$_vf(j, k, l, contxe + dir_idx(i)) = & - (s_M*(rho_R*vel_R(dir_idx(1)) & - *vel_R(dir_idx(i)) & - + dir_flg(dir_idx(i))*pres_R) & - - s_P*(rho_L*vel_L(dir_idx(1)) & - *vel_L(dir_idx(i)) & - + dir_flg(dir_idx(i))*pres_L) & - + s_M*s_P*(rho_L*vel_L(dir_idx(i)) & - - rho_R*vel_R(dir_idx(i)))) & - /(s_M - s_P) & - + (s_M/s_L)*(s_P/s_R)*pcorr*(vel_R(dir_idx(i)) - vel_L(dir_idx(i))) + do i = 0, 2 + flux_rs${XYZ}$_vf(j, k, l, B_idx%beg + i) = (1 - dir_flg(i + 1))*( & + s_M*(vel_R(dir_idx(1))*B%R(i + 1) - vel_R(i + 1)*B%R(norm_dir)) - & + s_P*(vel_L(dir_idx(1))*B%L(i + 1) - vel_L(i + 1)*B%L(norm_dir)) + & + s_M*s_P*(B%L(i + 1) - B%R(i + 1)))/(s_M - s_P) end do end if + flux_src_rs${XYZ}$_vf(j, k, l, advxb) = 0._wp + end if - ! Energy - if (mhd .and. (.not. relativity)) then - ! energy flux = (E + p + p_mag) * v_${XYZ}$ - B_${XYZ}$ * (v_x*B_x + v_y*B_y + v_z*B_z) - flux_rs${XYZ}$_vf(j, k, l, E_idx) = & - (s_M*(vel_R(norm_dir)*(E_R + pres_R + pres_mag%R) - B%R(norm_dir)*(vel_R(1)*B%R(1) + vel_R(2)*B%R(2) + vel_R(3)*B%R(3))) & - - s_P*(vel_L(norm_dir)*(E_L + pres_L + pres_mag%L) - B%L(norm_dir)*(vel_L(1)*B%L(1) + vel_L(2)*B%L(2) + vel_L(3)*B%L(3))) & - + s_M*s_P*(E_L - E_R)) & - /(s_M - s_P) - elseif (mhd .and. relativity) then - ! energy flux = m_${XYZ}$ - mass flux - ! Hard-coded for single-component for now - flux_rs${XYZ}$_vf(j, k, l, E_idx) = & - (s_M*(cm%R(norm_dir) - Ga%R*alpha_rho_R(1)*vel_R(norm_dir)) & - - s_P*(cm%L(norm_dir) - Ga%L*alpha_rho_L(1)*vel_L(norm_dir)) & - + s_M*s_P*(E_L - E_R)) & - /(s_M - s_P) - else if (bubbles_euler) then - flux_rs${XYZ}$_vf(j, k, l, E_idx) = & - (s_M*vel_R(dir_idx(1))*(E_R + pres_R - ptilde_R) & - - s_P*vel_L(dir_idx(1))*(E_L + pres_L - ptilde_L) & - + s_M*s_P*(E_L - E_R)) & - /(s_M - s_P) & - + (s_M/s_L)*(s_P/s_R)*pcorr*(vel_R_rms - vel_L_rms)/2._wp - else if (hypoelasticity) then - flux_tau_L = 0._wp; flux_tau_R = 0._wp + #:if (NORM_DIR == 2) + if (cyl_coord) then + !Substituting the advective flux into the inviscid geometrical source flux $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_dims - flux_tau_L = flux_tau_L + tau_e_L(dir_idx_tau(i))*vel_L(dir_idx(i)) - flux_tau_R = flux_tau_R + tau_e_R(dir_idx_tau(i))*vel_R(dir_idx(i)) + do i = 1, E_idx + flux_gsrc_rs${XYZ}$_vf(j, k, l, i) = flux_rs${XYZ}$_vf(j, k, l, i) end do - flux_rs${XYZ}$_vf(j, k, l, E_idx) = & - (s_M*(vel_R(dir_idx(1))*(E_R + pres_R) - flux_tau_R) & - - s_P*(vel_L(dir_idx(1))*(E_L + pres_L) - flux_tau_L) & - + s_M*s_P*(E_L - E_R))/(s_M - s_P) - else - flux_rs${XYZ}$_vf(j, k, l, E_idx) = & - (s_M*vel_R(dir_idx(1))*(E_R + pres_R) & - - s_P*vel_L(dir_idx(1))*(E_L + pres_L) & - + s_M*s_P*(E_L - E_R)) & - /(s_M - s_P) & - + (s_M/s_L)*(s_P/s_R)*pcorr*(vel_R_rms - vel_L_rms)/2._wp - end if - - ! Elastic Stresses - if (hypoelasticity) then - do i = 1, strxe - strxb + 1 !TODO: this indexing may be slow - flux_rs${XYZ}$_vf(j, k, l, strxb - 1 + i) = & - (s_M*(rho_R*vel_R(dir_idx(1)) & - *tau_e_R(i)) & - - s_P*(rho_L*vel_L(dir_idx(1)) & - *tau_e_L(i)) & - + s_M*s_P*(rho_L*tau_e_L(i) & - - rho_R*tau_e_R(i))) & - /(s_M - s_P) + ! Recalculating the radial momentum geometric source flux + flux_gsrc_rs${XYZ}$_vf(j, k, l, contxe + 2) = & + flux_rs${XYZ}$_vf(j, k, l, contxe + 2) & + - (s_M*pres_R - s_P*pres_L)/(s_M - s_P) + ! Geometrical source of the void fraction(s) is zero + $:GPU_LOOP(parallelism='[seq]') + do i = advxb, advxe + flux_gsrc_rs${XYZ}$_vf(j, k, l, i) = flux_rs${XYZ}$_vf(j, k, l, i) end do end if - ! Advection - $:GPU_LOOP(parallelism='[seq]') - do i = advxb, advxe - flux_rs${XYZ}$_vf(j, k, l, i) = & - (qL_prim_rs${XYZ}$_vf(j, k, l, i) & - - qR_prim_rs${XYZ}$_vf(j + 1, k, l, i)) & - *s_M*s_P/(s_M - s_P) - flux_src_rs${XYZ}$_vf(j, k, l, i) = & - (s_M*qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) & - - s_P*qL_prim_rs${XYZ}$_vf(j, k, l, i)) & + if (cyl_coord .and. hypoelasticity) then + ! += tau_sigmasigma using HLL + flux_gsrc_rs${XYZ}$_vf(j, k, l, contxe + 2) = & + flux_gsrc_rs${XYZ}$_vf(j, k, l, contxe + 2) + & + (s_M*tau_e_R(4) - s_P*tau_e_L(4)) & /(s_M - s_P) - end do - - if (bubbles_euler) then - ! From HLLC: Kills mass transport @ bubble gas density - if (num_fluids > 1) then - flux_rs${XYZ}$_vf(j, k, l, contxe) = 0._wp - end if - end if - if (chemistry) then $:GPU_LOOP(parallelism='[seq]') - do i = chemxb, chemxe - Y_L = qL_prim_rs${XYZ}$_vf(j, k, l, i) - Y_R = qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) - - flux_rs${XYZ}$_vf(j, k, l, i) = (s_M*Y_R*rho_R*vel_R(dir_idx(1)) & - - s_P*Y_L*rho_L*vel_L(dir_idx(1)) & - + s_M*s_P*(Y_L*rho_L - Y_R*rho_R)) & - /(s_M - s_P) - flux_src_rs${XYZ}$_vf(j, k, l, i) = 0._wp + do i = strxb, strxe + flux_gsrc_rs${XYZ}$_vf(j, k, l, i) = flux_rs${XYZ}$_vf(j, k, l, i) end do end if + #:endif - if (mhd) then - if (n == 0) then ! 1D: d/dx flux only & Bx = Bx0 = const. - ! B_y flux = v_x * B_y - v_y * Bx0 - ! B_z flux = v_x * B_z - v_z * Bx0 - $:GPU_LOOP(parallelism='[seq]') - do i = 0, 1 - flux_rsx_vf(j, k, l, B_idx%beg + i) = (s_M*(vel_R(1)*B%R(2 + i) - vel_R(2 + i)*Bx0) & - - s_P*(vel_L(1)*B%L(2 + i) - vel_L(2 + i)*Bx0) & - + s_M*s_P*(B%L(2 + i) - B%R(2 + i)))/(s_M - s_P) - end do - else ! 2D/3D: Bx, By, Bz /= const. but zero flux component in the same direction - ! B_x d/d${XYZ}$ flux = (1 - delta(x,${XYZ}$)) * (v_${XYZ}$ * B_x - v_x * B_${XYZ}$) - ! B_y d/d${XYZ}$ flux = (1 - delta(y,${XYZ}$)) * (v_${XYZ}$ * B_y - v_y * B_${XYZ}$) - ! B_z d/d${XYZ}$ flux = (1 - delta(z,${XYZ}$)) * (v_${XYZ}$ * B_z - v_z * B_${XYZ}$) - $:GPU_LOOP(parallelism='[seq]') - do i = 0, 2 - flux_rs${XYZ}$_vf(j, k, l, B_idx%beg + i) = (1 - dir_flg(i + 1))*( & - s_M*(vel_R(dir_idx(1))*B%R(i + 1) - vel_R(i + 1)*B%R(norm_dir)) - & - s_P*(vel_L(dir_idx(1))*B%L(i + 1) - vel_L(i + 1)*B%L(norm_dir)) + & - s_M*s_P*(B%L(i + 1) - B%R(i + 1)))/(s_M - s_P) - end do - end if - flux_src_rs${XYZ}$_vf(j, k, l, advxb) = 0._wp - end if - - #:if (NORM_DIR == 2) - if (cyl_coord) then - !Substituting the advective flux into the inviscid geometrical source flux - $:GPU_LOOP(parallelism='[seq]') - do i = 1, E_idx - flux_gsrc_rs${XYZ}$_vf(j, k, l, i) = flux_rs${XYZ}$_vf(j, k, l, i) - end do - ! Recalculating the radial momentum geometric source flux - flux_gsrc_rs${XYZ}$_vf(j, k, l, contxe + 2) = & - flux_rs${XYZ}$_vf(j, k, l, contxe + 2) & - - (s_M*pres_R - s_P*pres_L)/(s_M - s_P) - ! Geometrical source of the void fraction(s) is zero - $:GPU_LOOP(parallelism='[seq]') - do i = advxb, advxe - flux_gsrc_rs${XYZ}$_vf(j, k, l, i) = flux_rs${XYZ}$_vf(j, k, l, i) - end do - end if - - if (cyl_coord .and. hypoelasticity) then - ! += tau_sigmasigma using HLL - flux_gsrc_rs${XYZ}$_vf(j, k, l, contxe + 2) = & - flux_gsrc_rs${XYZ}$_vf(j, k, l, contxe + 2) + & - (s_M*tau_e_R(4) - s_P*tau_e_L(4)) & - /(s_M - s_P) - - $:GPU_LOOP(parallelism='[seq]') - do i = strxb, strxe - flux_gsrc_rs${XYZ}$_vf(j, k, l, i) = flux_rs${XYZ}$_vf(j, k, l, i) - end do - end if - #:endif - - end do end do end do + end do $:END_GPU_PARALLEL_LOOP() end if @@ -1119,522 +1119,522 @@ contains if (norm_dir == ${NORM_DIR}$) then $:GPU_PARALLEL_LOOP(collapse=3, private='[i,j,k,l,alpha_rho_L, alpha_rho_R, vel_L, vel_R, alpha_L, alpha_R, tau_e_L, tau_e_R, G_L, G_R, Re_L, Re_R, rho_avg, h_avg, gamma_avg, s_L, s_R, s_S, Ys_L, Ys_R, xi_field_L, xi_field_R, Cp_iL, Cp_iR, Xs_L, Xs_R, Gamma_iL, Gamma_iR, Yi_avg, Phi_avg, h_iL, h_iR, h_avg_2, c_fast, pres_mag, B, Ga, vdotB, B2, b4, cm, pcorr, zcoef, vel_L_tmp, vel_R_tmp, vel_grad_L, vel_grad_R, idx_right_phys]') - do l = is3%beg, is3%end - do k = is2%beg, is2%end - do j = is1%beg, is1%end - $:GPU_LOOP(parallelism='[seq]') - do i = 1, contxe - alpha_rho_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, i) - alpha_rho_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) - end do - - vel_L_rms = 0._wp; vel_R_rms = 0._wp - - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_vels - vel_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, contxe + i) - vel_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, contxe + i) - vel_L_rms = vel_L_rms + vel_L(i)**2._wp - vel_R_rms = vel_R_rms + vel_R(i)**2._wp - end do - - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - alpha_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i) - alpha_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i) - end do - - pres_L = qL_prim_rs${XYZ}$_vf(j, k, l, E_idx) - pres_R = qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx) - - if (mhd) then - if (n == 0) then ! 1D: constant Bx; By, Bz as variables - B%L(1) = Bx0 - B%R(1) = Bx0 - B%L(2) = qL_prim_rs${XYZ}$_vf(j, k, l, B_idx%beg) - B%R(2) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, B_idx%beg) - B%L(3) = qL_prim_rs${XYZ}$_vf(j, k, l, B_idx%beg + 1) - B%R(3) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, B_idx%beg + 1) - else ! 2D/3D: Bx, By, Bz as variables - B%L(1) = qL_prim_rs${XYZ}$_vf(j, k, l, B_idx%beg) - B%R(1) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, B_idx%beg) - B%L(2) = qL_prim_rs${XYZ}$_vf(j, k, l, B_idx%beg + 1) - B%R(2) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, B_idx%beg + 1) - B%L(3) = qL_prim_rs${XYZ}$_vf(j, k, l, B_idx%beg + 2) - B%R(3) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, B_idx%beg + 2) - end if - end if + do l = is3%beg, is3%end + do k = is2%beg, is2%end + do j = is1%beg, is1%end + $:GPU_LOOP(parallelism='[seq]') + do i = 1, contxe + alpha_rho_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, i) + alpha_rho_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) + end do - rho_L = 0._wp - gamma_L = 0._wp - pi_inf_L = 0._wp - qv_L = 0._wp + vel_L_rms = 0._wp; vel_R_rms = 0._wp - rho_R = 0._wp - gamma_R = 0._wp - pi_inf_R = 0._wp - qv_R = 0._wp + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_vels + vel_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, contxe + i) + vel_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, contxe + i) + vel_L_rms = vel_L_rms + vel_L(i)**2._wp + vel_R_rms = vel_R_rms + vel_R(i)**2._wp + end do - alpha_L_sum = 0._wp - alpha_R_sum = 0._wp + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids + alpha_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i) + alpha_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i) + end do - pres_mag%L = 0._wp - pres_mag%R = 0._wp + pres_L = qL_prim_rs${XYZ}$_vf(j, k, l, E_idx) + pres_R = qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx) + + if (mhd) then + if (n == 0) then ! 1D: constant Bx; By, Bz as variables + B%L(1) = Bx0 + B%R(1) = Bx0 + B%L(2) = qL_prim_rs${XYZ}$_vf(j, k, l, B_idx%beg) + B%R(2) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, B_idx%beg) + B%L(3) = qL_prim_rs${XYZ}$_vf(j, k, l, B_idx%beg + 1) + B%R(3) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, B_idx%beg + 1) + else ! 2D/3D: Bx, By, Bz as variables + B%L(1) = qL_prim_rs${XYZ}$_vf(j, k, l, B_idx%beg) + B%R(1) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, B_idx%beg) + B%L(2) = qL_prim_rs${XYZ}$_vf(j, k, l, B_idx%beg + 1) + B%R(2) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, B_idx%beg + 1) + B%L(3) = qL_prim_rs${XYZ}$_vf(j, k, l, B_idx%beg + 2) + B%R(3) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, B_idx%beg + 2) + end if + end if - if (mpp_lim) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - alpha_rho_L(i) = max(0._wp, alpha_rho_L(i)) - alpha_L(i) = min(max(0._wp, alpha_L(i)), 1._wp) - alpha_L_sum = alpha_L_sum + alpha_L(i) - alpha_rho_R(i) = max(0._wp, alpha_rho_R(i)) - alpha_R(i) = min(max(0._wp, alpha_R(i)), 1._wp) - alpha_R_sum = alpha_R_sum + alpha_R(i) - end do + rho_L = 0._wp + gamma_L = 0._wp + pi_inf_L = 0._wp + qv_L = 0._wp - alpha_L = alpha_L/max(alpha_L_sum, sgm_eps) - alpha_R = alpha_R/max(alpha_R_sum, sgm_eps) - end if + rho_R = 0._wp + gamma_R = 0._wp + pi_inf_R = 0._wp + qv_R = 0._wp + + alpha_L_sum = 0._wp + alpha_R_sum = 0._wp + pres_mag%L = 0._wp + pres_mag%R = 0._wp + + if (mpp_lim) then $:GPU_LOOP(parallelism='[seq]') do i = 1, num_fluids - rho_L = rho_L + alpha_rho_L(i) - gamma_L = gamma_L + alpha_L(i)*gammas(i) - pi_inf_L = pi_inf_L + alpha_L(i)*pi_infs(i) - qv_L = qv_L + alpha_rho_L(i)*qvs(i) - - rho_R = rho_R + alpha_rho_R(i) - gamma_R = gamma_R + alpha_R(i)*gammas(i) - pi_inf_R = pi_inf_R + alpha_R(i)*pi_infs(i) - qv_R = qv_R + alpha_rho_R(i)*qvs(i) + alpha_rho_L(i) = max(0._wp, alpha_rho_L(i)) + alpha_L(i) = min(max(0._wp, alpha_L(i)), 1._wp) + alpha_L_sum = alpha_L_sum + alpha_L(i) + alpha_rho_R(i) = max(0._wp, alpha_rho_R(i)) + alpha_R(i) = min(max(0._wp, alpha_R(i)), 1._wp) + alpha_R_sum = alpha_R_sum + alpha_R(i) end do - if (viscous) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, 2 - Re_L(i) = dflt_real - Re_R(i) = dflt_real + alpha_L = alpha_L/max(alpha_L_sum, sgm_eps) + alpha_R = alpha_R/max(alpha_R_sum, sgm_eps) + end if - if (Re_size(i) > 0) Re_L(i) = 0._wp - if (Re_size(i) > 0) Re_R(i) = 0._wp + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids + rho_L = rho_L + alpha_rho_L(i) + gamma_L = gamma_L + alpha_L(i)*gammas(i) + pi_inf_L = pi_inf_L + alpha_L(i)*pi_infs(i) + qv_L = qv_L + alpha_rho_L(i)*qvs(i) + + rho_R = rho_R + alpha_rho_R(i) + gamma_R = gamma_R + alpha_R(i)*gammas(i) + pi_inf_R = pi_inf_R + alpha_R(i)*pi_infs(i) + qv_R = qv_R + alpha_rho_R(i)*qvs(i) + end do - $:GPU_LOOP(parallelism='[seq]') - do q = 1, Re_size(i) - Re_L(i) = alpha_L(Re_idx(i, q))/Res_gs(i, q) & - + Re_L(i) - Re_R(i) = alpha_R(Re_idx(i, q))/Res_gs(i, q) & - + Re_R(i) - end do + if (viscous) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, 2 + Re_L(i) = dflt_real + Re_R(i) = dflt_real - Re_L(i) = 1._wp/max(Re_L(i), sgm_eps) - Re_R(i) = 1._wp/max(Re_R(i), sgm_eps) - end do - end if + if (Re_size(i) > 0) Re_L(i) = 0._wp + if (Re_size(i) > 0) Re_R(i) = 0._wp - if (chemistry) then $:GPU_LOOP(parallelism='[seq]') - do i = chemxb, chemxe - Ys_L(i - chemxb + 1) = qL_prim_rs${XYZ}$_vf(j, k, l, i) - Ys_R(i - chemxb + 1) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) + do q = 1, Re_size(i) + Re_L(i) = alpha_L(Re_idx(i, q))/Res_gs(i, q) & + + Re_L(i) + Re_R(i) = alpha_R(Re_idx(i, q))/Res_gs(i, q) & + + Re_R(i) end do - call get_mixture_molecular_weight(Ys_L, MW_L) - call get_mixture_molecular_weight(Ys_R, MW_R) - - Xs_L(:) = Ys_L(:)*MW_L/molecular_weights(:) - Xs_R(:) = Ys_R(:)*MW_R/molecular_weights(:) - - R_gas_L = gas_constant/MW_L - R_gas_R = gas_constant/MW_R - T_L = pres_L/rho_L/R_gas_L - T_R = pres_R/rho_R/R_gas_R + Re_L(i) = 1._wp/max(Re_L(i), sgm_eps) + Re_R(i) = 1._wp/max(Re_R(i), sgm_eps) + end do + end if - call get_species_specific_heats_r(T_L, Cp_iL) - call get_species_specific_heats_r(T_R, Cp_iR) + if (chemistry) then + $:GPU_LOOP(parallelism='[seq]') + do i = chemxb, chemxe + Ys_L(i - chemxb + 1) = qL_prim_rs${XYZ}$_vf(j, k, l, i) + Ys_R(i - chemxb + 1) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) + end do - if (chem_params%gamma_method == 1) then - ! gamma_method = 1: Ref. Section 2.3.1 Formulation of doi:10.7907/ZKW8-ES97. - Gamma_iL = Cp_iL/(Cp_iL - 1.0_wp) - Gamma_iR = Cp_iR/(Cp_iR - 1.0_wp) + call get_mixture_molecular_weight(Ys_L, MW_L) + call get_mixture_molecular_weight(Ys_R, MW_R) + + Xs_L(:) = Ys_L(:)*MW_L/molecular_weights(:) + Xs_R(:) = Ys_R(:)*MW_R/molecular_weights(:) + + R_gas_L = gas_constant/MW_L + R_gas_R = gas_constant/MW_R + T_L = pres_L/rho_L/R_gas_L + T_R = pres_R/rho_R/R_gas_R + + call get_species_specific_heats_r(T_L, Cp_iL) + call get_species_specific_heats_r(T_R, Cp_iR) + + if (chem_params%gamma_method == 1) then + ! gamma_method = 1: Ref. Section 2.3.1 Formulation of doi:10.7907/ZKW8-ES97. + Gamma_iL = Cp_iL/(Cp_iL - 1.0_wp) + Gamma_iR = Cp_iR/(Cp_iR - 1.0_wp) + + gamma_L = sum(Xs_L(:)/(Gamma_iL(:) - 1.0_wp)) + gamma_R = sum(Xs_R(:)/(Gamma_iR(:) - 1.0_wp)) + else if (chem_params%gamma_method == 2) then + ! gamma_method = 2: c_p / c_v where c_p, c_v are specific heats. + call get_mixture_specific_heat_cp_mass(T_L, Ys_L, Cp_L) + call get_mixture_specific_heat_cp_mass(T_R, Ys_R, Cp_R) + call get_mixture_specific_heat_cv_mass(T_L, Ys_L, Cv_L) + call get_mixture_specific_heat_cv_mass(T_R, Ys_R, Cv_R) + + Gamm_L = Cp_L/Cv_L + gamma_L = 1.0_wp/(Gamm_L - 1.0_wp) + Gamm_R = Cp_R/Cv_R + gamma_R = 1.0_wp/(Gamm_R - 1.0_wp) + end if - gamma_L = sum(Xs_L(:)/(Gamma_iL(:) - 1.0_wp)) - gamma_R = sum(Xs_R(:)/(Gamma_iR(:) - 1.0_wp)) - else if (chem_params%gamma_method == 2) then - ! gamma_method = 2: c_p / c_v where c_p, c_v are specific heats. - call get_mixture_specific_heat_cp_mass(T_L, Ys_L, Cp_L) - call get_mixture_specific_heat_cp_mass(T_R, Ys_R, Cp_R) - call get_mixture_specific_heat_cv_mass(T_L, Ys_L, Cv_L) - call get_mixture_specific_heat_cv_mass(T_R, Ys_R, Cv_R) + call get_mixture_energy_mass(T_L, Ys_L, E_L) + call get_mixture_energy_mass(T_R, Ys_R, E_R) + + E_L = rho_L*E_L + 5.e-1*rho_L*vel_L_rms + E_R = rho_R*E_R + 5.e-1*rho_R*vel_R_rms + H_L = (E_L + pres_L)/rho_L + H_R = (E_R + pres_R)/rho_R + elseif (mhd .and. relativity) then + Ga%L = 1._wp/sqrt(1._wp - vel_L_rms) + Ga%R = 1._wp/sqrt(1._wp - vel_R_rms) + vdotB%L = vel_L(1)*B%L(1) + vel_L(2)*B%L(2) + vel_L(3)*B%L(3) + vdotB%R = vel_R(1)*B%R(1) + vel_R(2)*B%R(2) + vel_R(3)*B%R(3) + + b4%L(1:3) = B%L(1:3)/Ga%L + Ga%L*vel_L(1:3)*vdotB%L + b4%R(1:3) = B%R(1:3)/Ga%R + Ga%R*vel_R(1:3)*vdotB%R + B2%L = B%L(1)**2._wp + B%L(2)**2._wp + B%L(3)**2._wp + B2%R = B%R(1)**2._wp + B%R(2)**2._wp + B%R(3)**2._wp + + pres_mag%L = 0.5_wp*(B2%L/Ga%L**2._wp + vdotB%L**2._wp) + pres_mag%R = 0.5_wp*(B2%R/Ga%R**2._wp + vdotB%R**2._wp) + + ! Hard-coded EOS + H_L = 1._wp + (gamma_L + 1)*pres_L/rho_L + H_R = 1._wp + (gamma_R + 1)*pres_R/rho_R + + cm%L(1:3) = (rho_L*H_L*Ga%L**2 + B2%L)*vel_L(1:3) - vdotB%L*B%L(1:3) + cm%R(1:3) = (rho_R*H_R*Ga%R**2 + B2%R)*vel_R(1:3) - vdotB%R*B%R(1:3) + + E_L = rho_L*H_L*Ga%L**2 - pres_L + 0.5_wp*(B2%L + vel_L_rms*B2%L - vdotB%L**2._wp) - rho_L*Ga%L + E_R = rho_R*H_R*Ga%R**2 - pres_R + 0.5_wp*(B2%R + vel_R_rms*B2%R - vdotB%R**2._wp) - rho_R*Ga%R + elseif (mhd .and. .not. relativity) then + pres_mag%L = 0.5_wp*(B%L(1)**2._wp + B%L(2)**2._wp + B%L(3)**2._wp) + pres_mag%R = 0.5_wp*(B%R(1)**2._wp + B%R(2)**2._wp + B%R(3)**2._wp) + E_L = gamma_L*pres_L + pi_inf_L + 0.5_wp*rho_L*vel_L_rms + qv_L + pres_mag%L + E_R = gamma_R*pres_R + pi_inf_R + 0.5_wp*rho_R*vel_R_rms + qv_R + pres_mag%R ! includes magnetic energy + H_L = (E_L + pres_L - pres_mag%L)/rho_L + H_R = (E_R + pres_R - pres_mag%R)/rho_R ! stagnation enthalpy here excludes magnetic energy (only used to find speed of sound) + else + E_L = gamma_L*pres_L + pi_inf_L + 5.e-1*rho_L*vel_L_rms + qv_L + E_R = gamma_R*pres_R + pi_inf_R + 5.e-1*rho_R*vel_R_rms + qv_R + H_L = (E_L + pres_L)/rho_L + H_R = (E_R + pres_R)/rho_R + end if - Gamm_L = Cp_L/Cv_L - gamma_L = 1.0_wp/(Gamm_L - 1.0_wp) - Gamm_R = Cp_R/Cv_R - gamma_R = 1.0_wp/(Gamm_R - 1.0_wp) - end if + ! elastic energy update + if (hypoelasticity) then + G_L = 0._wp; G_R = 0._wp - call get_mixture_energy_mass(T_L, Ys_L, E_L) - call get_mixture_energy_mass(T_R, Ys_R, E_R) + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids + G_L = G_L + alpha_L(i)*Gs_rs(i) + G_R = G_R + alpha_R(i)*Gs_rs(i) + end do - E_L = rho_L*E_L + 5.e-1*rho_L*vel_L_rms - E_R = rho_R*E_R + 5.e-1*rho_R*vel_R_rms - H_L = (E_L + pres_L)/rho_L - H_R = (E_R + pres_R)/rho_R - elseif (mhd .and. relativity) then - Ga%L = 1._wp/sqrt(1._wp - vel_L_rms) - Ga%R = 1._wp/sqrt(1._wp - vel_R_rms) - vdotB%L = vel_L(1)*B%L(1) + vel_L(2)*B%L(2) + vel_L(3)*B%L(3) - vdotB%R = vel_R(1)*B%R(1) + vel_R(2)*B%R(2) + vel_R(3)*B%R(3) - - b4%L(1:3) = B%L(1:3)/Ga%L + Ga%L*vel_L(1:3)*vdotB%L - b4%R(1:3) = B%R(1:3)/Ga%R + Ga%R*vel_R(1:3)*vdotB%R - B2%L = B%L(1)**2._wp + B%L(2)**2._wp + B%L(3)**2._wp - B2%R = B%R(1)**2._wp + B%R(2)**2._wp + B%R(3)**2._wp - - pres_mag%L = 0.5_wp*(B2%L/Ga%L**2._wp + vdotB%L**2._wp) - pres_mag%R = 0.5_wp*(B2%R/Ga%R**2._wp + vdotB%R**2._wp) - - ! Hard-coded EOS - H_L = 1._wp + (gamma_L + 1)*pres_L/rho_L - H_R = 1._wp + (gamma_R + 1)*pres_R/rho_R - - cm%L(1:3) = (rho_L*H_L*Ga%L**2 + B2%L)*vel_L(1:3) - vdotB%L*B%L(1:3) - cm%R(1:3) = (rho_R*H_R*Ga%R**2 + B2%R)*vel_R(1:3) - vdotB%R*B%R(1:3) - - E_L = rho_L*H_L*Ga%L**2 - pres_L + 0.5_wp*(B2%L + vel_L_rms*B2%L - vdotB%L**2._wp) - rho_L*Ga%L - E_R = rho_R*H_R*Ga%R**2 - pres_R + 0.5_wp*(B2%R + vel_R_rms*B2%R - vdotB%R**2._wp) - rho_R*Ga%R - elseif (mhd .and. .not. relativity) then - pres_mag%L = 0.5_wp*(B%L(1)**2._wp + B%L(2)**2._wp + B%L(3)**2._wp) - pres_mag%R = 0.5_wp*(B%R(1)**2._wp + B%R(2)**2._wp + B%R(3)**2._wp) - E_L = gamma_L*pres_L + pi_inf_L + 0.5_wp*rho_L*vel_L_rms + qv_L + pres_mag%L - E_R = gamma_R*pres_R + pi_inf_R + 0.5_wp*rho_R*vel_R_rms + qv_R + pres_mag%R ! includes magnetic energy - H_L = (E_L + pres_L - pres_mag%L)/rho_L - H_R = (E_R + pres_R - pres_mag%R)/rho_R ! stagnation enthalpy here excludes magnetic energy (only used to find speed of sound) - else - E_L = gamma_L*pres_L + pi_inf_L + 5.e-1*rho_L*vel_L_rms + qv_L - E_R = gamma_R*pres_R + pi_inf_R + 5.e-1*rho_R*vel_R_rms + qv_R - H_L = (E_L + pres_L)/rho_L - H_R = (E_R + pres_R)/rho_R + if (cont_damage) then + G_L = G_L*max((1._wp - qL_prim_rs${XYZ}$_vf(j, k, l, damage_idx)), 0._wp) + G_R = G_R*max((1._wp - qR_prim_rs${XYZ}$_vf(j, k, l, damage_idx)), 0._wp) end if - ! elastic energy update - if (hypoelasticity) then - G_L = 0._wp; G_R = 0._wp - - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - G_L = G_L + alpha_L(i)*Gs_rs(i) - G_R = G_R + alpha_R(i)*Gs_rs(i) - end do - - if (cont_damage) then - G_L = G_L*max((1._wp - qL_prim_rs${XYZ}$_vf(j, k, l, damage_idx)), 0._wp) - G_R = G_R*max((1._wp - qR_prim_rs${XYZ}$_vf(j, k, l, damage_idx)), 0._wp) - end if - - do i = 1, strxe - strxb + 1 - tau_e_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, strxb - 1 + i) - tau_e_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, strxb - 1 + i) - ! Elastic contribution to energy if G large enough - !TODO take out if statement if stable without - if ((G_L > 1000) .and. (G_R > 1000)) then + do i = 1, strxe - strxb + 1 + tau_e_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, strxb - 1 + i) + tau_e_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, strxb - 1 + i) + ! Elastic contribution to energy if G large enough + !TODO take out if statement if stable without + if ((G_L > 1000) .and. (G_R > 1000)) then + E_L = E_L + (tau_e_L(i)*tau_e_L(i))/(4._wp*G_L) + E_R = E_R + (tau_e_R(i)*tau_e_R(i))/(4._wp*G_R) + ! Double for shear stresses + if (any(strxb - 1 + i == shear_indices)) then E_L = E_L + (tau_e_L(i)*tau_e_L(i))/(4._wp*G_L) E_R = E_R + (tau_e_R(i)*tau_e_R(i))/(4._wp*G_R) - ! Double for shear stresses - if (any(strxb - 1 + i == shear_indices)) then - E_L = E_L + (tau_e_L(i)*tau_e_L(i))/(4._wp*G_L) - E_R = E_R + (tau_e_R(i)*tau_e_R(i))/(4._wp*G_R) - end if end if - end do - end if - - call s_compute_speed_of_sound(pres_L, rho_L, gamma_L, pi_inf_L, H_L, alpha_L, & - vel_L_rms, 0._wp, c_L) + end if + end do + end if - call s_compute_speed_of_sound(pres_R, rho_R, gamma_R, pi_inf_R, H_R, alpha_R, & - vel_R_rms, 0._wp, c_R) + call s_compute_speed_of_sound(pres_L, rho_L, gamma_L, pi_inf_L, H_L, alpha_L, & + vel_L_rms, 0._wp, c_L) - if (mhd) then - call s_compute_fast_magnetosonic_speed(rho_L, c_L, B%L, norm_dir, c_fast%L, H_L) - call s_compute_fast_magnetosonic_speed(rho_R, c_R, B%R, norm_dir, c_fast%R, H_R) - end if + call s_compute_speed_of_sound(pres_R, rho_R, gamma_R, pi_inf_R, H_R, alpha_R, & + vel_R_rms, 0._wp, c_R) - s_L = 0._wp; s_R = 0._wp + if (mhd) then + call s_compute_fast_magnetosonic_speed(rho_L, c_L, B%L, norm_dir, c_fast%L, H_L) + call s_compute_fast_magnetosonic_speed(rho_R, c_R, B%R, norm_dir, c_fast%R, H_R) + end if - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_dims - s_L = s_L + vel_L(i)**2._wp - s_R = s_R + vel_R(i)**2._wp - end do + s_L = 0._wp; s_R = 0._wp - s_L = sqrt(s_L) - s_R = sqrt(s_R) + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_dims + s_L = s_L + vel_L(i)**2._wp + s_R = s_R + vel_R(i)**2._wp + end do - s_P = max(s_L, s_R) + max(c_L, c_R) - s_M = -s_P + s_L = sqrt(s_L) + s_R = sqrt(s_R) - s_L = s_M - s_R = s_P + s_P = max(s_L, s_R) + max(c_L, c_R) + s_M = -s_P - ! Low Mach correction - if (low_Mach == 1) then - @:compute_low_Mach_correction() - else - pcorr = 0._wp - end if + s_L = s_M + s_R = s_P - ! Mass - if (.not. relativity) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, contxe - flux_rs${XYZ}$_vf(j, k, l, i) = & - (s_M*alpha_rho_R(i)*vel_R(norm_dir) & - - s_P*alpha_rho_L(i)*vel_L(norm_dir) & - + s_M*s_P*(alpha_rho_L(i) & - - alpha_rho_R(i))) & - /(s_M - s_P) - end do - elseif (relativity) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, contxe - flux_rs${XYZ}$_vf(j, k, l, i) = & - (s_M*Ga%R*alpha_rho_R(i)*vel_R(norm_dir) & - - s_P*Ga%L*alpha_rho_L(i)*vel_L(norm_dir) & - + s_M*s_P*(Ga%L*alpha_rho_L(i) & - - Ga%R*alpha_rho_R(i))) & - /(s_M - s_P) - end do - end if + ! Low Mach correction + if (low_Mach == 1) then + @:compute_low_Mach_correction() + else + pcorr = 0._wp + end if - ! Momentum - if (mhd .and. (.not. relativity)) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, 3 - ! Flux of rho*v_i in the ${XYZ}$ direction - ! = rho * v_i * v_${XYZ}$ - B_i * B_${XYZ}$ + delta_(${XYZ}$,i) * p_tot - flux_rs${XYZ}$_vf(j, k, l, contxe + i) = & - (s_M*(rho_R*vel_R(i)*vel_R(norm_dir) & - - B%R(i)*B%R(norm_dir) & - + dir_flg(i)*(pres_R + pres_mag%R)) & - - s_P*(rho_L*vel_L(i)*vel_L(norm_dir) & - - B%L(i)*B%L(norm_dir) & - + dir_flg(i)*(pres_L + pres_mag%L)) & - + s_M*s_P*(rho_L*vel_L(i) - rho_R*vel_R(i))) & - /(s_M - s_P) - end do - elseif (mhd .and. relativity) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, 3 - ! Flux of m_i in the ${XYZ}$ direction - ! = m_i * v_${XYZ}$ - b_i/Gamma * B_${XYZ}$ + delta_(${XYZ}$,i) * p_tot - flux_rs${XYZ}$_vf(j, k, l, contxe + i) = & - (s_M*(cm%R(i)*vel_R(norm_dir) & - - b4%R(i)/Ga%R*B%R(norm_dir) & - + dir_flg(i)*(pres_R + pres_mag%R)) & - - s_P*(cm%L(i)*vel_L(norm_dir) & - - b4%L(i)/Ga%L*B%L(norm_dir) & - + dir_flg(i)*(pres_L + pres_mag%L)) & - + s_M*s_P*(cm%L(i) - cm%R(i))) & - /(s_M - s_P) - end do - elseif (bubbles_euler) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_vels - flux_rs${XYZ}$_vf(j, k, l, contxe + dir_idx(i)) = & - (s_M*(rho_R*vel_R(dir_idx(1)) & - *vel_R(dir_idx(i)) & - + dir_flg(dir_idx(i))*(pres_R - ptilde_R)) & - - s_P*(rho_L*vel_L(dir_idx(1)) & - *vel_L(dir_idx(i)) & - + dir_flg(dir_idx(i))*(pres_L - ptilde_L)) & - + s_M*s_P*(rho_L*vel_L(dir_idx(i)) & - - rho_R*vel_R(dir_idx(i)))) & - /(s_M - s_P) & - + (s_M/s_L)*(s_P/s_R)*pcorr*(vel_R(dir_idx(i)) - vel_L(dir_idx(i))) - end do - else if (hypoelasticity) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_vels - flux_rs${XYZ}$_vf(j, k, l, contxe + dir_idx(i)) = & - (s_M*(rho_R*vel_R(dir_idx(1)) & - *vel_R(dir_idx(i)) & - + dir_flg(dir_idx(i))*pres_R & - - tau_e_R(dir_idx_tau(i))) & - - s_P*(rho_L*vel_L(dir_idx(1)) & - *vel_L(dir_idx(i)) & - + dir_flg(dir_idx(i))*pres_L & - - tau_e_L(dir_idx_tau(i))) & - + s_M*s_P*(rho_L*vel_L(dir_idx(i)) & - - rho_R*vel_R(dir_idx(i)))) & - /(s_M - s_P) - end do - else - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_vels - flux_rs${XYZ}$_vf(j, k, l, contxe + dir_idx(i)) = & - (s_M*(rho_R*vel_R(dir_idx(1)) & - *vel_R(dir_idx(i)) & - + dir_flg(dir_idx(i))*pres_R) & - - s_P*(rho_L*vel_L(dir_idx(1)) & - *vel_L(dir_idx(i)) & - + dir_flg(dir_idx(i))*pres_L) & - + s_M*s_P*(rho_L*vel_L(dir_idx(i)) & - - rho_R*vel_R(dir_idx(i)))) & - /(s_M - s_P) & - + (s_M/s_L)*(s_P/s_R)*pcorr*(vel_R(dir_idx(i)) - vel_L(dir_idx(i))) - end do - end if + ! Mass + if (.not. relativity) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, contxe + flux_rs${XYZ}$_vf(j, k, l, i) = & + (s_M*alpha_rho_R(i)*vel_R(norm_dir) & + - s_P*alpha_rho_L(i)*vel_L(norm_dir) & + + s_M*s_P*(alpha_rho_L(i) & + - alpha_rho_R(i))) & + /(s_M - s_P) + end do + elseif (relativity) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, contxe + flux_rs${XYZ}$_vf(j, k, l, i) = & + (s_M*Ga%R*alpha_rho_R(i)*vel_R(norm_dir) & + - s_P*Ga%L*alpha_rho_L(i)*vel_L(norm_dir) & + + s_M*s_P*(Ga%L*alpha_rho_L(i) & + - Ga%R*alpha_rho_R(i))) & + /(s_M - s_P) + end do + end if - ! Energy - if (mhd .and. (.not. relativity)) then - ! energy flux = (E + p + p_mag) * v_${XYZ}$ - B_${XYZ}$ * (v_x*B_x + v_y*B_y + v_z*B_z) - flux_rs${XYZ}$_vf(j, k, l, E_idx) = & - (s_M*(vel_R(norm_dir)*(E_R + pres_R + pres_mag%R) - B%R(norm_dir)*(vel_R(1)*B%R(1) + vel_R(2)*B%R(2) + vel_R(3)*B%R(3))) & - - s_P*(vel_L(norm_dir)*(E_L + pres_L + pres_mag%L) - B%L(norm_dir)*(vel_L(1)*B%L(1) + vel_L(2)*B%L(2) + vel_L(3)*B%L(3))) & - + s_M*s_P*(E_L - E_R)) & + ! Momentum + if (mhd .and. (.not. relativity)) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, 3 + ! Flux of rho*v_i in the ${XYZ}$ direction + ! = rho * v_i * v_${XYZ}$ - B_i * B_${XYZ}$ + delta_(${XYZ}$,i) * p_tot + flux_rs${XYZ}$_vf(j, k, l, contxe + i) = & + (s_M*(rho_R*vel_R(i)*vel_R(norm_dir) & + - B%R(i)*B%R(norm_dir) & + + dir_flg(i)*(pres_R + pres_mag%R)) & + - s_P*(rho_L*vel_L(i)*vel_L(norm_dir) & + - B%L(i)*B%L(norm_dir) & + + dir_flg(i)*(pres_L + pres_mag%L)) & + + s_M*s_P*(rho_L*vel_L(i) - rho_R*vel_R(i))) & /(s_M - s_P) - elseif (mhd .and. relativity) then - ! energy flux = m_${XYZ}$ - mass flux - ! Hard-coded for single-component for now - flux_rs${XYZ}$_vf(j, k, l, E_idx) = & - (s_M*(cm%R(norm_dir) - Ga%R*alpha_rho_R(1)*vel_R(norm_dir)) & - - s_P*(cm%L(norm_dir) - Ga%L*alpha_rho_L(1)*vel_L(norm_dir)) & - + s_M*s_P*(E_L - E_R)) & + end do + elseif (mhd .and. relativity) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, 3 + ! Flux of m_i in the ${XYZ}$ direction + ! = m_i * v_${XYZ}$ - b_i/Gamma * B_${XYZ}$ + delta_(${XYZ}$,i) * p_tot + flux_rs${XYZ}$_vf(j, k, l, contxe + i) = & + (s_M*(cm%R(i)*vel_R(norm_dir) & + - b4%R(i)/Ga%R*B%R(norm_dir) & + + dir_flg(i)*(pres_R + pres_mag%R)) & + - s_P*(cm%L(i)*vel_L(norm_dir) & + - b4%L(i)/Ga%L*B%L(norm_dir) & + + dir_flg(i)*(pres_L + pres_mag%L)) & + + s_M*s_P*(cm%L(i) - cm%R(i))) & /(s_M - s_P) - else if (bubbles_euler) then - flux_rs${XYZ}$_vf(j, k, l, E_idx) = & - (s_M*vel_R(dir_idx(1))*(E_R + pres_R - ptilde_R) & - - s_P*vel_L(dir_idx(1))*(E_L + pres_L - ptilde_L) & - + s_M*s_P*(E_L - E_R)) & + end do + elseif (bubbles_euler) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_vels + flux_rs${XYZ}$_vf(j, k, l, contxe + dir_idx(i)) = & + (s_M*(rho_R*vel_R(dir_idx(1)) & + *vel_R(dir_idx(i)) & + + dir_flg(dir_idx(i))*(pres_R - ptilde_R)) & + - s_P*(rho_L*vel_L(dir_idx(1)) & + *vel_L(dir_idx(i)) & + + dir_flg(dir_idx(i))*(pres_L - ptilde_L)) & + + s_M*s_P*(rho_L*vel_L(dir_idx(i)) & + - rho_R*vel_R(dir_idx(i)))) & /(s_M - s_P) & - + (s_M/s_L)*(s_P/s_R)*pcorr*(vel_R_rms - vel_L_rms)/2._wp - else if (hypoelasticity) then - flux_tau_L = 0._wp; flux_tau_R = 0._wp - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_dims - flux_tau_L = flux_tau_L + tau_e_L(dir_idx_tau(i))*vel_L(dir_idx(i)) - flux_tau_R = flux_tau_R + tau_e_R(dir_idx_tau(i))*vel_R(dir_idx(i)) - end do - flux_rs${XYZ}$_vf(j, k, l, E_idx) = & - (s_M*(vel_R(dir_idx(1))*(E_R + pres_R) - flux_tau_R) & - - s_P*(vel_L(dir_idx(1))*(E_L + pres_L) - flux_tau_L) & - + s_M*s_P*(E_L - E_R))/(s_M - s_P) - else - flux_rs${XYZ}$_vf(j, k, l, E_idx) = & - (s_M*vel_R(dir_idx(1))*(E_R + pres_R) & - - s_P*vel_L(dir_idx(1))*(E_L + pres_L) & - + s_M*s_P*(E_L - E_R)) & + + (s_M/s_L)*(s_P/s_R)*pcorr*(vel_R(dir_idx(i)) - vel_L(dir_idx(i))) + end do + else if (hypoelasticity) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_vels + flux_rs${XYZ}$_vf(j, k, l, contxe + dir_idx(i)) = & + (s_M*(rho_R*vel_R(dir_idx(1)) & + *vel_R(dir_idx(i)) & + + dir_flg(dir_idx(i))*pres_R & + - tau_e_R(dir_idx_tau(i))) & + - s_P*(rho_L*vel_L(dir_idx(1)) & + *vel_L(dir_idx(i)) & + + dir_flg(dir_idx(i))*pres_L & + - tau_e_L(dir_idx_tau(i))) & + + s_M*s_P*(rho_L*vel_L(dir_idx(i)) & + - rho_R*vel_R(dir_idx(i)))) & + /(s_M - s_P) + end do + else + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_vels + flux_rs${XYZ}$_vf(j, k, l, contxe + dir_idx(i)) = & + (s_M*(rho_R*vel_R(dir_idx(1)) & + *vel_R(dir_idx(i)) & + + dir_flg(dir_idx(i))*pres_R) & + - s_P*(rho_L*vel_L(dir_idx(1)) & + *vel_L(dir_idx(i)) & + + dir_flg(dir_idx(i))*pres_L) & + + s_M*s_P*(rho_L*vel_L(dir_idx(i)) & + - rho_R*vel_R(dir_idx(i)))) & /(s_M - s_P) & - + (s_M/s_L)*(s_P/s_R)*pcorr*(vel_R_rms - vel_L_rms)/2._wp - end if - - ! Elastic Stresses - if (hypoelasticity) then - do i = 1, strxe - strxb + 1 !TODO: this indexing may be slow - flux_rs${XYZ}$_vf(j, k, l, strxb - 1 + i) = & - (s_M*(rho_R*vel_R(dir_idx(1)) & - *tau_e_R(i)) & - - s_P*(rho_L*vel_L(dir_idx(1)) & - *tau_e_L(i)) & - + s_M*s_P*(rho_L*tau_e_L(i) & - - rho_R*tau_e_R(i))) & - /(s_M - s_P) - end do - end if + + (s_M/s_L)*(s_P/s_R)*pcorr*(vel_R(dir_idx(i)) - vel_L(dir_idx(i))) + end do + end if - ! Advection + ! Energy + if (mhd .and. (.not. relativity)) then + ! energy flux = (E + p + p_mag) * v_${XYZ}$ - B_${XYZ}$ * (v_x*B_x + v_y*B_y + v_z*B_z) + flux_rs${XYZ}$_vf(j, k, l, E_idx) = & + (s_M*(vel_R(norm_dir)*(E_R + pres_R + pres_mag%R) - B%R(norm_dir)*(vel_R(1)*B%R(1) + vel_R(2)*B%R(2) + vel_R(3)*B%R(3))) & + - s_P*(vel_L(norm_dir)*(E_L + pres_L + pres_mag%L) - B%L(norm_dir)*(vel_L(1)*B%L(1) + vel_L(2)*B%L(2) + vel_L(3)*B%L(3))) & + + s_M*s_P*(E_L - E_R)) & + /(s_M - s_P) + elseif (mhd .and. relativity) then + ! energy flux = m_${XYZ}$ - mass flux + ! Hard-coded for single-component for now + flux_rs${XYZ}$_vf(j, k, l, E_idx) = & + (s_M*(cm%R(norm_dir) - Ga%R*alpha_rho_R(1)*vel_R(norm_dir)) & + - s_P*(cm%L(norm_dir) - Ga%L*alpha_rho_L(1)*vel_L(norm_dir)) & + + s_M*s_P*(E_L - E_R)) & + /(s_M - s_P) + else if (bubbles_euler) then + flux_rs${XYZ}$_vf(j, k, l, E_idx) = & + (s_M*vel_R(dir_idx(1))*(E_R + pres_R - ptilde_R) & + - s_P*vel_L(dir_idx(1))*(E_L + pres_L - ptilde_L) & + + s_M*s_P*(E_L - E_R)) & + /(s_M - s_P) & + + (s_M/s_L)*(s_P/s_R)*pcorr*(vel_R_rms - vel_L_rms)/2._wp + else if (hypoelasticity) then + flux_tau_L = 0._wp; flux_tau_R = 0._wp $:GPU_LOOP(parallelism='[seq]') - do i = advxb, advxe - flux_rs${XYZ}$_vf(j, k, l, i) = & - (qL_prim_rs${XYZ}$_vf(j, k, l, i) & - - qR_prim_rs${XYZ}$_vf(j + 1, k, l, i)) & - *s_M*s_P/(s_M - s_P) - flux_src_rs${XYZ}$_vf(j, k, l, i) = & - (s_M*qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) & - - s_P*qL_prim_rs${XYZ}$_vf(j, k, l, i)) & + do i = 1, num_dims + flux_tau_L = flux_tau_L + tau_e_L(dir_idx_tau(i))*vel_L(dir_idx(i)) + flux_tau_R = flux_tau_R + tau_e_R(dir_idx_tau(i))*vel_R(dir_idx(i)) + end do + flux_rs${XYZ}$_vf(j, k, l, E_idx) = & + (s_M*(vel_R(dir_idx(1))*(E_R + pres_R) - flux_tau_R) & + - s_P*(vel_L(dir_idx(1))*(E_L + pres_L) - flux_tau_L) & + + s_M*s_P*(E_L - E_R))/(s_M - s_P) + else + flux_rs${XYZ}$_vf(j, k, l, E_idx) = & + (s_M*vel_R(dir_idx(1))*(E_R + pres_R) & + - s_P*vel_L(dir_idx(1))*(E_L + pres_L) & + + s_M*s_P*(E_L - E_R)) & + /(s_M - s_P) & + + (s_M/s_L)*(s_P/s_R)*pcorr*(vel_R_rms - vel_L_rms)/2._wp + end if + + ! Elastic Stresses + if (hypoelasticity) then + do i = 1, strxe - strxb + 1 !TODO: this indexing may be slow + flux_rs${XYZ}$_vf(j, k, l, strxb - 1 + i) = & + (s_M*(rho_R*vel_R(dir_idx(1)) & + *tau_e_R(i)) & + - s_P*(rho_L*vel_L(dir_idx(1)) & + *tau_e_L(i)) & + + s_M*s_P*(rho_L*tau_e_L(i) & + - rho_R*tau_e_R(i))) & /(s_M - s_P) end do + end if - if (bubbles_euler) then - ! From HLLC: Kills mass transport @ bubble gas density - if (num_fluids > 1) then - flux_rs${XYZ}$_vf(j, k, l, contxe) = 0._wp - end if + ! Advection + $:GPU_LOOP(parallelism='[seq]') + do i = advxb, advxe + flux_rs${XYZ}$_vf(j, k, l, i) = & + (qL_prim_rs${XYZ}$_vf(j, k, l, i) & + - qR_prim_rs${XYZ}$_vf(j + 1, k, l, i)) & + *s_M*s_P/(s_M - s_P) + flux_src_rs${XYZ}$_vf(j, k, l, i) = & + (s_M*qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) & + - s_P*qL_prim_rs${XYZ}$_vf(j, k, l, i)) & + /(s_M - s_P) + end do + + if (bubbles_euler) then + ! From HLLC: Kills mass transport @ bubble gas density + if (num_fluids > 1) then + flux_rs${XYZ}$_vf(j, k, l, contxe) = 0._wp end if + end if - if (chemistry) then - $:GPU_LOOP(parallelism='[seq]') - do i = chemxb, chemxe - Y_L = qL_prim_rs${XYZ}$_vf(j, k, l, i) - Y_R = qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) + if (chemistry) then + $:GPU_LOOP(parallelism='[seq]') + do i = chemxb, chemxe + Y_L = qL_prim_rs${XYZ}$_vf(j, k, l, i) + Y_R = qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) + + flux_rs${XYZ}$_vf(j, k, l, i) = (s_M*Y_R*rho_R*vel_R(dir_idx(1)) & + - s_P*Y_L*rho_L*vel_L(dir_idx(1)) & + + s_M*s_P*(Y_L*rho_L - Y_R*rho_R)) & + /(s_M - s_P) + flux_src_rs${XYZ}$_vf(j, k, l, i) = 0._wp + end do + end if - flux_rs${XYZ}$_vf(j, k, l, i) = (s_M*Y_R*rho_R*vel_R(dir_idx(1)) & - - s_P*Y_L*rho_L*vel_L(dir_idx(1)) & - + s_M*s_P*(Y_L*rho_L - Y_R*rho_R)) & - /(s_M - s_P) - flux_src_rs${XYZ}$_vf(j, k, l, i) = 0._wp + if (mhd) then + if (n == 0) then ! 1D: d/dx flux only & Bx = Bx0 = const. + ! B_y flux = v_x * B_y - v_y * Bx0 + ! B_z flux = v_x * B_z - v_z * Bx0 + $:GPU_LOOP(parallelism='[seq]') + do i = 0, 1 + flux_rsx_vf(j, k, l, B_idx%beg + i) = (s_M*(vel_R(1)*B%R(2 + i) - vel_R(2 + i)*Bx0) & + - s_P*(vel_L(1)*B%L(2 + i) - vel_L(2 + i)*Bx0) & + + s_M*s_P*(B%L(2 + i) - B%R(2 + i)))/(s_M - s_P) + end do + else ! 2D/3D: Bx, By, Bz /= const. but zero flux component in the same direction + ! B_x d/d${XYZ}$ flux = (1 - delta(x,${XYZ}$)) * (v_${XYZ}$ * B_x - v_x * B_${XYZ}$) + ! B_y d/d${XYZ}$ flux = (1 - delta(y,${XYZ}$)) * (v_${XYZ}$ * B_y - v_y * B_${XYZ}$) + ! B_z d/d${XYZ}$ flux = (1 - delta(z,${XYZ}$)) * (v_${XYZ}$ * B_z - v_z * B_${XYZ}$) + $:GPU_LOOP(parallelism='[seq]') + do i = 0, 2 + flux_rs${XYZ}$_vf(j, k, l, B_idx%beg + i) = (1 - dir_flg(i + 1))*( & + s_M*(vel_R(dir_idx(1))*B%R(i + 1) - vel_R(i + 1)*B%R(norm_dir)) - & + s_P*(vel_L(dir_idx(1))*B%L(i + 1) - vel_L(i + 1)*B%L(norm_dir)) + & + s_M*s_P*(B%L(i + 1) - B%R(i + 1)))/(s_M - s_P) end do end if + flux_src_rs${XYZ}$_vf(j, k, l, advxb) = 0._wp + end if - if (mhd) then - if (n == 0) then ! 1D: d/dx flux only & Bx = Bx0 = const. - ! B_y flux = v_x * B_y - v_y * Bx0 - ! B_z flux = v_x * B_z - v_z * Bx0 - $:GPU_LOOP(parallelism='[seq]') - do i = 0, 1 - flux_rsx_vf(j, k, l, B_idx%beg + i) = (s_M*(vel_R(1)*B%R(2 + i) - vel_R(2 + i)*Bx0) & - - s_P*(vel_L(1)*B%L(2 + i) - vel_L(2 + i)*Bx0) & - + s_M*s_P*(B%L(2 + i) - B%R(2 + i)))/(s_M - s_P) - end do - else ! 2D/3D: Bx, By, Bz /= const. but zero flux component in the same direction - ! B_x d/d${XYZ}$ flux = (1 - delta(x,${XYZ}$)) * (v_${XYZ}$ * B_x - v_x * B_${XYZ}$) - ! B_y d/d${XYZ}$ flux = (1 - delta(y,${XYZ}$)) * (v_${XYZ}$ * B_y - v_y * B_${XYZ}$) - ! B_z d/d${XYZ}$ flux = (1 - delta(z,${XYZ}$)) * (v_${XYZ}$ * B_z - v_z * B_${XYZ}$) - $:GPU_LOOP(parallelism='[seq]') - do i = 0, 2 - flux_rs${XYZ}$_vf(j, k, l, B_idx%beg + i) = (1 - dir_flg(i + 1))*( & - s_M*(vel_R(dir_idx(1))*B%R(i + 1) - vel_R(i + 1)*B%R(norm_dir)) - & - s_P*(vel_L(dir_idx(1))*B%L(i + 1) - vel_L(i + 1)*B%L(norm_dir)) + & - s_M*s_P*(B%L(i + 1) - B%R(i + 1)))/(s_M - s_P) - end do - end if - flux_src_rs${XYZ}$_vf(j, k, l, advxb) = 0._wp + #:if (NORM_DIR == 2) + if (cyl_coord) then + !Substituting the advective flux into the inviscid geometrical source flux + $:GPU_LOOP(parallelism='[seq]') + do i = 1, E_idx + flux_gsrc_rs${XYZ}$_vf(j, k, l, i) = flux_rs${XYZ}$_vf(j, k, l, i) + end do + ! Recalculating the radial momentum geometric source flux + flux_gsrc_rs${XYZ}$_vf(j, k, l, contxe + 2) = & + flux_rs${XYZ}$_vf(j, k, l, contxe + 2) & + - (s_M*pres_R - s_P*pres_L)/(s_M - s_P) + ! Geometrical source of the void fraction(s) is zero + $:GPU_LOOP(parallelism='[seq]') + do i = advxb, advxe + flux_gsrc_rs${XYZ}$_vf(j, k, l, i) = flux_rs${XYZ}$_vf(j, k, l, i) + end do end if - #:if (NORM_DIR == 2) - if (cyl_coord) then - !Substituting the advective flux into the inviscid geometrical source flux - $:GPU_LOOP(parallelism='[seq]') - do i = 1, E_idx - flux_gsrc_rs${XYZ}$_vf(j, k, l, i) = flux_rs${XYZ}$_vf(j, k, l, i) - end do - ! Recalculating the radial momentum geometric source flux - flux_gsrc_rs${XYZ}$_vf(j, k, l, contxe + 2) = & - flux_rs${XYZ}$_vf(j, k, l, contxe + 2) & - - (s_M*pres_R - s_P*pres_L)/(s_M - s_P) - ! Geometrical source of the void fraction(s) is zero - $:GPU_LOOP(parallelism='[seq]') - do i = advxb, advxe - flux_gsrc_rs${XYZ}$_vf(j, k, l, i) = flux_rs${XYZ}$_vf(j, k, l, i) - end do - end if - - if (cyl_coord .and. hypoelasticity) then - ! += tau_sigmasigma using HLL - flux_gsrc_rs${XYZ}$_vf(j, k, l, contxe + 2) = & - flux_gsrc_rs${XYZ}$_vf(j, k, l, contxe + 2) + & - (s_M*tau_e_R(4) - s_P*tau_e_L(4)) & - /(s_M - s_P) - - $:GPU_LOOP(parallelism='[seq]') - do i = strxb, strxe - flux_gsrc_rs${XYZ}$_vf(j, k, l, i) = flux_rs${XYZ}$_vf(j, k, l, i) - end do - end if - #:endif - end do + if (cyl_coord .and. hypoelasticity) then + ! += tau_sigmasigma using HLL + flux_gsrc_rs${XYZ}$_vf(j, k, l, contxe + 2) = & + flux_gsrc_rs${XYZ}$_vf(j, k, l, contxe + 2) + & + (s_M*tau_e_R(4) - s_P*tau_e_L(4)) & + /(s_M - s_P) + + $:GPU_LOOP(parallelism='[seq]') + do i = strxb, strxe + flux_gsrc_rs${XYZ}$_vf(j, k, l, i) = flux_rs${XYZ}$_vf(j, k, l, i) + end do + end if + #:endif end do end do + end do $:END_GPU_PARALLEL_LOOP() end if @@ -1642,196 +1642,196 @@ contains if (viscous) then $:GPU_PARALLEL_LOOP(collapse=3, private='[i,j,k,l,idx_right_phys, vel_grad_L, vel_grad_R, alpha_L, alpha_R, vel_L, vel_R, Re_L, Re_R]') - do l = isz%beg, isz%end - do k = isy%beg, isy%end - do j = isx%beg, isx%end - idx_right_phys(1) = j - idx_right_phys(2) = k - idx_right_phys(3) = l - idx_right_phys(norm_dir) = idx_right_phys(norm_dir) + 1 - - if (norm_dir == 1) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - alpha_L(i) = qL_prim_rsx_vf(j, k, l, E_idx + i) - alpha_R(i) = qR_prim_rsx_vf(j + 1, k, l, E_idx + i) - end do + do l = isz%beg, isz%end + do k = isy%beg, isy%end + do j = isx%beg, isx%end + idx_right_phys(1) = j + idx_right_phys(2) = k + idx_right_phys(3) = l + idx_right_phys(norm_dir) = idx_right_phys(norm_dir) + 1 - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_dims - vel_L(i) = qL_prim_rsx_vf(j, k, l, momxb + i - 1) - vel_R(i) = qR_prim_rsx_vf(j + 1, k, l, momxb + i - 1) - end do - else if (norm_dir == 2) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - alpha_L(i) = qL_prim_rsy_vf(k, j, l, E_idx + i) - alpha_R(i) = qR_prim_rsy_vf(k + 1, j, l, E_idx + i) - end do - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_dims - vel_L(i) = qL_prim_rsy_vf(k, j, l, momxb + i - 1) - vel_R(i) = qR_prim_rsy_vf(k + 1, j, l, momxb + i - 1) - end do - else - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - alpha_L(i) = qL_prim_rsz_vf(l, k, j, E_idx + i) - alpha_R(i) = qR_prim_rsz_vf(l + 1, k, j, E_idx + i) - end do + if (norm_dir == 1) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids + alpha_L(i) = qL_prim_rsx_vf(j, k, l, E_idx + i) + alpha_R(i) = qR_prim_rsx_vf(j + 1, k, l, E_idx + i) + end do - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_dims - vel_L(i) = qL_prim_rsz_vf(l, k, j, momxb + i - 1) - vel_R(i) = qR_prim_rsz_vf(l + 1, k, j, momxb + i - 1) - end do - end if + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_dims + vel_L(i) = qL_prim_rsx_vf(j, k, l, momxb + i - 1) + vel_R(i) = qR_prim_rsx_vf(j + 1, k, l, momxb + i - 1) + end do + else if (norm_dir == 2) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids + alpha_L(i) = qL_prim_rsy_vf(k, j, l, E_idx + i) + alpha_R(i) = qR_prim_rsy_vf(k + 1, j, l, E_idx + i) + end do + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_dims + vel_L(i) = qL_prim_rsy_vf(k, j, l, momxb + i - 1) + vel_R(i) = qR_prim_rsy_vf(k + 1, j, l, momxb + i - 1) + end do + else + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids + alpha_L(i) = qL_prim_rsz_vf(l, k, j, E_idx + i) + alpha_R(i) = qR_prim_rsz_vf(l + 1, k, j, E_idx + i) + end do $:GPU_LOOP(parallelism='[seq]') - do i = 1, 2 - Re_L(i) = dflt_real - Re_R(i) = dflt_real + do i = 1, num_dims + vel_L(i) = qL_prim_rsz_vf(l, k, j, momxb + i - 1) + vel_R(i) = qR_prim_rsz_vf(l + 1, k, j, momxb + i - 1) + end do + end if - if (Re_size(i) > 0) Re_L(i) = 0._wp - if (Re_size(i) > 0) Re_R(i) = 0._wp + $:GPU_LOOP(parallelism='[seq]') + do i = 1, 2 + Re_L(i) = dflt_real + Re_R(i) = dflt_real - $:GPU_LOOP(parallelism='[seq]') - do q = 1, Re_size(i) - Re_L(i) = alpha_L(Re_idx(i, q))/Res_gs(i, q) & - + Re_L(i) - Re_R(i) = alpha_R(Re_idx(i, q))/Res_gs(i, q) & - + Re_R(i) - end do + if (Re_size(i) > 0) Re_L(i) = 0._wp + if (Re_size(i) > 0) Re_R(i) = 0._wp - Re_L(i) = 1._wp/max(Re_L(i), sgm_eps) - Re_R(i) = 1._wp/max(Re_R(i), sgm_eps) + $:GPU_LOOP(parallelism='[seq]') + do q = 1, Re_size(i) + Re_L(i) = alpha_L(Re_idx(i, q))/Res_gs(i, q) & + + Re_L(i) + Re_R(i) = alpha_R(Re_idx(i, q))/Res_gs(i, q) & + + Re_R(i) end do - if (shear_stress) then - - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_dims - vel_grad_L(i, 1) = (dqL_prim_dx_vf(momxb + i - 1)%sf(j, k, l)/Re_L(1)) - vel_grad_R(i, 1) = (dqR_prim_dx_vf(momxb + i - 1)%sf(idx_right_phys(1), idx_right_phys(2), idx_right_phys(3))/Re_R(1)) - if (num_dims > 1) then - vel_grad_L(i, 2) = (dqL_prim_dy_vf(momxb + i - 1)%sf(j, k, l)/Re_L(1)) - vel_grad_R(i, 2) = (dqR_prim_dy_vf(momxb + i - 1)%sf(idx_right_phys(1), idx_right_phys(2), idx_right_phys(3))/Re_R(1)) - end if - if (num_dims > 2) then - vel_grad_L(i, 3) = (dqL_prim_dz_vf(momxb + i - 1)%sf(j, k, l)/Re_L(1)) - vel_grad_R(i, 3) = (dqR_prim_dz_vf(momxb + i - 1)%sf(idx_right_phys(1), idx_right_phys(2), idx_right_phys(3))/Re_R(1)) - end if - end do + Re_L(i) = 1._wp/max(Re_L(i), sgm_eps) + Re_R(i) = 1._wp/max(Re_R(i), sgm_eps) + end do - if (norm_dir == 1) then - flux_src_vf(momxb)%sf(j, k, l) = flux_src_vf(momxb)%sf(j, k, l) - (4._wp/3._wp)*0.5_wp*(vel_grad_L(1, 1) + vel_grad_R(1, 1)) - flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) - (4._wp/3._wp)*0.5_wp*(vel_grad_L(1, 1)*vel_L(1) + vel_grad_R(1, 1)*vel_R(1)) - if (num_dims > 1) then - flux_src_vf(momxb)%sf(j, k, l) = flux_src_vf(momxb)%sf(j, k, l) - (-2._wp/3._wp)*0.5_wp*(vel_grad_L(2, 2) + vel_grad_R(2, 2)) - flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) - (-2._wp/3._wp)*0.5_wp*(vel_grad_L(2, 2)*vel_L(1) + vel_grad_R(2, 2)*vel_R(1)) - - flux_src_vf(momxb + 1)%sf(j, k, l) = flux_src_vf(momxb + 1)%sf(j, k, l) - 0.5_wp*(vel_grad_L(1, 2) + vel_grad_R(1, 2)) - 0.5_wp*(vel_grad_L(2, 1) + vel_grad_R(2, 1)) - flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) - 0.5_wp*(vel_grad_L(1, 2)*vel_L(2) + vel_grad_R(1, 2)*vel_R(2)) - 0.5_wp*(vel_grad_L(2, 1)*vel_L(2) + vel_grad_R(2, 1)*vel_R(2)) - if (num_dims > 2) then - flux_src_vf(momxb)%sf(j, k, l) = flux_src_vf(momxb)%sf(j, k, l) - (-2._wp/3._wp)*0.5_wp*(vel_grad_L(3, 3) + vel_grad_R(3, 3)) - flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) - (-2._wp/3._wp)*0.5_wp*(vel_grad_L(3, 3)*vel_L(1) + vel_grad_R(3, 3)*vel_R(1)) - - flux_src_vf(momxb + 2)%sf(j, k, l) = flux_src_vf(momxb + 2)%sf(j, k, l) - 0.5_wp*(vel_grad_L(1, 3) + vel_grad_R(1, 3)) - 0.5_wp*(vel_grad_L(3, 1) + vel_grad_R(3, 1)) - flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) - 0.5_wp*(vel_grad_L(1, 3)*vel_L(3) + vel_grad_R(1, 3)*vel_R(3)) - 0.5_wp*(vel_grad_L(3, 1)*vel_L(3) + vel_grad_R(3, 1)*vel_R(3)) - end if - end if + if (shear_stress) then - else if (norm_dir == 2) then - flux_src_vf(momxb + 1)%sf(j, k, l) = flux_src_vf(momxb + 1)%sf(j, k, l) - (-2._wp/3._wp)*0.5_wp*(vel_grad_L(1, 1) + vel_grad_R(1, 1)) - flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) - (-2._wp/3._wp)*0.5_wp*(vel_grad_L(1, 1)*vel_L(2) + vel_grad_R(1, 1)*vel_R(2)) + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_dims + vel_grad_L(i, 1) = (dqL_prim_dx_vf(momxb + i - 1)%sf(j, k, l)/Re_L(1)) + vel_grad_R(i, 1) = (dqR_prim_dx_vf(momxb + i - 1)%sf(idx_right_phys(1), idx_right_phys(2), idx_right_phys(3))/Re_R(1)) + if (num_dims > 1) then + vel_grad_L(i, 2) = (dqL_prim_dy_vf(momxb + i - 1)%sf(j, k, l)/Re_L(1)) + vel_grad_R(i, 2) = (dqR_prim_dy_vf(momxb + i - 1)%sf(idx_right_phys(1), idx_right_phys(2), idx_right_phys(3))/Re_R(1)) + end if + if (num_dims > 2) then + vel_grad_L(i, 3) = (dqL_prim_dz_vf(momxb + i - 1)%sf(j, k, l)/Re_L(1)) + vel_grad_R(i, 3) = (dqR_prim_dz_vf(momxb + i - 1)%sf(idx_right_phys(1), idx_right_phys(2), idx_right_phys(3))/Re_R(1)) + end if + end do - flux_src_vf(momxb + 1)%sf(j, k, l) = flux_src_vf(momxb + 1)%sf(j, k, l) - (4._wp/3._wp)*0.5_wp*(vel_grad_L(2, 2) + vel_grad_R(2, 2)) - flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) - (4._wp/3._wp)*0.5_wp*(vel_grad_L(2, 2)*vel_L(2) + vel_grad_R(2, 2)*vel_R(2)) + if (norm_dir == 1) then + flux_src_vf(momxb)%sf(j, k, l) = flux_src_vf(momxb)%sf(j, k, l) - (4._wp/3._wp)*0.5_wp*(vel_grad_L(1, 1) + vel_grad_R(1, 1)) + flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) - (4._wp/3._wp)*0.5_wp*(vel_grad_L(1, 1)*vel_L(1) + vel_grad_R(1, 1)*vel_R(1)) + if (num_dims > 1) then + flux_src_vf(momxb)%sf(j, k, l) = flux_src_vf(momxb)%sf(j, k, l) - (-2._wp/3._wp)*0.5_wp*(vel_grad_L(2, 2) + vel_grad_R(2, 2)) + flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) - (-2._wp/3._wp)*0.5_wp*(vel_grad_L(2, 2)*vel_L(1) + vel_grad_R(2, 2)*vel_R(1)) - flux_src_vf(momxb)%sf(j, k, l) = flux_src_vf(momxb)%sf(j, k, l) - 0.5_wp*(vel_grad_L(1, 2) + vel_grad_R(1, 2)) - 0.5_wp*(vel_grad_L(2, 1) + vel_grad_R(2, 1)) - flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) - 0.5_wp*(vel_grad_L(1, 2)*vel_L(1) + vel_grad_R(1, 2)*vel_R(1)) - 0.5_wp*(vel_grad_L(2, 1)*vel_L(1) + vel_grad_R(2, 1)*vel_R(1)) + flux_src_vf(momxb + 1)%sf(j, k, l) = flux_src_vf(momxb + 1)%sf(j, k, l) - 0.5_wp*(vel_grad_L(1, 2) + vel_grad_R(1, 2)) - 0.5_wp*(vel_grad_L(2, 1) + vel_grad_R(2, 1)) + flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) - 0.5_wp*(vel_grad_L(1, 2)*vel_L(2) + vel_grad_R(1, 2)*vel_R(2)) - 0.5_wp*(vel_grad_L(2, 1)*vel_L(2) + vel_grad_R(2, 1)*vel_R(2)) if (num_dims > 2) then - flux_src_vf(momxb + 1)%sf(j, k, l) = flux_src_vf(momxb + 1)%sf(j, k, l) - (-2._wp/3._wp)*0.5_wp*(vel_grad_L(3, 3) + vel_grad_R(3, 3)) - flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) - (-2._wp/3._wp)*0.5_wp*(vel_grad_L(3, 3)*vel_L(2) + vel_grad_R(3, 3)*vel_R(2)) + flux_src_vf(momxb)%sf(j, k, l) = flux_src_vf(momxb)%sf(j, k, l) - (-2._wp/3._wp)*0.5_wp*(vel_grad_L(3, 3) + vel_grad_R(3, 3)) + flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) - (-2._wp/3._wp)*0.5_wp*(vel_grad_L(3, 3)*vel_L(1) + vel_grad_R(3, 3)*vel_R(1)) - flux_src_vf(momxb + 2)%sf(j, k, l) = flux_src_vf(momxb + 2)%sf(j, k, l) - 0.5_wp*(vel_grad_L(2, 3) + vel_grad_R(2, 3)) - 0.5_wp*(vel_grad_L(3, 2) + vel_grad_R(3, 2)) - flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) - 0.5_wp*(vel_grad_L(2, 3)*vel_L(3) + vel_grad_R(2, 3)*vel_R(3)) - 0.5_wp*(vel_grad_L(3, 2)*vel_L(3) + vel_grad_R(3, 2)*vel_R(3)) + flux_src_vf(momxb + 2)%sf(j, k, l) = flux_src_vf(momxb + 2)%sf(j, k, l) - 0.5_wp*(vel_grad_L(1, 3) + vel_grad_R(1, 3)) - 0.5_wp*(vel_grad_L(3, 1) + vel_grad_R(3, 1)) + flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) - 0.5_wp*(vel_grad_L(1, 3)*vel_L(3) + vel_grad_R(1, 3)*vel_R(3)) - 0.5_wp*(vel_grad_L(3, 1)*vel_L(3) + vel_grad_R(3, 1)*vel_R(3)) end if - else - flux_src_vf(momxb + 2)%sf(j, k, l) = flux_src_vf(momxb + 2)%sf(j, k, l) - (-2._wp/3._wp)*0.5_wp*(vel_grad_L(1, 1) + vel_grad_R(1, 1)) - flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) - (-2._wp/3._wp)*0.5_wp*(vel_grad_L(1, 1)*vel_L(3) + vel_grad_R(1, 1)*vel_R(3)) - - flux_src_vf(momxb + 2)%sf(j, k, l) = flux_src_vf(momxb + 2)%sf(j, k, l) - (-2._wp/3._wp)*0.5_wp*(vel_grad_L(2, 2) + vel_grad_R(2, 2)) - flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) - (-2._wp/3._wp)*0.5_wp*(vel_grad_L(2, 2)*vel_L(3) + vel_grad_R(2, 2)*vel_R(3)) + end if - flux_src_vf(momxb)%sf(j, k, l) = flux_src_vf(momxb)%sf(j, k, l) - 0.5_wp*(vel_grad_L(1, 3) + vel_grad_R(1, 3)) - 0.5_wp*(vel_grad_L(3, 1) + vel_grad_R(3, 1)) - flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) - 0.5_wp*(vel_grad_L(1, 3)*vel_L(1) + vel_grad_R(1, 3)*vel_R(1)) - 0.5_wp*(vel_grad_L(3, 1)*vel_L(1) + vel_grad_R(3, 1)*vel_R(1)) + else if (norm_dir == 2) then + flux_src_vf(momxb + 1)%sf(j, k, l) = flux_src_vf(momxb + 1)%sf(j, k, l) - (-2._wp/3._wp)*0.5_wp*(vel_grad_L(1, 1) + vel_grad_R(1, 1)) + flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) - (-2._wp/3._wp)*0.5_wp*(vel_grad_L(1, 1)*vel_L(2) + vel_grad_R(1, 1)*vel_R(2)) - flux_src_vf(momxb + 2)%sf(j, k, l) = flux_src_vf(momxb + 2)%sf(j, k, l) - (4._wp/3._wp)*0.5_wp*(vel_grad_L(3, 3) + vel_grad_R(3, 3)) - flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) - (4._wp/3._wp)*0.5_wp*(vel_grad_L(3, 3)*vel_L(3) + vel_grad_R(3, 3)*vel_R(3)) + flux_src_vf(momxb + 1)%sf(j, k, l) = flux_src_vf(momxb + 1)%sf(j, k, l) - (4._wp/3._wp)*0.5_wp*(vel_grad_L(2, 2) + vel_grad_R(2, 2)) + flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) - (4._wp/3._wp)*0.5_wp*(vel_grad_L(2, 2)*vel_L(2) + vel_grad_R(2, 2)*vel_R(2)) - flux_src_vf(momxb + 1)%sf(j, k, l) = flux_src_vf(momxb + 1)%sf(j, k, l) - 0.5_wp*(vel_grad_L(2, 3) + vel_grad_R(2, 3)) - 0.5_wp*(vel_grad_L(3, 2) + vel_grad_R(3, 2)) - flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) - 0.5_wp*(vel_grad_L(2, 3)*vel_L(2) + vel_grad_R(2, 3)*vel_R(2)) - 0.5_wp*(vel_grad_L(3, 2)*vel_L(2) + vel_grad_R(3, 2)*vel_R(2)) + flux_src_vf(momxb)%sf(j, k, l) = flux_src_vf(momxb)%sf(j, k, l) - 0.5_wp*(vel_grad_L(1, 2) + vel_grad_R(1, 2)) - 0.5_wp*(vel_grad_L(2, 1) + vel_grad_R(2, 1)) + flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) - 0.5_wp*(vel_grad_L(1, 2)*vel_L(1) + vel_grad_R(1, 2)*vel_R(1)) - 0.5_wp*(vel_grad_L(2, 1)*vel_L(1) + vel_grad_R(2, 1)*vel_R(1)) + if (num_dims > 2) then + flux_src_vf(momxb + 1)%sf(j, k, l) = flux_src_vf(momxb + 1)%sf(j, k, l) - (-2._wp/3._wp)*0.5_wp*(vel_grad_L(3, 3) + vel_grad_R(3, 3)) + flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) - (-2._wp/3._wp)*0.5_wp*(vel_grad_L(3, 3)*vel_L(2) + vel_grad_R(3, 3)*vel_R(2)) + flux_src_vf(momxb + 2)%sf(j, k, l) = flux_src_vf(momxb + 2)%sf(j, k, l) - 0.5_wp*(vel_grad_L(2, 3) + vel_grad_R(2, 3)) - 0.5_wp*(vel_grad_L(3, 2) + vel_grad_R(3, 2)) + flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) - 0.5_wp*(vel_grad_L(2, 3)*vel_L(3) + vel_grad_R(2, 3)*vel_R(3)) - 0.5_wp*(vel_grad_L(3, 2)*vel_L(3) + vel_grad_R(3, 2)*vel_R(3)) end if - end if + else + flux_src_vf(momxb + 2)%sf(j, k, l) = flux_src_vf(momxb + 2)%sf(j, k, l) - (-2._wp/3._wp)*0.5_wp*(vel_grad_L(1, 1) + vel_grad_R(1, 1)) + flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) - (-2._wp/3._wp)*0.5_wp*(vel_grad_L(1, 1)*vel_L(3) + vel_grad_R(1, 1)*vel_R(3)) - if (bulk_stress) then + flux_src_vf(momxb + 2)%sf(j, k, l) = flux_src_vf(momxb + 2)%sf(j, k, l) - (-2._wp/3._wp)*0.5_wp*(vel_grad_L(2, 2) + vel_grad_R(2, 2)) + flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) - (-2._wp/3._wp)*0.5_wp*(vel_grad_L(2, 2)*vel_L(3) + vel_grad_R(2, 2)*vel_R(3)) - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_dims - vel_grad_L(i, 1) = (dqL_prim_dx_vf(momxb + i - 1)%sf(j, k, l)/Re_L(2)) - vel_grad_R(i, 1) = (dqR_prim_dx_vf(momxb + i - 1)%sf(idx_right_phys(1), idx_right_phys(2), idx_right_phys(3))/Re_R(2)) - if (num_dims > 1) then - vel_grad_L(i, 2) = (dqL_prim_dy_vf(momxb + i - 1)%sf(j, k, l)/Re_L(2)) - vel_grad_R(i, 2) = (dqR_prim_dy_vf(momxb + i - 1)%sf(idx_right_phys(1), idx_right_phys(2), idx_right_phys(3))/Re_R(2)) - end if - if (num_dims > 2) then - vel_grad_L(i, 3) = (dqL_prim_dz_vf(momxb + i - 1)%sf(j, k, l)/Re_L(2)) - vel_grad_R(i, 3) = (dqR_prim_dz_vf(momxb + i - 1)%sf(idx_right_phys(1), idx_right_phys(2), idx_right_phys(3))/Re_R(2)) - end if - end do + flux_src_vf(momxb)%sf(j, k, l) = flux_src_vf(momxb)%sf(j, k, l) - 0.5_wp*(vel_grad_L(1, 3) + vel_grad_R(1, 3)) - 0.5_wp*(vel_grad_L(3, 1) + vel_grad_R(3, 1)) + flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) - 0.5_wp*(vel_grad_L(1, 3)*vel_L(1) + vel_grad_R(1, 3)*vel_R(1)) - 0.5_wp*(vel_grad_L(3, 1)*vel_L(1) + vel_grad_R(3, 1)*vel_R(1)) - if (norm_dir == 1) then - flux_src_vf(momxb)%sf(j, k, l) = flux_src_vf(momxb)%sf(j, k, l) - 0.5_wp*(vel_grad_L(1, 1) + vel_grad_R(1, 1)) - flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) - 0.5_wp*(vel_grad_L(1, 1)*vel_L(1) + vel_grad_R(1, 1)*vel_R(1)) - if (num_dims > 1) then - flux_src_vf(momxb)%sf(j, k, l) = flux_src_vf(momxb)%sf(j, k, l) - 0.5_wp*(vel_grad_L(2, 2) + vel_grad_R(2, 2)) - flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) - 0.5_wp*(vel_grad_L(2, 2)*vel_L(1) + vel_grad_R(2, 2)*vel_R(1)) + flux_src_vf(momxb + 2)%sf(j, k, l) = flux_src_vf(momxb + 2)%sf(j, k, l) - (4._wp/3._wp)*0.5_wp*(vel_grad_L(3, 3) + vel_grad_R(3, 3)) + flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) - (4._wp/3._wp)*0.5_wp*(vel_grad_L(3, 3)*vel_L(3) + vel_grad_R(3, 3)*vel_R(3)) - if (num_dims > 2) then - flux_src_vf(momxb)%sf(j, k, l) = flux_src_vf(momxb)%sf(j, k, l) - 0.5_wp*(vel_grad_L(3, 3) + vel_grad_R(3, 3)) - flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) - 0.5_wp*(vel_grad_L(3, 3)*vel_L(1) + vel_grad_R(3, 3)*vel_R(1)) - end if - end if + flux_src_vf(momxb + 1)%sf(j, k, l) = flux_src_vf(momxb + 1)%sf(j, k, l) - 0.5_wp*(vel_grad_L(2, 3) + vel_grad_R(2, 3)) - 0.5_wp*(vel_grad_L(3, 2) + vel_grad_R(3, 2)) + flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) - 0.5_wp*(vel_grad_L(2, 3)*vel_L(2) + vel_grad_R(2, 3)*vel_R(2)) - 0.5_wp*(vel_grad_L(3, 2)*vel_L(2) + vel_grad_R(3, 2)*vel_R(2)) + + end if + end if + + if (bulk_stress) then - else if (norm_dir == 2) then - flux_src_vf(momxb + 1)%sf(j, k, l) = flux_src_vf(momxb + 1)%sf(j, k, l) - 0.5_wp*(vel_grad_L(1, 1) + vel_grad_R(1, 1)) - flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) - 0.5_wp*(vel_grad_L(1, 1)*vel_L(2) + vel_grad_R(1, 1)*vel_R(2)) + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_dims + vel_grad_L(i, 1) = (dqL_prim_dx_vf(momxb + i - 1)%sf(j, k, l)/Re_L(2)) + vel_grad_R(i, 1) = (dqR_prim_dx_vf(momxb + i - 1)%sf(idx_right_phys(1), idx_right_phys(2), idx_right_phys(3))/Re_R(2)) + if (num_dims > 1) then + vel_grad_L(i, 2) = (dqL_prim_dy_vf(momxb + i - 1)%sf(j, k, l)/Re_L(2)) + vel_grad_R(i, 2) = (dqR_prim_dy_vf(momxb + i - 1)%sf(idx_right_phys(1), idx_right_phys(2), idx_right_phys(3))/Re_R(2)) + end if + if (num_dims > 2) then + vel_grad_L(i, 3) = (dqL_prim_dz_vf(momxb + i - 1)%sf(j, k, l)/Re_L(2)) + vel_grad_R(i, 3) = (dqR_prim_dz_vf(momxb + i - 1)%sf(idx_right_phys(1), idx_right_phys(2), idx_right_phys(3))/Re_R(2)) + end if + end do - flux_src_vf(momxb + 1)%sf(j, k, l) = flux_src_vf(momxb + 1)%sf(j, k, l) - 0.5_wp*(vel_grad_L(2, 2) + vel_grad_R(2, 2)) - flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) - 0.5_wp*(vel_grad_L(2, 2)*vel_L(2) + vel_grad_R(2, 2)*vel_R(2)) + if (norm_dir == 1) then + flux_src_vf(momxb)%sf(j, k, l) = flux_src_vf(momxb)%sf(j, k, l) - 0.5_wp*(vel_grad_L(1, 1) + vel_grad_R(1, 1)) + flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) - 0.5_wp*(vel_grad_L(1, 1)*vel_L(1) + vel_grad_R(1, 1)*vel_R(1)) + if (num_dims > 1) then + flux_src_vf(momxb)%sf(j, k, l) = flux_src_vf(momxb)%sf(j, k, l) - 0.5_wp*(vel_grad_L(2, 2) + vel_grad_R(2, 2)) + flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) - 0.5_wp*(vel_grad_L(2, 2)*vel_L(1) + vel_grad_R(2, 2)*vel_R(1)) if (num_dims > 2) then - flux_src_vf(momxb + 1)%sf(j, k, l) = flux_src_vf(momxb + 1)%sf(j, k, l) - 0.5_wp*(vel_grad_L(3, 3) + vel_grad_R(3, 3)) - flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) - 0.5_wp*(vel_grad_L(3, 3)*vel_L(2) + vel_grad_R(3, 3)*vel_R(2)) + flux_src_vf(momxb)%sf(j, k, l) = flux_src_vf(momxb)%sf(j, k, l) - 0.5_wp*(vel_grad_L(3, 3) + vel_grad_R(3, 3)) + flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) - 0.5_wp*(vel_grad_L(3, 3)*vel_L(1) + vel_grad_R(3, 3)*vel_R(1)) end if - else - flux_src_vf(momxb + 2)%sf(j, k, l) = flux_src_vf(momxb + 2)%sf(j, k, l) - 0.5_wp*(vel_grad_L(1, 1) + vel_grad_R(1, 1)) - flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) - 0.5_wp*(vel_grad_L(1, 1)*vel_L(3) + vel_grad_R(1, 1)*vel_R(3)) + end if - flux_src_vf(momxb + 2)%sf(j, k, l) = flux_src_vf(momxb + 2)%sf(j, k, l) - 0.5_wp*(vel_grad_L(2, 2) + vel_grad_R(2, 2)) - flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) - 0.5_wp*(vel_grad_L(2, 2)*vel_L(3) + vel_grad_R(2, 2)*vel_R(3)) + else if (norm_dir == 2) then + flux_src_vf(momxb + 1)%sf(j, k, l) = flux_src_vf(momxb + 1)%sf(j, k, l) - 0.5_wp*(vel_grad_L(1, 1) + vel_grad_R(1, 1)) + flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) - 0.5_wp*(vel_grad_L(1, 1)*vel_L(2) + vel_grad_R(1, 1)*vel_R(2)) - flux_src_vf(momxb + 2)%sf(j, k, l) = flux_src_vf(momxb + 2)%sf(j, k, l) - 0.5_wp*(vel_grad_L(3, 3) + vel_grad_R(3, 3)) - flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) - 0.5_wp*(vel_grad_L(3, 3)*vel_L(3) + vel_grad_R(3, 3)*vel_R(3)) + flux_src_vf(momxb + 1)%sf(j, k, l) = flux_src_vf(momxb + 1)%sf(j, k, l) - 0.5_wp*(vel_grad_L(2, 2) + vel_grad_R(2, 2)) + flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) - 0.5_wp*(vel_grad_L(2, 2)*vel_L(2) + vel_grad_R(2, 2)*vel_R(2)) + if (num_dims > 2) then + flux_src_vf(momxb + 1)%sf(j, k, l) = flux_src_vf(momxb + 1)%sf(j, k, l) - 0.5_wp*(vel_grad_L(3, 3) + vel_grad_R(3, 3)) + flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) - 0.5_wp*(vel_grad_L(3, 3)*vel_L(2) + vel_grad_R(3, 3)*vel_R(2)) end if + else + flux_src_vf(momxb + 2)%sf(j, k, l) = flux_src_vf(momxb + 2)%sf(j, k, l) - 0.5_wp*(vel_grad_L(1, 1) + vel_grad_R(1, 1)) + flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) - 0.5_wp*(vel_grad_L(1, 1)*vel_L(3) + vel_grad_R(1, 1)*vel_R(3)) + + flux_src_vf(momxb + 2)%sf(j, k, l) = flux_src_vf(momxb + 2)%sf(j, k, l) - 0.5_wp*(vel_grad_L(2, 2) + vel_grad_R(2, 2)) + flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) - 0.5_wp*(vel_grad_L(2, 2)*vel_L(3) + vel_grad_R(2, 2)*vel_R(3)) + + flux_src_vf(momxb + 2)%sf(j, k, l) = flux_src_vf(momxb + 2)%sf(j, k, l) - 0.5_wp*(vel_grad_L(3, 3) + vel_grad_R(3, 3)) + flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) - 0.5_wp*(vel_grad_L(3, 3)*vel_L(3) + vel_grad_R(3, 3)*vel_R(3)) end if - end do + + end if end do end do + end do $:END_GPU_PARALLEL_LOOP() end if @@ -1992,1555 +1992,1555 @@ contains if (model_eqns == 3) then !ME3 $:GPU_PARALLEL_LOOP(collapse=3, private='[i,j,k,l,vel_L, vel_R, vel_K_Star, Re_L, Re_R, rho_avg, h_avg, gamma_avg, s_L, s_R, s_S, vel_avg_rms, alpha_L, alpha_R, Ys_L, Ys_R, Xs_L, Xs_R, Gamma_iL, Gamma_iR, Cp_iL, Cp_iR, Yi_avg, Phi_avg, h_iL, h_iR, h_avg_2, tau_e_L, tau_e_R, G_L, G_R, flux_ene_e, xi_field_L, xi_field_R, pcorr, zcoef, vel_L_tmp, vel_R_tmp]') - do l = is3%beg, is3%end - do k = is2%beg, is2%end - do j = is1%beg, is1%end + do l = is3%beg, is3%end + do k = is2%beg, is2%end + do j = is1%beg, is1%end + + idx1 = dir_idx(1) - idx1 = dir_idx(1) + vel_L_rms = 0._wp; vel_R_rms = 0._wp + rho_L = 0._wp; rho_R = 0._wp + gamma_L = 0._wp; gamma_R = 0._wp + pi_inf_L = 0._wp; pi_inf_R = 0._wp + qv_L = 0._wp; qv_R = 0._wp + alpha_L_sum = 0._wp; alpha_R_sum = 0._wp + + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_dims + vel_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, contxe + i) + vel_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, contxe + i) + vel_L_rms = vel_L_rms + vel_L(i)**2._wp + vel_R_rms = vel_R_rms + vel_R(i)**2._wp + end do - vel_L_rms = 0._wp; vel_R_rms = 0._wp - rho_L = 0._wp; rho_R = 0._wp - gamma_L = 0._wp; gamma_R = 0._wp - pi_inf_L = 0._wp; pi_inf_R = 0._wp - qv_L = 0._wp; qv_R = 0._wp - alpha_L_sum = 0._wp; alpha_R_sum = 0._wp + pres_L = qL_prim_rs${XYZ}$_vf(j, k, l, E_idx) + pres_R = qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx) + if (mpp_lim) then $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_dims - vel_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, contxe + i) - vel_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, contxe + i) - vel_L_rms = vel_L_rms + vel_L(i)**2._wp - vel_R_rms = vel_R_rms + vel_R(i)**2._wp + do i = 1, num_fluids + qL_prim_rs${XYZ}$_vf(j, k, l, i) = max(0._wp, qL_prim_rs${XYZ}$_vf(j, k, l, i)) + qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i) = min(max(0._wp, qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i)), 1._wp) + qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) = max(0._wp, qR_prim_rs${XYZ}$_vf(j + 1, k, l, i)) + qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i) = min(max(0._wp, qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i)), 1._wp) + alpha_L_sum = alpha_L_sum + qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i) + alpha_R_sum = alpha_R_sum + qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i) + end do + + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids + qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i) = qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i)/max(alpha_L_sum, sgm_eps) + qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i)/max(alpha_R_sum, sgm_eps) end do + end if - pres_L = qL_prim_rs${XYZ}$_vf(j, k, l, E_idx) - pres_R = qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx) + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids + rho_L = rho_L + qL_prim_rs${XYZ}$_vf(j, k, l, i) + gamma_L = gamma_L + qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i)*gammas(i) + pi_inf_L = pi_inf_L + qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i)*pi_infs(i) + qv_L = qv_L + qL_prim_rs${XYZ}$_vf(j, k, l, i)*qvs(i) + + rho_R = rho_R + qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) + gamma_R = gamma_R + qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i)*gammas(i) + pi_inf_R = pi_inf_R + qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i)*pi_infs(i) + qv_R = qv_R + qR_prim_rs${XYZ}$_vf(j + 1, k, l, i)*qvs(i) + + alpha_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, advxb + i - 1) + alpha_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, advxb + i - 1) + end do - if (mpp_lim) then + if (viscous) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, 2 + Re_L(i) = dflt_real + Re_R(i) = dflt_real + if (Re_size(i) > 0) Re_L(i) = 0._wp + if (Re_size(i) > 0) Re_R(i) = 0._wp $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - qL_prim_rs${XYZ}$_vf(j, k, l, i) = max(0._wp, qL_prim_rs${XYZ}$_vf(j, k, l, i)) - qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i) = min(max(0._wp, qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i)), 1._wp) - qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) = max(0._wp, qR_prim_rs${XYZ}$_vf(j + 1, k, l, i)) - qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i) = min(max(0._wp, qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i)), 1._wp) - alpha_L_sum = alpha_L_sum + qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i) - alpha_R_sum = alpha_R_sum + qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i) + do q = 1, Re_size(i) + Re_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + Re_idx(i, q))/Res_gs(i, q) & + + Re_L(i) + Re_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + Re_idx(i, q))/Res_gs(i, q) & + + Re_R(i) end do + Re_L(i) = 1._wp/max(Re_L(i), sgm_eps) + Re_R(i) = 1._wp/max(Re_R(i), sgm_eps) + end do + end if - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i) = qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i)/max(alpha_L_sum, sgm_eps) - qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i)/max(alpha_R_sum, sgm_eps) - end do - end if + E_L = gamma_L*pres_L + pi_inf_L + 5.e-1_wp*rho_L*vel_L_rms + qv_L + E_R = gamma_R*pres_R + pi_inf_R + 5.e-1_wp*rho_R*vel_R_rms + qv_R + ! ENERGY ADJUSTMENTS FOR HYPOELASTIC ENERGY + if (hypoelasticity) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, strxe - strxb + 1 + tau_e_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, strxb - 1 + i) + tau_e_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, strxb - 1 + i) + end do + G_L = 0._wp; G_R = 0._wp $:GPU_LOOP(parallelism='[seq]') do i = 1, num_fluids - rho_L = rho_L + qL_prim_rs${XYZ}$_vf(j, k, l, i) - gamma_L = gamma_L + qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i)*gammas(i) - pi_inf_L = pi_inf_L + qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i)*pi_infs(i) - qv_L = qv_L + qL_prim_rs${XYZ}$_vf(j, k, l, i)*qvs(i) + G_L = G_L + alpha_L(i)*Gs_rs(i) + G_R = G_R + alpha_R(i)*Gs_rs(i) + end do + $:GPU_LOOP(parallelism='[seq]') + do i = 1, strxe - strxb + 1 + ! Elastic contribution to energy if G large enough + if ((G_L > verysmall) .and. (G_R > verysmall)) then + E_L = E_L + (tau_e_L(i)*tau_e_L(i))/(4._wp*G_L) + E_R = E_R + (tau_e_R(i)*tau_e_R(i))/(4._wp*G_R) + ! Additional terms in 2D and 3D + if ((i == 2) .or. (i == 4) .or. (i == 5)) then + E_L = E_L + (tau_e_L(i)*tau_e_L(i))/(4._wp*G_L) + E_R = E_R + (tau_e_R(i)*tau_e_R(i))/(4._wp*G_R) + end if + end if + end do + end if - rho_R = rho_R + qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) - gamma_R = gamma_R + qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i)*gammas(i) - pi_inf_R = pi_inf_R + qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i)*pi_infs(i) - qv_R = qv_R + qR_prim_rs${XYZ}$_vf(j + 1, k, l, i)*qvs(i) + ! ENERGY ADJUSTMENTS FOR HYPERELASTIC ENERGY + if (hyperelasticity) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_dims + xi_field_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, xibeg - 1 + i) + xi_field_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, xibeg - 1 + i) + end do + G_L = 0._wp; G_R = 0._wp; + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids + ! Mixture left and right shear modulus + G_L = G_L + alpha_L(i)*Gs_rs(i) + G_R = G_R + alpha_R(i)*Gs_rs(i) + end do + ! Elastic contribution to energy if G large enough + if (G_L > verysmall .and. G_R > verysmall) then + E_L = E_L + G_L*qL_prim_rs${XYZ}$_vf(j, k, l, xiend + 1) + E_R = E_R + G_R*qR_prim_rs${XYZ}$_vf(j + 1, k, l, xiend + 1) + end if + $:GPU_LOOP(parallelism='[seq]') + do i = 1, b_size - 1 + tau_e_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, strxb - 1 + i) + tau_e_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, strxb - 1 + i) + end do + end if + + H_L = (E_L + pres_L)/rho_L + H_R = (E_R + pres_R)/rho_R + + @:compute_average_state() + + call s_compute_speed_of_sound(pres_L, rho_L, gamma_L, pi_inf_L, H_L, alpha_L, & + vel_L_rms, 0._wp, c_L) - alpha_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, advxb + i - 1) - alpha_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, advxb + i - 1) + call s_compute_speed_of_sound(pres_R, rho_R, gamma_R, pi_inf_R, H_R, alpha_R, & + vel_R_rms, 0._wp, c_R) + + !> The computation of c_avg does not require all the variables, and therefore the non '_avg' + ! variables are placeholders to call the subroutine. + call s_compute_speed_of_sound(pres_R, rho_avg, gamma_avg, pi_inf_R, H_avg, alpha_R, & + vel_avg_rms, 0._wp, c_avg) + + if (viscous) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, 2 + Re_avg_rs${XYZ}$_vf(j, k, l, i) = 2._wp/(1._wp/Re_L(i) + 1._wp/Re_R(i)) end do + end if - if (viscous) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, 2 - Re_L(i) = dflt_real - Re_R(i) = dflt_real - if (Re_size(i) > 0) Re_L(i) = 0._wp - if (Re_size(i) > 0) Re_R(i) = 0._wp - $:GPU_LOOP(parallelism='[seq]') - do q = 1, Re_size(i) - Re_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + Re_idx(i, q))/Res_gs(i, q) & - + Re_L(i) - Re_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + Re_idx(i, q))/Res_gs(i, q) & - + Re_R(i) - end do - Re_L(i) = 1._wp/max(Re_L(i), sgm_eps) - Re_R(i) = 1._wp/max(Re_R(i), sgm_eps) - end do - end if + ! Low Mach correction + if (low_Mach == 2) then + @:compute_low_Mach_correction() + end if - E_L = gamma_L*pres_L + pi_inf_L + 5.e-1_wp*rho_L*vel_L_rms + qv_L - E_R = gamma_R*pres_R + pi_inf_R + 5.e-1_wp*rho_R*vel_R_rms + qv_R + ! COMPUTING THE DIRECT WAVE SPEEDS + if (wave_speeds == 1) then + if (elasticity) then + s_L = min(vel_L(dir_idx(1)) - sqrt(c_L*c_L + & + (((4._wp*G_L)/3._wp) + tau_e_L(dir_idx_tau(1)))/rho_L), vel_R(dir_idx(1)) - sqrt(c_R*c_R + & + (((4._wp*G_R)/3._wp) + tau_e_R(dir_idx_tau(1)))/rho_R)) + s_R = max(vel_R(dir_idx(1)) + sqrt(c_R*c_R + & + (((4._wp*G_R)/3._wp) + tau_e_R(dir_idx_tau(1)))/rho_R), vel_L(dir_idx(1)) + sqrt(c_L*c_L + & + (((4._wp*G_L)/3._wp) + tau_e_L(dir_idx_tau(1)))/rho_L)) + s_S = (pres_R - tau_e_R(dir_idx_tau(1)) - pres_L + & + tau_e_L(dir_idx_tau(1)) + rho_L*vel_L(idx1)*(s_L - vel_L(idx1)) - & + rho_R*vel_R(idx1)*(s_R - vel_R(idx1)))/(rho_L*(s_L - vel_L(idx1)) - & + rho_R*(s_R - vel_R(idx1))) + else + s_L = min(vel_L(dir_idx(1)) - c_L, vel_R(dir_idx(1)) - c_R) + s_R = max(vel_R(dir_idx(1)) + c_R, vel_L(dir_idx(1)) + c_L) + s_S = (pres_R - pres_L + rho_L*vel_L(dir_idx(1))* & + (s_L - vel_L(dir_idx(1))) - rho_R*vel_R(dir_idx(1))*(s_R - vel_R(dir_idx(1)))) & + /(rho_L*(s_L - vel_L(dir_idx(1))) - rho_R*(s_R - vel_R(dir_idx(1)))) - ! ENERGY ADJUSTMENTS FOR HYPOELASTIC ENERGY - if (hypoelasticity) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, strxe - strxb + 1 - tau_e_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, strxb - 1 + i) - tau_e_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, strxb - 1 + i) - end do - G_L = 0._wp; G_R = 0._wp - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - G_L = G_L + alpha_L(i)*Gs_rs(i) - G_R = G_R + alpha_R(i)*Gs_rs(i) - end do - $:GPU_LOOP(parallelism='[seq]') - do i = 1, strxe - strxb + 1 - ! Elastic contribution to energy if G large enough - if ((G_L > verysmall) .and. (G_R > verysmall)) then - E_L = E_L + (tau_e_L(i)*tau_e_L(i))/(4._wp*G_L) - E_R = E_R + (tau_e_R(i)*tau_e_R(i))/(4._wp*G_R) - ! Additional terms in 2D and 3D - if ((i == 2) .or. (i == 4) .or. (i == 5)) then - E_L = E_L + (tau_e_L(i)*tau_e_L(i))/(4._wp*G_L) - E_R = E_R + (tau_e_R(i)*tau_e_R(i))/(4._wp*G_R) - end if - end if - end do end if + elseif (wave_speeds == 2) then + pres_SL = 5.e-1_wp*(pres_L + pres_R + rho_avg*c_avg* & + (vel_L(dir_idx(1)) - & + vel_R(dir_idx(1)))) + + pres_SR = pres_SL + + Ms_L = max(1._wp, sqrt(1._wp + ((5.e-1_wp + gamma_L)/(1._wp + gamma_L))* & + (pres_SL/pres_L - 1._wp)*pres_L/ & + ((pres_L + pi_inf_L/(1._wp + gamma_L))))) + Ms_R = max(1._wp, sqrt(1._wp + ((5.e-1_wp + gamma_R)/(1._wp + gamma_R))* & + (pres_SR/pres_R - 1._wp)*pres_R/ & + ((pres_R + pi_inf_R/(1._wp + gamma_R))))) - ! ENERGY ADJUSTMENTS FOR HYPERELASTIC ENERGY - if (hyperelasticity) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_dims - xi_field_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, xibeg - 1 + i) - xi_field_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, xibeg - 1 + i) - end do - G_L = 0._wp; G_R = 0._wp; - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - ! Mixture left and right shear modulus - G_L = G_L + alpha_L(i)*Gs_rs(i) - G_R = G_R + alpha_R(i)*Gs_rs(i) - end do - ! Elastic contribution to energy if G large enough - if (G_L > verysmall .and. G_R > verysmall) then - E_L = E_L + G_L*qL_prim_rs${XYZ}$_vf(j, k, l, xiend + 1) - E_R = E_R + G_R*qR_prim_rs${XYZ}$_vf(j + 1, k, l, xiend + 1) - end if - $:GPU_LOOP(parallelism='[seq]') - do i = 1, b_size - 1 - tau_e_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, strxb - 1 + i) - tau_e_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, strxb - 1 + i) - end do - end if + s_L = vel_L(dir_idx(1)) - c_L*Ms_L + s_R = vel_R(dir_idx(1)) + c_R*Ms_R - H_L = (E_L + pres_L)/rho_L - H_R = (E_R + pres_R)/rho_R + s_S = 5.e-1_wp*((vel_L(dir_idx(1)) + vel_R(dir_idx(1))) + & + (pres_L - pres_R)/ & + (rho_avg*c_avg)) + end if - @:compute_average_state() + ! follows Einfeldt et al. + ! s_M/P = min/max(0.,s_L/R) + s_M = min(0._wp, s_L); s_P = max(0._wp, s_R) - call s_compute_speed_of_sound(pres_L, rho_L, gamma_L, pi_inf_L, H_L, alpha_L, & - vel_L_rms, 0._wp, c_L) + ! goes with q_star_L/R = xi_L/R * (variable) + ! xi_L/R = ( ( s_L/R - u_L/R )/(s_L/R - s_star) ) + xi_L = (s_L - vel_L(idx1))/(s_L - s_S) + xi_R = (s_R - vel_R(idx1))/(s_R - s_S) - call s_compute_speed_of_sound(pres_R, rho_R, gamma_R, pi_inf_R, H_R, alpha_R, & - vel_R_rms, 0._wp, c_R) + ! goes with numerical star velocity in x/y/z directions + ! xi_P/M = 0.5 +/m sgn(0.5,s_star) + xi_M = (5.e-1_wp + sign(0.5_wp, s_S)) + xi_P = (5.e-1_wp - sign(0.5_wp, s_S)) - !> The computation of c_avg does not require all the variables, and therefore the non '_avg' - ! variables are placeholders to call the subroutine. - call s_compute_speed_of_sound(pres_R, rho_avg, gamma_avg, pi_inf_R, H_avg, alpha_R, & - vel_avg_rms, 0._wp, c_avg) + ! goes with the numerical velocity in x/y/z directions + ! xi_P/M (pressure) = min/max(0. sgn(1,sL/sR)) + xi_MP = -min(0._wp, sign(1._wp, s_L)) + xi_PP = max(0._wp, sign(1._wp, s_R)) - if (viscous) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, 2 - Re_avg_rs${XYZ}$_vf(j, k, l, i) = 2._wp/(1._wp/Re_L(i) + 1._wp/Re_R(i)) - end do - end if + E_star = xi_M*(E_L + xi_MP*(xi_L*(E_L + (s_S - vel_L(dir_idx(1)))* & + (rho_L*s_S + pres_L/(s_L - vel_L(dir_idx(1))))) - E_L)) + & + xi_P*(E_R + xi_PP*(xi_R*(E_R + (s_S - vel_R(dir_idx(1)))* & + (rho_R*s_S + pres_R/(s_R - vel_R(dir_idx(1))))) - E_R)) + p_Star = xi_M*(pres_L + xi_MP*(rho_L*(s_L - vel_L(dir_idx(1)))*(s_S - vel_L(dir_idx(1))))) + & + xi_P*(pres_R + xi_PP*(rho_R*(s_R - vel_R(dir_idx(1)))*(s_S - vel_R(dir_idx(1))))) - ! Low Mach correction - if (low_Mach == 2) then - @:compute_low_Mach_correction() - end if + rho_Star = xi_M*(rho_L*(xi_MP*xi_L + 1._wp - xi_MP)) + & + xi_P*(rho_R*(xi_PP*xi_R + 1._wp - xi_PP)) - ! COMPUTING THE DIRECT WAVE SPEEDS - if (wave_speeds == 1) then - if (elasticity) then - s_L = min(vel_L(dir_idx(1)) - sqrt(c_L*c_L + & - (((4._wp*G_L)/3._wp) + tau_e_L(dir_idx_tau(1)))/rho_L), vel_R(dir_idx(1)) - sqrt(c_R*c_R + & - (((4._wp*G_R)/3._wp) + tau_e_R(dir_idx_tau(1)))/rho_R)) - s_R = max(vel_R(dir_idx(1)) + sqrt(c_R*c_R + & - (((4._wp*G_R)/3._wp) + tau_e_R(dir_idx_tau(1)))/rho_R), vel_L(dir_idx(1)) + sqrt(c_L*c_L + & - (((4._wp*G_L)/3._wp) + tau_e_L(dir_idx_tau(1)))/rho_L)) - s_S = (pres_R - tau_e_R(dir_idx_tau(1)) - pres_L + & - tau_e_L(dir_idx_tau(1)) + rho_L*vel_L(idx1)*(s_L - vel_L(idx1)) - & - rho_R*vel_R(idx1)*(s_R - vel_R(idx1)))/(rho_L*(s_L - vel_L(idx1)) - & - rho_R*(s_R - vel_R(idx1))) - else - s_L = min(vel_L(dir_idx(1)) - c_L, vel_R(dir_idx(1)) - c_R) - s_R = max(vel_R(dir_idx(1)) + c_R, vel_L(dir_idx(1)) + c_L) - s_S = (pres_R - pres_L + rho_L*vel_L(dir_idx(1))* & - (s_L - vel_L(dir_idx(1))) - rho_R*vel_R(dir_idx(1))*(s_R - vel_R(dir_idx(1)))) & - /(rho_L*(s_L - vel_L(dir_idx(1))) - rho_R*(s_R - vel_R(dir_idx(1)))) + vel_K_Star = vel_L(idx1)*(1._wp - xi_MP) + xi_MP*vel_R(idx1) + & + xi_MP*xi_PP*(s_S - vel_R(idx1)) - end if - elseif (wave_speeds == 2) then - pres_SL = 5.e-1_wp*(pres_L + pres_R + rho_avg*c_avg* & - (vel_L(dir_idx(1)) - & - vel_R(dir_idx(1)))) - - pres_SR = pres_SL - - Ms_L = max(1._wp, sqrt(1._wp + ((5.e-1_wp + gamma_L)/(1._wp + gamma_L))* & - (pres_SL/pres_L - 1._wp)*pres_L/ & - ((pres_L + pi_inf_L/(1._wp + gamma_L))))) - Ms_R = max(1._wp, sqrt(1._wp + ((5.e-1_wp + gamma_R)/(1._wp + gamma_R))* & - (pres_SR/pres_R - 1._wp)*pres_R/ & - ((pres_R + pi_inf_R/(1._wp + gamma_R))))) - - s_L = vel_L(dir_idx(1)) - c_L*Ms_L - s_R = vel_R(dir_idx(1)) + c_R*Ms_R - - s_S = 5.e-1_wp*((vel_L(dir_idx(1)) + vel_R(dir_idx(1))) + & - (pres_L - pres_R)/ & - (rho_avg*c_avg)) - end if + ! Low Mach correction + if (low_Mach == 1) then + @:compute_low_Mach_correction() + else + pcorr = 0._wp + end if - ! follows Einfeldt et al. - ! s_M/P = min/max(0.,s_L/R) - s_M = min(0._wp, s_L); s_P = max(0._wp, s_R) - - ! goes with q_star_L/R = xi_L/R * (variable) - ! xi_L/R = ( ( s_L/R - u_L/R )/(s_L/R - s_star) ) - xi_L = (s_L - vel_L(idx1))/(s_L - s_S) - xi_R = (s_R - vel_R(idx1))/(s_R - s_S) - - ! goes with numerical star velocity in x/y/z directions - ! xi_P/M = 0.5 +/m sgn(0.5,s_star) - xi_M = (5.e-1_wp + sign(0.5_wp, s_S)) - xi_P = (5.e-1_wp - sign(0.5_wp, s_S)) - - ! goes with the numerical velocity in x/y/z directions - ! xi_P/M (pressure) = min/max(0. sgn(1,sL/sR)) - xi_MP = -min(0._wp, sign(1._wp, s_L)) - xi_PP = max(0._wp, sign(1._wp, s_R)) - - E_star = xi_M*(E_L + xi_MP*(xi_L*(E_L + (s_S - vel_L(dir_idx(1)))* & - (rho_L*s_S + pres_L/(s_L - vel_L(dir_idx(1))))) - E_L)) + & - xi_P*(E_R + xi_PP*(xi_R*(E_R + (s_S - vel_R(dir_idx(1)))* & - (rho_R*s_S + pres_R/(s_R - vel_R(dir_idx(1))))) - E_R)) - p_Star = xi_M*(pres_L + xi_MP*(rho_L*(s_L - vel_L(dir_idx(1)))*(s_S - vel_L(dir_idx(1))))) + & - xi_P*(pres_R + xi_PP*(rho_R*(s_R - vel_R(dir_idx(1)))*(s_S - vel_R(dir_idx(1))))) - - rho_Star = xi_M*(rho_L*(xi_MP*xi_L + 1._wp - xi_MP)) + & - xi_P*(rho_R*(xi_PP*xi_R + 1._wp - xi_PP)) - - vel_K_Star = vel_L(idx1)*(1._wp - xi_MP) + xi_MP*vel_R(idx1) + & - xi_MP*xi_PP*(s_S - vel_R(idx1)) - - ! Low Mach correction - if (low_Mach == 1) then - @:compute_low_Mach_correction() - else - pcorr = 0._wp - end if + ! COMPUTING FLUXES + ! MASS FLUX. + $:GPU_LOOP(parallelism='[seq]') + do i = 1, contxe + flux_rs${XYZ}$_vf(j, k, l, i) = & + xi_M*qL_prim_rs${XYZ}$_vf(j, k, l, i)*(vel_L(idx1) + s_M*(xi_L - 1._wp)) + & + xi_P*qR_prim_rs${XYZ}$_vf(j + 1, k, l, i)*(vel_R(idx1) + s_P*(xi_R - 1._wp)) + end do - ! COMPUTING FLUXES - ! MASS FLUX. - $:GPU_LOOP(parallelism='[seq]') - do i = 1, contxe - flux_rs${XYZ}$_vf(j, k, l, i) = & - xi_M*qL_prim_rs${XYZ}$_vf(j, k, l, i)*(vel_L(idx1) + s_M*(xi_L - 1._wp)) + & - xi_P*qR_prim_rs${XYZ}$_vf(j + 1, k, l, i)*(vel_R(idx1) + s_P*(xi_R - 1._wp)) - end do + ! MOMENTUM FLUX. + ! f = \rho u u - \sigma, q = \rho u, q_star = \xi * \rho*(s_star, v, w) + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_dims + idxi = dir_idx(i) + flux_rs${XYZ}$_vf(j, k, l, contxe + idxi) = rho_Star*vel_K_Star* & + (dir_flg(idxi)*vel_K_Star + (1._wp - dir_flg(idxi))*(xi_M*vel_L(idxi) + xi_P*vel_R(idxi))) + dir_flg(idxi)*p_Star & + + (s_M/s_L)*(s_P/s_R)*dir_flg(idxi)*pcorr + end do - ! MOMENTUM FLUX. - ! f = \rho u u - \sigma, q = \rho u, q_star = \xi * \rho*(s_star, v, w) + ! ENERGY FLUX. + ! f = u*(E-\sigma), q = E, q_star = \xi*E+(s-u)(\rho s_star - \sigma/(s-u)) + flux_rs${XYZ}$_vf(j, k, l, E_idx) = (E_star + p_Star)*vel_K_Star & + + (s_M/s_L)*(s_P/s_R)*pcorr*s_S + + ! ELASTICITY. Elastic shear stress additions for the momentum and energy flux + if (elasticity) then + flux_ene_e = 0._wp; $:GPU_LOOP(parallelism='[seq]') do i = 1, num_dims idxi = dir_idx(i) - flux_rs${XYZ}$_vf(j, k, l, contxe + idxi) = rho_Star*vel_K_Star* & - (dir_flg(idxi)*vel_K_Star + (1._wp - dir_flg(idxi))*(xi_M*vel_L(idxi) + xi_P*vel_R(idxi))) + dir_flg(idxi)*p_Star & - + (s_M/s_L)*(s_P/s_R)*dir_flg(idxi)*pcorr + ! MOMENTUM ELASTIC FLUX. + flux_rs${XYZ}$_vf(j, k, l, contxe + idxi) = & + flux_rs${XYZ}$_vf(j, k, l, contxe + idxi) & + - xi_M*tau_e_L(dir_idx_tau(i)) - xi_P*tau_e_R(dir_idx_tau(i)) + ! ENERGY ELASTIC FLUX. + flux_ene_e = flux_ene_e - & + xi_M*(vel_L(idxi)*tau_e_L(dir_idx_tau(i)) + & + s_M*(xi_L*((s_S - vel_L(i))*(tau_e_L(dir_idx_tau(i))/(s_L - vel_L(i)))))) - & + xi_P*(vel_R(idxi)*tau_e_R(dir_idx_tau(i)) + & + s_P*(xi_R*((s_S - vel_R(i))*(tau_e_R(dir_idx_tau(i))/(s_R - vel_R(i)))))) end do + flux_rs${XYZ}$_vf(j, k, l, E_idx) = flux_rs${XYZ}$_vf(j, k, l, E_idx) + flux_ene_e + end if - ! ENERGY FLUX. - ! f = u*(E-\sigma), q = E, q_star = \xi*E+(s-u)(\rho s_star - \sigma/(s-u)) - flux_rs${XYZ}$_vf(j, k, l, E_idx) = (E_star + p_Star)*vel_K_Star & - + (s_M/s_L)*(s_P/s_R)*pcorr*s_S + ! VOLUME FRACTION FLUX. + $:GPU_LOOP(parallelism='[seq]') + do i = advxb, advxe + flux_rs${XYZ}$_vf(j, k, l, i) = & + xi_M*qL_prim_rs${XYZ}$_vf(j, k, l, i)*s_S + & + xi_P*qR_prim_rs${XYZ}$_vf(j + 1, k, l, i)*s_S + end do - ! ELASTICITY. Elastic shear stress additions for the momentum and energy flux - if (elasticity) then - flux_ene_e = 0._wp; - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_dims - idxi = dir_idx(i) - ! MOMENTUM ELASTIC FLUX. - flux_rs${XYZ}$_vf(j, k, l, contxe + idxi) = & - flux_rs${XYZ}$_vf(j, k, l, contxe + idxi) & - - xi_M*tau_e_L(dir_idx_tau(i)) - xi_P*tau_e_R(dir_idx_tau(i)) - ! ENERGY ELASTIC FLUX. - flux_ene_e = flux_ene_e - & - xi_M*(vel_L(idxi)*tau_e_L(dir_idx_tau(i)) + & - s_M*(xi_L*((s_S - vel_L(i))*(tau_e_L(dir_idx_tau(i))/(s_L - vel_L(i)))))) - & - xi_P*(vel_R(idxi)*tau_e_R(dir_idx_tau(i)) + & - s_P*(xi_R*((s_S - vel_R(i))*(tau_e_R(dir_idx_tau(i))/(s_R - vel_R(i)))))) - end do - flux_rs${XYZ}$_vf(j, k, l, E_idx) = flux_rs${XYZ}$_vf(j, k, l, E_idx) + flux_ene_e - end if + ! SOURCE TERM FOR VOLUME FRACTION ADVECTION FLUX. + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_dims + idxi = dir_idx(i) + vel_src_rs${XYZ}$_vf(j, k, l, idxi) = & + xi_M*(vel_L(idxi) + dir_flg(idxi)*(s_S*(xi_MP*(xi_L - 1) + 1) - vel_L(idxi))) + & + xi_P*(vel_R(idxi) + dir_flg(idxi)*(s_S*(xi_PP*(xi_R - 1) + 1) - vel_R(idxi))) + end do - ! VOLUME FRACTION FLUX. - $:GPU_LOOP(parallelism='[seq]') - do i = advxb, advxe - flux_rs${XYZ}$_vf(j, k, l, i) = & - xi_M*qL_prim_rs${XYZ}$_vf(j, k, l, i)*s_S + & - xi_P*qR_prim_rs${XYZ}$_vf(j + 1, k, l, i)*s_S - end do + ! INTERNAL ENERGIES ADVECTION FLUX. + ! K-th pressure and velocity in preparation for the internal energy flux + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids + p_K_Star = xi_M*(xi_MP*((pres_L + pi_infs(i)/(1._wp + gammas(i)))* & + xi_L**(1._wp/gammas(i) + 1._wp) - pi_infs(i)/(1._wp + gammas(i)) - pres_L) + pres_L) + & + xi_P*(xi_PP*((pres_R + pi_infs(i)/(1._wp + gammas(i)))* & + xi_R**(1._wp/gammas(i) + 1._wp) - pi_infs(i)/(1._wp + gammas(i)) - pres_R) + pres_R) + + flux_rs${XYZ}$_vf(j, k, l, i + intxb - 1) = & + ((xi_M*qL_prim_rs${XYZ}$_vf(j, k, l, i + advxb - 1) + xi_P*qR_prim_rs${XYZ}$_vf(j + 1, k, l, i + advxb - 1))* & + (gammas(i)*p_K_Star + pi_infs(i)) + & + (xi_M*qL_prim_rs${XYZ}$_vf(j, k, l, i + contxb - 1) + xi_P*qR_prim_rs${XYZ}$_vf(j + 1, k, l, i + contxb - 1))* & + qvs(i))*vel_K_Star & + + (s_M/s_L)*(s_P/s_R)*pcorr*s_S*(xi_M*qL_prim_rs${XYZ}$_vf(j, k, l, i + advxb - 1) + xi_P*qR_prim_rs${XYZ}$_vf(j + 1, k, l, i + advxb - 1)) + end do + + flux_src_rs${XYZ}$_vf(j, k, l, advxb) = vel_src_rs${XYZ}$_vf(j, k, l, idx1) - ! SOURCE TERM FOR VOLUME FRACTION ADVECTION FLUX. + ! HYPOELASTIC STRESS EVOLUTION FLUX. + if (hypoelasticity) then $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_dims - idxi = dir_idx(i) - vel_src_rs${XYZ}$_vf(j, k, l, idxi) = & - xi_M*(vel_L(idxi) + dir_flg(idxi)*(s_S*(xi_MP*(xi_L - 1) + 1) - vel_L(idxi))) + & - xi_P*(vel_R(idxi) + dir_flg(idxi)*(s_S*(xi_PP*(xi_R - 1) + 1) - vel_R(idxi))) + do i = 1, strxe - strxb + 1 + flux_rs${XYZ}$_vf(j, k, l, strxb - 1 + i) = & + xi_M*(s_S/(s_L - s_S))*(s_L*rho_L*tau_e_L(i) - rho_L*vel_L(idx1)*tau_e_L(i)) + & + xi_P*(s_S/(s_R - s_S))*(s_R*rho_R*tau_e_R(i) - rho_R*vel_R(idx1)*tau_e_R(i)) end do + end if - ! INTERNAL ENERGIES ADVECTION FLUX. - ! K-th pressure and velocity in preparation for the internal energy flux + ! REFERENCE MAP FLUX. + if (hyperelasticity) then $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - p_K_Star = xi_M*(xi_MP*((pres_L + pi_infs(i)/(1._wp + gammas(i)))* & - xi_L**(1._wp/gammas(i) + 1._wp) - pi_infs(i)/(1._wp + gammas(i)) - pres_L) + pres_L) + & - xi_P*(xi_PP*((pres_R + pi_infs(i)/(1._wp + gammas(i)))* & - xi_R**(1._wp/gammas(i) + 1._wp) - pi_infs(i)/(1._wp + gammas(i)) - pres_R) + pres_R) - - flux_rs${XYZ}$_vf(j, k, l, i + intxb - 1) = & - ((xi_M*qL_prim_rs${XYZ}$_vf(j, k, l, i + advxb - 1) + xi_P*qR_prim_rs${XYZ}$_vf(j + 1, k, l, i + advxb - 1))* & - (gammas(i)*p_K_Star + pi_infs(i)) + & - (xi_M*qL_prim_rs${XYZ}$_vf(j, k, l, i + contxb - 1) + xi_P*qR_prim_rs${XYZ}$_vf(j + 1, k, l, i + contxb - 1))* & - qvs(i))*vel_K_Star & - + (s_M/s_L)*(s_P/s_R)*pcorr*s_S*(xi_M*qL_prim_rs${XYZ}$_vf(j, k, l, i + advxb - 1) + xi_P*qR_prim_rs${XYZ}$_vf(j + 1, k, l, i + advxb - 1)) + do i = 1, num_dims + flux_rs${XYZ}$_vf(j, k, l, xibeg - 1 + i) = & + xi_M*(s_S/(s_L - s_S))*(s_L*rho_L*xi_field_L(i) & + - rho_L*vel_L(idx1)*xi_field_L(i)) + & + xi_P*(s_S/(s_R - s_S))*(s_R*rho_R*xi_field_R(i) & + - rho_R*vel_R(idx1)*xi_field_R(i)) end do + end if - flux_src_rs${XYZ}$_vf(j, k, l, advxb) = vel_src_rs${XYZ}$_vf(j, k, l, idx1) + ! COLOR FUNCTION FLUX + if (surface_tension) then + flux_rs${XYZ}$_vf(j, k, l, c_idx) = & + (xi_M*qL_prim_rs${XYZ}$_vf(j, k, l, c_idx) + & + xi_P*qR_prim_rs${XYZ}$_vf(j + 1, k, l, c_idx))*s_S + end if - ! HYPOELASTIC STRESS EVOLUTION FLUX. - if (hypoelasticity) then + ! Geometrical source flux for cylindrical coordinates + #:if (NORM_DIR == 2) + if (cyl_coord) then + !Substituting the advective flux into the inviscid geometrical source flux $:GPU_LOOP(parallelism='[seq]') - do i = 1, strxe - strxb + 1 - flux_rs${XYZ}$_vf(j, k, l, strxb - 1 + i) = & - xi_M*(s_S/(s_L - s_S))*(s_L*rho_L*tau_e_L(i) - rho_L*vel_L(idx1)*tau_e_L(i)) + & - xi_P*(s_S/(s_R - s_S))*(s_R*rho_R*tau_e_R(i) - rho_R*vel_R(idx1)*tau_e_R(i)) + do i = 1, E_idx + flux_gsrc_rs${XYZ}$_vf(j, k, l, i) = flux_rs${XYZ}$_vf(j, k, l, i) end do - end if - - ! REFERENCE MAP FLUX. - if (hyperelasticity) then $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_dims - flux_rs${XYZ}$_vf(j, k, l, xibeg - 1 + i) = & - xi_M*(s_S/(s_L - s_S))*(s_L*rho_L*xi_field_L(i) & - - rho_L*vel_L(idx1)*xi_field_L(i)) + & - xi_P*(s_S/(s_R - s_S))*(s_R*rho_R*xi_field_R(i) & - - rho_R*vel_R(idx1)*xi_field_R(i)) + do i = intxb, intxe + flux_gsrc_rs${XYZ}$_vf(j, k, l, i) = flux_rs${XYZ}$_vf(j, k, l, i) + end do + ! Recalculating the radial momentum geometric source flux + flux_gsrc_rs${XYZ}$_vf(j, k, l, momxb - 1 + dir_idx(1)) = & + flux_gsrc_rs${XYZ}$_vf(j, k, l, momxb - 1 + dir_idx(1)) - p_Star + ! Geometrical source of the void fraction(s) is zero + $:GPU_LOOP(parallelism='[seq]') + do i = advxb, advxe + flux_gsrc_rs${XYZ}$_vf(j, k, l, i) = 0._wp end do end if + #:endif + #:if (NORM_DIR == 3) + if (grid_geometry == 3) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, sys_size + flux_gsrc_rs${XYZ}$_vf(j, k, l, i) = 0._wp + end do + flux_gsrc_rs${XYZ}$_vf(j, k, l, momxb - 1 + dir_idx(1)) = & + flux_gsrc_rs${XYZ}$_vf(j, k, l, momxb - 1 + dir_idx(1)) - p_Star - ! COLOR FUNCTION FLUX - if (surface_tension) then - flux_rs${XYZ}$_vf(j, k, l, c_idx) = & - (xi_M*qL_prim_rs${XYZ}$_vf(j, k, l, c_idx) + & - xi_P*qR_prim_rs${XYZ}$_vf(j + 1, k, l, c_idx))*s_S + flux_gsrc_rs${XYZ}$_vf(j, k, l, momxe) = flux_rs${XYZ}$_vf(j, k, l, momxb + 1) end if + #:endif - ! Geometrical source flux for cylindrical coordinates - #:if (NORM_DIR == 2) - if (cyl_coord) then - !Substituting the advective flux into the inviscid geometrical source flux - $:GPU_LOOP(parallelism='[seq]') - do i = 1, E_idx - flux_gsrc_rs${XYZ}$_vf(j, k, l, i) = flux_rs${XYZ}$_vf(j, k, l, i) - end do - $:GPU_LOOP(parallelism='[seq]') - do i = intxb, intxe - flux_gsrc_rs${XYZ}$_vf(j, k, l, i) = flux_rs${XYZ}$_vf(j, k, l, i) - end do - ! Recalculating the radial momentum geometric source flux - flux_gsrc_rs${XYZ}$_vf(j, k, l, momxb - 1 + dir_idx(1)) = & - flux_gsrc_rs${XYZ}$_vf(j, k, l, momxb - 1 + dir_idx(1)) - p_Star - ! Geometrical source of the void fraction(s) is zero - $:GPU_LOOP(parallelism='[seq]') - do i = advxb, advxe - flux_gsrc_rs${XYZ}$_vf(j, k, l, i) = 0._wp - end do - end if - #:endif - #:if (NORM_DIR == 3) - if (grid_geometry == 3) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, sys_size - flux_gsrc_rs${XYZ}$_vf(j, k, l, i) = 0._wp - end do - flux_gsrc_rs${XYZ}$_vf(j, k, l, momxb - 1 + dir_idx(1)) = & - flux_gsrc_rs${XYZ}$_vf(j, k, l, momxb - 1 + dir_idx(1)) - p_Star - - flux_gsrc_rs${XYZ}$_vf(j, k, l, momxe) = flux_rs${XYZ}$_vf(j, k, l, momxb + 1) - end if - #:endif - - end do end do end do + end do $:END_GPU_PARALLEL_LOOP() elseif (model_eqns == 4) then !ME4 $:GPU_PARALLEL_LOOP(collapse=3, private='[i,j,k,l,alpha_rho_L, alpha_rho_R, vel_L, vel_R, alpha_L, alpha_R, rho_avg, h_avg, gamma_avg, s_L, s_R, s_S, vel_avg_rms, nbub_L, nbub_R, ptilde_L, ptilde_R]') - do l = is3%beg, is3%end - do k = is2%beg, is2%end - do j = is1%beg, is1%end + do l = is3%beg, is3%end + do k = is2%beg, is2%end + do j = is1%beg, is1%end - vel_L_rms = 0._wp; vel_R_rms = 0._wp - rho_L = 0._wp; rho_R = 0._wp - gamma_L = 0._wp; gamma_R = 0._wp - pi_inf_L = 0._wp; pi_inf_R = 0._wp - qv_L = 0._wp; qv_R = 0._wp + vel_L_rms = 0._wp; vel_R_rms = 0._wp + rho_L = 0._wp; rho_R = 0._wp + gamma_L = 0._wp; gamma_R = 0._wp + pi_inf_L = 0._wp; pi_inf_R = 0._wp + qv_L = 0._wp; qv_R = 0._wp - $:GPU_LOOP(parallelism='[seq]') - do i = 1, contxe - alpha_rho_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, i) - alpha_rho_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) - end do + $:GPU_LOOP(parallelism='[seq]') + do i = 1, contxe + alpha_rho_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, i) + alpha_rho_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) + end do - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_dims - vel_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, contxe + i) - vel_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, contxe + i) - vel_L_rms = vel_L_rms + vel_L(i)**2._wp - vel_R_rms = vel_R_rms + vel_R(i)**2._wp - end do + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_dims + vel_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, contxe + i) + vel_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, contxe + i) + vel_L_rms = vel_L_rms + vel_L(i)**2._wp + vel_R_rms = vel_R_rms + vel_R(i)**2._wp + end do - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - alpha_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i) - alpha_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i) - end do + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids + alpha_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i) + alpha_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i) + end do - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - rho_L = rho_L + alpha_rho_L(i) - gamma_L = gamma_L + alpha_L(i)*gammas(i) - pi_inf_L = pi_inf_L + alpha_L(i)*pi_infs(i) - qv_L = qv_L + alpha_rho_L(i)*qvs(i) - - rho_R = rho_R + alpha_rho_R(i) - gamma_R = gamma_R + alpha_R(i)*gammas(i) - pi_inf_R = pi_inf_R + alpha_R(i)*pi_infs(i) - qv_R = qv_R + alpha_rho_R(i)*qvs(i) - end do + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids + rho_L = rho_L + alpha_rho_L(i) + gamma_L = gamma_L + alpha_L(i)*gammas(i) + pi_inf_L = pi_inf_L + alpha_L(i)*pi_infs(i) + qv_L = qv_L + alpha_rho_L(i)*qvs(i) - pres_L = qL_prim_rs${XYZ}$_vf(j, k, l, E_idx) - pres_R = qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx) + rho_R = rho_R + alpha_rho_R(i) + gamma_R = gamma_R + alpha_R(i)*gammas(i) + pi_inf_R = pi_inf_R + alpha_R(i)*pi_infs(i) + qv_R = qv_R + alpha_rho_R(i)*qvs(i) + end do - E_L = gamma_L*pres_L + pi_inf_L + 5.e-1_wp*rho_L*vel_L_rms + qv_L - E_R = gamma_R*pres_R + pi_inf_R + 5.e-1_wp*rho_R*vel_R_rms + qv_R + pres_L = qL_prim_rs${XYZ}$_vf(j, k, l, E_idx) + pres_R = qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx) - H_L = (E_L + pres_L)/rho_L - H_R = (E_R + pres_R)/rho_R + E_L = gamma_L*pres_L + pi_inf_L + 5.e-1_wp*rho_L*vel_L_rms + qv_L + E_R = gamma_R*pres_R + pi_inf_R + 5.e-1_wp*rho_R*vel_R_rms + qv_R - @:compute_average_state() + H_L = (E_L + pres_L)/rho_L + H_R = (E_R + pres_R)/rho_R - call s_compute_speed_of_sound(pres_L, rho_L, gamma_L, pi_inf_L, H_L, alpha_L, & - vel_L_rms, 0._wp, c_L) + @:compute_average_state() - call s_compute_speed_of_sound(pres_R, rho_R, gamma_R, pi_inf_R, H_R, alpha_R, & - vel_R_rms, 0._wp, c_R) + call s_compute_speed_of_sound(pres_L, rho_L, gamma_L, pi_inf_L, H_L, alpha_L, & + vel_L_rms, 0._wp, c_L) - !> The computation of c_avg does not require all the variables, and therefore the non '_avg' - ! variables are placeholders to call the subroutine. + call s_compute_speed_of_sound(pres_R, rho_R, gamma_R, pi_inf_R, H_R, alpha_R, & + vel_R_rms, 0._wp, c_R) - call s_compute_speed_of_sound(pres_R, rho_avg, gamma_avg, pi_inf_R, H_avg, alpha_R, & - vel_avg_rms, 0._wp, c_avg) + !> The computation of c_avg does not require all the variables, and therefore the non '_avg' + ! variables are placeholders to call the subroutine. - if (wave_speeds == 1) then - s_L = min(vel_L(dir_idx(1)) - c_L, vel_R(dir_idx(1)) - c_R) - s_R = max(vel_R(dir_idx(1)) + c_R, vel_L(dir_idx(1)) + c_L) + call s_compute_speed_of_sound(pres_R, rho_avg, gamma_avg, pi_inf_R, H_avg, alpha_R, & + vel_avg_rms, 0._wp, c_avg) - s_S = (pres_R - pres_L + rho_L*vel_L(dir_idx(1))* & - (s_L - vel_L(dir_idx(1))) - & - rho_R*vel_R(dir_idx(1))* & - (s_R - vel_R(dir_idx(1)))) & - /(rho_L*(s_L - vel_L(dir_idx(1))) - & - rho_R*(s_R - vel_R(dir_idx(1)))) - elseif (wave_speeds == 2) then - pres_SL = 5.e-1_wp*(pres_L + pres_R + rho_avg*c_avg* & - (vel_L(dir_idx(1)) - & - vel_R(dir_idx(1)))) - - pres_SR = pres_SL - - Ms_L = max(1._wp, sqrt(1._wp + ((5.e-1_wp + gamma_L)/(1._wp + gamma_L))* & - (pres_SL/pres_L - 1._wp)*pres_L/ & - ((pres_L + pi_inf_L/(1._wp + gamma_L))))) - Ms_R = max(1._wp, sqrt(1._wp + ((5.e-1_wp + gamma_R)/(1._wp + gamma_R))* & - (pres_SR/pres_R - 1._wp)*pres_R/ & - ((pres_R + pi_inf_R/(1._wp + gamma_R))))) - - s_L = vel_L(dir_idx(1)) - c_L*Ms_L - s_R = vel_R(dir_idx(1)) + c_R*Ms_R - - s_S = 5.e-1_wp*((vel_L(dir_idx(1)) + vel_R(dir_idx(1))) + & - (pres_L - pres_R)/ & - (rho_avg*c_avg)) - end if + if (wave_speeds == 1) then + s_L = min(vel_L(dir_idx(1)) - c_L, vel_R(dir_idx(1)) - c_R) + s_R = max(vel_R(dir_idx(1)) + c_R, vel_L(dir_idx(1)) + c_L) - ! follows Einfeldt et al. - ! s_M/P = min/max(0.,s_L/R) - s_M = min(0._wp, s_L); s_P = max(0._wp, s_R) + s_S = (pres_R - pres_L + rho_L*vel_L(dir_idx(1))* & + (s_L - vel_L(dir_idx(1))) - & + rho_R*vel_R(dir_idx(1))* & + (s_R - vel_R(dir_idx(1)))) & + /(rho_L*(s_L - vel_L(dir_idx(1))) - & + rho_R*(s_R - vel_R(dir_idx(1)))) + elseif (wave_speeds == 2) then + pres_SL = 5.e-1_wp*(pres_L + pres_R + rho_avg*c_avg* & + (vel_L(dir_idx(1)) - & + vel_R(dir_idx(1)))) - ! goes with q_star_L/R = xi_L/R * (variable) - ! xi_L/R = ( ( s_L/R - u_L/R )/(s_L/R - s_star) ) - xi_L = (s_L - vel_L(dir_idx(1)))/(s_L - s_S) - xi_R = (s_R - vel_R(dir_idx(1)))/(s_R - s_S) + pres_SR = pres_SL - ! goes with numerical velocity in x/y/z directions - ! xi_P/M = 0.5 +/m sgn(0.5,s_star) - xi_M = (5.e-1_wp + sign(5.e-1_wp, s_S)) - xi_P = (5.e-1_wp - sign(5.e-1_wp, s_S)) + Ms_L = max(1._wp, sqrt(1._wp + ((5.e-1_wp + gamma_L)/(1._wp + gamma_L))* & + (pres_SL/pres_L - 1._wp)*pres_L/ & + ((pres_L + pi_inf_L/(1._wp + gamma_L))))) + Ms_R = max(1._wp, sqrt(1._wp + ((5.e-1_wp + gamma_R)/(1._wp + gamma_R))* & + (pres_SR/pres_R - 1._wp)*pres_R/ & + ((pres_R + pi_inf_R/(1._wp + gamma_R))))) - $:GPU_LOOP(parallelism='[seq]') - do i = 1, contxe - flux_rs${XYZ}$_vf(j, k, l, i) = & - xi_M*alpha_rho_L(i) & - *(vel_L(dir_idx(1)) + s_M*(xi_L - 1._wp)) & - + xi_P*alpha_rho_R(i) & - *(vel_R(dir_idx(1)) + s_P*(xi_R - 1._wp)) - end do + s_L = vel_L(dir_idx(1)) - c_L*Ms_L + s_R = vel_R(dir_idx(1)) + c_R*Ms_R + + s_S = 5.e-1_wp*((vel_L(dir_idx(1)) + vel_R(dir_idx(1))) + & + (pres_L - pres_R)/ & + (rho_avg*c_avg)) + end if + + ! follows Einfeldt et al. + ! s_M/P = min/max(0.,s_L/R) + s_M = min(0._wp, s_L); s_P = max(0._wp, s_R) + + ! goes with q_star_L/R = xi_L/R * (variable) + ! xi_L/R = ( ( s_L/R - u_L/R )/(s_L/R - s_star) ) + xi_L = (s_L - vel_L(dir_idx(1)))/(s_L - s_S) + xi_R = (s_R - vel_R(dir_idx(1)))/(s_R - s_S) + + ! goes with numerical velocity in x/y/z directions + ! xi_P/M = 0.5 +/m sgn(0.5,s_star) + xi_M = (5.e-1_wp + sign(5.e-1_wp, s_S)) + xi_P = (5.e-1_wp - sign(5.e-1_wp, s_S)) + + $:GPU_LOOP(parallelism='[seq]') + do i = 1, contxe + flux_rs${XYZ}$_vf(j, k, l, i) = & + xi_M*alpha_rho_L(i) & + *(vel_L(dir_idx(1)) + s_M*(xi_L - 1._wp)) & + + xi_P*alpha_rho_R(i) & + *(vel_R(dir_idx(1)) + s_P*(xi_R - 1._wp)) + end do + + ! Momentum flux. + ! f = \rho u u + p I, q = \rho u, q_star = \xi * \rho*(s_star, v, w) + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_dims + flux_rs${XYZ}$_vf(j, k, l, contxe + dir_idx(i)) = & + xi_M*(rho_L*(vel_L(dir_idx(1))* & + vel_L(dir_idx(i)) + & + s_M*(xi_L*(dir_flg(dir_idx(i))*s_S + & + (1._wp - dir_flg(dir_idx(i)))* & + vel_L(dir_idx(i))) - vel_L(dir_idx(i)))) + & + dir_flg(dir_idx(i))*pres_L) & + + xi_P*(rho_R*(vel_R(dir_idx(1))* & + vel_R(dir_idx(i)) + & + s_P*(xi_R*(dir_flg(dir_idx(i))*s_S + & + (1._wp - dir_flg(dir_idx(i)))* & + vel_R(dir_idx(i))) - vel_R(dir_idx(i)))) + & + dir_flg(dir_idx(i))*pres_R) + end do - ! Momentum flux. - ! f = \rho u u + p I, q = \rho u, q_star = \xi * \rho*(s_star, v, w) + if (bubbles_euler) then + ! Put p_tilde in $:GPU_LOOP(parallelism='[seq]') do i = 1, num_dims flux_rs${XYZ}$_vf(j, k, l, contxe + dir_idx(i)) = & - xi_M*(rho_L*(vel_L(dir_idx(1))* & - vel_L(dir_idx(i)) + & - s_M*(xi_L*(dir_flg(dir_idx(i))*s_S + & - (1._wp - dir_flg(dir_idx(i)))* & - vel_L(dir_idx(i))) - vel_L(dir_idx(i)))) + & - dir_flg(dir_idx(i))*pres_L) & - + xi_P*(rho_R*(vel_R(dir_idx(1))* & - vel_R(dir_idx(i)) + & - s_P*(xi_R*(dir_flg(dir_idx(i))*s_S + & - (1._wp - dir_flg(dir_idx(i)))* & - vel_R(dir_idx(i))) - vel_R(dir_idx(i)))) + & - dir_flg(dir_idx(i))*pres_R) + flux_rs${XYZ}$_vf(j, k, l, contxe + dir_idx(i)) + & + xi_M*(dir_flg(dir_idx(i))*(-1._wp*ptilde_L)) & + + xi_P*(dir_flg(dir_idx(i))*(-1._wp*ptilde_R)) end do + end if - if (bubbles_euler) then - ! Put p_tilde in - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_dims - flux_rs${XYZ}$_vf(j, k, l, contxe + dir_idx(i)) = & - flux_rs${XYZ}$_vf(j, k, l, contxe + dir_idx(i)) + & - xi_M*(dir_flg(dir_idx(i))*(-1._wp*ptilde_L)) & - + xi_P*(dir_flg(dir_idx(i))*(-1._wp*ptilde_R)) - end do - end if + flux_rs${XYZ}$_vf(j, k, l, E_idx) = 0._wp + + $:GPU_LOOP(parallelism='[seq]') + do i = alf_idx, alf_idx !only advect the void fraction + flux_rs${XYZ}$_vf(j, k, l, i) = & + xi_M*qL_prim_rs${XYZ}$_vf(j, k, l, i) & + *(vel_L(dir_idx(1)) + s_M*(xi_L - 1._wp)) & + + xi_P*qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) & + *(vel_R(dir_idx(1)) + s_P*(xi_R - 1._wp)) + end do + + ! Source for volume fraction advection equation + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_dims + + vel_src_rs${XYZ}$_vf(j, k, l, dir_idx(i)) = 0._wp + !IF ( (model_eqns == 4) .or. (num_fluids==1) ) vel_src_rs_vf(dir_idx(i))%sf(j,k,l) = 0._wp + end do - flux_rs${XYZ}$_vf(j, k, l, E_idx) = 0._wp + flux_src_rs${XYZ}$_vf(j, k, l, advxb) = vel_src_rs${XYZ}$_vf(j, k, l, dir_idx(1)) + ! Add advection flux for bubble variables + if (bubbles_euler) then $:GPU_LOOP(parallelism='[seq]') - do i = alf_idx, alf_idx !only advect the void fraction + do i = bubxb, bubxe flux_rs${XYZ}$_vf(j, k, l, i) = & - xi_M*qL_prim_rs${XYZ}$_vf(j, k, l, i) & + xi_M*nbub_L*qL_prim_rs${XYZ}$_vf(j, k, l, i) & *(vel_L(dir_idx(1)) + s_M*(xi_L - 1._wp)) & - + xi_P*qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) & + + xi_P*nbub_R*qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) & *(vel_R(dir_idx(1)) + s_P*(xi_R - 1._wp)) end do + end if - ! Source for volume fraction advection equation - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_dims - - vel_src_rs${XYZ}$_vf(j, k, l, dir_idx(i)) = 0._wp - !IF ( (model_eqns == 4) .or. (num_fluids==1) ) vel_src_rs_vf(dir_idx(i))%sf(j,k,l) = 0._wp - end do - - flux_src_rs${XYZ}$_vf(j, k, l, advxb) = vel_src_rs${XYZ}$_vf(j, k, l, dir_idx(1)) + ! Geometrical source flux for cylindrical coordinates - ! Add advection flux for bubble variables - if (bubbles_euler) then + #:if (NORM_DIR == 2) + if (cyl_coord) then + ! Substituting the advective flux into the inviscid geometrical source flux + $:GPU_LOOP(parallelism='[seq]') + do i = 1, E_idx + flux_gsrc_rs${XYZ}$_vf(j, k, l, i) = flux_rs${XYZ}$_vf(j, k, l, i) + end do + ! Recalculating the radial momentum geometric source flux + flux_gsrc_rs${XYZ}$_vf(j, k, l, contxe + dir_idx(1)) = & + xi_M*(rho_L*(vel_L(dir_idx(1))* & + vel_L(dir_idx(1)) + & + s_M*(xi_L*(dir_flg(dir_idx(1))*s_S + & + (1._wp - dir_flg(dir_idx(1)))* & + vel_L(dir_idx(1))) - vel_L(dir_idx(1))))) & + + xi_P*(rho_R*(vel_R(dir_idx(1))* & + vel_R(dir_idx(1)) + & + s_P*(xi_R*(dir_flg(dir_idx(1))*s_S + & + (1._wp - dir_flg(dir_idx(1)))* & + vel_R(dir_idx(1))) - vel_R(dir_idx(1))))) + ! Geometrical source of the void fraction(s) is zero + $:GPU_LOOP(parallelism='[seq]') + do i = advxb, advxe + flux_gsrc_rs${XYZ}$_vf(j, k, l, i) = 0._wp + end do + end if + #:endif + #:if (NORM_DIR == 3) + if (grid_geometry == 3) then $:GPU_LOOP(parallelism='[seq]') - do i = bubxb, bubxe - flux_rs${XYZ}$_vf(j, k, l, i) = & - xi_M*nbub_L*qL_prim_rs${XYZ}$_vf(j, k, l, i) & - *(vel_L(dir_idx(1)) + s_M*(xi_L - 1._wp)) & - + xi_P*nbub_R*qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) & - *(vel_R(dir_idx(1)) + s_P*(xi_R - 1._wp)) + do i = 1, sys_size + flux_gsrc_rs${XYZ}$_vf(j, k, l, i) = 0._wp end do + flux_gsrc_rs${XYZ}$_vf(j, k, l, momxb + 1) = & + -xi_M*(rho_L*(vel_L(dir_idx(1))* & + vel_L(dir_idx(1)) + & + s_M*(xi_L*(dir_flg(dir_idx(1))*s_S + & + (1._wp - dir_flg(dir_idx(1)))* & + vel_L(dir_idx(1))) - vel_L(dir_idx(1))))) & + - xi_P*(rho_R*(vel_R(dir_idx(1))* & + vel_R(dir_idx(1)) + & + s_P*(xi_R*(dir_flg(dir_idx(1))*s_S + & + (1._wp - dir_flg(dir_idx(1)))* & + vel_R(dir_idx(1))) - vel_R(dir_idx(1))))) + flux_gsrc_rs${XYZ}$_vf(j, k, l, momxe) = flux_rs${XYZ}$_vf(j, k, l, momxb + 1) end if - - ! Geometrical source flux for cylindrical coordinates - - #:if (NORM_DIR == 2) - if (cyl_coord) then - ! Substituting the advective flux into the inviscid geometrical source flux - $:GPU_LOOP(parallelism='[seq]') - do i = 1, E_idx - flux_gsrc_rs${XYZ}$_vf(j, k, l, i) = flux_rs${XYZ}$_vf(j, k, l, i) - end do - ! Recalculating the radial momentum geometric source flux - flux_gsrc_rs${XYZ}$_vf(j, k, l, contxe + dir_idx(1)) = & - xi_M*(rho_L*(vel_L(dir_idx(1))* & - vel_L(dir_idx(1)) + & - s_M*(xi_L*(dir_flg(dir_idx(1))*s_S + & - (1._wp - dir_flg(dir_idx(1)))* & - vel_L(dir_idx(1))) - vel_L(dir_idx(1))))) & - + xi_P*(rho_R*(vel_R(dir_idx(1))* & - vel_R(dir_idx(1)) + & - s_P*(xi_R*(dir_flg(dir_idx(1))*s_S + & - (1._wp - dir_flg(dir_idx(1)))* & - vel_R(dir_idx(1))) - vel_R(dir_idx(1))))) - ! Geometrical source of the void fraction(s) is zero - $:GPU_LOOP(parallelism='[seq]') - do i = advxb, advxe - flux_gsrc_rs${XYZ}$_vf(j, k, l, i) = 0._wp - end do - end if - #:endif - #:if (NORM_DIR == 3) - if (grid_geometry == 3) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, sys_size - flux_gsrc_rs${XYZ}$_vf(j, k, l, i) = 0._wp - end do - flux_gsrc_rs${XYZ}$_vf(j, k, l, momxb + 1) = & - -xi_M*(rho_L*(vel_L(dir_idx(1))* & - vel_L(dir_idx(1)) + & - s_M*(xi_L*(dir_flg(dir_idx(1))*s_S + & - (1._wp - dir_flg(dir_idx(1)))* & - vel_L(dir_idx(1))) - vel_L(dir_idx(1))))) & - - xi_P*(rho_R*(vel_R(dir_idx(1))* & - vel_R(dir_idx(1)) + & - s_P*(xi_R*(dir_flg(dir_idx(1))*s_S + & - (1._wp - dir_flg(dir_idx(1)))* & - vel_R(dir_idx(1))) - vel_R(dir_idx(1))))) - flux_gsrc_rs${XYZ}$_vf(j, k, l, momxe) = flux_rs${XYZ}$_vf(j, k, l, momxb + 1) - end if - #:endif - end do + #:endif end do end do + end do $:END_GPU_PARALLEL_LOOP() elseif (model_eqns == 2 .and. bubbles_euler) then $:GPU_PARALLEL_LOOP(collapse=3, private='[i,j,k,l,R0_L, R0_R, V0_L, V0_R, P0_L, P0_R, pbw_L, pbw_R, vel_L, vel_R, rho_avg, alpha_L, alpha_R, h_avg, gamma_avg, s_L, s_R, s_S, nbub_L, nbub_R, ptilde_L, ptilde_R, vel_avg_rms, Re_L, Re_R, pcorr, zcoef, vel_L_tmp, vel_R_tmp]') - do l = is3%beg, is3%end - do k = is2%beg, is2%end - do j = is1%beg, is1%end + do l = is3%beg, is3%end + do k = is2%beg, is2%end + do j = is1%beg, is1%end + + vel_L_rms = 0._wp; vel_R_rms = 0._wp + rho_L = 0._wp; rho_R = 0._wp + gamma_L = 0._wp; gamma_R = 0._wp + pi_inf_L = 0._wp; pi_inf_R = 0._wp + qv_L = 0._wp; qv_R = 0._wp + + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids + alpha_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i) + alpha_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i) + end do - vel_L_rms = 0._wp; vel_R_rms = 0._wp - rho_L = 0._wp; rho_R = 0._wp - gamma_L = 0._wp; gamma_R = 0._wp - pi_inf_L = 0._wp; pi_inf_R = 0._wp - qv_L = 0._wp; qv_R = 0._wp + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_dims + vel_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, contxe + i) + vel_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, contxe + i) + vel_L_rms = vel_L_rms + vel_L(i)**2._wp + vel_R_rms = vel_R_rms + vel_R(i)**2._wp + end do + ! Retain this in the refactor + if (mpp_lim .and. (num_fluids > 2)) then $:GPU_LOOP(parallelism='[seq]') do i = 1, num_fluids - alpha_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i) - alpha_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i) + rho_L = rho_L + qL_prim_rs${XYZ}$_vf(j, k, l, i) + gamma_L = gamma_L + qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i)*gammas(i) + pi_inf_L = pi_inf_L + qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i)*pi_infs(i) + qv_L = qv_L + qL_prim_rs${XYZ}$_vf(j, k, l, i)*qvs(i) + rho_R = rho_R + qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) + gamma_R = gamma_R + qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i)*gammas(i) + pi_inf_R = pi_inf_R + qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i)*pi_infs(i) + qv_R = qv_R + qR_prim_rs${XYZ}$_vf(j + 1, k, l, i)*qvs(i) end do - + else if (num_fluids > 2) then $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_dims - vel_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, contxe + i) - vel_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, contxe + i) - vel_L_rms = vel_L_rms + vel_L(i)**2._wp - vel_R_rms = vel_R_rms + vel_R(i)**2._wp + do i = 1, num_fluids - 1 + rho_L = rho_L + qL_prim_rs${XYZ}$_vf(j, k, l, i) + gamma_L = gamma_L + qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i)*gammas(i) + pi_inf_L = pi_inf_L + qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i)*pi_infs(i) + qv_L = qv_L + qL_prim_rs${XYZ}$_vf(j, k, l, i)*qvs(i) + rho_R = rho_R + qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) + gamma_R = gamma_R + qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i)*gammas(i) + pi_inf_R = pi_inf_R + qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i)*pi_infs(i) + qv_R = qv_R + qR_prim_rs${XYZ}$_vf(j + 1, k, l, i)*qvs(i) end do + else + rho_L = qL_prim_rs${XYZ}$_vf(j, k, l, 1) + gamma_L = gammas(1) + pi_inf_L = pi_infs(1) + qv_L = qvs(1) + rho_R = qR_prim_rs${XYZ}$_vf(j + 1, k, l, 1) + gamma_R = gammas(1) + pi_inf_R = pi_infs(1) + qv_R = qvs(1) + end if - ! Retain this in the refactor - if (mpp_lim .and. (num_fluids > 2)) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - rho_L = rho_L + qL_prim_rs${XYZ}$_vf(j, k, l, i) - gamma_L = gamma_L + qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i)*gammas(i) - pi_inf_L = pi_inf_L + qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i)*pi_infs(i) - qv_L = qv_L + qL_prim_rs${XYZ}$_vf(j, k, l, i)*qvs(i) - rho_R = rho_R + qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) - gamma_R = gamma_R + qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i)*gammas(i) - pi_inf_R = pi_inf_R + qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i)*pi_infs(i) - qv_R = qv_R + qR_prim_rs${XYZ}$_vf(j + 1, k, l, i)*qvs(i) - end do - else if (num_fluids > 2) then + if (viscous) then + if (num_fluids == 1) then ! Need to consider case with num_fluids >= 2 $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - 1 - rho_L = rho_L + qL_prim_rs${XYZ}$_vf(j, k, l, i) - gamma_L = gamma_L + qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i)*gammas(i) - pi_inf_L = pi_inf_L + qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i)*pi_infs(i) - qv_L = qv_L + qL_prim_rs${XYZ}$_vf(j, k, l, i)*qvs(i) - rho_R = rho_R + qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) - gamma_R = gamma_R + qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i)*gammas(i) - pi_inf_R = pi_inf_R + qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i)*pi_infs(i) - qv_R = qv_R + qR_prim_rs${XYZ}$_vf(j + 1, k, l, i)*qvs(i) + do i = 1, 2 + Re_L(i) = dflt_real + Re_R(i) = dflt_real + + if (Re_size(i) > 0) Re_L(i) = 0._wp + if (Re_size(i) > 0) Re_R(i) = 0._wp + + $:GPU_LOOP(parallelism='[seq]') + do q = 1, Re_size(i) + Re_L(i) = (1._wp - qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + Re_idx(i, q)))/Res_gs(i, q) & + + Re_L(i) + Re_R(i) = (1._wp - qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + Re_idx(i, q)))/Res_gs(i, q) & + + Re_R(i) + end do + + Re_L(i) = 1._wp/max(Re_L(i), sgm_eps) + Re_R(i) = 1._wp/max(Re_R(i), sgm_eps) + end do - else - rho_L = qL_prim_rs${XYZ}$_vf(j, k, l, 1) - gamma_L = gammas(1) - pi_inf_L = pi_infs(1) - qv_L = qvs(1) - rho_R = qR_prim_rs${XYZ}$_vf(j + 1, k, l, 1) - gamma_R = gammas(1) - pi_inf_R = pi_infs(1) - qv_R = qvs(1) end if + end if - if (viscous) then - if (num_fluids == 1) then ! Need to consider case with num_fluids >= 2 - $:GPU_LOOP(parallelism='[seq]') - do i = 1, 2 - Re_L(i) = dflt_real - Re_R(i) = dflt_real + pres_L = qL_prim_rs${XYZ}$_vf(j, k, l, E_idx) + pres_R = qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx) - if (Re_size(i) > 0) Re_L(i) = 0._wp - if (Re_size(i) > 0) Re_R(i) = 0._wp + E_L = gamma_L*pres_L + pi_inf_L + 5.e-1_wp*rho_L*vel_L_rms + E_R = gamma_R*pres_R + pi_inf_R + 5.e-1_wp*rho_R*vel_R_rms - $:GPU_LOOP(parallelism='[seq]') - do q = 1, Re_size(i) - Re_L(i) = (1._wp - qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + Re_idx(i, q)))/Res_gs(i, q) & - + Re_L(i) - Re_R(i) = (1._wp - qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + Re_idx(i, q)))/Res_gs(i, q) & - + Re_R(i) - end do + H_L = (E_L + pres_L)/rho_L + H_R = (E_R + pres_R)/rho_R - Re_L(i) = 1._wp/max(Re_L(i), sgm_eps) - Re_R(i) = 1._wp/max(Re_R(i), sgm_eps) + if (avg_state == 2) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, nb + R0_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, rs(i)) + R0_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, rs(i)) + + V0_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, vs(i)) + V0_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, vs(i)) + if (.not. polytropic .and. .not. qbmm) then + P0_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, ps(i)) + P0_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, ps(i)) + end if + end do + if (.not. qbmm) then + if (adv_n) then + nbub_L = qL_prim_rs${XYZ}$_vf(j, k, l, n_idx) + nbub_R = qR_prim_rs${XYZ}$_vf(j + 1, k, l, n_idx) + else + nbub_L_denom = 0._wp + nbub_R_denom = 0._wp + $:GPU_LOOP(parallelism='[seq]') + do i = 1, nb + nbub_L_denom = nbub_L_denom + (R0_L(i)**3._wp)*weight(i) + nbub_R_denom = nbub_R_denom + (R0_R(i)**3._wp)*weight(i) end do + nbub_L = (3._wp/(4._wp*pi))*qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + num_fluids)/nbub_L_denom + nbub_R = (3._wp/(4._wp*pi))*qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + num_fluids)/nbub_R_denom end if + else + !nb stored in 0th moment of first R0 bin in variable conversion module + nbub_L = qL_prim_rs${XYZ}$_vf(j, k, l, bubxb) + nbub_R = qR_prim_rs${XYZ}$_vf(j + 1, k, l, bubxb) end if - pres_L = qL_prim_rs${XYZ}$_vf(j, k, l, E_idx) - pres_R = qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx) - - E_L = gamma_L*pres_L + pi_inf_L + 5.e-1_wp*rho_L*vel_L_rms - E_R = gamma_R*pres_R + pi_inf_R + 5.e-1_wp*rho_R*vel_R_rms - - H_L = (E_L + pres_L)/rho_L - H_R = (E_R + pres_R)/rho_R - - if (avg_state == 2) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, nb - R0_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, rs(i)) - R0_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, rs(i)) - - V0_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, vs(i)) - V0_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, vs(i)) - if (.not. polytropic .and. .not. qbmm) then - P0_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, ps(i)) - P0_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, ps(i)) - end if - end do - + $:GPU_LOOP(parallelism='[seq]') + do i = 1, nb if (.not. qbmm) then - if (adv_n) then - nbub_L = qL_prim_rs${XYZ}$_vf(j, k, l, n_idx) - nbub_R = qR_prim_rs${XYZ}$_vf(j + 1, k, l, n_idx) + if (polytropic) then + pbw_L(i) = f_cpbw_KM(R0(i), R0_L(i), V0_L(i), 0._wp) + pbw_R(i) = f_cpbw_KM(R0(i), R0_R(i), V0_R(i), 0._wp) else - nbub_L_denom = 0._wp - nbub_R_denom = 0._wp - $:GPU_LOOP(parallelism='[seq]') - do i = 1, nb - nbub_L_denom = nbub_L_denom + (R0_L(i)**3._wp)*weight(i) - nbub_R_denom = nbub_R_denom + (R0_R(i)**3._wp)*weight(i) - end do - nbub_L = (3._wp/(4._wp*pi))*qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + num_fluids)/nbub_L_denom - nbub_R = (3._wp/(4._wp*pi))*qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + num_fluids)/nbub_R_denom + pbw_L(i) = f_cpbw_KM(R0(i), R0_L(i), V0_L(i), P0_L(i)) + pbw_R(i) = f_cpbw_KM(R0(i), R0_R(i), V0_R(i), P0_R(i)) end if - else - !nb stored in 0th moment of first R0 bin in variable conversion module - nbub_L = qL_prim_rs${XYZ}$_vf(j, k, l, bubxb) - nbub_R = qR_prim_rs${XYZ}$_vf(j + 1, k, l, bubxb) end if + end do - $:GPU_LOOP(parallelism='[seq]') - do i = 1, nb - if (.not. qbmm) then - if (polytropic) then - pbw_L(i) = f_cpbw_KM(R0(i), R0_L(i), V0_L(i), 0._wp) - pbw_R(i) = f_cpbw_KM(R0(i), R0_R(i), V0_R(i), 0._wp) - else - pbw_L(i) = f_cpbw_KM(R0(i), R0_L(i), V0_L(i), P0_L(i)) - pbw_R(i) = f_cpbw_KM(R0(i), R0_R(i), V0_R(i), P0_R(i)) - end if - end if - end do + if (qbmm) then + PbwR3Lbar = mom_sp_rs${XYZ}$_vf(j, k, l, 4) + PbwR3Rbar = mom_sp_rs${XYZ}$_vf(j + 1, k, l, 4) - if (qbmm) then - PbwR3Lbar = mom_sp_rs${XYZ}$_vf(j, k, l, 4) - PbwR3Rbar = mom_sp_rs${XYZ}$_vf(j + 1, k, l, 4) + R3Lbar = mom_sp_rs${XYZ}$_vf(j, k, l, 1) + R3Rbar = mom_sp_rs${XYZ}$_vf(j + 1, k, l, 1) - R3Lbar = mom_sp_rs${XYZ}$_vf(j, k, l, 1) - R3Rbar = mom_sp_rs${XYZ}$_vf(j + 1, k, l, 1) + R3V2Lbar = mom_sp_rs${XYZ}$_vf(j, k, l, 3) + R3V2Rbar = mom_sp_rs${XYZ}$_vf(j + 1, k, l, 3) + else - R3V2Lbar = mom_sp_rs${XYZ}$_vf(j, k, l, 3) - R3V2Rbar = mom_sp_rs${XYZ}$_vf(j + 1, k, l, 3) - else + PbwR3Lbar = 0._wp + PbwR3Rbar = 0._wp - PbwR3Lbar = 0._wp - PbwR3Rbar = 0._wp + R3Lbar = 0._wp + R3Rbar = 0._wp - R3Lbar = 0._wp - R3Rbar = 0._wp + R3V2Lbar = 0._wp + R3V2Rbar = 0._wp - R3V2Lbar = 0._wp - R3V2Rbar = 0._wp + $:GPU_LOOP(parallelism='[seq]') + do i = 1, nb + PbwR3Lbar = PbwR3Lbar + pbw_L(i)*(R0_L(i)**3._wp)*weight(i) + PbwR3Rbar = PbwR3Rbar + pbw_R(i)*(R0_R(i)**3._wp)*weight(i) - $:GPU_LOOP(parallelism='[seq]') - do i = 1, nb - PbwR3Lbar = PbwR3Lbar + pbw_L(i)*(R0_L(i)**3._wp)*weight(i) - PbwR3Rbar = PbwR3Rbar + pbw_R(i)*(R0_R(i)**3._wp)*weight(i) + R3Lbar = R3Lbar + (R0_L(i)**3._wp)*weight(i) + R3Rbar = R3Rbar + (R0_R(i)**3._wp)*weight(i) - R3Lbar = R3Lbar + (R0_L(i)**3._wp)*weight(i) - R3Rbar = R3Rbar + (R0_R(i)**3._wp)*weight(i) + R3V2Lbar = R3V2Lbar + (R0_L(i)**3._wp)*(V0_L(i)**2._wp)*weight(i) + R3V2Rbar = R3V2Rbar + (R0_R(i)**3._wp)*(V0_R(i)**2._wp)*weight(i) + end do + end if - R3V2Lbar = R3V2Lbar + (R0_L(i)**3._wp)*(V0_L(i)**2._wp)*weight(i) - R3V2Rbar = R3V2Rbar + (R0_R(i)**3._wp)*(V0_R(i)**2._wp)*weight(i) - end do - end if + if (qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + num_fluids) < small_alf .or. R3Lbar < small_alf) then + ptilde_L = qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + num_fluids)*pres_L + else + ptilde_L = qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + num_fluids)*(pres_L - PbwR3Lbar/R3Lbar - & + rho_L*R3V2Lbar/R3Lbar) + end if - if (qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + num_fluids) < small_alf .or. R3Lbar < small_alf) then - ptilde_L = qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + num_fluids)*pres_L - else - ptilde_L = qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + num_fluids)*(pres_L - PbwR3Lbar/R3Lbar - & - rho_L*R3V2Lbar/R3Lbar) - end if + if (qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + num_fluids) < small_alf .or. R3Rbar < small_alf) then + ptilde_R = qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + num_fluids)*pres_R + else + ptilde_R = qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + num_fluids)*(pres_R - PbwR3Rbar/R3Rbar - & + rho_R*R3V2Rbar/R3Rbar) + end if - if (qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + num_fluids) < small_alf .or. R3Rbar < small_alf) then - ptilde_R = qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + num_fluids)*pres_R - else - ptilde_R = qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + num_fluids)*(pres_R - PbwR3Rbar/R3Rbar - & - rho_R*R3V2Rbar/R3Rbar) - end if + if ((.not. f_approx_equal(ptilde_L, ptilde_L)) .or. (.not. f_approx_equal(ptilde_R, ptilde_R))) then + end if - if ((.not. f_approx_equal(ptilde_L, ptilde_L)) .or. (.not. f_approx_equal(ptilde_R, ptilde_R))) then - end if + rho_avg = 5.e-1_wp*(rho_L + rho_R) + H_avg = 5.e-1_wp*(H_L + H_R) + gamma_avg = 5.e-1_wp*(gamma_L + gamma_R) + vel_avg_rms = 0._wp - rho_avg = 5.e-1_wp*(rho_L + rho_R) - H_avg = 5.e-1_wp*(H_L + H_R) - gamma_avg = 5.e-1_wp*(gamma_L + gamma_R) - vel_avg_rms = 0._wp + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_dims + vel_avg_rms = vel_avg_rms + (5.e-1_wp*(vel_L(i) + vel_R(i)))**2._wp + end do - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_dims - vel_avg_rms = vel_avg_rms + (5.e-1_wp*(vel_L(i) + vel_R(i)))**2._wp - end do + end if - end if + call s_compute_speed_of_sound(pres_L, rho_L, gamma_L, pi_inf_L, H_L, alpha_L, & + vel_L_rms, 0._wp, c_L) - call s_compute_speed_of_sound(pres_L, rho_L, gamma_L, pi_inf_L, H_L, alpha_L, & - vel_L_rms, 0._wp, c_L) + call s_compute_speed_of_sound(pres_R, rho_R, gamma_R, pi_inf_R, H_R, alpha_R, & + vel_R_rms, 0._wp, c_R) - call s_compute_speed_of_sound(pres_R, rho_R, gamma_R, pi_inf_R, H_R, alpha_R, & - vel_R_rms, 0._wp, c_R) + !> The computation of c_avg does not require all the variables, and therefore the non '_avg' + ! variables are placeholders to call the subroutine. + call s_compute_speed_of_sound(pres_R, rho_avg, gamma_avg, pi_inf_R, H_avg, alpha_R, & + vel_avg_rms, 0._wp, c_avg) - !> The computation of c_avg does not require all the variables, and therefore the non '_avg' - ! variables are placeholders to call the subroutine. - call s_compute_speed_of_sound(pres_R, rho_avg, gamma_avg, pi_inf_R, H_avg, alpha_R, & - vel_avg_rms, 0._wp, c_avg) + if (viscous) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, 2 + Re_avg_rs${XYZ}$_vf(j, k, l, i) = 2._wp/(1._wp/Re_L(i) + 1._wp/Re_R(i)) + end do + end if - if (viscous) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, 2 - Re_avg_rs${XYZ}$_vf(j, k, l, i) = 2._wp/(1._wp/Re_L(i) + 1._wp/Re_R(i)) - end do - end if + ! Low Mach correction + if (low_Mach == 2) then + @:compute_low_Mach_correction() + end if - ! Low Mach correction - if (low_Mach == 2) then - @:compute_low_Mach_correction() - end if + if (wave_speeds == 1) then + s_L = min(vel_L(dir_idx(1)) - c_L, vel_R(dir_idx(1)) - c_R) + s_R = max(vel_R(dir_idx(1)) + c_R, vel_L(dir_idx(1)) + c_L) - if (wave_speeds == 1) then - s_L = min(vel_L(dir_idx(1)) - c_L, vel_R(dir_idx(1)) - c_R) - s_R = max(vel_R(dir_idx(1)) + c_R, vel_L(dir_idx(1)) + c_L) + s_S = (pres_R - pres_L + rho_L*vel_L(dir_idx(1))* & + (s_L - vel_L(dir_idx(1))) - & + rho_R*vel_R(dir_idx(1))* & + (s_R - vel_R(dir_idx(1)))) & + /(rho_L*(s_L - vel_L(dir_idx(1))) - & + rho_R*(s_R - vel_R(dir_idx(1)))) + elseif (wave_speeds == 2) then + pres_SL = 5.e-1_wp*(pres_L + pres_R + rho_avg*c_avg* & + (vel_L(dir_idx(1)) - & + vel_R(dir_idx(1)))) - s_S = (pres_R - pres_L + rho_L*vel_L(dir_idx(1))* & - (s_L - vel_L(dir_idx(1))) - & - rho_R*vel_R(dir_idx(1))* & - (s_R - vel_R(dir_idx(1)))) & - /(rho_L*(s_L - vel_L(dir_idx(1))) - & - rho_R*(s_R - vel_R(dir_idx(1)))) - elseif (wave_speeds == 2) then - pres_SL = 5.e-1_wp*(pres_L + pres_R + rho_avg*c_avg* & - (vel_L(dir_idx(1)) - & - vel_R(dir_idx(1)))) - - pres_SR = pres_SL - - Ms_L = max(1._wp, sqrt(1._wp + ((5.e-1_wp + gamma_L)/(1._wp + gamma_L))* & - (pres_SL/pres_L - 1._wp)*pres_L/ & - ((pres_L + pi_inf_L/(1._wp + gamma_L))))) - Ms_R = max(1._wp, sqrt(1._wp + ((5.e-1_wp + gamma_R)/(1._wp + gamma_R))* & - (pres_SR/pres_R - 1._wp)*pres_R/ & - ((pres_R + pi_inf_R/(1._wp + gamma_R))))) - - s_L = vel_L(dir_idx(1)) - c_L*Ms_L - s_R = vel_R(dir_idx(1)) + c_R*Ms_R - - s_S = 5.e-1_wp*((vel_L(dir_idx(1)) + vel_R(dir_idx(1))) + & - (pres_L - pres_R)/ & - (rho_avg*c_avg)) - end if + pres_SR = pres_SL + + Ms_L = max(1._wp, sqrt(1._wp + ((5.e-1_wp + gamma_L)/(1._wp + gamma_L))* & + (pres_SL/pres_L - 1._wp)*pres_L/ & + ((pres_L + pi_inf_L/(1._wp + gamma_L))))) + Ms_R = max(1._wp, sqrt(1._wp + ((5.e-1_wp + gamma_R)/(1._wp + gamma_R))* & + (pres_SR/pres_R - 1._wp)*pres_R/ & + ((pres_R + pi_inf_R/(1._wp + gamma_R))))) - ! follows Einfeldt et al. - ! s_M/P = min/max(0.,s_L/R) - s_M = min(0._wp, s_L); s_P = max(0._wp, s_R) + s_L = vel_L(dir_idx(1)) - c_L*Ms_L + s_R = vel_R(dir_idx(1)) + c_R*Ms_R - ! goes with q_star_L/R = xi_L/R * (variable) - ! xi_L/R = ( ( s_L/R - u_L/R )/(s_L/R - s_star) ) - xi_L = (s_L - vel_L(dir_idx(1)))/(s_L - s_S) - xi_R = (s_R - vel_R(dir_idx(1)))/(s_R - s_S) + s_S = 5.e-1_wp*((vel_L(dir_idx(1)) + vel_R(dir_idx(1))) + & + (pres_L - pres_R)/ & + (rho_avg*c_avg)) + end if - ! goes with numerical velocity in x/y/z directions - ! xi_P/M = 0.5 +/m sgn(0.5,s_star) - xi_M = (5.e-1_wp + sign(5.e-1_wp, s_S)) - xi_P = (5.e-1_wp - sign(5.e-1_wp, s_S)) + ! follows Einfeldt et al. + ! s_M/P = min/max(0.,s_L/R) + s_M = min(0._wp, s_L); s_P = max(0._wp, s_R) - ! Low Mach correction - if (low_Mach == 1) then - @:compute_low_Mach_correction() - else - pcorr = 0._wp - end if + ! goes with q_star_L/R = xi_L/R * (variable) + ! xi_L/R = ( ( s_L/R - u_L/R )/(s_L/R - s_star) ) + xi_L = (s_L - vel_L(dir_idx(1)))/(s_L - s_S) + xi_R = (s_R - vel_R(dir_idx(1)))/(s_R - s_S) - $:GPU_LOOP(parallelism='[seq]') - do i = 1, contxe - flux_rs${XYZ}$_vf(j, k, l, i) = & - xi_M*qL_prim_rs${XYZ}$_vf(j, k, l, i) & - *(vel_L(dir_idx(1)) + s_M*(xi_L - 1._wp)) & - + xi_P*qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) & - *(vel_R(dir_idx(1)) + s_P*(xi_R - 1._wp)) - end do + ! goes with numerical velocity in x/y/z directions + ! xi_P/M = 0.5 +/m sgn(0.5,s_star) + xi_M = (5.e-1_wp + sign(5.e-1_wp, s_S)) + xi_P = (5.e-1_wp - sign(5.e-1_wp, s_S)) - if (bubbles_euler .and. (num_fluids > 1)) then - ! Kill mass transport @ gas density - flux_rs${XYZ}$_vf(j, k, l, contxe) = 0._wp - end if + ! Low Mach correction + if (low_Mach == 1) then + @:compute_low_Mach_correction() + else + pcorr = 0._wp + end if - ! Momentum flux. - ! f = \rho u u + p I, q = \rho u, q_star = \xi * \rho*(s_star, v, w) + $:GPU_LOOP(parallelism='[seq]') + do i = 1, contxe + flux_rs${XYZ}$_vf(j, k, l, i) = & + xi_M*qL_prim_rs${XYZ}$_vf(j, k, l, i) & + *(vel_L(dir_idx(1)) + s_M*(xi_L - 1._wp)) & + + xi_P*qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) & + *(vel_R(dir_idx(1)) + s_P*(xi_R - 1._wp)) + end do - ! Include p_tilde + if (bubbles_euler .and. (num_fluids > 1)) then + ! Kill mass transport @ gas density + flux_rs${XYZ}$_vf(j, k, l, contxe) = 0._wp + end if - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_dims - flux_rs${XYZ}$_vf(j, k, l, contxe + dir_idx(i)) = & - xi_M*(rho_L*(vel_L(dir_idx(1))* & - vel_L(dir_idx(i)) + & - s_M*(xi_L*(dir_flg(dir_idx(i))*s_S + & - (1._wp - dir_flg(dir_idx(i)))* & - vel_L(dir_idx(i))) - vel_L(dir_idx(i)))) + & - dir_flg(dir_idx(i))*(pres_L - ptilde_L)) & - + xi_P*(rho_R*(vel_R(dir_idx(1))* & - vel_R(dir_idx(i)) + & - s_P*(xi_R*(dir_flg(dir_idx(i))*s_S + & - (1._wp - dir_flg(dir_idx(i)))* & - vel_R(dir_idx(i))) - vel_R(dir_idx(i)))) + & - dir_flg(dir_idx(i))*(pres_R - ptilde_R)) & - + (s_M/s_L)*(s_P/s_R)*dir_flg(dir_idx(i))*pcorr - end do + ! Momentum flux. + ! f = \rho u u + p I, q = \rho u, q_star = \xi * \rho*(s_star, v, w) - ! Energy flux. - ! f = u*(E+p), q = E, q_star = \xi*E+(s-u)(\rho s_star + p/(s-u)) - flux_rs${XYZ}$_vf(j, k, l, E_idx) = & - xi_M*(vel_L(dir_idx(1))*(E_L + pres_L - ptilde_L) + & - s_M*(xi_L*(E_L + (s_S - vel_L(dir_idx(1)))* & - (rho_L*s_S + (pres_L - ptilde_L)/ & - (s_L - vel_L(dir_idx(1))))) - E_L)) & - + xi_P*(vel_R(dir_idx(1))*(E_R + pres_R - ptilde_R) + & - s_P*(xi_R*(E_R + (s_S - vel_R(dir_idx(1)))* & - (rho_R*s_S + (pres_R - ptilde_R)/ & - (s_R - vel_R(dir_idx(1))))) - E_R)) & - + (s_M/s_L)*(s_P/s_R)*pcorr*s_S - - ! Volume fraction flux - $:GPU_LOOP(parallelism='[seq]') - do i = advxb, advxe - flux_rs${XYZ}$_vf(j, k, l, i) = & - xi_M*qL_prim_rs${XYZ}$_vf(j, k, l, i) & - *(vel_L(dir_idx(1)) + s_M*(xi_L - 1._wp)) & - + xi_P*qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) & - *(vel_R(dir_idx(1)) + s_P*(xi_R - 1._wp)) - end do + ! Include p_tilde - ! Source for volume fraction advection equation - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_dims - vel_src_rs${XYZ}$_vf(j, k, l, dir_idx(i)) = & - xi_M*(vel_L(dir_idx(i)) + & - dir_flg(dir_idx(i))* & - s_M*(xi_L - 1._wp)) & - + xi_P*(vel_R(dir_idx(i)) + & - dir_flg(dir_idx(i))* & - s_P*(xi_R - 1._wp)) - - !IF ( (model_eqns == 4) .or. (num_fluids==1) ) vel_src_rs_vf(idxi)%sf(j,k,l) = 0._wp - end do + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_dims + flux_rs${XYZ}$_vf(j, k, l, contxe + dir_idx(i)) = & + xi_M*(rho_L*(vel_L(dir_idx(1))* & + vel_L(dir_idx(i)) + & + s_M*(xi_L*(dir_flg(dir_idx(i))*s_S + & + (1._wp - dir_flg(dir_idx(i)))* & + vel_L(dir_idx(i))) - vel_L(dir_idx(i)))) + & + dir_flg(dir_idx(i))*(pres_L - ptilde_L)) & + + xi_P*(rho_R*(vel_R(dir_idx(1))* & + vel_R(dir_idx(i)) + & + s_P*(xi_R*(dir_flg(dir_idx(i))*s_S + & + (1._wp - dir_flg(dir_idx(i)))* & + vel_R(dir_idx(i))) - vel_R(dir_idx(i)))) + & + dir_flg(dir_idx(i))*(pres_R - ptilde_R)) & + + (s_M/s_L)*(s_P/s_R)*dir_flg(dir_idx(i))*pcorr + end do - flux_src_rs${XYZ}$_vf(j, k, l, advxb) = vel_src_rs${XYZ}$_vf(j, k, l, dir_idx(1)) + ! Energy flux. + ! f = u*(E+p), q = E, q_star = \xi*E+(s-u)(\rho s_star + p/(s-u)) + flux_rs${XYZ}$_vf(j, k, l, E_idx) = & + xi_M*(vel_L(dir_idx(1))*(E_L + pres_L - ptilde_L) + & + s_M*(xi_L*(E_L + (s_S - vel_L(dir_idx(1)))* & + (rho_L*s_S + (pres_L - ptilde_L)/ & + (s_L - vel_L(dir_idx(1))))) - E_L)) & + + xi_P*(vel_R(dir_idx(1))*(E_R + pres_R - ptilde_R) + & + s_P*(xi_R*(E_R + (s_S - vel_R(dir_idx(1)))* & + (rho_R*s_S + (pres_R - ptilde_R)/ & + (s_R - vel_R(dir_idx(1))))) - E_R)) & + + (s_M/s_L)*(s_P/s_R)*pcorr*s_S + + ! Volume fraction flux + $:GPU_LOOP(parallelism='[seq]') + do i = advxb, advxe + flux_rs${XYZ}$_vf(j, k, l, i) = & + xi_M*qL_prim_rs${XYZ}$_vf(j, k, l, i) & + *(vel_L(dir_idx(1)) + s_M*(xi_L - 1._wp)) & + + xi_P*qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) & + *(vel_R(dir_idx(1)) + s_P*(xi_R - 1._wp)) + end do - ! Add advection flux for bubble variables - $:GPU_LOOP(parallelism='[seq]') - do i = bubxb, bubxe - flux_rs${XYZ}$_vf(j, k, l, i) = & - xi_M*nbub_L*qL_prim_rs${XYZ}$_vf(j, k, l, i) & - *(vel_L(dir_idx(1)) + s_M*(xi_L - 1._wp)) & - + xi_P*nbub_R*qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) & - *(vel_R(dir_idx(1)) + s_P*(xi_R - 1._wp)) - end do + ! Source for volume fraction advection equation + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_dims + vel_src_rs${XYZ}$_vf(j, k, l, dir_idx(i)) = & + xi_M*(vel_L(dir_idx(i)) + & + dir_flg(dir_idx(i))* & + s_M*(xi_L - 1._wp)) & + + xi_P*(vel_R(dir_idx(i)) + & + dir_flg(dir_idx(i))* & + s_P*(xi_R - 1._wp)) + + !IF ( (model_eqns == 4) .or. (num_fluids==1) ) vel_src_rs_vf(idxi)%sf(j,k,l) = 0._wp + end do - if (qbmm) then - flux_rs${XYZ}$_vf(j, k, l, bubxb) = & - xi_M*nbub_L & - *(vel_L(dir_idx(1)) + s_M*(xi_L - 1._wp)) & - + xi_P*nbub_R & - *(vel_R(dir_idx(1)) + s_P*(xi_R - 1._wp)) - end if + flux_src_rs${XYZ}$_vf(j, k, l, advxb) = vel_src_rs${XYZ}$_vf(j, k, l, dir_idx(1)) - if (adv_n) then - flux_rs${XYZ}$_vf(j, k, l, n_idx) = & - xi_M*nbub_L & - *(vel_L(dir_idx(1)) + s_M*(xi_L - 1._wp)) & - + xi_P*nbub_R & - *(vel_R(dir_idx(1)) + s_P*(xi_R - 1._wp)) + ! Add advection flux for bubble variables + $:GPU_LOOP(parallelism='[seq]') + do i = bubxb, bubxe + flux_rs${XYZ}$_vf(j, k, l, i) = & + xi_M*nbub_L*qL_prim_rs${XYZ}$_vf(j, k, l, i) & + *(vel_L(dir_idx(1)) + s_M*(xi_L - 1._wp)) & + + xi_P*nbub_R*qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) & + *(vel_R(dir_idx(1)) + s_P*(xi_R - 1._wp)) + end do + + if (qbmm) then + flux_rs${XYZ}$_vf(j, k, l, bubxb) = & + xi_M*nbub_L & + *(vel_L(dir_idx(1)) + s_M*(xi_L - 1._wp)) & + + xi_P*nbub_R & + *(vel_R(dir_idx(1)) + s_P*(xi_R - 1._wp)) + end if + + if (adv_n) then + flux_rs${XYZ}$_vf(j, k, l, n_idx) = & + xi_M*nbub_L & + *(vel_L(dir_idx(1)) + s_M*(xi_L - 1._wp)) & + + xi_P*nbub_R & + *(vel_R(dir_idx(1)) + s_P*(xi_R - 1._wp)) + end if + + ! Geometrical source flux for cylindrical coordinates + #:if (NORM_DIR == 2) + if (cyl_coord) then + ! Substituting the advective flux into the inviscid geometrical source flux + $:GPU_LOOP(parallelism='[seq]') + do i = 1, E_idx + flux_gsrc_rs${XYZ}$_vf(j, k, l, i) = flux_rs${XYZ}$_vf(j, k, l, i) + end do + ! Recalculating the radial momentum geometric source flux + flux_gsrc_rs${XYZ}$_vf(j, k, l, contxe + dir_idx(1)) = & + xi_M*(rho_L*(vel_L(dir_idx(1))* & + vel_L(dir_idx(1)) + & + s_M*(xi_L*(dir_flg(dir_idx(1))*s_S + & + (1._wp - dir_flg(dir_idx(1)))* & + vel_L(dir_idx(1))) - vel_L(dir_idx(1))))) & + + xi_P*(rho_R*(vel_R(dir_idx(1))* & + vel_R(dir_idx(1)) + & + s_P*(xi_R*(dir_flg(dir_idx(1))*s_S + & + (1._wp - dir_flg(dir_idx(1)))* & + vel_R(dir_idx(1))) - vel_R(dir_idx(1))))) + ! Geometrical source of the void fraction(s) is zero + $:GPU_LOOP(parallelism='[seq]') + do i = advxb, advxe + flux_gsrc_rs${XYZ}$_vf(j, k, l, i) = 0._wp + end do end if + #:endif + #:if (NORM_DIR == 3) + if (grid_geometry == 3) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, sys_size + flux_gsrc_rs${XYZ}$_vf(j, k, l, i) = 0._wp + end do - ! Geometrical source flux for cylindrical coordinates - #:if (NORM_DIR == 2) - if (cyl_coord) then - ! Substituting the advective flux into the inviscid geometrical source flux - $:GPU_LOOP(parallelism='[seq]') - do i = 1, E_idx - flux_gsrc_rs${XYZ}$_vf(j, k, l, i) = flux_rs${XYZ}$_vf(j, k, l, i) - end do - ! Recalculating the radial momentum geometric source flux - flux_gsrc_rs${XYZ}$_vf(j, k, l, contxe + dir_idx(1)) = & - xi_M*(rho_L*(vel_L(dir_idx(1))* & - vel_L(dir_idx(1)) + & - s_M*(xi_L*(dir_flg(dir_idx(1))*s_S + & - (1._wp - dir_flg(dir_idx(1)))* & - vel_L(dir_idx(1))) - vel_L(dir_idx(1))))) & - + xi_P*(rho_R*(vel_R(dir_idx(1))* & - vel_R(dir_idx(1)) + & - s_P*(xi_R*(dir_flg(dir_idx(1))*s_S + & - (1._wp - dir_flg(dir_idx(1)))* & - vel_R(dir_idx(1))) - vel_R(dir_idx(1))))) - ! Geometrical source of the void fraction(s) is zero - $:GPU_LOOP(parallelism='[seq]') - do i = advxb, advxe - flux_gsrc_rs${XYZ}$_vf(j, k, l, i) = 0._wp - end do - end if - #:endif - #:if (NORM_DIR == 3) - if (grid_geometry == 3) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, sys_size - flux_gsrc_rs${XYZ}$_vf(j, k, l, i) = 0._wp - end do - - flux_gsrc_rs${XYZ}$_vf(j, k, l, momxb + 1) = & - -xi_M*(rho_L*(vel_L(dir_idx(1))* & - vel_L(dir_idx(1)) + & - s_M*(xi_L*(dir_flg(dir_idx(1))*s_S + & - (1._wp - dir_flg(dir_idx(1)))* & - vel_L(dir_idx(1))) - vel_L(dir_idx(1))))) & - - xi_P*(rho_R*(vel_R(dir_idx(1))* & - vel_R(dir_idx(1)) + & - s_P*(xi_R*(dir_flg(dir_idx(1))*s_S + & - (1._wp - dir_flg(dir_idx(1)))* & - vel_R(dir_idx(1))) - vel_R(dir_idx(1))))) - flux_gsrc_rs${XYZ}$_vf(j, k, l, momxe) = flux_rs${XYZ}$_vf(j, k, l, momxb + 1) + flux_gsrc_rs${XYZ}$_vf(j, k, l, momxb + 1) = & + -xi_M*(rho_L*(vel_L(dir_idx(1))* & + vel_L(dir_idx(1)) + & + s_M*(xi_L*(dir_flg(dir_idx(1))*s_S + & + (1._wp - dir_flg(dir_idx(1)))* & + vel_L(dir_idx(1))) - vel_L(dir_idx(1))))) & + - xi_P*(rho_R*(vel_R(dir_idx(1))* & + vel_R(dir_idx(1)) + & + s_P*(xi_R*(dir_flg(dir_idx(1))*s_S + & + (1._wp - dir_flg(dir_idx(1)))* & + vel_R(dir_idx(1))) - vel_R(dir_idx(1))))) + flux_gsrc_rs${XYZ}$_vf(j, k, l, momxe) = flux_rs${XYZ}$_vf(j, k, l, momxb + 1) - end if - #:endif - end do + end if + #:endif end do end do + end do $:END_GPU_PARALLEL_LOOP() else ! 5-EQUATION MODEL WITH HLLC $:GPU_PARALLEL_LOOP(collapse=3, private='[i,j,k,l,T_L, T_R, vel_L, vel_R, Re_L, Re_R, rho_avg, h_avg, gamma_avg, alpha_L, alpha_R, s_L, s_R, s_S, vel_avg_rms, pcorr, zcoef, vel_L_tmp, vel_R_tmp, Ys_L, Ys_R, Xs_L, Xs_R, Gamma_iL, Gamma_iR, Cp_iL, Cp_iR, tau_e_L, tau_e_R, xi_field_L, xi_field_R, Yi_avg,Phi_avg, h_iL, h_iR, h_avg_2]', copyin='[is1, is2, is3]') - do l = is3%beg, is3%end - do k = is2%beg, is2%end - do j = is1%beg, is1%end + do l = is3%beg, is3%end + do k = is2%beg, is2%end + do j = is1%beg, is1%end + + !idx1 = 1; if (dir_idx(1) == 2) idx1 = 2; if (dir_idx(1) == 3) idx1 = 3 + + vel_L_rms = 0._wp; vel_R_rms = 0._wp + rho_L = 0._wp; rho_R = 0._wp + gamma_L = 0._wp; gamma_R = 0._wp + pi_inf_L = 0._wp; pi_inf_R = 0._wp + qv_L = 0._wp; qv_R = 0._wp + alpha_L_sum = 0._wp; alpha_R_sum = 0._wp + + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids + alpha_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i) + alpha_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i) + end do - !idx1 = 1; if (dir_idx(1) == 2) idx1 = 2; if (dir_idx(1) == 3) idx1 = 3 + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_dims + vel_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, contxe + i) + vel_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, contxe + i) + vel_L_rms = vel_L_rms + vel_L(i)**2._wp + vel_R_rms = vel_R_rms + vel_R(i)**2._wp + end do - vel_L_rms = 0._wp; vel_R_rms = 0._wp - rho_L = 0._wp; rho_R = 0._wp - gamma_L = 0._wp; gamma_R = 0._wp - pi_inf_L = 0._wp; pi_inf_R = 0._wp - qv_L = 0._wp; qv_R = 0._wp - alpha_L_sum = 0._wp; alpha_R_sum = 0._wp + pres_L = qL_prim_rs${XYZ}$_vf(j, k, l, E_idx) + pres_R = qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx) + ! Change this by splitting it into the cases + ! present in the bubbles_euler + if (mpp_lim) then $:GPU_LOOP(parallelism='[seq]') do i = 1, num_fluids - alpha_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i) - alpha_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i) + qL_prim_rs${XYZ}$_vf(j, k, l, i) = max(0._wp, qL_prim_rs${XYZ}$_vf(j, k, l, i)) + qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i) = min(max(0._wp, qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i)), 1._wp) + qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) = max(0._wp, qR_prim_rs${XYZ}$_vf(j + 1, k, l, i)) + qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i) = min(max(0._wp, qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i)), 1._wp) + alpha_L_sum = alpha_L_sum + qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i) + alpha_R_sum = alpha_R_sum + qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i) end do $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_dims - vel_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, contxe + i) - vel_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, contxe + i) - vel_L_rms = vel_L_rms + vel_L(i)**2._wp - vel_R_rms = vel_R_rms + vel_R(i)**2._wp + do i = 1, num_fluids + qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i) = qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i)/max(alpha_L_sum, sgm_eps) + qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i)/max(alpha_R_sum, sgm_eps) end do + end if - pres_L = qL_prim_rs${XYZ}$_vf(j, k, l, E_idx) - pres_R = qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx) - - ! Change this by splitting it into the cases - ! present in the bubbles_euler - if (mpp_lim) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - qL_prim_rs${XYZ}$_vf(j, k, l, i) = max(0._wp, qL_prim_rs${XYZ}$_vf(j, k, l, i)) - qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i) = min(max(0._wp, qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i)), 1._wp) - qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) = max(0._wp, qR_prim_rs${XYZ}$_vf(j + 1, k, l, i)) - qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i) = min(max(0._wp, qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i)), 1._wp) - alpha_L_sum = alpha_L_sum + qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i) - alpha_R_sum = alpha_R_sum + qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i) - end do - - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i) = qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i)/max(alpha_L_sum, sgm_eps) - qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i)/max(alpha_R_sum, sgm_eps) - end do - end if + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids + rho_L = rho_L + qL_prim_rs${XYZ}$_vf(j, k, l, i) + gamma_L = gamma_L + qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i)*gammas(i) + pi_inf_L = pi_inf_L + qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i)*pi_infs(i) + qv_L = qv_L + qL_prim_rs${XYZ}$_vf(j, k, l, i)*qvs(i) + + rho_R = rho_R + qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) + gamma_R = gamma_R + qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i)*gammas(i) + pi_inf_R = pi_inf_R + qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i)*pi_infs(i) + qv_R = qv_R + qR_prim_rs${XYZ}$_vf(j + 1, k, l, i)*qvs(i) + end do + if (viscous) then $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - rho_L = rho_L + qL_prim_rs${XYZ}$_vf(j, k, l, i) - gamma_L = gamma_L + qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i)*gammas(i) - pi_inf_L = pi_inf_L + qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i)*pi_infs(i) - qv_L = qv_L + qL_prim_rs${XYZ}$_vf(j, k, l, i)*qvs(i) + do i = 1, 2 + Re_L(i) = dflt_real + Re_R(i) = dflt_real - rho_R = rho_R + qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) - gamma_R = gamma_R + qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i)*gammas(i) - pi_inf_R = pi_inf_R + qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i)*pi_infs(i) - qv_R = qv_R + qR_prim_rs${XYZ}$_vf(j + 1, k, l, i)*qvs(i) - end do + if (Re_size(i) > 0) Re_L(i) = 0._wp + if (Re_size(i) > 0) Re_R(i) = 0._wp - if (viscous) then $:GPU_LOOP(parallelism='[seq]') - do i = 1, 2 - Re_L(i) = dflt_real - Re_R(i) = dflt_real - - if (Re_size(i) > 0) Re_L(i) = 0._wp - if (Re_size(i) > 0) Re_R(i) = 0._wp - - $:GPU_LOOP(parallelism='[seq]') - do q = 1, Re_size(i) - Re_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + Re_idx(i, q))/Res_gs(i, q) & - + Re_L(i) - Re_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + Re_idx(i, q))/Res_gs(i, q) & - + Re_R(i) - end do - Re_L(i) = 1._wp/max(Re_L(i), sgm_eps) - Re_R(i) = 1._wp/max(Re_R(i), sgm_eps) + do q = 1, Re_size(i) + Re_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + Re_idx(i, q))/Res_gs(i, q) & + + Re_L(i) + Re_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + Re_idx(i, q))/Res_gs(i, q) & + + Re_R(i) end do - end if + Re_L(i) = 1._wp/max(Re_L(i), sgm_eps) + Re_R(i) = 1._wp/max(Re_R(i), sgm_eps) + end do + end if - if (chemistry) then - c_sum_Yi_Phi = 0.0_wp - $:GPU_LOOP(parallelism='[seq]') - do i = chemxb, chemxe - Ys_L(i - chemxb + 1) = qL_prim_rs${XYZ}$_vf(j, k, l, i) - Ys_R(i - chemxb + 1) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) - end do + if (chemistry) then + c_sum_Yi_Phi = 0.0_wp + $:GPU_LOOP(parallelism='[seq]') + do i = chemxb, chemxe + Ys_L(i - chemxb + 1) = qL_prim_rs${XYZ}$_vf(j, k, l, i) + Ys_R(i - chemxb + 1) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) + end do - call get_mixture_molecular_weight(Ys_L, MW_L) - call get_mixture_molecular_weight(Ys_R, MW_R) + call get_mixture_molecular_weight(Ys_L, MW_L) + call get_mixture_molecular_weight(Ys_R, MW_R) - Xs_L(:) = Ys_L(:)*MW_L/molecular_weights(:) - Xs_R(:) = Ys_R(:)*MW_R/molecular_weights(:) + Xs_L(:) = Ys_L(:)*MW_L/molecular_weights(:) + Xs_R(:) = Ys_R(:)*MW_R/molecular_weights(:) - R_gas_L = gas_constant/MW_L - R_gas_R = gas_constant/MW_R + R_gas_L = gas_constant/MW_L + R_gas_R = gas_constant/MW_R - T_L = pres_L/rho_L/R_gas_L - T_R = pres_R/rho_R/R_gas_R + T_L = pres_L/rho_L/R_gas_L + T_R = pres_R/rho_R/R_gas_R - call get_species_specific_heats_r(T_L, Cp_iL) - call get_species_specific_heats_r(T_R, Cp_iR) + call get_species_specific_heats_r(T_L, Cp_iL) + call get_species_specific_heats_r(T_R, Cp_iR) - if (chem_params%gamma_method == 1) then - !> gamma_method = 1: Ref. Section 2.3.1 Formulation of doi:10.7907/ZKW8-ES97. - Gamma_iL = Cp_iL/(Cp_iL - 1.0_wp) - Gamma_iR = Cp_iR/(Cp_iR - 1.0_wp) + if (chem_params%gamma_method == 1) then + !> gamma_method = 1: Ref. Section 2.3.1 Formulation of doi:10.7907/ZKW8-ES97. + Gamma_iL = Cp_iL/(Cp_iL - 1.0_wp) + Gamma_iR = Cp_iR/(Cp_iR - 1.0_wp) - gamma_L = sum(Xs_L(:)/(Gamma_iL(:) - 1.0_wp)) - gamma_R = sum(Xs_R(:)/(Gamma_iR(:) - 1.0_wp)) - else if (chem_params%gamma_method == 2) then - !> gamma_method = 2: c_p / c_v where c_p, c_v are specific heats. - call get_mixture_specific_heat_cp_mass(T_L, Ys_L, Cp_L) - call get_mixture_specific_heat_cp_mass(T_R, Ys_R, Cp_R) - call get_mixture_specific_heat_cv_mass(T_L, Ys_L, Cv_L) - call get_mixture_specific_heat_cv_mass(T_R, Ys_R, Cv_R) + gamma_L = sum(Xs_L(:)/(Gamma_iL(:) - 1.0_wp)) + gamma_R = sum(Xs_R(:)/(Gamma_iR(:) - 1.0_wp)) + else if (chem_params%gamma_method == 2) then + !> gamma_method = 2: c_p / c_v where c_p, c_v are specific heats. + call get_mixture_specific_heat_cp_mass(T_L, Ys_L, Cp_L) + call get_mixture_specific_heat_cp_mass(T_R, Ys_R, Cp_R) + call get_mixture_specific_heat_cv_mass(T_L, Ys_L, Cv_L) + call get_mixture_specific_heat_cv_mass(T_R, Ys_R, Cv_R) - Gamm_L = Cp_L/Cv_L; Gamm_R = Cp_R/Cv_R - gamma_L = 1.0_wp/(Gamm_L - 1.0_wp); gamma_R = 1.0_wp/(Gamm_R - 1.0_wp) - end if + Gamm_L = Cp_L/Cv_L; Gamm_R = Cp_R/Cv_R + gamma_L = 1.0_wp/(Gamm_L - 1.0_wp); gamma_R = 1.0_wp/(Gamm_R - 1.0_wp) + end if - call get_mixture_energy_mass(T_L, Ys_L, E_L) - call get_mixture_energy_mass(T_R, Ys_R, E_R) + call get_mixture_energy_mass(T_L, Ys_L, E_L) + call get_mixture_energy_mass(T_R, Ys_R, E_R) - E_L = rho_L*E_L + 5.e-1*rho_L*vel_L_rms - E_R = rho_R*E_R + 5.e-1*rho_R*vel_R_rms - H_L = (E_L + pres_L)/rho_L - H_R = (E_R + pres_R)/rho_R - else - E_L = gamma_L*pres_L + pi_inf_L + 5.e-1*rho_L*vel_L_rms + qv_L - E_R = gamma_R*pres_R + pi_inf_R + 5.e-1*rho_R*vel_R_rms + qv_R + E_L = rho_L*E_L + 5.e-1*rho_L*vel_L_rms + E_R = rho_R*E_R + 5.e-1*rho_R*vel_R_rms + H_L = (E_L + pres_L)/rho_L + H_R = (E_R + pres_R)/rho_R + else + E_L = gamma_L*pres_L + pi_inf_L + 5.e-1*rho_L*vel_L_rms + qv_L + E_R = gamma_R*pres_R + pi_inf_R + 5.e-1*rho_R*vel_R_rms + qv_R - H_L = (E_L + pres_L)/rho_L - H_R = (E_R + pres_R)/rho_R - end if + H_L = (E_L + pres_L)/rho_L + H_R = (E_R + pres_R)/rho_R + end if - ! ENERGY ADJUSTMENTS FOR HYPOELASTIC ENERGY - if (hypoelasticity) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, strxe - strxb + 1 - tau_e_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, strxb - 1 + i) - tau_e_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, strxb - 1 + i) - end do - G_L = 0._wp - G_R = 0._wp - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - G_L = G_L + alpha_L(i)*Gs_rs(i) - G_R = G_R + alpha_R(i)*Gs_rs(i) - end do - $:GPU_LOOP(parallelism='[seq]') - do i = 1, strxe - strxb + 1 - ! Elastic contribution to energy if G large enough - if ((G_L > verysmall) .and. (G_R > verysmall)) then + ! ENERGY ADJUSTMENTS FOR HYPOELASTIC ENERGY + if (hypoelasticity) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, strxe - strxb + 1 + tau_e_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, strxb - 1 + i) + tau_e_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, strxb - 1 + i) + end do + G_L = 0._wp + G_R = 0._wp + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids + G_L = G_L + alpha_L(i)*Gs_rs(i) + G_R = G_R + alpha_R(i)*Gs_rs(i) + end do + $:GPU_LOOP(parallelism='[seq]') + do i = 1, strxe - strxb + 1 + ! Elastic contribution to energy if G large enough + if ((G_L > verysmall) .and. (G_R > verysmall)) then + E_L = E_L + (tau_e_L(i)*tau_e_L(i))/(4._wp*G_L) + E_R = E_R + (tau_e_R(i)*tau_e_R(i))/(4._wp*G_R) + ! Additional terms in 2D and 3D + if ((i == 2) .or. (i == 4) .or. (i == 5)) then E_L = E_L + (tau_e_L(i)*tau_e_L(i))/(4._wp*G_L) E_R = E_R + (tau_e_R(i)*tau_e_R(i))/(4._wp*G_R) - ! Additional terms in 2D and 3D - if ((i == 2) .or. (i == 4) .or. (i == 5)) then - E_L = E_L + (tau_e_L(i)*tau_e_L(i))/(4._wp*G_L) - E_R = E_R + (tau_e_R(i)*tau_e_R(i))/(4._wp*G_R) - end if end if - end do - end if - - ! ENERGY ADJUSTMENTS FOR HYPERELASTIC ENERGY - if (hyperelasticity) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_dims - xi_field_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, xibeg - 1 + i) - xi_field_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, xibeg - 1 + i) - end do - G_L = 0._wp - G_R = 0._wp - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - ! Mixture left and right shear modulus - G_L = G_L + alpha_L(i)*Gs_rs(i) - G_R = G_R + alpha_R(i)*Gs_rs(i) - end do - ! Elastic contribution to energy if G large enough - if (G_L > verysmall .and. G_R > verysmall) then - E_L = E_L + G_L*qL_prim_rs${XYZ}$_vf(j, k, l, xiend + 1) - E_R = E_R + G_R*qR_prim_rs${XYZ}$_vf(j + 1, k, l, xiend + 1) end if - $:GPU_LOOP(parallelism='[seq]') - do i = 1, b_size - 1 - tau_e_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, strxb - 1 + i) - tau_e_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, strxb - 1 + i) - end do + end do + end if + + ! ENERGY ADJUSTMENTS FOR HYPERELASTIC ENERGY + if (hyperelasticity) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_dims + xi_field_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, xibeg - 1 + i) + xi_field_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, xibeg - 1 + i) + end do + G_L = 0._wp + G_R = 0._wp + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids + ! Mixture left and right shear modulus + G_L = G_L + alpha_L(i)*Gs_rs(i) + G_R = G_R + alpha_R(i)*Gs_rs(i) + end do + ! Elastic contribution to energy if G large enough + if (G_L > verysmall .and. G_R > verysmall) then + E_L = E_L + G_L*qL_prim_rs${XYZ}$_vf(j, k, l, xiend + 1) + E_R = E_R + G_R*qR_prim_rs${XYZ}$_vf(j + 1, k, l, xiend + 1) end if + $:GPU_LOOP(parallelism='[seq]') + do i = 1, b_size - 1 + tau_e_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, strxb - 1 + i) + tau_e_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, strxb - 1 + i) + end do + end if - H_L = (E_L + pres_L)/rho_L - H_R = (E_R + pres_R)/rho_R + H_L = (E_L + pres_L)/rho_L + H_R = (E_R + pres_R)/rho_R - @:compute_average_state() + @:compute_average_state() - call s_compute_speed_of_sound(pres_L, rho_L, gamma_L, pi_inf_L, H_L, alpha_L, & - vel_L_rms, 0._wp, c_L) + call s_compute_speed_of_sound(pres_L, rho_L, gamma_L, pi_inf_L, H_L, alpha_L, & + vel_L_rms, 0._wp, c_L) - call s_compute_speed_of_sound(pres_R, rho_R, gamma_R, pi_inf_R, H_R, alpha_R, & - vel_R_rms, 0._wp, c_R) + call s_compute_speed_of_sound(pres_R, rho_R, gamma_R, pi_inf_R, H_R, alpha_R, & + vel_R_rms, 0._wp, c_R) - !> The computation of c_avg does not require all the variables, and therefore the non '_avg' - ! variables are placeholders to call the subroutine. - call s_compute_speed_of_sound(pres_R, rho_avg, gamma_avg, pi_inf_R, H_avg, alpha_R, & - vel_avg_rms, c_sum_Yi_Phi, c_avg) + !> The computation of c_avg does not require all the variables, and therefore the non '_avg' + ! variables are placeholders to call the subroutine. + call s_compute_speed_of_sound(pres_R, rho_avg, gamma_avg, pi_inf_R, H_avg, alpha_R, & + vel_avg_rms, c_sum_Yi_Phi, c_avg) - if (viscous) then - if (chemistry) then - call compute_viscosity_and_inversion(T_L, Ys_L, T_R, Ys_R, Re_L(1), Re_R(1)) - end if - $:GPU_LOOP(parallelism='[seq]') - do i = 1, 2 - Re_avg_rs${XYZ}$_vf(j, k, l, i) = 2._wp/(1._wp/Re_L(i) + 1._wp/Re_R(i)) - end do + if (viscous) then + if (chemistry) then + call compute_viscosity_and_inversion(T_L, Ys_L, T_R, Ys_R, Re_L(1), Re_R(1)) end if + $:GPU_LOOP(parallelism='[seq]') + do i = 1, 2 + Re_avg_rs${XYZ}$_vf(j, k, l, i) = 2._wp/(1._wp/Re_L(i) + 1._wp/Re_R(i)) + end do + end if - ! Low Mach correction - if (low_Mach == 2) then - @:compute_low_Mach_correction() - end if + ! Low Mach correction + if (low_Mach == 2) then + @:compute_low_Mach_correction() + end if - if (wave_speeds == 1) then - if (elasticity) then - s_L = min(vel_L(dir_idx(1)) - sqrt(c_L*c_L + & - (((4._wp*G_L)/3._wp) + tau_e_L(dir_idx_tau(1)))/rho_L), vel_R(dir_idx(1)) - sqrt(c_R*c_R + & - (((4._wp*G_R)/3._wp) + tau_e_R(dir_idx_tau(1)))/rho_R)) - s_R = max(vel_R(dir_idx(1)) + sqrt(c_R*c_R + & - (((4._wp*G_R)/3._wp) + tau_e_R(dir_idx_tau(1)))/rho_R), vel_L(dir_idx(1)) + sqrt(c_L*c_L + & - (((4._wp*G_L)/3._wp) + tau_e_L(dir_idx_tau(1)))/rho_L)) - s_S = (pres_R - tau_e_R(dir_idx_tau(1)) - pres_L + & - tau_e_L(dir_idx_tau(1)) + rho_L*vel_L(idx1)*(s_L - vel_L(idx1)) - & - rho_R*vel_R(idx1)*(s_R - vel_R(idx1)))/(rho_L*(s_L - vel_L(idx1)) - & - rho_R*(s_R - vel_R(idx1))) - else - s_L = min(vel_L(dir_idx(1)) - c_L, vel_R(dir_idx(1)) - c_R) - s_R = max(vel_R(dir_idx(1)) + c_R, vel_L(dir_idx(1)) + c_L) - s_S = (pres_R - pres_L + rho_L*vel_L(dir_idx(1))* & - (s_L - vel_L(dir_idx(1))) - rho_R*vel_R(dir_idx(1))*(s_R - vel_R(dir_idx(1)))) & - /(rho_L*(s_L - vel_L(dir_idx(1))) - rho_R*(s_R - vel_R(dir_idx(1)))) + if (wave_speeds == 1) then + if (elasticity) then + s_L = min(vel_L(dir_idx(1)) - sqrt(c_L*c_L + & + (((4._wp*G_L)/3._wp) + tau_e_L(dir_idx_tau(1)))/rho_L), vel_R(dir_idx(1)) - sqrt(c_R*c_R + & + (((4._wp*G_R)/3._wp) + tau_e_R(dir_idx_tau(1)))/rho_R)) + s_R = max(vel_R(dir_idx(1)) + sqrt(c_R*c_R + & + (((4._wp*G_R)/3._wp) + tau_e_R(dir_idx_tau(1)))/rho_R), vel_L(dir_idx(1)) + sqrt(c_L*c_L + & + (((4._wp*G_L)/3._wp) + tau_e_L(dir_idx_tau(1)))/rho_L)) + s_S = (pres_R - tau_e_R(dir_idx_tau(1)) - pres_L + & + tau_e_L(dir_idx_tau(1)) + rho_L*vel_L(idx1)*(s_L - vel_L(idx1)) - & + rho_R*vel_R(idx1)*(s_R - vel_R(idx1)))/(rho_L*(s_L - vel_L(idx1)) - & + rho_R*(s_R - vel_R(idx1))) + else + s_L = min(vel_L(dir_idx(1)) - c_L, vel_R(dir_idx(1)) - c_R) + s_R = max(vel_R(dir_idx(1)) + c_R, vel_L(dir_idx(1)) + c_L) + s_S = (pres_R - pres_L + rho_L*vel_L(dir_idx(1))* & + (s_L - vel_L(dir_idx(1))) - rho_R*vel_R(dir_idx(1))*(s_R - vel_R(dir_idx(1)))) & + /(rho_L*(s_L - vel_L(dir_idx(1))) - rho_R*(s_R - vel_R(dir_idx(1)))) - end if - elseif (wave_speeds == 2) then - pres_SL = 5.e-1_wp*(pres_L + pres_R + rho_avg*c_avg* & - (vel_L(idx1) - & - vel_R(idx1))) - - pres_SR = pres_SL - - Ms_L = max(1._wp, sqrt(1._wp + ((5.e-1_wp + gamma_L)/(1._wp + gamma_L))* & - (pres_SL/pres_L - 1._wp)*pres_L/ & - ((pres_L + pi_inf_L/(1._wp + gamma_L))))) - Ms_R = max(1._wp, sqrt(1._wp + ((5.e-1_wp + gamma_R)/(1._wp + gamma_R))* & - (pres_SR/pres_R - 1._wp)*pres_R/ & - ((pres_R + pi_inf_R/(1._wp + gamma_R))))) - - s_L = vel_L(idx1) - c_L*Ms_L - s_R = vel_R(idx1) + c_R*Ms_R - - s_S = 5.e-1_wp*((vel_L(idx1) + vel_R(idx1)) + & - (pres_L - pres_R)/ & - (rho_avg*c_avg)) end if + elseif (wave_speeds == 2) then + pres_SL = 5.e-1_wp*(pres_L + pres_R + rho_avg*c_avg* & + (vel_L(idx1) - & + vel_R(idx1))) - ! follows Einfeldt et al. - ! s_M/P = min/max(0.,s_L/R) - s_M = min(0._wp, s_L); s_P = max(0._wp, s_R) + pres_SR = pres_SL - ! goes with q_star_L/R = xi_L/R * (variable) - ! xi_L/R = ( ( s_L/R - u_L/R )/(s_L/R - s_star) ) - xi_L = (s_L - vel_L(idx1))/(s_L - s_S) - xi_R = (s_R - vel_R(idx1))/(s_R - s_S) + Ms_L = max(1._wp, sqrt(1._wp + ((5.e-1_wp + gamma_L)/(1._wp + gamma_L))* & + (pres_SL/pres_L - 1._wp)*pres_L/ & + ((pres_L + pi_inf_L/(1._wp + gamma_L))))) + Ms_R = max(1._wp, sqrt(1._wp + ((5.e-1_wp + gamma_R)/(1._wp + gamma_R))* & + (pres_SR/pres_R - 1._wp)*pres_R/ & + ((pres_R + pi_inf_R/(1._wp + gamma_R))))) - ! goes with numerical velocity in x/y/z directions - ! xi_P/M = 0.5 +/m sgn(0.5,s_star) - xi_M = (5.e-1_wp + sign(5.e-1_wp, s_S)) - xi_P = (5.e-1_wp - sign(5.e-1_wp, s_S)) + s_L = vel_L(idx1) - c_L*Ms_L + s_R = vel_R(idx1) + c_R*Ms_R - ! Low Mach correction - if (low_Mach == 1) then - @:compute_low_Mach_correction() - else - pcorr = 0._wp - end if + s_S = 5.e-1_wp*((vel_L(idx1) + vel_R(idx1)) + & + (pres_L - pres_R)/ & + (rho_avg*c_avg)) + end if - ! COMPUTING THE HLLC FLUXES - ! MASS FLUX. - $:GPU_LOOP(parallelism='[seq]') - do i = 1, contxe - flux_rs${XYZ}$_vf(j, k, l, i) = & - xi_M*qL_prim_rs${XYZ}$_vf(j, k, l, i) & - *(vel_L(idx1) + s_M*(xi_L - 1._wp)) & - + xi_P*qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) & - *(vel_R(idx1) + s_P*(xi_R - 1._wp)) - end do + ! follows Einfeldt et al. + ! s_M/P = min/max(0.,s_L/R) + s_M = min(0._wp, s_L); s_P = max(0._wp, s_R) + + ! goes with q_star_L/R = xi_L/R * (variable) + ! xi_L/R = ( ( s_L/R - u_L/R )/(s_L/R - s_star) ) + xi_L = (s_L - vel_L(idx1))/(s_L - s_S) + xi_R = (s_R - vel_R(idx1))/(s_R - s_S) + + ! goes with numerical velocity in x/y/z directions + ! xi_P/M = 0.5 +/m sgn(0.5,s_star) + xi_M = (5.e-1_wp + sign(5.e-1_wp, s_S)) + xi_P = (5.e-1_wp - sign(5.e-1_wp, s_S)) + + ! Low Mach correction + if (low_Mach == 1) then + @:compute_low_Mach_correction() + else + pcorr = 0._wp + end if + + ! COMPUTING THE HLLC FLUXES + ! MASS FLUX. + $:GPU_LOOP(parallelism='[seq]') + do i = 1, contxe + flux_rs${XYZ}$_vf(j, k, l, i) = & + xi_M*qL_prim_rs${XYZ}$_vf(j, k, l, i) & + *(vel_L(idx1) + s_M*(xi_L - 1._wp)) & + + xi_P*qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) & + *(vel_R(idx1) + s_P*(xi_R - 1._wp)) + end do + + ! MOMENTUM FLUX. + ! f = \rho u u - \sigma, q = \rho u, q_star = \xi * \rho*(s_star, v, w) + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_dims + idxi = dir_idx(i) + flux_rs${XYZ}$_vf(j, k, l, contxe + idxi) = & + xi_M*(rho_L*(vel_L(idx1)* & + vel_L(idxi) + & + s_M*(xi_L*(dir_flg(idxi)*s_S + & + (1._wp - dir_flg(idxi))* & + vel_L(idxi)) - vel_L(idxi))) + & + dir_flg(idxi)*(pres_L)) & + + xi_P*(rho_R*(vel_R(idx1)* & + vel_R(idxi) + & + s_P*(xi_R*(dir_flg(idxi)*s_S + & + (1._wp - dir_flg(idxi))* & + vel_R(idxi)) - vel_R(idxi))) + & + dir_flg(idxi)*(pres_R)) & + + (s_M/s_L)*(s_P/s_R)*dir_flg(idxi)*pcorr + end do - ! MOMENTUM FLUX. - ! f = \rho u u - \sigma, q = \rho u, q_star = \xi * \rho*(s_star, v, w) + ! ENERGY FLUX. + ! f = u*(E-\sigma), q = E, q_star = \xi*E+(s-u)(\rho s_star - \sigma/(s-u)) + flux_rs${XYZ}$_vf(j, k, l, E_idx) = & + xi_M*(vel_L(idx1)*(E_L + pres_L) + & + s_M*(xi_L*(E_L + (s_S - vel_L(idx1))* & + (rho_L*s_S + pres_L/ & + (s_L - vel_L(idx1)))) - E_L)) & + + xi_P*(vel_R(idx1)*(E_R + pres_R) + & + s_P*(xi_R*(E_R + (s_S - vel_R(idx1))* & + (rho_R*s_S + pres_R/ & + (s_R - vel_R(idx1)))) - E_R)) & + + (s_M/s_L)*(s_P/s_R)*pcorr*s_S + + ! ELASTICITY. Elastic shear stress additions for the momentum and energy flux + if (elasticity) then + flux_ene_e = 0._wp $:GPU_LOOP(parallelism='[seq]') do i = 1, num_dims idxi = dir_idx(i) + ! MOMENTUM ELASTIC FLUX. flux_rs${XYZ}$_vf(j, k, l, contxe + idxi) = & - xi_M*(rho_L*(vel_L(idx1)* & - vel_L(idxi) + & - s_M*(xi_L*(dir_flg(idxi)*s_S + & - (1._wp - dir_flg(idxi))* & - vel_L(idxi)) - vel_L(idxi))) + & - dir_flg(idxi)*(pres_L)) & - + xi_P*(rho_R*(vel_R(idx1)* & - vel_R(idxi) + & - s_P*(xi_R*(dir_flg(idxi)*s_S + & - (1._wp - dir_flg(idxi))* & - vel_R(idxi)) - vel_R(idxi))) + & - dir_flg(idxi)*(pres_R)) & - + (s_M/s_L)*(s_P/s_R)*dir_flg(idxi)*pcorr + flux_rs${XYZ}$_vf(j, k, l, contxe + idxi) & + - xi_M*tau_e_L(dir_idx_tau(i)) - xi_P*tau_e_R(dir_idx_tau(i)) + ! ENERGY ELASTIC FLUX. + flux_ene_e = flux_ene_e - & + xi_M*(vel_L(idxi)*tau_e_L(dir_idx_tau(i)) + & + s_M*(xi_L*((s_S - vel_L(i))*(tau_e_L(dir_idx_tau(i))/(s_L - vel_L(i)))))) - & + xi_P*(vel_R(idxi)*tau_e_R(dir_idx_tau(i)) + & + s_P*(xi_R*((s_S - vel_R(i))*(tau_e_R(dir_idx_tau(i))/(s_R - vel_R(i)))))) end do + flux_rs${XYZ}$_vf(j, k, l, E_idx) = flux_rs${XYZ}$_vf(j, k, l, E_idx) + flux_ene_e + end if - ! ENERGY FLUX. - ! f = u*(E-\sigma), q = E, q_star = \xi*E+(s-u)(\rho s_star - \sigma/(s-u)) - flux_rs${XYZ}$_vf(j, k, l, E_idx) = & - xi_M*(vel_L(idx1)*(E_L + pres_L) + & - s_M*(xi_L*(E_L + (s_S - vel_L(idx1))* & - (rho_L*s_S + pres_L/ & - (s_L - vel_L(idx1)))) - E_L)) & - + xi_P*(vel_R(idx1)*(E_R + pres_R) + & - s_P*(xi_R*(E_R + (s_S - vel_R(idx1))* & - (rho_R*s_S + pres_R/ & - (s_R - vel_R(idx1)))) - E_R)) & - + (s_M/s_L)*(s_P/s_R)*pcorr*s_S - - ! ELASTICITY. Elastic shear stress additions for the momentum and energy flux - if (elasticity) then - flux_ene_e = 0._wp - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_dims - idxi = dir_idx(i) - ! MOMENTUM ELASTIC FLUX. - flux_rs${XYZ}$_vf(j, k, l, contxe + idxi) = & - flux_rs${XYZ}$_vf(j, k, l, contxe + idxi) & - - xi_M*tau_e_L(dir_idx_tau(i)) - xi_P*tau_e_R(dir_idx_tau(i)) - ! ENERGY ELASTIC FLUX. - flux_ene_e = flux_ene_e - & - xi_M*(vel_L(idxi)*tau_e_L(dir_idx_tau(i)) + & - s_M*(xi_L*((s_S - vel_L(i))*(tau_e_L(dir_idx_tau(i))/(s_L - vel_L(i)))))) - & - xi_P*(vel_R(idxi)*tau_e_R(dir_idx_tau(i)) + & - s_P*(xi_R*((s_S - vel_R(i))*(tau_e_R(dir_idx_tau(i))/(s_R - vel_R(i)))))) - end do - flux_rs${XYZ}$_vf(j, k, l, E_idx) = flux_rs${XYZ}$_vf(j, k, l, E_idx) + flux_ene_e - end if - - ! HYPOELASTIC STRESS EVOLUTION FLUX. - if (hypoelasticity) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, strxe - strxb + 1 - flux_rs${XYZ}$_vf(j, k, l, strxb - 1 + i) = & - xi_M*(s_S/(s_L - s_S))*(s_L*rho_L*tau_e_L(i) - rho_L*vel_L(idx1)*tau_e_L(i)) + & - xi_P*(s_S/(s_R - s_S))*(s_R*rho_R*tau_e_R(i) - rho_R*vel_R(idx1)*tau_e_R(i)) - end do - end if - - ! VOLUME FRACTION FLUX. + ! HYPOELASTIC STRESS EVOLUTION FLUX. + if (hypoelasticity) then $:GPU_LOOP(parallelism='[seq]') - do i = advxb, advxe - flux_rs${XYZ}$_vf(j, k, l, i) = & - xi_M*qL_prim_rs${XYZ}$_vf(j, k, l, i) & - *(vel_L(idx1) + s_M*(xi_L - 1._wp)) & - + xi_P*qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) & - *(vel_R(idx1) + s_P*(xi_R - 1._wp)) + do i = 1, strxe - strxb + 1 + flux_rs${XYZ}$_vf(j, k, l, strxb - 1 + i) = & + xi_M*(s_S/(s_L - s_S))*(s_L*rho_L*tau_e_L(i) - rho_L*vel_L(idx1)*tau_e_L(i)) + & + xi_P*(s_S/(s_R - s_S))*(s_R*rho_R*tau_e_R(i) - rho_R*vel_R(idx1)*tau_e_R(i)) end do + end if + + ! VOLUME FRACTION FLUX. + $:GPU_LOOP(parallelism='[seq]') + do i = advxb, advxe + flux_rs${XYZ}$_vf(j, k, l, i) = & + xi_M*qL_prim_rs${XYZ}$_vf(j, k, l, i) & + *(vel_L(idx1) + s_M*(xi_L - 1._wp)) & + + xi_P*qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) & + *(vel_R(idx1) + s_P*(xi_R - 1._wp)) + end do + + ! VOLUME FRACTION SOURCE FLUX. + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_dims + idxi = dir_idx(i) + vel_src_rs${XYZ}$_vf(j, k, l, idxi) = & + xi_M*(vel_L(idxi) + & + dir_flg(idxi)* & + s_M*(xi_L - 1._wp)) & + + xi_P*(vel_R(idxi) + & + dir_flg(idxi)* & + s_P*(xi_R - 1._wp)) + end do + + ! COLOR FUNCTION FLUX + if (surface_tension) then + flux_rs${XYZ}$_vf(j, k, l, c_idx) = & + xi_M*qL_prim_rs${XYZ}$_vf(j, k, l, c_idx) & + *(vel_L(idx1) + s_M*(xi_L - 1._wp)) & + + xi_P*qR_prim_rs${XYZ}$_vf(j + 1, k, l, c_idx) & + *(vel_R(idx1) + s_P*(xi_R - 1._wp)) + end if - ! VOLUME FRACTION SOURCE FLUX. + ! REFERENCE MAP FLUX. + if (hyperelasticity) then $:GPU_LOOP(parallelism='[seq]') do i = 1, num_dims - idxi = dir_idx(i) - vel_src_rs${XYZ}$_vf(j, k, l, idxi) = & - xi_M*(vel_L(idxi) + & - dir_flg(idxi)* & - s_M*(xi_L - 1._wp)) & - + xi_P*(vel_R(idxi) + & - dir_flg(idxi)* & - s_P*(xi_R - 1._wp)) + flux_rs${XYZ}$_vf(j, k, l, xibeg - 1 + i) = & + xi_M*(s_S/(s_L - s_S))*(s_L*rho_L*xi_field_L(i) & + - rho_L*vel_L(idx1)*xi_field_L(i)) + & + xi_P*(s_S/(s_R - s_S))*(s_R*rho_R*xi_field_R(i) & + - rho_R*vel_R(idx1)*xi_field_R(i)) end do + end if - ! COLOR FUNCTION FLUX - if (surface_tension) then - flux_rs${XYZ}$_vf(j, k, l, c_idx) = & - xi_M*qL_prim_rs${XYZ}$_vf(j, k, l, c_idx) & - *(vel_L(idx1) + s_M*(xi_L - 1._wp)) & - + xi_P*qR_prim_rs${XYZ}$_vf(j + 1, k, l, c_idx) & - *(vel_R(idx1) + s_P*(xi_R - 1._wp)) - end if + flux_src_rs${XYZ}$_vf(j, k, l, advxb) = vel_src_rs${XYZ}$_vf(j, k, l, idx1) - ! REFERENCE MAP FLUX. - if (hyperelasticity) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_dims - flux_rs${XYZ}$_vf(j, k, l, xibeg - 1 + i) = & - xi_M*(s_S/(s_L - s_S))*(s_L*rho_L*xi_field_L(i) & - - rho_L*vel_L(idx1)*xi_field_L(i)) + & - xi_P*(s_S/(s_R - s_S))*(s_R*rho_R*xi_field_R(i) & - - rho_R*vel_R(idx1)*xi_field_R(i)) - end do - end if + if (chemistry) then + $:GPU_LOOP(parallelism='[seq]') + do i = chemxb, chemxe + Y_L = qL_prim_rs${XYZ}$_vf(j, k, l, i) + Y_R = qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) - flux_src_rs${XYZ}$_vf(j, k, l, advxb) = vel_src_rs${XYZ}$_vf(j, k, l, idx1) + flux_rs${XYZ}$_vf(j, k, l, i) = xi_M*rho_L*Y_L*(vel_L(idx1) + s_M*(xi_L - 1._wp)) & + + xi_P*rho_R*Y_R*(vel_R(idx1) + s_P*(xi_R - 1._wp)) + flux_src_rs${XYZ}$_vf(j, k, l, i) = 0.0_wp + end do + end if - if (chemistry) then + ! Geometrical source flux for cylindrical coordinates + #:if (NORM_DIR == 2) + if (cyl_coord) then + !Substituting the advective flux into the inviscid geometrical source flux $:GPU_LOOP(parallelism='[seq]') - do i = chemxb, chemxe - Y_L = qL_prim_rs${XYZ}$_vf(j, k, l, i) - Y_R = qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) - - flux_rs${XYZ}$_vf(j, k, l, i) = xi_M*rho_L*Y_L*(vel_L(idx1) + s_M*(xi_L - 1._wp)) & - + xi_P*rho_R*Y_R*(vel_R(idx1) + s_P*(xi_R - 1._wp)) - flux_src_rs${XYZ}$_vf(j, k, l, i) = 0.0_wp + do i = 1, E_idx + flux_gsrc_rs${XYZ}$_vf(j, k, l, i) = flux_rs${XYZ}$_vf(j, k, l, i) + end do + ! Recalculating the radial momentum geometric source flux + flux_gsrc_rs${XYZ}$_vf(j, k, l, contxe + idx1) = & + xi_M*(rho_L*(vel_L(idx1)* & + vel_L(idx1) + & + s_M*(xi_L*(dir_flg(idx1)*s_S + & + (1._wp - dir_flg(idx1))* & + vel_L(idx1)) - vel_L(idx1)))) & + + xi_P*(rho_R*(vel_R(idx1)* & + vel_R(idx1) + & + s_P*(xi_R*(dir_flg(idx1)*s_S + & + (1._wp - dir_flg(idx1))* & + vel_R(idx1)) - vel_R(idx1)))) + ! Geometrical source of the void fraction(s) is zero + $:GPU_LOOP(parallelism='[seq]') + do i = advxb, advxe + flux_gsrc_rs${XYZ}$_vf(j, k, l, i) = 0._wp end do end if + #:endif + #:if (NORM_DIR == 3) + if (grid_geometry == 3) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, sys_size + flux_gsrc_rs${XYZ}$_vf(j, k, l, i) = 0._wp + end do - ! Geometrical source flux for cylindrical coordinates - #:if (NORM_DIR == 2) - if (cyl_coord) then - !Substituting the advective flux into the inviscid geometrical source flux - $:GPU_LOOP(parallelism='[seq]') - do i = 1, E_idx - flux_gsrc_rs${XYZ}$_vf(j, k, l, i) = flux_rs${XYZ}$_vf(j, k, l, i) - end do - ! Recalculating the radial momentum geometric source flux - flux_gsrc_rs${XYZ}$_vf(j, k, l, contxe + idx1) = & - xi_M*(rho_L*(vel_L(idx1)* & - vel_L(idx1) + & - s_M*(xi_L*(dir_flg(idx1)*s_S + & - (1._wp - dir_flg(idx1))* & - vel_L(idx1)) - vel_L(idx1)))) & - + xi_P*(rho_R*(vel_R(idx1)* & - vel_R(idx1) + & - s_P*(xi_R*(dir_flg(idx1)*s_S + & - (1._wp - dir_flg(idx1))* & - vel_R(idx1)) - vel_R(idx1)))) - ! Geometrical source of the void fraction(s) is zero - $:GPU_LOOP(parallelism='[seq]') - do i = advxb, advxe - flux_gsrc_rs${XYZ}$_vf(j, k, l, i) = 0._wp - end do - end if - #:endif - #:if (NORM_DIR == 3) - if (grid_geometry == 3) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, sys_size - flux_gsrc_rs${XYZ}$_vf(j, k, l, i) = 0._wp - end do - - flux_gsrc_rs${XYZ}$_vf(j, k, l, momxb + 1) = & - -xi_M*(rho_L*(vel_L(idx1)* & - vel_L(idx1) + & - s_M*(xi_L*(dir_flg(idx1)*s_S + & - (1._wp - dir_flg(idx1))* & - vel_L(idx1)) - vel_L(idx1)))) & - - xi_P*(rho_R*(vel_R(idx1)* & - vel_R(idx1) + & - s_P*(xi_R*(dir_flg(idx1)*s_S + & - (1._wp - dir_flg(idx1))* & - vel_R(idx1)) - vel_R(idx1)))) - flux_gsrc_rs${XYZ}$_vf(j, k, l, momxe) = flux_rs${XYZ}$_vf(j, k, l, momxb + 1) + flux_gsrc_rs${XYZ}$_vf(j, k, l, momxb + 1) = & + -xi_M*(rho_L*(vel_L(idx1)* & + vel_L(idx1) + & + s_M*(xi_L*(dir_flg(idx1)*s_S + & + (1._wp - dir_flg(idx1))* & + vel_L(idx1)) - vel_L(idx1)))) & + - xi_P*(rho_R*(vel_R(idx1)* & + vel_R(idx1) + & + s_P*(xi_R*(dir_flg(idx1)*s_S + & + (1._wp - dir_flg(idx1))* & + vel_R(idx1)) - vel_R(idx1)))) + flux_gsrc_rs${XYZ}$_vf(j, k, l, momxe) = flux_rs${XYZ}$_vf(j, k, l, momxb + 1) - end if - #:endif + end if + #:endif - end do end do end do + end do $:END_GPU_PARALLEL_LOOP() end if end if @@ -3656,177 +3656,177 @@ contains if (norm_dir == ${NORM_DIR}$) then #:block UNDEF_AMD $:GPU_PARALLEL_LOOP(collapse=3, private='[i,j,k,l,alpha_rho_L, alpha_rho_R, vel, alpha_L, alpha_R, rho, pres,E, H_no_mag, gamma, pi_inf, qv, vel_rms, B, c, c_fast, pres_mag, U_L, U_R, U_starL, U_starR, U_doubleL, U_doubleR, F_L, F_R, F_starL, F_starR, F_hlld]') - do l = is3%beg, is3%end - do k = is2%beg, is2%end - do j = is1%beg, is1%end - - ! (1) Extract the left/right primitive states - do i = 1, contxe - alpha_rho_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, i) - alpha_rho_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) - end do + do l = is3%beg, is3%end + do k = is2%beg, is2%end + do j = is1%beg, is1%end - ! NOTE: unlike HLL & HLLC, vel_L here is permutated by dir_idx for simpler logic - do i = 1, num_vels - vel%L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, contxe + dir_idx(i)) - vel%R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, contxe + dir_idx(i)) - end do + ! (1) Extract the left/right primitive states + do i = 1, contxe + alpha_rho_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, i) + alpha_rho_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) + end do - vel_rms%L = sum(vel%L**2._wp) - vel_rms%R = sum(vel%R**2._wp) + ! NOTE: unlike HLL & HLLC, vel_L here is permutated by dir_idx for simpler logic + do i = 1, num_vels + vel%L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, contxe + dir_idx(i)) + vel%R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, contxe + dir_idx(i)) + end do - do i = 1, num_fluids - alpha_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i) - alpha_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i) - end do + vel_rms%L = sum(vel%L**2._wp) + vel_rms%R = sum(vel%R**2._wp) - pres%L = qL_prim_rs${XYZ}$_vf(j, k, l, E_idx) - pres%R = qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx) - - ! NOTE: unlike HLL, Bx, By, Bz are permutated by dir_idx for simpler logic - if (mhd) then - if (n == 0) then ! 1D: constant Bx; By, Bz as variables; only in x so not permutated - B%L = [Bx0, qL_prim_rs${XYZ}$_vf(j, k, l, B_idx%beg), qL_prim_rs${XYZ}$_vf(j, k, l, B_idx%beg + 1)] - B%R = [Bx0, qR_prim_rs${XYZ}$_vf(j + 1, k, l, B_idx%beg), qR_prim_rs${XYZ}$_vf(j + 1, k, l, B_idx%beg + 1)] - else ! 2D/3D: Bx, By, Bz as variables - B%L = [qL_prim_rs${XYZ}$_vf(j, k, l, B_idx%beg + dir_idx(1) - 1), & - qL_prim_rs${XYZ}$_vf(j, k, l, B_idx%beg + dir_idx(2) - 1), & - qL_prim_rs${XYZ}$_vf(j, k, l, B_idx%beg + dir_idx(3) - 1)] - B%R = [qR_prim_rs${XYZ}$_vf(j + 1, k, l, B_idx%beg + dir_idx(1) - 1), & - qR_prim_rs${XYZ}$_vf(j + 1, k, l, B_idx%beg + dir_idx(2) - 1), & - qR_prim_rs${XYZ}$_vf(j + 1, k, l, B_idx%beg + dir_idx(3) - 1)] - end if - end if + do i = 1, num_fluids + alpha_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i) + alpha_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i) + end do - ! Sum properties of all fluid components - rho%L = 0._wp; gamma%L = 0._wp; pi_inf%L = 0._wp; qv%L = 0._wp - rho%R = 0._wp; gamma%R = 0._wp; pi_inf%R = 0._wp; qv%R = 0._wp - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - rho%L = rho%L + alpha_rho_L(i) - gamma%L = gamma%L + alpha_L(i)*gammas(i) - pi_inf%L = pi_inf%L + alpha_L(i)*pi_infs(i) - qv%L = qv%L + alpha_rho_L(i)*qvs(i) - - rho%R = rho%R + alpha_rho_R(i) - gamma%R = gamma%R + alpha_R(i)*gammas(i) - pi_inf%R = pi_inf%R + alpha_R(i)*pi_infs(i) - qv%R = qv%R + alpha_rho_R(i)*qvs(i) - end do + pres%L = qL_prim_rs${XYZ}$_vf(j, k, l, E_idx) + pres%R = qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx) - pres_mag%L = 0.5_wp*sum(B%L**2._wp) - pres_mag%R = 0.5_wp*sum(B%R**2._wp) - E%L = gamma%L*pres%L + pi_inf%L + 0.5_wp*rho%L*vel_rms%L + qv%L + pres_mag%L - E%R = gamma%R*pres%R + pi_inf%R + 0.5_wp*rho%R*vel_rms%R + qv%R + pres_mag%R ! includes magnetic energy - H_no_mag%L = (E%L + pres%L - pres_mag%L)/rho%L - H_no_mag%R = (E%R + pres%R - pres_mag%R)/rho%R ! stagnation enthalpy here excludes magnetic energy (only used to find speed of sound) - - ! (2) Compute fast wave speeds - call s_compute_speed_of_sound(pres%L, rho%L, gamma%L, pi_inf%L, H_no_mag%L, alpha_L, vel_rms%L, 0._wp, c%L) - call s_compute_speed_of_sound(pres%R, rho%R, gamma%R, pi_inf%R, H_no_mag%R, alpha_R, vel_rms%R, 0._wp, c%R) - call s_compute_fast_magnetosonic_speed(rho%L, c%L, B%L, norm_dir, c_fast%L, H_no_mag%L) - call s_compute_fast_magnetosonic_speed(rho%R, c%R, B%R, norm_dir, c_fast%R, H_no_mag%R) - - ! (3) Compute contact speed s_M [Miyoshi Equ. (38)] - s_L = min(vel%L(1) - c_fast%L, vel%R(1) - c_fast%R) - s_R = max(vel%R(1) + c_fast%R, vel%L(1) + c_fast%L) - - pTot_L = pres%L + pres_mag%L - pTot_R = pres%R + pres_mag%R - - s_M = (((s_R - vel%R(1))*rho%R*vel%R(1) - & - (s_L - vel%L(1))*rho%L*vel%L(1) - pTot_R + pTot_L)/ & - ((s_R - vel%R(1))*rho%R - (s_L - vel%L(1))*rho%L)) - - ! (4) Compute star state variables - rhoL_star = rho%L*(s_L - vel%L(1))/(s_L - s_M) - rhoR_star = rho%R*(s_R - vel%R(1))/(s_R - s_M) - p_star = pTot_L + rho%L*(s_L - vel%L(1))*(s_M - vel%L(1))/(s_L - s_M) - E_starL = ((s_L - vel%L(1))*E%L - pTot_L*vel%L(1) + p_star*s_M)/(s_L - s_M) - E_starR = ((s_R - vel%R(1))*E%R - pTot_R*vel%R(1) + p_star*s_M)/(s_R - s_M) - - ! (5) Compute left/right state vectors and fluxes - U_L = [rho%L, rho%L*vel%L(1:3), B%L(2:3), E%L] - U_starL = [rhoL_star, rhoL_star*s_M, rhoL_star*vel%L(2:3), B%L(2:3), E_starL] - U_R = [rho%R, rho%R*vel%R(1:3), B%R(2:3), E%R] - U_starR = [rhoR_star, rhoR_star*s_M, rhoR_star*vel%R(2:3), B%R(2:3), E_starR] - - ! Compute the left/right fluxes - F_L(1) = U_L(2) - F_L(2) = U_L(2)*vel%L(1) - B%L(1)*B%L(1) + pTot_L - F_L(3:4) = U_L(2)*vel%L(2:3) - B%L(1)*B%L(2:3) - F_L(5:6) = vel%L(1)*B%L(2:3) - vel%L(2:3)*B%L(1) - F_L(7) = (E%L + pTot_L)*vel%L(1) - B%L(1)*(vel%L(1)*B%L(1) + vel%L(2)*B%L(2) + vel%L(3)*B%L(3)) - - F_R(1) = U_R(2) - F_R(2) = U_R(2)*vel%R(1) - B%R(1)*B%R(1) + pTot_R - F_R(3:4) = U_R(2)*vel%R(2:3) - B%R(1)*B%R(2:3) - F_R(5:6) = vel%R(1)*B%R(2:3) - vel%R(2:3)*B%R(1) - F_R(7) = (E%R + pTot_R)*vel%R(1) - B%R(1)*(vel%R(1)*B%R(1) + vel%R(2)*B%R(2) + vel%R(3)*B%R(3)) - ! Compute the star flux using HLL relation - F_starL = F_L + s_L*(U_starL - U_L) - F_starR = F_R + s_R*(U_starR - U_R) - ! Compute the rotational (Alfvén) speeds - s_starL = s_M - abs(B%L(1))/sqrt(rhoL_star) - s_starR = s_M + abs(B%L(1))/sqrt(rhoR_star) - ! Compute the double–star states [Miyoshi Eqns. (59)-(62)] - sqrt_rhoL_star = sqrt(rhoL_star); sqrt_rhoR_star = sqrt(rhoR_star) - vL_star = vel%L(2); wL_star = vel%L(3) - vR_star = vel%R(2); wR_star = vel%R(3) - - ! (6) Compute the double–star states [Miyoshi Eqns. (59)-(62)] - denom_ds = sqrt_rhoL_star + sqrt_rhoR_star - sign_Bx = sign(1._wp, B%L(1)) - v_double = (sqrt_rhoL_star*vL_star + sqrt_rhoR_star*vR_star + (B%R(2) - B%L(2))*sign_Bx)/denom_ds - w_double = (sqrt_rhoL_star*wL_star + sqrt_rhoR_star*wR_star + (B%R(3) - B%L(3))*sign_Bx)/denom_ds - By_double = (sqrt_rhoL_star*B%R(2) + sqrt_rhoR_star*B%L(2) + sqrt_rhoL_star*sqrt_rhoR_star*(vR_star - vL_star)*sign_Bx)/denom_ds - Bz_double = (sqrt_rhoL_star*B%R(3) + sqrt_rhoR_star*B%L(3) + sqrt_rhoL_star*sqrt_rhoR_star*(wR_star - wL_star)*sign_Bx)/denom_ds - - E_doubleL = E_starL - sqrt_rhoL_star*((vL_star*B%L(2) + wL_star*B%L(3)) - (v_double*By_double + w_double*Bz_double))*sign_Bx - E_doubleR = E_starR + sqrt_rhoR_star*((vR_star*B%R(2) + wR_star*B%R(3)) - (v_double*By_double + w_double*Bz_double))*sign_Bx - E_double = 0.5_wp*(E_doubleL + E_doubleR) - - U_doubleL = [rhoL_star, rhoL_star*s_M, rhoL_star*v_double, rhoL_star*w_double, By_double, Bz_double, E_double] - U_doubleR = [rhoR_star, rhoR_star*s_M, rhoR_star*v_double, rhoR_star*w_double, By_double, Bz_double, E_double] - - ! (11) Choose HLLD flux based on wave-speed regions - if (0.0_wp <= s_L) then - F_hlld = F_L - else if (0.0_wp <= s_starL) then - F_hlld = F_L + s_L*(U_starL - U_L) - else if (0.0_wp <= s_M) then - F_hlld = F_starL + s_starL*(U_doubleL - U_starL) - else if (0.0_wp <= s_starR) then - F_hlld = F_starR + s_starR*(U_doubleR - U_starR) - else if (0.0_wp <= s_R) then - F_hlld = F_R + s_R*(U_starR - U_R) - else - F_hlld = F_R + ! NOTE: unlike HLL, Bx, By, Bz are permutated by dir_idx for simpler logic + if (mhd) then + if (n == 0) then ! 1D: constant Bx; By, Bz as variables; only in x so not permutated + B%L = [Bx0, qL_prim_rs${XYZ}$_vf(j, k, l, B_idx%beg), qL_prim_rs${XYZ}$_vf(j, k, l, B_idx%beg + 1)] + B%R = [Bx0, qR_prim_rs${XYZ}$_vf(j + 1, k, l, B_idx%beg), qR_prim_rs${XYZ}$_vf(j + 1, k, l, B_idx%beg + 1)] + else ! 2D/3D: Bx, By, Bz as variables + B%L = [qL_prim_rs${XYZ}$_vf(j, k, l, B_idx%beg + dir_idx(1) - 1), & + qL_prim_rs${XYZ}$_vf(j, k, l, B_idx%beg + dir_idx(2) - 1), & + qL_prim_rs${XYZ}$_vf(j, k, l, B_idx%beg + dir_idx(3) - 1)] + B%R = [qR_prim_rs${XYZ}$_vf(j + 1, k, l, B_idx%beg + dir_idx(1) - 1), & + qR_prim_rs${XYZ}$_vf(j + 1, k, l, B_idx%beg + dir_idx(2) - 1), & + qR_prim_rs${XYZ}$_vf(j + 1, k, l, B_idx%beg + dir_idx(3) - 1)] end if + end if - ! (12) Reorder and write temporary variables to the flux array - ! Mass - flux_rs${XYZ}$_vf(j, k, l, 1) = F_hlld(1) ! TODO multi-component - ! Momentum - flux_rs${XYZ}$_vf(j, k, l, [contxe + dir_idx(1), contxe + dir_idx(2), contxe + dir_idx(3)]) = F_hlld([2, 3, 4]) - ! Magnetic field - if (n == 0) then - flux_rs${XYZ}$_vf(j, k, l, [B_idx%beg, B_idx%beg + 1]) = F_hlld([5, 6]) - else - flux_rs${XYZ}$_vf(j, k, l, [B_idx%beg + dir_idx(2) - 1, B_idx%beg + dir_idx(3) - 1]) = F_hlld([5, 6]) - end if - ! Energy - flux_rs${XYZ}$_vf(j, k, l, E_idx) = F_hlld(7) - ! Partial fraction - $:GPU_LOOP(parallelism='[seq]') - do i = advxb, advxe - flux_rs${XYZ}$_vf(j, k, l, i) = 0._wp ! TODO multi-component (zero for now) - end do + ! Sum properties of all fluid components + rho%L = 0._wp; gamma%L = 0._wp; pi_inf%L = 0._wp; qv%L = 0._wp + rho%R = 0._wp; gamma%R = 0._wp; pi_inf%R = 0._wp; qv%R = 0._wp + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids + rho%L = rho%L + alpha_rho_L(i) + gamma%L = gamma%L + alpha_L(i)*gammas(i) + pi_inf%L = pi_inf%L + alpha_L(i)*pi_infs(i) + qv%L = qv%L + alpha_rho_L(i)*qvs(i) + + rho%R = rho%R + alpha_rho_R(i) + gamma%R = gamma%R + alpha_R(i)*gammas(i) + pi_inf%R = pi_inf%R + alpha_R(i)*pi_infs(i) + qv%R = qv%R + alpha_rho_R(i)*qvs(i) + end do + + pres_mag%L = 0.5_wp*sum(B%L**2._wp) + pres_mag%R = 0.5_wp*sum(B%R**2._wp) + E%L = gamma%L*pres%L + pi_inf%L + 0.5_wp*rho%L*vel_rms%L + qv%L + pres_mag%L + E%R = gamma%R*pres%R + pi_inf%R + 0.5_wp*rho%R*vel_rms%R + qv%R + pres_mag%R ! includes magnetic energy + H_no_mag%L = (E%L + pres%L - pres_mag%L)/rho%L + H_no_mag%R = (E%R + pres%R - pres_mag%R)/rho%R ! stagnation enthalpy here excludes magnetic energy (only used to find speed of sound) + + ! (2) Compute fast wave speeds + call s_compute_speed_of_sound(pres%L, rho%L, gamma%L, pi_inf%L, H_no_mag%L, alpha_L, vel_rms%L, 0._wp, c%L) + call s_compute_speed_of_sound(pres%R, rho%R, gamma%R, pi_inf%R, H_no_mag%R, alpha_R, vel_rms%R, 0._wp, c%R) + call s_compute_fast_magnetosonic_speed(rho%L, c%L, B%L, norm_dir, c_fast%L, H_no_mag%L) + call s_compute_fast_magnetosonic_speed(rho%R, c%R, B%R, norm_dir, c_fast%R, H_no_mag%R) + + ! (3) Compute contact speed s_M [Miyoshi Equ. (38)] + s_L = min(vel%L(1) - c_fast%L, vel%R(1) - c_fast%R) + s_R = max(vel%R(1) + c_fast%R, vel%L(1) + c_fast%L) + + pTot_L = pres%L + pres_mag%L + pTot_R = pres%R + pres_mag%R + + s_M = (((s_R - vel%R(1))*rho%R*vel%R(1) - & + (s_L - vel%L(1))*rho%L*vel%L(1) - pTot_R + pTot_L)/ & + ((s_R - vel%R(1))*rho%R - (s_L - vel%L(1))*rho%L)) + + ! (4) Compute star state variables + rhoL_star = rho%L*(s_L - vel%L(1))/(s_L - s_M) + rhoR_star = rho%R*(s_R - vel%R(1))/(s_R - s_M) + p_star = pTot_L + rho%L*(s_L - vel%L(1))*(s_M - vel%L(1))/(s_L - s_M) + E_starL = ((s_L - vel%L(1))*E%L - pTot_L*vel%L(1) + p_star*s_M)/(s_L - s_M) + E_starR = ((s_R - vel%R(1))*E%R - pTot_R*vel%R(1) + p_star*s_M)/(s_R - s_M) + + ! (5) Compute left/right state vectors and fluxes + U_L = [rho%L, rho%L*vel%L(1:3), B%L(2:3), E%L] + U_starL = [rhoL_star, rhoL_star*s_M, rhoL_star*vel%L(2:3), B%L(2:3), E_starL] + U_R = [rho%R, rho%R*vel%R(1:3), B%R(2:3), E%R] + U_starR = [rhoR_star, rhoR_star*s_M, rhoR_star*vel%R(2:3), B%R(2:3), E_starR] + + ! Compute the left/right fluxes + F_L(1) = U_L(2) + F_L(2) = U_L(2)*vel%L(1) - B%L(1)*B%L(1) + pTot_L + F_L(3:4) = U_L(2)*vel%L(2:3) - B%L(1)*B%L(2:3) + F_L(5:6) = vel%L(1)*B%L(2:3) - vel%L(2:3)*B%L(1) + F_L(7) = (E%L + pTot_L)*vel%L(1) - B%L(1)*(vel%L(1)*B%L(1) + vel%L(2)*B%L(2) + vel%L(3)*B%L(3)) + + F_R(1) = U_R(2) + F_R(2) = U_R(2)*vel%R(1) - B%R(1)*B%R(1) + pTot_R + F_R(3:4) = U_R(2)*vel%R(2:3) - B%R(1)*B%R(2:3) + F_R(5:6) = vel%R(1)*B%R(2:3) - vel%R(2:3)*B%R(1) + F_R(7) = (E%R + pTot_R)*vel%R(1) - B%R(1)*(vel%R(1)*B%R(1) + vel%R(2)*B%R(2) + vel%R(3)*B%R(3)) + ! Compute the star flux using HLL relation + F_starL = F_L + s_L*(U_starL - U_L) + F_starR = F_R + s_R*(U_starR - U_R) + ! Compute the rotational (Alfvén) speeds + s_starL = s_M - abs(B%L(1))/sqrt(rhoL_star) + s_starR = s_M + abs(B%L(1))/sqrt(rhoR_star) + ! Compute the double–star states [Miyoshi Eqns. (59)-(62)] + sqrt_rhoL_star = sqrt(rhoL_star); sqrt_rhoR_star = sqrt(rhoR_star) + vL_star = vel%L(2); wL_star = vel%L(3) + vR_star = vel%R(2); wR_star = vel%R(3) + + ! (6) Compute the double–star states [Miyoshi Eqns. (59)-(62)] + denom_ds = sqrt_rhoL_star + sqrt_rhoR_star + sign_Bx = sign(1._wp, B%L(1)) + v_double = (sqrt_rhoL_star*vL_star + sqrt_rhoR_star*vR_star + (B%R(2) - B%L(2))*sign_Bx)/denom_ds + w_double = (sqrt_rhoL_star*wL_star + sqrt_rhoR_star*wR_star + (B%R(3) - B%L(3))*sign_Bx)/denom_ds + By_double = (sqrt_rhoL_star*B%R(2) + sqrt_rhoR_star*B%L(2) + sqrt_rhoL_star*sqrt_rhoR_star*(vR_star - vL_star)*sign_Bx)/denom_ds + Bz_double = (sqrt_rhoL_star*B%R(3) + sqrt_rhoR_star*B%L(3) + sqrt_rhoL_star*sqrt_rhoR_star*(wR_star - wL_star)*sign_Bx)/denom_ds + + E_doubleL = E_starL - sqrt_rhoL_star*((vL_star*B%L(2) + wL_star*B%L(3)) - (v_double*By_double + w_double*Bz_double))*sign_Bx + E_doubleR = E_starR + sqrt_rhoR_star*((vR_star*B%R(2) + wR_star*B%R(3)) - (v_double*By_double + w_double*Bz_double))*sign_Bx + E_double = 0.5_wp*(E_doubleL + E_doubleR) + + U_doubleL = [rhoL_star, rhoL_star*s_M, rhoL_star*v_double, rhoL_star*w_double, By_double, Bz_double, E_double] + U_doubleR = [rhoR_star, rhoR_star*s_M, rhoR_star*v_double, rhoR_star*w_double, By_double, Bz_double, E_double] + + ! (11) Choose HLLD flux based on wave-speed regions + if (0.0_wp <= s_L) then + F_hlld = F_L + else if (0.0_wp <= s_starL) then + F_hlld = F_L + s_L*(U_starL - U_L) + else if (0.0_wp <= s_M) then + F_hlld = F_starL + s_starL*(U_doubleL - U_starL) + else if (0.0_wp <= s_starR) then + F_hlld = F_starR + s_starR*(U_doubleR - U_starR) + else if (0.0_wp <= s_R) then + F_hlld = F_R + s_R*(U_starR - U_R) + else + F_hlld = F_R + end if - flux_src_rs${XYZ}$_vf(j, k, l, advxb) = 0._wp + ! (12) Reorder and write temporary variables to the flux array + ! Mass + flux_rs${XYZ}$_vf(j, k, l, 1) = F_hlld(1) ! TODO multi-component + ! Momentum + flux_rs${XYZ}$_vf(j, k, l, [contxe + dir_idx(1), contxe + dir_idx(2), contxe + dir_idx(3)]) = F_hlld([2, 3, 4]) + ! Magnetic field + if (n == 0) then + flux_rs${XYZ}$_vf(j, k, l, [B_idx%beg, B_idx%beg + 1]) = F_hlld([5, 6]) + else + flux_rs${XYZ}$_vf(j, k, l, [B_idx%beg + dir_idx(2) - 1, B_idx%beg + dir_idx(3) - 1]) = F_hlld([5, 6]) + end if + ! Energy + flux_rs${XYZ}$_vf(j, k, l, E_idx) = F_hlld(7) + ! Partial fraction + $:GPU_LOOP(parallelism='[seq]') + do i = advxb, advxe + flux_rs${XYZ}$_vf(j, k, l, i) = 0._wp ! TODO multi-component (zero for now) end do + + flux_src_rs${XYZ}$_vf(j, k, l, advxb) = 0._wp end do end do + end do $:END_GPU_PARALLEL_LOOP() #:endblock UNDEF_AMD end if @@ -4032,53 +4032,53 @@ contains if (bc_x%beg == BC_RIEMANN_EXTRAP) then ! Riemann state extrap. BC at beginning $:GPU_PARALLEL_LOOP(private='[i,k,l]', collapse=3) - do i = 1, sys_size - do l = is3%beg, is3%end - do k = is2%beg, is2%end - qL_prim_rsx_vf(-1, k, l, i) = & - qR_prim_rsx_vf(0, k, l, i) - end do + do i = 1, sys_size + do l = is3%beg, is3%end + do k = is2%beg, is2%end + qL_prim_rsx_vf(-1, k, l, i) = & + qR_prim_rsx_vf(0, k, l, i) end do end do + end do $:END_GPU_PARALLEL_LOOP() if (viscous) then $:GPU_PARALLEL_LOOP(private='[i,k,l]', collapse=3) - do i = momxb, momxe - do l = isz%beg, isz%end - do k = isy%beg, isy%end + do i = momxb, momxe + do l = isz%beg, isz%end + do k = isy%beg, isy%end - dqL_prim_dx_vf(i)%sf(-1, k, l) = & - dqR_prim_dx_vf(i)%sf(0, k, l) - end do + dqL_prim_dx_vf(i)%sf(-1, k, l) = & + dqR_prim_dx_vf(i)%sf(0, k, l) end do end do + end do $:END_GPU_PARALLEL_LOOP() if (n > 0) then $:GPU_PARALLEL_LOOP(private='[i,k,l]', collapse=3) - do i = momxb, momxe - do l = isz%beg, isz%end - do k = isy%beg, isy%end + do i = momxb, momxe + do l = isz%beg, isz%end + do k = isy%beg, isy%end - dqL_prim_dy_vf(i)%sf(-1, k, l) = & - dqR_prim_dy_vf(i)%sf(0, k, l) - end do + dqL_prim_dy_vf(i)%sf(-1, k, l) = & + dqR_prim_dy_vf(i)%sf(0, k, l) end do end do + end do $:END_GPU_PARALLEL_LOOP() if (p > 0) then $:GPU_PARALLEL_LOOP(private='[i,k,l]', collapse=3) - do i = momxb, momxe - do l = isz%beg, isz%end - do k = isy%beg, isy%end + do i = momxb, momxe + do l = isz%beg, isz%end + do k = isy%beg, isy%end - dqL_prim_dz_vf(i)%sf(-1, k, l) = & - dqR_prim_dz_vf(i)%sf(0, k, l) - end do + dqL_prim_dz_vf(i)%sf(-1, k, l) = & + dqR_prim_dz_vf(i)%sf(0, k, l) end do end do + end do $:END_GPU_PARALLEL_LOOP() end if @@ -4091,54 +4091,54 @@ contains if (bc_x%end == BC_RIEMANN_EXTRAP) then ! Riemann state extrap. BC at end $:GPU_PARALLEL_LOOP(private='[i,k,l]', collapse=3) - do i = 1, sys_size - do l = is3%beg, is3%end - do k = is2%beg, is2%end - qR_prim_rsx_vf(m + 1, k, l, i) = & - qL_prim_rsx_vf(m, k, l, i) - end do + do i = 1, sys_size + do l = is3%beg, is3%end + do k = is2%beg, is2%end + qR_prim_rsx_vf(m + 1, k, l, i) = & + qL_prim_rsx_vf(m, k, l, i) end do end do + end do $:END_GPU_PARALLEL_LOOP() if (viscous) then $:GPU_PARALLEL_LOOP(private='[i,k,l]', collapse=3) - do i = momxb, momxe - do l = isz%beg, isz%end - do k = isy%beg, isy%end + do i = momxb, momxe + do l = isz%beg, isz%end + do k = isy%beg, isy%end - dqR_prim_dx_vf(i)%sf(m + 1, k, l) = & - dqL_prim_dx_vf(i)%sf(m, k, l) - end do + dqR_prim_dx_vf(i)%sf(m + 1, k, l) = & + dqL_prim_dx_vf(i)%sf(m, k, l) end do end do + end do $:END_GPU_PARALLEL_LOOP() if (n > 0) then $:GPU_PARALLEL_LOOP(private='[i,k,l]', collapse=3) - do i = momxb, momxe - do l = isz%beg, isz%end - do k = isy%beg, isy%end + do i = momxb, momxe + do l = isz%beg, isz%end + do k = isy%beg, isy%end - dqR_prim_dy_vf(i)%sf(m + 1, k, l) = & - dqL_prim_dy_vf(i)%sf(m, k, l) - end do + dqR_prim_dy_vf(i)%sf(m + 1, k, l) = & + dqL_prim_dy_vf(i)%sf(m, k, l) end do end do + end do $:END_GPU_PARALLEL_LOOP() if (p > 0) then $:GPU_PARALLEL_LOOP(private='[i,k,l]', collapse=3) - do i = momxb, momxe - do l = isz%beg, isz%end - do k = isy%beg, isy%end + do i = momxb, momxe + do l = isz%beg, isz%end + do k = isy%beg, isy%end - dqR_prim_dz_vf(i)%sf(m + 1, k, l) = & - dqL_prim_dz_vf(i)%sf(m, k, l) - end do + dqR_prim_dz_vf(i)%sf(m + 1, k, l) = & + dqL_prim_dz_vf(i)%sf(m, k, l) end do end do + end do $:END_GPU_PARALLEL_LOOP() end if @@ -4154,50 +4154,50 @@ contains if (bc_y%beg == BC_RIEMANN_EXTRAP) then ! Riemann state extrap. BC at beginning $:GPU_PARALLEL_LOOP(private='[i,k,l]', collapse=3) - do i = 1, sys_size - do l = is3%beg, is3%end - do k = is2%beg, is2%end - qL_prim_rsy_vf(-1, k, l, i) = & - qR_prim_rsy_vf(0, k, l, i) - end do + do i = 1, sys_size + do l = is3%beg, is3%end + do k = is2%beg, is2%end + qL_prim_rsy_vf(-1, k, l, i) = & + qR_prim_rsy_vf(0, k, l, i) end do end do + end do $:END_GPU_PARALLEL_LOOP() if (viscous) then $:GPU_PARALLEL_LOOP(private='[i,j,l]', collapse=3) - do i = momxb, momxe - do l = isz%beg, isz%end - do j = isx%beg, isx%end - dqL_prim_dx_vf(i)%sf(j, -1, l) = & - dqR_prim_dx_vf(i)%sf(j, 0, l) - end do + do i = momxb, momxe + do l = isz%beg, isz%end + do j = isx%beg, isx%end + dqL_prim_dx_vf(i)%sf(j, -1, l) = & + dqR_prim_dx_vf(i)%sf(j, 0, l) end do end do + end do $:END_GPU_PARALLEL_LOOP() $:GPU_PARALLEL_LOOP(private='[i,j,l]', collapse=3) - do i = momxb, momxe - do l = isz%beg, isz%end - do j = isx%beg, isx%end - dqL_prim_dy_vf(i)%sf(j, -1, l) = & - dqR_prim_dy_vf(i)%sf(j, 0, l) - end do + do i = momxb, momxe + do l = isz%beg, isz%end + do j = isx%beg, isx%end + dqL_prim_dy_vf(i)%sf(j, -1, l) = & + dqR_prim_dy_vf(i)%sf(j, 0, l) end do end do + end do $:END_GPU_PARALLEL_LOOP() if (p > 0) then $:GPU_PARALLEL_LOOP(private='[i,j,l]', collapse=3) - do i = momxb, momxe - do l = isz%beg, isz%end - do j = isx%beg, isx%end - dqL_prim_dz_vf(i)%sf(j, -1, l) = & - dqR_prim_dz_vf(i)%sf(j, 0, l) - end do + do i = momxb, momxe + do l = isz%beg, isz%end + do j = isx%beg, isx%end + dqL_prim_dz_vf(i)%sf(j, -1, l) = & + dqR_prim_dz_vf(i)%sf(j, 0, l) end do end do + end do $:END_GPU_PARALLEL_LOOP() end if @@ -4208,50 +4208,50 @@ contains if (bc_y%end == BC_RIEMANN_EXTRAP) then ! Riemann state extrap. BC at end $:GPU_PARALLEL_LOOP(private='[i,k,l]', collapse=3) - do i = 1, sys_size - do l = is3%beg, is3%end - do k = is2%beg, is2%end - qR_prim_rsy_vf(n + 1, k, l, i) = & - qL_prim_rsy_vf(n, k, l, i) - end do + do i = 1, sys_size + do l = is3%beg, is3%end + do k = is2%beg, is2%end + qR_prim_rsy_vf(n + 1, k, l, i) = & + qL_prim_rsy_vf(n, k, l, i) end do end do + end do $:END_GPU_PARALLEL_LOOP() if (viscous) then $:GPU_PARALLEL_LOOP(private='[i,j,l]', collapse=3) - do i = momxb, momxe - do l = isz%beg, isz%end - do j = isx%beg, isx%end - dqR_prim_dx_vf(i)%sf(j, n + 1, l) = & - dqL_prim_dx_vf(i)%sf(j, n, l) - end do + do i = momxb, momxe + do l = isz%beg, isz%end + do j = isx%beg, isx%end + dqR_prim_dx_vf(i)%sf(j, n + 1, l) = & + dqL_prim_dx_vf(i)%sf(j, n, l) end do end do + end do $:END_GPU_PARALLEL_LOOP() $:GPU_PARALLEL_LOOP(private='[i,j,l]', collapse=3) - do i = momxb, momxe - do l = isz%beg, isz%end - do j = isx%beg, isx%end - dqR_prim_dy_vf(i)%sf(j, n + 1, l) = & - dqL_prim_dy_vf(i)%sf(j, n, l) - end do + do i = momxb, momxe + do l = isz%beg, isz%end + do j = isx%beg, isx%end + dqR_prim_dy_vf(i)%sf(j, n + 1, l) = & + dqL_prim_dy_vf(i)%sf(j, n, l) end do end do + end do $:END_GPU_PARALLEL_LOOP() if (p > 0) then $:GPU_PARALLEL_LOOP(private='[i,j,l]', collapse=3) - do i = momxb, momxe - do l = isz%beg, isz%end - do j = isx%beg, isx%end - dqR_prim_dz_vf(i)%sf(j, n + 1, l) = & - dqL_prim_dz_vf(i)%sf(j, n, l) - end do + do i = momxb, momxe + do l = isz%beg, isz%end + do j = isx%beg, isx%end + dqR_prim_dz_vf(i)%sf(j, n + 1, l) = & + dqL_prim_dz_vf(i)%sf(j, n, l) end do end do + end do $:END_GPU_PARALLEL_LOOP() end if @@ -4265,46 +4265,46 @@ contains if (bc_z%beg == BC_RIEMANN_EXTRAP) then ! Riemann state extrap. BC at beginning $:GPU_PARALLEL_LOOP(private='[i,k,l]', collapse=3) - do i = 1, sys_size - do l = is3%beg, is3%end - do k = is2%beg, is2%end - qL_prim_rsz_vf(-1, k, l, i) = & - qR_prim_rsz_vf(0, k, l, i) - end do + do i = 1, sys_size + do l = is3%beg, is3%end + do k = is2%beg, is2%end + qL_prim_rsz_vf(-1, k, l, i) = & + qR_prim_rsz_vf(0, k, l, i) end do end do + end do $:END_GPU_PARALLEL_LOOP() if (viscous) then $:GPU_PARALLEL_LOOP(private='[i,k,j]', collapse=3) - do i = momxb, momxe - do k = isy%beg, isy%end - do j = isx%beg, isx%end - dqL_prim_dx_vf(i)%sf(j, k, -1) = & - dqR_prim_dx_vf(i)%sf(j, k, 0) - end do + do i = momxb, momxe + do k = isy%beg, isy%end + do j = isx%beg, isx%end + dqL_prim_dx_vf(i)%sf(j, k, -1) = & + dqR_prim_dx_vf(i)%sf(j, k, 0) end do end do + end do $:END_GPU_PARALLEL_LOOP() $:GPU_PARALLEL_LOOP(private='[i,k,j]', collapse=3) - do i = momxb, momxe - do k = isy%beg, isy%end - do j = isx%beg, isx%end - dqL_prim_dy_vf(i)%sf(j, k, -1) = & - dqR_prim_dy_vf(i)%sf(j, k, 0) - end do + do i = momxb, momxe + do k = isy%beg, isy%end + do j = isx%beg, isx%end + dqL_prim_dy_vf(i)%sf(j, k, -1) = & + dqR_prim_dy_vf(i)%sf(j, k, 0) end do end do + end do $:END_GPU_PARALLEL_LOOP() $:GPU_PARALLEL_LOOP(private='[i,k,j]', collapse=3) - do i = momxb, momxe - do k = isy%beg, isy%end - do j = isx%beg, isx%end - dqL_prim_dz_vf(i)%sf(j, k, -1) = & - dqR_prim_dz_vf(i)%sf(j, k, 0) - end do + do i = momxb, momxe + do k = isy%beg, isy%end + do j = isx%beg, isx%end + dqL_prim_dz_vf(i)%sf(j, k, -1) = & + dqR_prim_dz_vf(i)%sf(j, k, 0) end do end do + end do $:END_GPU_PARALLEL_LOOP() end if @@ -4313,48 +4313,48 @@ contains if (bc_z%end == BC_RIEMANN_EXTRAP) then ! Riemann state extrap. BC at end $:GPU_PARALLEL_LOOP(private='[i,k,l]', collapse=3) - do i = 1, sys_size - do l = is3%beg, is3%end - do k = is2%beg, is2%end - qR_prim_rsz_vf(p + 1, k, l, i) = & - qL_prim_rsz_vf(p, k, l, i) - end do + do i = 1, sys_size + do l = is3%beg, is3%end + do k = is2%beg, is2%end + qR_prim_rsz_vf(p + 1, k, l, i) = & + qL_prim_rsz_vf(p, k, l, i) end do end do + end do $:END_GPU_PARALLEL_LOOP() if (viscous) then $:GPU_PARALLEL_LOOP(private='[i,k,j]', collapse=3) - do i = momxb, momxe - do k = isy%beg, isy%end - do j = isx%beg, isx%end - dqR_prim_dx_vf(i)%sf(j, k, p + 1) = & - dqL_prim_dx_vf(i)%sf(j, k, p) - end do + do i = momxb, momxe + do k = isy%beg, isy%end + do j = isx%beg, isx%end + dqR_prim_dx_vf(i)%sf(j, k, p + 1) = & + dqL_prim_dx_vf(i)%sf(j, k, p) end do end do + end do $:END_GPU_PARALLEL_LOOP() $:GPU_PARALLEL_LOOP(private='[i,k,j]', collapse=3) - do i = momxb, momxe - do k = isy%beg, isy%end - do j = isx%beg, isx%end - dqR_prim_dy_vf(i)%sf(j, k, p + 1) = & - dqL_prim_dy_vf(i)%sf(j, k, p) - end do + do i = momxb, momxe + do k = isy%beg, isy%end + do j = isx%beg, isx%end + dqR_prim_dy_vf(i)%sf(j, k, p + 1) = & + dqL_prim_dy_vf(i)%sf(j, k, p) end do end do + end do $:END_GPU_PARALLEL_LOOP() $:GPU_PARALLEL_LOOP(private='[i,k,j]', collapse=3) - do i = momxb, momxe - do k = isy%beg, isy%end - do j = isx%beg, isx%end - dqR_prim_dz_vf(i)%sf(j, k, p + 1) = & - dqL_prim_dz_vf(i)%sf(j, k, p) - end do + do i = momxb, momxe + do k = isy%beg, isy%end + do j = isx%beg, isx%end + dqR_prim_dz_vf(i)%sf(j, k, p + 1) = & + dqL_prim_dz_vf(i)%sf(j, k, p) end do end do + end do $:END_GPU_PARALLEL_LOOP() end if @@ -4400,45 +4400,45 @@ contains if (viscous .or. (surface_tension)) then $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=4) - do i = momxb, E_idx - do l = is3%beg, is3%end - do k = is2%beg, is2%end - do j = is1%beg, is1%end - flux_src_vf(i)%sf(j, k, l) = 0._wp - end do + do i = momxb, E_idx + do l = is3%beg, is3%end + do k = is2%beg, is2%end + do j = is1%beg, is1%end + flux_src_vf(i)%sf(j, k, l) = 0._wp end do end do end do + end do $:END_GPU_PARALLEL_LOOP() end if if (chem_params%diffusion) then $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=4) - do i = E_idx, chemxe - do l = is3%beg, is3%end - do k = is2%beg, is2%end - do j = is1%beg, is1%end - if (i == E_idx .or. i >= chemxb) then - flux_src_vf(i)%sf(j, k, l) = 0._wp - end if - end do + do i = E_idx, chemxe + do l = is3%beg, is3%end + do k = is2%beg, is2%end + do j = is1%beg, is1%end + if (i == E_idx .or. i >= chemxb) then + flux_src_vf(i)%sf(j, k, l) = 0._wp + end if end do end do end do + end do $:END_GPU_PARALLEL_LOOP() end if if (qbmm) then $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=4) - do i = 1, 4 - do l = is3%beg, is3%end - do k = is2%beg, is2%end - do j = is1%beg, is1%end + 1 - mom_sp_rsx_vf(j, k, l, i) = mom_sp(i)%sf(j, k, l) - end do + do i = 1, 4 + do l = is3%beg, is3%end + do k = is2%beg, is2%end + do j = is1%beg, is1%end + 1 + mom_sp_rsx_vf(j, k, l, i) = mom_sp(i)%sf(j, k, l) end do end do end do + end do $:END_GPU_PARALLEL_LOOP() end if @@ -4447,45 +4447,45 @@ contains if (viscous .or. (surface_tension)) then $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=4) - do i = momxb, E_idx - do l = is3%beg, is3%end - do j = is1%beg, is1%end - do k = is2%beg, is2%end - flux_src_vf(i)%sf(k, j, l) = 0._wp - end do + do i = momxb, E_idx + do l = is3%beg, is3%end + do j = is1%beg, is1%end + do k = is2%beg, is2%end + flux_src_vf(i)%sf(k, j, l) = 0._wp end do end do end do + end do $:END_GPU_PARALLEL_LOOP() end if if (chem_params%diffusion) then $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=4) - do i = E_idx, chemxe - do l = is3%beg, is3%end - do j = is1%beg, is1%end - do k = is2%beg, is2%end - if (i == E_idx .or. i >= chemxb) then - flux_src_vf(i)%sf(k, j, l) = 0._wp - end if - end do + do i = E_idx, chemxe + do l = is3%beg, is3%end + do j = is1%beg, is1%end + do k = is2%beg, is2%end + if (i == E_idx .or. i >= chemxb) then + flux_src_vf(i)%sf(k, j, l) = 0._wp + end if end do end do end do + end do $:END_GPU_PARALLEL_LOOP() end if if (qbmm) then $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=4) - do i = 1, 4 - do l = is3%beg, is3%end - do k = is2%beg, is2%end - do j = is1%beg, is1%end + 1 - mom_sp_rsy_vf(j, k, l, i) = mom_sp(i)%sf(k, j, l) - end do + do i = 1, 4 + do l = is3%beg, is3%end + do k = is2%beg, is2%end + do j = is1%beg, is1%end + 1 + mom_sp_rsy_vf(j, k, l, i) = mom_sp(i)%sf(k, j, l) end do end do end do + end do $:END_GPU_PARALLEL_LOOP() end if @@ -4494,45 +4494,45 @@ contains if (viscous .or. (surface_tension)) then $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=4) - do i = momxb, E_idx - do j = is1%beg, is1%end - do k = is2%beg, is2%end - do l = is3%beg, is3%end - flux_src_vf(i)%sf(l, k, j) = 0._wp - end do + do i = momxb, E_idx + do j = is1%beg, is1%end + do k = is2%beg, is2%end + do l = is3%beg, is3%end + flux_src_vf(i)%sf(l, k, j) = 0._wp end do end do end do + end do $:END_GPU_PARALLEL_LOOP() end if if (chem_params%diffusion) then $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=4) - do i = E_idx, chemxe - do j = is1%beg, is1%end - do k = is2%beg, is2%end - do l = is3%beg, is3%end - if (i == E_idx .or. i >= chemxb) then - flux_src_vf(i)%sf(l, k, j) = 0._wp - end if - end do + do i = E_idx, chemxe + do j = is1%beg, is1%end + do k = is2%beg, is2%end + do l = is3%beg, is3%end + if (i == E_idx .or. i >= chemxb) then + flux_src_vf(i)%sf(l, k, j) = 0._wp + end if end do end do end do + end do $:END_GPU_PARALLEL_LOOP() end if if (qbmm) then $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=4) - do i = 1, 4 - do l = is3%beg, is3%end - do k = is2%beg, is2%end - do j = is1%beg, is1%end + 1 - mom_sp_rsz_vf(j, k, l, i) = mom_sp(i)%sf(l, k, j) - end do + do i = 1, 4 + do l = is3%beg, is3%end + do k = is2%beg, is2%end + do j = is1%beg, is1%end + 1 + mom_sp_rsz_vf(j, k, l, i) = mom_sp(i)%sf(l, k, j) end do end do end do + end do $:END_GPU_PARALLEL_LOOP() end if @@ -4591,111 +4591,111 @@ contains integer :: idx_rp(3) !!< Indices $(j,k,l)$ of 'right' point for averaging. $:GPU_PARALLEL_LOOP(collapse=3, private='[j,k,l,idx_rp, avg_v_int, avg_dvdx_int, avg_dvdy_int, avg_dvdz_int, Re_s, Re_b, vel_src_int, r_eff, divergence_cyl, stress_vector_shear, stress_normal_bulk, div_v_term_const]') - do l = iz%beg, iz%end - do k = iy%beg, iy%end - do j = ix%beg, ix%end + do l = iz%beg, iz%end + do k = iy%beg, iy%end + do j = ix%beg, ix%end + + ! Determine indices for the 'right' state for averaging across the interface + idx_rp = [j, k, l] + idx_rp(norm_dir) = idx_rp(norm_dir) + 1 + + ! Average velocities and their derivatives at the interface + ! For cylindrical: x-dir ~ axial (z_cyl), y-dir ~ radial (r_cyl), z-dir ~ azimuthal (theta_cyl) + $:GPU_LOOP(parallelism='[seq]') + do i_vel = 1, num_dims + avg_v_int(i_vel) = 0.5_wp*(velL_vf(i_vel)%sf(j, k, l) + velR_vf(i_vel)%sf(idx_rp(1), idx_rp(2), idx_rp(3))) + + avg_dvdx_int(i_vel) = 0.5_wp*(dvelL_dx_vf(i_vel)%sf(j, k, l) + & + dvelR_dx_vf(i_vel)%sf(idx_rp(1), idx_rp(2), idx_rp(3))) + if (num_dims > 1) then + avg_dvdy_int(i_vel) = 0.5_wp*(dvelL_dy_vf(i_vel)%sf(j, k, l) + & + dvelR_dy_vf(i_vel)%sf(idx_rp(1), idx_rp(2), idx_rp(3))) + else + avg_dvdy_int(i_vel) = 0.0_wp + end if + if (num_dims > 2) then + avg_dvdz_int(i_vel) = 0.5_wp*(dvelL_dz_vf(i_vel)%sf(j, k, l) + & + dvelR_dz_vf(i_vel)%sf(idx_rp(1), idx_rp(2), idx_rp(3))) + else + avg_dvdz_int(i_vel) = 0.0_wp + end if + end do - ! Determine indices for the 'right' state for averaging across the interface - idx_rp = [j, k, l] - idx_rp(norm_dir) = idx_rp(norm_dir) + 1 + ! Get Re numbers and interface velocity for viscous work + select case (norm_dir) + case (1) ! x-face (axial face in z_cyl direction) + Re_s = Re_avg_rsx_vf(j, k, l, 1) + Re_b = Re_avg_rsx_vf(j, k, l, 2) + vel_src_int = vel_src_rsx_vf(j, k, l, 1:num_dims) + r_eff = y_cc(k) + case (2) ! y-face (radial face in r_cyl direction) + Re_s = Re_avg_rsy_vf(k, j, l, 1) + Re_b = Re_avg_rsy_vf(k, j, l, 2) + vel_src_int = vel_src_rsy_vf(k, j, l, 1:num_dims) + r_eff = y_cb(k) + case (3) ! z-face (azimuthal face in theta_cyl direction) + Re_s = Re_avg_rsz_vf(l, k, j, 1) + Re_b = Re_avg_rsz_vf(l, k, j, 2) + vel_src_int = vel_src_rsz_vf(l, k, j, 1:num_dims) + r_eff = y_cc(k) + end select + + ! Divergence in cylindrical coordinates (vx=vz_cyl, vy=vr_cyl, vz=vtheta_cyl) + divergence_cyl = avg_dvdx_int(1) + avg_dvdy_int(2) + avg_v_int(2)/r_eff + if (num_dims > 2) then + divergence_cyl = divergence_cyl + avg_dvdz_int(3)/r_eff + end if - ! Average velocities and their derivatives at the interface - ! For cylindrical: x-dir ~ axial (z_cyl), y-dir ~ radial (r_cyl), z-dir ~ azimuthal (theta_cyl) - $:GPU_LOOP(parallelism='[seq]') - do i_vel = 1, num_dims - avg_v_int(i_vel) = 0.5_wp*(velL_vf(i_vel)%sf(j, k, l) + velR_vf(i_vel)%sf(idx_rp(1), idx_rp(2), idx_rp(3))) + stress_vector_shear = 0.0_wp + stress_normal_bulk = 0.0_wp - avg_dvdx_int(i_vel) = 0.5_wp*(dvelL_dx_vf(i_vel)%sf(j, k, l) + & - dvelR_dx_vf(i_vel)%sf(idx_rp(1), idx_rp(2), idx_rp(3))) + if (shear_stress) then + div_v_term_const = -(2.0_wp/3.0_wp)*divergence_cyl/Re_s + + select case (norm_dir) + case (1) ! X-face (axial normal, z_cyl) + stress_vector_shear(1) = (2.0_wp*avg_dvdx_int(1))/Re_s + div_v_term_const if (num_dims > 1) then - avg_dvdy_int(i_vel) = 0.5_wp*(dvelL_dy_vf(i_vel)%sf(j, k, l) + & - dvelR_dy_vf(i_vel)%sf(idx_rp(1), idx_rp(2), idx_rp(3))) - else - avg_dvdy_int(i_vel) = 0.0_wp + stress_vector_shear(2) = (avg_dvdy_int(1) + avg_dvdx_int(2))/Re_s end if if (num_dims > 2) then - avg_dvdz_int(i_vel) = 0.5_wp*(dvelL_dz_vf(i_vel)%sf(j, k, l) + & - dvelR_dz_vf(i_vel)%sf(idx_rp(1), idx_rp(2), idx_rp(3))) - else - avg_dvdz_int(i_vel) = 0.0_wp + stress_vector_shear(3) = (avg_dvdz_int(1)/r_eff + avg_dvdx_int(3))/Re_s end if - end do - - ! Get Re numbers and interface velocity for viscous work - select case (norm_dir) - case (1) ! x-face (axial face in z_cyl direction) - Re_s = Re_avg_rsx_vf(j, k, l, 1) - Re_b = Re_avg_rsx_vf(j, k, l, 2) - vel_src_int = vel_src_rsx_vf(j, k, l, 1:num_dims) - r_eff = y_cc(k) - case (2) ! y-face (radial face in r_cyl direction) - Re_s = Re_avg_rsy_vf(k, j, l, 1) - Re_b = Re_avg_rsy_vf(k, j, l, 2) - vel_src_int = vel_src_rsy_vf(k, j, l, 1:num_dims) - r_eff = y_cb(k) - case (3) ! z-face (azimuthal face in theta_cyl direction) - Re_s = Re_avg_rsz_vf(l, k, j, 1) - Re_b = Re_avg_rsz_vf(l, k, j, 2) - vel_src_int = vel_src_rsz_vf(l, k, j, 1:num_dims) - r_eff = y_cc(k) - end select - - ! Divergence in cylindrical coordinates (vx=vz_cyl, vy=vr_cyl, vz=vtheta_cyl) - divergence_cyl = avg_dvdx_int(1) + avg_dvdy_int(2) + avg_v_int(2)/r_eff - if (num_dims > 2) then - divergence_cyl = divergence_cyl + avg_dvdz_int(3)/r_eff - end if - - stress_vector_shear = 0.0_wp - stress_normal_bulk = 0.0_wp - - if (shear_stress) then - div_v_term_const = -(2.0_wp/3.0_wp)*divergence_cyl/Re_s - - select case (norm_dir) - case (1) ! X-face (axial normal, z_cyl) - stress_vector_shear(1) = (2.0_wp*avg_dvdx_int(1))/Re_s + div_v_term_const - if (num_dims > 1) then - stress_vector_shear(2) = (avg_dvdy_int(1) + avg_dvdx_int(2))/Re_s - end if - if (num_dims > 2) then - stress_vector_shear(3) = (avg_dvdz_int(1)/r_eff + avg_dvdx_int(3))/Re_s - end if - case (2) ! Y-face (radial normal, r_cyl) - if (num_dims > 1) then - stress_vector_shear(1) = (avg_dvdy_int(1) + avg_dvdx_int(2))/Re_s - stress_vector_shear(2) = (2.0_wp*avg_dvdy_int(2))/Re_s + div_v_term_const - if (num_dims > 2) then - stress_vector_shear(3) = (avg_dvdz_int(2)/r_eff - avg_v_int(3)/r_eff + avg_dvdy_int(3))/Re_s - end if - else - stress_vector_shear(1) = (2.0_wp*avg_dvdx_int(1))/Re_s + div_v_term_const - end if - case (3) ! Z-face (azimuthal normal, theta_cyl) + case (2) ! Y-face (radial normal, r_cyl) + if (num_dims > 1) then + stress_vector_shear(1) = (avg_dvdy_int(1) + avg_dvdx_int(2))/Re_s + stress_vector_shear(2) = (2.0_wp*avg_dvdy_int(2))/Re_s + div_v_term_const if (num_dims > 2) then - stress_vector_shear(1) = (avg_dvdz_int(1)/r_eff + avg_dvdx_int(3))/Re_s - stress_vector_shear(2) = (avg_dvdz_int(2)/r_eff - avg_v_int(3)/r_eff + avg_dvdy_int(3))/Re_s - stress_vector_shear(3) = (2.0_wp*(avg_dvdz_int(3)/r_eff + avg_v_int(2)/r_eff))/Re_s + div_v_term_const + stress_vector_shear(3) = (avg_dvdz_int(2)/r_eff - avg_v_int(3)/r_eff + avg_dvdy_int(3))/Re_s end if - end select + else + stress_vector_shear(1) = (2.0_wp*avg_dvdx_int(1))/Re_s + div_v_term_const + end if + case (3) ! Z-face (azimuthal normal, theta_cyl) + if (num_dims > 2) then + stress_vector_shear(1) = (avg_dvdz_int(1)/r_eff + avg_dvdx_int(3))/Re_s + stress_vector_shear(2) = (avg_dvdz_int(2)/r_eff - avg_v_int(3)/r_eff + avg_dvdy_int(3))/Re_s + stress_vector_shear(3) = (2.0_wp*(avg_dvdz_int(3)/r_eff + avg_v_int(2)/r_eff))/Re_s + div_v_term_const + end if + end select - $:GPU_LOOP(parallelism='[seq]') - do i_vel = 1, num_dims - flux_src_vf(momxb + i_vel - 1)%sf(j, k, l) = flux_src_vf(momxb + i_vel - 1)%sf(j, k, l) - stress_vector_shear(i_vel) - flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) - vel_src_int(i_vel)*stress_vector_shear(i_vel) - end do - end if + $:GPU_LOOP(parallelism='[seq]') + do i_vel = 1, num_dims + flux_src_vf(momxb + i_vel - 1)%sf(j, k, l) = flux_src_vf(momxb + i_vel - 1)%sf(j, k, l) - stress_vector_shear(i_vel) + flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) - vel_src_int(i_vel)*stress_vector_shear(i_vel) + end do + end if - if (bulk_stress) then - stress_normal_bulk = divergence_cyl/Re_b + if (bulk_stress) then + stress_normal_bulk = divergence_cyl/Re_b - flux_src_vf(momxb + norm_dir - 1)%sf(j, k, l) = flux_src_vf(momxb + norm_dir - 1)%sf(j, k, l) - stress_normal_bulk - flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) - vel_src_int(norm_dir)*stress_normal_bulk - end if + flux_src_vf(momxb + norm_dir - 1)%sf(j, k, l) = flux_src_vf(momxb + norm_dir - 1)%sf(j, k, l) - stress_normal_bulk + flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) - vel_src_int(norm_dir)*stress_normal_bulk + end if - end do end do end do + end do $:END_GPU_PARALLEL_LOOP() end subroutine s_compute_cylindrical_viscous_source_flux @@ -4751,86 +4751,86 @@ contains real(wp) :: divergence_v !< Velocity divergence at interface. $:GPU_PARALLEL_LOOP(collapse=3, private='[j_loop,k_loop,l_loop,idx_right_phys, vel_grad_avg, current_tau_shear, current_tau_bulk, vel_src_at_interface, Re_shear, Re_bulk, divergence_v, i_dim, vel_comp_idx]') - do l_loop = isz%beg, isz%end - do k_loop = isy%beg, isy%end - do j_loop = isx%beg, isx%end + do l_loop = isz%beg, isz%end + do k_loop = isy%beg, isy%end + do j_loop = isx%beg, isx%end + + idx_right_phys(1) = j_loop + idx_right_phys(2) = k_loop + idx_right_phys(3) = l_loop + idx_right_phys(norm_dir) = idx_right_phys(norm_dir) + 1 + + vel_grad_avg = 0.0_wp + do vel_comp_idx = 1, num_dims + vel_grad_avg(vel_comp_idx, 1) = 0.5_wp*(dvelL_dx_vf(vel_comp_idx)%sf(j_loop, k_loop, l_loop) + & + dvelR_dx_vf(vel_comp_idx)%sf(idx_right_phys(1), idx_right_phys(2), idx_right_phys(3))) + if (num_dims > 1) then + vel_grad_avg(vel_comp_idx, 2) = 0.5_wp*(dvelL_dy_vf(vel_comp_idx)%sf(j_loop, k_loop, l_loop) + & + dvelR_dy_vf(vel_comp_idx)%sf(idx_right_phys(1), idx_right_phys(2), idx_right_phys(3))) + end if + if (num_dims > 2) then + vel_grad_avg(vel_comp_idx, 3) = 0.5_wp*(dvelL_dz_vf(vel_comp_idx)%sf(j_loop, k_loop, l_loop) + & + dvelR_dz_vf(vel_comp_idx)%sf(idx_right_phys(1), idx_right_phys(2), idx_right_phys(3))) + end if + end do - idx_right_phys(1) = j_loop - idx_right_phys(2) = k_loop - idx_right_phys(3) = l_loop - idx_right_phys(norm_dir) = idx_right_phys(norm_dir) + 1 + divergence_v = 0.0_wp + do i_dim = 1, num_dims + divergence_v = divergence_v + vel_grad_avg(i_dim, i_dim) + end do - vel_grad_avg = 0.0_wp - do vel_comp_idx = 1, num_dims - vel_grad_avg(vel_comp_idx, 1) = 0.5_wp*(dvelL_dx_vf(vel_comp_idx)%sf(j_loop, k_loop, l_loop) + & - dvelR_dx_vf(vel_comp_idx)%sf(idx_right_phys(1), idx_right_phys(2), idx_right_phys(3))) - if (num_dims > 1) then - vel_grad_avg(vel_comp_idx, 2) = 0.5_wp*(dvelL_dy_vf(vel_comp_idx)%sf(j_loop, k_loop, l_loop) + & - dvelR_dy_vf(vel_comp_idx)%sf(idx_right_phys(1), idx_right_phys(2), idx_right_phys(3))) - end if - if (num_dims > 2) then - vel_grad_avg(vel_comp_idx, 3) = 0.5_wp*(dvelL_dz_vf(vel_comp_idx)%sf(j_loop, k_loop, l_loop) + & - dvelR_dz_vf(vel_comp_idx)%sf(idx_right_phys(1), idx_right_phys(2), idx_right_phys(3))) - end if + vel_src_at_interface = 0.0_wp + if (norm_dir == 1) then + Re_shear = Re_avg_rsx_vf(j_loop, k_loop, l_loop, 1) + Re_bulk = Re_avg_rsx_vf(j_loop, k_loop, l_loop, 2) + do i_dim = 1, num_dims + vel_src_at_interface(i_dim) = vel_src_rsx_vf(j_loop, k_loop, l_loop, i_dim) end do - - divergence_v = 0.0_wp + else if (norm_dir == 2) then + Re_shear = Re_avg_rsy_vf(k_loop, j_loop, l_loop, 1) + Re_bulk = Re_avg_rsy_vf(k_loop, j_loop, l_loop, 2) do i_dim = 1, num_dims - divergence_v = divergence_v + vel_grad_avg(i_dim, i_dim) + vel_src_at_interface(i_dim) = vel_src_rsy_vf(k_loop, j_loop, l_loop, i_dim) end do + else + Re_shear = Re_avg_rsz_vf(l_loop, k_loop, j_loop, 1) + Re_bulk = Re_avg_rsz_vf(l_loop, k_loop, j_loop, 2) + do i_dim = 1, num_dims + vel_src_at_interface(i_dim) = vel_src_rsz_vf(l_loop, k_loop, j_loop, i_dim) + end do + end if - vel_src_at_interface = 0.0_wp - if (norm_dir == 1) then - Re_shear = Re_avg_rsx_vf(j_loop, k_loop, l_loop, 1) - Re_bulk = Re_avg_rsx_vf(j_loop, k_loop, l_loop, 2) - do i_dim = 1, num_dims - vel_src_at_interface(i_dim) = vel_src_rsx_vf(j_loop, k_loop, l_loop, i_dim) - end do - else if (norm_dir == 2) then - Re_shear = Re_avg_rsy_vf(k_loop, j_loop, l_loop, 1) - Re_bulk = Re_avg_rsy_vf(k_loop, j_loop, l_loop, 2) - do i_dim = 1, num_dims - vel_src_at_interface(i_dim) = vel_src_rsy_vf(k_loop, j_loop, l_loop, i_dim) - end do - else - Re_shear = Re_avg_rsz_vf(l_loop, k_loop, j_loop, 1) - Re_bulk = Re_avg_rsz_vf(l_loop, k_loop, j_loop, 2) - do i_dim = 1, num_dims - vel_src_at_interface(i_dim) = vel_src_rsz_vf(l_loop, k_loop, j_loop, i_dim) - end do - end if - - if (shear_stress) then - ! current_tau_shear = 0.0_wp - call s_calculate_shear_stress_tensor(vel_grad_avg, Re_shear, divergence_v, current_tau_shear) + if (shear_stress) then + ! current_tau_shear = 0.0_wp + call s_calculate_shear_stress_tensor(vel_grad_avg, Re_shear, divergence_v, current_tau_shear) - do i_dim = 1, num_dims - flux_src_vf(momxb + i_dim - 1)%sf(j_loop, k_loop, l_loop) = & - flux_src_vf(momxb + i_dim - 1)%sf(j_loop, k_loop, l_loop) - current_tau_shear(norm_dir, i_dim) + do i_dim = 1, num_dims + flux_src_vf(momxb + i_dim - 1)%sf(j_loop, k_loop, l_loop) = & + flux_src_vf(momxb + i_dim - 1)%sf(j_loop, k_loop, l_loop) - current_tau_shear(norm_dir, i_dim) - flux_src_vf(E_idx)%sf(j_loop, k_loop, l_loop) = & - flux_src_vf(E_idx)%sf(j_loop, k_loop, l_loop) - & - vel_src_at_interface(i_dim)*current_tau_shear(norm_dir, i_dim) - end do - end if + flux_src_vf(E_idx)%sf(j_loop, k_loop, l_loop) = & + flux_src_vf(E_idx)%sf(j_loop, k_loop, l_loop) - & + vel_src_at_interface(i_dim)*current_tau_shear(norm_dir, i_dim) + end do + end if - if (bulk_stress) then - ! current_tau_bulk = 0.0_wp - call s_calculate_bulk_stress_tensor(Re_bulk, divergence_v, current_tau_bulk) + if (bulk_stress) then + ! current_tau_bulk = 0.0_wp + call s_calculate_bulk_stress_tensor(Re_bulk, divergence_v, current_tau_bulk) - do i_dim = 1, num_dims - flux_src_vf(momxb + i_dim - 1)%sf(j_loop, k_loop, l_loop) = & - flux_src_vf(momxb + i_dim - 1)%sf(j_loop, k_loop, l_loop) - current_tau_bulk(norm_dir, i_dim) + do i_dim = 1, num_dims + flux_src_vf(momxb + i_dim - 1)%sf(j_loop, k_loop, l_loop) = & + flux_src_vf(momxb + i_dim - 1)%sf(j_loop, k_loop, l_loop) - current_tau_bulk(norm_dir, i_dim) - flux_src_vf(E_idx)%sf(j_loop, k_loop, l_loop) = & - flux_src_vf(E_idx)%sf(j_loop, k_loop, l_loop) - & - vel_src_at_interface(i_dim)*current_tau_bulk(norm_dir, i_dim) - end do - end if + flux_src_vf(E_idx)%sf(j_loop, k_loop, l_loop) = & + flux_src_vf(E_idx)%sf(j_loop, k_loop, l_loop) - & + vel_src_at_interface(i_dim)*current_tau_bulk(norm_dir, i_dim) + end do + end if - end do end do end do + end do $:END_GPU_PARALLEL_LOOP() end subroutine s_compute_cartesian_viscous_source_flux @@ -4913,153 +4913,153 @@ contains ! Reshaping Outputted Data in y-direction if (norm_dir == 2) then $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=4) - do i = 1, sys_size - do l = is3%beg, is3%end - do j = is1%beg, is1%end - do k = is2%beg, is2%end - flux_vf(i)%sf(k, j, l) = & - flux_rsy_vf(j, k, l, i) - end do + do i = 1, sys_size + do l = is3%beg, is3%end + do j = is1%beg, is1%end + do k = is2%beg, is2%end + flux_vf(i)%sf(k, j, l) = & + flux_rsy_vf(j, k, l, i) end do end do end do + end do $:END_GPU_PARALLEL_LOOP() if (cyl_coord) then $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=4) - do i = 1, sys_size - do l = is3%beg, is3%end - do j = is1%beg, is1%end - do k = is2%beg, is2%end - flux_gsrc_vf(i)%sf(k, j, l) = & - flux_gsrc_rsy_vf(j, k, l, i) - end do + do i = 1, sys_size + do l = is3%beg, is3%end + do j = is1%beg, is1%end + do k = is2%beg, is2%end + flux_gsrc_vf(i)%sf(k, j, l) = & + flux_gsrc_rsy_vf(j, k, l, i) end do end do end do + end do $:END_GPU_PARALLEL_LOOP() end if $:GPU_PARALLEL_LOOP(private='[j,k,l]', collapse=3) - do l = is3%beg, is3%end - do j = is1%beg, is1%end - do k = is2%beg, is2%end - flux_src_vf(advxb)%sf(k, j, l) = & - flux_src_rsy_vf(j, k, l, advxb) - end do + do l = is3%beg, is3%end + do j = is1%beg, is1%end + do k = is2%beg, is2%end + flux_src_vf(advxb)%sf(k, j, l) = & + flux_src_rsy_vf(j, k, l, advxb) end do end do + end do $:END_GPU_PARALLEL_LOOP() if (riemann_solver == 1 .or. riemann_solver == 4) then $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=4) - do i = advxb + 1, advxe - do l = is3%beg, is3%end - do j = is1%beg, is1%end - do k = is2%beg, is2%end - flux_src_vf(i)%sf(k, j, l) = & - flux_src_rsy_vf(j, k, l, i) - end do + do i = advxb + 1, advxe + do l = is3%beg, is3%end + do j = is1%beg, is1%end + do k = is2%beg, is2%end + flux_src_vf(i)%sf(k, j, l) = & + flux_src_rsy_vf(j, k, l, i) end do end do end do + end do $:END_GPU_PARALLEL_LOOP() end if ! Reshaping Outputted Data in z-direction elseif (norm_dir == 3) then $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=4) - do i = 1, sys_size - do j = is1%beg, is1%end - do k = is2%beg, is2%end - do l = is3%beg, is3%end + do i = 1, sys_size + do j = is1%beg, is1%end + do k = is2%beg, is2%end + do l = is3%beg, is3%end - flux_vf(i)%sf(l, k, j) = & - flux_rsz_vf(j, k, l, i) - end do + flux_vf(i)%sf(l, k, j) = & + flux_rsz_vf(j, k, l, i) end do end do end do + end do $:END_GPU_PARALLEL_LOOP() if (grid_geometry == 3) then $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=4) - do i = 1, sys_size - do j = is1%beg, is1%end - do k = is2%beg, is2%end - do l = is3%beg, is3%end + do i = 1, sys_size + do j = is1%beg, is1%end + do k = is2%beg, is2%end + do l = is3%beg, is3%end - flux_gsrc_vf(i)%sf(l, k, j) = & - flux_gsrc_rsz_vf(j, k, l, i) - end do + flux_gsrc_vf(i)%sf(l, k, j) = & + flux_gsrc_rsz_vf(j, k, l, i) end do end do end do + end do $:END_GPU_PARALLEL_LOOP() end if $:GPU_PARALLEL_LOOP(private='[i,k,l]', collapse=3) - do j = is1%beg, is1%end - do k = is2%beg, is2%end - do l = is3%beg, is3%end - flux_src_vf(advxb)%sf(l, k, j) = & - flux_src_rsz_vf(j, k, l, advxb) - end do + do j = is1%beg, is1%end + do k = is2%beg, is2%end + do l = is3%beg, is3%end + flux_src_vf(advxb)%sf(l, k, j) = & + flux_src_rsz_vf(j, k, l, advxb) end do end do + end do $:END_GPU_PARALLEL_LOOP() if (riemann_solver == 1 .or. riemann_solver == 4) then $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=4) - do i = advxb + 1, advxe - do j = is1%beg, is1%end - do k = is2%beg, is2%end - do l = is3%beg, is3%end - flux_src_vf(i)%sf(l, k, j) = & - flux_src_rsz_vf(j, k, l, i) - end do + do i = advxb + 1, advxe + do j = is1%beg, is1%end + do k = is2%beg, is2%end + do l = is3%beg, is3%end + flux_src_vf(i)%sf(l, k, j) = & + flux_src_rsz_vf(j, k, l, i) end do end do end do + end do $:END_GPU_PARALLEL_LOOP() end if elseif (norm_dir == 1) then $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=4) - do i = 1, sys_size - do l = is3%beg, is3%end - do k = is2%beg, is2%end - do j = is1%beg, is1%end - flux_vf(i)%sf(j, k, l) = & - flux_rsx_vf(j, k, l, i) - end do + do i = 1, sys_size + do l = is3%beg, is3%end + do k = is2%beg, is2%end + do j = is1%beg, is1%end + flux_vf(i)%sf(j, k, l) = & + flux_rsx_vf(j, k, l, i) end do end do end do + end do $:END_GPU_PARALLEL_LOOP() $:GPU_PARALLEL_LOOP(private='[j,k,l]', collapse=3) - do l = is3%beg, is3%end - do k = is2%beg, is2%end - do j = is1%beg, is1%end - flux_src_vf(advxb)%sf(j, k, l) = & - flux_src_rsx_vf(j, k, l, advxb) - end do + do l = is3%beg, is3%end + do k = is2%beg, is2%end + do j = is1%beg, is1%end + flux_src_vf(advxb)%sf(j, k, l) = & + flux_src_rsx_vf(j, k, l, advxb) end do end do + end do $:END_GPU_PARALLEL_LOOP() if (riemann_solver == 1 .or. riemann_solver == 4) then $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=4) - do i = advxb + 1, advxe - do l = is3%beg, is3%end - do k = is2%beg, is2%end - do j = is1%beg, is1%end - flux_src_vf(i)%sf(j, k, l) = & - flux_src_rsx_vf(j, k, l, i) - end do + do i = advxb + 1, advxe + do l = is3%beg, is3%end + do k = is2%beg, is2%end + do j = is1%beg, is1%end + flux_src_vf(i)%sf(j, k, l) = & + flux_src_rsx_vf(j, k, l, i) end do end do end do + end do $:END_GPU_PARALLEL_LOOP() end if end if diff --git a/src/simulation/m_surface_tension.fpp b/src/simulation/m_surface_tension.fpp index c6ffa4a18..a6499901a 100644 --- a/src/simulation/m_surface_tension.fpp +++ b/src/simulation/m_surface_tension.fpp @@ -89,139 +89,139 @@ contains if (id == 1) then $:GPU_PARALLEL_LOOP(collapse=3, private='[j,k,l,Omega, w1L, w2L, w3L, w1R, w2R, w3R, w1, w2, w3, normWL, normWR, normW]') - do l = isz%beg, isz%end - do k = isy%beg, isy%end - do j = isx%beg, isx%end + do l = isz%beg, isz%end + do k = isy%beg, isy%end + do j = isx%beg, isx%end - w1L = gL_x(j, k, l, 1) - w2L = gL_x(j, k, l, 2) - w3L = 0._wp - if (p > 0) w3L = gL_x(j, k, l, 3) + w1L = gL_x(j, k, l, 1) + w2L = gL_x(j, k, l, 2) + w3L = 0._wp + if (p > 0) w3L = gL_x(j, k, l, 3) - w1R = gR_x(j + 1, k, l, 1) - w2R = gR_x(j + 1, k, l, 2) - w3R = 0._wp - if (p > 0) w3R = gR_x(j + 1, k, l, 3) + w1R = gR_x(j + 1, k, l, 1) + w2R = gR_x(j + 1, k, l, 2) + w3R = 0._wp + if (p > 0) w3R = gR_x(j + 1, k, l, 3) - normWL = gL_x(j, k, l, num_dims + 1) - normWR = gR_x(j + 1, k, l, num_dims + 1) + normWL = gL_x(j, k, l, num_dims + 1) + normWR = gR_x(j + 1, k, l, num_dims + 1) - w1 = (w1L + w1R)/2._wp - w2 = (w2L + w2R)/2._wp - w3 = (w3L + w3R)/2._wp - normW = (normWL + normWR)/2._wp + w1 = (w1L + w1R)/2._wp + w2 = (w2L + w2R)/2._wp + w3 = (w3L + w3R)/2._wp + normW = (normWL + normWR)/2._wp - if (normW > capillary_cutoff) then - @:compute_capillary_stress_tensor() + if (normW > capillary_cutoff) then + @:compute_capillary_stress_tensor() - do i = 1, num_dims + do i = 1, num_dims - flux_src_vf(momxb + i - 1)%sf(j, k, l) = & - flux_src_vf(momxb + i - 1)%sf(j, k, l) + Omega(1, i) + flux_src_vf(momxb + i - 1)%sf(j, k, l) = & + flux_src_vf(momxb + i - 1)%sf(j, k, l) + Omega(1, i) - flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) + & - Omega(1, i)*vSrc_rsx_vf(j, k, l, i) + flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) + & + Omega(1, i)*vSrc_rsx_vf(j, k, l, i) - end do + end do - flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) + & - sigma*c_divs(num_dims + 1)%sf(j, k, l)*vSrc_rsx_vf(j, k, l, 1) - end if - end do + flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) + & + sigma*c_divs(num_dims + 1)%sf(j, k, l)*vSrc_rsx_vf(j, k, l, 1) + end if end do end do + end do $:END_GPU_PARALLEL_LOOP() elseif (id == 2) then $:GPU_PARALLEL_LOOP(collapse=3, private='[j,k,l,Omega, w1L, w2L, w3L, w1R, w2R, w3R, w1, w2, w3, normWL, normWR, normW]') - do l = isz%beg, isz%end - do k = isy%beg, isy%end - do j = isx%beg, isx%end + do l = isz%beg, isz%end + do k = isy%beg, isy%end + do j = isx%beg, isx%end - w1L = gL_y(k, j, l, 1) - w2L = gL_y(k, j, l, 2) - w3L = 0._wp - if (p > 0) w3L = gL_y(k, j, l, 3) + w1L = gL_y(k, j, l, 1) + w2L = gL_y(k, j, l, 2) + w3L = 0._wp + if (p > 0) w3L = gL_y(k, j, l, 3) - w1R = gR_y(k + 1, j, l, 1) - w2R = gR_y(k + 1, j, l, 2) - w3R = 0._wp - if (p > 0) w3R = gR_y(k + 1, j, l, 3) + w1R = gR_y(k + 1, j, l, 1) + w2R = gR_y(k + 1, j, l, 2) + w3R = 0._wp + if (p > 0) w3R = gR_y(k + 1, j, l, 3) - normWL = gL_y(k, j, l, num_dims + 1) - normWR = gR_y(k + 1, j, l, num_dims + 1) + normWL = gL_y(k, j, l, num_dims + 1) + normWR = gR_y(k + 1, j, l, num_dims + 1) - w1 = (w1L + w1R)/2._wp - w2 = (w2L + w2R)/2._wp - w3 = (w3L + w3R)/2._wp - normW = (normWL + normWR)/2._wp + w1 = (w1L + w1R)/2._wp + w2 = (w2L + w2R)/2._wp + w3 = (w3L + w3R)/2._wp + normW = (normWL + normWR)/2._wp - if (normW > capillary_cutoff) then - @:compute_capillary_stress_tensor() + if (normW > capillary_cutoff) then + @:compute_capillary_stress_tensor() - do i = 1, num_dims + do i = 1, num_dims - flux_src_vf(momxb + i - 1)%sf(j, k, l) = & - flux_src_vf(momxb + i - 1)%sf(j, k, l) + Omega(2, i) + flux_src_vf(momxb + i - 1)%sf(j, k, l) = & + flux_src_vf(momxb + i - 1)%sf(j, k, l) + Omega(2, i) - flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) + & - Omega(2, i)*vSrc_rsy_vf(k, j, l, i) + flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) + & + Omega(2, i)*vSrc_rsy_vf(k, j, l, i) - end do + end do - flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) + & - sigma*c_divs(num_dims + 1)%sf(j, k, l)*vSrc_rsy_vf(k, j, l, 2) - end if - end do + flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) + & + sigma*c_divs(num_dims + 1)%sf(j, k, l)*vSrc_rsy_vf(k, j, l, 2) + end if end do end do + end do $:END_GPU_PARALLEL_LOOP() elseif (id == 3) then $:GPU_PARALLEL_LOOP(collapse=3, private='[j,k,l,Omega, w1L, w2L, w3L, w1R, w2R, w3R, w1, w2, w3, normWL, normWR, normW]') - do l = isz%beg, isz%end - do k = isy%beg, isy%end - do j = isx%beg, isx%end + do l = isz%beg, isz%end + do k = isy%beg, isy%end + do j = isx%beg, isx%end - w1L = gL_z(l, k, j, 1) - w2L = gL_z(l, k, j, 2) - w3L = 0._wp - if (p > 0) w3L = gL_z(l, k, j, 3) + w1L = gL_z(l, k, j, 1) + w2L = gL_z(l, k, j, 2) + w3L = 0._wp + if (p > 0) w3L = gL_z(l, k, j, 3) - w1R = gR_z(l + 1, k, j, 1) - w2R = gR_z(l + 1, k, j, 2) - w3R = 0._wp - if (p > 0) w3R = gR_z(l + 1, k, j, 3) + w1R = gR_z(l + 1, k, j, 1) + w2R = gR_z(l + 1, k, j, 2) + w3R = 0._wp + if (p > 0) w3R = gR_z(l + 1, k, j, 3) - normWL = gL_z(l, k, j, num_dims + 1) - normWR = gR_z(l + 1, k, j, num_dims + 1) + normWL = gL_z(l, k, j, num_dims + 1) + normWR = gR_z(l + 1, k, j, num_dims + 1) - w1 = (w1L + w1R)/2._wp - w2 = (w2L + w2R)/2._wp - w3 = (w3L + w3R)/2._wp - normW = (normWL + normWR)/2._wp + w1 = (w1L + w1R)/2._wp + w2 = (w2L + w2R)/2._wp + w3 = (w3L + w3R)/2._wp + normW = (normWL + normWR)/2._wp - if (normW > capillary_cutoff) then - @:compute_capillary_stress_tensor() + if (normW > capillary_cutoff) then + @:compute_capillary_stress_tensor() - do i = 1, num_dims + do i = 1, num_dims - flux_src_vf(momxb + i - 1)%sf(j, k, l) = & - flux_src_vf(momxb + i - 1)%sf(j, k, l) + Omega(3, i) + flux_src_vf(momxb + i - 1)%sf(j, k, l) = & + flux_src_vf(momxb + i - 1)%sf(j, k, l) + Omega(3, i) - flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) + & - Omega(3, i)*vSrc_rsz_vf(l, k, j, i) + flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) + & + Omega(3, i)*vSrc_rsz_vf(l, k, j, i) - end do + end do - flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) + & - sigma*c_divs(num_dims + 1)%sf(j, k, l)*vSrc_rsz_vf(l, k, j, 3) - end if - end do + flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) + & + sigma*c_divs(num_dims + 1)%sf(j, k, l)*vSrc_rsz_vf(l, k, j, 3) + end if end do end do + end do $:END_GPU_PARALLEL_LOOP() end if @@ -244,56 +244,56 @@ contains ! compute gradient components $:GPU_PARALLEL_LOOP(private='[j,k,l]', collapse=3) - do l = 0, p - do k = 0, n - do j = 0, m - c_divs(1)%sf(j, k, l) = 1._wp/(x_cc(j + 1) - x_cc(j - 1))* & - (q_prim_vf(c_idx)%sf(j + 1, k, l) - q_prim_vf(c_idx)%sf(j - 1, k, l)) - end do + do l = 0, p + do k = 0, n + do j = 0, m + c_divs(1)%sf(j, k, l) = 1._wp/(x_cc(j + 1) - x_cc(j - 1))* & + (q_prim_vf(c_idx)%sf(j + 1, k, l) - q_prim_vf(c_idx)%sf(j - 1, k, l)) end do end do + end do $:END_GPU_PARALLEL_LOOP() $:GPU_PARALLEL_LOOP(private='[j,k,l]', collapse=3) - do l = 0, p - do k = 0, n - do j = 0, m - c_divs(2)%sf(j, k, l) = 1._wp/(y_cc(k + 1) - y_cc(k - 1))* & - (q_prim_vf(c_idx)%sf(j, k + 1, l) - q_prim_vf(c_idx)%sf(j, k - 1, l)) - end do + do l = 0, p + do k = 0, n + do j = 0, m + c_divs(2)%sf(j, k, l) = 1._wp/(y_cc(k + 1) - y_cc(k - 1))* & + (q_prim_vf(c_idx)%sf(j, k + 1, l) - q_prim_vf(c_idx)%sf(j, k - 1, l)) end do end do + end do $:END_GPU_PARALLEL_LOOP() if (p > 0) then $:GPU_PARALLEL_LOOP(private='[j,k,l]', collapse=3) - do l = 0, p - do k = 0, n - do j = 0, m - c_divs(3)%sf(j, k, l) = 1._wp/(z_cc(l + 1) - z_cc(l - 1))* & - (q_prim_vf(c_idx)%sf(j, k, l + 1) - q_prim_vf(c_idx)%sf(j, k, l - 1)) - end do + do l = 0, p + do k = 0, n + do j = 0, m + c_divs(3)%sf(j, k, l) = 1._wp/(z_cc(l + 1) - z_cc(l - 1))* & + (q_prim_vf(c_idx)%sf(j, k, l + 1) - q_prim_vf(c_idx)%sf(j, k, l - 1)) end do end do + end do $:END_GPU_PARALLEL_LOOP() end if $:GPU_PARALLEL_LOOP(private='[j,k,l]', collapse=3) - do l = 0, p - do k = 0, n - do j = 0, m - c_divs(num_dims + 1)%sf(j, k, l) = 0._wp - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_dims - c_divs(num_dims + 1)%sf(j, k, l) = & - c_divs(num_dims + 1)%sf(j, k, l) + & - c_divs(i)%sf(j, k, l)**2._wp - end do + do l = 0, p + do k = 0, n + do j = 0, m + c_divs(num_dims + 1)%sf(j, k, l) = 0._wp + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_dims c_divs(num_dims + 1)%sf(j, k, l) = & - sqrt(c_divs(num_dims + 1)%sf(j, k, l)) + c_divs(num_dims + 1)%sf(j, k, l) + & + c_divs(i)%sf(j, k, l)**2._wp end do + c_divs(num_dims + 1)%sf(j, k, l) = & + sqrt(c_divs(num_dims + 1)%sf(j, k, l)) end do end do + end do $:END_GPU_PARALLEL_LOOP() call s_populate_capillary_buffers(c_divs, bc_type) @@ -344,42 +344,42 @@ contains if (recon_dir == 1) then $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=4) - do i = iv%beg, iv%end - do l = is3%beg, is3%end - do k = is2%beg, is2%end - do j = is1%beg, is1%end - vL_x(j, k, l, i) = v_vf(i)%sf(j, k, l) - vR_x(j, k, l, i) = v_vf(i)%sf(j, k, l) - end do + do i = iv%beg, iv%end + do l = is3%beg, is3%end + do k = is2%beg, is2%end + do j = is1%beg, is1%end + vL_x(j, k, l, i) = v_vf(i)%sf(j, k, l) + vR_x(j, k, l, i) = v_vf(i)%sf(j, k, l) end do end do end do + end do $:END_GPU_PARALLEL_LOOP() else if (recon_dir == 2) then $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=4) - do i = iv%beg, iv%end - do l = is3%beg, is3%end - do k = is2%beg, is2%end - do j = is1%beg, is1%end - vL_y(j, k, l, i) = v_vf(i)%sf(k, j, l) - vR_y(j, k, l, i) = v_vf(i)%sf(k, j, l) - end do + do i = iv%beg, iv%end + do l = is3%beg, is3%end + do k = is2%beg, is2%end + do j = is1%beg, is1%end + vL_y(j, k, l, i) = v_vf(i)%sf(k, j, l) + vR_y(j, k, l, i) = v_vf(i)%sf(k, j, l) end do end do end do + end do $:END_GPU_PARALLEL_LOOP() else if (recon_dir == 3) then $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=4) - do i = iv%beg, iv%end - do l = is3%beg, is3%end - do k = is2%beg, is2%end - do j = is1%beg, is1%end - vL_z(j, k, l, i) = v_vf(i)%sf(l, k, j) - vR_z(j, k, l, i) = v_vf(i)%sf(l, k, j) - end do + do i = iv%beg, iv%end + do l = is3%beg, is3%end + do k = is2%beg, is2%end + do j = is1%beg, is1%end + vL_z(j, k, l, i) = v_vf(i)%sf(l, k, j) + vR_z(j, k, l, i) = v_vf(i)%sf(l, k, j) end do end do end do + end do $:END_GPU_PARALLEL_LOOP() end if end if diff --git a/src/simulation/m_time_steppers.fpp b/src/simulation/m_time_steppers.fpp index 9a8d38796..801e301a9 100644 --- a/src/simulation/m_time_steppers.fpp +++ b/src/simulation/m_time_steppers.fpp @@ -508,50 +508,50 @@ contains if (bubbles_lagrange .and. .not. adap_dt) call s_update_lagrange_tdv_rk(stage=s) $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=4) - do i = 1, sys_size - do l = 0, p - do k = 0, n - do j = 0, m - if (s == 1 .and. nstage > 1) then - q_cons_ts(stor)%vf(i)%sf(j, k, l) = & - q_cons_ts(1)%vf(i)%sf(j, k, l) - end if - q_cons_ts(1)%vf(i)%sf(j, k, l) = & - (rk_coef(s, 1)*q_cons_ts(1)%vf(i)%sf(j, k, l) & - + rk_coef(s, 2)*q_cons_ts(stor)%vf(i)%sf(j, k, l) & - + rk_coef(s, 3)*dt*rhs_vf(i)%sf(j, k, l))/rk_coef(s, 4) - end do + do i = 1, sys_size + do l = 0, p + do k = 0, n + do j = 0, m + if (s == 1 .and. nstage > 1) then + q_cons_ts(stor)%vf(i)%sf(j, k, l) = & + q_cons_ts(1)%vf(i)%sf(j, k, l) + end if + q_cons_ts(1)%vf(i)%sf(j, k, l) = & + (rk_coef(s, 1)*q_cons_ts(1)%vf(i)%sf(j, k, l) & + + rk_coef(s, 2)*q_cons_ts(stor)%vf(i)%sf(j, k, l) & + + rk_coef(s, 3)*dt*rhs_vf(i)%sf(j, k, l))/rk_coef(s, 4) end do end do end do + end do $:END_GPU_PARALLEL_LOOP() !Evolve pb and mv for non-polytropic qbmm if (qbmm .and. (.not. polytropic)) then $:GPU_PARALLEL_LOOP(private='[i,j,k,l,q]', collapse=5) - do i = 1, nb - do l = 0, p - do k = 0, n - do j = 0, m - do q = 1, nnode - if (s == 1 .and. nstage > 1) then - pb_ts(stor)%sf(j, k, l, q, i) = & - pb_ts(1)%sf(j, k, l, q, i) - mv_ts(stor)%sf(j, k, l, q, i) = & - mv_ts(1)%sf(j, k, l, q, i) - end if - pb_ts(1)%sf(j, k, l, q, i) = & - (rk_coef(s, 1)*pb_ts(1)%sf(j, k, l, q, i) & - + rk_coef(s, 2)*pb_ts(stor)%sf(j, k, l, q, i) & - + rk_coef(s, 3)*dt*rhs_pb(j, k, l, q, i))/rk_coef(s, 4) - mv_ts(1)%sf(j, k, l, q, i) = & - (rk_coef(s, 1)*mv_ts(1)%sf(j, k, l, q, i) & - + rk_coef(s, 2)*mv_ts(stor)%sf(j, k, l, q, i) & - + rk_coef(s, 3)*dt*rhs_mv(j, k, l, q, i))/rk_coef(s, 4) - end do + do i = 1, nb + do l = 0, p + do k = 0, n + do j = 0, m + do q = 1, nnode + if (s == 1 .and. nstage > 1) then + pb_ts(stor)%sf(j, k, l, q, i) = & + pb_ts(1)%sf(j, k, l, q, i) + mv_ts(stor)%sf(j, k, l, q, i) = & + mv_ts(1)%sf(j, k, l, q, i) + end if + pb_ts(1)%sf(j, k, l, q, i) = & + (rk_coef(s, 1)*pb_ts(1)%sf(j, k, l, q, i) & + + rk_coef(s, 2)*pb_ts(stor)%sf(j, k, l, q, i) & + + rk_coef(s, 3)*dt*rhs_pb(j, k, l, q, i))/rk_coef(s, 4) + mv_ts(1)%sf(j, k, l, q, i) = & + (rk_coef(s, 1)*mv_ts(1)%sf(j, k, l, q, i) & + + rk_coef(s, 2)*mv_ts(stor)%sf(j, k, l, q, i) & + + rk_coef(s, 3)*dt*rhs_mv(j, k, l, q, i))/rk_coef(s, 4) end do end do end do end do + end do $:END_GPU_PARALLEL_LOOP() end if @@ -683,22 +683,22 @@ contains end if $:GPU_PARALLEL_LOOP(collapse=3, private='[j,k,l,vel, alpha, Re]') - do l = 0, p - do k = 0, n - do j = 0, m - if (igr) then - call s_compute_enthalpy(q_cons_ts(1)%vf, pres, rho, gamma, pi_inf, Re, H, alpha, vel, vel_sum, j, k, l) - else - call s_compute_enthalpy(q_prim_vf, pres, rho, gamma, pi_inf, Re, H, alpha, vel, vel_sum, j, k, l) - end if + do l = 0, p + do k = 0, n + do j = 0, m + if (igr) then + call s_compute_enthalpy(q_cons_ts(1)%vf, pres, rho, gamma, pi_inf, Re, H, alpha, vel, vel_sum, j, k, l) + else + call s_compute_enthalpy(q_prim_vf, pres, rho, gamma, pi_inf, Re, H, alpha, vel, vel_sum, j, k, l) + end if - ! Compute mixture sound speed - call s_compute_speed_of_sound(pres, rho, gamma, pi_inf, H, alpha, vel_sum, 0._wp, c) + ! Compute mixture sound speed + call s_compute_speed_of_sound(pres, rho, gamma, pi_inf, H, alpha, vel_sum, 0._wp, c) - call s_compute_dt_from_cfl(vel, c, max_dt, rho, Re, j, k, l) - end do + call s_compute_dt_from_cfl(vel, c, max_dt, rho, Re, j, k, l) end do end do + end do $:END_GPU_PARALLEL_LOOP() #:call GPU_PARALLEL(copyout='[dt_local]', copyin='[max_dt]') @@ -731,16 +731,16 @@ contains call s_compute_body_forces_rhs(q_prim_vf_in, q_cons_vf, rhs_vf_in) $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=4) - do i = momxb, E_idx - do l = 0, p - do k = 0, n - do j = 0, m - q_cons_vf(i)%sf(j, k, l) = q_cons_vf(i)%sf(j, k, l) + & - ldt*rhs_vf_in(i)%sf(j, k, l) - end do + do i = momxb, E_idx + do l = 0, p + do k = 0, n + do j = 0, m + q_cons_vf(i)%sf(j, k, l) = q_cons_vf(i)%sf(j, k, l) + & + ldt*rhs_vf_in(i)%sf(j, k, l) end do end do end do + end do $:END_GPU_PARALLEL_LOOP() call nvtxEndRange @@ -758,66 +758,66 @@ contains if (t_step == t_step_start) then $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=4) - do i = 1, sys_size - do l = 0, p - do k = 0, n - do j = 0, m - q_prim_ts(3)%vf(i)%sf(j, k, l) = q_prim_vf(i)%sf(j, k, l) - end do + do i = 1, sys_size + do l = 0, p + do k = 0, n + do j = 0, m + q_prim_ts(3)%vf(i)%sf(j, k, l) = q_prim_vf(i)%sf(j, k, l) end do end do end do + end do $:END_GPU_PARALLEL_LOOP() elseif (t_step == t_step_start + 1) then $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=4) - do i = 1, sys_size - do l = 0, p - do k = 0, n - do j = 0, m - q_prim_ts(2)%vf(i)%sf(j, k, l) = q_prim_vf(i)%sf(j, k, l) - end do + do i = 1, sys_size + do l = 0, p + do k = 0, n + do j = 0, m + q_prim_ts(2)%vf(i)%sf(j, k, l) = q_prim_vf(i)%sf(j, k, l) end do end do end do + end do $:END_GPU_PARALLEL_LOOP() elseif (t_step == t_step_start + 2) then $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=4) - do i = 1, sys_size - do l = 0, p - do k = 0, n - do j = 0, m - q_prim_ts(1)%vf(i)%sf(j, k, l) = q_prim_vf(i)%sf(j, k, l) - end do + do i = 1, sys_size + do l = 0, p + do k = 0, n + do j = 0, m + q_prim_ts(1)%vf(i)%sf(j, k, l) = q_prim_vf(i)%sf(j, k, l) end do end do end do + end do $:END_GPU_PARALLEL_LOOP() elseif (t_step == t_step_start + 3) then $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=4) - do i = 1, sys_size - do l = 0, p - do k = 0, n - do j = 0, m - q_prim_ts(0)%vf(i)%sf(j, k, l) = q_prim_vf(i)%sf(j, k, l) - end do + do i = 1, sys_size + do l = 0, p + do k = 0, n + do j = 0, m + q_prim_ts(0)%vf(i)%sf(j, k, l) = q_prim_vf(i)%sf(j, k, l) end do end do end do + end do $:END_GPU_PARALLEL_LOOP() else ! All other timesteps $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=4) - do i = 1, sys_size - do l = 0, p - do k = 0, n - do j = 0, m - q_prim_ts(3)%vf(i)%sf(j, k, l) = q_prim_ts(2)%vf(i)%sf(j, k, l) - q_prim_ts(2)%vf(i)%sf(j, k, l) = q_prim_ts(1)%vf(i)%sf(j, k, l) - q_prim_ts(1)%vf(i)%sf(j, k, l) = q_prim_ts(0)%vf(i)%sf(j, k, l) - q_prim_ts(0)%vf(i)%sf(j, k, l) = q_prim_vf(i)%sf(j, k, l) - end do + do i = 1, sys_size + do l = 0, p + do k = 0, n + do j = 0, m + q_prim_ts(3)%vf(i)%sf(j, k, l) = q_prim_ts(2)%vf(i)%sf(j, k, l) + q_prim_ts(2)%vf(i)%sf(j, k, l) = q_prim_ts(1)%vf(i)%sf(j, k, l) + q_prim_ts(1)%vf(i)%sf(j, k, l) = q_prim_ts(0)%vf(i)%sf(j, k, l) + q_prim_ts(0)%vf(i)%sf(j, k, l) = q_prim_vf(i)%sf(j, k, l) end do end do end do + end do $:END_GPU_PARALLEL_LOOP() end if diff --git a/src/simulation/m_viscous.fpp b/src/simulation/m_viscous.fpp index 56f52c239..13a4e4de0 100644 --- a/src/simulation/m_viscous.fpp +++ b/src/simulation/m_viscous.fpp @@ -81,226 +81,226 @@ contains $:GPU_UPDATE(device='[is1_viscous,is2_viscous,is3_viscous]') $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=3) - do l = is3_viscous%beg, is3_viscous%end - do k = is2_viscous%beg, is2_viscous%end - do j = is1_viscous%beg, is1_viscous%end - $:GPU_LOOP(parallelism='[seq]') - do i = momxb, E_idx - tau_Re_vf(i)%sf(j, k, l) = 0._wp - end do + do l = is3_viscous%beg, is3_viscous%end + do k = is2_viscous%beg, is2_viscous%end + do j = is1_viscous%beg, is1_viscous%end + $:GPU_LOOP(parallelism='[seq]') + do i = momxb, E_idx + tau_Re_vf(i)%sf(j, k, l) = 0._wp end do end do end do + end do $:END_GPU_PARALLEL_LOOP() if (shear_stress) then ! Shear stresses $:GPU_PARALLEL_LOOP(collapse=3, private='[i,j,k,l,alpha_visc, alpha_rho_visc, Re_visc, tau_Re]') - do l = is3_viscous%beg, is3_viscous%end - do k = -1, 1 - do j = is1_viscous%beg, is1_viscous%end - - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - alpha_rho_visc(i) = q_prim_vf(i)%sf(j, k, l) - if (bubbles_euler .and. num_fluids == 1) then - alpha_visc(i) = 1._wp - q_prim_vf(E_idx + i)%sf(j, k, l) - else - alpha_visc(i) = q_prim_vf(E_idx + i)%sf(j, k, l) - end if - end do - - if (bubbles_euler) then - rho_visc = 0._wp - gamma_visc = 0._wp - pi_inf_visc = 0._wp + do l = is3_viscous%beg, is3_viscous%end + do k = -1, 1 + do j = is1_viscous%beg, is1_viscous%end - if (mpp_lim .and. (model_eqns == 2) .and. (num_fluids > 2)) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - rho_visc = rho_visc + alpha_rho_visc(i) - gamma_visc = gamma_visc + alpha_visc(i)*gammas(i) - pi_inf_visc = pi_inf_visc + alpha_visc(i)*pi_infs(i) - end do - else if ((model_eqns == 2) .and. (num_fluids > 2)) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - 1 - rho_visc = rho_visc + alpha_rho_visc(i) - gamma_visc = gamma_visc + alpha_visc(i)*gammas(i) - pi_inf_visc = pi_inf_visc + alpha_visc(i)*pi_infs(i) - end do - else - rho_visc = alpha_rho_visc(1) - gamma_visc = gammas(1) - pi_inf_visc = pi_infs(1) - end if + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids + alpha_rho_visc(i) = q_prim_vf(i)%sf(j, k, l) + if (bubbles_euler .and. num_fluids == 1) then + alpha_visc(i) = 1._wp - q_prim_vf(E_idx + i)%sf(j, k, l) else - rho_visc = 0._wp - gamma_visc = 0._wp - pi_inf_visc = 0._wp - - alpha_visc_sum = 0._wp - - if (mpp_lim) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - alpha_rho_visc(i) = max(0._wp, alpha_rho_visc(i)) - alpha_visc(i) = min(max(0._wp, alpha_visc(i)), 1._wp) - alpha_visc_sum = alpha_visc_sum + alpha_visc(i) - end do - - alpha_visc = alpha_visc/max(alpha_visc_sum, sgm_eps) + alpha_visc(i) = q_prim_vf(E_idx + i)%sf(j, k, l) + end if + end do - end if + if (bubbles_euler) then + rho_visc = 0._wp + gamma_visc = 0._wp + pi_inf_visc = 0._wp + if (mpp_lim .and. (model_eqns == 2) .and. (num_fluids > 2)) then $:GPU_LOOP(parallelism='[seq]') do i = 1, num_fluids rho_visc = rho_visc + alpha_rho_visc(i) gamma_visc = gamma_visc + alpha_visc(i)*gammas(i) pi_inf_visc = pi_inf_visc + alpha_visc(i)*pi_infs(i) end do + else if ((model_eqns == 2) .and. (num_fluids > 2)) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids - 1 + rho_visc = rho_visc + alpha_rho_visc(i) + gamma_visc = gamma_visc + alpha_visc(i)*gammas(i) + pi_inf_visc = pi_inf_visc + alpha_visc(i)*pi_infs(i) + end do + else + rho_visc = alpha_rho_visc(1) + gamma_visc = gammas(1) + pi_inf_visc = pi_infs(1) + end if + else + rho_visc = 0._wp + gamma_visc = 0._wp + pi_inf_visc = 0._wp - if (viscous) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, 2 - Re_visc(i) = dflt_real - - if (Re_size(i) > 0) Re_visc(i) = 0._wp - $:GPU_LOOP(parallelism='[seq]') - do q = 1, Re_size(i) - Re_visc(i) = alpha_visc(Re_idx(i, q))/Res_viscous(i, q) & - + Re_visc(i) - end do + alpha_visc_sum = 0._wp - Re_visc(i) = 1._wp/max(Re_visc(i), sgm_eps) + if (mpp_lim) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids + alpha_rho_visc(i) = max(0._wp, alpha_rho_visc(i)) + alpha_visc(i) = min(max(0._wp, alpha_visc(i)), 1._wp) + alpha_visc_sum = alpha_visc_sum + alpha_visc(i) + end do - end do - end if - end if + alpha_visc = alpha_visc/max(alpha_visc_sum, sgm_eps) - tau_Re(2, 1) = (grad_y_vf(1)%sf(j, k, l) + & - grad_x_vf(2)%sf(j, k, l))/ & - Re_visc(1) + end if - tau_Re(2, 2) = (4._wp*grad_y_vf(2)%sf(j, k, l) & - - 2._wp*grad_x_vf(1)%sf(j, k, l) & - - 2._wp*q_prim_vf(momxb + 1)%sf(j, k, l)/y_cc(k))/ & - (3._wp*Re_visc(1)) $:GPU_LOOP(parallelism='[seq]') - do i = 1, 2 - tau_Re_vf(contxe + i)%sf(j, k, l) = & - tau_Re_vf(contxe + i)%sf(j, k, l) - & - tau_Re(2, i) - - tau_Re_vf(E_idx)%sf(j, k, l) = & - tau_Re_vf(E_idx)%sf(j, k, l) - & - q_prim_vf(contxe + i)%sf(j, k, l)*tau_Re(2, i) + do i = 1, num_fluids + rho_visc = rho_visc + alpha_rho_visc(i) + gamma_visc = gamma_visc + alpha_visc(i)*gammas(i) + pi_inf_visc = pi_inf_visc + alpha_visc(i)*pi_infs(i) end do + + if (viscous) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, 2 + Re_visc(i) = dflt_real + + if (Re_size(i) > 0) Re_visc(i) = 0._wp + $:GPU_LOOP(parallelism='[seq]') + do q = 1, Re_size(i) + Re_visc(i) = alpha_visc(Re_idx(i, q))/Res_viscous(i, q) & + + Re_visc(i) + end do + + Re_visc(i) = 1._wp/max(Re_visc(i), sgm_eps) + + end do + end if + end if + + tau_Re(2, 1) = (grad_y_vf(1)%sf(j, k, l) + & + grad_x_vf(2)%sf(j, k, l))/ & + Re_visc(1) + + tau_Re(2, 2) = (4._wp*grad_y_vf(2)%sf(j, k, l) & + - 2._wp*grad_x_vf(1)%sf(j, k, l) & + - 2._wp*q_prim_vf(momxb + 1)%sf(j, k, l)/y_cc(k))/ & + (3._wp*Re_visc(1)) + $:GPU_LOOP(parallelism='[seq]') + do i = 1, 2 + tau_Re_vf(contxe + i)%sf(j, k, l) = & + tau_Re_vf(contxe + i)%sf(j, k, l) - & + tau_Re(2, i) + + tau_Re_vf(E_idx)%sf(j, k, l) = & + tau_Re_vf(E_idx)%sf(j, k, l) - & + q_prim_vf(contxe + i)%sf(j, k, l)*tau_Re(2, i) end do end do end do + end do $:END_GPU_PARALLEL_LOOP() end if if (bulk_stress) then ! Bulk stresses $:GPU_PARALLEL_LOOP(collapse=3, private='[i,j,k,l,alpha_visc, alpha_rho_visc, Re_visc, tau_Re]') - do l = is3_viscous%beg, is3_viscous%end - do k = -1, 1 - do j = is1_viscous%beg, is1_viscous%end - - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - alpha_rho_visc(i) = q_prim_vf(i)%sf(j, k, l) - if (bubbles_euler .and. num_fluids == 1) then - alpha_visc(i) = 1._wp - q_prim_vf(E_idx + i)%sf(j, k, l) - else - alpha_visc(i) = q_prim_vf(E_idx + i)%sf(j, k, l) - end if - end do - - if (bubbles_euler) then - rho_visc = 0._wp - gamma_visc = 0._wp - pi_inf_visc = 0._wp + do l = is3_viscous%beg, is3_viscous%end + do k = -1, 1 + do j = is1_viscous%beg, is1_viscous%end - if (mpp_lim .and. (model_eqns == 2) .and. (num_fluids > 2)) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - rho_visc = rho_visc + alpha_rho_visc(i) - gamma_visc = gamma_visc + alpha_visc(i)*gammas(i) - pi_inf_visc = pi_inf_visc + alpha_visc(i)*pi_infs(i) - end do - else if ((model_eqns == 2) .and. (num_fluids > 2)) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - 1 - rho_visc = rho_visc + alpha_rho_visc(i) - gamma_visc = gamma_visc + alpha_visc(i)*gammas(i) - pi_inf_visc = pi_inf_visc + alpha_visc(i)*pi_infs(i) - end do - else - rho_visc = alpha_rho_visc(1) - gamma_visc = gammas(1) - pi_inf_visc = pi_infs(1) - end if + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids + alpha_rho_visc(i) = q_prim_vf(i)%sf(j, k, l) + if (bubbles_euler .and. num_fluids == 1) then + alpha_visc(i) = 1._wp - q_prim_vf(E_idx + i)%sf(j, k, l) else - rho_visc = 0._wp - gamma_visc = 0._wp - pi_inf_visc = 0._wp - - alpha_visc_sum = 0._wp - - if (mpp_lim) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - alpha_rho_visc(i) = max(0._wp, alpha_rho_visc(i)) - alpha_visc(i) = min(max(0._wp, alpha_visc(i)), 1._wp) - alpha_visc_sum = alpha_visc_sum + alpha_visc(i) - end do - - alpha_visc = alpha_visc/max(alpha_visc_sum, sgm_eps) + alpha_visc(i) = q_prim_vf(E_idx + i)%sf(j, k, l) + end if + end do - end if + if (bubbles_euler) then + rho_visc = 0._wp + gamma_visc = 0._wp + pi_inf_visc = 0._wp + if (mpp_lim .and. (model_eqns == 2) .and. (num_fluids > 2)) then $:GPU_LOOP(parallelism='[seq]') do i = 1, num_fluids rho_visc = rho_visc + alpha_rho_visc(i) gamma_visc = gamma_visc + alpha_visc(i)*gammas(i) pi_inf_visc = pi_inf_visc + alpha_visc(i)*pi_infs(i) end do + else if ((model_eqns == 2) .and. (num_fluids > 2)) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids - 1 + rho_visc = rho_visc + alpha_rho_visc(i) + gamma_visc = gamma_visc + alpha_visc(i)*gammas(i) + pi_inf_visc = pi_inf_visc + alpha_visc(i)*pi_infs(i) + end do + else + rho_visc = alpha_rho_visc(1) + gamma_visc = gammas(1) + pi_inf_visc = pi_infs(1) + end if + else + rho_visc = 0._wp + gamma_visc = 0._wp + pi_inf_visc = 0._wp - if (viscous) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, 2 - Re_visc(i) = dflt_real - - if (Re_size(i) > 0) Re_visc(i) = 0._wp - $:GPU_LOOP(parallelism='[seq]') - do q = 1, Re_size(i) - Re_visc(i) = alpha_visc(Re_idx(i, q))/Res_viscous(i, q) & - + Re_visc(i) - end do + alpha_visc_sum = 0._wp - Re_visc(i) = 1._wp/max(Re_visc(i), sgm_eps) + if (mpp_lim) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids + alpha_rho_visc(i) = max(0._wp, alpha_rho_visc(i)) + alpha_visc(i) = min(max(0._wp, alpha_visc(i)), 1._wp) + alpha_visc_sum = alpha_visc_sum + alpha_visc(i) + end do + + alpha_visc = alpha_visc/max(alpha_visc_sum, sgm_eps) + end if + + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids + rho_visc = rho_visc + alpha_rho_visc(i) + gamma_visc = gamma_visc + alpha_visc(i)*gammas(i) + pi_inf_visc = pi_inf_visc + alpha_visc(i)*pi_infs(i) + end do + + if (viscous) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, 2 + Re_visc(i) = dflt_real + + if (Re_size(i) > 0) Re_visc(i) = 0._wp + $:GPU_LOOP(parallelism='[seq]') + do q = 1, Re_size(i) + Re_visc(i) = alpha_visc(Re_idx(i, q))/Res_viscous(i, q) & + + Re_visc(i) end do - end if + + Re_visc(i) = 1._wp/max(Re_visc(i), sgm_eps) + + end do end if + end if - tau_Re(2, 2) = (grad_x_vf(1)%sf(j, k, l) + & - grad_y_vf(2)%sf(j, k, l) + & - q_prim_vf(momxb + 1)%sf(j, k, l)/y_cc(k))/ & - Re_visc(2) + tau_Re(2, 2) = (grad_x_vf(1)%sf(j, k, l) + & + grad_y_vf(2)%sf(j, k, l) + & + q_prim_vf(momxb + 1)%sf(j, k, l)/y_cc(k))/ & + Re_visc(2) - tau_Re_vf(momxb + 1)%sf(j, k, l) = & - tau_Re_vf(momxb + 1)%sf(j, k, l) - & - tau_Re(2, 2) + tau_Re_vf(momxb + 1)%sf(j, k, l) = & + tau_Re_vf(momxb + 1)%sf(j, k, l) - & + tau_Re(2, 2) - tau_Re_vf(E_idx)%sf(j, k, l) = & - tau_Re_vf(E_idx)%sf(j, k, l) - & - q_prim_vf(momxb + 1)%sf(j, k, l)*tau_Re(2, 2) + tau_Re_vf(E_idx)%sf(j, k, l) = & + tau_Re_vf(E_idx)%sf(j, k, l) - & + q_prim_vf(momxb + 1)%sf(j, k, l)*tau_Re(2, 2) - end do end do end do + end do $:END_GPU_PARALLEL_LOOP() end if @@ -308,211 +308,211 @@ contains if (shear_stress) then ! Shear stresses $:GPU_PARALLEL_LOOP(collapse=3, private='[i,j,k,l,alpha_visc, alpha_rho_visc, Re_visc, tau_Re]') - do l = is3_viscous%beg, is3_viscous%end - do k = -1, 1 - do j = is1_viscous%beg, is1_viscous%end - - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - alpha_rho_visc(i) = q_prim_vf(i)%sf(j, k, l) - if (bubbles_euler .and. num_fluids == 1) then - alpha_visc(i) = 1._wp - q_prim_vf(E_idx + i)%sf(j, k, l) - else - alpha_visc(i) = q_prim_vf(E_idx + i)%sf(j, k, l) - end if - end do - - if (bubbles_euler) then - rho_visc = 0._wp - gamma_visc = 0._wp - pi_inf_visc = 0._wp + do l = is3_viscous%beg, is3_viscous%end + do k = -1, 1 + do j = is1_viscous%beg, is1_viscous%end - if (mpp_lim .and. (model_eqns == 2) .and. (num_fluids > 2)) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - rho_visc = rho_visc + alpha_rho_visc(i) - gamma_visc = gamma_visc + alpha_visc(i)*gammas(i) - pi_inf_visc = pi_inf_visc + alpha_visc(i)*pi_infs(i) - end do - else if ((model_eqns == 2) .and. (num_fluids > 2)) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - 1 - rho_visc = rho_visc + alpha_rho_visc(i) - gamma_visc = gamma_visc + alpha_visc(i)*gammas(i) - pi_inf_visc = pi_inf_visc + alpha_visc(i)*pi_infs(i) - end do - else - rho_visc = alpha_rho_visc(1) - gamma_visc = gammas(1) - pi_inf_visc = pi_infs(1) - end if + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids + alpha_rho_visc(i) = q_prim_vf(i)%sf(j, k, l) + if (bubbles_euler .and. num_fluids == 1) then + alpha_visc(i) = 1._wp - q_prim_vf(E_idx + i)%sf(j, k, l) else - rho_visc = 0._wp - gamma_visc = 0._wp - pi_inf_visc = 0._wp - - alpha_visc_sum = 0._wp - - if (mpp_lim) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - alpha_rho_visc(i) = max(0._wp, alpha_rho_visc(i)) - alpha_visc(i) = min(max(0._wp, alpha_visc(i)), 1._wp) - alpha_visc_sum = alpha_visc_sum + alpha_visc(i) - end do - - alpha_visc = alpha_visc/max(alpha_visc_sum, sgm_eps) + alpha_visc(i) = q_prim_vf(E_idx + i)%sf(j, k, l) + end if + end do - end if + if (bubbles_euler) then + rho_visc = 0._wp + gamma_visc = 0._wp + pi_inf_visc = 0._wp + if (mpp_lim .and. (model_eqns == 2) .and. (num_fluids > 2)) then $:GPU_LOOP(parallelism='[seq]') do i = 1, num_fluids rho_visc = rho_visc + alpha_rho_visc(i) gamma_visc = gamma_visc + alpha_visc(i)*gammas(i) pi_inf_visc = pi_inf_visc + alpha_visc(i)*pi_infs(i) end do + else if ((model_eqns == 2) .and. (num_fluids > 2)) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids - 1 + rho_visc = rho_visc + alpha_rho_visc(i) + gamma_visc = gamma_visc + alpha_visc(i)*gammas(i) + pi_inf_visc = pi_inf_visc + alpha_visc(i)*pi_infs(i) + end do + else + rho_visc = alpha_rho_visc(1) + gamma_visc = gammas(1) + pi_inf_visc = pi_infs(1) + end if + else + rho_visc = 0._wp + gamma_visc = 0._wp + pi_inf_visc = 0._wp - if (viscous) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, 2 - Re_visc(i) = dflt_real - - if (Re_size(i) > 0) Re_visc(i) = 0._wp - $:GPU_LOOP(parallelism='[seq]') - do q = 1, Re_size(i) - Re_visc(i) = alpha_visc(Re_idx(i, q))/Res_viscous(i, q) & - + Re_visc(i) - end do + alpha_visc_sum = 0._wp + + if (mpp_lim) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids + alpha_rho_visc(i) = max(0._wp, alpha_rho_visc(i)) + alpha_visc(i) = min(max(0._wp, alpha_visc(i)), 1._wp) + alpha_visc_sum = alpha_visc_sum + alpha_visc(i) + end do + + alpha_visc = alpha_visc/max(alpha_visc_sum, sgm_eps) + + end if + + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids + rho_visc = rho_visc + alpha_rho_visc(i) + gamma_visc = gamma_visc + alpha_visc(i)*gammas(i) + pi_inf_visc = pi_inf_visc + alpha_visc(i)*pi_infs(i) + end do - Re_visc(i) = 1._wp/max(Re_visc(i), sgm_eps) + if (viscous) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, 2 + Re_visc(i) = dflt_real + if (Re_size(i) > 0) Re_visc(i) = 0._wp + $:GPU_LOOP(parallelism='[seq]') + do q = 1, Re_size(i) + Re_visc(i) = alpha_visc(Re_idx(i, q))/Res_viscous(i, q) & + + Re_visc(i) end do - end if + + Re_visc(i) = 1._wp/max(Re_visc(i), sgm_eps) + + end do end if + end if - tau_Re(2, 2) = -(2._wp/3._wp)*grad_z_vf(3)%sf(j, k, l)/y_cc(k)/ & - Re_visc(1) + tau_Re(2, 2) = -(2._wp/3._wp)*grad_z_vf(3)%sf(j, k, l)/y_cc(k)/ & + Re_visc(1) - tau_Re(2, 3) = ((grad_z_vf(2)%sf(j, k, l) - & - q_prim_vf(momxe)%sf(j, k, l))/ & - y_cc(k) + grad_y_vf(3)%sf(j, k, l))/ & - Re_visc(1) + tau_Re(2, 3) = ((grad_z_vf(2)%sf(j, k, l) - & + q_prim_vf(momxe)%sf(j, k, l))/ & + y_cc(k) + grad_y_vf(3)%sf(j, k, l))/ & + Re_visc(1) - $:GPU_LOOP(parallelism='[seq]') - do i = 2, 3 - tau_Re_vf(contxe + i)%sf(j, k, l) = & - tau_Re_vf(contxe + i)%sf(j, k, l) - & - tau_Re(2, i) - - tau_Re_vf(E_idx)%sf(j, k, l) = & - tau_Re_vf(E_idx)%sf(j, k, l) - & - q_prim_vf(contxe + i)%sf(j, k, l)*tau_Re(2, i) - end do + $:GPU_LOOP(parallelism='[seq]') + do i = 2, 3 + tau_Re_vf(contxe + i)%sf(j, k, l) = & + tau_Re_vf(contxe + i)%sf(j, k, l) - & + tau_Re(2, i) + tau_Re_vf(E_idx)%sf(j, k, l) = & + tau_Re_vf(E_idx)%sf(j, k, l) - & + q_prim_vf(contxe + i)%sf(j, k, l)*tau_Re(2, i) end do + end do end do + end do $:END_GPU_PARALLEL_LOOP() end if if (bulk_stress) then ! Bulk stresses $:GPU_PARALLEL_LOOP(collapse=3, private='[i,j,k,l,alpha_visc, alpha_rho_visc, Re_visc, tau_Re]') - do l = is3_viscous%beg, is3_viscous%end - do k = -1, 1 - do j = is1_viscous%beg, is1_viscous%end - - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - alpha_rho_visc(i) = q_prim_vf(i)%sf(j, k, l) - if (bubbles_euler .and. num_fluids == 1) then - alpha_visc(i) = 1._wp - q_prim_vf(E_idx + i)%sf(j, k, l) - else - alpha_visc(i) = q_prim_vf(E_idx + i)%sf(j, k, l) - end if - end do - - if (bubbles_euler) then - rho_visc = 0._wp - gamma_visc = 0._wp - pi_inf_visc = 0._wp + do l = is3_viscous%beg, is3_viscous%end + do k = -1, 1 + do j = is1_viscous%beg, is1_viscous%end - if (mpp_lim .and. (model_eqns == 2) .and. (num_fluids > 2)) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - rho_visc = rho_visc + alpha_rho_visc(i) - gamma_visc = gamma_visc + alpha_visc(i)*gammas(i) - pi_inf_visc = pi_inf_visc + alpha_visc(i)*pi_infs(i) - end do - else if ((model_eqns == 2) .and. (num_fluids > 2)) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - 1 - rho_visc = rho_visc + alpha_rho_visc(i) - gamma_visc = gamma_visc + alpha_visc(i)*gammas(i) - pi_inf_visc = pi_inf_visc + alpha_visc(i)*pi_infs(i) - end do - else - rho_visc = alpha_rho_visc(1) - gamma_visc = gammas(1) - pi_inf_visc = pi_infs(1) - end if + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids + alpha_rho_visc(i) = q_prim_vf(i)%sf(j, k, l) + if (bubbles_euler .and. num_fluids == 1) then + alpha_visc(i) = 1._wp - q_prim_vf(E_idx + i)%sf(j, k, l) else - rho_visc = 0._wp - gamma_visc = 0._wp - pi_inf_visc = 0._wp - - alpha_visc_sum = 0._wp - - if (mpp_lim) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - alpha_rho_visc(i) = max(0._wp, alpha_rho_visc(i)) - alpha_visc(i) = min(max(0._wp, alpha_visc(i)), 1._wp) - alpha_visc_sum = alpha_visc_sum + alpha_visc(i) - end do - - alpha_visc = alpha_visc/max(alpha_visc_sum, sgm_eps) + alpha_visc(i) = q_prim_vf(E_idx + i)%sf(j, k, l) + end if + end do - end if + if (bubbles_euler) then + rho_visc = 0._wp + gamma_visc = 0._wp + pi_inf_visc = 0._wp + if (mpp_lim .and. (model_eqns == 2) .and. (num_fluids > 2)) then $:GPU_LOOP(parallelism='[seq]') do i = 1, num_fluids rho_visc = rho_visc + alpha_rho_visc(i) gamma_visc = gamma_visc + alpha_visc(i)*gammas(i) pi_inf_visc = pi_inf_visc + alpha_visc(i)*pi_infs(i) end do + else if ((model_eqns == 2) .and. (num_fluids > 2)) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids - 1 + rho_visc = rho_visc + alpha_rho_visc(i) + gamma_visc = gamma_visc + alpha_visc(i)*gammas(i) + pi_inf_visc = pi_inf_visc + alpha_visc(i)*pi_infs(i) + end do + else + rho_visc = alpha_rho_visc(1) + gamma_visc = gammas(1) + pi_inf_visc = pi_infs(1) + end if + else + rho_visc = 0._wp + gamma_visc = 0._wp + pi_inf_visc = 0._wp - if (viscous) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, 2 - Re_visc(i) = dflt_real - - if (Re_size(i) > 0) Re_visc(i) = 0._wp - $:GPU_LOOP(parallelism='[seq]') - do q = 1, Re_size(i) - Re_visc(i) = alpha_visc(Re_idx(i, q))/Res_viscous(i, q) & - + Re_visc(i) - end do + alpha_visc_sum = 0._wp + + if (mpp_lim) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids + alpha_rho_visc(i) = max(0._wp, alpha_rho_visc(i)) + alpha_visc(i) = min(max(0._wp, alpha_visc(i)), 1._wp) + alpha_visc_sum = alpha_visc_sum + alpha_visc(i) + end do + + alpha_visc = alpha_visc/max(alpha_visc_sum, sgm_eps) + + end if - Re_visc(i) = 1._wp/max(Re_visc(i), sgm_eps) + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids + rho_visc = rho_visc + alpha_rho_visc(i) + gamma_visc = gamma_visc + alpha_visc(i)*gammas(i) + pi_inf_visc = pi_inf_visc + alpha_visc(i)*pi_infs(i) + end do + if (viscous) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, 2 + Re_visc(i) = dflt_real + + if (Re_size(i) > 0) Re_visc(i) = 0._wp + $:GPU_LOOP(parallelism='[seq]') + do q = 1, Re_size(i) + Re_visc(i) = alpha_visc(Re_idx(i, q))/Res_viscous(i, q) & + + Re_visc(i) end do - end if + + Re_visc(i) = 1._wp/max(Re_visc(i), sgm_eps) + + end do end if + end if - tau_Re(2, 2) = grad_z_vf(3)%sf(j, k, l)/y_cc(k)/ & - Re_visc(2) + tau_Re(2, 2) = grad_z_vf(3)%sf(j, k, l)/y_cc(k)/ & + Re_visc(2) - tau_Re_vf(momxb + 1)%sf(j, k, l) = & - tau_Re_vf(momxb + 1)%sf(j, k, l) - & - tau_Re(2, 2) + tau_Re_vf(momxb + 1)%sf(j, k, l) = & + tau_Re_vf(momxb + 1)%sf(j, k, l) - & + tau_Re(2, 2) - tau_Re_vf(E_idx)%sf(j, k, l) = & - tau_Re_vf(E_idx)%sf(j, k, l) - & - q_prim_vf(momxb + 1)%sf(j, k, l)*tau_Re(2, 2) + tau_Re_vf(E_idx)%sf(j, k, l) = & + tau_Re_vf(E_idx)%sf(j, k, l) - & + q_prim_vf(momxb + 1)%sf(j, k, l)*tau_Re(2, 2) - end do end do end do + end do $:END_GPU_PARALLEL_LOOP() end if end subroutine s_compute_viscous_stress_tensor @@ -599,361 +599,361 @@ contains $:GPU_UPDATE(device='[is1_viscous,is2_viscous,is3_viscous]') $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=3) - do l = is3_viscous%beg, is3_viscous%end - do k = iy%beg, iy%end - do j = is1_viscous%beg + 1, is1_viscous%end - $:GPU_LOOP(parallelism='[seq]') - do i = iv%beg, iv%end - dqL_prim_dx_n(1)%vf(i)%sf(j, k, l) = & - (q_prim_qp%vf(i)%sf(j, k, l) - & - q_prim_qp%vf(i)%sf(j - 1, k, l))/ & - (x_cc(j) - x_cc(j - 1)) - end do + do l = is3_viscous%beg, is3_viscous%end + do k = iy%beg, iy%end + do j = is1_viscous%beg + 1, is1_viscous%end + $:GPU_LOOP(parallelism='[seq]') + do i = iv%beg, iv%end + dqL_prim_dx_n(1)%vf(i)%sf(j, k, l) = & + (q_prim_qp%vf(i)%sf(j, k, l) - & + q_prim_qp%vf(i)%sf(j - 1, k, l))/ & + (x_cc(j) - x_cc(j - 1)) end do end do end do + end do $:END_GPU_PARALLEL_LOOP() $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=3) - do l = is3_viscous%beg, is3_viscous%end - do k = is2_viscous%beg, is2_viscous%end - do j = is1_viscous%beg, is1_viscous%end - 1 - $:GPU_LOOP(parallelism='[seq]') - do i = iv%beg, iv%end - dqR_prim_dx_n(1)%vf(i)%sf(j, k, l) = & - (q_prim_qp%vf(i)%sf(j + 1, k, l) - & - q_prim_qp%vf(i)%sf(j, k, l))/ & - (x_cc(j + 1) - x_cc(j)) - end do + do l = is3_viscous%beg, is3_viscous%end + do k = is2_viscous%beg, is2_viscous%end + do j = is1_viscous%beg, is1_viscous%end - 1 + $:GPU_LOOP(parallelism='[seq]') + do i = iv%beg, iv%end + dqR_prim_dx_n(1)%vf(i)%sf(j, k, l) = & + (q_prim_qp%vf(i)%sf(j + 1, k, l) - & + q_prim_qp%vf(i)%sf(j, k, l))/ & + (x_cc(j + 1) - x_cc(j)) end do end do end do + end do $:END_GPU_PARALLEL_LOOP() if (n > 0) then $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=3) - do l = is3_viscous%beg, is3_viscous%end - do j = is2_viscous%beg + 1, is2_viscous%end - do k = is1_viscous%beg, is1_viscous%end - $:GPU_LOOP(parallelism='[seq]') - do i = iv%beg, iv%end - dqL_prim_dy_n(2)%vf(i)%sf(k, j, l) = & - (q_prim_qp%vf(i)%sf(k, j, l) - & - q_prim_qp%vf(i)%sf(k, j - 1, l))/ & - (y_cc(j) - y_cc(j - 1)) - end do + do l = is3_viscous%beg, is3_viscous%end + do j = is2_viscous%beg + 1, is2_viscous%end + do k = is1_viscous%beg, is1_viscous%end + $:GPU_LOOP(parallelism='[seq]') + do i = iv%beg, iv%end + dqL_prim_dy_n(2)%vf(i)%sf(k, j, l) = & + (q_prim_qp%vf(i)%sf(k, j, l) - & + q_prim_qp%vf(i)%sf(k, j - 1, l))/ & + (y_cc(j) - y_cc(j - 1)) end do end do end do + end do $:END_GPU_PARALLEL_LOOP() $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=3) - do l = is3_viscous%beg, is3_viscous%end - do j = is2_viscous%beg, is2_viscous%end - 1 - do k = is1_viscous%beg, is1_viscous%end - $:GPU_LOOP(parallelism='[seq]') - do i = iv%beg, iv%end - dqR_prim_dy_n(2)%vf(i)%sf(k, j, l) = & - (q_prim_qp%vf(i)%sf(k, j + 1, l) - & - q_prim_qp%vf(i)%sf(k, j, l))/ & - (y_cc(j + 1) - y_cc(j)) - end do + do l = is3_viscous%beg, is3_viscous%end + do j = is2_viscous%beg, is2_viscous%end - 1 + do k = is1_viscous%beg, is1_viscous%end + $:GPU_LOOP(parallelism='[seq]') + do i = iv%beg, iv%end + dqR_prim_dy_n(2)%vf(i)%sf(k, j, l) = & + (q_prim_qp%vf(i)%sf(k, j + 1, l) - & + q_prim_qp%vf(i)%sf(k, j, l))/ & + (y_cc(j + 1) - y_cc(j)) end do end do end do + end do $:END_GPU_PARALLEL_LOOP() $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=3) - do l = is3_viscous%beg, is3_viscous%end - do j = is2_viscous%beg + 1, is2_viscous%end - do k = is1_viscous%beg + 1, is1_viscous%end - 1 - $:GPU_LOOP(parallelism='[seq]') - do i = iv%beg, iv%end - dqL_prim_dx_n(2)%vf(i)%sf(k, j, l) = & - (dqL_prim_dx_n(1)%vf(i)%sf(k, j, l) + & - dqR_prim_dx_n(1)%vf(i)%sf(k, j, l) + & - dqL_prim_dx_n(1)%vf(i)%sf(k, j - 1, l) + & - dqR_prim_dx_n(1)%vf(i)%sf(k, j - 1, l)) - - dqL_prim_dx_n(2)%vf(i)%sf(k, j, l) = 25.e-2_wp* & - dqL_prim_dx_n(2)%vf(i)%sf(k, j, l) - end do + do l = is3_viscous%beg, is3_viscous%end + do j = is2_viscous%beg + 1, is2_viscous%end + do k = is1_viscous%beg + 1, is1_viscous%end - 1 + $:GPU_LOOP(parallelism='[seq]') + do i = iv%beg, iv%end + dqL_prim_dx_n(2)%vf(i)%sf(k, j, l) = & + (dqL_prim_dx_n(1)%vf(i)%sf(k, j, l) + & + dqR_prim_dx_n(1)%vf(i)%sf(k, j, l) + & + dqL_prim_dx_n(1)%vf(i)%sf(k, j - 1, l) + & + dqR_prim_dx_n(1)%vf(i)%sf(k, j - 1, l)) + + dqL_prim_dx_n(2)%vf(i)%sf(k, j, l) = 25.e-2_wp* & + dqL_prim_dx_n(2)%vf(i)%sf(k, j, l) end do end do end do + end do $:END_GPU_PARALLEL_LOOP() $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=3) - do l = is3_viscous%beg, is3_viscous%end - do j = is2_viscous%beg, is2_viscous%end - 1 - do k = is1_viscous%beg + 1, is1_viscous%end - 1 - $:GPU_LOOP(parallelism='[seq]') - do i = iv%beg, iv%end - dqR_prim_dx_n(2)%vf(i)%sf(k, j, l) = & - (dqL_prim_dx_n(1)%vf(i)%sf(k, j + 1, l) + & - dqR_prim_dx_n(1)%vf(i)%sf(k, j + 1, l) + & - dqL_prim_dx_n(1)%vf(i)%sf(k, j, l) + & - dqR_prim_dx_n(1)%vf(i)%sf(k, j, l)) + do l = is3_viscous%beg, is3_viscous%end + do j = is2_viscous%beg, is2_viscous%end - 1 + do k = is1_viscous%beg + 1, is1_viscous%end - 1 + $:GPU_LOOP(parallelism='[seq]') + do i = iv%beg, iv%end + dqR_prim_dx_n(2)%vf(i)%sf(k, j, l) = & + (dqL_prim_dx_n(1)%vf(i)%sf(k, j + 1, l) + & + dqR_prim_dx_n(1)%vf(i)%sf(k, j + 1, l) + & + dqL_prim_dx_n(1)%vf(i)%sf(k, j, l) + & + dqR_prim_dx_n(1)%vf(i)%sf(k, j, l)) - dqR_prim_dx_n(2)%vf(i)%sf(k, j, l) = 25.e-2_wp* & - dqR_prim_dx_n(2)%vf(i)%sf(k, j, l) + dqR_prim_dx_n(2)%vf(i)%sf(k, j, l) = 25.e-2_wp* & + dqR_prim_dx_n(2)%vf(i)%sf(k, j, l) - end do end do end do end do + end do $:END_GPU_PARALLEL_LOOP() $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=3) - do l = is3_viscous%beg, is3_viscous%end - do k = is2_viscous%beg + 1, is2_viscous%end - 1 - do j = is1_viscous%beg + 1, is1_viscous%end - $:GPU_LOOP(parallelism='[seq]') - do i = iv%beg, iv%end - dqL_prim_dy_n(1)%vf(i)%sf(j, k, l) = & - (dqL_prim_dy_n(2)%vf(i)%sf(j, k, l) + & - dqR_prim_dy_n(2)%vf(i)%sf(j, k, l) + & - dqL_prim_dy_n(2)%vf(i)%sf(j - 1, k, l) + & - dqR_prim_dy_n(2)%vf(i)%sf(j - 1, k, l)) + do l = is3_viscous%beg, is3_viscous%end + do k = is2_viscous%beg + 1, is2_viscous%end - 1 + do j = is1_viscous%beg + 1, is1_viscous%end + $:GPU_LOOP(parallelism='[seq]') + do i = iv%beg, iv%end + dqL_prim_dy_n(1)%vf(i)%sf(j, k, l) = & + (dqL_prim_dy_n(2)%vf(i)%sf(j, k, l) + & + dqR_prim_dy_n(2)%vf(i)%sf(j, k, l) + & + dqL_prim_dy_n(2)%vf(i)%sf(j - 1, k, l) + & + dqR_prim_dy_n(2)%vf(i)%sf(j - 1, k, l)) - dqL_prim_dy_n(1)%vf(i)%sf(j, k, l) = 25.e-2_wp* & - dqL_prim_dy_n(1)%vf(i)%sf(j, k, l) + dqL_prim_dy_n(1)%vf(i)%sf(j, k, l) = 25.e-2_wp* & + dqL_prim_dy_n(1)%vf(i)%sf(j, k, l) - end do end do end do end do + end do $:END_GPU_PARALLEL_LOOP() $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=3) - do l = is3_viscous%beg, is3_viscous%end - do k = is2_viscous%beg + 1, is2_viscous%end - 1 - do j = is1_viscous%beg, is1_viscous%end - 1 - $:GPU_LOOP(parallelism='[seq]') - do i = iv%beg, iv%end - dqR_prim_dy_n(1)%vf(i)%sf(j, k, l) = & - (dqL_prim_dy_n(2)%vf(i)%sf(j + 1, k, l) + & - dqR_prim_dy_n(2)%vf(i)%sf(j + 1, k, l) + & - dqL_prim_dy_n(2)%vf(i)%sf(j, k, l) + & - dqR_prim_dy_n(2)%vf(i)%sf(j, k, l)) + do l = is3_viscous%beg, is3_viscous%end + do k = is2_viscous%beg + 1, is2_viscous%end - 1 + do j = is1_viscous%beg, is1_viscous%end - 1 + $:GPU_LOOP(parallelism='[seq]') + do i = iv%beg, iv%end + dqR_prim_dy_n(1)%vf(i)%sf(j, k, l) = & + (dqL_prim_dy_n(2)%vf(i)%sf(j + 1, k, l) + & + dqR_prim_dy_n(2)%vf(i)%sf(j + 1, k, l) + & + dqL_prim_dy_n(2)%vf(i)%sf(j, k, l) + & + dqR_prim_dy_n(2)%vf(i)%sf(j, k, l)) - dqR_prim_dy_n(1)%vf(i)%sf(j, k, l) = 25.e-2_wp* & - dqR_prim_dy_n(1)%vf(i)%sf(j, k, l) + dqR_prim_dy_n(1)%vf(i)%sf(j, k, l) = 25.e-2_wp* & + dqR_prim_dy_n(1)%vf(i)%sf(j, k, l) - end do end do end do end do + end do $:END_GPU_PARALLEL_LOOP() if (p > 0) then $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=3) - do j = is3_viscous%beg + 1, is3_viscous%end - do l = is2_viscous%beg, is2_viscous%end - do k = is1_viscous%beg, is1_viscous%end - $:GPU_LOOP(parallelism='[seq]') - do i = iv%beg, iv%end + do j = is3_viscous%beg + 1, is3_viscous%end + do l = is2_viscous%beg, is2_viscous%end + do k = is1_viscous%beg, is1_viscous%end + $:GPU_LOOP(parallelism='[seq]') + do i = iv%beg, iv%end - dqL_prim_dz_n(3)%vf(i)%sf(k, l, j) = & - (q_prim_qp%vf(i)%sf(k, l, j) - & - q_prim_qp%vf(i)%sf(k, l, j - 1))/ & - (z_cc(j) - z_cc(j - 1)) - end do + dqL_prim_dz_n(3)%vf(i)%sf(k, l, j) = & + (q_prim_qp%vf(i)%sf(k, l, j) - & + q_prim_qp%vf(i)%sf(k, l, j - 1))/ & + (z_cc(j) - z_cc(j - 1)) end do end do end do + end do $:END_GPU_PARALLEL_LOOP() $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=3) - do j = is3_viscous%beg, is3_viscous%end - 1 - do l = is2_viscous%beg, is2_viscous%end - do k = is1_viscous%beg, is1_viscous%end - $:GPU_LOOP(parallelism='[seq]') - do i = iv%beg, iv%end + do j = is3_viscous%beg, is3_viscous%end - 1 + do l = is2_viscous%beg, is2_viscous%end + do k = is1_viscous%beg, is1_viscous%end + $:GPU_LOOP(parallelism='[seq]') + do i = iv%beg, iv%end - dqR_prim_dz_n(3)%vf(i)%sf(k, l, j) = & - (q_prim_qp%vf(i)%sf(k, l, j + 1) - & - q_prim_qp%vf(i)%sf(k, l, j))/ & - (z_cc(j + 1) - z_cc(j)) - end do + dqR_prim_dz_n(3)%vf(i)%sf(k, l, j) = & + (q_prim_qp%vf(i)%sf(k, l, j + 1) - & + q_prim_qp%vf(i)%sf(k, l, j))/ & + (z_cc(j + 1) - z_cc(j)) end do end do end do + end do $:END_GPU_PARALLEL_LOOP() $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=3) - do l = is3_viscous%beg + 1, is3_viscous%end - 1 - do k = is2_viscous%beg, is2_viscous%end - do j = is1_viscous%beg + 1, is1_viscous%end - $:GPU_LOOP(parallelism='[seq]') - do i = iv%beg, iv%end + do l = is3_viscous%beg + 1, is3_viscous%end - 1 + do k = is2_viscous%beg, is2_viscous%end + do j = is1_viscous%beg + 1, is1_viscous%end + $:GPU_LOOP(parallelism='[seq]') + do i = iv%beg, iv%end - dqL_prim_dz_n(1)%vf(i)%sf(j, k, l) = & - (dqL_prim_dz_n(3)%vf(i)%sf(j, k, l) + & - dqR_prim_dz_n(3)%vf(i)%sf(j, k, l) + & - dqL_prim_dz_n(3)%vf(i)%sf(j - 1, k, l) + & - dqR_prim_dz_n(3)%vf(i)%sf(j - 1, k, l)) + dqL_prim_dz_n(1)%vf(i)%sf(j, k, l) = & + (dqL_prim_dz_n(3)%vf(i)%sf(j, k, l) + & + dqR_prim_dz_n(3)%vf(i)%sf(j, k, l) + & + dqL_prim_dz_n(3)%vf(i)%sf(j - 1, k, l) + & + dqR_prim_dz_n(3)%vf(i)%sf(j - 1, k, l)) - dqL_prim_dz_n(1)%vf(i)%sf(j, k, l) = 25.e-2_wp* & - dqL_prim_dz_n(1)%vf(i)%sf(j, k, l) + dqL_prim_dz_n(1)%vf(i)%sf(j, k, l) = 25.e-2_wp* & + dqL_prim_dz_n(1)%vf(i)%sf(j, k, l) - end do end do end do end do + end do $:END_GPU_PARALLEL_LOOP() $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=3) - do l = is3_viscous%beg + 1, is3_viscous%end - 1 - do k = is2_viscous%beg, is2_viscous%end - do j = is1_viscous%beg, is1_viscous%end - 1 - $:GPU_LOOP(parallelism='[seq]') - do i = iv%beg, iv%end + do l = is3_viscous%beg + 1, is3_viscous%end - 1 + do k = is2_viscous%beg, is2_viscous%end + do j = is1_viscous%beg, is1_viscous%end - 1 + $:GPU_LOOP(parallelism='[seq]') + do i = iv%beg, iv%end - dqR_prim_dz_n(1)%vf(i)%sf(j, k, l) = & - (dqL_prim_dz_n(3)%vf(i)%sf(j + 1, k, l) + & - dqR_prim_dz_n(3)%vf(i)%sf(j + 1, k, l) + & - dqL_prim_dz_n(3)%vf(i)%sf(j, k, l) + & - dqR_prim_dz_n(3)%vf(i)%sf(j, k, l)) + dqR_prim_dz_n(1)%vf(i)%sf(j, k, l) = & + (dqL_prim_dz_n(3)%vf(i)%sf(j + 1, k, l) + & + dqR_prim_dz_n(3)%vf(i)%sf(j + 1, k, l) + & + dqL_prim_dz_n(3)%vf(i)%sf(j, k, l) + & + dqR_prim_dz_n(3)%vf(i)%sf(j, k, l)) - dqR_prim_dz_n(1)%vf(i)%sf(j, k, l) = 25.e-2_wp* & - dqR_prim_dz_n(1)%vf(i)%sf(j, k, l) + dqR_prim_dz_n(1)%vf(i)%sf(j, k, l) = 25.e-2_wp* & + dqR_prim_dz_n(1)%vf(i)%sf(j, k, l) - end do end do end do end do + end do $:END_GPU_PARALLEL_LOOP() $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=3) - do l = is3_viscous%beg + 1, is3_viscous%end - 1 - do j = is2_viscous%beg + 1, is2_viscous%end - do k = is1_viscous%beg, is1_viscous%end - $:GPU_LOOP(parallelism='[seq]') - do i = iv%beg, iv%end + do l = is3_viscous%beg + 1, is3_viscous%end - 1 + do j = is2_viscous%beg + 1, is2_viscous%end + do k = is1_viscous%beg, is1_viscous%end + $:GPU_LOOP(parallelism='[seq]') + do i = iv%beg, iv%end - dqL_prim_dz_n(2)%vf(i)%sf(k, j, l) = & - (dqL_prim_dz_n(3)%vf(i)%sf(k, j, l) + & - dqR_prim_dz_n(3)%vf(i)%sf(k, j, l) + & - dqL_prim_dz_n(3)%vf(i)%sf(k, j - 1, l) + & - dqR_prim_dz_n(3)%vf(i)%sf(k, j - 1, l)) + dqL_prim_dz_n(2)%vf(i)%sf(k, j, l) = & + (dqL_prim_dz_n(3)%vf(i)%sf(k, j, l) + & + dqR_prim_dz_n(3)%vf(i)%sf(k, j, l) + & + dqL_prim_dz_n(3)%vf(i)%sf(k, j - 1, l) + & + dqR_prim_dz_n(3)%vf(i)%sf(k, j - 1, l)) - dqL_prim_dz_n(2)%vf(i)%sf(k, j, l) = 25.e-2_wp* & - dqL_prim_dz_n(2)%vf(i)%sf(k, j, l) + dqL_prim_dz_n(2)%vf(i)%sf(k, j, l) = 25.e-2_wp* & + dqL_prim_dz_n(2)%vf(i)%sf(k, j, l) - end do end do end do end do + end do $:END_GPU_PARALLEL_LOOP() $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=3) - do l = is3_viscous%beg + 1, is3_viscous%end - 1 - do j = is2_viscous%beg, is2_viscous%end - 1 - do k = is1_viscous%beg, is1_viscous%end - $:GPU_LOOP(parallelism='[seq]') - do i = iv%beg, iv%end + do l = is3_viscous%beg + 1, is3_viscous%end - 1 + do j = is2_viscous%beg, is2_viscous%end - 1 + do k = is1_viscous%beg, is1_viscous%end + $:GPU_LOOP(parallelism='[seq]') + do i = iv%beg, iv%end - dqR_prim_dz_n(2)%vf(i)%sf(k, j, l) = & - (dqL_prim_dz_n(3)%vf(i)%sf(k, j + 1, l) + & - dqR_prim_dz_n(3)%vf(i)%sf(k, j + 1, l) + & - dqL_prim_dz_n(3)%vf(i)%sf(k, j, l) + & - dqR_prim_dz_n(3)%vf(i)%sf(k, j, l)) + dqR_prim_dz_n(2)%vf(i)%sf(k, j, l) = & + (dqL_prim_dz_n(3)%vf(i)%sf(k, j + 1, l) + & + dqR_prim_dz_n(3)%vf(i)%sf(k, j + 1, l) + & + dqL_prim_dz_n(3)%vf(i)%sf(k, j, l) + & + dqR_prim_dz_n(3)%vf(i)%sf(k, j, l)) - dqR_prim_dz_n(2)%vf(i)%sf(k, j, l) = 25.e-2_wp* & - dqR_prim_dz_n(2)%vf(i)%sf(k, j, l) + dqR_prim_dz_n(2)%vf(i)%sf(k, j, l) = 25.e-2_wp* & + dqR_prim_dz_n(2)%vf(i)%sf(k, j, l) - end do end do end do end do + end do $:END_GPU_PARALLEL_LOOP() $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=3) - do j = is3_viscous%beg + 1, is3_viscous%end - do l = is2_viscous%beg + 1, is2_viscous%end - 1 - do k = is1_viscous%beg, is1_viscous%end - $:GPU_LOOP(parallelism='[seq]') - do i = iv%beg, iv%end + do j = is3_viscous%beg + 1, is3_viscous%end + do l = is2_viscous%beg + 1, is2_viscous%end - 1 + do k = is1_viscous%beg, is1_viscous%end + $:GPU_LOOP(parallelism='[seq]') + do i = iv%beg, iv%end - dqL_prim_dy_n(3)%vf(i)%sf(k, l, j) = & - (dqL_prim_dy_n(2)%vf(i)%sf(k, l, j) + & - dqR_prim_dy_n(2)%vf(i)%sf(k, l, j) + & - dqL_prim_dy_n(2)%vf(i)%sf(k, l, j - 1) + & - dqR_prim_dy_n(2)%vf(i)%sf(k, l, j - 1)) + dqL_prim_dy_n(3)%vf(i)%sf(k, l, j) = & + (dqL_prim_dy_n(2)%vf(i)%sf(k, l, j) + & + dqR_prim_dy_n(2)%vf(i)%sf(k, l, j) + & + dqL_prim_dy_n(2)%vf(i)%sf(k, l, j - 1) + & + dqR_prim_dy_n(2)%vf(i)%sf(k, l, j - 1)) - dqL_prim_dy_n(3)%vf(i)%sf(k, l, j) = 25.e-2_wp* & - dqL_prim_dy_n(3)%vf(i)%sf(k, l, j) + dqL_prim_dy_n(3)%vf(i)%sf(k, l, j) = 25.e-2_wp* & + dqL_prim_dy_n(3)%vf(i)%sf(k, l, j) - end do end do end do end do + end do $:END_GPU_PARALLEL_LOOP() $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=3) - do j = is3_viscous%beg, is3_viscous%end - 1 - do l = is2_viscous%beg + 1, is2_viscous%end - 1 - do k = is1_viscous%beg, is1_viscous%end - $:GPU_LOOP(parallelism='[seq]') - do i = iv%beg, iv%end + do j = is3_viscous%beg, is3_viscous%end - 1 + do l = is2_viscous%beg + 1, is2_viscous%end - 1 + do k = is1_viscous%beg, is1_viscous%end + $:GPU_LOOP(parallelism='[seq]') + do i = iv%beg, iv%end - dqR_prim_dy_n(3)%vf(i)%sf(k, l, j) = & - (dqL_prim_dy_n(2)%vf(i)%sf(k, l, j + 1) + & - dqR_prim_dy_n(2)%vf(i)%sf(k, l, j + 1) + & - dqL_prim_dy_n(2)%vf(i)%sf(k, l, j) + & - dqR_prim_dy_n(2)%vf(i)%sf(k, l, j)) + dqR_prim_dy_n(3)%vf(i)%sf(k, l, j) = & + (dqL_prim_dy_n(2)%vf(i)%sf(k, l, j + 1) + & + dqR_prim_dy_n(2)%vf(i)%sf(k, l, j + 1) + & + dqL_prim_dy_n(2)%vf(i)%sf(k, l, j) + & + dqR_prim_dy_n(2)%vf(i)%sf(k, l, j)) - dqR_prim_dy_n(3)%vf(i)%sf(k, l, j) = 25.e-2_wp* & - dqR_prim_dy_n(3)%vf(i)%sf(k, l, j) + dqR_prim_dy_n(3)%vf(i)%sf(k, l, j) = 25.e-2_wp* & + dqR_prim_dy_n(3)%vf(i)%sf(k, l, j) - end do end do end do end do + end do $:END_GPU_PARALLEL_LOOP() $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=3) - do j = is3_viscous%beg + 1, is3_viscous%end - do l = is2_viscous%beg, is2_viscous%end - do k = is1_viscous%beg + 1, is1_viscous%end - 1 - $:GPU_LOOP(parallelism='[seq]') - do i = iv%beg, iv%end + do j = is3_viscous%beg + 1, is3_viscous%end + do l = is2_viscous%beg, is2_viscous%end + do k = is1_viscous%beg + 1, is1_viscous%end - 1 + $:GPU_LOOP(parallelism='[seq]') + do i = iv%beg, iv%end - dqL_prim_dx_n(3)%vf(i)%sf(k, l, j) = & - (dqL_prim_dx_n(1)%vf(i)%sf(k, l, j) + & - dqR_prim_dx_n(1)%vf(i)%sf(k, l, j) + & - dqL_prim_dx_n(1)%vf(i)%sf(k, l, j - 1) + & - dqR_prim_dx_n(1)%vf(i)%sf(k, l, j - 1)) + dqL_prim_dx_n(3)%vf(i)%sf(k, l, j) = & + (dqL_prim_dx_n(1)%vf(i)%sf(k, l, j) + & + dqR_prim_dx_n(1)%vf(i)%sf(k, l, j) + & + dqL_prim_dx_n(1)%vf(i)%sf(k, l, j - 1) + & + dqR_prim_dx_n(1)%vf(i)%sf(k, l, j - 1)) - dqL_prim_dx_n(3)%vf(i)%sf(k, l, j) = 25.e-2_wp* & - dqL_prim_dx_n(3)%vf(i)%sf(k, l, j) + dqL_prim_dx_n(3)%vf(i)%sf(k, l, j) = 25.e-2_wp* & + dqL_prim_dx_n(3)%vf(i)%sf(k, l, j) - end do end do end do end do + end do $:END_GPU_PARALLEL_LOOP() $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=3) - do j = is3_viscous%beg, is3_viscous%end - 1 - do l = is2_viscous%beg, is2_viscous%end - do k = is1_viscous%beg + 1, is1_viscous%end - 1 - $:GPU_LOOP(parallelism='[seq]') - do i = iv%beg, iv%end - dqR_prim_dx_n(3)%vf(i)%sf(k, l, j) = & - (dqL_prim_dx_n(1)%vf(i)%sf(k, l, j + 1) + & - dqR_prim_dx_n(1)%vf(i)%sf(k, l, j + 1) + & - dqL_prim_dx_n(1)%vf(i)%sf(k, l, j) + & - dqR_prim_dx_n(1)%vf(i)%sf(k, l, j)) + do j = is3_viscous%beg, is3_viscous%end - 1 + do l = is2_viscous%beg, is2_viscous%end + do k = is1_viscous%beg + 1, is1_viscous%end - 1 + $:GPU_LOOP(parallelism='[seq]') + do i = iv%beg, iv%end + dqR_prim_dx_n(3)%vf(i)%sf(k, l, j) = & + (dqL_prim_dx_n(1)%vf(i)%sf(k, l, j + 1) + & + dqR_prim_dx_n(1)%vf(i)%sf(k, l, j + 1) + & + dqL_prim_dx_n(1)%vf(i)%sf(k, l, j) + & + dqR_prim_dx_n(1)%vf(i)%sf(k, l, j)) - dqR_prim_dx_n(3)%vf(i)%sf(k, l, j) = 25.e-2_wp* & - dqR_prim_dx_n(3)%vf(i)%sf(k, l, j) + dqR_prim_dx_n(3)%vf(i)%sf(k, l, j) = 25.e-2_wp* & + dqR_prim_dx_n(3)%vf(i)%sf(k, l, j) - end do end do end do end do + end do $:END_GPU_PARALLEL_LOOP() do i = iv%beg, iv%end @@ -1048,42 +1048,42 @@ contains if (weno_Re_flux) then if (norm_dir == 2) then $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=4) - do i = iv%beg, iv%end - do l = is3_viscous%beg, is3_viscous%end - do j = is1_viscous%beg, is1_viscous%end - do k = is2_viscous%beg, is2_viscous%end - vL_prim_vf(i)%sf(k, j, l) = vL_y(j, k, l, i) - vR_prim_vf(i)%sf(k, j, l) = vR_y(j, k, l, i) - end do + do i = iv%beg, iv%end + do l = is3_viscous%beg, is3_viscous%end + do j = is1_viscous%beg, is1_viscous%end + do k = is2_viscous%beg, is2_viscous%end + vL_prim_vf(i)%sf(k, j, l) = vL_y(j, k, l, i) + vR_prim_vf(i)%sf(k, j, l) = vR_y(j, k, l, i) end do end do end do + end do $:END_GPU_PARALLEL_LOOP() elseif (norm_dir == 3) then $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=4) - do i = iv%beg, iv%end - do j = is1_viscous%beg, is1_viscous%end - do k = is2_viscous%beg, is2_viscous%end - do l = is3_viscous%beg, is3_viscous%end - vL_prim_vf(i)%sf(l, k, j) = vL_z(j, k, l, i) - vR_prim_vf(i)%sf(l, k, j) = vR_z(j, k, l, i) - end do + do i = iv%beg, iv%end + do j = is1_viscous%beg, is1_viscous%end + do k = is2_viscous%beg, is2_viscous%end + do l = is3_viscous%beg, is3_viscous%end + vL_prim_vf(i)%sf(l, k, j) = vL_z(j, k, l, i) + vR_prim_vf(i)%sf(l, k, j) = vR_z(j, k, l, i) end do end do end do + end do $:END_GPU_PARALLEL_LOOP() elseif (norm_dir == 1) then $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=4) - do i = iv%beg, iv%end - do l = is3_viscous%beg, is3_viscous%end - do k = is2_viscous%beg, is2_viscous%end - do j = is1_viscous%beg, is1_viscous%end - vL_prim_vf(i)%sf(j, k, l) = vL_x(j, k, l, i) - vR_prim_vf(i)%sf(j, k, l) = vR_x(j, k, l, i) - end do + do i = iv%beg, iv%end + do l = is3_viscous%beg, is3_viscous%end + do k = is2_viscous%beg, is2_viscous%end + do j = is1_viscous%beg, is1_viscous%end + vL_prim_vf(i)%sf(j, k, l) = vL_x(j, k, l, i) + vR_prim_vf(i)%sf(j, k, l) = vR_x(j, k, l, i) end do end do end do + end do $:END_GPU_PARALLEL_LOOP() end if end if @@ -1152,42 +1152,42 @@ contains if (weno_Re_flux) then if (norm_dir == 2) then $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=4) - do i = iv%beg, iv%end - do l = is3_viscous%beg, is3_viscous%end - do j = is1_viscous%beg, is1_viscous%end - do k = is2_viscous%beg, is2_viscous%end - vL_prim_vf(i)%sf(k, j, l) = vL_y(j, k, l, i) - vR_prim_vf(i)%sf(k, j, l) = vR_y(j, k, l, i) - end do + do i = iv%beg, iv%end + do l = is3_viscous%beg, is3_viscous%end + do j = is1_viscous%beg, is1_viscous%end + do k = is2_viscous%beg, is2_viscous%end + vL_prim_vf(i)%sf(k, j, l) = vL_y(j, k, l, i) + vR_prim_vf(i)%sf(k, j, l) = vR_y(j, k, l, i) end do end do end do + end do $:END_GPU_PARALLEL_LOOP() elseif (norm_dir == 3) then $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=4) - do i = iv%beg, iv%end - do j = is1_viscous%beg, is1_viscous%end - do k = is2_viscous%beg, is2_viscous%end - do l = is3_viscous%beg, is3_viscous%end - vL_prim_vf(i)%sf(l, k, j) = vL_z(j, k, l, i) - vR_prim_vf(i)%sf(l, k, j) = vR_z(j, k, l, i) - end do + do i = iv%beg, iv%end + do j = is1_viscous%beg, is1_viscous%end + do k = is2_viscous%beg, is2_viscous%end + do l = is3_viscous%beg, is3_viscous%end + vL_prim_vf(i)%sf(l, k, j) = vL_z(j, k, l, i) + vR_prim_vf(i)%sf(l, k, j) = vR_z(j, k, l, i) end do end do end do + end do $:END_GPU_PARALLEL_LOOP() elseif (norm_dir == 1) then $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=4) - do i = iv%beg, iv%end - do l = is3_viscous%beg, is3_viscous%end - do k = is2_viscous%beg, is2_viscous%end - do j = is1_viscous%beg, is1_viscous%end - vL_prim_vf(i)%sf(j, k, l) = vL_x(j, k, l, i) - vR_prim_vf(i)%sf(j, k, l) = vR_x(j, k, l, i) - end do + do i = iv%beg, iv%end + do l = is3_viscous%beg, is3_viscous%end + do k = is2_viscous%beg, is2_viscous%end + do j = is1_viscous%beg, is1_viscous%end + vL_prim_vf(i)%sf(j, k, l) = vL_x(j, k, l, i) + vR_prim_vf(i)%sf(j, k, l) = vR_x(j, k, l, i) end do end do end do + end do $:END_GPU_PARALLEL_LOOP() end if end if @@ -1245,21 +1245,21 @@ contains ! spatial derivatives inside the cell. $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=3) - do l = is3_viscous%beg, is3_viscous%end - do k = is2_viscous%beg, is2_viscous%end - do j = is1_viscous%beg + 1, is1_viscous%end - 1 - $:GPU_LOOP(parallelism='[seq]') - do i = iv%beg, iv%end - dv_ds_vf(i)%sf(j, k, l) = & - 1._wp/((1._wp + wa_flg)*dL(j)) & - *(wa_flg*vL_vf(i)%sf(j + 1, k, l) & - + vR_vf(i)%sf(j, k, l) & - - vL_vf(i)%sf(j, k, l) & - - wa_flg*vR_vf(i)%sf(j - 1, k, l)) - end do + do l = is3_viscous%beg, is3_viscous%end + do k = is2_viscous%beg, is2_viscous%end + do j = is1_viscous%beg + 1, is1_viscous%end - 1 + $:GPU_LOOP(parallelism='[seq]') + do i = iv%beg, iv%end + dv_ds_vf(i)%sf(j, k, l) = & + 1._wp/((1._wp + wa_flg)*dL(j)) & + *(wa_flg*vL_vf(i)%sf(j + 1, k, l) & + + vR_vf(i)%sf(j, k, l) & + - vL_vf(i)%sf(j, k, l) & + - wa_flg*vR_vf(i)%sf(j - 1, k, l)) end do end do end do + end do $:END_GPU_PARALLEL_LOOP() ! END: First-Order Spatial Derivatives in x-direction @@ -1274,21 +1274,21 @@ contains ! spatial derivatives inside the cell. $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=3) - do l = is3_viscous%beg, is3_viscous%end - do k = is2_viscous%beg + 1, is2_viscous%end - 1 - do j = is1_viscous%beg, is1_viscous%end - $:GPU_LOOP(parallelism='[seq]') - do i = iv%beg, iv%end - dv_ds_vf(i)%sf(j, k, l) = & - 1._wp/((1._wp + wa_flg)*dL(k)) & - *(wa_flg*vL_vf(i)%sf(j, k + 1, l) & - + vR_vf(i)%sf(j, k, l) & - - vL_vf(i)%sf(j, k, l) & - - wa_flg*vR_vf(i)%sf(j, k - 1, l)) - end do + do l = is3_viscous%beg, is3_viscous%end + do k = is2_viscous%beg + 1, is2_viscous%end - 1 + do j = is1_viscous%beg, is1_viscous%end + $:GPU_LOOP(parallelism='[seq]') + do i = iv%beg, iv%end + dv_ds_vf(i)%sf(j, k, l) = & + 1._wp/((1._wp + wa_flg)*dL(k)) & + *(wa_flg*vL_vf(i)%sf(j, k + 1, l) & + + vR_vf(i)%sf(j, k, l) & + - vL_vf(i)%sf(j, k, l) & + - wa_flg*vR_vf(i)%sf(j, k - 1, l)) end do end do end do + end do $:END_GPU_PARALLEL_LOOP() ! END: First-Order Spatial Derivatives in y-direction @@ -1303,21 +1303,21 @@ contains ! spatial derivatives inside the cell. $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=3) - do l = is3_viscous%beg + 1, is3_viscous%end - 1 - do k = is2_viscous%beg, is2_viscous%end - do j = is1_viscous%beg, is1_viscous%end - $:GPU_LOOP(parallelism='[seq]') - do i = iv%beg, iv%end - dv_ds_vf(i)%sf(j, k, l) = & - 1._wp/((1._wp + wa_flg)*dL(l)) & - *(wa_flg*vL_vf(i)%sf(j, k, l + 1) & - + vR_vf(i)%sf(j, k, l) & - - vL_vf(i)%sf(j, k, l) & - - wa_flg*vR_vf(i)%sf(j, k, l - 1)) - end do + do l = is3_viscous%beg + 1, is3_viscous%end - 1 + do k = is2_viscous%beg, is2_viscous%end + do j = is1_viscous%beg, is1_viscous%end + $:GPU_LOOP(parallelism='[seq]') + do i = iv%beg, iv%end + dv_ds_vf(i)%sf(j, k, l) = & + 1._wp/((1._wp + wa_flg)*dL(l)) & + *(wa_flg*vL_vf(i)%sf(j, k, l + 1) & + + vR_vf(i)%sf(j, k, l) & + - vL_vf(i)%sf(j, k, l) & + - wa_flg*vR_vf(i)%sf(j, k, l - 1)) end do end do end do + end do $:END_GPU_PARALLEL_LOOP() end if @@ -1359,148 +1359,148 @@ contains $:GPU_UPDATE(device='[is1_viscous,is2_viscous,is3_viscous]') $:GPU_PARALLEL_LOOP(private='[j,k,l]', collapse=3) - do l = is3_viscous%beg, is3_viscous%end - do k = is2_viscous%beg, is2_viscous%end - do j = is1_viscous%beg, is1_viscous%end - grad_x%sf(j, k, l) = & - (var%sf(j + 1, k, l) - var%sf(j - 1, k, l))/ & - (x_cc(j + 1) - x_cc(j - 1)) - end do + do l = is3_viscous%beg, is3_viscous%end + do k = is2_viscous%beg, is2_viscous%end + do j = is1_viscous%beg, is1_viscous%end + grad_x%sf(j, k, l) = & + (var%sf(j + 1, k, l) - var%sf(j - 1, k, l))/ & + (x_cc(j + 1) - x_cc(j - 1)) end do end do + end do $:END_GPU_PARALLEL_LOOP() if (n > 0) then $:GPU_PARALLEL_LOOP(private='[j,k,l]', collapse=3) - do l = is3_viscous%beg, is3_viscous%end - do k = is2_viscous%beg, is2_viscous%end - do j = is1_viscous%beg, is1_viscous%end - grad_y%sf(j, k, l) = & - (var%sf(j, k + 1, l) - var%sf(j, k - 1, l))/ & - (y_cc(k + 1) - y_cc(k - 1)) - end do + do l = is3_viscous%beg, is3_viscous%end + do k = is2_viscous%beg, is2_viscous%end + do j = is1_viscous%beg, is1_viscous%end + grad_y%sf(j, k, l) = & + (var%sf(j, k + 1, l) - var%sf(j, k - 1, l))/ & + (y_cc(k + 1) - y_cc(k - 1)) end do end do + end do $:END_GPU_PARALLEL_LOOP() end if if (p > 0) then $:GPU_PARALLEL_LOOP(private='[j,k,l]', collapse=3) - do l = is3_viscous%beg, is3_viscous%end - do k = is2_viscous%beg, is2_viscous%end - do j = is1_viscous%beg, is1_viscous%end - grad_z%sf(j, k, l) = & - (var%sf(j, k, l + 1) - var%sf(j, k, l - 1))/ & - (z_cc(l + 1) - z_cc(l - 1)) - end do + do l = is3_viscous%beg, is3_viscous%end + do k = is2_viscous%beg, is2_viscous%end + do j = is1_viscous%beg, is1_viscous%end + grad_z%sf(j, k, l) = & + (var%sf(j, k, l + 1) - var%sf(j, k, l - 1))/ & + (z_cc(l + 1) - z_cc(l - 1)) end do end do + end do $:END_GPU_PARALLEL_LOOP() end if $:GPU_PARALLEL_LOOP(private='[k,l]', collapse=2) - do l = idwbuff(3)%beg, idwbuff(3)%end - do k = idwbuff(2)%beg, idwbuff(2)%end - grad_x%sf(idwbuff(1)%beg, k, l) = & - (-3._wp*var%sf(idwbuff(1)%beg, k, l) + 4._wp*var%sf(idwbuff(1)%beg + 1, k, l) - var%sf(idwbuff(1)%beg + 2, k, l))/ & - (x_cc(idwbuff(1)%beg + 2) - x_cc(idwbuff(1)%beg)) - grad_x%sf(idwbuff(1)%end, k, l) = & - (+3._wp*var%sf(idwbuff(1)%end, k, l) - 4._wp*var%sf(idwbuff(1)%end - 1, k, l) + var%sf(idwbuff(1)%end - 2, k, l))/ & - (x_cc(idwbuff(1)%end) - x_cc(idwbuff(1)%end - 2)) - end do + do l = idwbuff(3)%beg, idwbuff(3)%end + do k = idwbuff(2)%beg, idwbuff(2)%end + grad_x%sf(idwbuff(1)%beg, k, l) = & + (-3._wp*var%sf(idwbuff(1)%beg, k, l) + 4._wp*var%sf(idwbuff(1)%beg + 1, k, l) - var%sf(idwbuff(1)%beg + 2, k, l))/ & + (x_cc(idwbuff(1)%beg + 2) - x_cc(idwbuff(1)%beg)) + grad_x%sf(idwbuff(1)%end, k, l) = & + (+3._wp*var%sf(idwbuff(1)%end, k, l) - 4._wp*var%sf(idwbuff(1)%end - 1, k, l) + var%sf(idwbuff(1)%end - 2, k, l))/ & + (x_cc(idwbuff(1)%end) - x_cc(idwbuff(1)%end - 2)) end do + end do $:END_GPU_PARALLEL_LOOP() if (n > 0) then $:GPU_PARALLEL_LOOP(private='[j,l]', collapse=2) - do l = idwbuff(3)%beg, idwbuff(3)%end - do j = idwbuff(1)%beg, idwbuff(1)%end - grad_y%sf(j, idwbuff(2)%beg, l) = & - (-3._wp*var%sf(j, idwbuff(2)%beg, l) + 4._wp*var%sf(j, idwbuff(2)%beg + 1, l) - var%sf(j, idwbuff(2)%beg + 2, l))/ & - (y_cc(idwbuff(2)%beg + 2) - y_cc(idwbuff(2)%beg)) - grad_y%sf(j, idwbuff(2)%end, l) = & - (+3._wp*var%sf(j, idwbuff(2)%end, l) - 4._wp*var%sf(j, idwbuff(2)%end - 1, l) + var%sf(j, idwbuff(2)%end - 2, l))/ & - (y_cc(idwbuff(2)%end) - y_cc(idwbuff(2)%end - 2)) - end do + do l = idwbuff(3)%beg, idwbuff(3)%end + do j = idwbuff(1)%beg, idwbuff(1)%end + grad_y%sf(j, idwbuff(2)%beg, l) = & + (-3._wp*var%sf(j, idwbuff(2)%beg, l) + 4._wp*var%sf(j, idwbuff(2)%beg + 1, l) - var%sf(j, idwbuff(2)%beg + 2, l))/ & + (y_cc(idwbuff(2)%beg + 2) - y_cc(idwbuff(2)%beg)) + grad_y%sf(j, idwbuff(2)%end, l) = & + (+3._wp*var%sf(j, idwbuff(2)%end, l) - 4._wp*var%sf(j, idwbuff(2)%end - 1, l) + var%sf(j, idwbuff(2)%end - 2, l))/ & + (y_cc(idwbuff(2)%end) - y_cc(idwbuff(2)%end - 2)) end do + end do $:END_GPU_PARALLEL_LOOP() if (p > 0) then $:GPU_PARALLEL_LOOP(private='[j,k]', collapse=2) - do k = idwbuff(2)%beg, idwbuff(2)%end - do j = idwbuff(1)%beg, idwbuff(1)%end - grad_z%sf(j, k, idwbuff(3)%beg) = & - (-3._wp*var%sf(j, k, idwbuff(3)%beg) + 4._wp*var%sf(j, k, idwbuff(3)%beg + 1) - var%sf(j, k, idwbuff(3)%beg + 2))/ & - (z_cc(idwbuff(3)%beg + 2) - z_cc(is3_viscous%beg)) - grad_z%sf(j, k, idwbuff(3)%end) = & - (+3._wp*var%sf(j, k, idwbuff(3)%end) - 4._wp*var%sf(j, k, idwbuff(3)%end - 1) + var%sf(j, k, idwbuff(3)%end - 2))/ & - (z_cc(idwbuff(3)%end) - z_cc(idwbuff(3)%end - 2)) - end do + do k = idwbuff(2)%beg, idwbuff(2)%end + do j = idwbuff(1)%beg, idwbuff(1)%end + grad_z%sf(j, k, idwbuff(3)%beg) = & + (-3._wp*var%sf(j, k, idwbuff(3)%beg) + 4._wp*var%sf(j, k, idwbuff(3)%beg + 1) - var%sf(j, k, idwbuff(3)%beg + 2))/ & + (z_cc(idwbuff(3)%beg + 2) - z_cc(is3_viscous%beg)) + grad_z%sf(j, k, idwbuff(3)%end) = & + (+3._wp*var%sf(j, k, idwbuff(3)%end) - 4._wp*var%sf(j, k, idwbuff(3)%end - 1) + var%sf(j, k, idwbuff(3)%end - 2))/ & + (z_cc(idwbuff(3)%end) - z_cc(idwbuff(3)%end - 2)) end do + end do $:END_GPU_PARALLEL_LOOP() end if end if if (bc_x%beg <= BC_GHOST_EXTRAP) then $:GPU_PARALLEL_LOOP(private='[k,l]', collapse=2) - do l = idwbuff(3)%beg, idwbuff(3)%end - do k = idwbuff(2)%beg, idwbuff(2)%end - grad_x%sf(0, k, l) = (-3._wp*var%sf(0, k, l) + 4._wp*var%sf(1, k, l) - var%sf(2, k, l))/ & - (x_cc(2) - x_cc(0)) - end do + do l = idwbuff(3)%beg, idwbuff(3)%end + do k = idwbuff(2)%beg, idwbuff(2)%end + grad_x%sf(0, k, l) = (-3._wp*var%sf(0, k, l) + 4._wp*var%sf(1, k, l) - var%sf(2, k, l))/ & + (x_cc(2) - x_cc(0)) end do + end do $:END_GPU_PARALLEL_LOOP() end if if (bc_x%end <= BC_GHOST_EXTRAP) then $:GPU_PARALLEL_LOOP(private='[k,l]', collapse=2) - do l = idwbuff(3)%beg, idwbuff(3)%end - do k = idwbuff(2)%beg, idwbuff(2)%end - grad_x%sf(m, k, l) = (3._wp*var%sf(m, k, l) - 4._wp*var%sf(m - 1, k, l) + var%sf(m - 2, k, l))/ & - (x_cc(m) - x_cc(m - 2)) - end do + do l = idwbuff(3)%beg, idwbuff(3)%end + do k = idwbuff(2)%beg, idwbuff(2)%end + grad_x%sf(m, k, l) = (3._wp*var%sf(m, k, l) - 4._wp*var%sf(m - 1, k, l) + var%sf(m - 2, k, l))/ & + (x_cc(m) - x_cc(m - 2)) end do + end do $:END_GPU_PARALLEL_LOOP() end if if (n > 0) then if (bc_y%beg <= BC_GHOST_EXTRAP .and. bc_y%beg /= BC_NULL) then $:GPU_PARALLEL_LOOP(private='[j,l]', collapse=2) - do l = idwbuff(3)%beg, idwbuff(3)%end - do j = idwbuff(1)%beg, idwbuff(1)%end - grad_y%sf(j, 0, l) = (-3._wp*var%sf(j, 0, l) + 4._wp*var%sf(j, 1, l) - var%sf(j, 2, l))/ & - (y_cc(2) - y_cc(0)) - end do + do l = idwbuff(3)%beg, idwbuff(3)%end + do j = idwbuff(1)%beg, idwbuff(1)%end + grad_y%sf(j, 0, l) = (-3._wp*var%sf(j, 0, l) + 4._wp*var%sf(j, 1, l) - var%sf(j, 2, l))/ & + (y_cc(2) - y_cc(0)) end do + end do $:END_GPU_PARALLEL_LOOP() end if if (bc_y%end <= BC_GHOST_EXTRAP) then $:GPU_PARALLEL_LOOP(private='[j,l]', collapse=2) - do l = idwbuff(3)%beg, idwbuff(3)%end - do j = idwbuff(1)%beg, idwbuff(1)%end - grad_y%sf(j, n, l) = (3._wp*var%sf(j, n, l) - 4._wp*var%sf(j, n - 1, l) + var%sf(j, n - 2, l))/ & - (y_cc(n) - y_cc(n - 2)) - end do + do l = idwbuff(3)%beg, idwbuff(3)%end + do j = idwbuff(1)%beg, idwbuff(1)%end + grad_y%sf(j, n, l) = (3._wp*var%sf(j, n, l) - 4._wp*var%sf(j, n - 1, l) + var%sf(j, n - 2, l))/ & + (y_cc(n) - y_cc(n - 2)) end do + end do $:END_GPU_PARALLEL_LOOP() end if if (p > 0) then if (bc_z%beg <= BC_GHOST_EXTRAP) then $:GPU_PARALLEL_LOOP(private='[j,k]', collapse=2) - do k = idwbuff(2)%beg, idwbuff(2)%end - do j = idwbuff(1)%beg, idwbuff(1)%end - grad_z%sf(j, k, 0) = & - (-3._wp*var%sf(j, k, 0) + 4._wp*var%sf(j, k, 1) - var%sf(j, k, 2))/ & - (z_cc(2) - z_cc(0)) - end do + do k = idwbuff(2)%beg, idwbuff(2)%end + do j = idwbuff(1)%beg, idwbuff(1)%end + grad_z%sf(j, k, 0) = & + (-3._wp*var%sf(j, k, 0) + 4._wp*var%sf(j, k, 1) - var%sf(j, k, 2))/ & + (z_cc(2) - z_cc(0)) end do + end do $:END_GPU_PARALLEL_LOOP() end if if (bc_z%end <= BC_GHOST_EXTRAP) then $:GPU_PARALLEL_LOOP(private='[j,k]', collapse=2) - do k = idwbuff(2)%beg, idwbuff(2)%end - do j = idwbuff(1)%beg, idwbuff(1)%end - grad_z%sf(j, k, p) = & - (3._wp*var%sf(j, k, p) - 4._wp*var%sf(j, k, p - 1) + var%sf(j, k, p - 2))/ & - (z_cc(p) - z_cc(p - 2)) - end do + do k = idwbuff(2)%beg, idwbuff(2)%end + do j = idwbuff(1)%beg, idwbuff(1)%end + grad_z%sf(j, k, p) = & + (3._wp*var%sf(j, k, p) - 4._wp*var%sf(j, k, p - 1) + var%sf(j, k, p - 2))/ & + (z_cc(p) - z_cc(p - 2)) end do + end do $:END_GPU_PARALLEL_LOOP() end if end if diff --git a/src/simulation/m_weno.fpp b/src/simulation/m_weno.fpp index 5a3fd0aec..22d9add5d 100644 --- a/src/simulation/m_weno.fpp +++ b/src/simulation/m_weno.fpp @@ -671,120 +671,120 @@ contains if (weno_order == 1) then if (weno_dir == 1) then $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=4) - do i = 1, ubound(v_vf, 1) - do l = is3_weno%beg, is3_weno%end - do k = is2_weno%beg, is2_weno%end - do j = is1_weno%beg, is1_weno%end - vL_rs_vf_x(j, k, l, i) = v_vf(i)%sf(j, k, l) - vR_rs_vf_x(j, k, l, i) = v_vf(i)%sf(j, k, l) - end do + do i = 1, ubound(v_vf, 1) + do l = is3_weno%beg, is3_weno%end + do k = is2_weno%beg, is2_weno%end + do j = is1_weno%beg, is1_weno%end + vL_rs_vf_x(j, k, l, i) = v_vf(i)%sf(j, k, l) + vR_rs_vf_x(j, k, l, i) = v_vf(i)%sf(j, k, l) end do end do end do + end do $:END_GPU_PARALLEL_LOOP() else if (weno_dir == 2) then $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=4) - do i = 1, ubound(v_vf, 1) - do l = is3_weno%beg, is3_weno%end - do k = is2_weno%beg, is2_weno%end - do j = is1_weno%beg, is1_weno%end - vL_rs_vf_y(j, k, l, i) = v_vf(i)%sf(k, j, l) - vR_rs_vf_y(j, k, l, i) = v_vf(i)%sf(k, j, l) - end do + do i = 1, ubound(v_vf, 1) + do l = is3_weno%beg, is3_weno%end + do k = is2_weno%beg, is2_weno%end + do j = is1_weno%beg, is1_weno%end + vL_rs_vf_y(j, k, l, i) = v_vf(i)%sf(k, j, l) + vR_rs_vf_y(j, k, l, i) = v_vf(i)%sf(k, j, l) end do end do end do + end do $:END_GPU_PARALLEL_LOOP() else if (weno_dir == 3) then $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=4) - do i = 1, ubound(v_vf, 1) - do l = is3_weno%beg, is3_weno%end - do k = is2_weno%beg, is2_weno%end - do j = is1_weno%beg, is1_weno%end - vL_rs_vf_z(j, k, l, i) = v_vf(i)%sf(l, k, j) - vR_rs_vf_z(j, k, l, i) = v_vf(i)%sf(l, k, j) - end do + do i = 1, ubound(v_vf, 1) + do l = is3_weno%beg, is3_weno%end + do k = is2_weno%beg, is2_weno%end + do j = is1_weno%beg, is1_weno%end + vL_rs_vf_z(j, k, l, i) = v_vf(i)%sf(l, k, j) + vR_rs_vf_z(j, k, l, i) = v_vf(i)%sf(l, k, j) end do end do end do + end do $:END_GPU_PARALLEL_LOOP() end if elseif (weno_order == 3) then #:for WENO_DIR, XYZ in [(1, 'x'), (2, 'y'), (3, 'z')] if (weno_dir == ${WENO_DIR}$) then $:GPU_PARALLEL_LOOP(collapse=4,private='[i,j,k,l,beta,dvd,poly,omega,alpha,tau]') - do l = is3_weno%beg, is3_weno%end - do k = is2_weno%beg, is2_weno%end - do j = is1_weno%beg, is1_weno%end - do i = 1, v_size - ! reconstruct from left side + do l = is3_weno%beg, is3_weno%end + do k = is2_weno%beg, is2_weno%end + do j = is1_weno%beg, is1_weno%end + do i = 1, v_size + ! reconstruct from left side + + dvd(0) = v_rs_ws_${XYZ}$ (j + 1, k, l, i) & + - v_rs_ws_${XYZ}$ (j, k, l, i) + dvd(-1) = v_rs_ws_${XYZ}$ (j, k, l, i) & + - v_rs_ws_${XYZ}$ (j - 1, k, l, i) + + poly(0) = v_rs_ws_${XYZ}$ (j, k, l, i) & + + poly_coef_cbL_${XYZ}$ (j, 0, 0)*dvd(0) + poly(1) = v_rs_ws_${XYZ}$ (j, k, l, i) & + + poly_coef_cbL_${XYZ}$ (j, 1, 0)*dvd(-1) + + beta(0) = beta_coef_${XYZ}$ (j, 0, 0)*dvd(0)*dvd(0) & + + weno_eps + beta(1) = beta_coef_${XYZ}$ (j, 1, 0)*dvd(-1)*dvd(-1) & + + weno_eps + + if (wenojs) then + alpha = d_cbL_${XYZ}$ (:, j)/(beta*beta) + + elseif (mapped_weno) then + alpha = d_cbL_${XYZ}$ (:, j)/(beta*beta) + omega = alpha/sum(alpha) + alpha = (d_cbL_${XYZ}$ (:, j)*(1._wp + d_cbL_${XYZ}$ (:, j) - 3._wp*omega) + omega**2._wp) & + *(omega/(d_cbL_${XYZ}$ (:, j)**2._wp + omega*(1._wp - 2._wp*d_cbL_${XYZ}$ (:, j)))) - dvd(0) = v_rs_ws_${XYZ}$ (j + 1, k, l, i) & - - v_rs_ws_${XYZ}$ (j, k, l, i) - dvd(-1) = v_rs_ws_${XYZ}$ (j, k, l, i) & - - v_rs_ws_${XYZ}$ (j - 1, k, l, i) + elseif (wenoz) then + ! Borges, et al. (2008) - poly(0) = v_rs_ws_${XYZ}$ (j, k, l, i) & - + poly_coef_cbL_${XYZ}$ (j, 0, 0)*dvd(0) - poly(1) = v_rs_ws_${XYZ}$ (j, k, l, i) & - + poly_coef_cbL_${XYZ}$ (j, 1, 0)*dvd(-1) + tau = abs(beta(1) - beta(0)) + alpha = d_cbL_${XYZ}$ (:, j)*(1._wp + tau/beta) - beta(0) = beta_coef_${XYZ}$ (j, 0, 0)*dvd(0)*dvd(0) & - + weno_eps - beta(1) = beta_coef_${XYZ}$ (j, 1, 0)*dvd(-1)*dvd(-1) & - + weno_eps + end if - if (wenojs) then - alpha = d_cbL_${XYZ}$ (:, j)/(beta*beta) + omega = alpha/sum(alpha) - elseif (mapped_weno) then - alpha = d_cbL_${XYZ}$ (:, j)/(beta*beta) - omega = alpha/sum(alpha) - alpha = (d_cbL_${XYZ}$ (:, j)*(1._wp + d_cbL_${XYZ}$ (:, j) - 3._wp*omega) + omega**2._wp) & - *(omega/(d_cbL_${XYZ}$ (:, j)**2._wp + omega*(1._wp - 2._wp*d_cbL_${XYZ}$ (:, j)))) + vL_rs_vf_${XYZ}$ (j, k, l, i) = omega(0)*poly(0) + omega(1)*poly(1) - elseif (wenoz) then - ! Borges, et al. (2008) + ! reconstruct from right side - tau = abs(beta(1) - beta(0)) - alpha = d_cbL_${XYZ}$ (:, j)*(1._wp + tau/beta) + poly(0) = v_rs_ws_${XYZ}$ (j, k, l, i) & + + poly_coef_cbR_${XYZ}$ (j, 0, 0)*dvd(0) + poly(1) = v_rs_ws_${XYZ}$ (j, k, l, i) & + + poly_coef_cbR_${XYZ}$ (j, 1, 0)*dvd(-1) - end if + if (wenojs) then + alpha = d_cbR_${XYZ}$ (:, j)/(beta*beta) + elseif (mapped_weno) then + alpha = d_cbR_${XYZ}$ (:, j)/(beta*beta) omega = alpha/sum(alpha) + alpha = (d_cbR_${XYZ}$ (:, j)*(1._wp + d_cbR_${XYZ}$ (:, j) - 3._wp*omega) + omega**2._wp) & + *(omega/(d_cbR_${XYZ}$ (:, j)**2._wp + omega*(1._wp - 2._wp*d_cbR_${XYZ}$ (:, j)))) - vL_rs_vf_${XYZ}$ (j, k, l, i) = omega(0)*poly(0) + omega(1)*poly(1) - - ! reconstruct from right side - - poly(0) = v_rs_ws_${XYZ}$ (j, k, l, i) & - + poly_coef_cbR_${XYZ}$ (j, 0, 0)*dvd(0) - poly(1) = v_rs_ws_${XYZ}$ (j, k, l, i) & - + poly_coef_cbR_${XYZ}$ (j, 1, 0)*dvd(-1) - - if (wenojs) then - alpha = d_cbR_${XYZ}$ (:, j)/(beta*beta) - - elseif (mapped_weno) then - alpha = d_cbR_${XYZ}$ (:, j)/(beta*beta) - omega = alpha/sum(alpha) - alpha = (d_cbR_${XYZ}$ (:, j)*(1._wp + d_cbR_${XYZ}$ (:, j) - 3._wp*omega) + omega**2._wp) & - *(omega/(d_cbR_${XYZ}$ (:, j)**2._wp + omega*(1._wp - 2._wp*d_cbR_${XYZ}$ (:, j)))) - - elseif (wenoz) then + elseif (wenoz) then - alpha = d_cbR_${XYZ}$ (:, j)*(1._wp + tau/beta) + alpha = d_cbR_${XYZ}$ (:, j)*(1._wp + tau/beta) - end if + end if - omega = alpha/sum(alpha) + omega = alpha/sum(alpha) - vR_rs_vf_${XYZ}$ (j, k, l, i) = omega(0)*poly(0) + omega(1)*poly(1) + vR_rs_vf_${XYZ}$ (j, k, l, i) = omega(0)*poly(0) + omega(1)*poly(1) - end do end do end do end do + end do $:END_GPU_PARALLEL_LOOP() end if #:endfor @@ -792,114 +792,114 @@ contains #:for WENO_DIR, XYZ in [(1, 'x'), (2, 'y'), (3, 'z')] if (weno_dir == ${WENO_DIR}$) then $:GPU_PARALLEL_LOOP(collapse=3,private='[i,j,k,l,dvd,poly,beta,alpha,omega,tau,delta]') - do l = is3_weno%beg, is3_weno%end - do k = is2_weno%beg, is2_weno%end - do j = is1_weno%beg, is1_weno%end - $:GPU_LOOP(parallelism='[seq]') - do i = 1, v_size - ! reconstruct from left side + do l = is3_weno%beg, is3_weno%end + do k = is2_weno%beg, is2_weno%end + do j = is1_weno%beg, is1_weno%end + $:GPU_LOOP(parallelism='[seq]') + do i = 1, v_size + ! reconstruct from left side + + dvd(1) = v_rs_ws_${XYZ}$ (j + 2, k, l, i) & + - v_rs_ws_${XYZ}$ (j + 1, k, l, i) + dvd(0) = v_rs_ws_${XYZ}$ (j + 1, k, l, i) & + - v_rs_ws_${XYZ}$ (j, k, l, i) + dvd(-1) = v_rs_ws_${XYZ}$ (j, k, l, i) & + - v_rs_ws_${XYZ}$ (j - 1, k, l, i) + dvd(-2) = v_rs_ws_${XYZ}$ (j - 1, k, l, i) & + - v_rs_ws_${XYZ}$ (j - 2, k, l, i) + + poly(0) = v_rs_ws_${XYZ}$ (j, k, l, i) & + + poly_coef_cbL_${XYZ}$ (j, 0, 0)*dvd(1) & + + poly_coef_cbL_${XYZ}$ (j, 0, 1)*dvd(0) + poly(1) = v_rs_ws_${XYZ}$ (j, k, l, i) & + + poly_coef_cbL_${XYZ}$ (j, 1, 0)*dvd(0) & + + poly_coef_cbL_${XYZ}$ (j, 1, 1)*dvd(-1) + poly(2) = v_rs_ws_${XYZ}$ (j, k, l, i) & + + poly_coef_cbL_${XYZ}$ (j, 2, 0)*dvd(-1) & + + poly_coef_cbL_${XYZ}$ (j, 2, 1)*dvd(-2) + + beta(0) = beta_coef_${XYZ}$ (j, 0, 0)*dvd(1)*dvd(1) & + + beta_coef_${XYZ}$ (j, 0, 1)*dvd(1)*dvd(0) & + + beta_coef_${XYZ}$ (j, 0, 2)*dvd(0)*dvd(0) & + + weno_eps + beta(1) = beta_coef_${XYZ}$ (j, 1, 0)*dvd(0)*dvd(0) & + + beta_coef_${XYZ}$ (j, 1, 1)*dvd(0)*dvd(-1) & + + beta_coef_${XYZ}$ (j, 1, 2)*dvd(-1)*dvd(-1) & + + weno_eps + beta(2) = beta_coef_${XYZ}$ (j, 2, 0)*dvd(-1)*dvd(-1) & + + beta_coef_${XYZ}$ (j, 2, 1)*dvd(-1)*dvd(-2) & + + beta_coef_${XYZ}$ (j, 2, 2)*dvd(-2)*dvd(-2) & + + weno_eps + + if (wenojs) then + alpha = d_cbL_${XYZ}$ (:, j)/(beta*beta) + + elseif (mapped_weno) then + alpha = d_cbL_${XYZ}$ (:, j)/(beta*beta) + omega = alpha/sum(alpha) + alpha = (d_cbL_${XYZ}$ (:, j)*(1._wp + d_cbL_${XYZ}$ (:, j) - 3._wp*omega) + omega**2._wp) & + *(omega/(d_cbL_${XYZ}$ (:, j)**2._wp + omega*(1._wp - 2._wp*d_cbL_${XYZ}$ (:, j)))) - dvd(1) = v_rs_ws_${XYZ}$ (j + 2, k, l, i) & - - v_rs_ws_${XYZ}$ (j + 1, k, l, i) - dvd(0) = v_rs_ws_${XYZ}$ (j + 1, k, l, i) & - - v_rs_ws_${XYZ}$ (j, k, l, i) - dvd(-1) = v_rs_ws_${XYZ}$ (j, k, l, i) & - - v_rs_ws_${XYZ}$ (j - 1, k, l, i) - dvd(-2) = v_rs_ws_${XYZ}$ (j - 1, k, l, i) & - - v_rs_ws_${XYZ}$ (j - 2, k, l, i) + elseif (wenoz) then + ! Borges, et al. (2008) - poly(0) = v_rs_ws_${XYZ}$ (j, k, l, i) & - + poly_coef_cbL_${XYZ}$ (j, 0, 0)*dvd(1) & - + poly_coef_cbL_${XYZ}$ (j, 0, 1)*dvd(0) - poly(1) = v_rs_ws_${XYZ}$ (j, k, l, i) & - + poly_coef_cbL_${XYZ}$ (j, 1, 0)*dvd(0) & - + poly_coef_cbL_${XYZ}$ (j, 1, 1)*dvd(-1) - poly(2) = v_rs_ws_${XYZ}$ (j, k, l, i) & - + poly_coef_cbL_${XYZ}$ (j, 2, 0)*dvd(-1) & - + poly_coef_cbL_${XYZ}$ (j, 2, 1)*dvd(-2) + tau = abs(beta(2) - beta(0)) ! Equation 25 + alpha = d_cbL_${XYZ}$ (:, j)*(1._wp + tau/beta) ! Equation 28 (note: weno_eps was already added to beta) - beta(0) = beta_coef_${XYZ}$ (j, 0, 0)*dvd(1)*dvd(1) & - + beta_coef_${XYZ}$ (j, 0, 1)*dvd(1)*dvd(0) & - + beta_coef_${XYZ}$ (j, 0, 2)*dvd(0)*dvd(0) & - + weno_eps - beta(1) = beta_coef_${XYZ}$ (j, 1, 0)*dvd(0)*dvd(0) & - + beta_coef_${XYZ}$ (j, 1, 1)*dvd(0)*dvd(-1) & - + beta_coef_${XYZ}$ (j, 1, 2)*dvd(-1)*dvd(-1) & - + weno_eps - beta(2) = beta_coef_${XYZ}$ (j, 2, 0)*dvd(-1)*dvd(-1) & - + beta_coef_${XYZ}$ (j, 2, 1)*dvd(-1)*dvd(-2) & - + beta_coef_${XYZ}$ (j, 2, 2)*dvd(-2)*dvd(-2) & - + weno_eps + elseif (teno) then + ! Fu, et al. (2016) + ! Fu''s code: https://dx.doi.org/10.13140/RG.2.2.36250.34247 + tau = abs(beta(2) - beta(0)) + alpha = 1._wp + tau/beta ! Equation 22 (reuse alpha as gamma; pick C=1 & q=6) + alpha = (alpha*alpha*alpha)**2._wp ! Equation 22 cont. (some CPU compilers cannot optimize x**6.0) + omega = alpha/sum(alpha) ! Equation 25 (reuse omega as xi) + delta = merge(0._wp, 1._wp, omega < teno_CT)! Equation 26 + alpha = delta*d_cbL_${XYZ}$ (:, j) ! Equation 27 - if (wenojs) then - alpha = d_cbL_${XYZ}$ (:, j)/(beta*beta) + end if - elseif (mapped_weno) then - alpha = d_cbL_${XYZ}$ (:, j)/(beta*beta) - omega = alpha/sum(alpha) - alpha = (d_cbL_${XYZ}$ (:, j)*(1._wp + d_cbL_${XYZ}$ (:, j) - 3._wp*omega) + omega**2._wp) & - *(omega/(d_cbL_${XYZ}$ (:, j)**2._wp + omega*(1._wp - 2._wp*d_cbL_${XYZ}$ (:, j)))) + omega = alpha/sum(alpha) - elseif (wenoz) then - ! Borges, et al. (2008) + vL_rs_vf_${XYZ}$ (j, k, l, i) = sum(omega*poly) - tau = abs(beta(2) - beta(0)) ! Equation 25 - alpha = d_cbL_${XYZ}$ (:, j)*(1._wp + tau/beta) ! Equation 28 (note: weno_eps was already added to beta) + ! reconstruct from right side - elseif (teno) then - ! Fu, et al. (2016) - ! Fu''s code: https://dx.doi.org/10.13140/RG.2.2.36250.34247 - tau = abs(beta(2) - beta(0)) - alpha = 1._wp + tau/beta ! Equation 22 (reuse alpha as gamma; pick C=1 & q=6) - alpha = (alpha*alpha*alpha)**2._wp ! Equation 22 cont. (some CPU compilers cannot optimize x**6.0) - omega = alpha/sum(alpha) ! Equation 25 (reuse omega as xi) - delta = merge(0._wp, 1._wp, omega < teno_CT)! Equation 26 - alpha = delta*d_cbL_${XYZ}$ (:, j) ! Equation 27 + poly(0) = v_rs_ws_${XYZ}$ (j, k, l, i) & + + poly_coef_cbR_${XYZ}$ (j, 0, 0)*dvd(1) & + + poly_coef_cbR_${XYZ}$ (j, 0, 1)*dvd(0) + poly(1) = v_rs_ws_${XYZ}$ (j, k, l, i) & + + poly_coef_cbR_${XYZ}$ (j, 1, 0)*dvd(0) & + + poly_coef_cbR_${XYZ}$ (j, 1, 1)*dvd(-1) + poly(2) = v_rs_ws_${XYZ}$ (j, k, l, i) & + + poly_coef_cbR_${XYZ}$ (j, 2, 0)*dvd(-1) & + + poly_coef_cbR_${XYZ}$ (j, 2, 1)*dvd(-2) - end if + if (wenojs) then + alpha = d_cbR_${XYZ}$ (:, j)/(beta*beta) + elseif (mapped_weno) then + alpha = d_cbR_${XYZ}$ (:, j)/(beta*beta) omega = alpha/sum(alpha) + alpha = (d_cbR_${XYZ}$ (:, j)*(1._wp + d_cbR_${XYZ}$ (:, j) - 3._wp*omega) + omega**2._wp) & + *(omega/(d_cbR_${XYZ}$ (:, j)**2._wp + omega*(1._wp - 2._wp*d_cbR_${XYZ}$ (:, j)))) - vL_rs_vf_${XYZ}$ (j, k, l, i) = sum(omega*poly) - - ! reconstruct from right side - - poly(0) = v_rs_ws_${XYZ}$ (j, k, l, i) & - + poly_coef_cbR_${XYZ}$ (j, 0, 0)*dvd(1) & - + poly_coef_cbR_${XYZ}$ (j, 0, 1)*dvd(0) - poly(1) = v_rs_ws_${XYZ}$ (j, k, l, i) & - + poly_coef_cbR_${XYZ}$ (j, 1, 0)*dvd(0) & - + poly_coef_cbR_${XYZ}$ (j, 1, 1)*dvd(-1) - poly(2) = v_rs_ws_${XYZ}$ (j, k, l, i) & - + poly_coef_cbR_${XYZ}$ (j, 2, 0)*dvd(-1) & - + poly_coef_cbR_${XYZ}$ (j, 2, 1)*dvd(-2) - - if (wenojs) then - alpha = d_cbR_${XYZ}$ (:, j)/(beta*beta) - - elseif (mapped_weno) then - alpha = d_cbR_${XYZ}$ (:, j)/(beta*beta) - omega = alpha/sum(alpha) - alpha = (d_cbR_${XYZ}$ (:, j)*(1._wp + d_cbR_${XYZ}$ (:, j) - 3._wp*omega) + omega**2._wp) & - *(omega/(d_cbR_${XYZ}$ (:, j)**2._wp + omega*(1._wp - 2._wp*d_cbR_${XYZ}$ (:, j)))) - - elseif (wenoz) then + elseif (wenoz) then - alpha = d_cbR_${XYZ}$ (:, j)*(1._wp + tau/beta) + alpha = d_cbR_${XYZ}$ (:, j)*(1._wp + tau/beta) - elseif (teno) then - alpha = delta*d_cbR_${XYZ}$ (:, j) + elseif (teno) then + alpha = delta*d_cbR_${XYZ}$ (:, j) - end if + end if - omega = alpha/sum(alpha) + omega = alpha/sum(alpha) - vR_rs_vf_${XYZ}$ (j, k, l, i) = sum(omega*poly) + vR_rs_vf_${XYZ}$ (j, k, l, i) = sum(omega*poly) - end do end do end do end do + end do $:END_GPU_PARALLEL_LOOP() if (mp_weno) then @@ -912,190 +912,190 @@ contains #:for WENO_DIR, XYZ in [(1, 'x'), (2, 'y'), (3, 'z')] if (weno_dir == ${WENO_DIR}$) then $:GPU_PARALLEL_LOOP(collapse=3,private='[i,j,k,l,poly,beta,alpha,omega,tau,delta,dvd,v]') - do l = is3_weno%beg, is3_weno%end - do k = is2_weno%beg, is2_weno%end - do j = is1_weno%beg, is1_weno%end - $:GPU_LOOP(parallelism='[seq]') - do i = 1, v_size - - if (teno) v = v_rs_ws_${XYZ}$ (j - 3:j + 3, k, l, i) ! temporary field value array for clarity - - if (.not. teno) then - dvd(2) = v_rs_ws_${XYZ}$ (j + 3, k, l, i) & - - v_rs_ws_${XYZ}$ (j + 2, k, l, i) - dvd(1) = v_rs_ws_${XYZ}$ (j + 2, k, l, i) & - - v_rs_ws_${XYZ}$ (j + 1, k, l, i) - dvd(0) = v_rs_ws_${XYZ}$ (j + 1, k, l, i) & - - v_rs_ws_${XYZ}$ (j, k, l, i) - dvd(-1) = v_rs_ws_${XYZ}$ (j, k, l, i) & - - v_rs_ws_${XYZ}$ (j - 1, k, l, i) - dvd(-2) = v_rs_ws_${XYZ}$ (j - 1, k, l, i) & - - v_rs_ws_${XYZ}$ (j - 2, k, l, i) - dvd(-3) = v_rs_ws_${XYZ}$ (j - 2, k, l, i) & - - v_rs_ws_${XYZ}$ (j - 3, k, l, i) - - poly(3) = v_rs_ws_${XYZ}$ (j, k, l, i) & - + poly_coef_cbL_${XYZ}$ (j, 0, 0)*dvd(2) & - + poly_coef_cbL_${XYZ}$ (j, 0, 1)*dvd(1) & - + poly_coef_cbL_${XYZ}$ (j, 0, 2)*dvd(0) - poly(2) = v_rs_ws_${XYZ}$ (j, k, l, i) & - + poly_coef_cbL_${XYZ}$ (j, 1, 0)*dvd(1) & - + poly_coef_cbL_${XYZ}$ (j, 1, 1)*dvd(0) & - + poly_coef_cbL_${XYZ}$ (j, 1, 2)*dvd(-1) - poly(1) = v_rs_ws_${XYZ}$ (j, k, l, i) & - + poly_coef_cbL_${XYZ}$ (j, 2, 0)*dvd(0) & - + poly_coef_cbL_${XYZ}$ (j, 2, 1)*dvd(-1) & - + poly_coef_cbL_${XYZ}$ (j, 2, 2)*dvd(-2) - poly(0) = v_rs_ws_${XYZ}$ (j, k, l, i) & - + poly_coef_cbL_${XYZ}$ (j, 3, 0)*dvd(-1) & - + poly_coef_cbL_${XYZ}$ (j, 3, 1)*dvd(-2) & - + poly_coef_cbL_${XYZ}$ (j, 3, 2)*dvd(-3) - - else - ! (Fu, et al., 2016) Table 1 - ! Note: Unlike TENO5, TENO7 stencils differ from WENO7 stencils - ! See Figure 2 (right) for right-sided flux (at i+1/2) - ! Here we need the left-sided flux, so we flip the weights with respect to the x=i point - ! But we need to keep the stencil order to reuse the beta coefficients - poly(0) = ( 2._wp*v(-1) + 5._wp*v( 0) - 1._wp*v( 1)) / 6._wp !& - poly(1) = (11._wp*v( 0) - 7._wp*v( 1) + 2._wp*v( 2)) / 6._wp !& - poly(2) = (-1._wp*v(-2) + 5._wp*v(-1) + 2._wp*v( 0)) / 6._wp !& - poly(3) = (25._wp*v( 0) - 23._wp*v( 1) + 13._wp*v( 2) - 3._wp*v( 3)) / 12._wp !& - poly(4) = ( 1._wp*v(-3) - 5._wp*v(-2) + 13._wp*v(-1) + 3._wp*v( 0)) / 12._wp !& - end if - - if (.not. teno) then - - beta(3) = beta_coef_${XYZ}$ (j, 0, 0)*dvd(2)*dvd(2) & - + beta_coef_${XYZ}$ (j, 0, 1)*dvd(2)*dvd(1) & - + beta_coef_${XYZ}$ (j, 0, 2)*dvd(2)*dvd(0) & - + beta_coef_${XYZ}$ (j, 0, 3)*dvd(1)*dvd(1) & - + beta_coef_${XYZ}$ (j, 0, 4)*dvd(1)*dvd(0) & - + beta_coef_${XYZ}$ (j, 0, 5)*dvd(0)*dvd(0) & - + weno_eps - - beta(2) = beta_coef_${XYZ}$ (j, 1, 0)*dvd(1)*dvd(1) & - + beta_coef_${XYZ}$ (j, 1, 1)*dvd(1)*dvd(0) & - + beta_coef_${XYZ}$ (j, 1, 2)*dvd(1)*dvd(-1) & - + beta_coef_${XYZ}$ (j, 1, 3)*dvd(0)*dvd(0) & - + beta_coef_${XYZ}$ (j, 1, 4)*dvd(0)*dvd(-1) & - + beta_coef_${XYZ}$ (j, 1, 5)*dvd(-1)*dvd(-1) & - + weno_eps - - beta(1) = beta_coef_${XYZ}$ (j, 2, 0)*dvd(0)*dvd(0) & - + beta_coef_${XYZ}$ (j, 2, 1)*dvd(0)*dvd(-1) & - + beta_coef_${XYZ}$ (j, 2, 2)*dvd(0)*dvd(-2) & - + beta_coef_${XYZ}$ (j, 2, 3)*dvd(-1)*dvd(-1) & - + beta_coef_${XYZ}$ (j, 2, 4)*dvd(-1)*dvd(-2) & - + beta_coef_${XYZ}$ (j, 2, 5)*dvd(-2)*dvd(-2) & - + weno_eps - - beta(0) = beta_coef_${XYZ}$ (j, 3, 0)*dvd(-1)*dvd(-1) & - + beta_coef_${XYZ}$ (j, 3, 1)*dvd(-1)*dvd(-2) & - + beta_coef_${XYZ}$ (j, 3, 2)*dvd(-1)*dvd(-3) & - + beta_coef_${XYZ}$ (j, 3, 3)*dvd(-2)*dvd(-2) & - + beta_coef_${XYZ}$ (j, 3, 4)*dvd(-2)*dvd(-3) & - + beta_coef_${XYZ}$ (j, 3, 5)*dvd(-3)*dvd(-3) & - + weno_eps - - else ! TENO - ! High-Order Low-Dissipation Targeted ENO Schemes for Ideal Magnetohydrodynamics (Fu & Tang, 2019) Section 3.2 - beta(0) = 13._wp/12._wp*(v(-1) - 2._wp*v( 0) + v( 1))**2._wp + (( v(-1) - v( 1))**2._wp)/4._wp + weno_eps !& - beta(1) = 13._wp/12._wp*(v( 0) - 2._wp*v( 1) + v( 2))**2._wp + ((3._wp*v( 0) - 4._wp*v( 1) + v( 2))**2._wp)/4._wp + weno_eps !& - beta(2) = 13._wp/12._wp*(v(-2) - 2._wp*v(-1) + v( 0))**2._wp + (( v(-2) - 4._wp*v(-1) + 3._wp*v( 0))**2._wp)/4._wp + weno_eps !& - - beta(3) = ( v( 0)*(2107._wp*v( 0) - 9402._wp*v( 1) + 7042._wp*v( 2) - 1854._wp*v( 3)) & !& - + v( 1)*( 11003._wp*v( 1) - 17246._wp*v( 2) + 4642._wp*v( 3)) & !& - + v( 2)*( 7043._wp*v( 2) - 3882._wp*v( 3)) & !& - + v( 3)*( 547._wp*v( 3)) ) / 240._wp & !& - + weno_eps !& - - beta(4) = ( v(-3)*(547._wp*v(-3) - 3882._wp*v(-2) + 4642._wp*v(-1) - 1854._wp*v( 0)) & !& - + v(-2)*( 7043._wp*v(-2) - 17246._wp*v(-1) + 7042._wp*v( 0)) & !& - + v(-1)*( 11003._wp*v(-1) - 9402._wp*v( 0)) & !& - + v( 0)*( 2107._wp*v( 0)) ) / 240._wp & !& - + weno_eps !& - end if - - if (wenojs) then - alpha = d_cbL_${XYZ}$ (:, j)/(beta*beta) - - elseif (mapped_weno) then - alpha = d_cbL_${XYZ}$ (:, j)/(beta*beta) - omega = alpha/sum(alpha) - alpha = (d_cbL_${XYZ}$ (:, j)*(1._wp + d_cbL_${XYZ}$ (:, j) - 3._wp*omega) + omega**2._wp) & - *(omega/(d_cbL_${XYZ}$ (:, j)**2._wp + omega*(1._wp - 2._wp*d_cbL_${XYZ}$ (:, j)))) - - elseif (wenoz) then - ! Castro, et al. (2010) - ! Don & Borges (2013) also helps - tau = abs(beta(3) - beta(0)) ! Equation 50 - alpha = d_cbL_${XYZ}$ (:, j)*(1._wp + (tau/beta)**wenoz_q) ! q = 2,3,4 for stability - - elseif (teno) then - tau = abs(beta(4) - beta(3)) ! Note the reordering of stencils - alpha = 1._wp + tau/beta - alpha = (alpha*alpha*alpha)**2._wp ! some CPU compilers cannot optimize x**6.0 - omega = alpha/sum(alpha) - delta = merge(0._wp, 1._wp, omega < teno_CT) - alpha = delta*d_cbL_${XYZ}$ (:, j) - - end if + do l = is3_weno%beg, is3_weno%end + do k = is2_weno%beg, is2_weno%end + do j = is1_weno%beg, is1_weno%end + $:GPU_LOOP(parallelism='[seq]') + do i = 1, v_size + + if (teno) v = v_rs_ws_${XYZ}$ (j - 3:j + 3, k, l, i) ! temporary field value array for clarity + + if (.not. teno) then + dvd(2) = v_rs_ws_${XYZ}$ (j + 3, k, l, i) & + - v_rs_ws_${XYZ}$ (j + 2, k, l, i) + dvd(1) = v_rs_ws_${XYZ}$ (j + 2, k, l, i) & + - v_rs_ws_${XYZ}$ (j + 1, k, l, i) + dvd(0) = v_rs_ws_${XYZ}$ (j + 1, k, l, i) & + - v_rs_ws_${XYZ}$ (j, k, l, i) + dvd(-1) = v_rs_ws_${XYZ}$ (j, k, l, i) & + - v_rs_ws_${XYZ}$ (j - 1, k, l, i) + dvd(-2) = v_rs_ws_${XYZ}$ (j - 1, k, l, i) & + - v_rs_ws_${XYZ}$ (j - 2, k, l, i) + dvd(-3) = v_rs_ws_${XYZ}$ (j - 2, k, l, i) & + - v_rs_ws_${XYZ}$ (j - 3, k, l, i) + + poly(3) = v_rs_ws_${XYZ}$ (j, k, l, i) & + + poly_coef_cbL_${XYZ}$ (j, 0, 0)*dvd(2) & + + poly_coef_cbL_${XYZ}$ (j, 0, 1)*dvd(1) & + + poly_coef_cbL_${XYZ}$ (j, 0, 2)*dvd(0) + poly(2) = v_rs_ws_${XYZ}$ (j, k, l, i) & + + poly_coef_cbL_${XYZ}$ (j, 1, 0)*dvd(1) & + + poly_coef_cbL_${XYZ}$ (j, 1, 1)*dvd(0) & + + poly_coef_cbL_${XYZ}$ (j, 1, 2)*dvd(-1) + poly(1) = v_rs_ws_${XYZ}$ (j, k, l, i) & + + poly_coef_cbL_${XYZ}$ (j, 2, 0)*dvd(0) & + + poly_coef_cbL_${XYZ}$ (j, 2, 1)*dvd(-1) & + + poly_coef_cbL_${XYZ}$ (j, 2, 2)*dvd(-2) + poly(0) = v_rs_ws_${XYZ}$ (j, k, l, i) & + + poly_coef_cbL_${XYZ}$ (j, 3, 0)*dvd(-1) & + + poly_coef_cbL_${XYZ}$ (j, 3, 1)*dvd(-2) & + + poly_coef_cbL_${XYZ}$ (j, 3, 2)*dvd(-3) + + else + ! (Fu, et al., 2016) Table 1 + ! Note: Unlike TENO5, TENO7 stencils differ from WENO7 stencils + ! See Figure 2 (right) for right-sided flux (at i+1/2) + ! Here we need the left-sided flux, so we flip the weights with respect to the x=i point + ! But we need to keep the stencil order to reuse the beta coefficients + poly(0) = ( 2._wp*v(-1) + 5._wp*v( 0) - 1._wp*v( 1)) / 6._wp !& + poly(1) = (11._wp*v( 0) - 7._wp*v( 1) + 2._wp*v( 2)) / 6._wp !& + poly(2) = (-1._wp*v(-2) + 5._wp*v(-1) + 2._wp*v( 0)) / 6._wp !& + poly(3) = (25._wp*v( 0) - 23._wp*v( 1) + 13._wp*v( 2) - 3._wp*v( 3)) / 12._wp !& + poly(4) = ( 1._wp*v(-3) - 5._wp*v(-2) + 13._wp*v(-1) + 3._wp*v( 0)) / 12._wp !& + end if + + if (.not. teno) then + + beta(3) = beta_coef_${XYZ}$ (j, 0, 0)*dvd(2)*dvd(2) & + + beta_coef_${XYZ}$ (j, 0, 1)*dvd(2)*dvd(1) & + + beta_coef_${XYZ}$ (j, 0, 2)*dvd(2)*dvd(0) & + + beta_coef_${XYZ}$ (j, 0, 3)*dvd(1)*dvd(1) & + + beta_coef_${XYZ}$ (j, 0, 4)*dvd(1)*dvd(0) & + + beta_coef_${XYZ}$ (j, 0, 5)*dvd(0)*dvd(0) & + + weno_eps + + beta(2) = beta_coef_${XYZ}$ (j, 1, 0)*dvd(1)*dvd(1) & + + beta_coef_${XYZ}$ (j, 1, 1)*dvd(1)*dvd(0) & + + beta_coef_${XYZ}$ (j, 1, 2)*dvd(1)*dvd(-1) & + + beta_coef_${XYZ}$ (j, 1, 3)*dvd(0)*dvd(0) & + + beta_coef_${XYZ}$ (j, 1, 4)*dvd(0)*dvd(-1) & + + beta_coef_${XYZ}$ (j, 1, 5)*dvd(-1)*dvd(-1) & + + weno_eps + beta(1) = beta_coef_${XYZ}$ (j, 2, 0)*dvd(0)*dvd(0) & + + beta_coef_${XYZ}$ (j, 2, 1)*dvd(0)*dvd(-1) & + + beta_coef_${XYZ}$ (j, 2, 2)*dvd(0)*dvd(-2) & + + beta_coef_${XYZ}$ (j, 2, 3)*dvd(-1)*dvd(-1) & + + beta_coef_${XYZ}$ (j, 2, 4)*dvd(-1)*dvd(-2) & + + beta_coef_${XYZ}$ (j, 2, 5)*dvd(-2)*dvd(-2) & + + weno_eps + + beta(0) = beta_coef_${XYZ}$ (j, 3, 0)*dvd(-1)*dvd(-1) & + + beta_coef_${XYZ}$ (j, 3, 1)*dvd(-1)*dvd(-2) & + + beta_coef_${XYZ}$ (j, 3, 2)*dvd(-1)*dvd(-3) & + + beta_coef_${XYZ}$ (j, 3, 3)*dvd(-2)*dvd(-2) & + + beta_coef_${XYZ}$ (j, 3, 4)*dvd(-2)*dvd(-3) & + + beta_coef_${XYZ}$ (j, 3, 5)*dvd(-3)*dvd(-3) & + + weno_eps + + else ! TENO + ! High-Order Low-Dissipation Targeted ENO Schemes for Ideal Magnetohydrodynamics (Fu & Tang, 2019) Section 3.2 + beta(0) = 13._wp/12._wp*(v(-1) - 2._wp*v( 0) + v( 1))**2._wp + (( v(-1) - v( 1))**2._wp)/4._wp + weno_eps !& + beta(1) = 13._wp/12._wp*(v( 0) - 2._wp*v( 1) + v( 2))**2._wp + ((3._wp*v( 0) - 4._wp*v( 1) + v( 2))**2._wp)/4._wp + weno_eps !& + beta(2) = 13._wp/12._wp*(v(-2) - 2._wp*v(-1) + v( 0))**2._wp + (( v(-2) - 4._wp*v(-1) + 3._wp*v( 0))**2._wp)/4._wp + weno_eps !& + + beta(3) = ( v( 0)*(2107._wp*v( 0) - 9402._wp*v( 1) + 7042._wp*v( 2) - 1854._wp*v( 3)) & !& + + v( 1)*( 11003._wp*v( 1) - 17246._wp*v( 2) + 4642._wp*v( 3)) & !& + + v( 2)*( 7043._wp*v( 2) - 3882._wp*v( 3)) & !& + + v( 3)*( 547._wp*v( 3)) ) / 240._wp & !& + + weno_eps !& + + beta(4) = ( v(-3)*(547._wp*v(-3) - 3882._wp*v(-2) + 4642._wp*v(-1) - 1854._wp*v( 0)) & !& + + v(-2)*( 7043._wp*v(-2) - 17246._wp*v(-1) + 7042._wp*v( 0)) & !& + + v(-1)*( 11003._wp*v(-1) - 9402._wp*v( 0)) & !& + + v( 0)*( 2107._wp*v( 0)) ) / 240._wp & !& + + weno_eps !& + end if + + if (wenojs) then + alpha = d_cbL_${XYZ}$ (:, j)/(beta*beta) + + elseif (mapped_weno) then + alpha = d_cbL_${XYZ}$ (:, j)/(beta*beta) + omega = alpha/sum(alpha) + alpha = (d_cbL_${XYZ}$ (:, j)*(1._wp + d_cbL_${XYZ}$ (:, j) - 3._wp*omega) + omega**2._wp) & + *(omega/(d_cbL_${XYZ}$ (:, j)**2._wp + omega*(1._wp - 2._wp*d_cbL_${XYZ}$ (:, j)))) + + elseif (wenoz) then + ! Castro, et al. (2010) + ! Don & Borges (2013) also helps + tau = abs(beta(3) - beta(0)) ! Equation 50 + alpha = d_cbL_${XYZ}$ (:, j)*(1._wp + (tau/beta)**wenoz_q) ! q = 2,3,4 for stability + + elseif (teno) then + tau = abs(beta(4) - beta(3)) ! Note the reordering of stencils + alpha = 1._wp + tau/beta + alpha = (alpha*alpha*alpha)**2._wp ! some CPU compilers cannot optimize x**6.0 omega = alpha/sum(alpha) + delta = merge(0._wp, 1._wp, omega < teno_CT) + alpha = delta*d_cbL_${XYZ}$ (:, j) + + end if - vL_rs_vf_${XYZ}$ (j, k, l, i) = sum(omega*poly) - - if (.not. teno) then - poly(3) = v_rs_ws_${XYZ}$ (j, k, l, i) & - + poly_coef_cbR_${XYZ}$ (j, 0, 0)*dvd(2) & - + poly_coef_cbR_${XYZ}$ (j, 0, 1)*dvd(1) & - + poly_coef_cbR_${XYZ}$ (j, 0, 2)*dvd(0) - poly(2) = v_rs_ws_${XYZ}$ (j, k, l, i) & - + poly_coef_cbR_${XYZ}$ (j, 1, 0)*dvd(1) & - + poly_coef_cbR_${XYZ}$ (j, 1, 1)*dvd(0) & - + poly_coef_cbR_${XYZ}$ (j, 1, 2)*dvd(-1) - poly(1) = v_rs_ws_${XYZ}$ (j, k, l, i) & - + poly_coef_cbR_${XYZ}$ (j, 2, 0)*dvd(0) & - + poly_coef_cbR_${XYZ}$ (j, 2, 1)*dvd(-1) & - + poly_coef_cbR_${XYZ}$ (j, 2, 2)*dvd(-2) - poly(0) = v_rs_ws_${XYZ}$ (j, k, l, i) & - + poly_coef_cbR_${XYZ}$ (j, 3, 0)*dvd(-1) & - + poly_coef_cbR_${XYZ}$ (j, 3, 1)*dvd(-2) & - + poly_coef_cbR_${XYZ}$ (j, 3, 2)*dvd(-3) - else - poly(0) = (-1._wp*v(-1) + 5._wp*v( 0) + 2._wp*v( 1)) / 6._wp !& - poly(1) = ( 2._wp*v( 0) + 5._wp*v( 1) - 1._wp*v( 2)) / 6._wp !& - poly(2) = ( 2._wp*v(-2) - 7._wp*v(-1) + 11._wp*v( 0)) / 6._wp !& - poly(3) = ( 3._wp*v( 0) + 13._wp*v( 1) - 5._wp*v( 2) + 1._wp*v( 3)) / 12._wp !& - poly(4) = (-3._wp*v(-3) + 13._wp*v(-2) - 23._wp*v(-1) + 25._wp*v( 0)) / 12._wp !& - end if - - if (wenojs) then - alpha = d_cbR_${XYZ}$ (:, j)/(beta*beta) - - elseif (mapped_weno) then - alpha = d_cbR_${XYZ}$ (:, j)/(beta*beta) - omega = alpha/sum(alpha) - alpha = (d_cbR_${XYZ}$ (:, j)*(1._wp + d_cbR_${XYZ}$ (:, j) - 3._wp*omega) + omega**2._wp) & - *(omega/(d_cbR_${XYZ}$ (:, j)**2._wp + omega*(1._wp - 2._wp*d_cbR_${XYZ}$ (:, j)))) - - elseif (wenoz) then - alpha = d_cbR_${XYZ}$ (:, j)*(1._wp + (tau/beta)**wenoz_q) - - elseif (teno) then - alpha = delta*d_cbR_${XYZ}$ (:, j) - - end if + omega = alpha/sum(alpha) + vL_rs_vf_${XYZ}$ (j, k, l, i) = sum(omega*poly) + + if (.not. teno) then + poly(3) = v_rs_ws_${XYZ}$ (j, k, l, i) & + + poly_coef_cbR_${XYZ}$ (j, 0, 0)*dvd(2) & + + poly_coef_cbR_${XYZ}$ (j, 0, 1)*dvd(1) & + + poly_coef_cbR_${XYZ}$ (j, 0, 2)*dvd(0) + poly(2) = v_rs_ws_${XYZ}$ (j, k, l, i) & + + poly_coef_cbR_${XYZ}$ (j, 1, 0)*dvd(1) & + + poly_coef_cbR_${XYZ}$ (j, 1, 1)*dvd(0) & + + poly_coef_cbR_${XYZ}$ (j, 1, 2)*dvd(-1) + poly(1) = v_rs_ws_${XYZ}$ (j, k, l, i) & + + poly_coef_cbR_${XYZ}$ (j, 2, 0)*dvd(0) & + + poly_coef_cbR_${XYZ}$ (j, 2, 1)*dvd(-1) & + + poly_coef_cbR_${XYZ}$ (j, 2, 2)*dvd(-2) + poly(0) = v_rs_ws_${XYZ}$ (j, k, l, i) & + + poly_coef_cbR_${XYZ}$ (j, 3, 0)*dvd(-1) & + + poly_coef_cbR_${XYZ}$ (j, 3, 1)*dvd(-2) & + + poly_coef_cbR_${XYZ}$ (j, 3, 2)*dvd(-3) + else + poly(0) = (-1._wp*v(-1) + 5._wp*v( 0) + 2._wp*v( 1)) / 6._wp !& + poly(1) = ( 2._wp*v( 0) + 5._wp*v( 1) - 1._wp*v( 2)) / 6._wp !& + poly(2) = ( 2._wp*v(-2) - 7._wp*v(-1) + 11._wp*v( 0)) / 6._wp !& + poly(3) = ( 3._wp*v( 0) + 13._wp*v( 1) - 5._wp*v( 2) + 1._wp*v( 3)) / 12._wp !& + poly(4) = (-3._wp*v(-3) + 13._wp*v(-2) - 23._wp*v(-1) + 25._wp*v( 0)) / 12._wp !& + end if + + if (wenojs) then + alpha = d_cbR_${XYZ}$ (:, j)/(beta*beta) + + elseif (mapped_weno) then + alpha = d_cbR_${XYZ}$ (:, j)/(beta*beta) omega = alpha/sum(alpha) + alpha = (d_cbR_${XYZ}$ (:, j)*(1._wp + d_cbR_${XYZ}$ (:, j) - 3._wp*omega) + omega**2._wp) & + *(omega/(d_cbR_${XYZ}$ (:, j)**2._wp + omega*(1._wp - 2._wp*d_cbR_${XYZ}$ (:, j)))) + + elseif (wenoz) then + alpha = d_cbR_${XYZ}$ (:, j)*(1._wp + (tau/beta)**wenoz_q) - vR_rs_vf_${XYZ}$ (j, k, l, i) = sum(omega*poly) + elseif (teno) then + alpha = delta*d_cbR_${XYZ}$ (:, j) + + end if + + omega = alpha/sum(alpha) + + vR_rs_vf_${XYZ}$ (j, k, l, i) = sum(omega*poly) - end do end do end do end do + end do $:END_GPU_PARALLEL_LOOP() end if @@ -1140,15 +1140,15 @@ contains if (weno_dir == 1) then $:GPU_PARALLEL_LOOP(private='[j,k,l,q]', collapse=4) - do j = 1, v_size - do q = is3_weno%beg, is3_weno%end - do l = is2_weno%beg, is2_weno%end - do k = is1_weno%beg - weno_polyn, is1_weno%end + weno_polyn - v_rs_ws_x(k, l, q, j) = v_vf(j)%sf(k, l, q) - end do + do j = 1, v_size + do q = is3_weno%beg, is3_weno%end + do l = is2_weno%beg, is2_weno%end + do k = is1_weno%beg - weno_polyn, is1_weno%end + weno_polyn + v_rs_ws_x(k, l, q, j) = v_vf(j)%sf(k, l, q) end do end do end do + end do $:END_GPU_PARALLEL_LOOP() end if @@ -1157,15 +1157,15 @@ contains if (weno_dir == 2) then $:GPU_PARALLEL_LOOP(private='[j,k,l,q]', collapse=4) - do j = 1, v_size - do q = is3_weno%beg, is3_weno%end - do l = is2_weno%beg, is2_weno%end - do k = is1_weno%beg - weno_polyn, is1_weno%end + weno_polyn - v_rs_ws_y(k, l, q, j) = v_vf(j)%sf(l, k, q) - end do + do j = 1, v_size + do q = is3_weno%beg, is3_weno%end + do l = is2_weno%beg, is2_weno%end + do k = is1_weno%beg - weno_polyn, is1_weno%end + weno_polyn + v_rs_ws_y(k, l, q, j) = v_vf(j)%sf(l, k, q) end do end do end do + end do $:END_GPU_PARALLEL_LOOP() end if @@ -1174,15 +1174,15 @@ contains if (weno_dir == 3) then $:GPU_PARALLEL_LOOP(private='[j,k,l,q]', collapse=4) - do j = 1, v_size - do q = is3_weno%beg, is3_weno%end - do l = is2_weno%beg, is2_weno%end - do k = is1_weno%beg - weno_polyn, is1_weno%end + weno_polyn - v_rs_ws_z(k, l, q, j) = v_vf(j)%sf(q, l, k) - end do + do j = 1, v_size + do q = is3_weno%beg, is3_weno%end + do l = is2_weno%beg, is2_weno%end + do k = is1_weno%beg - weno_polyn, is1_weno%end + weno_polyn + v_rs_ws_z(k, l, q, j) = v_vf(j)%sf(q, l, k) end do end do end do + end do $:END_GPU_PARALLEL_LOOP() end if @@ -1234,130 +1234,130 @@ contains real(wp), parameter :: beta_mp = 4._wp/3._wp $:GPU_PARALLEL_LOOP(private='[i,j,k,l,d]', collapse=4) - do l = is3_weno%beg, is3_weno%end - do k = is2_weno%beg, is2_weno%end - do j = is1_weno%beg, is1_weno%end - do i = 1, v_size - d(-1) = v_rs_ws(j, k, l, i) & - + v_rs_ws(j - 2, k, l, i) & - - v_rs_ws(j - 1, k, l, i) & - *2._wp - d(0) = v_rs_ws(j + 1, k, l, i) & - + v_rs_ws(j - 1, k, l, i) & - - v_rs_ws(j, k, l, i) & - *2._wp - d(1) = v_rs_ws(j + 2, k, l, i) & - + v_rs_ws(j, k, l, i) & - - v_rs_ws(j + 1, k, l, i) & - *2._wp - - d_MD = (sign(1._wp, 4._wp*d(-1) - d(0)) + sign(1._wp, 4._wp*d(0) - d(-1))) & - *abs((sign(1._wp, 4._wp*d(-1) - d(0)) + sign(1._wp, d(-1))) & - *(sign(1._wp, 4._wp*d(-1) - d(0)) + sign(1._wp, d(0)))) & - *min(abs(4._wp*d(-1) - d(0)), abs(d(-1)), & - abs(4._wp*d(0) - d(-1)), abs(d(0)))/8._wp - - d_LC = (sign(1._wp, 4._wp*d(0) - d(1)) + sign(1._wp, 4._wp*d(1) - d(0))) & - *abs((sign(1._wp, 4._wp*d(0) - d(1)) + sign(1._wp, d(0))) & - *(sign(1._wp, 4._wp*d(0) - d(1)) + sign(1._wp, d(1)))) & - *min(abs(4._wp*d(0) - d(1)), abs(d(0)), & - abs(4._wp*d(1) - d(0)), abs(d(1)))/8._wp - - vL_UL = v_rs_ws(j, k, l, i) & - - (v_rs_ws(j + 1, k, l, i) & - - v_rs_ws(j, k, l, i))*alpha_mp - - vL_MD = (v_rs_ws(j, k, l, i) & - + v_rs_ws(j - 1, k, l, i) & - - d_MD)*5.e-1_wp - - vL_LC = v_rs_ws(j, k, l, i) & - - (v_rs_ws(j + 1, k, l, i) & - - v_rs_ws(j, k, l, i))*5.e-1_wp + beta_mp*d_LC - - vL_min = max(min(v_rs_ws(j, k, l, i), & - v_rs_ws(j - 1, k, l, i), & - vL_MD), & - min(v_rs_ws(j, k, l, i), & - vL_UL, & - vL_LC)) - - vL_max = min(max(v_rs_ws(j, k, l, i), & - v_rs_ws(j - 1, k, l, i), & - vL_MD), & - max(v_rs_ws(j, k, l, i), & - vL_UL, & - vL_LC)) - - vL_rs_vf(j, k, l, i) = vL_rs_vf(j, k, l, i) & - + (sign(5.e-1_wp, vL_min - vL_rs_vf(j, k, l, i)) & - + sign(5.e-1_wp, vL_max - vL_rs_vf(j, k, l, i))) & - *min(abs(vL_min - vL_rs_vf(j, k, l, i)), & - abs(vL_max - vL_rs_vf(j, k, l, i))) - ! END: Left Monotonicity Preserving Bound - - ! Right Monotonicity Preserving Bound - d(-1) = v_rs_ws(j, k, l, i) & - + v_rs_ws(j - 2, k, l, i) & - - v_rs_ws(j - 1, k, l, i) & - *2._wp - d(0) = v_rs_ws(j + 1, k, l, i) & - + v_rs_ws(j - 1, k, l, i) & - - v_rs_ws(j, k, l, i) & - *2._wp - d(1) = v_rs_ws(j + 2, k, l, i) & - + v_rs_ws(j, k, l, i) & - - v_rs_ws(j + 1, k, l, i) & - *2._wp - - d_MD = (sign(1._wp, 4._wp*d(0) - d(1)) + sign(1._wp, 4._wp*d(1) - d(0))) & - *abs((sign(1._wp, 4._wp*d(0) - d(1)) + sign(1._wp, d(0))) & - *(sign(1._wp, 4._wp*d(0) - d(1)) + sign(1._wp, d(1)))) & - *min(abs(4._wp*d(0) - d(1)), abs(d(0)), & - abs(4._wp*d(1) - d(0)), abs(d(1)))/8._wp - - d_LC = (sign(1._wp, 4._wp*d(-1) - d(0)) + sign(1._wp, 4._wp*d(0) - d(-1))) & - *abs((sign(1._wp, 4._wp*d(-1) - d(0)) + sign(1._wp, d(-1))) & - *(sign(1._wp, 4._wp*d(-1) - d(0)) + sign(1._wp, d(0)))) & - *min(abs(4._wp*d(-1) - d(0)), abs(d(-1)), & - abs(4._wp*d(0) - d(-1)), abs(d(0)))/8._wp - - vR_UL = v_rs_ws(j, k, l, i) & - + (v_rs_ws(j, k, l, i) & - - v_rs_ws(j - 1, k, l, i))*alpha_mp - - vR_MD = (v_rs_ws(j, k, l, i) & - + v_rs_ws(j + 1, k, l, i) & - - d_MD)*5.e-1_wp - - vR_LC = v_rs_ws(j, k, l, i) & - + (v_rs_ws(j, k, l, i) & - - v_rs_ws(j - 1, k, l, i))*5.e-1_wp + beta_mp*d_LC - - vR_min = max(min(v_rs_ws(j, k, l, i), & - v_rs_ws(j + 1, k, l, i), & - vR_MD), & - min(v_rs_ws(j, k, l, i), & - vR_UL, & - vR_LC)) - - vR_max = min(max(v_rs_ws(j, k, l, i), & - v_rs_ws(j + 1, k, l, i), & - vR_MD), & - max(v_rs_ws(j, k, l, i), & - vR_UL, & - vR_LC)) - - vR_rs_vf(j, k, l, i) = vR_rs_vf(j, k, l, i) & - + (sign(5.e-1_wp, vR_min - vR_rs_vf(j, k, l, i)) & - + sign(5.e-1_wp, vR_max - vR_rs_vf(j, k, l, i))) & - *min(abs(vR_min - vR_rs_vf(j, k, l, i)), & - abs(vR_max - vR_rs_vf(j, k, l, i))) - ! END: Right Monotonicity Preserving Bound - end do + do l = is3_weno%beg, is3_weno%end + do k = is2_weno%beg, is2_weno%end + do j = is1_weno%beg, is1_weno%end + do i = 1, v_size + d(-1) = v_rs_ws(j, k, l, i) & + + v_rs_ws(j - 2, k, l, i) & + - v_rs_ws(j - 1, k, l, i) & + *2._wp + d(0) = v_rs_ws(j + 1, k, l, i) & + + v_rs_ws(j - 1, k, l, i) & + - v_rs_ws(j, k, l, i) & + *2._wp + d(1) = v_rs_ws(j + 2, k, l, i) & + + v_rs_ws(j, k, l, i) & + - v_rs_ws(j + 1, k, l, i) & + *2._wp + + d_MD = (sign(1._wp, 4._wp*d(-1) - d(0)) + sign(1._wp, 4._wp*d(0) - d(-1))) & + *abs((sign(1._wp, 4._wp*d(-1) - d(0)) + sign(1._wp, d(-1))) & + *(sign(1._wp, 4._wp*d(-1) - d(0)) + sign(1._wp, d(0)))) & + *min(abs(4._wp*d(-1) - d(0)), abs(d(-1)), & + abs(4._wp*d(0) - d(-1)), abs(d(0)))/8._wp + + d_LC = (sign(1._wp, 4._wp*d(0) - d(1)) + sign(1._wp, 4._wp*d(1) - d(0))) & + *abs((sign(1._wp, 4._wp*d(0) - d(1)) + sign(1._wp, d(0))) & + *(sign(1._wp, 4._wp*d(0) - d(1)) + sign(1._wp, d(1)))) & + *min(abs(4._wp*d(0) - d(1)), abs(d(0)), & + abs(4._wp*d(1) - d(0)), abs(d(1)))/8._wp + + vL_UL = v_rs_ws(j, k, l, i) & + - (v_rs_ws(j + 1, k, l, i) & + - v_rs_ws(j, k, l, i))*alpha_mp + + vL_MD = (v_rs_ws(j, k, l, i) & + + v_rs_ws(j - 1, k, l, i) & + - d_MD)*5.e-1_wp + + vL_LC = v_rs_ws(j, k, l, i) & + - (v_rs_ws(j + 1, k, l, i) & + - v_rs_ws(j, k, l, i))*5.e-1_wp + beta_mp*d_LC + + vL_min = max(min(v_rs_ws(j, k, l, i), & + v_rs_ws(j - 1, k, l, i), & + vL_MD), & + min(v_rs_ws(j, k, l, i), & + vL_UL, & + vL_LC)) + + vL_max = min(max(v_rs_ws(j, k, l, i), & + v_rs_ws(j - 1, k, l, i), & + vL_MD), & + max(v_rs_ws(j, k, l, i), & + vL_UL, & + vL_LC)) + + vL_rs_vf(j, k, l, i) = vL_rs_vf(j, k, l, i) & + + (sign(5.e-1_wp, vL_min - vL_rs_vf(j, k, l, i)) & + + sign(5.e-1_wp, vL_max - vL_rs_vf(j, k, l, i))) & + *min(abs(vL_min - vL_rs_vf(j, k, l, i)), & + abs(vL_max - vL_rs_vf(j, k, l, i))) + ! END: Left Monotonicity Preserving Bound + + ! Right Monotonicity Preserving Bound + d(-1) = v_rs_ws(j, k, l, i) & + + v_rs_ws(j - 2, k, l, i) & + - v_rs_ws(j - 1, k, l, i) & + *2._wp + d(0) = v_rs_ws(j + 1, k, l, i) & + + v_rs_ws(j - 1, k, l, i) & + - v_rs_ws(j, k, l, i) & + *2._wp + d(1) = v_rs_ws(j + 2, k, l, i) & + + v_rs_ws(j, k, l, i) & + - v_rs_ws(j + 1, k, l, i) & + *2._wp + + d_MD = (sign(1._wp, 4._wp*d(0) - d(1)) + sign(1._wp, 4._wp*d(1) - d(0))) & + *abs((sign(1._wp, 4._wp*d(0) - d(1)) + sign(1._wp, d(0))) & + *(sign(1._wp, 4._wp*d(0) - d(1)) + sign(1._wp, d(1)))) & + *min(abs(4._wp*d(0) - d(1)), abs(d(0)), & + abs(4._wp*d(1) - d(0)), abs(d(1)))/8._wp + + d_LC = (sign(1._wp, 4._wp*d(-1) - d(0)) + sign(1._wp, 4._wp*d(0) - d(-1))) & + *abs((sign(1._wp, 4._wp*d(-1) - d(0)) + sign(1._wp, d(-1))) & + *(sign(1._wp, 4._wp*d(-1) - d(0)) + sign(1._wp, d(0)))) & + *min(abs(4._wp*d(-1) - d(0)), abs(d(-1)), & + abs(4._wp*d(0) - d(-1)), abs(d(0)))/8._wp + + vR_UL = v_rs_ws(j, k, l, i) & + + (v_rs_ws(j, k, l, i) & + - v_rs_ws(j - 1, k, l, i))*alpha_mp + + vR_MD = (v_rs_ws(j, k, l, i) & + + v_rs_ws(j + 1, k, l, i) & + - d_MD)*5.e-1_wp + + vR_LC = v_rs_ws(j, k, l, i) & + + (v_rs_ws(j, k, l, i) & + - v_rs_ws(j - 1, k, l, i))*5.e-1_wp + beta_mp*d_LC + + vR_min = max(min(v_rs_ws(j, k, l, i), & + v_rs_ws(j + 1, k, l, i), & + vR_MD), & + min(v_rs_ws(j, k, l, i), & + vR_UL, & + vR_LC)) + + vR_max = min(max(v_rs_ws(j, k, l, i), & + v_rs_ws(j + 1, k, l, i), & + vR_MD), & + max(v_rs_ws(j, k, l, i), & + vR_UL, & + vR_LC)) + + vR_rs_vf(j, k, l, i) = vR_rs_vf(j, k, l, i) & + + (sign(5.e-1_wp, vR_min - vR_rs_vf(j, k, l, i)) & + + sign(5.e-1_wp, vR_max - vR_rs_vf(j, k, l, i))) & + *min(abs(vR_min - vR_rs_vf(j, k, l, i)), & + abs(vR_max - vR_rs_vf(j, k, l, i))) + ! END: Right Monotonicity Preserving Bound end do end do end do + end do $:END_GPU_PARALLEL_LOOP() end subroutine s_preserve_monotonicity From 824585f779e7d657fbd1b1ca5da62eca741672e5 Mon Sep 17 00:00:00 2001 From: "Daniel J. Vickers" Date: Thu, 6 Nov 2025 11:43:30 -0500 Subject: [PATCH 09/33] Updated macro API documentation --- docs/documentation/gpuParallelization.md | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/docs/documentation/gpuParallelization.md b/docs/documentation/gpuParallelization.md index f3d208905..41e0cb89e 100644 --- a/docs/documentation/gpuParallelization.md +++ b/docs/documentation/gpuParallelization.md @@ -44,14 +44,16 @@ Note: Ordering is not guaranteed or stable, so use key-value pairing when using **Macro Invocation** -Uses FYPP eval directive using `#:call` +In order to parallelize a loop, simply place two macro calls on either end of the loop: ```C -#:call GPU_PARALLEL_LOOP(...) +$:$GPU_PARALLEL_LOOP(...) {code} -#:endcall GPU_PARALLEL_LOOP +$:END_GPU_PARALLEL_LOOP() ``` +This wraps the lines in `code` with parallelization calls to openACC or openMP, depending on environement and compiler settings. + **Parameters** | name | data type | Default Value | description | From 28a20bb60ffec96cdd92d20b5209be9c311a3bee Mon Sep 17 00:00:00 2001 From: "Daniel J. Vickers" Date: Thu, 6 Nov 2025 11:46:01 -0500 Subject: [PATCH 10/33] Spelling --- docs/documentation/gpuParallelization.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/documentation/gpuParallelization.md b/docs/documentation/gpuParallelization.md index 41e0cb89e..af9da2faf 100644 --- a/docs/documentation/gpuParallelization.md +++ b/docs/documentation/gpuParallelization.md @@ -52,7 +52,7 @@ $:$GPU_PARALLEL_LOOP(...) $:END_GPU_PARALLEL_LOOP() ``` -This wraps the lines in `code` with parallelization calls to openACC or openMP, depending on environement and compiler settings. +This wraps the lines in `code` with parallelization calls to openACC or openMP, depending on environment and compiler settings. **Parameters** From 5f768ec0d253d75a4de60bf1fe27564f28182edc Mon Sep 17 00:00:00 2001 From: Daniel J Vickers Date: Fri, 7 Nov 2025 08:27:20 -0500 Subject: [PATCH 11/33] Found an error in how FYPP processes that resulted in only openMP directives getting printed. Thanks Anand for looking at things with me --- src/common/include/parallel_macros.fpp | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/src/common/include/parallel_macros.fpp b/src/common/include/parallel_macros.fpp index 3a1f131b1..bfe4b3bea 100644 --- a/src/common/include/parallel_macros.fpp +++ b/src/common/include/parallel_macros.fpp @@ -24,26 +24,28 @@ & copy=None, copyin=None, copyinReadOnly=None, copyout=None, create=None, & & no_create=None, present=None, deviceptr=None, attach=None, extraAccArgs=None, extraOmpArgs=None) + #:set acc_directive = ACC_PARALLEL_LOOP(collapse, private, parallelism, default, firstprivate, reduction, reductionOp, copy, copyin, copyinReadOnly, copyout, create, no_create, present, deviceptr, attach, extraAccArgs) + #:set omp_directive = OMP_PARALLEL_LOOP(collapse, private, parallelism, default, firstprivate, reduction, reductionOp, copy, copyin, copyinReadOnly, copyout, create, no_create, present, deviceptr, attach, extraOmpArgs) + #if defined(MFC_OpenACC) - #:set directive = ACC_PARALLEL_LOOP(collapse, private, parallelism, default, firstprivate, reduction, reductionOp, copy, copyin, copyinReadOnly, copyout, create, no_create, present, deviceptr, attach, extraAccArgs) + $:acc_directive #elif defined(MFC_OpenMP) - #:set directive = OMP_PARALLEL_LOOP(collapse, private, parallelism, default, firstprivate, reduction, reductionOp, copy, copyin, copyinReadOnly, copyout, create, no_create, present, deviceptr, attach, extraOmpArgs) + $:omp_directive #endif - $:directive - #:enddef #:def END_GPU_PARALLEL_LOOP() + #:set acc_end_directive = '!$acc end parallel loop' + #:set omp_end_directive = END_OMP_PARALLEL_LOOP() + #if defined(MFC_OpenACC) - #:set end_directive = '!$acc end parallel loop' + $:acc_end_directive #elif defined(MFC_OpenMP) - #:set end_directive = END_OMP_PARALLEL_LOOP() + $:omp_end_directive #endif - $:end_directive - #:enddef #:def GPU_ROUTINE(function_name=None, parallelism=None, nohost=False, cray_inline=False, extraAccArgs=None, extraOmpArgs=None) From 5ea3506e166b52a6f94bd5cbedb5f96cdffa6cb2 Mon Sep 17 00:00:00 2001 From: Daniel J Vickers Date: Fri, 7 Nov 2025 10:14:49 -0500 Subject: [PATCH 12/33] Resolved chemistry issues --- src/common/m_chemistry.fpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/common/m_chemistry.fpp b/src/common/m_chemistry.fpp index 2e3d66209..afc5696a2 100644 --- a/src/common/m_chemistry.fpp +++ b/src/common/m_chemistry.fpp @@ -129,7 +129,7 @@ contains real(wp), dimension(num_species) :: Ys real(wp), dimension(num_species) :: omega - $:GPU_PARALLEL_LOOP(collapse=3, private='[x,y,z,Ys, omega, T]') + $:GPU_PARALLEL_LOOP(collapse=3, private='[x,y,z,eqn,Ys, omega, T]') do z = bounds(3)%beg, bounds(3)%end do y = bounds(2)%beg, bounds(2)%end do x = bounds(1)%beg, bounds(1)%end @@ -191,7 +191,7 @@ contains offsets = 0 offsets(idir) = 1 - $:GPU_PARALLEL_LOOP(collapse=3, private='[x,y,z,Ys_L, Ys_R, Ys_cell, Xs_L, Xs_R, mass_diffusivities_mixavg1, mass_diffusivities_mixavg2, mass_diffusivities_mixavg_Cell, h_l, h_r, Xs_cell, h_k, dXk_dxi,Mass_Diffu_Flux]', copyin='[offsets]') + $:GPU_PARALLEL_LOOP(collapse=3, private='[x,y,z,i,eqn,Ys_L, Ys_R, Ys_cell, Xs_L, Xs_R, mass_diffusivities_mixavg1, mass_diffusivities_mixavg2, mass_diffusivities_mixavg_Cell, h_l, h_r, Xs_cell, h_k, dXk_dxi,Mass_Diffu_Flux]', copyin='[offsets]') do z = isc3%beg, isc3%end do y = isc2%beg, isc2%end do x = isc1%beg, isc1%end From 198085106ac4f1eeebe659cb6b31b8faf2d0261c Mon Sep 17 00:00:00 2001 From: Daniel J Vickers Date: Fri, 7 Nov 2025 11:11:24 -0500 Subject: [PATCH 13/33] Ben found my IO error --- src/simulation/m_bubbles_EE.fpp | 8 ++++---- src/simulation/m_bubbles_EL.fpp | 4 ++-- src/simulation/m_bubbles_EL_kernels.fpp | 2 +- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/simulation/m_bubbles_EE.fpp b/src/simulation/m_bubbles_EE.fpp index 5bfaba6e5..0895a9891 100644 --- a/src/simulation/m_bubbles_EE.fpp +++ b/src/simulation/m_bubbles_EE.fpp @@ -76,7 +76,7 @@ contains real(wp) :: nR3bar integer(wp) :: i, j, k, l - $:GPU_PARALLEL_LOOP(private='[j,k,l]', collapse=3) + $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=3) do l = 0, p do k = 0, n do j = 0, m @@ -177,7 +177,7 @@ contains integer :: dmBub_id !< Dummy variables for unified subgrid bubble subroutines real(wp) :: dmMass_v, dmMass_n, dmBeta_c, dmBeta_t, dmCson - $:GPU_PARALLEL_LOOP(private='[j,k,l]', collapse=3) + $:GPU_PARALLEL_LOOP(private='[j,k,l,q]', collapse=3) do l = 0, p do k = 0, n do j = 0, m @@ -196,7 +196,7 @@ contains $:END_GPU_PARALLEL_LOOP() adap_dt_stop_max = 0 - $:GPU_PARALLEL_LOOP(private='[j,k,l,Rtmp, Vtmp, myalpha_rho, myalpha]', collapse=3, & + $:GPU_PARALLEL_LOOP(private='[j,k,l,q,Rtmp, Vtmp, myalpha_rho, myalpha]', collapse=3, & & reduction='[[adap_dt_stop_max]]', reductionOp='[MAX]', & & copy='[adap_dt_stop_max]') do l = 0, p @@ -331,7 +331,7 @@ contains if (adap_dt .and. adap_dt_stop_max > 0) call s_mpi_abort("Adaptive time stepping failed to converge.") if (.not. adap_dt) then - $:GPU_PARALLEL_LOOP(private='[i,l,q]', collapse=3) + $:GPU_PARALLEL_LOOP(private='[i,k,l,q]', collapse=3) do l = 0, p do q = 0, n do i = 0, m diff --git a/src/simulation/m_bubbles_EL.fpp b/src/simulation/m_bubbles_EL.fpp index 4618ef32b..cafc4797b 100644 --- a/src/simulation/m_bubbles_EL.fpp +++ b/src/simulation/m_bubbles_EL.fpp @@ -614,7 +614,7 @@ contains ! Radial motion model adap_dt_stop_max = 0 - $:GPU_PARALLEL_LOOP(private='[k,myalpha_rho,myalpha,Re,cell]', & + $:GPU_PARALLEL_LOOP(private='[k,i,myalpha_rho,myalpha,Re,cell]', & & reduction='[[adap_dt_stop_max]]',reductionOp='[MAX]', & & copy='[adap_dt_stop_max]',copyin='[stage]') do k = 1, nBubs @@ -1511,7 +1511,7 @@ contains lag_void_max = 0._wp lag_void_avg = 0._wp lag_vol = 0._wp - $:GPU_PARALLEL_LOOP(private='[i,j,k]', collapse=3, reduction='[[lag_vol, lag_void_avg], [lag_void_max]]', reductionOp='[+, MAX]', copy='[lag_vol, lag_void_avg, lag_void_max]') + $:GPU_PARALLEL_LOOP(collapse=3, reduction='[[lag_vol, lag_void_avg], [lag_void_max]]', reductionOp='[+, MAX]', copy='[lag_vol, lag_void_avg, lag_void_max]') do k = 0, p do j = 0, n do i = 0, m diff --git a/src/simulation/m_bubbles_EL_kernels.fpp b/src/simulation/m_bubbles_EL_kernels.fpp index 0607af437..e1850846d 100644 --- a/src/simulation/m_bubbles_EL_kernels.fpp +++ b/src/simulation/m_bubbles_EL_kernels.fpp @@ -121,7 +121,7 @@ contains smearGridz = smearGrid if (p == 0) smearGridz = 1 - $:GPU_PARALLEL_LOOP(private='[nodecoord,l,s_coord,cell,center]', copyin='[smearGrid,smearGridz]') + $:GPU_PARALLEL_LOOP(private='[nodecoord,i,j,k,l,s_coord,cell,center]', copyin='[smearGrid,smearGridz]') do l = 1, nBubs nodecoord(1:3) = 0 center(1:3) = 0._wp From 1d5e33cf527c3804206e72aef8d80080069a689d Mon Sep 17 00:00:00 2001 From: Daniel J Vickers Date: Fri, 7 Nov 2025 11:28:35 -0500 Subject: [PATCH 14/33] Fixed issue with s_get_char_vol being elemental, which means that it was never called, breaking the IO --- src/simulation/m_bubbles_EL.fpp | 2 +- src/simulation/m_bubbles_EL_kernels.fpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/simulation/m_bubbles_EL.fpp b/src/simulation/m_bubbles_EL.fpp index cafc4797b..075e6d733 100644 --- a/src/simulation/m_bubbles_EL.fpp +++ b/src/simulation/m_bubbles_EL.fpp @@ -1511,7 +1511,7 @@ contains lag_void_max = 0._wp lag_void_avg = 0._wp lag_vol = 0._wp - $:GPU_PARALLEL_LOOP(collapse=3, reduction='[[lag_vol, lag_void_avg], [lag_void_max]]', reductionOp='[+, MAX]', copy='[lag_vol, lag_void_avg, lag_void_max]') + $:GPU_PARALLEL_LOOP(private='[i,j,k,volcell]', collapse=3, reduction='[[lag_vol, lag_void_avg], [lag_void_max]]', reductionOp='[+, MAX]', copy='[lag_vol, lag_void_avg, lag_void_max]') do k = 0, p do j = 0, n do i = 0, m diff --git a/src/simulation/m_bubbles_EL_kernels.fpp b/src/simulation/m_bubbles_EL_kernels.fpp index e1850846d..eff9a33cc 100644 --- a/src/simulation/m_bubbles_EL_kernels.fpp +++ b/src/simulation/m_bubbles_EL_kernels.fpp @@ -381,7 +381,7 @@ contains !> The purpose of this procedure is to calculate the characteristic cell volume !! @param cell Computational coordinates (x, y, z) !! @param Charvol Characteristic volume - elemental subroutine s_get_char_vol(cellx, celly, cellz, Charvol) + subroutine s_get_char_vol(cellx, celly, cellz, Charvol) $:GPU_ROUTINE(function_name='s_get_char_vol',parallelism='[seq]', & & cray_inline=True) From 723822d0b71c437e734f7ca0fcd386b0e3884a79 Mon Sep 17 00:00:00 2001 From: Daniel J Vickers Date: Fri, 7 Nov 2025 11:34:37 -0500 Subject: [PATCH 15/33] All tests pass on wingtip --- src/simulation/m_bubbles_EL.fpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/simulation/m_bubbles_EL.fpp b/src/simulation/m_bubbles_EL.fpp index 075e6d733..210c43f7e 100644 --- a/src/simulation/m_bubbles_EL.fpp +++ b/src/simulation/m_bubbles_EL.fpp @@ -1511,7 +1511,7 @@ contains lag_void_max = 0._wp lag_void_avg = 0._wp lag_vol = 0._wp - $:GPU_PARALLEL_LOOP(private='[i,j,k,volcell]', collapse=3, reduction='[[lag_vol, lag_void_avg], [lag_void_max]]', reductionOp='[+, MAX]', copy='[lag_vol, lag_void_avg, lag_void_max]') + $:GPU_PARALLEL_LOOP(private='[volcell]', collapse=3, reduction='[[lag_vol, lag_void_avg], [lag_void_max]]', reductionOp='[+, MAX]', copy='[lag_vol, lag_void_avg, lag_void_max]') do k = 0, p do j = 0, n do i = 0, m From 9f946a0e748e5fc4c6a7fc54d8fe72901a35c9a0 Mon Sep 17 00:00:00 2001 From: "Daniel J. Vickers" Date: Sun, 16 Nov 2025 10:57:27 -0600 Subject: [PATCH 16/33] Passes GNU tests --- src/common/m_boundary_common.fpp | 4 - src/common/m_variables_conversion.fpp | 617 ++++++++++++------------ src/simulation/m_body_forces.fpp | 1 - src/simulation/m_bubbles_EL_kernels.fpp | 155 +++--- src/simulation/m_ibm.fpp | 2 +- src/simulation/m_muscl.fpp | 2 +- 6 files changed, 384 insertions(+), 397 deletions(-) diff --git a/src/common/m_boundary_common.fpp b/src/common/m_boundary_common.fpp index 7bef121f4..f4451ab02 100644 --- a/src/common/m_boundary_common.fpp +++ b/src/common/m_boundary_common.fpp @@ -1507,7 +1507,6 @@ contains jac_sf(1)%sf(-j, k, l) = jac_sf(1)%sf(0, k, l) end do end select - end do end do end do $:END_GPU_PARALLEL_LOOP() @@ -1536,7 +1535,6 @@ contains end select end do end do - end do $:END_GPU_PARALLEL_LOOP() end if @@ -1565,7 +1563,6 @@ contains end select end do end do - end do $:END_GPU_PARALLEL_LOOP() end if @@ -1592,7 +1589,6 @@ contains end select end do end do - end do $:END_GPU_PARALLEL_LOOP() end if diff --git a/src/common/m_variables_conversion.fpp b/src/common/m_variables_conversion.fpp index 55659407e..053f954db 100644 --- a/src/common/m_variables_conversion.fpp +++ b/src/common/m_variables_conversion.fpp @@ -855,292 +855,292 @@ contains do j = ibounds(1)%beg, ibounds(1)%end dyn_pres_K = 0._wp - if (igr) then - if (num_fluids == 1) then - alpha_rho_K(1) = qK_cons_vf(contxb)%sf(j, k, l) - alpha_K(1) = 1._wp + if (igr) then + if (num_fluids == 1) then + alpha_rho_K(1) = qK_cons_vf(contxb)%sf(j, k, l) + alpha_K(1) = 1._wp + else + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids - 1 + alpha_rho_K(i) = qK_cons_vf(i)%sf(j, k, l) + alpha_K(i) = qK_cons_vf(advxb + i - 1)%sf(j, k, l) + end do + + alpha_rho_K(num_fluids) = qK_cons_vf(num_fluids)%sf(j, k, l) + alpha_K(num_fluids) = 1._wp - sum(alpha_K(1:num_fluids - 1)) + end if else $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - 1 + do i = 1, num_fluids alpha_rho_K(i) = qK_cons_vf(i)%sf(j, k, l) alpha_K(i) = qK_cons_vf(advxb + i - 1)%sf(j, k, l) end do - - alpha_rho_K(num_fluids) = qK_cons_vf(num_fluids)%sf(j, k, l) - alpha_K(num_fluids) = 1._wp - sum(alpha_K(1:num_fluids - 1)) end if - else - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - alpha_rho_K(i) = qK_cons_vf(i)%sf(j, k, l) - alpha_K(i) = qK_cons_vf(advxb + i - 1)%sf(j, k, l) - end do - end if - if (model_eqns /= 4) then + if (model_eqns /= 4) then #ifdef MFC_SIMULATION - ! If in simulation, use acc mixture subroutines - if (elasticity) then - call s_convert_species_to_mixture_variables_acc(rho_K, gamma_K, pi_inf_K, qv_K, alpha_K, & - alpha_rho_K, Re_K, G_K, Gs_vc) - else if (bubbles_euler) then - call s_convert_species_to_mixture_variables_bubbles_acc(rho_K, gamma_K, pi_inf_K, qv_K, & - alpha_K, alpha_rho_K, Re_K) - else - call s_convert_species_to_mixture_variables_acc(rho_K, gamma_K, pi_inf_K, qv_K, & - alpha_K, alpha_rho_K, Re_K) - end if + ! If in simulation, use acc mixture subroutines + if (elasticity) then + call s_convert_species_to_mixture_variables_acc(rho_K, gamma_K, pi_inf_K, qv_K, alpha_K, & + alpha_rho_K, Re_K, G_K, Gs_vc) + else if (bubbles_euler) then + call s_convert_species_to_mixture_variables_bubbles_acc(rho_K, gamma_K, pi_inf_K, qv_K, & + alpha_K, alpha_rho_K, Re_K) + else + call s_convert_species_to_mixture_variables_acc(rho_K, gamma_K, pi_inf_K, qv_K, & + alpha_K, alpha_rho_K, Re_K) + end if #else - ! If pre-processing, use non acc mixture subroutines - if (elasticity) then - call s_convert_to_mixture_variables(qK_cons_vf, j, k, l, & - rho_K, gamma_K, pi_inf_K, qv_K, Re_K, G_K, fluid_pp(:)%G) - else - call s_convert_to_mixture_variables(qK_cons_vf, j, k, l, & - rho_K, gamma_K, pi_inf_K, qv_K) - end if + ! If pre-processing, use non acc mixture subroutines + if (elasticity) then + call s_convert_to_mixture_variables(qK_cons_vf, j, k, l, & + rho_K, gamma_K, pi_inf_K, qv_K, Re_K, G_K, fluid_pp(:)%G) + else + call s_convert_to_mixture_variables(qK_cons_vf, j, k, l, & + rho_K, gamma_K, pi_inf_K, qv_K) + end if #endif - end if - - if (relativity) then - if (n == 0) then - B(1) = Bx0 - B(2) = qK_cons_vf(B_idx%beg)%sf(j, k, l) - B(3) = qK_cons_vf(B_idx%beg + 1)%sf(j, k, l) - else - B(1) = qK_cons_vf(B_idx%beg)%sf(j, k, l) - B(2) = qK_cons_vf(B_idx%beg + 1)%sf(j, k, l) - B(3) = qK_cons_vf(B_idx%beg + 2)%sf(j, k, l) end if - B2 = B(1)**2 + B(2)**2 + B(3)**2 - - m2 = 0._wp - $:GPU_LOOP(parallelism='[seq]') - do i = momxb, momxe - m2 = m2 + qK_cons_vf(i)%sf(j, k, l)**2 - end do - S = 0._wp - $:GPU_LOOP(parallelism='[seq]') - do i = 1, 3 - S = S + qK_cons_vf(momxb + i - 1)%sf(j, k, l)*B(i) - end do + if (relativity) then + if (n == 0) then + B(1) = Bx0 + B(2) = qK_cons_vf(B_idx%beg)%sf(j, k, l) + B(3) = qK_cons_vf(B_idx%beg + 1)%sf(j, k, l) + else + B(1) = qK_cons_vf(B_idx%beg)%sf(j, k, l) + B(2) = qK_cons_vf(B_idx%beg + 1)%sf(j, k, l) + B(3) = qK_cons_vf(B_idx%beg + 2)%sf(j, k, l) + end if + B2 = B(1)**2 + B(2)**2 + B(3)**2 - E = qK_cons_vf(E_idx)%sf(j, k, l) + m2 = 0._wp + $:GPU_LOOP(parallelism='[seq]') + do i = momxb, momxe + m2 = m2 + qK_cons_vf(i)%sf(j, k, l)**2 + end do - D = 0._wp - $:GPU_LOOP(parallelism='[seq]') - do i = 1, contxe - D = D + qK_cons_vf(i)%sf(j, k, l) - end do + S = 0._wp + $:GPU_LOOP(parallelism='[seq]') + do i = 1, 3 + S = S + qK_cons_vf(momxb + i - 1)%sf(j, k, l)*B(i) + end do - ! Newton-Raphson - W = E + D - $:GPU_LOOP(parallelism='[seq]') - do iter = 1, relativity_cons_to_prim_max_iter - Ga = (W + B2)*W/sqrt((W + B2)**2*W**2 - (m2*W**2 + S**2*(2*W + B2))) - pres = (W - D*Ga)/((gamma_K + 1)*Ga**2) ! Thermal pressure from EOS - f = W - pres + (1 - 1/(2*Ga**2))*B2 - S**2/(2*W**2) - E - D - - ! The first equation below corrects a typo in (Mignone & Bodo, 2006) - ! m2*W**2 → 2*m2*W**2, which would cancel with the 2* in other terms - ! This corrected version is not used as the second equation empirically converges faster. - ! First equation is kept for further investigation. - ! dGa_dW = -Ga**3 * ( S**2*(3*W**2+3*W*B2+B2**2) + m2*W**2 ) / (W**3 * (W+B2)**3) ! first (corrected) - dGa_dW = -Ga**3*(2*S**2*(3*W**2 + 3*W*B2 + B2**2) + m2*W**2)/(2*W**3*(W + B2)**3) ! second (in paper) - - dp_dW = (Ga*(1 + D*dGa_dW) - 2*W*dGa_dW)/((gamma_K + 1)*Ga**3) - df_dW = 1 - dp_dW + (B2/Ga**3)*dGa_dW + S**2/W**3 - - dW = -f/df_dW - W = W + dW - if (abs(dW) < 1.e-12_wp*W) exit - end do + E = qK_cons_vf(E_idx)%sf(j, k, l) - ! Recalculate pressure using converged W - Ga = (W + B2)*W/sqrt((W + B2)**2*W**2 - (m2*W**2 + S**2*(2*W + B2))) - qK_prim_vf(E_idx)%sf(j, k, l) = (W - D*Ga)/((gamma_K + 1)*Ga**2) + D = 0._wp + $:GPU_LOOP(parallelism='[seq]') + do i = 1, contxe + D = D + qK_cons_vf(i)%sf(j, k, l) + end do - ! Recover the other primitive variables - $:GPU_LOOP(parallelism='[seq]') - do i = 1, 3 - qK_prim_vf(momxb + i - 1)%sf(j, k, l) = (qK_cons_vf(momxb + i - 1)%sf(j, k, l) + (S/W)*B(i))/(W + B2) - end do - qK_prim_vf(1)%sf(j, k, l) = D/Ga ! Hard-coded for single-component for now + ! Newton-Raphson + W = E + D + $:GPU_LOOP(parallelism='[seq]') + do iter = 1, relativity_cons_to_prim_max_iter + Ga = (W + B2)*W/sqrt((W + B2)**2*W**2 - (m2*W**2 + S**2*(2*W + B2))) + pres = (W - D*Ga)/((gamma_K + 1)*Ga**2) ! Thermal pressure from EOS + f = W - pres + (1 - 1/(2*Ga**2))*B2 - S**2/(2*W**2) - E - D + + ! The first equation below corrects a typo in (Mignone & Bodo, 2006) + ! m2*W**2 → 2*m2*W**2, which would cancel with the 2* in other terms + ! This corrected version is not used as the second equation empirically converges faster. + ! First equation is kept for further investigation. + ! dGa_dW = -Ga**3 * ( S**2*(3*W**2+3*W*B2+B2**2) + m2*W**2 ) / (W**3 * (W+B2)**3) ! first (corrected) + dGa_dW = -Ga**3*(2*S**2*(3*W**2 + 3*W*B2 + B2**2) + m2*W**2)/(2*W**3*(W + B2)**3) ! second (in paper) + + dp_dW = (Ga*(1 + D*dGa_dW) - 2*W*dGa_dW)/((gamma_K + 1)*Ga**3) + df_dW = 1 - dp_dW + (B2/Ga**3)*dGa_dW + S**2/W**3 + + dW = -f/df_dW + W = W + dW + if (abs(dW) < 1.e-12_wp*W) exit + end do - $:GPU_LOOP(parallelism='[seq]') - do i = B_idx%beg, B_idx%end - qK_prim_vf(i)%sf(j, k, l) = qK_cons_vf(i)%sf(j, k, l) - end do + ! Recalculate pressure using converged W + Ga = (W + B2)*W/sqrt((W + B2)**2*W**2 - (m2*W**2 + S**2*(2*W + B2))) + qK_prim_vf(E_idx)%sf(j, k, l) = (W - D*Ga)/((gamma_K + 1)*Ga**2) - cycle ! skip all the non-relativistic conversions below - end if + ! Recover the other primitive variables + $:GPU_LOOP(parallelism='[seq]') + do i = 1, 3 + qK_prim_vf(momxb + i - 1)%sf(j, k, l) = (qK_cons_vf(momxb + i - 1)%sf(j, k, l) + (S/W)*B(i))/(W + B2) + end do + qK_prim_vf(1)%sf(j, k, l) = D/Ga ! Hard-coded for single-component for now - if (chemistry) then - rho_K = 0._wp - $:GPU_LOOP(parallelism='[seq]') - do i = chemxb, chemxe - rho_K = rho_K + max(0._wp, qK_cons_vf(i)%sf(j, k, l)) - end do + $:GPU_LOOP(parallelism='[seq]') + do i = B_idx%beg, B_idx%end + qK_prim_vf(i)%sf(j, k, l) = qK_cons_vf(i)%sf(j, k, l) + end do - $:GPU_LOOP(parallelism='[seq]') - do i = 1, contxe - qK_prim_vf(i)%sf(j, k, l) = rho_K - end do + cycle ! skip all the non-relativistic conversions below + end if - $:GPU_LOOP(parallelism='[seq]') - do i = chemxb, chemxe - qK_prim_vf(i)%sf(j, k, l) = max(0._wp, qK_cons_vf(i)%sf(j, k, l)/rho_K) - end do - else - $:GPU_LOOP(parallelism='[seq]') - do i = 1, contxe - qK_prim_vf(i)%sf(j, k, l) = qK_cons_vf(i)%sf(j, k, l) - end do - end if + if (chemistry) then + rho_K = 0._wp + $:GPU_LOOP(parallelism='[seq]') + do i = chemxb, chemxe + rho_K = rho_K + max(0._wp, qK_cons_vf(i)%sf(j, k, l)) + end do -#ifdef MFC_SIMULATION - rho_K = max(rho_K, sgm_eps) -#endif + $:GPU_LOOP(parallelism='[seq]') + do i = 1, contxe + qK_prim_vf(i)%sf(j, k, l) = rho_K + end do - $:GPU_LOOP(parallelism='[seq]') - do i = momxb, momxe - if (model_eqns /= 4) then - qK_prim_vf(i)%sf(j, k, l) = qK_cons_vf(i)%sf(j, k, l) & - /rho_K - dyn_pres_K = dyn_pres_K + 5.e-1_wp*qK_cons_vf(i)%sf(j, k, l) & - *qK_prim_vf(i)%sf(j, k, l) + $:GPU_LOOP(parallelism='[seq]') + do i = chemxb, chemxe + qK_prim_vf(i)%sf(j, k, l) = max(0._wp, qK_cons_vf(i)%sf(j, k, l)/rho_K) + end do else - qK_prim_vf(i)%sf(j, k, l) = qK_cons_vf(i)%sf(j, k, l) & - /qK_cons_vf(1)%sf(j, k, l) + $:GPU_LOOP(parallelism='[seq]') + do i = 1, contxe + qK_prim_vf(i)%sf(j, k, l) = qK_cons_vf(i)%sf(j, k, l) + end do end if - end do - if (chemistry) then +#ifdef MFC_SIMULATION + rho_K = max(rho_K, sgm_eps) +#endif + $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_species - rhoYks(i) = qK_cons_vf(chemxb + i - 1)%sf(j, k, l) + do i = momxb, momxe + if (model_eqns /= 4) then + qK_prim_vf(i)%sf(j, k, l) = qK_cons_vf(i)%sf(j, k, l) & + /rho_K + dyn_pres_K = dyn_pres_K + 5.e-1_wp*qK_cons_vf(i)%sf(j, k, l) & + *qK_prim_vf(i)%sf(j, k, l) + else + qK_prim_vf(i)%sf(j, k, l) = qK_cons_vf(i)%sf(j, k, l) & + /qK_cons_vf(1)%sf(j, k, l) + end if end do - T = q_T_sf%sf(j, k, l) - end if + if (chemistry) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_species + rhoYks(i) = qK_cons_vf(chemxb + i - 1)%sf(j, k, l) + end do - if (mhd) then - if (n == 0) then - pres_mag = 0.5_wp*(Bx0**2 + qK_cons_vf(B_idx%beg)%sf(j, k, l)**2 + qK_cons_vf(B_idx%beg + 1)%sf(j, k, l)**2) + T = q_T_sf%sf(j, k, l) + end if + + if (mhd) then + if (n == 0) then + pres_mag = 0.5_wp*(Bx0**2 + qK_cons_vf(B_idx%beg)%sf(j, k, l)**2 + qK_cons_vf(B_idx%beg + 1)%sf(j, k, l)**2) + else + pres_mag = 0.5_wp*(qK_cons_vf(B_idx%beg)%sf(j, k, l)**2 + qK_cons_vf(B_idx%beg + 1)%sf(j, k, l)**2 + qK_cons_vf(B_idx%beg + 2)%sf(j, k, l)**2) + end if else - pres_mag = 0.5_wp*(qK_cons_vf(B_idx%beg)%sf(j, k, l)**2 + qK_cons_vf(B_idx%beg + 1)%sf(j, k, l)**2 + qK_cons_vf(B_idx%beg + 2)%sf(j, k, l)**2) + pres_mag = 0._wp end if - else - pres_mag = 0._wp - end if - call s_compute_pressure(qK_cons_vf(E_idx)%sf(j, k, l), & - qK_cons_vf(alf_idx)%sf(j, k, l), & - dyn_pres_K, pi_inf_K, gamma_K, rho_K, & - qv_K, rhoYks, pres, T, pres_mag=pres_mag) + call s_compute_pressure(qK_cons_vf(E_idx)%sf(j, k, l), & + qK_cons_vf(alf_idx)%sf(j, k, l), & + dyn_pres_K, pi_inf_K, gamma_K, rho_K, & + qv_K, rhoYks, pres, T, pres_mag=pres_mag) - qK_prim_vf(E_idx)%sf(j, k, l) = pres + qK_prim_vf(E_idx)%sf(j, k, l) = pres - if (chemistry) then - q_T_sf%sf(j, k, l) = T - end if + if (chemistry) then + q_T_sf%sf(j, k, l) = T + end if - if (bubbles_euler) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, nb - nRtmp(i) = qK_cons_vf(bubrs_vc(i))%sf(j, k, l) - end do + if (bubbles_euler) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, nb + nRtmp(i) = qK_cons_vf(bubrs_vc(i))%sf(j, k, l) + end do - vftmp = qK_cons_vf(alf_idx)%sf(j, k, l) + vftmp = qK_cons_vf(alf_idx)%sf(j, k, l) - if (qbmm) then - !Get nb (constant across all R0 bins) - nbub_sc = qK_cons_vf(bubxb)%sf(j, k, l) + if (qbmm) then + !Get nb (constant across all R0 bins) + nbub_sc = qK_cons_vf(bubxb)%sf(j, k, l) - !Convert cons to prim - $:GPU_LOOP(parallelism='[seq]') - do i = bubxb, bubxe - qK_prim_vf(i)%sf(j, k, l) = qK_cons_vf(i)%sf(j, k, l)/nbub_sc - end do - !Need to keep track of nb in the primitive variable list (converted back to true value before output) + !Convert cons to prim + $:GPU_LOOP(parallelism='[seq]') + do i = bubxb, bubxe + qK_prim_vf(i)%sf(j, k, l) = qK_cons_vf(i)%sf(j, k, l)/nbub_sc + end do + !Need to keep track of nb in the primitive variable list (converted back to true value before output) #ifdef MFC_SIMULATION - qK_prim_vf(bubxb)%sf(j, k, l) = qK_cons_vf(bubxb)%sf(j, k, l) + qK_prim_vf(bubxb)%sf(j, k, l) = qK_cons_vf(bubxb)%sf(j, k, l) #endif - else - if (adv_n) then - qK_prim_vf(n_idx)%sf(j, k, l) = qK_cons_vf(n_idx)%sf(j, k, l) - nbub_sc = qK_prim_vf(n_idx)%sf(j, k, l) else - call s_comp_n_from_cons(vftmp, nRtmp, nbub_sc, weight) + if (adv_n) then + qK_prim_vf(n_idx)%sf(j, k, l) = qK_cons_vf(n_idx)%sf(j, k, l) + nbub_sc = qK_prim_vf(n_idx)%sf(j, k, l) + else + call s_comp_n_from_cons(vftmp, nRtmp, nbub_sc, weight) + end if + + $:GPU_LOOP(parallelism='[seq]') + do i = bubxb, bubxe + qK_prim_vf(i)%sf(j, k, l) = qK_cons_vf(i)%sf(j, k, l)/nbub_sc + end do end if + end if + if (mhd) then $:GPU_LOOP(parallelism='[seq]') - do i = bubxb, bubxe - qK_prim_vf(i)%sf(j, k, l) = qK_cons_vf(i)%sf(j, k, l)/nbub_sc + do i = B_idx%beg, B_idx%end + qK_prim_vf(i)%sf(j, k, l) = qK_cons_vf(i)%sf(j, k, l) end do end if - end if - - if (mhd) then - $:GPU_LOOP(parallelism='[seq]') - do i = B_idx%beg, B_idx%end - qK_prim_vf(i)%sf(j, k, l) = qK_cons_vf(i)%sf(j, k, l) - end do - end if - if (elasticity) then - $:GPU_LOOP(parallelism='[seq]') - do i = strxb, strxe - qK_prim_vf(i)%sf(j, k, l) = qK_cons_vf(i)%sf(j, k, l)/rho_K - end do - end if + if (elasticity) then + $:GPU_LOOP(parallelism='[seq]') + do i = strxb, strxe + qK_prim_vf(i)%sf(j, k, l) = qK_cons_vf(i)%sf(j, k, l)/rho_K + end do + end if - if (hypoelasticity) then - $:GPU_LOOP(parallelism='[seq]') - do i = strxb, strxe - ! subtracting elastic contribution for pressure calculation - if (G_K > verysmall) then - if (cont_damage) G_K = G_K*max((1._wp - qK_cons_vf(damage_idx)%sf(j, k, l)), 0._wp) - qK_prim_vf(E_idx)%sf(j, k, l) = qK_prim_vf(E_idx)%sf(j, k, l) - & - ((qK_prim_vf(i)%sf(j, k, l)**2._wp)/(4._wp*G_K))/gamma_K - ! Double for shear stresses - if (any(i == shear_indices)) then + if (hypoelasticity) then + $:GPU_LOOP(parallelism='[seq]') + do i = strxb, strxe + ! subtracting elastic contribution for pressure calculation + if (G_K > verysmall) then + if (cont_damage) G_K = G_K*max((1._wp - qK_cons_vf(damage_idx)%sf(j, k, l)), 0._wp) qK_prim_vf(E_idx)%sf(j, k, l) = qK_prim_vf(E_idx)%sf(j, k, l) - & ((qK_prim_vf(i)%sf(j, k, l)**2._wp)/(4._wp*G_K))/gamma_K + ! Double for shear stresses + if (any(i == shear_indices)) then + qK_prim_vf(E_idx)%sf(j, k, l) = qK_prim_vf(E_idx)%sf(j, k, l) - & + ((qK_prim_vf(i)%sf(j, k, l)**2._wp)/(4._wp*G_K))/gamma_K + end if end if - end if - end do - end if + end do + end if - if (hyperelasticity) then - $:GPU_LOOP(parallelism='[seq]') - do i = xibeg, xiend - qK_prim_vf(i)%sf(j, k, l) = qK_cons_vf(i)%sf(j, k, l)/rho_K - end do - end if + if (hyperelasticity) then + $:GPU_LOOP(parallelism='[seq]') + do i = xibeg, xiend + qK_prim_vf(i)%sf(j, k, l) = qK_cons_vf(i)%sf(j, k, l)/rho_K + end do + end if - if (.not. igr .or. num_fluids > 1) then - $:GPU_LOOP(parallelism='[seq]') - do i = advxb, advxe - qK_prim_vf(i)%sf(j, k, l) = qK_cons_vf(i)%sf(j, k, l) - end do - end if + if (.not. igr .or. num_fluids > 1) then + $:GPU_LOOP(parallelism='[seq]') + do i = advxb, advxe + qK_prim_vf(i)%sf(j, k, l) = qK_cons_vf(i)%sf(j, k, l) + end do + end if - if (surface_tension) then - qK_prim_vf(c_idx)%sf(j, k, l) = qK_cons_vf(c_idx)%sf(j, k, l) - end if + if (surface_tension) then + qK_prim_vf(c_idx)%sf(j, k, l) = qK_cons_vf(c_idx)%sf(j, k, l) + end if - if (cont_damage) qK_prim_vf(damage_idx)%sf(j, k, l) = qK_cons_vf(damage_idx)%sf(j, k, l) + if (cont_damage) qK_prim_vf(damage_idx)%sf(j, k, l) = qK_cons_vf(damage_idx)%sf(j, k, l) #ifdef MFC_POST_PROCESS - if (bubbles_lagrange) qK_prim_vf(beta_idx)%sf(j, k, l) = qK_cons_vf(beta_idx)%sf(j, k, l) + if (bubbles_lagrange) qK_prim_vf(beta_idx)%sf(j, k, l) = qK_cons_vf(beta_idx)%sf(j, k, l) #endif + end do end do end do - end do $:END_GPU_PARALLEL_LOOP() end subroutine s_convert_conservative_to_primitive_variables @@ -1468,119 +1468,112 @@ contains ! Computing the flux variables from the primitive variables, without ! accounting for the contribution of either viscosity or capillarity #ifdef MFC_SIMULATION -<<<<<<< HEAD - $:GPU_PARALLEL_LOOP(collapse=3, private='[j,k,l,alpha_rho_K, vel_K, alpha_K, Re_K, Y_K]') - do l = is3b, is3e - do k = is2b, is2e - do j = is1b, is1e -======= $:GPU_PARALLEL_LOOP(collapse=3, private='[alpha_rho_K, vel_K, alpha_K, Re_K, Y_K, rho_K, vel_K_sum, pres_K, E_K, gamma_K, pi_inf_K, qv_K, G_K, T_K, mix_mol_weight, R_gas]') do l = is3b, is3e do k = is2b, is2e do j = is1b, is1e ->>>>>>> master - - $:GPU_LOOP(parallelism='[seq]') - do i = 1, contxe - alpha_rho_K(i) = qK_prim_vf(j, k, l, i) - end do - - $:GPU_LOOP(parallelism='[seq]') - do i = advxb, advxe - alpha_K(i - E_idx) = qK_prim_vf(j, k, l, i) - end do - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_vels - vel_K(i) = qK_prim_vf(j, k, l, contxe + i) - end do - vel_K_sum = 0._wp - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_vels - vel_K_sum = vel_K_sum + vel_K(i)**2._wp - end do - - pres_K = qK_prim_vf(j, k, l, E_idx) - if (elasticity) then - call s_convert_species_to_mixture_variables_acc(rho_K, gamma_K, pi_inf_K, qv_K, & - alpha_K, alpha_rho_K, Re_K, & - G_K, Gs_vc) - else if (bubbles_euler) then - call s_convert_species_to_mixture_variables_bubbles_acc(rho_K, gamma_K, & - pi_inf_K, qv_K, alpha_K, alpha_rho_K, Re_K) - else - call s_convert_species_to_mixture_variables_acc(rho_K, gamma_K, pi_inf_K, qv_K, & - alpha_K, alpha_rho_K, Re_K) - end if + $:GPU_LOOP(parallelism='[seq]') + do i = 1, contxe + alpha_rho_K(i) = qK_prim_vf(j, k, l, i) + end do - ! Computing the energy from the pressure + $:GPU_LOOP(parallelism='[seq]') + do i = advxb, advxe + alpha_K(i - E_idx) = qK_prim_vf(j, k, l, i) + end do + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_vels + vel_K(i) = qK_prim_vf(j, k, l, contxe + i) + end do - if (chemistry) then + vel_K_sum = 0._wp $:GPU_LOOP(parallelism='[seq]') - do i = chemxb, chemxe - Y_K(i - chemxb + 1) = qK_prim_vf(j, k, l, i) + do i = 1, num_vels + vel_K_sum = vel_K_sum + vel_K(i)**2._wp end do - !Computing the energy from the internal energy of the mixture - call get_mixture_molecular_weight(Y_k, mix_mol_weight) - R_gas = gas_constant/mix_mol_weight - T_K = pres_K/rho_K/R_gas - call get_mixture_energy_mass(T_K, Y_K, E_K) - E_K = rho_K*E_K + 5.e-1_wp*rho_K*vel_K_sum - else - ! Computing the energy from the pressure - E_K = gamma_K*pres_K + pi_inf_K & - + 5.e-1_wp*rho_K*vel_K_sum + qv_K - end if - ! mass flux, this should be \alpha_i \rho_i u_i - $:GPU_LOOP(parallelism='[seq]') - do i = 1, contxe - FK_vf(j, k, l, i) = alpha_rho_K(i)*vel_K(dir_idx(1)) - end do + pres_K = qK_prim_vf(j, k, l, E_idx) + if (elasticity) then + call s_convert_species_to_mixture_variables_acc(rho_K, gamma_K, pi_inf_K, qv_K, & + alpha_K, alpha_rho_K, Re_K, & + G_K, Gs_vc) + else if (bubbles_euler) then + call s_convert_species_to_mixture_variables_bubbles_acc(rho_K, gamma_K, & + pi_inf_K, qv_K, alpha_K, alpha_rho_K, Re_K) + else + call s_convert_species_to_mixture_variables_acc(rho_K, gamma_K, pi_inf_K, qv_K, & + alpha_K, alpha_rho_K, Re_K) + end if - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_vels - FK_vf(j, k, l, contxe + dir_idx(i)) = & - rho_K*vel_K(dir_idx(1)) & - *vel_K(dir_idx(i)) & - + pres_K*dir_flg(dir_idx(i)) - end do + ! Computing the energy from the pressure - ! energy flux, u(E+p) - FK_vf(j, k, l, E_idx) = vel_K(dir_idx(1))*(E_K + pres_K) + if (chemistry) then + $:GPU_LOOP(parallelism='[seq]') + do i = chemxb, chemxe + Y_K(i - chemxb + 1) = qK_prim_vf(j, k, l, i) + end do + !Computing the energy from the internal energy of the mixture + call get_mixture_molecular_weight(Y_k, mix_mol_weight) + R_gas = gas_constant/mix_mol_weight + T_K = pres_K/rho_K/R_gas + call get_mixture_energy_mass(T_K, Y_K, E_K) + E_K = rho_K*E_K + 5.e-1_wp*rho_K*vel_K_sum + else + ! Computing the energy from the pressure + E_K = gamma_K*pres_K + pi_inf_K & + + 5.e-1_wp*rho_K*vel_K_sum + qv_K + end if - ! Species advection Flux, \rho*u*Y - if (chemistry) then + ! mass flux, this should be \alpha_i \rho_i u_i $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_species - FK_vf(j, k, l, i - 1 + chemxb) = vel_K(dir_idx(1))*(rho_K*Y_K(i)) + do i = 1, contxe + FK_vf(j, k, l, i) = alpha_rho_K(i)*vel_K(dir_idx(1)) end do - end if - if (riemann_solver == 1 .or. riemann_solver == 4) then $:GPU_LOOP(parallelism='[seq]') - do i = advxb, advxe - FK_vf(j, k, l, i) = 0._wp - FK_src_vf(j, k, l, i) = alpha_K(i - E_idx) + do i = 1, num_vels + FK_vf(j, k, l, contxe + dir_idx(i)) = & + rho_K*vel_K(dir_idx(1)) & + *vel_K(dir_idx(i)) & + + pres_K*dir_flg(dir_idx(i)) end do - else - ! Could be bubbles_euler! - $:GPU_LOOP(parallelism='[seq]') - do i = advxb, advxe - FK_vf(j, k, l, i) = vel_K(dir_idx(1))*alpha_K(i - E_idx) - end do + ! energy flux, u(E+p) + FK_vf(j, k, l, E_idx) = vel_K(dir_idx(1))*(E_K + pres_K) - $:GPU_LOOP(parallelism='[seq]') - do i = advxb, advxe - FK_src_vf(j, k, l, i) = vel_K(dir_idx(1)) - end do + ! Species advection Flux, \rho*u*Y + if (chemistry) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_species + FK_vf(j, k, l, i - 1 + chemxb) = vel_K(dir_idx(1))*(rho_K*Y_K(i)) + end do + end if - end if + if (riemann_solver == 1 .or. riemann_solver == 4) then + $:GPU_LOOP(parallelism='[seq]') + do i = advxb, advxe + FK_vf(j, k, l, i) = 0._wp + FK_src_vf(j, k, l, i) = alpha_K(i - E_idx) + end do + + else + ! Could be bubbles_euler! + $:GPU_LOOP(parallelism='[seq]') + do i = advxb, advxe + FK_vf(j, k, l, i) = vel_K(dir_idx(1))*alpha_K(i - E_idx) + end do + $:GPU_LOOP(parallelism='[seq]') + do i = advxb, advxe + FK_src_vf(j, k, l, i) = vel_K(dir_idx(1)) + end do + + end if + + end do end do end do - end do $:END_GPU_PARALLEL_LOOP() #endif end subroutine s_convert_primitive_to_flux_variables @@ -1708,4 +1701,4 @@ contains end subroutine s_compute_fast_magnetosonic_speed #endif -end module m_variables_conversion +end module m_variables_conversion \ No newline at end of file diff --git a/src/simulation/m_body_forces.fpp b/src/simulation/m_body_forces.fpp index 35e12e4e2..7c2d2b12f 100644 --- a/src/simulation/m_body_forces.fpp +++ b/src/simulation/m_body_forces.fpp @@ -158,7 +158,6 @@ contains rhoM(j, k, l)*accel_bf(3) rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) + & q_cons_vf(momxe)%sf(j, k, l)*accel_bf(3) - end do end do end do end do diff --git a/src/simulation/m_bubbles_EL_kernels.fpp b/src/simulation/m_bubbles_EL_kernels.fpp index 6b2a4d5d5..ea531957e 100644 --- a/src/simulation/m_bubbles_EL_kernels.fpp +++ b/src/simulation/m_bubbles_EL_kernels.fpp @@ -56,40 +56,40 @@ contains integer :: l $:GPU_PARALLEL_LOOP(private='[l,s_coord,cell]') - do l = 1, nBubs + do l = 1, nBubs - volpart = 4._wp/3._wp*pi*lbk_rad(l, 2)**3._wp - s_coord(1:3) = lbk_s(l, 1:3, 2) - call s_get_cell(s_coord, cell) + volpart = 4._wp/3._wp*pi*lbk_rad(l, 2)**3._wp + s_coord(1:3) = lbk_s(l, 1:3, 2) + call s_get_cell(s_coord, cell) - strength_vol = volpart - strength_vel = 4._wp*pi*lbk_rad(l, 2)**2._wp*lbk_vel(l, 2) + strength_vol = volpart + strength_vel = 4._wp*pi*lbk_rad(l, 2)**2._wp*lbk_vel(l, 2) - if (num_dims == 2) then - Vol = dx(cell(1))*dy(cell(2))*lag_params%charwidth - if (cyl_coord) Vol = dx(cell(1))*dy(cell(2))*y_cc(cell(2))*2._wp*pi - else - Vol = dx(cell(1))*dy(cell(2))*dz(cell(3)) - end if - - !Update void fraction field - addFun1 = strength_vol/Vol - $:GPU_ATOMIC(atomic='update') - updatedvar(1)%sf(cell(1), cell(2), cell(3)) = updatedvar(1)%sf(cell(1), cell(2), cell(3)) + real(addFun1, kind=stp) + if (num_dims == 2) then + Vol = dx(cell(1))*dy(cell(2))*lag_params%charwidth + if (cyl_coord) Vol = dx(cell(1))*dy(cell(2))*y_cc(cell(2))*2._wp*pi + else + Vol = dx(cell(1))*dy(cell(2))*dz(cell(3)) + end if - !Update time derivative of void fraction - addFun2 = strength_vel/Vol - $:GPU_ATOMIC(atomic='update') - updatedvar(2)%sf(cell(1), cell(2), cell(3)) = updatedvar(2)%sf(cell(1), cell(2), cell(3)) + real(addFun2, kind=stp) + !Update void fraction field + addFun1 = strength_vol/Vol + $:GPU_ATOMIC(atomic='update') + updatedvar(1)%sf(cell(1), cell(2), cell(3)) = updatedvar(1)%sf(cell(1), cell(2), cell(3)) + real(addFun1, kind=stp) - !Product of two smeared functions - !Update void fraction * time derivative of void fraction - if (lag_params%cluster_type >= 4) then - addFun3 = (strength_vol*strength_vel)/Vol + !Update time derivative of void fraction + addFun2 = strength_vel/Vol $:GPU_ATOMIC(atomic='update') - updatedvar(5)%sf(cell(1), cell(2), cell(3)) = updatedvar(5)%sf(cell(1), cell(2), cell(3)) + real(addFun3, kind=stp) - end if - end do + updatedvar(2)%sf(cell(1), cell(2), cell(3)) = updatedvar(2)%sf(cell(1), cell(2), cell(3)) + real(addFun2, kind=stp) + + !Product of two smeared functions + !Update void fraction * time derivative of void fraction + if (lag_params%cluster_type >= 4) then + addFun3 = (strength_vol*strength_vel)/Vol + $:GPU_ATOMIC(atomic='update') + updatedvar(5)%sf(cell(1), cell(2), cell(3)) = updatedvar(5)%sf(cell(1), cell(2), cell(3)) + real(addFun3, kind=stp) + end if + end do $:END_GPU_PARALLEL_LOOP() end subroutine s_deltafunc @@ -122,56 +122,56 @@ contains if (p == 0) smearGridz = 1 $:GPU_PARALLEL_LOOP(private='[nodecoord,l,s_coord,cell,center]', copyin='[smearGrid,smearGridz]') - do l = 1, nBubs - nodecoord(1:3) = 0 - center(1:3) = 0._wp - volpart = 4._wp/3._wp*pi*lbk_rad(l, 2)**3._wp - s_coord(1:3) = lbk_s(l, 1:3, 2) - center(1:2) = lbk_pos(l, 1:2, 2) - if (p > 0) center(3) = lbk_pos(l, 3, 2) - call s_get_cell(s_coord, cell) - call s_compute_stddsv(cell, volpart, stddsv) - - strength_vol = volpart - strength_vel = 4._wp*pi*lbk_rad(l, 2)**2._wp*lbk_vel(l, 2) - - $:GPU_LOOP(collapse=3,private='[cellaux,nodecoord]') - do i = 1, smearGrid - do j = 1, smearGrid - do k = 1, smearGridz - cellaux(1) = cell(1) + i - (mapCells + 1) - cellaux(2) = cell(2) + j - (mapCells + 1) - cellaux(3) = cell(3) + k - (mapCells + 1) - if (p == 0) cellaux(3) = 0 - - !Check if the cells intended to smear the bubbles in are in the computational domain - !and redefine the cells for symmetric boundary - call s_check_celloutside(cellaux, celloutside) - - if (.not. celloutside) then - - nodecoord(1) = x_cc(cellaux(1)) - nodecoord(2) = y_cc(cellaux(2)) - if (p > 0) nodecoord(3) = z_cc(cellaux(3)) - call s_applygaussian(center, cellaux, nodecoord, stddsv, 0._wp, func) - if (lag_params%cluster_type >= 4) call s_applygaussian(center, cellaux, nodecoord, stddsv, 1._wp, func2) - - ! Relocate cells for bubbles intersecting symmetric boundaries - if (any((/bc_x%beg, bc_x%end, bc_y%beg, bc_y%end, bc_z%beg, bc_z%end/) == BC_REFLECTIVE)) then - call s_shift_cell_symmetric_bc(cellaux, cell) - end if - else - func = 0._wp - func2 = 0._wp - cellaux(1) = cell(1) - cellaux(2) = cell(2) - cellaux(3) = cell(3) + do l = 1, nBubs + nodecoord(1:3) = 0 + center(1:3) = 0._wp + volpart = 4._wp/3._wp*pi*lbk_rad(l, 2)**3._wp + s_coord(1:3) = lbk_s(l, 1:3, 2) + center(1:2) = lbk_pos(l, 1:2, 2) + if (p > 0) center(3) = lbk_pos(l, 3, 2) + call s_get_cell(s_coord, cell) + call s_compute_stddsv(cell, volpart, stddsv) + + strength_vol = volpart + strength_vel = 4._wp*pi*lbk_rad(l, 2)**2._wp*lbk_vel(l, 2) + + $:GPU_LOOP(collapse=3,private='[cellaux,nodecoord]') + do i = 1, smearGrid + do j = 1, smearGrid + do k = 1, smearGridz + cellaux(1) = cell(1) + i - (mapCells + 1) + cellaux(2) = cell(2) + j - (mapCells + 1) + cellaux(3) = cell(3) + k - (mapCells + 1) if (p == 0) cellaux(3) = 0 - end if - !Update void fraction field - addFun1 = func*strength_vol - $:GPU_ATOMIC(atomic='update') + !Check if the cells intended to smear the bubbles in are in the computational domain + !and redefine the cells for symmetric boundary + call s_check_celloutside(cellaux, celloutside) + + if (.not. celloutside) then + + nodecoord(1) = x_cc(cellaux(1)) + nodecoord(2) = y_cc(cellaux(2)) + if (p > 0) nodecoord(3) = z_cc(cellaux(3)) + call s_applygaussian(center, cellaux, nodecoord, stddsv, 0._wp, func) + if (lag_params%cluster_type >= 4) call s_applygaussian(center, cellaux, nodecoord, stddsv, 1._wp, func2) + + ! Relocate cells for bubbles intersecting symmetric boundaries + if (any((/bc_x%beg, bc_x%end, bc_y%beg, bc_y%end, bc_z%beg, bc_z%end/) == BC_REFLECTIVE)) then + call s_shift_cell_symmetric_bc(cellaux, cell) + end if + else + func = 0._wp + func2 = 0._wp + cellaux(1) = cell(1) + cellaux(2) = cell(2) + cellaux(3) = cell(3) + if (p == 0) cellaux(3) = 0 + end if + + !Update void fraction field + addFun1 = func*strength_vol + $:GPU_ATOMIC(atomic='update') updatedvar(1)%sf(cellaux(1), cellaux(2), cellaux(3)) = & updatedvar(1)%sf(cellaux(1), cellaux(2), cellaux(3)) & + real(addFun1, kind=stp) @@ -196,7 +196,6 @@ contains end do end do end do - end do $:END_GPU_PARALLEL_LOOP() end subroutine s_gaussian @@ -420,4 +419,4 @@ contains end subroutine s_get_cell -end module m_bubbles_EL_kernels +end module m_bubbles_EL_kernels \ No newline at end of file diff --git a/src/simulation/m_ibm.fpp b/src/simulation/m_ibm.fpp index 378782962..11f73efd1 100644 --- a/src/simulation/m_ibm.fpp +++ b/src/simulation/m_ibm.fpp @@ -198,7 +198,7 @@ contains type(ghost_point) :: innerp if (num_gps > 0) then - #$:GPU_PARALLEL_LOOP(private='[i,physical_loc,dyn_pres,alpha_rho_IP, alpha_IP,pres_IP,vel_IP,vel_g,vel_norm_IP,r_IP, v_IP,pb_IP,mv_IP,nmom_IP,presb_IP,massv_IP,rho, gamma,pi_inf,Re_K,G_K,Gs,gp,innerp,norm,buf, radial_vector, rotation_velocity, j,k,l,q,qv_K,c_IP,nbub,patch_id]') + $:GPU_PARALLEL_LOOP(private='[i,physical_loc,dyn_pres,alpha_rho_IP, alpha_IP,pres_IP,vel_IP,vel_g,vel_norm_IP,r_IP, v_IP,pb_IP,mv_IP,nmom_IP,presb_IP,massv_IP,rho, gamma,pi_inf,Re_K,G_K,Gs,gp,innerp,norm,buf, radial_vector, rotation_velocity, j,k,l,q,qv_K,c_IP,nbub,patch_id]') do i = 1, num_gps gp = ghost_points(i) diff --git a/src/simulation/m_muscl.fpp b/src/simulation/m_muscl.fpp index 4e50ab11f..9936e4e6b 100644 --- a/src/simulation/m_muscl.fpp +++ b/src/simulation/m_muscl.fpp @@ -118,7 +118,7 @@ contains if (muscl_order == 1) then if (muscl_dir == 1) then - #: call GPU_PARALLEL_LOOP(collapse=4) + $:GPU_PARALLEL_LOOP(collapse=4) do i = 1, ubound(v_vf, 1) do l = is3_muscl%beg, is3_muscl%end do k = is2_muscl%beg, is2_muscl%end From 07d558baaa1cfdcc15a0abfb0264d49650edc522 Mon Sep 17 00:00:00 2001 From: "Daniel J. Vickers" Date: Sun, 16 Nov 2025 11:22:51 -0600 Subject: [PATCH 17/33] Formatting and passed all GNU tests --- src/common/m_boundary_common.fpp | 586 +-- src/common/m_chemistry.fpp | 214 +- src/common/m_mpi_common.fpp | 366 +- src/common/m_variables_conversion.fpp | 626 +-- src/simulation/m_acoustic_src.fpp | 8 +- src/simulation/m_body_forces.fpp | 14 +- src/simulation/m_bubbles_EE.fpp | 34 +- src/simulation/m_bubbles_EL.fpp | 148 +- src/simulation/m_bubbles_EL_kernels.fpp | 191 +- src/simulation/m_cbc.fpp | 549 +- src/simulation/m_fftw.fpp | 134 +- src/simulation/m_hyperelastic.fpp | 180 +- src/simulation/m_hypoelastic.fpp | 118 +- src/simulation/m_ibm.fpp | 38 +- src/simulation/m_igr.fpp | 3370 ++++++------ src/simulation/m_mhd.fpp | 90 +- src/simulation/m_muscl.fpp | 408 +- src/simulation/m_qbmm.fpp | 6 +- src/simulation/m_riemann_solvers.fpp | 6190 +++++++++++------------ src/simulation/m_start_up.fpp | 14 +- src/simulation/m_surface_tension.fpp | 280 +- src/simulation/m_time_steppers.fpp | 186 +- src/simulation/m_viscous.fpp | 1216 ++--- src/simulation/m_weno.fpp | 908 ++-- 24 files changed, 7919 insertions(+), 7955 deletions(-) diff --git a/src/common/m_boundary_common.fpp b/src/common/m_boundary_common.fpp index f4451ab02..edff5edc3 100644 --- a/src/common/m_boundary_common.fpp +++ b/src/common/m_boundary_common.fpp @@ -92,27 +92,27 @@ contains call s_mpi_sendrecv_variables_buffers(q_prim_vf, 1, -1, sys_size, pb_in, mv_in) else $:GPU_PARALLEL_LOOP(private='[l,k]', collapse=2) - do l = 0, p - do k = 0, n - select case (int(bc_type(1, 1)%sf(0, k, l))) - case (BC_CHAR_SUP_OUTFLOW:BC_GHOST_EXTRAP) - call s_ghost_cell_extrapolation(q_prim_vf, 1, -1, k, l) - case (BC_REFLECTIVE) - call s_symmetry(q_prim_vf, 1, -1, k, l, pb_in, mv_in) - case (BC_PERIODIC) - call s_periodic(q_prim_vf, 1, -1, k, l, pb_in, mv_in) - case (BC_SLIP_WALL) - call s_slip_wall(q_prim_vf, 1, -1, k, l) - case (BC_NO_SLIP_WALL) - call s_no_slip_wall(q_prim_vf, 1, -1, k, l) - case (BC_DIRICHLET) - call s_dirichlet(q_prim_vf, 1, -1, k, l) - end select - - if (qbmm .and. (.not. polytropic) .and. & - (bc_type(1, 1)%sf(0, k, l) <= BC_GHOST_EXTRAP)) then - call s_qbmm_extrapolation(1, -1, k, l, pb_in, mv_in) - end if + do l = 0, p + do k = 0, n + select case (int(bc_type(1, 1)%sf(0, k, l))) + case (BC_CHAR_SUP_OUTFLOW:BC_GHOST_EXTRAP) + call s_ghost_cell_extrapolation(q_prim_vf, 1, -1, k, l) + case (BC_REFLECTIVE) + call s_symmetry(q_prim_vf, 1, -1, k, l, pb_in, mv_in) + case (BC_PERIODIC) + call s_periodic(q_prim_vf, 1, -1, k, l, pb_in, mv_in) + case (BC_SLIP_WALL) + call s_slip_wall(q_prim_vf, 1, -1, k, l) + case (BC_NO_SLIP_WALL) + call s_no_slip_wall(q_prim_vf, 1, -1, k, l) + case (BC_DIRICHLET) + call s_dirichlet(q_prim_vf, 1, -1, k, l) + end select + + if (qbmm .and. (.not. polytropic) .and. & + (bc_type(1, 1)%sf(0, k, l) <= BC_GHOST_EXTRAP)) then + call s_qbmm_extrapolation(1, -1, k, l, pb_in, mv_in) + end if end do end do $:END_GPU_PARALLEL_LOOP() @@ -122,27 +122,27 @@ contains call s_mpi_sendrecv_variables_buffers(q_prim_vf, 1, 1, sys_size, pb_in, mv_in) else $:GPU_PARALLEL_LOOP(private='[l,k]', collapse=2) - do l = 0, p - do k = 0, n - select case (int(bc_type(1, 2)%sf(0, k, l))) - case (BC_CHAR_SUP_OUTFLOW:BC_GHOST_EXTRAP) ! Ghost-cell extrap. BC at end - call s_ghost_cell_extrapolation(q_prim_vf, 1, 1, k, l) - case (BC_REFLECTIVE) - call s_symmetry(q_prim_vf, 1, 1, k, l, pb_in, mv_in) - case (BC_PERIODIC) - call s_periodic(q_prim_vf, 1, 1, k, l, pb_in, mv_in) - case (BC_SLIP_WALL) - call s_slip_wall(q_prim_vf, 1, 1, k, l) - case (BC_NO_SLIP_WALL) - call s_no_slip_wall(q_prim_vf, 1, 1, k, l) - case (BC_DIRICHLET) - call s_dirichlet(q_prim_vf, 1, 1, k, l) - end select - - if (qbmm .and. (.not. polytropic) .and. & - (bc_type(1, 2)%sf(0, k, l) <= BC_GHOST_EXTRAP)) then - call s_qbmm_extrapolation(1, 1, k, l, pb_in, mv_in) - end if + do l = 0, p + do k = 0, n + select case (int(bc_type(1, 2)%sf(0, k, l))) + case (BC_CHAR_SUP_OUTFLOW:BC_GHOST_EXTRAP) ! Ghost-cell extrap. BC at end + call s_ghost_cell_extrapolation(q_prim_vf, 1, 1, k, l) + case (BC_REFLECTIVE) + call s_symmetry(q_prim_vf, 1, 1, k, l, pb_in, mv_in) + case (BC_PERIODIC) + call s_periodic(q_prim_vf, 1, 1, k, l, pb_in, mv_in) + case (BC_SLIP_WALL) + call s_slip_wall(q_prim_vf, 1, 1, k, l) + case (BC_NO_SLIP_WALL) + call s_no_slip_wall(q_prim_vf, 1, 1, k, l) + case (BC_DIRICHLET) + call s_dirichlet(q_prim_vf, 1, 1, k, l) + end select + + if (qbmm .and. (.not. polytropic) .and. & + (bc_type(1, 2)%sf(0, k, l) <= BC_GHOST_EXTRAP)) then + call s_qbmm_extrapolation(1, 1, k, l, pb_in, mv_in) + end if end do end do $:END_GPU_PARALLEL_LOOP() @@ -156,30 +156,30 @@ contains call s_mpi_sendrecv_variables_buffers(q_prim_vf, 2, -1, sys_size, pb_in, mv_in) else $:GPU_PARALLEL_LOOP(private='[l,k]', collapse=2) - do l = 0, p - do k = -buff_size, m + buff_size - select case (int(bc_type(2, 1)%sf(k, 0, l))) - case (BC_CHAR_SUP_OUTFLOW:BC_GHOST_EXTRAP) - call s_ghost_cell_extrapolation(q_prim_vf, 2, -1, k, l) - case (BC_AXIS) - call s_axis(q_prim_vf, pb_in, mv_in, k, l) - case (BC_REFLECTIVE) - call s_symmetry(q_prim_vf, 2, -1, k, l, pb_in, mv_in) - case (BC_PERIODIC) - call s_periodic(q_prim_vf, 2, -1, k, l, pb_in, mv_in) - case (BC_SLIP_WALL) - call s_slip_wall(q_prim_vf, 2, -1, k, l) - case (BC_NO_SLIP_WALL) - call s_no_slip_wall(q_prim_vf, 2, -1, k, l) - case (BC_DIRICHLET) - call s_dirichlet(q_prim_vf, 2, -1, k, l) - end select - - if (qbmm .and. (.not. polytropic) .and. & - (bc_type(2, 1)%sf(k, 0, l) <= BC_GHOST_EXTRAP) .and. & - (bc_type(2, 1)%sf(k, 0, l) /= BC_AXIS)) then - call s_qbmm_extrapolation(2, -1, k, l, pb_in, mv_in) - end if + do l = 0, p + do k = -buff_size, m + buff_size + select case (int(bc_type(2, 1)%sf(k, 0, l))) + case (BC_CHAR_SUP_OUTFLOW:BC_GHOST_EXTRAP) + call s_ghost_cell_extrapolation(q_prim_vf, 2, -1, k, l) + case (BC_AXIS) + call s_axis(q_prim_vf, pb_in, mv_in, k, l) + case (BC_REFLECTIVE) + call s_symmetry(q_prim_vf, 2, -1, k, l, pb_in, mv_in) + case (BC_PERIODIC) + call s_periodic(q_prim_vf, 2, -1, k, l, pb_in, mv_in) + case (BC_SLIP_WALL) + call s_slip_wall(q_prim_vf, 2, -1, k, l) + case (BC_NO_SLIP_WALL) + call s_no_slip_wall(q_prim_vf, 2, -1, k, l) + case (BC_DIRICHLET) + call s_dirichlet(q_prim_vf, 2, -1, k, l) + end select + + if (qbmm .and. (.not. polytropic) .and. & + (bc_type(2, 1)%sf(k, 0, l) <= BC_GHOST_EXTRAP) .and. & + (bc_type(2, 1)%sf(k, 0, l) /= BC_AXIS)) then + call s_qbmm_extrapolation(2, -1, k, l, pb_in, mv_in) + end if end do end do $:END_GPU_PARALLEL_LOOP() @@ -189,27 +189,27 @@ contains call s_mpi_sendrecv_variables_buffers(q_prim_vf, 2, 1, sys_size, pb_in, mv_in) else $:GPU_PARALLEL_LOOP(private='[l,k]', collapse=2) - do l = 0, p - do k = -buff_size, m + buff_size - select case (int(bc_type(2, 2)%sf(k, 0, l))) - case (BC_CHAR_SUP_OUTFLOW:BC_GHOST_EXTRAP) - call s_ghost_cell_extrapolation(q_prim_vf, 2, 1, k, l) - case (BC_REFLECTIVE) - call s_symmetry(q_prim_vf, 2, 1, k, l, pb_in, mv_in) - case (BC_PERIODIC) - call s_periodic(q_prim_vf, 2, 1, k, l, pb_in, mv_in) - case (BC_SLIP_WALL) - call s_slip_wall(q_prim_vf, 2, 1, k, l) - case (BC_NO_SLIP_WALL) - call s_no_slip_wall(q_prim_vf, 2, 1, k, l) - case (BC_DIRICHLET) - call s_dirichlet(q_prim_vf, 2, 1, k, l) - end select - - if (qbmm .and. (.not. polytropic) .and. & - (bc_type(2, 2)%sf(k, 0, l) <= BC_GHOST_EXTRAP)) then - call s_qbmm_extrapolation(2, 1, k, l, pb_in, mv_in) - end if + do l = 0, p + do k = -buff_size, m + buff_size + select case (int(bc_type(2, 2)%sf(k, 0, l))) + case (BC_CHAR_SUP_OUTFLOW:BC_GHOST_EXTRAP) + call s_ghost_cell_extrapolation(q_prim_vf, 2, 1, k, l) + case (BC_REFLECTIVE) + call s_symmetry(q_prim_vf, 2, 1, k, l, pb_in, mv_in) + case (BC_PERIODIC) + call s_periodic(q_prim_vf, 2, 1, k, l, pb_in, mv_in) + case (BC_SLIP_WALL) + call s_slip_wall(q_prim_vf, 2, 1, k, l) + case (BC_NO_SLIP_WALL) + call s_no_slip_wall(q_prim_vf, 2, 1, k, l) + case (BC_DIRICHLET) + call s_dirichlet(q_prim_vf, 2, 1, k, l) + end select + + if (qbmm .and. (.not. polytropic) .and. & + (bc_type(2, 2)%sf(k, 0, l) <= BC_GHOST_EXTRAP)) then + call s_qbmm_extrapolation(2, 1, k, l, pb_in, mv_in) + end if end do end do $:END_GPU_PARALLEL_LOOP() @@ -225,29 +225,29 @@ contains call s_mpi_sendrecv_variables_buffers(q_prim_vf, 3, -1, sys_size, pb_in, mv_in) else $:GPU_PARALLEL_LOOP(private='[l,k]', collapse=2) - do l = -buff_size, n + buff_size - do k = -buff_size, m + buff_size - select case (int(bc_type(3, 1)%sf(k, l, 0))) - case (BC_CHAR_SUP_OUTFLOW:BC_GHOST_EXTRAP) - call s_ghost_cell_extrapolation(q_prim_vf, 3, -1, k, l) - case (BC_REFLECTIVE) - call s_symmetry(q_prim_vf, 3, -1, k, l, pb_in, mv_in) - case (BC_PERIODIC) - call s_periodic(q_prim_vf, 3, -1, k, l, pb_in, mv_in) - case (BC_SLIP_WALL) - call s_slip_wall(q_prim_vf, 3, -1, k, l) - case (BC_NO_SLIP_WALL) - call s_no_slip_wall(q_prim_vf, 3, -1, k, l) - case (BC_DIRICHLET) - call s_dirichlet(q_prim_vf, 3, -1, k, l) - end select - - if (qbmm .and. (.not. polytropic) .and. & - (bc_type(3, 1)%sf(k, l, 0) <= BC_GHOST_EXTRAP)) then - call s_qbmm_extrapolation(3, -1, k, l, pb_in, mv_in) - end if - end do + do l = -buff_size, n + buff_size + do k = -buff_size, m + buff_size + select case (int(bc_type(3, 1)%sf(k, l, 0))) + case (BC_CHAR_SUP_OUTFLOW:BC_GHOST_EXTRAP) + call s_ghost_cell_extrapolation(q_prim_vf, 3, -1, k, l) + case (BC_REFLECTIVE) + call s_symmetry(q_prim_vf, 3, -1, k, l, pb_in, mv_in) + case (BC_PERIODIC) + call s_periodic(q_prim_vf, 3, -1, k, l, pb_in, mv_in) + case (BC_SLIP_WALL) + call s_slip_wall(q_prim_vf, 3, -1, k, l) + case (BC_NO_SLIP_WALL) + call s_no_slip_wall(q_prim_vf, 3, -1, k, l) + case (BC_DIRICHLET) + call s_dirichlet(q_prim_vf, 3, -1, k, l) + end select + + if (qbmm .and. (.not. polytropic) .and. & + (bc_type(3, 1)%sf(k, l, 0) <= BC_GHOST_EXTRAP)) then + call s_qbmm_extrapolation(3, -1, k, l, pb_in, mv_in) + end if end do + end do $:END_GPU_PARALLEL_LOOP() end if @@ -255,29 +255,29 @@ contains call s_mpi_sendrecv_variables_buffers(q_prim_vf, 3, 1, sys_size, pb_in, mv_in) else $:GPU_PARALLEL_LOOP(private='[l,k]', collapse=2) - do l = -buff_size, n + buff_size - do k = -buff_size, m + buff_size - select case (int(bc_type(3, 2)%sf(k, l, 0))) - case (BC_CHAR_SUP_OUTFLOW:BC_GHOST_EXTRAP) - call s_ghost_cell_extrapolation(q_prim_vf, 3, 1, k, l) - case (BC_REFLECTIVE) - call s_symmetry(q_prim_vf, 3, 1, k, l, pb_in, mv_in) - case (BC_PERIODIC) - call s_periodic(q_prim_vf, 3, 1, k, l, pb_in, mv_in) - case (BC_SlIP_WALL) - call s_slip_wall(q_prim_vf, 3, 1, k, l) - case (BC_NO_SLIP_WALL) - call s_no_slip_wall(q_prim_vf, 3, 1, k, l) - case (BC_DIRICHLET) - call s_dirichlet(q_prim_vf, 3, 1, k, l) - end select - - if (qbmm .and. (.not. polytropic) .and. & - (bc_type(3, 2)%sf(k, l, 0) <= BC_GHOST_EXTRAP)) then - call s_qbmm_extrapolation(3, 1, k, l, pb_in, mv_in) - end if - end do + do l = -buff_size, n + buff_size + do k = -buff_size, m + buff_size + select case (int(bc_type(3, 2)%sf(k, l, 0))) + case (BC_CHAR_SUP_OUTFLOW:BC_GHOST_EXTRAP) + call s_ghost_cell_extrapolation(q_prim_vf, 3, 1, k, l) + case (BC_REFLECTIVE) + call s_symmetry(q_prim_vf, 3, 1, k, l, pb_in, mv_in) + case (BC_PERIODIC) + call s_periodic(q_prim_vf, 3, 1, k, l, pb_in, mv_in) + case (BC_SlIP_WALL) + call s_slip_wall(q_prim_vf, 3, 1, k, l) + case (BC_NO_SLIP_WALL) + call s_no_slip_wall(q_prim_vf, 3, 1, k, l) + case (BC_DIRICHLET) + call s_dirichlet(q_prim_vf, 3, 1, k, l) + end select + + if (qbmm .and. (.not. polytropic) .and. & + (bc_type(3, 2)%sf(k, l, 0) <= BC_GHOST_EXTRAP)) then + call s_qbmm_extrapolation(3, 1, k, l, pb_in, mv_in) + end if end do + end do $:END_GPU_PARALLEL_LOOP() end if #:endif @@ -1172,16 +1172,16 @@ contains call s_mpi_sendrecv_variables_buffers(c_divs, 1, -1, num_dims + 1) else $:END_GPU_PARALLEL_LOOP() - do l = 0, p - do k = 0, n - select case (bc_type(1, 1)%sf(0, k, l)) - case (BC_PERIODIC) - call s_color_function_periodic(c_divs, 1, -1, k, l) - case (BC_REFLECTIVE) - call s_color_function_reflective(c_divs, 1, -1, k, l) - case default - call s_color_function_ghost_cell_extrapolation(c_divs, 1, -1, k, l) - end select + do l = 0, p + do k = 0, n + select case (bc_type(1, 1)%sf(0, k, l)) + case (BC_PERIODIC) + call s_color_function_periodic(c_divs, 1, -1, k, l) + case (BC_REFLECTIVE) + call s_color_function_reflective(c_divs, 1, -1, k, l) + case default + call s_color_function_ghost_cell_extrapolation(c_divs, 1, -1, k, l) + end select end do end do $:END_GPU_PARALLEL_LOOP() @@ -1191,16 +1191,16 @@ contains call s_mpi_sendrecv_variables_buffers(c_divs, 1, 1, num_dims + 1) else $:END_GPU_PARALLEL_LOOP() - do l = 0, p - do k = 0, n - select case (bc_type(1, 2)%sf(0, k, l)) - case (BC_PERIODIC) - call s_color_function_periodic(c_divs, 1, 1, k, l) - case (BC_REFLECTIVE) - call s_color_function_reflective(c_divs, 1, 1, k, l) - case default - call s_color_function_ghost_cell_extrapolation(c_divs, 1, 1, k, l) - end select + do l = 0, p + do k = 0, n + select case (bc_type(1, 2)%sf(0, k, l)) + case (BC_PERIODIC) + call s_color_function_periodic(c_divs, 1, 1, k, l) + case (BC_REFLECTIVE) + call s_color_function_reflective(c_divs, 1, 1, k, l) + case default + call s_color_function_ghost_cell_extrapolation(c_divs, 1, 1, k, l) + end select end do end do $:END_GPU_PARALLEL_LOOP() @@ -1213,16 +1213,16 @@ contains call s_mpi_sendrecv_variables_buffers(c_divs, 2, -1, num_dims + 1) else $:END_GPU_PARALLEL_LOOP() - do l = 0, p - do k = -buff_size, m + buff_size - select case (bc_type(2, 1)%sf(k, 0, l)) - case (BC_PERIODIC) - call s_color_function_periodic(c_divs, 2, -1, k, l) - case (BC_REFLECTIVE) - call s_color_function_reflective(c_divs, 2, -1, k, l) - case default - call s_color_function_ghost_cell_extrapolation(c_divs, 2, -1, k, l) - end select + do l = 0, p + do k = -buff_size, m + buff_size + select case (bc_type(2, 1)%sf(k, 0, l)) + case (BC_PERIODIC) + call s_color_function_periodic(c_divs, 2, -1, k, l) + case (BC_REFLECTIVE) + call s_color_function_reflective(c_divs, 2, -1, k, l) + case default + call s_color_function_ghost_cell_extrapolation(c_divs, 2, -1, k, l) + end select end do end do $:END_GPU_PARALLEL_LOOP() @@ -1232,16 +1232,16 @@ contains call s_mpi_sendrecv_variables_buffers(c_divs, 2, 1, num_dims + 1) else $:END_GPU_PARALLEL_LOOP() - do l = 0, p - do k = -buff_size, m + buff_size - select case (bc_type(2, 2)%sf(k, 0, l)) - case (BC_PERIODIC) - call s_color_function_periodic(c_divs, 2, 1, k, l) - case (BC_REFLECTIVE) - call s_color_function_reflective(c_divs, 2, 1, k, l) - case default - call s_color_function_ghost_cell_extrapolation(c_divs, 2, 1, k, l) - end select + do l = 0, p + do k = -buff_size, m + buff_size + select case (bc_type(2, 2)%sf(k, 0, l)) + case (BC_PERIODIC) + call s_color_function_periodic(c_divs, 2, 1, k, l) + case (BC_REFLECTIVE) + call s_color_function_reflective(c_divs, 2, 1, k, l) + case default + call s_color_function_ghost_cell_extrapolation(c_divs, 2, 1, k, l) + end select end do end do $:END_GPU_PARALLEL_LOOP() @@ -1255,18 +1255,18 @@ contains call s_mpi_sendrecv_variables_buffers(c_divs, 3, -1, num_dims + 1) else $:END_GPU_PARALLEL_LOOP() - do l = -buff_size, n + buff_size - do k = -buff_size, m + buff_size - select case (bc_type(3, 1)%sf(k, l, 0)) - case (BC_PERIODIC) - call s_color_function_periodic(c_divs, 3, -1, k, l) - case (BC_REFLECTIVE) - call s_color_function_reflective(c_divs, 3, -1, k, l) - case default - call s_color_function_ghost_cell_extrapolation(c_divs, 3, -1, k, l) - end select - end do + do l = -buff_size, n + buff_size + do k = -buff_size, m + buff_size + select case (bc_type(3, 1)%sf(k, l, 0)) + case (BC_PERIODIC) + call s_color_function_periodic(c_divs, 3, -1, k, l) + case (BC_REFLECTIVE) + call s_color_function_reflective(c_divs, 3, -1, k, l) + case default + call s_color_function_ghost_cell_extrapolation(c_divs, 3, -1, k, l) + end select end do + end do $:END_GPU_PARALLEL_LOOP() end if @@ -1274,18 +1274,18 @@ contains call s_mpi_sendrecv_variables_buffers(c_divs, 3, 1, num_dims + 1) else $:END_GPU_PARALLEL_LOOP() - do l = -buff_size, n + buff_size - do k = -buff_size, m + buff_size - select case (bc_type(3, 2)%sf(k, l, 0)) - case (BC_PERIODIC) - call s_color_function_periodic(c_divs, 3, 1, k, l) - case (BC_REFLECTIVE) - call s_color_function_reflective(c_divs, 3, 1, k, l) - case default - call s_color_function_ghost_cell_extrapolation(c_divs, 3, 1, k, l) - end select - end do + do l = -buff_size, n + buff_size + do k = -buff_size, m + buff_size + select case (bc_type(3, 2)%sf(k, l, 0)) + case (BC_PERIODIC) + call s_color_function_periodic(c_divs, 3, 1, k, l) + case (BC_REFLECTIVE) + call s_color_function_reflective(c_divs, 3, 1, k, l) + case default + call s_color_function_ghost_cell_extrapolation(c_divs, 3, 1, k, l) + end select end do + end do $:END_GPU_PARALLEL_LOOP() end if #:endif @@ -1491,22 +1491,22 @@ contains call s_mpi_sendrecv_variables_buffers(jac_sf, 1, -1, 1) else $:GPU_PARALLEL_LOOP(private='[l,k]', collapse=2) - do l = 0, p - do k = 0, n - select case (bc_type(1, 1)%sf(0, k, l)) - case (BC_PERIODIC) - do j = 1, buff_size - jac_sf(1)%sf(-j, k, l) = jac_sf(1)%sf(m - j + 1, k, l) - end do - case (BC_REFLECTIVE) - do j = 1, buff_size - jac_sf(1)%sf(-j, k, l) = jac_sf(1)%sf(j - 1, k, l) - end do - case default - do j = 1, buff_size - jac_sf(1)%sf(-j, k, l) = jac_sf(1)%sf(0, k, l) - end do - end select + do l = 0, p + do k = 0, n + select case (bc_type(1, 1)%sf(0, k, l)) + case (BC_PERIODIC) + do j = 1, buff_size + jac_sf(1)%sf(-j, k, l) = jac_sf(1)%sf(m - j + 1, k, l) + end do + case (BC_REFLECTIVE) + do j = 1, buff_size + jac_sf(1)%sf(-j, k, l) = jac_sf(1)%sf(j - 1, k, l) + end do + case default + do j = 1, buff_size + jac_sf(1)%sf(-j, k, l) = jac_sf(1)%sf(0, k, l) + end do + end select end do end do $:END_GPU_PARALLEL_LOOP() @@ -1517,24 +1517,24 @@ contains call s_mpi_sendrecv_variables_buffers(jac_sf, 1, 1, 1) else $:GPU_PARALLEL_LOOP(private='[l,k]', collapse=2) - do l = 0, p - do k = 0, n - select case (bc_type(1, 2)%sf(0, k, l)) - case (BC_PERIODIC) - do j = 1, buff_size - jac_sf(1)%sf(m + j, k, l) = jac_sf(1)%sf(j - 1, k, l) - end do - case (BC_REFLECTIVE) - do j = 1, buff_size - jac_sf(1)%sf(m + j, k, l) = jac_sf(1)%sf(m - (j - 1), k, l) - end do - case default - do j = 1, buff_size - jac_sf(1)%sf(m + j, k, l) = jac_sf(1)%sf(m, k, l) - end do - end select - end do + do l = 0, p + do k = 0, n + select case (bc_type(1, 2)%sf(0, k, l)) + case (BC_PERIODIC) + do j = 1, buff_size + jac_sf(1)%sf(m + j, k, l) = jac_sf(1)%sf(j - 1, k, l) + end do + case (BC_REFLECTIVE) + do j = 1, buff_size + jac_sf(1)%sf(m + j, k, l) = jac_sf(1)%sf(m - (j - 1), k, l) + end do + case default + do j = 1, buff_size + jac_sf(1)%sf(m + j, k, l) = jac_sf(1)%sf(m, k, l) + end do + end select end do + end do $:END_GPU_PARALLEL_LOOP() end if @@ -1545,24 +1545,24 @@ contains call s_mpi_sendrecv_variables_buffers(jac_sf, 2, -1, 1) else $:GPU_PARALLEL_LOOP(private='[l,k]', collapse=2) - do l = 0, p - do k = idwbuff(1)%beg, idwbuff(1)%end - select case (bc_type(2, 1)%sf(k, 0, l)) - case (BC_PERIODIC) - do j = 1, buff_size - jac_sf(1)%sf(k, -j, l) = jac_sf(1)%sf(k, n - j + 1, l) - end do - case (BC_REFLECTIVE) - do j = 1, buff_size - jac_sf(1)%sf(k, -j, l) = jac_sf(1)%sf(k, j - 1, l) - end do - case default - do j = 1, buff_size - jac_sf(1)%sf(k, -j, l) = jac_sf(1)%sf(k, 0, l) - end do - end select - end do + do l = 0, p + do k = idwbuff(1)%beg, idwbuff(1)%end + select case (bc_type(2, 1)%sf(k, 0, l)) + case (BC_PERIODIC) + do j = 1, buff_size + jac_sf(1)%sf(k, -j, l) = jac_sf(1)%sf(k, n - j + 1, l) + end do + case (BC_REFLECTIVE) + do j = 1, buff_size + jac_sf(1)%sf(k, -j, l) = jac_sf(1)%sf(k, j - 1, l) + end do + case default + do j = 1, buff_size + jac_sf(1)%sf(k, -j, l) = jac_sf(1)%sf(k, 0, l) + end do + end select end do + end do $:END_GPU_PARALLEL_LOOP() end if @@ -1571,52 +1571,52 @@ contains call s_mpi_sendrecv_variables_buffers(jac_sf, 2, 1, 1) else $:GPU_PARALLEL_LOOP(private='[l,k]', collapse=2) - do l = 0, p + do l = 0, p + do k = idwbuff(1)%beg, idwbuff(1)%end + select case (bc_type(2, 2)%sf(k, 0, l)) + case (BC_PERIODIC) + do j = 1, buff_size + jac_sf(1)%sf(k, n + j, l) = jac_sf(1)%sf(k, j - 1, l) + end do + case (BC_REFLECTIVE) + do j = 1, buff_size + jac_sf(1)%sf(k, n + j, l) = jac_sf(1)%sf(k, n - (j - 1), l) + end do + case default + do j = 1, buff_size + jac_sf(1)%sf(k, n + j, l) = jac_sf(1)%sf(k, n, l) + end do + end select + end do + end do + $:END_GPU_PARALLEL_LOOP() + end if + + #:if not MFC_CASE_OPTIMIZATION or num_dims > 2 + if (p == 0) then + return + else if (bc_z%beg >= 0) then + call s_mpi_sendrecv_variables_buffers(jac_sf, 3, -1, 1) + else + $:GPU_PARALLEL_LOOP(private='[l,k]', collapse=2) + do l = idwbuff(2)%beg, idwbuff(2)%end do k = idwbuff(1)%beg, idwbuff(1)%end - select case (bc_type(2, 2)%sf(k, 0, l)) + select case (bc_type(3, 1)%sf(k, l, 0)) case (BC_PERIODIC) do j = 1, buff_size - jac_sf(1)%sf(k, n + j, l) = jac_sf(1)%sf(k, j - 1, l) + jac_sf(1)%sf(k, l, -j) = jac_sf(1)%sf(k, l, p - j + 1) end do case (BC_REFLECTIVE) do j = 1, buff_size - jac_sf(1)%sf(k, n + j, l) = jac_sf(1)%sf(k, n - (j - 1), l) + jac_sf(1)%sf(k, l, -j) = jac_sf(1)%sf(k, l, j - 1) end do case default do j = 1, buff_size - jac_sf(1)%sf(k, n + j, l) = jac_sf(1)%sf(k, n, l) + jac_sf(1)%sf(k, l, -j) = jac_sf(1)%sf(k, l, 0) end do end select end do end do - $:END_GPU_PARALLEL_LOOP() - end if - - #:if not MFC_CASE_OPTIMIZATION or num_dims > 2 - if (p == 0) then - return - else if (bc_z%beg >= 0) then - call s_mpi_sendrecv_variables_buffers(jac_sf, 3, -1, 1) - else - $:GPU_PARALLEL_LOOP(private='[l,k]', collapse=2) - do l = idwbuff(2)%beg, idwbuff(2)%end - do k = idwbuff(1)%beg, idwbuff(1)%end - select case (bc_type(3, 1)%sf(k, l, 0)) - case (BC_PERIODIC) - do j = 1, buff_size - jac_sf(1)%sf(k, l, -j) = jac_sf(1)%sf(k, l, p - j + 1) - end do - case (BC_REFLECTIVE) - do j = 1, buff_size - jac_sf(1)%sf(k, l, -j) = jac_sf(1)%sf(k, l, j - 1) - end do - case default - do j = 1, buff_size - jac_sf(1)%sf(k, l, -j) = jac_sf(1)%sf(k, l, 0) - end do - end select - end do - end do $:END_GPU_PARALLEL_LOOP() end if @@ -1624,24 +1624,24 @@ contains call s_mpi_sendrecv_variables_buffers(jac_sf, 3, 1, 1) else $:GPU_PARALLEL_LOOP(private='[l,k]', collapse=2) - do l = idwbuff(2)%beg, idwbuff(2)%end - do k = idwbuff(1)%beg, idwbuff(1)%end - select case (bc_type(3, 2)%sf(k, l, 0)) - case (BC_PERIODIC) - do j = 1, buff_size - jac_sf(1)%sf(k, l, p + j) = jac_sf(1)%sf(k, l, j - 1) - end do - case (BC_REFLECTIVE) - do j = 1, buff_size - jac_sf(1)%sf(k, l, p + j) = jac_sf(1)%sf(k, l, p - (j - 1)) - end do - case default - do j = 1, buff_size - jac_sf(1)%sf(k, l, p + j) = jac_sf(1)%sf(k, l, p) - end do - end select - end do + do l = idwbuff(2)%beg, idwbuff(2)%end + do k = idwbuff(1)%beg, idwbuff(1)%end + select case (bc_type(3, 2)%sf(k, l, 0)) + case (BC_PERIODIC) + do j = 1, buff_size + jac_sf(1)%sf(k, l, p + j) = jac_sf(1)%sf(k, l, j - 1) + end do + case (BC_REFLECTIVE) + do j = 1, buff_size + jac_sf(1)%sf(k, l, p + j) = jac_sf(1)%sf(k, l, p - (j - 1)) + end do + case default + do j = 1, buff_size + jac_sf(1)%sf(k, l, p + j) = jac_sf(1)%sf(k, l, p) + end do + end select end do + end do $:END_GPU_PARALLEL_LOOP() end if #:endif diff --git a/src/common/m_chemistry.fpp b/src/common/m_chemistry.fpp index 1c1ffae54..b43905dc7 100644 --- a/src/common/m_chemistry.fpp +++ b/src/common/m_chemistry.fpp @@ -129,10 +129,10 @@ contains real(wp), dimension(num_species) :: Ys real(wp), dimension(num_species) :: omega - $:GPU_PARALLEL_LOOP(collapse=3, private='[Ys, omega, eqn, T, rho, omega, omega_m]', copyin='[bounds]') - do z = bounds(3)%beg, bounds(3)%end - do y = bounds(2)%beg, bounds(2)%end - do x = bounds(1)%beg, bounds(1)%end + $:GPU_PARALLEL_LOOP(collapse=3, private='[Ys, omega, eqn, T, rho, omega, omega_m]', copyin='[bounds]') + do z = bounds(3)%beg, bounds(3)%end + do y = bounds(2)%beg, bounds(2)%end + do x = bounds(1)%beg, bounds(1)%end $:GPU_LOOP(parallelism='[seq]') do eqn = chemxb, chemxe @@ -192,112 +192,112 @@ contains offsets(idir) = 1 #:block UNDEF_AMD $:GPU_PARALLEL_LOOP(collapse=3, private='[x,y,z,Ys_L, Ys_R, Ys_cell, Xs_L, Xs_R, mass_diffusivities_mixavg1, mass_diffusivities_mixavg2, mass_diffusivities_mixavg_Cell, h_l, h_r, Xs_cell, h_k, dXk_dxi,Mass_Diffu_Flux, Mass_Diffu_Energy, MW_L, MW_R, MW_cell, Rgas_L, Rgas_R, T_L, T_R, P_L, P_R, rho_L, rho_R, rho_cell, rho_Vic, lambda_L, lambda_R, lambda_Cell, dT_dxi, grid_spacing]', copyin='[offsets]') - do z = isc3%beg, isc3%end - do y = isc2%beg, isc2%end - do x = isc1%beg, isc1%end - ! Calculate grid spacing using direction-based indexing - select case (idir) - case (1) - grid_spacing = x_cc(x + 1) - x_cc(x) - case (2) - grid_spacing = y_cc(y + 1) - y_cc(y) - case (3) - grid_spacing = z_cc(z + 1) - z_cc(z) - end select - - ! Extract species mass fractions - $:GPU_LOOP(parallelism='[seq]') - do i = chemxb, chemxe - Ys_L(i - chemxb + 1) = q_prim_qp(i)%sf(x, y, z) - Ys_R(i - chemxb + 1) = q_prim_qp(i)%sf(x + offsets(1), y + offsets(2), z + offsets(3)) - Ys_cell(i - chemxb + 1) = 0.5_wp*(Ys_L(i - chemxb + 1) + Ys_R(i - chemxb + 1)) - end do - - ! Calculate molecular weights and mole fractions - call get_mixture_molecular_weight(Ys_L, MW_L) - call get_mixture_molecular_weight(Ys_R, MW_R) - MW_cell = 0.5_wp*(MW_L + MW_R) - - call get_mole_fractions(MW_L, Ys_L, Xs_L) - call get_mole_fractions(MW_R, Ys_R, Xs_R) - - ! Calculate gas constants and thermodynamic properties - Rgas_L = gas_constant/MW_L - Rgas_R = gas_constant/MW_R - - P_L = q_prim_qp(E_idx)%sf(x, y, z) - P_R = q_prim_qp(E_idx)%sf(x + offsets(1), y + offsets(2), z + offsets(3)) - - rho_L = q_prim_qp(1)%sf(x, y, z) - rho_R = q_prim_qp(1)%sf(x + offsets(1), y + offsets(2), z + offsets(3)) - - T_L = P_L/rho_L/Rgas_L - T_R = P_R/rho_R/Rgas_R - - rho_cell = 0.5_wp*(rho_L + rho_R) - dT_dxi = (T_R - T_L)/grid_spacing - - ! Get transport properties - call get_species_mass_diffusivities_mixavg(P_L, T_L, Ys_L, mass_diffusivities_mixavg1) - call get_species_mass_diffusivities_mixavg(P_R, T_R, Ys_R, mass_diffusivities_mixavg2) - - call get_mixture_thermal_conductivity_mixavg(T_L, Ys_L, lambda_L) - call get_mixture_thermal_conductivity_mixavg(T_R, Ys_R, lambda_R) - - call get_species_enthalpies_rt(T_L, h_l) - call get_species_enthalpies_rt(T_R, h_r) - - ! Calculate species properties and gradients - $:GPU_LOOP(parallelism='[seq]') - do i = chemxb, chemxe - h_l(i - chemxb + 1) = h_l(i - chemxb + 1)*gas_constant*T_L/molecular_weights(i - chemxb + 1) - h_r(i - chemxb + 1) = h_r(i - chemxb + 1)*gas_constant*T_R/molecular_weights(i - chemxb + 1) - Xs_cell(i - chemxb + 1) = 0.5_wp*(Xs_L(i - chemxb + 1) + Xs_R(i - chemxb + 1)) - h_k(i - chemxb + 1) = 0.5_wp*(h_l(i - chemxb + 1) + h_r(i - chemxb + 1)) - dXk_dxi(i - chemxb + 1) = (Xs_R(i - chemxb + 1) - Xs_L(i - chemxb + 1))/grid_spacing - end do - - ! Calculate mixture-averaged diffusivities - $:GPU_LOOP(parallelism='[seq]') - do i = chemxb, chemxe - mass_diffusivities_mixavg_Cell(i - chemxb + 1) = & - (mass_diffusivities_mixavg2(i - chemxb + 1) + mass_diffusivities_mixavg1(i - chemxb + 1))/2.0_wp - end do - - lambda_Cell = 0.5_wp*(lambda_R + lambda_L) - - ! Calculate mass diffusion fluxes - rho_Vic = 0.0_wp - Mass_Diffu_Energy = 0.0_wp - - $:GPU_LOOP(parallelism='[seq]') - do eqn = chemxb, chemxe - Mass_Diffu_Flux(eqn - chemxb + 1) = rho_cell*mass_diffusivities_mixavg_Cell(eqn - chemxb + 1)* & - molecular_weights(eqn - chemxb + 1)/MW_cell*dXk_dxi(eqn - chemxb + 1) - rho_Vic = rho_Vic + Mass_Diffu_Flux(eqn - chemxb + 1) - Mass_Diffu_Energy = Mass_Diffu_Energy + h_k(eqn - chemxb + 1)*Mass_Diffu_Flux(eqn - chemxb + 1) - end do - - ! Apply corrections for mass conservation - $:GPU_LOOP(parallelism='[seq]') - do eqn = chemxb, chemxe - Mass_Diffu_Energy = Mass_Diffu_Energy - h_k(eqn - chemxb + 1)*Ys_cell(eqn - chemxb + 1)*rho_Vic - Mass_Diffu_Flux(eqn - chemxb + 1) = Mass_Diffu_Flux(eqn - chemxb + 1) - rho_Vic*Ys_cell(eqn - chemxb + 1) - end do - - ! Add thermal conduction contribution - Mass_Diffu_Energy = lambda_Cell*dT_dxi + Mass_Diffu_Energy - - ! Update flux arrays - flux_src_vf(E_idx)%sf(x, y, z) = flux_src_vf(E_idx)%sf(x, y, z) - Mass_Diffu_Energy - - $:GPU_LOOP(parallelism='[seq]') - do eqn = chemxb, chemxe - flux_src_vf(eqn)%sf(x, y, z) = flux_src_vf(eqn)%sf(x, y, z) - Mass_diffu_Flux(eqn - chemxb + 1) - end do + do z = isc3%beg, isc3%end + do y = isc2%beg, isc2%end + do x = isc1%beg, isc1%end + ! Calculate grid spacing using direction-based indexing + select case (idir) + case (1) + grid_spacing = x_cc(x + 1) - x_cc(x) + case (2) + grid_spacing = y_cc(y + 1) - y_cc(y) + case (3) + grid_spacing = z_cc(z + 1) - z_cc(z) + end select + + ! Extract species mass fractions + $:GPU_LOOP(parallelism='[seq]') + do i = chemxb, chemxe + Ys_L(i - chemxb + 1) = q_prim_qp(i)%sf(x, y, z) + Ys_R(i - chemxb + 1) = q_prim_qp(i)%sf(x + offsets(1), y + offsets(2), z + offsets(3)) + Ys_cell(i - chemxb + 1) = 0.5_wp*(Ys_L(i - chemxb + 1) + Ys_R(i - chemxb + 1)) + end do + + ! Calculate molecular weights and mole fractions + call get_mixture_molecular_weight(Ys_L, MW_L) + call get_mixture_molecular_weight(Ys_R, MW_R) + MW_cell = 0.5_wp*(MW_L + MW_R) + + call get_mole_fractions(MW_L, Ys_L, Xs_L) + call get_mole_fractions(MW_R, Ys_R, Xs_R) + + ! Calculate gas constants and thermodynamic properties + Rgas_L = gas_constant/MW_L + Rgas_R = gas_constant/MW_R + + P_L = q_prim_qp(E_idx)%sf(x, y, z) + P_R = q_prim_qp(E_idx)%sf(x + offsets(1), y + offsets(2), z + offsets(3)) + + rho_L = q_prim_qp(1)%sf(x, y, z) + rho_R = q_prim_qp(1)%sf(x + offsets(1), y + offsets(2), z + offsets(3)) + + T_L = P_L/rho_L/Rgas_L + T_R = P_R/rho_R/Rgas_R + + rho_cell = 0.5_wp*(rho_L + rho_R) + dT_dxi = (T_R - T_L)/grid_spacing + + ! Get transport properties + call get_species_mass_diffusivities_mixavg(P_L, T_L, Ys_L, mass_diffusivities_mixavg1) + call get_species_mass_diffusivities_mixavg(P_R, T_R, Ys_R, mass_diffusivities_mixavg2) + + call get_mixture_thermal_conductivity_mixavg(T_L, Ys_L, lambda_L) + call get_mixture_thermal_conductivity_mixavg(T_R, Ys_R, lambda_R) + + call get_species_enthalpies_rt(T_L, h_l) + call get_species_enthalpies_rt(T_R, h_r) + + ! Calculate species properties and gradients + $:GPU_LOOP(parallelism='[seq]') + do i = chemxb, chemxe + h_l(i - chemxb + 1) = h_l(i - chemxb + 1)*gas_constant*T_L/molecular_weights(i - chemxb + 1) + h_r(i - chemxb + 1) = h_r(i - chemxb + 1)*gas_constant*T_R/molecular_weights(i - chemxb + 1) + Xs_cell(i - chemxb + 1) = 0.5_wp*(Xs_L(i - chemxb + 1) + Xs_R(i - chemxb + 1)) + h_k(i - chemxb + 1) = 0.5_wp*(h_l(i - chemxb + 1) + h_r(i - chemxb + 1)) + dXk_dxi(i - chemxb + 1) = (Xs_R(i - chemxb + 1) - Xs_L(i - chemxb + 1))/grid_spacing + end do + + ! Calculate mixture-averaged diffusivities + $:GPU_LOOP(parallelism='[seq]') + do i = chemxb, chemxe + mass_diffusivities_mixavg_Cell(i - chemxb + 1) = & + (mass_diffusivities_mixavg2(i - chemxb + 1) + mass_diffusivities_mixavg1(i - chemxb + 1))/2.0_wp + end do + + lambda_Cell = 0.5_wp*(lambda_R + lambda_L) + + ! Calculate mass diffusion fluxes + rho_Vic = 0.0_wp + Mass_Diffu_Energy = 0.0_wp + + $:GPU_LOOP(parallelism='[seq]') + do eqn = chemxb, chemxe + Mass_Diffu_Flux(eqn - chemxb + 1) = rho_cell*mass_diffusivities_mixavg_Cell(eqn - chemxb + 1)* & + molecular_weights(eqn - chemxb + 1)/MW_cell*dXk_dxi(eqn - chemxb + 1) + rho_Vic = rho_Vic + Mass_Diffu_Flux(eqn - chemxb + 1) + Mass_Diffu_Energy = Mass_Diffu_Energy + h_k(eqn - chemxb + 1)*Mass_Diffu_Flux(eqn - chemxb + 1) + end do + + ! Apply corrections for mass conservation + $:GPU_LOOP(parallelism='[seq]') + do eqn = chemxb, chemxe + Mass_Diffu_Energy = Mass_Diffu_Energy - h_k(eqn - chemxb + 1)*Ys_cell(eqn - chemxb + 1)*rho_Vic + Mass_Diffu_Flux(eqn - chemxb + 1) = Mass_Diffu_Flux(eqn - chemxb + 1) - rho_Vic*Ys_cell(eqn - chemxb + 1) + end do + + ! Add thermal conduction contribution + Mass_Diffu_Energy = lambda_Cell*dT_dxi + Mass_Diffu_Energy + + ! Update flux arrays + flux_src_vf(E_idx)%sf(x, y, z) = flux_src_vf(E_idx)%sf(x, y, z) - Mass_Diffu_Energy + + $:GPU_LOOP(parallelism='[seq]') + do eqn = chemxb, chemxe + flux_src_vf(eqn)%sf(x, y, z) = flux_src_vf(eqn)%sf(x, y, z) - Mass_diffu_Flux(eqn - chemxb + 1) end do end do end do + end do $:END_GPU_PARALLEL_LOOP() #:endblock UNDEF_AMD end if diff --git a/src/common/m_mpi_common.fpp b/src/common/m_mpi_common.fpp index 1258fef74..226ab1b4c 100644 --- a/src/common/m_mpi_common.fpp +++ b/src/common/m_mpi_common.fpp @@ -758,151 +758,151 @@ contains if (mpi_dir == ${mpi_dir}$) then #:if mpi_dir == 1 $:GPU_PARALLEL_LOOP(collapse=4,private='[i,j,k,l,r]') - do l = 0, p - do k = 0, n - do j = 0, buff_size - 1 - do i = 1, nVar - r = (i - 1) + v_size*(j + buff_size*(k + (n + 1)*l)) - buff_send(r) = real(q_comm(i)%sf(j + pack_offset, k, l), kind=wp) - end do + do l = 0, p + do k = 0, n + do j = 0, buff_size - 1 + do i = 1, nVar + r = (i - 1) + v_size*(j + buff_size*(k + (n + 1)*l)) + buff_send(r) = real(q_comm(i)%sf(j + pack_offset, k, l), kind=wp) end do end do end do + end do $:END_GPU_PARALLEL_LOOP() if (qbmm_comm) then $:GPU_PARALLEL_LOOP(collapse=4,private='[i,j,k,l,r]') - do l = 0, p - do k = 0, n - do j = 0, buff_size - 1 - do i = nVar + 1, nVar + 4 - do q = 1, nb - r = (i - 1) + (q - 1)*4 + v_size* & - (j + buff_size*(k + (n + 1)*l)) - buff_send(r) = real(pb_in(j + pack_offset, k, l, i - nVar, q), kind=wp) - end do + do l = 0, p + do k = 0, n + do j = 0, buff_size - 1 + do i = nVar + 1, nVar + 4 + do q = 1, nb + r = (i - 1) + (q - 1)*4 + v_size* & + (j + buff_size*(k + (n + 1)*l)) + buff_send(r) = real(pb_in(j + pack_offset, k, l, i - nVar, q), kind=wp) end do end do end do end do + end do $:END_GPU_PARALLEL_LOOP() $:GPU_PARALLEL_LOOP(collapse=5,private='[i,j,k,l,q,r]') - do l = 0, p - do k = 0, n - do j = 0, buff_size - 1 - do i = nVar + 1, nVar + 4 - do q = 1, nb - r = (i - 1) + (q - 1)*4 + nb*4 + v_size* & - (j + buff_size*(k + (n + 1)*l)) - buff_send(r) = real(mv_in(j + pack_offset, k, l, i - nVar, q), kind=wp) - end do + do l = 0, p + do k = 0, n + do j = 0, buff_size - 1 + do i = nVar + 1, nVar + 4 + do q = 1, nb + r = (i - 1) + (q - 1)*4 + nb*4 + v_size* & + (j + buff_size*(k + (n + 1)*l)) + buff_send(r) = real(mv_in(j + pack_offset, k, l, i - nVar, q), kind=wp) end do end do end do end do + end do $:END_GPU_PARALLEL_LOOP() end if #:elif mpi_dir == 2 $:GPU_PARALLEL_LOOP(collapse=4,private='[i,j,k,l,r]') - do i = 1, nVar - do l = 0, p - do k = 0, buff_size - 1 - do j = -buff_size, m + buff_size - r = (i - 1) + v_size* & - ((j + buff_size) + (m + 2*buff_size + 1)* & - (k + buff_size*l)) - buff_send(r) = real(q_comm(i)%sf(j, k + pack_offset, l), kind=wp) - end do + do i = 1, nVar + do l = 0, p + do k = 0, buff_size - 1 + do j = -buff_size, m + buff_size + r = (i - 1) + v_size* & + ((j + buff_size) + (m + 2*buff_size + 1)* & + (k + buff_size*l)) + buff_send(r) = real(q_comm(i)%sf(j, k + pack_offset, l), kind=wp) end do end do end do + end do $:END_GPU_PARALLEL_LOOP() if (qbmm_comm) then $:GPU_PARALLEL_LOOP(collapse=5,private='[i,j,k,l,q,r]') - do i = nVar + 1, nVar + 4 - do l = 0, p - do k = 0, buff_size - 1 - do j = -buff_size, m + buff_size - do q = 1, nb - r = (i - 1) + (q - 1)*4 + v_size* & - ((j + buff_size) + (m + 2*buff_size + 1)* & - (k + buff_size*l)) - buff_send(r) = real(pb_in(j, k + pack_offset, l, i - nVar, q), kind=wp) - end do + do i = nVar + 1, nVar + 4 + do l = 0, p + do k = 0, buff_size - 1 + do j = -buff_size, m + buff_size + do q = 1, nb + r = (i - 1) + (q - 1)*4 + v_size* & + ((j + buff_size) + (m + 2*buff_size + 1)* & + (k + buff_size*l)) + buff_send(r) = real(pb_in(j, k + pack_offset, l, i - nVar, q), kind=wp) end do end do end do end do + end do $:END_GPU_PARALLEL_LOOP() $:GPU_PARALLEL_LOOP(collapse=5,private='[i,j,k,l,q,r]') - do i = nVar + 1, nVar + 4 - do l = 0, p - do k = 0, buff_size - 1 - do j = -buff_size, m + buff_size - do q = 1, nb - r = (i - 1) + (q - 1)*4 + nb*4 + v_size* & - ((j + buff_size) + (m + 2*buff_size + 1)* & - (k + buff_size*l)) - buff_send(r) = real(mv_in(j, k + pack_offset, l, i - nVar, q), kind=wp) - end do + do i = nVar + 1, nVar + 4 + do l = 0, p + do k = 0, buff_size - 1 + do j = -buff_size, m + buff_size + do q = 1, nb + r = (i - 1) + (q - 1)*4 + nb*4 + v_size* & + ((j + buff_size) + (m + 2*buff_size + 1)* & + (k + buff_size*l)) + buff_send(r) = real(mv_in(j, k + pack_offset, l, i - nVar, q), kind=wp) end do end do end do end do + end do $:END_GPU_PARALLEL_LOOP() end if #:else $:GPU_PARALLEL_LOOP(collapse=4,private='[i,j,k,l,r]') - do i = 1, nVar - do l = 0, buff_size - 1 - do k = -buff_size, n + buff_size - do j = -buff_size, m + buff_size - r = (i - 1) + v_size* & - ((j + buff_size) + (m + 2*buff_size + 1)* & - ((k + buff_size) + (n + 2*buff_size + 1)*l)) - buff_send(r) = real(q_comm(i)%sf(j, k, l + pack_offset), kind=wp) - end do + do i = 1, nVar + do l = 0, buff_size - 1 + do k = -buff_size, n + buff_size + do j = -buff_size, m + buff_size + r = (i - 1) + v_size* & + ((j + buff_size) + (m + 2*buff_size + 1)* & + ((k + buff_size) + (n + 2*buff_size + 1)*l)) + buff_send(r) = real(q_comm(i)%sf(j, k, l + pack_offset), kind=wp) end do end do end do + end do $:END_GPU_PARALLEL_LOOP() if (qbmm_comm) then $:GPU_PARALLEL_LOOP(collapse=5,private='[i,j,k,l,q,r]') - do i = nVar + 1, nVar + 4 - do l = 0, buff_size - 1 - do k = -buff_size, n + buff_size - do j = -buff_size, m + buff_size - do q = 1, nb - r = (i - 1) + (q - 1)*4 + v_size* & - ((j + buff_size) + (m + 2*buff_size + 1)* & - ((k + buff_size) + (n + 2*buff_size + 1)*l)) - buff_send(r) = real(pb_in(j, k, l + pack_offset, i - nVar, q), kind=wp) - end do + do i = nVar + 1, nVar + 4 + do l = 0, buff_size - 1 + do k = -buff_size, n + buff_size + do j = -buff_size, m + buff_size + do q = 1, nb + r = (i - 1) + (q - 1)*4 + v_size* & + ((j + buff_size) + (m + 2*buff_size + 1)* & + ((k + buff_size) + (n + 2*buff_size + 1)*l)) + buff_send(r) = real(pb_in(j, k, l + pack_offset, i - nVar, q), kind=wp) end do end do end do end do + end do $:END_GPU_PARALLEL_LOOP() $:GPU_PARALLEL_LOOP(collapse=5,private='[i,j,k,l,q,r]') - do i = nVar + 1, nVar + 4 - do l = 0, buff_size - 1 - do k = -buff_size, n + buff_size - do j = -buff_size, m + buff_size - do q = 1, nb - r = (i - 1) + (q - 1)*4 + nb*4 + v_size* & - ((j + buff_size) + (m + 2*buff_size + 1)* & - ((k + buff_size) + (n + 2*buff_size + 1)*l)) - buff_send(r) = real(mv_in(j, k, l + pack_offset, i - nVar, q), kind=wp) - end do + do i = nVar + 1, nVar + 4 + do l = 0, buff_size - 1 + do k = -buff_size, n + buff_size + do j = -buff_size, m + buff_size + do q = 1, nb + r = (i - 1) + (q - 1)*4 + nb*4 + v_size* & + ((j + buff_size) + (m + 2*buff_size + 1)* & + ((k + buff_size) + (n + 2*buff_size + 1)*l)) + buff_send(r) = real(mv_in(j, k, l + pack_offset, i - nVar, q), kind=wp) end do end do end do end do + end do $:END_GPU_PARALLEL_LOOP() end if #:endif @@ -959,174 +959,174 @@ contains if (mpi_dir == ${mpi_dir}$) then #:if mpi_dir == 1 $:GPU_PARALLEL_LOOP(collapse=4,private='[i,j,k,l,r]') - do l = 0, p - do k = 0, n - do j = -buff_size, -1 - do i = 1, nVar - r = (i - 1) + v_size* & - (j + buff_size*((k + 1) + (n + 1)*l)) - q_comm(i)%sf(j + unpack_offset, k, l) = real(buff_recv(r), kind=stp) + do l = 0, p + do k = 0, n + do j = -buff_size, -1 + do i = 1, nVar + r = (i - 1) + v_size* & + (j + buff_size*((k + 1) + (n + 1)*l)) + q_comm(i)%sf(j + unpack_offset, k, l) = real(buff_recv(r), kind=stp) #if defined(__INTEL_COMPILER) - if (ieee_is_nan(q_comm(i)%sf(j, k, l))) then - print *, "Error", j, k, l, i - error stop "NaN(s) in recv" - end if + if (ieee_is_nan(q_comm(i)%sf(j, k, l))) then + print *, "Error", j, k, l, i + error stop "NaN(s) in recv" + end if #endif - end do end do end do end do + end do $:END_GPU_PARALLEL_LOOP() if (qbmm_comm) then $:GPU_PARALLEL_LOOP(collapse=5,private='[i,j,k,l,q,r]') - do l = 0, p - do k = 0, n - do j = -buff_size, -1 - do i = nVar + 1, nVar + 4 - do q = 1, nb - r = (i - 1) + (q - 1)*4 + v_size* & - (j + buff_size*((k + 1) + (n + 1)*l)) - pb_in(j + unpack_offset, k, l, i - nVar, q) = real(buff_recv(r), kind=stp) - end do + do l = 0, p + do k = 0, n + do j = -buff_size, -1 + do i = nVar + 1, nVar + 4 + do q = 1, nb + r = (i - 1) + (q - 1)*4 + v_size* & + (j + buff_size*((k + 1) + (n + 1)*l)) + pb_in(j + unpack_offset, k, l, i - nVar, q) = real(buff_recv(r), kind=stp) end do end do end do end do + end do $:END_GPU_PARALLEL_LOOP() $:GPU_PARALLEL_LOOP(collapse=5,private='[i,j,k,l,q,r]') - do l = 0, p - do k = 0, n - do j = -buff_size, -1 - do i = nVar + 1, nVar + 4 - do q = 1, nb - r = (i - 1) + (q - 1)*4 + nb*4 + v_size* & - (j + buff_size*((k + 1) + (n + 1)*l)) - mv_in(j + unpack_offset, k, l, i - nVar, q) = real(buff_recv(r), kind=stp) - end do + do l = 0, p + do k = 0, n + do j = -buff_size, -1 + do i = nVar + 1, nVar + 4 + do q = 1, nb + r = (i - 1) + (q - 1)*4 + nb*4 + v_size* & + (j + buff_size*((k + 1) + (n + 1)*l)) + mv_in(j + unpack_offset, k, l, i - nVar, q) = real(buff_recv(r), kind=stp) end do end do end do end do + end do $:END_GPU_PARALLEL_LOOP() end if #:elif mpi_dir == 2 $:GPU_PARALLEL_LOOP(collapse=4,private='[i,j,k,l,r]') - do i = 1, nVar - do l = 0, p - do k = -buff_size, -1 - do j = -buff_size, m + buff_size - r = (i - 1) + v_size* & - ((j + buff_size) + (m + 2*buff_size + 1)* & - ((k + buff_size) + buff_size*l)) - q_comm(i)%sf(j, k + unpack_offset, l) = real(buff_recv(r), kind=stp) + do i = 1, nVar + do l = 0, p + do k = -buff_size, -1 + do j = -buff_size, m + buff_size + r = (i - 1) + v_size* & + ((j + buff_size) + (m + 2*buff_size + 1)* & + ((k + buff_size) + buff_size*l)) + q_comm(i)%sf(j, k + unpack_offset, l) = real(buff_recv(r), kind=stp) #if defined(__INTEL_COMPILER) - if (ieee_is_nan(q_comm(i)%sf(j, k, l))) then - print *, "Error", j, k, l, i - error stop "NaN(s) in recv" - end if + if (ieee_is_nan(q_comm(i)%sf(j, k, l))) then + print *, "Error", j, k, l, i + error stop "NaN(s) in recv" + end if #endif - end do end do end do end do + end do $:END_GPU_PARALLEL_LOOP() if (qbmm_comm) then $:GPU_PARALLEL_LOOP(collapse=5,private='[i,j,k,l,q,r]') - do i = nVar + 1, nVar + 4 - do l = 0, p - do k = -buff_size, -1 - do j = -buff_size, m + buff_size - do q = 1, nb - r = (i - 1) + (q - 1)*4 + v_size* & - ((j + buff_size) + (m + 2*buff_size + 1)* & - ((k + buff_size) + buff_size*l)) - pb_in(j, k + unpack_offset, l, i - nVar, q) = real(buff_recv(r), kind=stp) - end do + do i = nVar + 1, nVar + 4 + do l = 0, p + do k = -buff_size, -1 + do j = -buff_size, m + buff_size + do q = 1, nb + r = (i - 1) + (q - 1)*4 + v_size* & + ((j + buff_size) + (m + 2*buff_size + 1)* & + ((k + buff_size) + buff_size*l)) + pb_in(j, k + unpack_offset, l, i - nVar, q) = real(buff_recv(r), kind=stp) end do end do end do end do + end do $:END_GPU_PARALLEL_LOOP() $:GPU_PARALLEL_LOOP(collapse=5,private='[i,j,k,l,q,r]') - do i = nVar + 1, nVar + 4 - do l = 0, p - do k = -buff_size, -1 - do j = -buff_size, m + buff_size - do q = 1, nb - r = (i - 1) + (q - 1)*4 + nb*4 + v_size* & - ((j + buff_size) + (m + 2*buff_size + 1)* & - ((k + buff_size) + buff_size*l)) - mv_in(j, k + unpack_offset, l, i - nVar, q) = real(buff_recv(r), kind=stp) - end do + do i = nVar + 1, nVar + 4 + do l = 0, p + do k = -buff_size, -1 + do j = -buff_size, m + buff_size + do q = 1, nb + r = (i - 1) + (q - 1)*4 + nb*4 + v_size* & + ((j + buff_size) + (m + 2*buff_size + 1)* & + ((k + buff_size) + buff_size*l)) + mv_in(j, k + unpack_offset, l, i - nVar, q) = real(buff_recv(r), kind=stp) end do end do end do end do + end do $:END_GPU_PARALLEL_LOOP() end if #:else ! Unpacking buffer from bc_z%beg $:GPU_PARALLEL_LOOP(collapse=4,private='[i,j,k,l,r]') - do i = 1, nVar - do l = -buff_size, -1 - do k = -buff_size, n + buff_size - do j = -buff_size, m + buff_size - r = (i - 1) + v_size* & - ((j + buff_size) + (m + 2*buff_size + 1)* & - ((k + buff_size) + (n + 2*buff_size + 1)* & - (l + buff_size))) - q_comm(i)%sf(j, k, l + unpack_offset) = real(buff_recv(r), kind=stp) + do i = 1, nVar + do l = -buff_size, -1 + do k = -buff_size, n + buff_size + do j = -buff_size, m + buff_size + r = (i - 1) + v_size* & + ((j + buff_size) + (m + 2*buff_size + 1)* & + ((k + buff_size) + (n + 2*buff_size + 1)* & + (l + buff_size))) + q_comm(i)%sf(j, k, l + unpack_offset) = real(buff_recv(r), kind=stp) #if defined(__INTEL_COMPILER) - if (ieee_is_nan(q_comm(i)%sf(j, k, l))) then - print *, "Error", j, k, l, i - error stop "NaN(s) in recv" - end if + if (ieee_is_nan(q_comm(i)%sf(j, k, l))) then + print *, "Error", j, k, l, i + error stop "NaN(s) in recv" + end if #endif - end do end do end do end do + end do $:END_GPU_PARALLEL_LOOP() if (qbmm_comm) then $:GPU_PARALLEL_LOOP(collapse=5,private='[i,j,k,l,q,r]') - do i = nVar + 1, nVar + 4 - do l = -buff_size, -1 - do k = -buff_size, n + buff_size - do j = -buff_size, m + buff_size - do q = 1, nb - r = (i - 1) + (q - 1)*4 + v_size* & - ((j + buff_size) + (m + 2*buff_size + 1)* & - ((k + buff_size) + (n + 2*buff_size + 1)* & - (l + buff_size))) - pb_in(j, k, l + unpack_offset, i - nVar, q) = real(buff_recv(r), kind=stp) - end do + do i = nVar + 1, nVar + 4 + do l = -buff_size, -1 + do k = -buff_size, n + buff_size + do j = -buff_size, m + buff_size + do q = 1, nb + r = (i - 1) + (q - 1)*4 + v_size* & + ((j + buff_size) + (m + 2*buff_size + 1)* & + ((k + buff_size) + (n + 2*buff_size + 1)* & + (l + buff_size))) + pb_in(j, k, l + unpack_offset, i - nVar, q) = real(buff_recv(r), kind=stp) end do end do end do end do + end do $:END_GPU_PARALLEL_LOOP() $:GPU_PARALLEL_LOOP(collapse=5,private='[i,j,k,l,q,r]') - do i = nVar + 1, nVar + 4 - do l = -buff_size, -1 - do k = -buff_size, n + buff_size - do j = -buff_size, m + buff_size - do q = 1, nb - r = (i - 1) + (q - 1)*4 + nb*4 + v_size* & - ((j + buff_size) + (m + 2*buff_size + 1)* & - ((k + buff_size) + (n + 2*buff_size + 1)* & - (l + buff_size))) - mv_in(j, k, l + unpack_offset, i - nVar, q) = real(buff_recv(r), kind=stp) - end do + do i = nVar + 1, nVar + 4 + do l = -buff_size, -1 + do k = -buff_size, n + buff_size + do j = -buff_size, m + buff_size + do q = 1, nb + r = (i - 1) + (q - 1)*4 + nb*4 + v_size* & + ((j + buff_size) + (m + 2*buff_size + 1)* & + ((k + buff_size) + (n + 2*buff_size + 1)* & + (l + buff_size))) + mv_in(j, k, l + unpack_offset, i - nVar, q) = real(buff_recv(r), kind=stp) end do end do end do end do + end do $:END_GPU_PARALLEL_LOOP() end if #:endif @@ -1871,4 +1871,4 @@ contains end subroutine s_finalize_mpi_common_module -end module m_mpi_common \ No newline at end of file +end module m_mpi_common diff --git a/src/common/m_variables_conversion.fpp b/src/common/m_variables_conversion.fpp index 053f954db..85f67069e 100644 --- a/src/common/m_variables_conversion.fpp +++ b/src/common/m_variables_conversion.fpp @@ -850,297 +850,297 @@ contains integer :: iter ! Newton-Raphson iteration counter $:GPU_PARALLEL_LOOP(collapse=3, private='[alpha_K, alpha_rho_K, Re_K, nRtmp, rho_K, gamma_K, pi_inf_K,qv_K, dyn_pres_K, rhoYks, B, pres, vftmp, nbub_sc, G_K, T, pres_mag, Ga, B2, m2, S, W, dW, E, D, f, dGa_dW, dp_dW, df_dW, iter ]') - do l = ibounds(3)%beg, ibounds(3)%end - do k = ibounds(2)%beg, ibounds(2)%end - do j = ibounds(1)%beg, ibounds(1)%end - dyn_pres_K = 0._wp - - if (igr) then - if (num_fluids == 1) then - alpha_rho_K(1) = qK_cons_vf(contxb)%sf(j, k, l) - alpha_K(1) = 1._wp - else - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - 1 - alpha_rho_K(i) = qK_cons_vf(i)%sf(j, k, l) - alpha_K(i) = qK_cons_vf(advxb + i - 1)%sf(j, k, l) - end do - - alpha_rho_K(num_fluids) = qK_cons_vf(num_fluids)%sf(j, k, l) - alpha_K(num_fluids) = 1._wp - sum(alpha_K(1:num_fluids - 1)) - end if + do l = ibounds(3)%beg, ibounds(3)%end + do k = ibounds(2)%beg, ibounds(2)%end + do j = ibounds(1)%beg, ibounds(1)%end + dyn_pres_K = 0._wp + + if (igr) then + if (num_fluids == 1) then + alpha_rho_K(1) = qK_cons_vf(contxb)%sf(j, k, l) + alpha_K(1) = 1._wp else $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids + do i = 1, num_fluids - 1 alpha_rho_K(i) = qK_cons_vf(i)%sf(j, k, l) alpha_K(i) = qK_cons_vf(advxb + i - 1)%sf(j, k, l) end do + + alpha_rho_K(num_fluids) = qK_cons_vf(num_fluids)%sf(j, k, l) + alpha_K(num_fluids) = 1._wp - sum(alpha_K(1:num_fluids - 1)) end if + else + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids + alpha_rho_K(i) = qK_cons_vf(i)%sf(j, k, l) + alpha_K(i) = qK_cons_vf(advxb + i - 1)%sf(j, k, l) + end do + end if - if (model_eqns /= 4) then + if (model_eqns /= 4) then #ifdef MFC_SIMULATION - ! If in simulation, use acc mixture subroutines - if (elasticity) then - call s_convert_species_to_mixture_variables_acc(rho_K, gamma_K, pi_inf_K, qv_K, alpha_K, & - alpha_rho_K, Re_K, G_K, Gs_vc) - else if (bubbles_euler) then - call s_convert_species_to_mixture_variables_bubbles_acc(rho_K, gamma_K, pi_inf_K, qv_K, & - alpha_K, alpha_rho_K, Re_K) - else - call s_convert_species_to_mixture_variables_acc(rho_K, gamma_K, pi_inf_K, qv_K, & - alpha_K, alpha_rho_K, Re_K) - end if + ! If in simulation, use acc mixture subroutines + if (elasticity) then + call s_convert_species_to_mixture_variables_acc(rho_K, gamma_K, pi_inf_K, qv_K, alpha_K, & + alpha_rho_K, Re_K, G_K, Gs_vc) + else if (bubbles_euler) then + call s_convert_species_to_mixture_variables_bubbles_acc(rho_K, gamma_K, pi_inf_K, qv_K, & + alpha_K, alpha_rho_K, Re_K) + else + call s_convert_species_to_mixture_variables_acc(rho_K, gamma_K, pi_inf_K, qv_K, & + alpha_K, alpha_rho_K, Re_K) + end if #else - ! If pre-processing, use non acc mixture subroutines - if (elasticity) then - call s_convert_to_mixture_variables(qK_cons_vf, j, k, l, & - rho_K, gamma_K, pi_inf_K, qv_K, Re_K, G_K, fluid_pp(:)%G) - else - call s_convert_to_mixture_variables(qK_cons_vf, j, k, l, & - rho_K, gamma_K, pi_inf_K, qv_K) - end if -#endif + ! If pre-processing, use non acc mixture subroutines + if (elasticity) then + call s_convert_to_mixture_variables(qK_cons_vf, j, k, l, & + rho_K, gamma_K, pi_inf_K, qv_K, Re_K, G_K, fluid_pp(:)%G) + else + call s_convert_to_mixture_variables(qK_cons_vf, j, k, l, & + rho_K, gamma_K, pi_inf_K, qv_K) end if +#endif + end if - if (relativity) then - if (n == 0) then - B(1) = Bx0 - B(2) = qK_cons_vf(B_idx%beg)%sf(j, k, l) - B(3) = qK_cons_vf(B_idx%beg + 1)%sf(j, k, l) - else - B(1) = qK_cons_vf(B_idx%beg)%sf(j, k, l) - B(2) = qK_cons_vf(B_idx%beg + 1)%sf(j, k, l) - B(3) = qK_cons_vf(B_idx%beg + 2)%sf(j, k, l) - end if - B2 = B(1)**2 + B(2)**2 + B(3)**2 - - m2 = 0._wp - $:GPU_LOOP(parallelism='[seq]') - do i = momxb, momxe - m2 = m2 + qK_cons_vf(i)%sf(j, k, l)**2 - end do + if (relativity) then + if (n == 0) then + B(1) = Bx0 + B(2) = qK_cons_vf(B_idx%beg)%sf(j, k, l) + B(3) = qK_cons_vf(B_idx%beg + 1)%sf(j, k, l) + else + B(1) = qK_cons_vf(B_idx%beg)%sf(j, k, l) + B(2) = qK_cons_vf(B_idx%beg + 1)%sf(j, k, l) + B(3) = qK_cons_vf(B_idx%beg + 2)%sf(j, k, l) + end if + B2 = B(1)**2 + B(2)**2 + B(3)**2 - S = 0._wp - $:GPU_LOOP(parallelism='[seq]') - do i = 1, 3 - S = S + qK_cons_vf(momxb + i - 1)%sf(j, k, l)*B(i) - end do + m2 = 0._wp + $:GPU_LOOP(parallelism='[seq]') + do i = momxb, momxe + m2 = m2 + qK_cons_vf(i)%sf(j, k, l)**2 + end do - E = qK_cons_vf(E_idx)%sf(j, k, l) + S = 0._wp + $:GPU_LOOP(parallelism='[seq]') + do i = 1, 3 + S = S + qK_cons_vf(momxb + i - 1)%sf(j, k, l)*B(i) + end do - D = 0._wp - $:GPU_LOOP(parallelism='[seq]') - do i = 1, contxe - D = D + qK_cons_vf(i)%sf(j, k, l) - end do + E = qK_cons_vf(E_idx)%sf(j, k, l) - ! Newton-Raphson - W = E + D - $:GPU_LOOP(parallelism='[seq]') - do iter = 1, relativity_cons_to_prim_max_iter - Ga = (W + B2)*W/sqrt((W + B2)**2*W**2 - (m2*W**2 + S**2*(2*W + B2))) - pres = (W - D*Ga)/((gamma_K + 1)*Ga**2) ! Thermal pressure from EOS - f = W - pres + (1 - 1/(2*Ga**2))*B2 - S**2/(2*W**2) - E - D - - ! The first equation below corrects a typo in (Mignone & Bodo, 2006) - ! m2*W**2 → 2*m2*W**2, which would cancel with the 2* in other terms - ! This corrected version is not used as the second equation empirically converges faster. - ! First equation is kept for further investigation. - ! dGa_dW = -Ga**3 * ( S**2*(3*W**2+3*W*B2+B2**2) + m2*W**2 ) / (W**3 * (W+B2)**3) ! first (corrected) - dGa_dW = -Ga**3*(2*S**2*(3*W**2 + 3*W*B2 + B2**2) + m2*W**2)/(2*W**3*(W + B2)**3) ! second (in paper) - - dp_dW = (Ga*(1 + D*dGa_dW) - 2*W*dGa_dW)/((gamma_K + 1)*Ga**3) - df_dW = 1 - dp_dW + (B2/Ga**3)*dGa_dW + S**2/W**3 - - dW = -f/df_dW - W = W + dW - if (abs(dW) < 1.e-12_wp*W) exit - end do + D = 0._wp + $:GPU_LOOP(parallelism='[seq]') + do i = 1, contxe + D = D + qK_cons_vf(i)%sf(j, k, l) + end do - ! Recalculate pressure using converged W + ! Newton-Raphson + W = E + D + $:GPU_LOOP(parallelism='[seq]') + do iter = 1, relativity_cons_to_prim_max_iter Ga = (W + B2)*W/sqrt((W + B2)**2*W**2 - (m2*W**2 + S**2*(2*W + B2))) - qK_prim_vf(E_idx)%sf(j, k, l) = (W - D*Ga)/((gamma_K + 1)*Ga**2) + pres = (W - D*Ga)/((gamma_K + 1)*Ga**2) ! Thermal pressure from EOS + f = W - pres + (1 - 1/(2*Ga**2))*B2 - S**2/(2*W**2) - E - D + + ! The first equation below corrects a typo in (Mignone & Bodo, 2006) + ! m2*W**2 → 2*m2*W**2, which would cancel with the 2* in other terms + ! This corrected version is not used as the second equation empirically converges faster. + ! First equation is kept for further investigation. + ! dGa_dW = -Ga**3 * ( S**2*(3*W**2+3*W*B2+B2**2) + m2*W**2 ) / (W**3 * (W+B2)**3) ! first (corrected) + dGa_dW = -Ga**3*(2*S**2*(3*W**2 + 3*W*B2 + B2**2) + m2*W**2)/(2*W**3*(W + B2)**3) ! second (in paper) + + dp_dW = (Ga*(1 + D*dGa_dW) - 2*W*dGa_dW)/((gamma_K + 1)*Ga**3) + df_dW = 1 - dp_dW + (B2/Ga**3)*dGa_dW + S**2/W**3 + + dW = -f/df_dW + W = W + dW + if (abs(dW) < 1.e-12_wp*W) exit + end do - ! Recover the other primitive variables - $:GPU_LOOP(parallelism='[seq]') - do i = 1, 3 - qK_prim_vf(momxb + i - 1)%sf(j, k, l) = (qK_cons_vf(momxb + i - 1)%sf(j, k, l) + (S/W)*B(i))/(W + B2) - end do - qK_prim_vf(1)%sf(j, k, l) = D/Ga ! Hard-coded for single-component for now + ! Recalculate pressure using converged W + Ga = (W + B2)*W/sqrt((W + B2)**2*W**2 - (m2*W**2 + S**2*(2*W + B2))) + qK_prim_vf(E_idx)%sf(j, k, l) = (W - D*Ga)/((gamma_K + 1)*Ga**2) - $:GPU_LOOP(parallelism='[seq]') - do i = B_idx%beg, B_idx%end - qK_prim_vf(i)%sf(j, k, l) = qK_cons_vf(i)%sf(j, k, l) - end do + ! Recover the other primitive variables + $:GPU_LOOP(parallelism='[seq]') + do i = 1, 3 + qK_prim_vf(momxb + i - 1)%sf(j, k, l) = (qK_cons_vf(momxb + i - 1)%sf(j, k, l) + (S/W)*B(i))/(W + B2) + end do + qK_prim_vf(1)%sf(j, k, l) = D/Ga ! Hard-coded for single-component for now - cycle ! skip all the non-relativistic conversions below - end if + $:GPU_LOOP(parallelism='[seq]') + do i = B_idx%beg, B_idx%end + qK_prim_vf(i)%sf(j, k, l) = qK_cons_vf(i)%sf(j, k, l) + end do - if (chemistry) then - rho_K = 0._wp - $:GPU_LOOP(parallelism='[seq]') - do i = chemxb, chemxe - rho_K = rho_K + max(0._wp, qK_cons_vf(i)%sf(j, k, l)) - end do + cycle ! skip all the non-relativistic conversions below + end if - $:GPU_LOOP(parallelism='[seq]') - do i = 1, contxe - qK_prim_vf(i)%sf(j, k, l) = rho_K - end do + if (chemistry) then + rho_K = 0._wp + $:GPU_LOOP(parallelism='[seq]') + do i = chemxb, chemxe + rho_K = rho_K + max(0._wp, qK_cons_vf(i)%sf(j, k, l)) + end do - $:GPU_LOOP(parallelism='[seq]') - do i = chemxb, chemxe - qK_prim_vf(i)%sf(j, k, l) = max(0._wp, qK_cons_vf(i)%sf(j, k, l)/rho_K) - end do - else - $:GPU_LOOP(parallelism='[seq]') - do i = 1, contxe - qK_prim_vf(i)%sf(j, k, l) = qK_cons_vf(i)%sf(j, k, l) - end do - end if + $:GPU_LOOP(parallelism='[seq]') + do i = 1, contxe + qK_prim_vf(i)%sf(j, k, l) = rho_K + end do + + $:GPU_LOOP(parallelism='[seq]') + do i = chemxb, chemxe + qK_prim_vf(i)%sf(j, k, l) = max(0._wp, qK_cons_vf(i)%sf(j, k, l)/rho_K) + end do + else + $:GPU_LOOP(parallelism='[seq]') + do i = 1, contxe + qK_prim_vf(i)%sf(j, k, l) = qK_cons_vf(i)%sf(j, k, l) + end do + end if #ifdef MFC_SIMULATION - rho_K = max(rho_K, sgm_eps) + rho_K = max(rho_K, sgm_eps) #endif + $:GPU_LOOP(parallelism='[seq]') + do i = momxb, momxe + if (model_eqns /= 4) then + qK_prim_vf(i)%sf(j, k, l) = qK_cons_vf(i)%sf(j, k, l) & + /rho_K + dyn_pres_K = dyn_pres_K + 5.e-1_wp*qK_cons_vf(i)%sf(j, k, l) & + *qK_prim_vf(i)%sf(j, k, l) + else + qK_prim_vf(i)%sf(j, k, l) = qK_cons_vf(i)%sf(j, k, l) & + /qK_cons_vf(1)%sf(j, k, l) + end if + end do + + if (chemistry) then $:GPU_LOOP(parallelism='[seq]') - do i = momxb, momxe - if (model_eqns /= 4) then - qK_prim_vf(i)%sf(j, k, l) = qK_cons_vf(i)%sf(j, k, l) & - /rho_K - dyn_pres_K = dyn_pres_K + 5.e-1_wp*qK_cons_vf(i)%sf(j, k, l) & - *qK_prim_vf(i)%sf(j, k, l) - else - qK_prim_vf(i)%sf(j, k, l) = qK_cons_vf(i)%sf(j, k, l) & - /qK_cons_vf(1)%sf(j, k, l) - end if + do i = 1, num_species + rhoYks(i) = qK_cons_vf(chemxb + i - 1)%sf(j, k, l) end do - if (chemistry) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_species - rhoYks(i) = qK_cons_vf(chemxb + i - 1)%sf(j, k, l) - end do - - T = q_T_sf%sf(j, k, l) - end if + T = q_T_sf%sf(j, k, l) + end if - if (mhd) then - if (n == 0) then - pres_mag = 0.5_wp*(Bx0**2 + qK_cons_vf(B_idx%beg)%sf(j, k, l)**2 + qK_cons_vf(B_idx%beg + 1)%sf(j, k, l)**2) - else - pres_mag = 0.5_wp*(qK_cons_vf(B_idx%beg)%sf(j, k, l)**2 + qK_cons_vf(B_idx%beg + 1)%sf(j, k, l)**2 + qK_cons_vf(B_idx%beg + 2)%sf(j, k, l)**2) - end if + if (mhd) then + if (n == 0) then + pres_mag = 0.5_wp*(Bx0**2 + qK_cons_vf(B_idx%beg)%sf(j, k, l)**2 + qK_cons_vf(B_idx%beg + 1)%sf(j, k, l)**2) else - pres_mag = 0._wp + pres_mag = 0.5_wp*(qK_cons_vf(B_idx%beg)%sf(j, k, l)**2 + qK_cons_vf(B_idx%beg + 1)%sf(j, k, l)**2 + qK_cons_vf(B_idx%beg + 2)%sf(j, k, l)**2) end if + else + pres_mag = 0._wp + end if - call s_compute_pressure(qK_cons_vf(E_idx)%sf(j, k, l), & - qK_cons_vf(alf_idx)%sf(j, k, l), & - dyn_pres_K, pi_inf_K, gamma_K, rho_K, & - qv_K, rhoYks, pres, T, pres_mag=pres_mag) + call s_compute_pressure(qK_cons_vf(E_idx)%sf(j, k, l), & + qK_cons_vf(alf_idx)%sf(j, k, l), & + dyn_pres_K, pi_inf_K, gamma_K, rho_K, & + qv_K, rhoYks, pres, T, pres_mag=pres_mag) - qK_prim_vf(E_idx)%sf(j, k, l) = pres + qK_prim_vf(E_idx)%sf(j, k, l) = pres - if (chemistry) then - q_T_sf%sf(j, k, l) = T - end if + if (chemistry) then + q_T_sf%sf(j, k, l) = T + end if - if (bubbles_euler) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, nb - nRtmp(i) = qK_cons_vf(bubrs_vc(i))%sf(j, k, l) - end do + if (bubbles_euler) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, nb + nRtmp(i) = qK_cons_vf(bubrs_vc(i))%sf(j, k, l) + end do - vftmp = qK_cons_vf(alf_idx)%sf(j, k, l) + vftmp = qK_cons_vf(alf_idx)%sf(j, k, l) - if (qbmm) then - !Get nb (constant across all R0 bins) - nbub_sc = qK_cons_vf(bubxb)%sf(j, k, l) + if (qbmm) then + !Get nb (constant across all R0 bins) + nbub_sc = qK_cons_vf(bubxb)%sf(j, k, l) - !Convert cons to prim - $:GPU_LOOP(parallelism='[seq]') - do i = bubxb, bubxe - qK_prim_vf(i)%sf(j, k, l) = qK_cons_vf(i)%sf(j, k, l)/nbub_sc - end do - !Need to keep track of nb in the primitive variable list (converted back to true value before output) + !Convert cons to prim + $:GPU_LOOP(parallelism='[seq]') + do i = bubxb, bubxe + qK_prim_vf(i)%sf(j, k, l) = qK_cons_vf(i)%sf(j, k, l)/nbub_sc + end do + !Need to keep track of nb in the primitive variable list (converted back to true value before output) #ifdef MFC_SIMULATION - qK_prim_vf(bubxb)%sf(j, k, l) = qK_cons_vf(bubxb)%sf(j, k, l) + qK_prim_vf(bubxb)%sf(j, k, l) = qK_cons_vf(bubxb)%sf(j, k, l) #endif + else + if (adv_n) then + qK_prim_vf(n_idx)%sf(j, k, l) = qK_cons_vf(n_idx)%sf(j, k, l) + nbub_sc = qK_prim_vf(n_idx)%sf(j, k, l) else - if (adv_n) then - qK_prim_vf(n_idx)%sf(j, k, l) = qK_cons_vf(n_idx)%sf(j, k, l) - nbub_sc = qK_prim_vf(n_idx)%sf(j, k, l) - else - call s_comp_n_from_cons(vftmp, nRtmp, nbub_sc, weight) - end if - - $:GPU_LOOP(parallelism='[seq]') - do i = bubxb, bubxe - qK_prim_vf(i)%sf(j, k, l) = qK_cons_vf(i)%sf(j, k, l)/nbub_sc - end do + call s_comp_n_from_cons(vftmp, nRtmp, nbub_sc, weight) end if - end if - if (mhd) then $:GPU_LOOP(parallelism='[seq]') - do i = B_idx%beg, B_idx%end - qK_prim_vf(i)%sf(j, k, l) = qK_cons_vf(i)%sf(j, k, l) + do i = bubxb, bubxe + qK_prim_vf(i)%sf(j, k, l) = qK_cons_vf(i)%sf(j, k, l)/nbub_sc end do end if + end if - if (elasticity) then - $:GPU_LOOP(parallelism='[seq]') - do i = strxb, strxe - qK_prim_vf(i)%sf(j, k, l) = qK_cons_vf(i)%sf(j, k, l)/rho_K - end do - end if + if (mhd) then + $:GPU_LOOP(parallelism='[seq]') + do i = B_idx%beg, B_idx%end + qK_prim_vf(i)%sf(j, k, l) = qK_cons_vf(i)%sf(j, k, l) + end do + end if - if (hypoelasticity) then - $:GPU_LOOP(parallelism='[seq]') - do i = strxb, strxe - ! subtracting elastic contribution for pressure calculation - if (G_K > verysmall) then - if (cont_damage) G_K = G_K*max((1._wp - qK_cons_vf(damage_idx)%sf(j, k, l)), 0._wp) + if (elasticity) then + $:GPU_LOOP(parallelism='[seq]') + do i = strxb, strxe + qK_prim_vf(i)%sf(j, k, l) = qK_cons_vf(i)%sf(j, k, l)/rho_K + end do + end if + + if (hypoelasticity) then + $:GPU_LOOP(parallelism='[seq]') + do i = strxb, strxe + ! subtracting elastic contribution for pressure calculation + if (G_K > verysmall) then + if (cont_damage) G_K = G_K*max((1._wp - qK_cons_vf(damage_idx)%sf(j, k, l)), 0._wp) + qK_prim_vf(E_idx)%sf(j, k, l) = qK_prim_vf(E_idx)%sf(j, k, l) - & + ((qK_prim_vf(i)%sf(j, k, l)**2._wp)/(4._wp*G_K))/gamma_K + ! Double for shear stresses + if (any(i == shear_indices)) then qK_prim_vf(E_idx)%sf(j, k, l) = qK_prim_vf(E_idx)%sf(j, k, l) - & ((qK_prim_vf(i)%sf(j, k, l)**2._wp)/(4._wp*G_K))/gamma_K - ! Double for shear stresses - if (any(i == shear_indices)) then - qK_prim_vf(E_idx)%sf(j, k, l) = qK_prim_vf(E_idx)%sf(j, k, l) - & - ((qK_prim_vf(i)%sf(j, k, l)**2._wp)/(4._wp*G_K))/gamma_K - end if end if - end do - end if + end if + end do + end if - if (hyperelasticity) then - $:GPU_LOOP(parallelism='[seq]') - do i = xibeg, xiend - qK_prim_vf(i)%sf(j, k, l) = qK_cons_vf(i)%sf(j, k, l)/rho_K - end do - end if + if (hyperelasticity) then + $:GPU_LOOP(parallelism='[seq]') + do i = xibeg, xiend + qK_prim_vf(i)%sf(j, k, l) = qK_cons_vf(i)%sf(j, k, l)/rho_K + end do + end if - if (.not. igr .or. num_fluids > 1) then - $:GPU_LOOP(parallelism='[seq]') - do i = advxb, advxe - qK_prim_vf(i)%sf(j, k, l) = qK_cons_vf(i)%sf(j, k, l) - end do - end if + if (.not. igr .or. num_fluids > 1) then + $:GPU_LOOP(parallelism='[seq]') + do i = advxb, advxe + qK_prim_vf(i)%sf(j, k, l) = qK_cons_vf(i)%sf(j, k, l) + end do + end if - if (surface_tension) then - qK_prim_vf(c_idx)%sf(j, k, l) = qK_cons_vf(c_idx)%sf(j, k, l) - end if + if (surface_tension) then + qK_prim_vf(c_idx)%sf(j, k, l) = qK_cons_vf(c_idx)%sf(j, k, l) + end if - if (cont_damage) qK_prim_vf(damage_idx)%sf(j, k, l) = qK_cons_vf(damage_idx)%sf(j, k, l) + if (cont_damage) qK_prim_vf(damage_idx)%sf(j, k, l) = qK_cons_vf(damage_idx)%sf(j, k, l) #ifdef MFC_POST_PROCESS - if (bubbles_lagrange) qK_prim_vf(beta_idx)%sf(j, k, l) = qK_cons_vf(beta_idx)%sf(j, k, l) + if (bubbles_lagrange) qK_prim_vf(beta_idx)%sf(j, k, l) = qK_cons_vf(beta_idx)%sf(j, k, l) #endif - end do end do end do + end do $:END_GPU_PARALLEL_LOOP() end subroutine s_convert_conservative_to_primitive_variables @@ -1469,111 +1469,111 @@ contains ! accounting for the contribution of either viscosity or capillarity #ifdef MFC_SIMULATION $:GPU_PARALLEL_LOOP(collapse=3, private='[alpha_rho_K, vel_K, alpha_K, Re_K, Y_K, rho_K, vel_K_sum, pres_K, E_K, gamma_K, pi_inf_K, qv_K, G_K, T_K, mix_mol_weight, R_gas]') - do l = is3b, is3e - do k = is2b, is2e - do j = is1b, is1e + do l = is3b, is3e + do k = is2b, is2e + do j = is1b, is1e - $:GPU_LOOP(parallelism='[seq]') - do i = 1, contxe - alpha_rho_K(i) = qK_prim_vf(j, k, l, i) - end do + $:GPU_LOOP(parallelism='[seq]') + do i = 1, contxe + alpha_rho_K(i) = qK_prim_vf(j, k, l, i) + end do - $:GPU_LOOP(parallelism='[seq]') - do i = advxb, advxe - alpha_K(i - E_idx) = qK_prim_vf(j, k, l, i) - end do - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_vels - vel_K(i) = qK_prim_vf(j, k, l, contxe + i) - end do + $:GPU_LOOP(parallelism='[seq]') + do i = advxb, advxe + alpha_K(i - E_idx) = qK_prim_vf(j, k, l, i) + end do + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_vels + vel_K(i) = qK_prim_vf(j, k, l, contxe + i) + end do + + vel_K_sum = 0._wp + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_vels + vel_K_sum = vel_K_sum + vel_K(i)**2._wp + end do - vel_K_sum = 0._wp + pres_K = qK_prim_vf(j, k, l, E_idx) + if (elasticity) then + call s_convert_species_to_mixture_variables_acc(rho_K, gamma_K, pi_inf_K, qv_K, & + alpha_K, alpha_rho_K, Re_K, & + G_K, Gs_vc) + else if (bubbles_euler) then + call s_convert_species_to_mixture_variables_bubbles_acc(rho_K, gamma_K, & + pi_inf_K, qv_K, alpha_K, alpha_rho_K, Re_K) + else + call s_convert_species_to_mixture_variables_acc(rho_K, gamma_K, pi_inf_K, qv_K, & + alpha_K, alpha_rho_K, Re_K) + end if + + ! Computing the energy from the pressure + + if (chemistry) then $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_vels - vel_K_sum = vel_K_sum + vel_K(i)**2._wp + do i = chemxb, chemxe + Y_K(i - chemxb + 1) = qK_prim_vf(j, k, l, i) end do + !Computing the energy from the internal energy of the mixture + call get_mixture_molecular_weight(Y_k, mix_mol_weight) + R_gas = gas_constant/mix_mol_weight + T_K = pres_K/rho_K/R_gas + call get_mixture_energy_mass(T_K, Y_K, E_K) + E_K = rho_K*E_K + 5.e-1_wp*rho_K*vel_K_sum + else + ! Computing the energy from the pressure + E_K = gamma_K*pres_K + pi_inf_K & + + 5.e-1_wp*rho_K*vel_K_sum + qv_K + end if - pres_K = qK_prim_vf(j, k, l, E_idx) - if (elasticity) then - call s_convert_species_to_mixture_variables_acc(rho_K, gamma_K, pi_inf_K, qv_K, & - alpha_K, alpha_rho_K, Re_K, & - G_K, Gs_vc) - else if (bubbles_euler) then - call s_convert_species_to_mixture_variables_bubbles_acc(rho_K, gamma_K, & - pi_inf_K, qv_K, alpha_K, alpha_rho_K, Re_K) - else - call s_convert_species_to_mixture_variables_acc(rho_K, gamma_K, pi_inf_K, qv_K, & - alpha_K, alpha_rho_K, Re_K) - end if + ! mass flux, this should be \alpha_i \rho_i u_i + $:GPU_LOOP(parallelism='[seq]') + do i = 1, contxe + FK_vf(j, k, l, i) = alpha_rho_K(i)*vel_K(dir_idx(1)) + end do - ! Computing the energy from the pressure + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_vels + FK_vf(j, k, l, contxe + dir_idx(i)) = & + rho_K*vel_K(dir_idx(1)) & + *vel_K(dir_idx(i)) & + + pres_K*dir_flg(dir_idx(i)) + end do - if (chemistry) then - $:GPU_LOOP(parallelism='[seq]') - do i = chemxb, chemxe - Y_K(i - chemxb + 1) = qK_prim_vf(j, k, l, i) - end do - !Computing the energy from the internal energy of the mixture - call get_mixture_molecular_weight(Y_k, mix_mol_weight) - R_gas = gas_constant/mix_mol_weight - T_K = pres_K/rho_K/R_gas - call get_mixture_energy_mass(T_K, Y_K, E_K) - E_K = rho_K*E_K + 5.e-1_wp*rho_K*vel_K_sum - else - ! Computing the energy from the pressure - E_K = gamma_K*pres_K + pi_inf_K & - + 5.e-1_wp*rho_K*vel_K_sum + qv_K - end if + ! energy flux, u(E+p) + FK_vf(j, k, l, E_idx) = vel_K(dir_idx(1))*(E_K + pres_K) - ! mass flux, this should be \alpha_i \rho_i u_i + ! Species advection Flux, \rho*u*Y + if (chemistry) then $:GPU_LOOP(parallelism='[seq]') - do i = 1, contxe - FK_vf(j, k, l, i) = alpha_rho_K(i)*vel_K(dir_idx(1)) + do i = 1, num_species + FK_vf(j, k, l, i - 1 + chemxb) = vel_K(dir_idx(1))*(rho_K*Y_K(i)) end do + end if + if (riemann_solver == 1 .or. riemann_solver == 4) then $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_vels - FK_vf(j, k, l, contxe + dir_idx(i)) = & - rho_K*vel_K(dir_idx(1)) & - *vel_K(dir_idx(i)) & - + pres_K*dir_flg(dir_idx(i)) + do i = advxb, advxe + FK_vf(j, k, l, i) = 0._wp + FK_src_vf(j, k, l, i) = alpha_K(i - E_idx) end do - ! energy flux, u(E+p) - FK_vf(j, k, l, E_idx) = vel_K(dir_idx(1))*(E_K + pres_K) - - ! Species advection Flux, \rho*u*Y - if (chemistry) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_species - FK_vf(j, k, l, i - 1 + chemxb) = vel_K(dir_idx(1))*(rho_K*Y_K(i)) - end do - end if - - if (riemann_solver == 1 .or. riemann_solver == 4) then - $:GPU_LOOP(parallelism='[seq]') - do i = advxb, advxe - FK_vf(j, k, l, i) = 0._wp - FK_src_vf(j, k, l, i) = alpha_K(i - E_idx) - end do - - else - ! Could be bubbles_euler! - $:GPU_LOOP(parallelism='[seq]') - do i = advxb, advxe - FK_vf(j, k, l, i) = vel_K(dir_idx(1))*alpha_K(i - E_idx) - end do + else + ! Could be bubbles_euler! + $:GPU_LOOP(parallelism='[seq]') + do i = advxb, advxe + FK_vf(j, k, l, i) = vel_K(dir_idx(1))*alpha_K(i - E_idx) + end do - $:GPU_LOOP(parallelism='[seq]') - do i = advxb, advxe - FK_src_vf(j, k, l, i) = vel_K(dir_idx(1)) - end do + $:GPU_LOOP(parallelism='[seq]') + do i = advxb, advxe + FK_src_vf(j, k, l, i) = vel_K(dir_idx(1)) + end do - end if + end if - end do end do end do + end do $:END_GPU_PARALLEL_LOOP() #endif end subroutine s_convert_primitive_to_flux_variables @@ -1701,4 +1701,4 @@ contains end subroutine s_compute_fast_magnetosonic_speed #endif -end module m_variables_conversion \ No newline at end of file +end module m_variables_conversion diff --git a/src/simulation/m_acoustic_src.fpp b/src/simulation/m_acoustic_src.fpp index 0364d2f21..15864f1e2 100644 --- a/src/simulation/m_acoustic_src.fpp +++ b/src/simulation/m_acoustic_src.fpp @@ -221,10 +221,10 @@ contains deallocate (phi_rn) $:GPU_PARALLEL_LOOP(private='[myalpha,myalpha_rho, myRho, B_tait,c, small_gamma, frequency_local, gauss_sigma_time_local, mass_src_diff, mom_src_diff, source_temporal, j, k, l, q ]', copyin = '[sum_BB, freq_conv_flag, gauss_conv_flag, sim_time]') - do i = 1, num_points - j = source_spatials(ai)%coord(1, i) - k = source_spatials(ai)%coord(2, i) - l = source_spatials(ai)%coord(3, i) + do i = 1, num_points + j = source_spatials(ai)%coord(1, i) + k = source_spatials(ai)%coord(2, i) + l = source_spatials(ai)%coord(3, i) ! Compute speed of sound myRho = 0._wp diff --git a/src/simulation/m_body_forces.fpp b/src/simulation/m_body_forces.fpp index 7c2d2b12f..cdf35fd36 100644 --- a/src/simulation/m_body_forces.fpp +++ b/src/simulation/m_body_forces.fpp @@ -151,13 +151,13 @@ contains if (bf_z) then ! z-direction body forces $:GPU_PARALLEL_LOOP(private='[j,k,l]', collapse=3) - do l = 0, p - do k = 0, n - do j = 0, m - rhs_vf(momxe)%sf(j, k, l) = rhs_vf(momxe)%sf(j, k, l) + & - rhoM(j, k, l)*accel_bf(3) - rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) + & - q_cons_vf(momxe)%sf(j, k, l)*accel_bf(3) + do l = 0, p + do k = 0, n + do j = 0, m + rhs_vf(momxe)%sf(j, k, l) = rhs_vf(momxe)%sf(j, k, l) + & + rhoM(j, k, l)*accel_bf(3) + rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) + & + q_cons_vf(momxe)%sf(j, k, l)*accel_bf(3) end do end do end do diff --git a/src/simulation/m_bubbles_EE.fpp b/src/simulation/m_bubbles_EE.fpp index 4eca1e249..d8bed3ad1 100644 --- a/src/simulation/m_bubbles_EE.fpp +++ b/src/simulation/m_bubbles_EE.fpp @@ -293,25 +293,25 @@ contains if (adap_dt) then - my_divu = real(divu_in%sf(j, k, l), kind=wp) - call s_advance_step(myRho, myP, myR, myV, R0(q), & - pb_local, pbdot, alf, n_tait, B_tait, & - bub_adv_src(j, k, l), my_divu, & - dmBub_id, dmMass_v, dmMass_n, dmBeta_c, & - dmBeta_t, dmCson, adap_dt_stop) - - q_cons_vf(rs(q))%sf(j, k, l) = nbub*myR - q_cons_vf(vs(q))%sf(j, k, l) = nbub*myV - - else - my_divu = real(divu_in%sf(j, k, l), kind=wp) - rddot = f_rddot(myRho, myP, myR, myV, R0(q), & + my_divu = real(divu_in%sf(j, k, l), kind=wp) + call s_advance_step(myRho, myP, myR, myV, R0(q), & pb_local, pbdot, alf, n_tait, B_tait, & bub_adv_src(j, k, l), my_divu, & - dmCson) - bub_v_src(j, k, l, q) = nbub*rddot - bub_r_src(j, k, l, q) = q_cons_vf(vs(q))%sf(j, k, l) - end if + dmBub_id, dmMass_v, dmMass_n, dmBeta_c, & + dmBeta_t, dmCson, adap_dt_stop) + + q_cons_vf(rs(q))%sf(j, k, l) = nbub*myR + q_cons_vf(vs(q))%sf(j, k, l) = nbub*myV + + else + my_divu = real(divu_in%sf(j, k, l), kind=wp) + rddot = f_rddot(myRho, myP, myR, myV, R0(q), & + pb_local, pbdot, alf, n_tait, B_tait, & + bub_adv_src(j, k, l), my_divu, & + dmCson) + bub_v_src(j, k, l, q) = nbub*rddot + bub_r_src(j, k, l, q) = q_cons_vf(vs(q))%sf(j, k, l) + end if adap_dt_stop_max = max(adap_dt_stop_max, adap_dt_stop) diff --git a/src/simulation/m_bubbles_EL.fpp b/src/simulation/m_bubbles_EL.fpp index 67274568e..02d34ccf1 100644 --- a/src/simulation/m_bubbles_EL.fpp +++ b/src/simulation/m_bubbles_EL.fpp @@ -721,14 +721,14 @@ contains ! (q / (1 - beta)) * d(beta)/dt source if (p == 0) then $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=4) - do k = 0, p - do j = 0, n - do i = 0, m - do l = 1, E_idx - if (q_beta(1)%sf(i, j, k) > (1._wp - lag_params%valmaxvoid)) then - rhs_vf(l)%sf(i, j, k) = rhs_vf(l)%sf(i, j, k) + & - q_cons_vf(l)%sf(i, j, k)*(q_beta(2)%sf(i, j, k) + & - q_beta(5)%sf(i, j, k)) + do k = 0, p + do j = 0, n + do i = 0, m + do l = 1, E_idx + if (q_beta(1)%sf(i, j, k) > (1._wp - lag_params%valmaxvoid)) then + rhs_vf(l)%sf(i, j, k) = rhs_vf(l)%sf(i, j, k) + & + q_cons_vf(l)%sf(i, j, k)*(q_beta(2)%sf(i, j, k) + & + q_beta(5)%sf(i, j, k)) end if end do @@ -760,15 +760,15 @@ contains ! (q / (1 - beta)) * d(beta)/dt source $:GPU_PARALLEL_LOOP(private='[i,j,k]', collapse=3) - do k = 0, p - do j = 0, n - do i = 0, m - if (q_beta(1)%sf(i, j, k) > (1._wp - lag_params%valmaxvoid)) then - rhs_vf(contxe + l)%sf(i, j, k) = rhs_vf(contxe + l)%sf(i, j, k) - & - (1._wp - q_beta(1)%sf(i, j, k))/ & - q_beta(1)%sf(i, j, k)* & - q_beta(3)%sf(i, j, k) - end if + do k = 0, p + do j = 0, n + do i = 0, m + if (q_beta(1)%sf(i, j, k) > (1._wp - lag_params%valmaxvoid)) then + rhs_vf(contxe + l)%sf(i, j, k) = rhs_vf(contxe + l)%sf(i, j, k) - & + (1._wp - q_beta(1)%sf(i, j, k))/ & + q_beta(1)%sf(i, j, k)* & + q_beta(3)%sf(i, j, k) + end if end do end do end do @@ -776,10 +776,10 @@ contains !source in energy $:GPU_PARALLEL_LOOP(private='[i,j,k]', collapse=3) - do k = idwbuff(3)%beg, idwbuff(3)%end - do j = idwbuff(2)%beg, idwbuff(2)%end - do i = idwbuff(1)%beg, idwbuff(1)%end - q_beta(3)%sf(i, j, k) = q_prim_vf(E_idx)%sf(i, j, k)*q_prim_vf(contxe + l)%sf(i, j, k) + do k = idwbuff(3)%beg, idwbuff(3)%end + do j = idwbuff(2)%beg, idwbuff(2)%end + do i = idwbuff(1)%beg, idwbuff(1)%end + q_beta(3)%sf(i, j, k) = q_prim_vf(E_idx)%sf(i, j, k)*q_prim_vf(contxe + l)%sf(i, j, k) end do end do end do @@ -789,14 +789,14 @@ contains ! (beta / (1 - beta)) * d(Pu)/dl source $:GPU_PARALLEL_LOOP(private='[i,j,k]', collapse=3) - do k = 0, p - do j = 0, n - do i = 0, m - if (q_beta(1)%sf(i, j, k) > (1._wp - lag_params%valmaxvoid)) then - rhs_vf(E_idx)%sf(i, j, k) = rhs_vf(E_idx)%sf(i, j, k) - & - q_beta(4)%sf(i, j, k)*(1._wp - q_beta(1)%sf(i, j, k))/ & - q_beta(1)%sf(i, j, k) - end if + do k = 0, p + do j = 0, n + do i = 0, m + if (q_beta(1)%sf(i, j, k) > (1._wp - lag_params%valmaxvoid)) then + rhs_vf(E_idx)%sf(i, j, k) = rhs_vf(E_idx)%sf(i, j, k) - & + q_beta(4)%sf(i, j, k)*(1._wp - q_beta(1)%sf(i, j, k))/ & + q_beta(1)%sf(i, j, k) + end if end do end do end do @@ -847,11 +847,11 @@ contains call nvtxStartRange("BUBBLES-LAGRANGE-KERNELS") $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=4) - do i = 1, q_beta_idx - do l = idwbuff(3)%beg, idwbuff(3)%end - do k = idwbuff(2)%beg, idwbuff(2)%end - do j = idwbuff(1)%beg, idwbuff(1)%end - q_beta(i)%sf(j, k, l) = 0._wp + do i = 1, q_beta_idx + do l = idwbuff(3)%beg, idwbuff(3)%end + do k = idwbuff(2)%beg, idwbuff(2)%end + do j = idwbuff(1)%beg, idwbuff(1)%end + q_beta(i)%sf(j, k, l) = 0._wp end do end do end do @@ -863,13 +863,13 @@ contains !Store 1-beta $:GPU_PARALLEL_LOOP(private='[j,k,l]', collapse=3) - do l = idwbuff(3)%beg, idwbuff(3)%end - do k = idwbuff(2)%beg, idwbuff(2)%end - do j = idwbuff(1)%beg, idwbuff(1)%end - q_beta(1)%sf(j, k, l) = 1._wp - q_beta(1)%sf(j, k, l) - ! Limiting void fraction given max value - q_beta(1)%sf(j, k, l) = max(q_beta(1)%sf(j, k, l), & - 1._wp - lag_params%valmaxvoid) + do l = idwbuff(3)%beg, idwbuff(3)%end + do k = idwbuff(2)%beg, idwbuff(2)%end + do j = idwbuff(1)%beg, idwbuff(1)%end + q_beta(1)%sf(j, k, l) = 1._wp - q_beta(1)%sf(j, k, l) + ! Limiting void fraction given max value + q_beta(1)%sf(j, k, l) = max(q_beta(1)%sf(j, k, l), & + 1._wp - lag_params%valmaxvoid) end do end do end do @@ -1376,14 +1376,14 @@ contains if (dir == 1) then ! Gradient in x dir. $:GPU_PARALLEL_LOOP(private='[i,j,k]', collapse=3) - do k = 0, p - do j = 0, n - do i = 0, m - dq(i, j, k) = q(i, j, k)*(dx(i + 1) - dx(i - 1)) & - + q(i + 1, j, k)*(dx(i) + dx(i - 1)) & - - q(i - 1, j, k)*(dx(i) + dx(i + 1)) - dq(i, j, k) = dq(i, j, k)/ & - ((dx(i) + dx(i - 1))*(dx(i) + dx(i + 1))) + do k = 0, p + do j = 0, n + do i = 0, m + dq(i, j, k) = q(i, j, k)*(dx(i + 1) - dx(i - 1)) & + + q(i + 1, j, k)*(dx(i) + dx(i - 1)) & + - q(i - 1, j, k)*(dx(i) + dx(i + 1)) + dq(i, j, k) = dq(i, j, k)/ & + ((dx(i) + dx(i - 1))*(dx(i) + dx(i + 1))) end do end do end do @@ -1391,14 +1391,14 @@ contains elseif (dir == 2) then ! Gradient in y dir. $:GPU_PARALLEL_LOOP(private='[i,j,k]', collapse=3) - do k = 0, p - do j = 0, n - do i = 0, m - dq(i, j, k) = q(i, j, k)*(dy(j + 1) - dy(j - 1)) & - + q(i, j + 1, k)*(dy(j) + dy(j - 1)) & - - q(i, j - 1, k)*(dy(j) + dy(j + 1)) - dq(i, j, k) = dq(i, j, k)/ & - ((dy(j) + dy(j - 1))*(dy(j) + dy(j + 1))) + do k = 0, p + do j = 0, n + do i = 0, m + dq(i, j, k) = q(i, j, k)*(dy(j + 1) - dy(j - 1)) & + + q(i, j + 1, k)*(dy(j) + dy(j - 1)) & + - q(i, j - 1, k)*(dy(j) + dy(j + 1)) + dq(i, j, k) = dq(i, j, k)/ & + ((dy(j) + dy(j - 1))*(dy(j) + dy(j + 1))) end do end do end do @@ -1406,14 +1406,14 @@ contains elseif (dir == 3) then ! Gradient in z dir. $:GPU_PARALLEL_LOOP(private='[i,j,k]', collapse=3) - do k = 0, p - do j = 0, n - do i = 0, m - dq(i, j, k) = q(i, j, k)*(dz(k + 1) - dz(k - 1)) & - + q(i, j, k + 1)*(dz(k) + dz(k - 1)) & - - q(i, j, k - 1)*(dz(k) + dz(k + 1)) - dq(i, j, k) = dq(i, j, k)/ & - ((dz(k) + dz(k - 1))*(dz(k) + dz(k + 1))) + do k = 0, p + do j = 0, n + do i = 0, m + dq(i, j, k) = q(i, j, k)*(dz(k + 1) - dz(k - 1)) & + + q(i, j, k + 1)*(dz(k) + dz(k - 1)) & + - q(i, j, k - 1)*(dz(k) + dz(k + 1)) + dq(i, j, k) = dq(i, j, k)/ & + ((dz(k) + dz(k - 1))*(dz(k) + dz(k + 1))) end do end do end do @@ -1514,15 +1514,15 @@ contains lag_void_avg = 0._wp lag_vol = 0._wp $:GPU_PARALLEL_LOOP(private='[i,j,k]', collapse=3, reduction='[[lag_vol, lag_void_avg], [lag_void_max]]', reductionOp='[+, MAX]', copy='[lag_vol, lag_void_avg, lag_void_max]') - do k = 0, p - do j = 0, n - do i = 0, m - lag_void_max = max(lag_void_max, 1._wp - q_beta(1)%sf(i, j, k)) - call s_get_char_vol(i, j, k, volcell) - if ((1._wp - q_beta(1)%sf(i, j, k)) > 5.0d-11) then - lag_void_avg = lag_void_avg + (1._wp - q_beta(1)%sf(i, j, k))*volcell - lag_vol = lag_vol + volcell - end if + do k = 0, p + do j = 0, n + do i = 0, m + lag_void_max = max(lag_void_max, 1._wp - q_beta(1)%sf(i, j, k)) + call s_get_char_vol(i, j, k, volcell) + if ((1._wp - q_beta(1)%sf(i, j, k)) > 5.0d-11) then + lag_void_avg = lag_void_avg + (1._wp - q_beta(1)%sf(i, j, k))*volcell + lag_vol = lag_vol + volcell + end if end do end do end do diff --git a/src/simulation/m_bubbles_EL_kernels.fpp b/src/simulation/m_bubbles_EL_kernels.fpp index 0f3154cdc..99bb465f7 100644 --- a/src/simulation/m_bubbles_EL_kernels.fpp +++ b/src/simulation/m_bubbles_EL_kernels.fpp @@ -56,40 +56,40 @@ contains integer :: l $:GPU_PARALLEL_LOOP(private='[l,s_coord,cell]') - do l = 1, nBubs + do l = 1, nBubs - volpart = 4._wp/3._wp*pi*lbk_rad(l, 2)**3._wp - s_coord(1:3) = lbk_s(l, 1:3, 2) - call s_get_cell(s_coord, cell) + volpart = 4._wp/3._wp*pi*lbk_rad(l, 2)**3._wp + s_coord(1:3) = lbk_s(l, 1:3, 2) + call s_get_cell(s_coord, cell) - strength_vol = volpart - strength_vel = 4._wp*pi*lbk_rad(l, 2)**2._wp*lbk_vel(l, 2) + strength_vol = volpart + strength_vel = 4._wp*pi*lbk_rad(l, 2)**2._wp*lbk_vel(l, 2) - if (num_dims == 2) then - Vol = dx(cell(1))*dy(cell(2))*lag_params%charwidth - if (cyl_coord) Vol = dx(cell(1))*dy(cell(2))*y_cc(cell(2))*2._wp*pi - else - Vol = dx(cell(1))*dy(cell(2))*dz(cell(3)) - end if + if (num_dims == 2) then + Vol = dx(cell(1))*dy(cell(2))*lag_params%charwidth + if (cyl_coord) Vol = dx(cell(1))*dy(cell(2))*y_cc(cell(2))*2._wp*pi + else + Vol = dx(cell(1))*dy(cell(2))*dz(cell(3)) + end if - !Update void fraction field - addFun1 = strength_vol/Vol - $:GPU_ATOMIC(atomic='update') - updatedvar(1)%sf(cell(1), cell(2), cell(3)) = updatedvar(1)%sf(cell(1), cell(2), cell(3)) + real(addFun1, kind=stp) + !Update void fraction field + addFun1 = strength_vol/Vol + $:GPU_ATOMIC(atomic='update') + updatedvar(1)%sf(cell(1), cell(2), cell(3)) = updatedvar(1)%sf(cell(1), cell(2), cell(3)) + real(addFun1, kind=stp) - !Update time derivative of void fraction - addFun2 = strength_vel/Vol + !Update time derivative of void fraction + addFun2 = strength_vel/Vol + $:GPU_ATOMIC(atomic='update') + updatedvar(2)%sf(cell(1), cell(2), cell(3)) = updatedvar(2)%sf(cell(1), cell(2), cell(3)) + real(addFun2, kind=stp) + + !Product of two smeared functions + !Update void fraction * time derivative of void fraction + if (lag_params%cluster_type >= 4) then + addFun3 = (strength_vol*strength_vel)/Vol $:GPU_ATOMIC(atomic='update') - updatedvar(2)%sf(cell(1), cell(2), cell(3)) = updatedvar(2)%sf(cell(1), cell(2), cell(3)) + real(addFun2, kind=stp) - - !Product of two smeared functions - !Update void fraction * time derivative of void fraction - if (lag_params%cluster_type >= 4) then - addFun3 = (strength_vol*strength_vel)/Vol - $:GPU_ATOMIC(atomic='update') - updatedvar(5)%sf(cell(1), cell(2), cell(3)) = updatedvar(5)%sf(cell(1), cell(2), cell(3)) + real(addFun3, kind=stp) - end if - end do + updatedvar(5)%sf(cell(1), cell(2), cell(3)) = updatedvar(5)%sf(cell(1), cell(2), cell(3)) + real(addFun3, kind=stp) + end if + end do $:END_GPU_PARALLEL_LOOP() end subroutine s_deltafunc @@ -121,19 +121,7 @@ contains smearGridz = smearGrid if (p == 0) smearGridz = 1 -<<<<<<< HEAD $:GPU_PARALLEL_LOOP(private='[nodecoord,l,s_coord,cell,center]', copyin='[smearGrid,smearGridz]') - do l = 1, nBubs - nodecoord(1:3) = 0 - center(1:3) = 0._wp - volpart = 4._wp/3._wp*pi*lbk_rad(l, 2)**3._wp - s_coord(1:3) = lbk_s(l, 1:3, 2) - center(1:2) = lbk_pos(l, 1:2, 2) - if (p > 0) center(3) = lbk_pos(l, 3, 2) - call s_get_cell(s_coord, cell) - call s_compute_stddsv(cell, volpart, stddsv) -======= - $:GPU_PARALLEL_LOOP(private='[nodecoord,i,j,k,l,s_coord,cell,center]', copyin='[smearGrid,smearGridz]') do l = 1, nBubs nodecoord(1:3) = 0 center(1:3) = 0._wp @@ -143,72 +131,71 @@ contains if (p > 0) center(3) = lbk_pos(l, 3, 2) call s_get_cell(s_coord, cell) call s_compute_stddsv(cell, volpart, stddsv) ->>>>>>> 723822d0b71c437e734f7ca0fcd386b0e3884a79 - - strength_vol = volpart - strength_vel = 4._wp*pi*lbk_rad(l, 2)**2._wp*lbk_vel(l, 2) - - $:GPU_LOOP(collapse=3,private='[cellaux,nodecoord]') - do i = 1, smearGrid - do j = 1, smearGrid - do k = 1, smearGridz - cellaux(1) = cell(1) + i - (mapCells + 1) - cellaux(2) = cell(2) + j - (mapCells + 1) - cellaux(3) = cell(3) + k - (mapCells + 1) - if (p == 0) cellaux(3) = 0 - !Check if the cells intended to smear the bubbles in are in the computational domain - !and redefine the cells for symmetric boundary - call s_check_celloutside(cellaux, celloutside) - - if (.not. celloutside) then - - nodecoord(1) = x_cc(cellaux(1)) - nodecoord(2) = y_cc(cellaux(2)) - if (p > 0) nodecoord(3) = z_cc(cellaux(3)) - call s_applygaussian(center, cellaux, nodecoord, stddsv, 0._wp, func) - if (lag_params%cluster_type >= 4) call s_applygaussian(center, cellaux, nodecoord, stddsv, 1._wp, func2) - - ! Relocate cells for bubbles intersecting symmetric boundaries - if (any((/bc_x%beg, bc_x%end, bc_y%beg, bc_y%end, bc_z%beg, bc_z%end/) == BC_REFLECTIVE)) then - call s_shift_cell_symmetric_bc(cellaux, cell) - end if - else - func = 0._wp - func2 = 0._wp - cellaux(1) = cell(1) - cellaux(2) = cell(2) - cellaux(3) = cell(3) - if (p == 0) cellaux(3) = 0 + strength_vol = volpart + strength_vel = 4._wp*pi*lbk_rad(l, 2)**2._wp*lbk_vel(l, 2) + + $:GPU_LOOP(collapse=3,private='[cellaux,nodecoord]') + do i = 1, smearGrid + do j = 1, smearGrid + do k = 1, smearGridz + cellaux(1) = cell(1) + i - (mapCells + 1) + cellaux(2) = cell(2) + j - (mapCells + 1) + cellaux(3) = cell(3) + k - (mapCells + 1) + if (p == 0) cellaux(3) = 0 + + !Check if the cells intended to smear the bubbles in are in the computational domain + !and redefine the cells for symmetric boundary + call s_check_celloutside(cellaux, celloutside) + + if (.not. celloutside) then + + nodecoord(1) = x_cc(cellaux(1)) + nodecoord(2) = y_cc(cellaux(2)) + if (p > 0) nodecoord(3) = z_cc(cellaux(3)) + call s_applygaussian(center, cellaux, nodecoord, stddsv, 0._wp, func) + if (lag_params%cluster_type >= 4) call s_applygaussian(center, cellaux, nodecoord, stddsv, 1._wp, func2) + + ! Relocate cells for bubbles intersecting symmetric boundaries + if (any((/bc_x%beg, bc_x%end, bc_y%beg, bc_y%end, bc_z%beg, bc_z%end/) == BC_REFLECTIVE)) then + call s_shift_cell_symmetric_bc(cellaux, cell) end if - - !Update void fraction field - addFun1 = func*strength_vol - $:GPU_ATOMIC(atomic='update') - updatedvar(1)%sf(cellaux(1), cellaux(2), cellaux(3)) = & - updatedvar(1)%sf(cellaux(1), cellaux(2), cellaux(3)) & - + real(addFun1, kind=stp) - - !Update time derivative of void fraction - addFun2 = func*strength_vel + else + func = 0._wp + func2 = 0._wp + cellaux(1) = cell(1) + cellaux(2) = cell(2) + cellaux(3) = cell(3) + if (p == 0) cellaux(3) = 0 + end if + + !Update void fraction field + addFun1 = func*strength_vol + $:GPU_ATOMIC(atomic='update') + updatedvar(1)%sf(cellaux(1), cellaux(2), cellaux(3)) = & + updatedvar(1)%sf(cellaux(1), cellaux(2), cellaux(3)) & + + real(addFun1, kind=stp) + + !Update time derivative of void fraction + addFun2 = func*strength_vel + $:GPU_ATOMIC(atomic='update') + updatedvar(2)%sf(cellaux(1), cellaux(2), cellaux(3)) = & + updatedvar(2)%sf(cellaux(1), cellaux(2), cellaux(3)) & + + real(addFun2, kind=stp) + + !Product of two smeared functions + !Update void fraction * time derivative of void fraction + if (lag_params%cluster_type >= 4) then + addFun3 = func2*strength_vol*strength_vel $:GPU_ATOMIC(atomic='update') - updatedvar(2)%sf(cellaux(1), cellaux(2), cellaux(3)) = & - updatedvar(2)%sf(cellaux(1), cellaux(2), cellaux(3)) & - + real(addFun2, kind=stp) - - !Product of two smeared functions - !Update void fraction * time derivative of void fraction - if (lag_params%cluster_type >= 4) then - addFun3 = func2*strength_vol*strength_vel - $:GPU_ATOMIC(atomic='update') - updatedvar(5)%sf(cellaux(1), cellaux(2), cellaux(3)) = & - updatedvar(5)%sf(cellaux(1), cellaux(2), cellaux(3)) & - + real(addFun3, kind=stp) - end if - end do + updatedvar(5)%sf(cellaux(1), cellaux(2), cellaux(3)) = & + updatedvar(5)%sf(cellaux(1), cellaux(2), cellaux(3)) & + + real(addFun3, kind=stp) + end if end do end do end do + end do $:END_GPU_PARALLEL_LOOP() end subroutine s_gaussian @@ -394,7 +381,7 @@ contains !> The purpose of this procedure is to calculate the characteristic cell volume !! @param cell Computational coordinates (x, y, z) !! @param Charvol Characteristic volume - subroutine s_get_char_vol(cellx, celly, cellz, Charvol) + elemental subroutine s_get_char_vol(cellx, celly, cellz, Charvol) $:GPU_ROUTINE(function_name='s_get_char_vol',parallelism='[seq]', & & cray_inline=True) @@ -432,4 +419,4 @@ contains end subroutine s_get_cell -end module m_bubbles_EL_kernels \ No newline at end of file +end module m_bubbles_EL_kernels diff --git a/src/simulation/m_cbc.fpp b/src/simulation/m_cbc.fpp index 0ec7674bc..94c47f75b 100644 --- a/src/simulation/m_cbc.fpp +++ b/src/simulation/m_cbc.fpp @@ -786,343 +786,342 @@ contains ! FD2 or FD4 of RHS at j = 0 $:GPU_PARALLEL_LOOP(collapse=2, private='[r,k,alpha_rho, vel, adv_local, mf, dvel_ds, dadv_ds, Re_cbc, dalpha_rho_ds, dpres_ds, dvel_dt, dadv_dt, dalpha_rho_dt, L, lambda, Ys, dYs_dt, dYs_ds, h_k, Cp_i, Gamma_i, Xs, drho_dt, dpres_dt, dpi_inf_dt, dqv_dt, dgamma_dt, rho, pres, E, H, gamma, pi_inf, qv, c, Ma, T, sum_Enthalpies, Cv, Cp, e_mix, Mw, R_gas, vel_K_sum, vel_dv_dt_sum, i, j]', copyin='[dir_idx]') - do r = is3%beg, is3%end - do k = is2%beg, is2%end + do r = is3%beg, is3%end + do k = is2%beg, is2%end - ! Transferring the Primitive Variables - $:GPU_LOOP(parallelism='[seq]') - do i = 1, contxe - alpha_rho(i) = q_prim_rs${XYZ}$_vf(0, k, r, i) - end do + ! Transferring the Primitive Variables + $:GPU_LOOP(parallelism='[seq]') + do i = 1, contxe + alpha_rho(i) = q_prim_rs${XYZ}$_vf(0, k, r, i) + end do - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_dims - vel(i) = q_prim_rs${XYZ}$_vf(0, k, r, contxe + i) - end do + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_dims + vel(i) = q_prim_rs${XYZ}$_vf(0, k, r, contxe + i) + end do - vel_K_sum = 0._wp - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_dims - vel_K_sum = vel_K_sum + vel(i)**2._wp - end do + vel_K_sum = 0._wp + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_dims + vel_K_sum = vel_K_sum + vel(i)**2._wp + end do + + pres = q_prim_rs${XYZ}$_vf(0, k, r, E_idx) + + $:GPU_LOOP(parallelism='[seq]') + do i = 1, advxe - E_idx + adv_local(i) = q_prim_rs${XYZ}$_vf(0, k, r, E_idx + i) + end do - pres = q_prim_rs${XYZ}$_vf(0, k, r, E_idx) + if (bubbles_euler) then + call s_convert_species_to_mixture_variables_bubbles_acc(rho, gamma, pi_inf, qv, adv_local, alpha_rho, Re_cbc) + else + call s_convert_species_to_mixture_variables_acc(rho, gamma, pi_inf, qv, adv_local, alpha_rho, Re_cbc) + end if + $:GPU_LOOP(parallelism='[seq]') + do i = 1, contxe + mf(i) = alpha_rho(i)/rho + end do + + if (chemistry) then $:GPU_LOOP(parallelism='[seq]') - do i = 1, advxe - E_idx - adv_local(i) = q_prim_rs${XYZ}$_vf(0, k, r, E_idx + i) + do i = chemxb, chemxe + Ys(i - chemxb + 1) = q_prim_rs${XYZ}$_vf(0, k, r, i) end do - if (bubbles_euler) then - call s_convert_species_to_mixture_variables_bubbles_acc(rho, gamma, pi_inf, qv, adv_local, alpha_rho, Re_cbc) - else - call s_convert_species_to_mixture_variables_acc(rho, gamma, pi_inf, qv, adv_local, alpha_rho, Re_cbc) + call get_mixture_molecular_weight(Ys, Mw) + R_gas = gas_constant/Mw + T = pres/rho/R_gas + call get_mixture_specific_heat_cp_mass(T, Ys, Cp) + call get_mixture_energy_mass(T, Ys, e_mix) + E = rho*e_mix + 5.e-1_wp*rho*vel_K_sum + if (chem_params%gamma_method == 1) then + !> gamma_method = 1: Ref. Section 2.3.1 Formulation of doi:10.7907/ZKW8-ES97. + call get_mole_fractions(Mw, Ys, Xs) + call get_species_specific_heats_r(T, Cp_i) + Gamma_i = Cp_i/(Cp_i - 1.0_wp) + gamma = sum(Xs(:)/(Gamma_i(:) - 1.0_wp)) + else if (chem_params%gamma_method == 2) then + !> gamma_method = 2: c_p / c_v where c_p, c_v are specific heats. + call get_mixture_specific_heat_cv_mass(T, Ys, Cv) + gamma = 1.0_wp/(Cp/Cv - 1.0_wp) end if + else + E = gamma*pres + pi_inf + 5.e-1_wp*rho*vel_K_sum + end if - $:GPU_LOOP(parallelism='[seq]') - do i = 1, contxe - mf(i) = alpha_rho(i)/rho - end do + H = (E + pres)/rho - if (chemistry) then - $:GPU_LOOP(parallelism='[seq]') - do i = chemxb, chemxe - Ys(i - chemxb + 1) = q_prim_rs${XYZ}$_vf(0, k, r, i) - end do + ! Compute mixture sound speed + call s_compute_speed_of_sound(pres, rho, gamma, pi_inf, H, adv_local, vel_K_sum, 0._wp, c) - call get_mixture_molecular_weight(Ys, Mw) - R_gas = gas_constant/Mw - T = pres/rho/R_gas - call get_mixture_specific_heat_cp_mass(T, Ys, Cp) - call get_mixture_energy_mass(T, Ys, e_mix) - E = rho*e_mix + 5.e-1_wp*rho*vel_K_sum - if (chem_params%gamma_method == 1) then - !> gamma_method = 1: Ref. Section 2.3.1 Formulation of doi:10.7907/ZKW8-ES97. - call get_mole_fractions(Mw, Ys, Xs) - call get_species_specific_heats_r(T, Cp_i) - Gamma_i = Cp_i/(Cp_i - 1.0_wp) - gamma = sum(Xs(:)/(Gamma_i(:) - 1.0_wp)) - else if (chem_params%gamma_method == 2) then - !> gamma_method = 2: c_p / c_v where c_p, c_v are specific heats. - call get_mixture_specific_heat_cv_mass(T, Ys, Cv) - gamma = 1.0_wp/(Cp/Cv - 1.0_wp) - end if - else - E = gamma*pres + pi_inf + 5.e-1_wp*rho*vel_K_sum - end if + ! First-Order Spatial Derivatives of Primitive Variables - H = (E + pres)/rho + $:GPU_LOOP(parallelism='[seq]') + do i = 1, contxe + dalpha_rho_ds(i) = 0._wp + end do - ! Compute mixture sound speed - call s_compute_speed_of_sound(pres, rho, gamma, pi_inf, H, adv_local, vel_K_sum, 0._wp, c) + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_dims + dvel_ds(i) = 0._wp + end do - ! First-Order Spatial Derivatives of Primitive Variables + dpres_ds = 0._wp + $:GPU_LOOP(parallelism='[seq]') + do i = 1, advxe - E_idx + dadv_ds(i) = 0._wp + end do + if (chemistry) then $:GPU_LOOP(parallelism='[seq]') - do i = 1, contxe - dalpha_rho_ds(i) = 0._wp + do i = 1, num_species + dYs_ds(i) = 0._wp end do + end if + + $:GPU_LOOP(parallelism='[seq]') + do j = 0, buff_size + $:GPU_LOOP(parallelism='[seq]') + do i = 1, contxe + dalpha_rho_ds(i) = q_prim_rs${XYZ}$_vf(j, k, r, i)* & + fd_coef_${XYZ}$ (j, cbc_loc) + & + dalpha_rho_ds(i) + end do $:GPU_LOOP(parallelism='[seq]') do i = 1, num_dims - dvel_ds(i) = 0._wp + dvel_ds(i) = q_prim_rs${XYZ}$_vf(j, k, r, contxe + i)* & + fd_coef_${XYZ}$ (j, cbc_loc) + & + dvel_ds(i) end do - dpres_ds = 0._wp + dpres_ds = q_prim_rs${XYZ}$_vf(j, k, r, E_idx)* & + fd_coef_${XYZ}$ (j, cbc_loc) + & + dpres_ds $:GPU_LOOP(parallelism='[seq]') do i = 1, advxe - E_idx - dadv_ds(i) = 0._wp + dadv_ds(i) = q_prim_rs${XYZ}$_vf(j, k, r, E_idx + i)* & + fd_coef_${XYZ}$ (j, cbc_loc) + & + dadv_ds(i) end do if (chemistry) then $:GPU_LOOP(parallelism='[seq]') do i = 1, num_species - dYs_ds(i) = 0._wp + dYs_ds(i) = q_prim_rs${XYZ}$_vf(j, k, r, chemxb - 1 + i)* & + fd_coef_${XYZ}$ (j, cbc_loc) + & + dYs_ds(i) end do end if + end do - $:GPU_LOOP(parallelism='[seq]') - do j = 0, buff_size - - $:GPU_LOOP(parallelism='[seq]') - do i = 1, contxe - dalpha_rho_ds(i) = q_prim_rs${XYZ}$_vf(j, k, r, i)* & - fd_coef_${XYZ}$ (j, cbc_loc) + & - dalpha_rho_ds(i) - end do - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_dims - dvel_ds(i) = q_prim_rs${XYZ}$_vf(j, k, r, contxe + i)* & - fd_coef_${XYZ}$ (j, cbc_loc) + & - dvel_ds(i) - end do - - dpres_ds = q_prim_rs${XYZ}$_vf(j, k, r, E_idx)* & - fd_coef_${XYZ}$ (j, cbc_loc) + & - dpres_ds + ! First-Order Temporal Derivatives of Primitive Variables + lambda(1) = vel(dir_idx(1)) - c + lambda(2) = vel(dir_idx(1)) + lambda(3) = vel(dir_idx(1)) + c + + Ma = vel(dir_idx(1))/c + + if ((cbc_loc == -1 .and. bc${XYZ}$b == BC_CHAR_SLIP_WALL) .or. & + (cbc_loc == 1 .and. bc${XYZ}$e == BC_CHAR_SLIP_WALL)) then + call s_compute_slip_wall_L(lambda, L, rho, c, dpres_ds, dvel_ds) + else if ((cbc_loc == -1 .and. bc${XYZ}$b == BC_CHAR_NR_SUB_BUFFER) .or. & + (cbc_loc == 1 .and. bc${XYZ}$e == BC_CHAR_NR_SUB_BUFFER)) then + call s_compute_nonreflecting_subsonic_buffer_L(lambda, L, rho, c, mf, dalpha_rho_ds, dpres_ds, dvel_ds, dadv_ds, dYs_ds) + else if ((cbc_loc == -1 .and. bc${XYZ}$b == BC_CHAR_NR_SUB_INFLOW) .or. & + (cbc_loc == 1 .and. bc${XYZ}$e == BC_CHAR_NR_SUB_INFLOW)) then + call s_compute_nonreflecting_subsonic_inflow_L(lambda, L, rho, c, dpres_ds, dvel_ds) + ! Add GRCBC for Subsonic Inflow + if (bc_${XYZ}$%grcbc_in) then $:GPU_LOOP(parallelism='[seq]') - do i = 1, advxe - E_idx - dadv_ds(i) = q_prim_rs${XYZ}$_vf(j, k, r, E_idx + i)* & - fd_coef_${XYZ}$ (j, cbc_loc) + & - dadv_ds(i) + do i = 2, momxb + L(2) = c**3._wp*Ma*(alpha_rho(i - 1) - alpha_rho_in(i - 1, ${CBC_DIR}$))/Del_in(${CBC_DIR}$) - c*Ma*(pres - pres_in(${CBC_DIR}$))/Del_in(${CBC_DIR}$) end do - - if (chemistry) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_species - dYs_ds(i) = q_prim_rs${XYZ}$_vf(j, k, r, chemxb - 1 + i)* & - fd_coef_${XYZ}$ (j, cbc_loc) + & - dYs_ds(i) - end do - end if - end do - - ! First-Order Temporal Derivatives of Primitive Variables - lambda(1) = vel(dir_idx(1)) - c - lambda(2) = vel(dir_idx(1)) - lambda(3) = vel(dir_idx(1)) + c - - Ma = vel(dir_idx(1))/c - - if ((cbc_loc == -1 .and. bc${XYZ}$b == BC_CHAR_SLIP_WALL) .or. & - (cbc_loc == 1 .and. bc${XYZ}$e == BC_CHAR_SLIP_WALL)) then - call s_compute_slip_wall_L(lambda, L, rho, c, dpres_ds, dvel_ds) - else if ((cbc_loc == -1 .and. bc${XYZ}$b == BC_CHAR_NR_SUB_BUFFER) .or. & - (cbc_loc == 1 .and. bc${XYZ}$e == BC_CHAR_NR_SUB_BUFFER)) then - call s_compute_nonreflecting_subsonic_buffer_L(lambda, L, rho, c, mf, dalpha_rho_ds, dpres_ds, dvel_ds, dadv_ds, dYs_ds) - else if ((cbc_loc == -1 .and. bc${XYZ}$b == BC_CHAR_NR_SUB_INFLOW) .or. & - (cbc_loc == 1 .and. bc${XYZ}$e == BC_CHAR_NR_SUB_INFLOW)) then - call s_compute_nonreflecting_subsonic_inflow_L(lambda, L, rho, c, dpres_ds, dvel_ds) - ! Add GRCBC for Subsonic Inflow - if (bc_${XYZ}$%grcbc_in) then - $:GPU_LOOP(parallelism='[seq]') - do i = 2, momxb - L(2) = c**3._wp*Ma*(alpha_rho(i - 1) - alpha_rho_in(i - 1, ${CBC_DIR}$))/Del_in(${CBC_DIR}$) - c*Ma*(pres - pres_in(${CBC_DIR}$))/Del_in(${CBC_DIR}$) - end do - if (n > 0) then - L(momxb + 1) = c*Ma*(vel(dir_idx(2)) - vel_in(${CBC_DIR}$, dir_idx(2)))/Del_in(${CBC_DIR}$) - if (p > 0) then - L(momxb + 2) = c*Ma*(vel(dir_idx(3)) - vel_in(${CBC_DIR}$, dir_idx(3)))/Del_in(${CBC_DIR}$) - end if + if (n > 0) then + L(momxb + 1) = c*Ma*(vel(dir_idx(2)) - vel_in(${CBC_DIR}$, dir_idx(2)))/Del_in(${CBC_DIR}$) + if (p > 0) then + L(momxb + 2) = c*Ma*(vel(dir_idx(3)) - vel_in(${CBC_DIR}$, dir_idx(3)))/Del_in(${CBC_DIR}$) end if - $:GPU_LOOP(parallelism='[seq]') - do i = E_idx, advxe - 1 - L(i) = c*Ma*(adv_local(i + 1 - E_idx) - alpha_in(i + 1 - E_idx, ${CBC_DIR}$))/Del_in(${CBC_DIR}$) - end do - L(advxe) = rho*c**2._wp*(1._wp + Ma)*(vel(dir_idx(1)) + vel_in(${CBC_DIR}$, dir_idx(1))*sign(1, cbc_loc))/Del_in(${CBC_DIR}$) + c*(1._wp + Ma)*(pres - pres_in(${CBC_DIR}$))/Del_in(${CBC_DIR}$) end if - else if ((cbc_loc == -1 .and. bc${XYZ}$b == BC_CHAR_NR_SUB_OUTFLOW) .or. & - (cbc_loc == 1 .and. bc${XYZ}$e == BC_CHAR_NR_SUB_OUTFLOW)) then - call s_compute_nonreflecting_subsonic_outflow_L(lambda, L, rho, c, mf, dalpha_rho_ds, dpres_ds, dvel_ds, dadv_ds, dYs_ds) - ! Add GRCBC for Subsonic Outflow (Pressure) - if (bc_${XYZ}$%grcbc_out) then - L(advxe) = c*(1._wp - Ma)*(pres - pres_out(${CBC_DIR}$))/Del_out(${CBC_DIR}$) - - ! Add GRCBC for Subsonic Outflow (Normal Velocity) - if (bc_${XYZ}$%grcbc_vel_out) then - L(advxe) = L(advxe) + rho*c**2._wp*(1._wp - Ma)*(vel(dir_idx(1)) + vel_out(${CBC_DIR}$, dir_idx(1))*sign(1, cbc_loc))/Del_out(${CBC_DIR}$) - end if + $:GPU_LOOP(parallelism='[seq]') + do i = E_idx, advxe - 1 + L(i) = c*Ma*(adv_local(i + 1 - E_idx) - alpha_in(i + 1 - E_idx, ${CBC_DIR}$))/Del_in(${CBC_DIR}$) + end do + L(advxe) = rho*c**2._wp*(1._wp + Ma)*(vel(dir_idx(1)) + vel_in(${CBC_DIR}$, dir_idx(1))*sign(1, cbc_loc))/Del_in(${CBC_DIR}$) + c*(1._wp + Ma)*(pres - pres_in(${CBC_DIR}$))/Del_in(${CBC_DIR}$) + end if + else if ((cbc_loc == -1 .and. bc${XYZ}$b == BC_CHAR_NR_SUB_OUTFLOW) .or. & + (cbc_loc == 1 .and. bc${XYZ}$e == BC_CHAR_NR_SUB_OUTFLOW)) then + call s_compute_nonreflecting_subsonic_outflow_L(lambda, L, rho, c, mf, dalpha_rho_ds, dpres_ds, dvel_ds, dadv_ds, dYs_ds) + ! Add GRCBC for Subsonic Outflow (Pressure) + if (bc_${XYZ}$%grcbc_out) then + L(advxe) = c*(1._wp - Ma)*(pres - pres_out(${CBC_DIR}$))/Del_out(${CBC_DIR}$) + + ! Add GRCBC for Subsonic Outflow (Normal Velocity) + if (bc_${XYZ}$%grcbc_vel_out) then + L(advxe) = L(advxe) + rho*c**2._wp*(1._wp - Ma)*(vel(dir_idx(1)) + vel_out(${CBC_DIR}$, dir_idx(1))*sign(1, cbc_loc))/Del_out(${CBC_DIR}$) end if - else if ((cbc_loc == -1 .and. bc${XYZ}$b == BC_CHAR_FF_SUB_OUTFLOW) .or. & - (cbc_loc == 1 .and. bc${XYZ}$e == BC_CHAR_FF_SUB_OUTFLOW)) then - call s_compute_force_free_subsonic_outflow_L(lambda, L, rho, c, mf, dalpha_rho_ds, dpres_ds, dvel_ds, dadv_ds) - else if ((cbc_loc == -1 .and. bc${XYZ}$b == BC_CHAR_CP_SUB_OUTFLOW) .or. & - (cbc_loc == 1 .and. bc${XYZ}$e == BC_CHAR_CP_SUB_OUTFLOW)) then - call s_compute_constant_pressure_subsonic_outflow_L(lambda, L, rho, c, mf, dalpha_rho_ds, dpres_ds, dvel_ds, dadv_ds) - else if ((cbc_loc == -1 .and. bc${XYZ}$b == BC_CHAR_SUP_INFLOW) .or. & - (cbc_loc == 1 .and. bc${XYZ}$e == BC_CHAR_SUP_INFLOW)) then - call s_compute_supersonic_inflow_L(L) - else if ((cbc_loc == -1 .and. bc${XYZ}$b == BC_CHAR_SUP_OUTFLOW) .or. & - (cbc_loc == 1 .and. bc${XYZ}$e == BC_CHAR_SUP_OUTFLOW)) then - call s_compute_supersonic_outflow_L(lambda, L, rho, c, mf, dalpha_rho_ds, dpres_ds, dvel_ds, dadv_ds, dYs_ds) end if + else if ((cbc_loc == -1 .and. bc${XYZ}$b == BC_CHAR_FF_SUB_OUTFLOW) .or. & + (cbc_loc == 1 .and. bc${XYZ}$e == BC_CHAR_FF_SUB_OUTFLOW)) then + call s_compute_force_free_subsonic_outflow_L(lambda, L, rho, c, mf, dalpha_rho_ds, dpres_ds, dvel_ds, dadv_ds) + else if ((cbc_loc == -1 .and. bc${XYZ}$b == BC_CHAR_CP_SUB_OUTFLOW) .or. & + (cbc_loc == 1 .and. bc${XYZ}$e == BC_CHAR_CP_SUB_OUTFLOW)) then + call s_compute_constant_pressure_subsonic_outflow_L(lambda, L, rho, c, mf, dalpha_rho_ds, dpres_ds, dvel_ds, dadv_ds) + else if ((cbc_loc == -1 .and. bc${XYZ}$b == BC_CHAR_SUP_INFLOW) .or. & + (cbc_loc == 1 .and. bc${XYZ}$e == BC_CHAR_SUP_INFLOW)) then + call s_compute_supersonic_inflow_L(L) + else if ((cbc_loc == -1 .and. bc${XYZ}$b == BC_CHAR_SUP_OUTFLOW) .or. & + (cbc_loc == 1 .and. bc${XYZ}$e == BC_CHAR_SUP_OUTFLOW)) then + call s_compute_supersonic_outflow_L(lambda, L, rho, c, mf, dalpha_rho_ds, dpres_ds, dvel_ds, dadv_ds, dYs_ds) + end if + + ! Be careful about the cylindrical coordinate! + if (cyl_coord .and. cbc_dir == 2 .and. cbc_loc == 1) then + dpres_dt = -5.e-1_wp*(L(advxe) + L(1)) + rho*c*c*vel(dir_idx(1)) & + /y_cc(n) + else + dpres_dt = -5.e-1_wp*(L(advxe) + L(1)) + end if + + $:GPU_LOOP(parallelism='[seq]') + do i = 1, contxe + dalpha_rho_dt(i) = & + -(L(i + 1) - mf(i)*dpres_dt)/(c*c) + end do - ! Be careful about the cylindrical coordinate! - if (cyl_coord .and. cbc_dir == 2 .and. cbc_loc == 1) then - dpres_dt = -5.e-1_wp*(L(advxe) + L(1)) + rho*c*c*vel(dir_idx(1)) & - /y_cc(n) - else - dpres_dt = -5.e-1_wp*(L(advxe) + L(1)) - end if + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_dims + dvel_dt(dir_idx(i)) = dir_flg(dir_idx(i))* & + (L(1) - L(advxe))/(2._wp*rho*c) + & + (dir_flg(dir_idx(i)) - 1._wp)* & + L(momxb + i - 1) + end do + + vel_dv_dt_sum = 0._wp + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_dims + vel_dv_dt_sum = vel_dv_dt_sum + vel(i)*dvel_dt(i) + end do + if (chemistry) then $:GPU_LOOP(parallelism='[seq]') - do i = 1, contxe - dalpha_rho_dt(i) = & - -(L(i + 1) - mf(i)*dpres_dt)/(c*c) + do i = 1, num_species + dYs_dt(i) = -1._wp*L(chemxb + i - 1) end do + end if + ! The treatment of void fraction source is unclear + if (cyl_coord .and. cbc_dir == 2 .and. cbc_loc == 1) then $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_dims - dvel_dt(dir_idx(i)) = dir_flg(dir_idx(i))* & - (L(1) - L(advxe))/(2._wp*rho*c) + & - (dir_flg(dir_idx(i)) - 1._wp)* & - L(momxb + i - 1) + do i = 1, advxe - E_idx + dadv_dt(i) = -L(momxe + i) !+ adv_local(i) * vel(dir_idx(1))/y_cc(n) end do - - vel_dv_dt_sum = 0._wp + else $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_dims - vel_dv_dt_sum = vel_dv_dt_sum + vel(i)*dvel_dt(i) + do i = 1, advxe - E_idx + dadv_dt(i) = -L(momxe + i) end do + end if - if (chemistry) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_species - dYs_dt(i) = -1._wp*L(chemxb + i - 1) - end do - end if + drho_dt = 0._wp; dgamma_dt = 0._wp; dpi_inf_dt = 0._wp; dqv_dt = 0._wp - ! The treatment of void fraction source is unclear - if (cyl_coord .and. cbc_dir == 2 .and. cbc_loc == 1) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, advxe - E_idx - dadv_dt(i) = -L(momxe + i) !+ adv_local(i) * vel(dir_idx(1))/y_cc(n) - end do - else - $:GPU_LOOP(parallelism='[seq]') - do i = 1, advxe - E_idx - dadv_dt(i) = -L(momxe + i) - end do - end if + if (model_eqns == 1) then + drho_dt = dalpha_rho_dt(1) + dgamma_dt = dadv_dt(1) + #:if not MFC_CASE_OPTIMIZATION or num_fluids > 1 + dpi_inf_dt = dadv_dt(2) + #:endif + else + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids + drho_dt = drho_dt + dalpha_rho_dt(i) + dgamma_dt = dgamma_dt + dadv_dt(i)*gammas(i) + dpi_inf_dt = dpi_inf_dt + dadv_dt(i)*pi_infs(i) + dqv_dt = dqv_dt + dalpha_rho_dt(i)*qvs(i) + end do + end if - drho_dt = 0._wp; dgamma_dt = 0._wp; dpi_inf_dt = 0._wp; dqv_dt = 0._wp + ! flux_rs_vf_l and flux_src_rs_vf_l at j = -1/2 + $:GPU_LOOP(parallelism='[seq]') + do i = 1, contxe + flux_rs${XYZ}$_vf_l(-1, k, r, i) = flux_rs${XYZ}$_vf_l(0, k, r, i) & + + ds(0)*dalpha_rho_dt(i) + end do - if (model_eqns == 1) then - drho_dt = dalpha_rho_dt(1) - dgamma_dt = dadv_dt(1) - #:if not MFC_CASE_OPTIMIZATION or num_fluids > 1 - dpi_inf_dt = dadv_dt(2) - #:endif - else - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - drho_dt = drho_dt + dalpha_rho_dt(i) - dgamma_dt = dgamma_dt + dadv_dt(i)*gammas(i) - dpi_inf_dt = dpi_inf_dt + dadv_dt(i)*pi_infs(i) - dqv_dt = dqv_dt + dalpha_rho_dt(i)*qvs(i) - end do - end if + $:GPU_LOOP(parallelism='[seq]') + do i = momxb, momxe + flux_rs${XYZ}$_vf_l(-1, k, r, i) = flux_rs${XYZ}$_vf_l(0, k, r, i) & + + ds(0)*(vel(i - contxe)*drho_dt & + + rho*dvel_dt(i - contxe)) + end do - ! flux_rs_vf_l and flux_src_rs_vf_l at j = -1/2 + if (chemistry) then + ! Evolution of LODI equation of energy for real gases adjusted to perfect gas, doi:10.1006/jcph.2002.6990 + call get_species_enthalpies_rt(T, h_k) + sum_Enthalpies = 0._wp $:GPU_LOOP(parallelism='[seq]') - do i = 1, contxe - flux_rs${XYZ}$_vf_l(-1, k, r, i) = flux_rs${XYZ}$_vf_l(0, k, r, i) & - + ds(0)*dalpha_rho_dt(i) + do i = 1, num_species + #:block UNDEF_AMD + h_k(i) = h_k(i)*gas_constant/molecular_weights(i)*T + sum_Enthalpies = sum_Enthalpies + (rho*h_k(i) - pres*Mw/molecular_weights(i)*Cp/R_gas)*dYs_dt(i) + #:endblock UNDEF_AMD + + #:block DEF_AMD + h_k(i) = h_k(i)*gas_constant/molecular_weights_nonparameter(i)*T + sum_Enthalpies = sum_Enthalpies + (rho*h_k(i) - pres*Mw/molecular_weights_nonparameter(i)*Cp/R_gas)*dYs_dt(i) + #:endblock DEF_AMD end do - + flux_rs${XYZ}$_vf_l(-1, k, r, E_idx) = flux_rs${XYZ}$_vf_l(0, k, r, E_idx) & + + ds(0)*((E/rho + pres/rho)*drho_dt + rho*vel_dv_dt_sum + Cp*T*L(2)/(c*c) + sum_Enthalpies) $:GPU_LOOP(parallelism='[seq]') - do i = momxb, momxe - flux_rs${XYZ}$_vf_l(-1, k, r, i) = flux_rs${XYZ}$_vf_l(0, k, r, i) & - + ds(0)*(vel(i - contxe)*drho_dt & - + rho*dvel_dt(i - contxe)) + do i = 1, num_species + flux_rs${XYZ}$_vf_l(-1, k, r, i - 1 + chemxb) = flux_rs${XYZ}$_vf_l(0, k, r, chemxb + i - 1) & + + ds(0)*(drho_dt*Ys(i) + rho*dYs_dt(i)) + end do + else + flux_rs${XYZ}$_vf_l(-1, k, r, E_idx) = flux_rs${XYZ}$_vf_l(0, k, r, E_idx) & + + ds(0)*(pres*dgamma_dt & + + gamma*dpres_dt & + + dpi_inf_dt & + + dqv_dt & + + rho*vel_dv_dt_sum & + + 5.e-1_wp*drho_dt*vel_K_sum) + end if + + if (riemann_solver == 1) then + $:GPU_LOOP(parallelism='[seq]') + do i = advxb, advxe + flux_rs${XYZ}$_vf_l(-1, k, r, i) = 0._wp end do - if (chemistry) then - ! Evolution of LODI equation of energy for real gases adjusted to perfect gas, doi:10.1006/jcph.2002.6990 - call get_species_enthalpies_rt(T, h_k) - sum_Enthalpies = 0._wp - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_species - #:block UNDEF_AMD - h_k(i) = h_k(i)*gas_constant/molecular_weights(i)*T - sum_Enthalpies = sum_Enthalpies + (rho*h_k(i) - pres*Mw/molecular_weights(i)*Cp/R_gas)*dYs_dt(i) - #:endblock UNDEF_AMD - - #:block DEF_AMD - h_k(i) = h_k(i)*gas_constant/molecular_weights_nonparameter(i)*T - sum_Enthalpies = sum_Enthalpies + (rho*h_k(i) - pres*Mw/molecular_weights_nonparameter(i)*Cp/R_gas)*dYs_dt(i) - #:endblock DEF_AMD - end do - flux_rs${XYZ}$_vf_l(-1, k, r, E_idx) = flux_rs${XYZ}$_vf_l(0, k, r, E_idx) & - + ds(0)*((E/rho + pres/rho)*drho_dt + rho*vel_dv_dt_sum + Cp*T*L(2)/(c*c) + sum_Enthalpies) - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_species - flux_rs${XYZ}$_vf_l(-1, k, r, i - 1 + chemxb) = flux_rs${XYZ}$_vf_l(0, k, r, chemxb + i - 1) & - + ds(0)*(drho_dt*Ys(i) + rho*dYs_dt(i)) - end do - else - flux_rs${XYZ}$_vf_l(-1, k, r, E_idx) = flux_rs${XYZ}$_vf_l(0, k, r, E_idx) & - + ds(0)*(pres*dgamma_dt & - + gamma*dpres_dt & - + dpi_inf_dt & - + dqv_dt & - + rho*vel_dv_dt_sum & - + 5.e-1_wp*drho_dt*vel_K_sum) - end if - - if (riemann_solver == 1) then - $:GPU_LOOP(parallelism='[seq]') - do i = advxb, advxe - flux_rs${XYZ}$_vf_l(-1, k, r, i) = 0._wp - end do - - $:GPU_LOOP(parallelism='[seq]') - do i = advxb, advxe - flux_src_rs${XYZ}$_vf_l(-1, k, r, i) = & - 1._wp/max(abs(vel(dir_idx(1))), sgm_eps) & - *sign(1._wp, vel(dir_idx(1))) & - *(flux_rs${XYZ}$_vf_l(0, k, r, i) & - + vel(dir_idx(1)) & - *flux_src_rs${XYZ}$_vf_l(0, k, r, i) & - + ds(0)*dadv_dt(i - E_idx)) - end do - - else + $:GPU_LOOP(parallelism='[seq]') + do i = advxb, advxe + flux_src_rs${XYZ}$_vf_l(-1, k, r, i) = & + 1._wp/max(abs(vel(dir_idx(1))), sgm_eps) & + *sign(1._wp, vel(dir_idx(1))) & + *(flux_rs${XYZ}$_vf_l(0, k, r, i) & + + vel(dir_idx(1)) & + *flux_src_rs${XYZ}$_vf_l(0, k, r, i) & + + ds(0)*dadv_dt(i - E_idx)) + end do - $:GPU_LOOP(parallelism='[seq]') - do i = advxb, advxe - flux_rs${XYZ}$_vf_l(-1, k, r, i) = flux_rs${XYZ}$_vf_l(0, k, r, i) + & - ds(0)*dadv_dt(i - E_idx) - end do + else - $:GPU_LOOP(parallelism='[seq]') - do i = advxb, advxe - flux_src_rs${XYZ}$_vf_l(-1, k, r, i) = flux_src_rs${XYZ}$_vf_l(0, k, r, i) - end do + $:GPU_LOOP(parallelism='[seq]') + do i = advxb, advxe + flux_rs${XYZ}$_vf_l(-1, k, r, i) = flux_rs${XYZ}$_vf_l(0, k, r, i) + & + ds(0)*dadv_dt(i - E_idx) + end do - end if - ! END: flux_rs_vf_l and flux_src_rs_vf_l at j = -1/2 + $:GPU_LOOP(parallelism='[seq]') + do i = advxb, advxe + flux_src_rs${XYZ}$_vf_l(-1, k, r, i) = flux_src_rs${XYZ}$_vf_l(0, k, r, i) + end do + end if + ! END: flux_rs_vf_l and flux_src_rs_vf_l at j = -1/2 end do end do diff --git a/src/simulation/m_fftw.fpp b/src/simulation/m_fftw.fpp index 2ded1128d..175391899 100644 --- a/src/simulation/m_fftw.fpp +++ b/src/simulation/m_fftw.fpp @@ -137,15 +137,15 @@ contains if (bc_y%beg >= 0) return #if defined(MFC_GPU) - $:GPU_PARALLEL_LOOP(private='[j,k,l]', collapse=3) - do k = 1, sys_size - do j = 0, m - do l = 1, cmplx_size - data_fltr_cmplx_gpu(l + j*cmplx_size + (k - 1)*cmplx_size*x_size) = (0_dp, 0_dp) - end do + $:GPU_PARALLEL_LOOP(private='[j,k,l]', collapse=3) + do k = 1, sys_size + do j = 0, m + do l = 1, cmplx_size + data_fltr_cmplx_gpu(l + j*cmplx_size + (k - 1)*cmplx_size*x_size) = (0_dp, 0_dp) end do end do - $:END_GPU_PARALLEL_LOOP() + end do + $:END_GPU_PARALLEL_LOOP() $:GPU_PARALLEL_LOOP(private='[j,k,l]', collapse=3) do k = 1, sys_size @@ -169,13 +169,13 @@ contains $:GPU_UPDATE(device='[Nfq]') $:GPU_PARALLEL_LOOP(private='[j,k,l]', collapse=3) - do k = 1, sys_size - do j = 0, m - do l = 1, Nfq - data_fltr_cmplx_gpu(l + j*cmplx_size + (k - 1)*cmplx_size*x_size) = data_cmplx_gpu(l + j*cmplx_size + (k - 1)*cmplx_size*x_size) - end do + do k = 1, sys_size + do j = 0, m + do l = 1, Nfq + data_fltr_cmplx_gpu(l + j*cmplx_size + (k - 1)*cmplx_size*x_size) = data_cmplx_gpu(l + j*cmplx_size + (k - 1)*cmplx_size*x_size) end do end do + end do $:END_GPU_PARALLEL_LOOP() #:call GPU_HOST_DATA(use_device_addr='[data_real_gpu, data_cmplx_gpu, data_fltr_cmplx_gpu]') @@ -188,38 +188,38 @@ contains #:endcall GPU_HOST_DATA $:GPU_PARALLEL_LOOP(private='[j,k,l]', collapse=3) - do k = 1, sys_size - do j = 0, m - do l = 0, p - data_real_gpu(l + j*real_size + 1 + (k - 1)*real_size*x_size) = data_real_gpu(l + j*real_size + 1 + (k - 1)*real_size*x_size)/real(real_size, dp) - q_cons_vf(k)%sf(j, 0, l) = data_real_gpu(l + j*real_size + 1 + (k - 1)*real_size*x_size) - end do + do k = 1, sys_size + do j = 0, m + do l = 0, p + data_real_gpu(l + j*real_size + 1 + (k - 1)*real_size*x_size) = data_real_gpu(l + j*real_size + 1 + (k - 1)*real_size*x_size)/real(real_size, dp) + q_cons_vf(k)%sf(j, 0, l) = data_real_gpu(l + j*real_size + 1 + (k - 1)*real_size*x_size) end do end do + end do $:END_GPU_PARALLEL_LOOP() do i = 1, fourier_rings $:GPU_PARALLEL_LOOP(private='[j,k,l]', collapse=3) - do k = 1, sys_size - do j = 0, m - do l = 1, cmplx_size - data_fltr_cmplx_gpu(l + j*cmplx_size + (k - 1)*cmplx_size*x_size) = (0_dp, 0_dp) - end do + do k = 1, sys_size + do j = 0, m + do l = 1, cmplx_size + data_fltr_cmplx_gpu(l + j*cmplx_size + (k - 1)*cmplx_size*x_size) = (0_dp, 0_dp) + end do end do - $:END_GPU_PARALLEL_LOOP() + $:END_GPU_PARALLEL_LOOP() - #:call GPU_HOST_DATA(use_device_addr='[data_real_gpu, data_cmplx_gpu, data_fltr_cmplx_gpu]') + #:call GPU_HOST_DATA(use_device_addr='[data_real_gpu, data_cmplx_gpu, data_fltr_cmplx_gpu]') #if defined(__PGI) - ierr = cufftExecZ2D(bwd_plan_gpu, data_fltr_cmplx_gpu, data_real_gpu) + ierr = cufftExecZ2D(bwd_plan_gpu, data_fltr_cmplx_gpu, data_real_gpu) #else - ierr = hipfftExecZ2D(bwd_plan_gpu, data_fltr_cmplx_gpu, data_real_gpu) - call hipCheck(hipDeviceSynchronize()) + ierr = hipfftExecZ2D(bwd_plan_gpu, data_fltr_cmplx_gpu, data_real_gpu) + call hipCheck(hipDeviceSynchronize()) #endif - #:endcall GPU_HOST_DATA + #:endcall GPU_HOST_DATA - $:GPU_PARALLEL_LOOP(collapse=3, firstprivate='[i]') + $:GPU_PARALLEL_LOOP(collapse=3, firstprivate='[i]') do k = 1, sys_size do j = 0, m do l = 0, p @@ -228,66 +228,66 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP() - end do - -#else - Nfq = 3 - do j = 0, m - do k = 1, sys_size - data_fltr_cmplx(:) = (0_dp, 0_dp) - data_real(1:p + 1) = q_cons_vf(k)%sf(j, 0, 0:p) - call fftw_execute_dft_r2c(fwd_plan, data_real, data_cmplx) - data_fltr_cmplx(1:Nfq) = data_cmplx(1:Nfq) - call fftw_execute_dft_c2r(bwd_plan, data_fltr_cmplx, data_real) - data_real(:) = data_real(:)/real(real_size, dp) - q_cons_vf(k)%sf(j, 0, 0:p) = data_real(1:p + 1) + $:END_GPU_PARALLEL_LOOP() end do - end do - ! Apply Fourier filter to additional rings - do i = 1, fourier_rings - Nfq = min(floor(2_dp*real(i, dp)*pi), cmplx_size) +#else + Nfq = 3 do j = 0, m do k = 1, sys_size data_fltr_cmplx(:) = (0_dp, 0_dp) - data_real(1:p + 1) = q_cons_vf(k)%sf(j, i, 0:p) + data_real(1:p + 1) = q_cons_vf(k)%sf(j, 0, 0:p) call fftw_execute_dft_r2c(fwd_plan, data_real, data_cmplx) data_fltr_cmplx(1:Nfq) = data_cmplx(1:Nfq) call fftw_execute_dft_c2r(bwd_plan, data_fltr_cmplx, data_real) data_real(:) = data_real(:)/real(real_size, dp) - q_cons_vf(k)%sf(j, i, 0:p) = data_real(1:p + 1) + q_cons_vf(k)%sf(j, 0, 0:p) = data_real(1:p + 1) + end do + end do + + ! Apply Fourier filter to additional rings + do i = 1, fourier_rings + Nfq = min(floor(2_dp*real(i, dp)*pi), cmplx_size) + do j = 0, m + do k = 1, sys_size + data_fltr_cmplx(:) = (0_dp, 0_dp) + data_real(1:p + 1) = q_cons_vf(k)%sf(j, i, 0:p) + call fftw_execute_dft_r2c(fwd_plan, data_real, data_cmplx) + data_fltr_cmplx(1:Nfq) = data_cmplx(1:Nfq) + call fftw_execute_dft_c2r(bwd_plan, data_fltr_cmplx, data_real) + data_real(:) = data_real(:)/real(real_size, dp) + q_cons_vf(k)%sf(j, i, 0:p) = data_real(1:p + 1) + end do end do end do - end do #endif - end subroutine s_apply_fourier_filter + end subroutine s_apply_fourier_filter - !> The purpose of this subroutine is to destroy the fftw plan + !> The purpose of this subroutine is to destroy the fftw plan !! that will be used in the forward and backward DFTs when !! applying the Fourier filter in the azimuthal direction. - impure subroutine s_finalize_fftw_module + impure subroutine s_finalize_fftw_module #if defined(MFC_GPU) - integer :: ierr !< Generic flag used to identify and report GPU errors - @:DEALLOCATE(data_real_gpu, data_fltr_cmplx_gpu, data_cmplx_gpu) + integer :: ierr !< Generic flag used to identify and report GPU errors + @:DEALLOCATE(data_real_gpu, data_fltr_cmplx_gpu, data_cmplx_gpu) #if defined(__PGI) - ierr = cufftDestroy(fwd_plan_gpu) - ierr = cufftDestroy(bwd_plan_gpu) + ierr = cufftDestroy(fwd_plan_gpu) + ierr = cufftDestroy(bwd_plan_gpu) #else - ierr = hipfftDestroy(fwd_plan_gpu) - ierr = hipfftDestroy(bwd_plan_gpu) + ierr = hipfftDestroy(fwd_plan_gpu) + ierr = hipfftDestroy(bwd_plan_gpu) #endif #else - call fftw_free(fftw_real_data) - call fftw_free(fftw_cmplx_data) - call fftw_free(fftw_fltr_cmplx_data) + call fftw_free(fftw_real_data) + call fftw_free(fftw_cmplx_data) + call fftw_free(fftw_fltr_cmplx_data) - call fftw_destroy_plan(fwd_plan) - call fftw_destroy_plan(bwd_plan) + call fftw_destroy_plan(fwd_plan) + call fftw_destroy_plan(bwd_plan) #endif - end subroutine s_finalize_fftw_module -end module m_fftw + end subroutine s_finalize_fftw_module + end module m_fftw diff --git a/src/simulation/m_hyperelastic.fpp b/src/simulation/m_hyperelastic.fpp index fcf1fb404..9e21ef882 100644 --- a/src/simulation/m_hyperelastic.fpp +++ b/src/simulation/m_hyperelastic.fpp @@ -107,103 +107,103 @@ contains integer :: j, k, l, i, r $:GPU_PARALLEL_LOOP(collapse=3, private='[i,j,k,l,alpha_K, alpha_rho_K, rho, gamma, pi_inf, qv, G_local, Re, tensora, tensorb, i]') - do l = 0, p - do k = 0, n - do j = 0, m + do l = 0, p + do k = 0, n + do j = 0, m + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids + alpha_rho_k(i) = q_cons_vf(i)%sf(j, k, l) + alpha_k(i) = q_cons_vf(advxb + i - 1)%sf(j, k, l) + end do + ! If in simulation, use acc mixture subroutines + call s_convert_species_to_mixture_variables_acc(rho, gamma, pi_inf, qv, alpha_k, & + alpha_rho_k, Re, G_local, Gs_hyper) + rho = max(rho, sgm_eps) + G_local = max(G_local, sgm_eps) + !if ( G_local <= verysmall ) G_K = 0._wp + + if (G_local > verysmall) then $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - alpha_rho_k(i) = q_cons_vf(i)%sf(j, k, l) - alpha_k(i) = q_cons_vf(advxb + i - 1)%sf(j, k, l) + do i = 1, tensor_size + tensora(i) = 0._wp end do - ! If in simulation, use acc mixture subroutines - call s_convert_species_to_mixture_variables_acc(rho, gamma, pi_inf, qv, alpha_k, & - alpha_rho_k, Re, G_local, Gs_hyper) - rho = max(rho, sgm_eps) - G_local = max(G_local, sgm_eps) - !if ( G_local <= verysmall ) G_K = 0._wp - - if (G_local > verysmall) then + ! STEP 1: computing the grad_xi tensor using finite differences + ! grad_xi definition / organization + ! number for the tensor 1-3: dxix_dx, dxiy_dx, dxiz_dx + ! 4-6 : dxix_dy, dxiy_dy, dxiz_dy + ! 7-9 : dxix_dz, dxiy_dz, dxiz_dz + $:GPU_LOOP(parallelism='[seq]') + do r = -fd_number, fd_number + ! derivatives in the x-direction + tensora(1) = tensora(1) + q_prim_vf(xibeg)%sf(j + r, k, l)*fd_coeff_x_hyper(r, j) + tensora(2) = tensora(2) + q_prim_vf(xibeg + 1)%sf(j + r, k, l)*fd_coeff_x_hyper(r, j) + tensora(3) = tensora(3) + q_prim_vf(xiend)%sf(j + r, k, l)*fd_coeff_x_hyper(r, j) + ! derivatives in the y-direction + tensora(4) = tensora(4) + q_prim_vf(xibeg)%sf(j, k + r, l)*fd_coeff_y_hyper(r, k) + tensora(5) = tensora(5) + q_prim_vf(xibeg + 1)%sf(j, k + r, l)*fd_coeff_y_hyper(r, k) + tensora(6) = tensora(6) + q_prim_vf(xiend)%sf(j, k + r, l)*fd_coeff_y_hyper(r, k) + ! derivatives in the z-direction + tensora(7) = tensora(7) + q_prim_vf(xibeg)%sf(j, k, l + r)*fd_coeff_z_hyper(r, l) + tensora(8) = tensora(8) + q_prim_vf(xibeg + 1)%sf(j, k, l + r)*fd_coeff_z_hyper(r, l) + tensora(9) = tensora(9) + q_prim_vf(xiend)%sf(j, k, l + r)*fd_coeff_z_hyper(r, l) + end do + ! STEP 2a: computing the adjoint of the grad_xi tensor for the inverse + tensorb(1) = tensora(5)*tensora(9) - tensora(6)*tensora(8) + tensorb(2) = -(tensora(2)*tensora(9) - tensora(3)*tensora(8)) + tensorb(3) = tensora(2)*tensora(6) - tensora(3)*tensora(5) + tensorb(4) = -(tensora(4)*tensora(9) - tensora(6)*tensora(7)) + tensorb(5) = tensora(1)*tensora(9) - tensora(3)*tensora(7) + tensorb(6) = -(tensora(1)*tensora(6) - tensora(4)*tensora(3)) + tensorb(7) = tensora(4)*tensora(8) - tensora(5)*tensora(7) + tensorb(8) = -(tensora(1)*tensora(8) - tensora(2)*tensora(7)) + tensorb(9) = tensora(1)*tensora(5) - tensora(2)*tensora(4) + + ! STEP 2b: computing the determinant of the grad_xi tensor + tensorb(tensor_size) = tensora(1)*(tensora(5)*tensora(9) - tensora(6)*tensora(8)) & + - tensora(2)*(tensora(4)*tensora(9) - tensora(6)*tensora(7)) & + + tensora(3)*(tensora(4)*tensora(8) - tensora(5)*tensora(7)) + + if (tensorb(tensor_size) > verysmall) then + ! STEP 2c: computing the inverse of grad_xi tensor = F + ! tensorb is the adjoint, tensora becomes F $:GPU_LOOP(parallelism='[seq]') - do i = 1, tensor_size - tensora(i) = 0._wp + do i = 1, tensor_size - 1 + tensora(i) = tensorb(i)/tensorb(tensor_size) end do - ! STEP 1: computing the grad_xi tensor using finite differences - ! grad_xi definition / organization - ! number for the tensor 1-3: dxix_dx, dxiy_dx, dxiz_dx - ! 4-6 : dxix_dy, dxiy_dy, dxiz_dy - ! 7-9 : dxix_dz, dxiy_dz, dxiz_dz + + ! STEP 2d: computing the J = det(F) = 1/det(\grad{\xi}) + tensorb(tensor_size) = 1._wp/tensorb(tensor_size) + + ! STEP 3: computing F transpose F + tensorb(1) = tensora(1)**2 + tensora(2)**2 + tensora(3)**2 + tensorb(5) = tensora(4)**2 + tensora(5)**2 + tensora(6)**2 + tensorb(9) = tensora(7)**2 + tensora(8)**2 + tensora(9)**2 + tensorb(2) = tensora(1)*tensora(4) + tensora(2)*tensora(5) + tensora(3)*tensora(6) + tensorb(3) = tensora(1)*tensora(7) + tensora(2)*tensora(8) + tensora(3)*tensora(9) + tensorb(6) = tensora(4)*tensora(7) + tensora(5)*tensora(8) + tensora(6)*tensora(9) + ! STEP 4: update the btensor, this is consistent with Riemann solvers + #:for BIJ, TXY in [(1,1),(2,2),(3,5),(4,3),(5,6),(6,9)] + btensor%vf(${BIJ}$)%sf(j, k, l) = tensorb(${TXY}$) + #:endfor + ! store the determinant at the last entry of the btensor + btensor%vf(b_size)%sf(j, k, l) = tensorb(tensor_size) + ! STEP 5a: updating the Cauchy stress primitive scalar field + if (hyper_model == 1) then + call s_neoHookean_cauchy_solver(btensor%vf, q_prim_vf, G_local, j, k, l) + elseif (hyper_model == 2) then + call s_Mooney_Rivlin_cauchy_solver(btensor%vf, q_prim_vf, G_local, j, k, l) + end if + ! STEP 5b: updating the pressure field + q_prim_vf(E_idx)%sf(j, k, l) = q_prim_vf(E_idx)%sf(j, k, l) - & + G_local*q_prim_vf(xiend + 1)%sf(j, k, l)/gamma + ! STEP 5c: updating the Cauchy stress conservative scalar field $:GPU_LOOP(parallelism='[seq]') - do r = -fd_number, fd_number - ! derivatives in the x-direction - tensora(1) = tensora(1) + q_prim_vf(xibeg)%sf(j + r, k, l)*fd_coeff_x_hyper(r, j) - tensora(2) = tensora(2) + q_prim_vf(xibeg + 1)%sf(j + r, k, l)*fd_coeff_x_hyper(r, j) - tensora(3) = tensora(3) + q_prim_vf(xiend)%sf(j + r, k, l)*fd_coeff_x_hyper(r, j) - ! derivatives in the y-direction - tensora(4) = tensora(4) + q_prim_vf(xibeg)%sf(j, k + r, l)*fd_coeff_y_hyper(r, k) - tensora(5) = tensora(5) + q_prim_vf(xibeg + 1)%sf(j, k + r, l)*fd_coeff_y_hyper(r, k) - tensora(6) = tensora(6) + q_prim_vf(xiend)%sf(j, k + r, l)*fd_coeff_y_hyper(r, k) - ! derivatives in the z-direction - tensora(7) = tensora(7) + q_prim_vf(xibeg)%sf(j, k, l + r)*fd_coeff_z_hyper(r, l) - tensora(8) = tensora(8) + q_prim_vf(xibeg + 1)%sf(j, k, l + r)*fd_coeff_z_hyper(r, l) - tensora(9) = tensora(9) + q_prim_vf(xiend)%sf(j, k, l + r)*fd_coeff_z_hyper(r, l) + do i = 1, b_size - 1 + q_cons_vf(strxb + i - 1)%sf(j, k, l) = & + rho*q_prim_vf(strxb + i - 1)%sf(j, k, l) end do - ! STEP 2a: computing the adjoint of the grad_xi tensor for the inverse - tensorb(1) = tensora(5)*tensora(9) - tensora(6)*tensora(8) - tensorb(2) = -(tensora(2)*tensora(9) - tensora(3)*tensora(8)) - tensorb(3) = tensora(2)*tensora(6) - tensora(3)*tensora(5) - tensorb(4) = -(tensora(4)*tensora(9) - tensora(6)*tensora(7)) - tensorb(5) = tensora(1)*tensora(9) - tensora(3)*tensora(7) - tensorb(6) = -(tensora(1)*tensora(6) - tensora(4)*tensora(3)) - tensorb(7) = tensora(4)*tensora(8) - tensora(5)*tensora(7) - tensorb(8) = -(tensora(1)*tensora(8) - tensora(2)*tensora(7)) - tensorb(9) = tensora(1)*tensora(5) - tensora(2)*tensora(4) - - ! STEP 2b: computing the determinant of the grad_xi tensor - tensorb(tensor_size) = tensora(1)*(tensora(5)*tensora(9) - tensora(6)*tensora(8)) & - - tensora(2)*(tensora(4)*tensora(9) - tensora(6)*tensora(7)) & - + tensora(3)*(tensora(4)*tensora(8) - tensora(5)*tensora(7)) - - if (tensorb(tensor_size) > verysmall) then - ! STEP 2c: computing the inverse of grad_xi tensor = F - ! tensorb is the adjoint, tensora becomes F - $:GPU_LOOP(parallelism='[seq]') - do i = 1, tensor_size - 1 - tensora(i) = tensorb(i)/tensorb(tensor_size) - end do - - ! STEP 2d: computing the J = det(F) = 1/det(\grad{\xi}) - tensorb(tensor_size) = 1._wp/tensorb(tensor_size) - - ! STEP 3: computing F transpose F - tensorb(1) = tensora(1)**2 + tensora(2)**2 + tensora(3)**2 - tensorb(5) = tensora(4)**2 + tensora(5)**2 + tensora(6)**2 - tensorb(9) = tensora(7)**2 + tensora(8)**2 + tensora(9)**2 - tensorb(2) = tensora(1)*tensora(4) + tensora(2)*tensora(5) + tensora(3)*tensora(6) - tensorb(3) = tensora(1)*tensora(7) + tensora(2)*tensora(8) + tensora(3)*tensora(9) - tensorb(6) = tensora(4)*tensora(7) + tensora(5)*tensora(8) + tensora(6)*tensora(9) - ! STEP 4: update the btensor, this is consistent with Riemann solvers - #:for BIJ, TXY in [(1,1),(2,2),(3,5),(4,3),(5,6),(6,9)] - btensor%vf(${BIJ}$)%sf(j, k, l) = tensorb(${TXY}$) - #:endfor - ! store the determinant at the last entry of the btensor - btensor%vf(b_size)%sf(j, k, l) = tensorb(tensor_size) - ! STEP 5a: updating the Cauchy stress primitive scalar field - if (hyper_model == 1) then - call s_neoHookean_cauchy_solver(btensor%vf, q_prim_vf, G_local, j, k, l) - elseif (hyper_model == 2) then - call s_Mooney_Rivlin_cauchy_solver(btensor%vf, q_prim_vf, G_local, j, k, l) - end if - ! STEP 5b: updating the pressure field - q_prim_vf(E_idx)%sf(j, k, l) = q_prim_vf(E_idx)%sf(j, k, l) - & - G_local*q_prim_vf(xiend + 1)%sf(j, k, l)/gamma - ! STEP 5c: updating the Cauchy stress conservative scalar field - $:GPU_LOOP(parallelism='[seq]') - do i = 1, b_size - 1 - q_cons_vf(strxb + i - 1)%sf(j, k, l) = & - rho*q_prim_vf(strxb + i - 1)%sf(j, k, l) - end do - end if end if + end if end do end do end do diff --git a/src/simulation/m_hypoelastic.fpp b/src/simulation/m_hypoelastic.fpp index 3c93f0041..a265b9bdd 100644 --- a/src/simulation/m_hypoelastic.fpp +++ b/src/simulation/m_hypoelastic.fpp @@ -104,7 +104,7 @@ contains ! calculate velocity gradients + rho_K and G_K ! TODO: re-organize these loops one by one for GPU efficiency if possible? - $:GPU_PARALLEL_LOOP(private='[k,l,q]', collapse=3) + $:GPU_PARALLEL_LOOP(collapse=3) do q = 0, p do l = 0, n do k = 0, m @@ -114,7 +114,7 @@ contains end do $:END_GPU_PARALLEL_LOOP() - $:GPU_PARALLEL_LOOP(private='[k,l,q]', collapse=3) + $:GPU_PARALLEL_LOOP(collapse=3) do q = 0, p do l = 0, n do k = 0, m @@ -130,7 +130,7 @@ contains $:END_GPU_PARALLEL_LOOP() if (ndirs > 1) then - $:GPU_PARALLEL_LOOP(private='[k,l,q]', collapse=3) + $:GPU_PARALLEL_LOOP(collapse=3) do q = 0, p do l = 0, n do k = 0, m @@ -140,7 +140,7 @@ contains end do $:END_GPU_PARALLEL_LOOP() - $:GPU_PARALLEL_LOOP(private='[k,l,q]', collapse=3) + $:GPU_PARALLEL_LOOP(collapse=3) do q = 0, p do l = 0, n do k = 0, m @@ -161,7 +161,7 @@ contains ! 3D if (ndirs == 3) then - $:GPU_PARALLEL_LOOP(private='[k,l,q]', collapse=3) + $:GPU_PARALLEL_LOOP(collapse=3) do q = 0, p do l = 0, n do k = 0, m @@ -172,7 +172,7 @@ contains end do $:END_GPU_PARALLEL_LOOP() - $:GPU_PARALLEL_LOOP(private='[k,l,q]', collapse=3) + $:GPU_PARALLEL_LOOP(collapse=3) do q = 0, p do l = 0, n do k = 0, m @@ -196,32 +196,32 @@ contains end if end if - $:GPU_PARALLEL_LOOP(private='[k,l,q,rho_K, G_K]', collapse=3) - do q = 0, p - do l = 0, n - do k = 0, m - rho_K = 0._wp; G_K = 0._wp - do i = 1, num_fluids - rho_K = rho_K + q_prim_vf(i)%sf(k, l, q) !alpha_rho_K(1) - G_K = G_K + q_prim_vf(advxb - 1 + i)%sf(k, l, q)*Gs_hypo(i) !alpha_K(1) * Gs_hypo(1) - end do + $:GPU_PARALLEL_LOOP(collapse=3,private='[rho_K, G_K]') + do q = 0, p + do l = 0, n + do k = 0, m + rho_K = 0._wp; G_K = 0._wp + do i = 1, num_fluids + rho_K = rho_K + q_prim_vf(i)%sf(k, l, q) !alpha_rho_K(1) + G_K = G_K + q_prim_vf(advxb - 1 + i)%sf(k, l, q)*Gs_hypo(i) !alpha_K(1) * Gs_hypo(1) + end do - if (cont_damage) G_K = G_K*max((1._wp - q_prim_vf(damage_idx)%sf(k, l, q)), 0._wp) + if (cont_damage) G_K = G_K*max((1._wp - q_prim_vf(damage_idx)%sf(k, l, q)), 0._wp) - rho_K_field(k, l, q) = rho_K - G_K_field(k, l, q) = G_K + rho_K_field(k, l, q) = rho_K + G_K_field(k, l, q) = G_K - !TODO: take this out if not needed - if (G_K < verysmall) then - G_K_field(k, l, q) = 0 - end if + !TODO: take this out if not needed + if (G_K < verysmall) then + G_K_field(k, l, q) = 0 + end if end do end do end do $:END_GPU_PARALLEL_LOOP() ! apply rhs source term to elastic stress equation - $:GPU_PARALLEL_LOOP(private='[k,l,q]', collapse=3) + $:GPU_PARALLEL_LOOP(collapse=3) do q = 0, p do l = 0, n do k = 0, m @@ -236,7 +236,7 @@ contains $:END_GPU_PARALLEL_LOOP() elseif (idir == 2) then - $:GPU_PARALLEL_LOOP(private='[k,l,q]', collapse=3) + $:GPU_PARALLEL_LOOP(collapse=3) do q = 0, p do l = 0, n do k = 0, m @@ -272,7 +272,7 @@ contains $:END_GPU_PARALLEL_LOOP() elseif (idir == 3) then - $:GPU_PARALLEL_LOOP(private='[k,l,q]', collapse=3) + $:GPU_PARALLEL_LOOP(collapse=3) do q = 0, p do l = 0, n do k = 0, m @@ -341,7 +341,7 @@ contains if (cyl_coord .and. idir == 2) then - $:GPU_PARALLEL_LOOP(private='[k,l,q]', collapse=3) + $:GPU_PARALLEL_LOOP(collapse=3) do q = 0, p do l = 0, n do k = 0, m @@ -404,14 +404,30 @@ contains if (n == 0) then l = 0; q = 0 - $:GPU_PARALLEL_LOOP(private='[k]', copyin='[l,q]') - do k = 0, m - rhs_vf(damage_idx)%sf(k, l, q) = (alpha_bar*max(abs(real(q_cons_vf(stress_idx%beg)%sf(k, l, q), kind=wp)) - tau_star, 0._wp))**cont_damage_s - end do + $:GPU_PARALLEL_LOOP() + do k = 0, m + rhs_vf(damage_idx)%sf(k, l, q) = (alpha_bar*max(abs(real(q_cons_vf(stress_idx%beg)%sf(k, l, q), kind=wp)) - tau_star, 0._wp))**cont_damage_s + end do $:END_GPU_PARALLEL_LOOP() elseif (p == 0) then q = 0 - $:GPU_PARALLEL_LOOP(private='[k,l,tau_p]', copyin='[q]', collapse=2) + $:GPU_PARALLEL_LOOP(collapse=2, private='[tau_p]') + do l = 0, n + do k = 0, m + ! Maximum principal stress + tau_p = 0.5_wp*(q_cons_vf(stress_idx%beg)%sf(k, l, q) + & + q_cons_vf(stress_idx%beg + 2)%sf(k, l, q)) + & + sqrt((q_cons_vf(stress_idx%beg)%sf(k, l, q) - & + q_cons_vf(stress_idx%beg + 2)%sf(k, l, q))**2.0_wp + & + 4._wp*q_cons_vf(stress_idx%beg + 1)%sf(k, l, q)**2.0_wp)/2._wp + + rhs_vf(damage_idx)%sf(k, l, q) = (alpha_bar*max(tau_p - tau_star, 0._wp))**cont_damage_s + end do + end do + $:END_GPU_PARALLEL_LOOP() + else + $:GPU_PARALLEL_LOOP(collapse=3, private='[tau_xx, tau_xy, tau_yy, tau_xz, tau_yz, tau_zz, I1, I2, I3, temp, sqrt_term_1, sqrt_term_2, argument, phi, tau_p]') + do q = 0, p do l = 0, n do k = 0, m tau_xx = q_cons_vf(stress_idx%beg)%sf(k, l, q) @@ -446,45 +462,7 @@ contains rhs_vf(damage_idx)%sf(k, l, q) = (alpha_bar*max(tau_p - tau_star, 0._wp))**cont_damage_s end do end do - $:END_GPU_PARALLEL_LOOP() - else - $:GPU_PARALLEL_LOOP(collapse=3, private='[k,l,q,tau_xx, tau_xy, tau_yy, tau_xz, tau_yz, tau_zz, I1, I2, I3, temp, sqrt_term_1, sqrt_term_2, argument, phi, tau_p]') - do q = 0, p - do l = 0, n - do k = 0, m - tau_xx = q_cons_vf(stress_idx%beg)%sf(k, l, q) - tau_xy = q_cons_vf(stress_idx%beg + 1)%sf(k, l, q) - tau_yy = q_cons_vf(stress_idx%beg + 2)%sf(k, l, q) - tau_xz = q_cons_vf(stress_idx%beg + 3)%sf(k, l, q) - tau_yz = q_cons_vf(stress_idx%beg + 4)%sf(k, l, q) - tau_zz = q_cons_vf(stress_idx%beg + 5)%sf(k, l, q) - - ! Invariants of the stress tensor - I1 = tau_xx + tau_yy + tau_zz - I2 = tau_xx*tau_yy + tau_xx*tau_zz + tau_yy*tau_zz - & - (tau_xy**2.0_wp + tau_xz**2.0_wp + tau_yz**2.0_wp) - I3 = tau_xx*tau_yy*tau_zz + 2.0_wp*tau_xy*tau_xz*tau_yz - & - tau_xx*tau_yz**2.0_wp - tau_yy*tau_xz**2.0_wp - tau_zz*tau_xy**2.0_wp - - ! Maximum principal stress - temp = I1**2.0_wp - 3.0_wp*I2 - sqrt_term_1 = sqrt(max(temp, 0.0_wp)) - if (sqrt_term_1 > verysmall) then ! Avoid 0/0 - argument = (2.0_wp*I1*I1*I1 - 9.0_wp*I1*I2 + 27.0_wp*I3)/ & - (2.0_wp*sqrt_term_1*sqrt_term_1*sqrt_term_1) - if (argument > 1.0_wp) argument = 1.0_wp - if (argument < -1.0_wp) argument = -1.0_wp - phi = acos(argument) - sqrt_term_2 = sqrt(max(I1**2.0_wp - 3.0_wp*I2, 0.0_wp)) - tau_p = I1/3.0_wp + 2.0_wp/sqrt(3.0_wp)*sqrt_term_2*cos(phi/3.0_wp) - else - tau_p = I1/3.0_wp - end if - - rhs_vf(damage_idx)%sf(k, l, q) = (alpha_bar*max(tau_p - tau_star, 0._wp))**cont_damage_s - end do - end do - end do + end do $:END_GPU_PARALLEL_LOOP() end if diff --git a/src/simulation/m_ibm.fpp b/src/simulation/m_ibm.fpp index 11f73efd1..1f3c9a623 100644 --- a/src/simulation/m_ibm.fpp +++ b/src/simulation/m_ibm.fpp @@ -199,7 +199,7 @@ contains if (num_gps > 0) then $:GPU_PARALLEL_LOOP(private='[i,physical_loc,dyn_pres,alpha_rho_IP, alpha_IP,pres_IP,vel_IP,vel_g,vel_norm_IP,r_IP, v_IP,pb_IP,mv_IP,nmom_IP,presb_IP,massv_IP,rho, gamma,pi_inf,Re_K,G_K,Gs,gp,innerp,norm,buf, radial_vector, rotation_velocity, j,k,l,q,qv_K,c_IP,nbub,patch_id]') - do i = 1, num_gps + do i = 1, num_gps gp = ghost_points(i) j = gp%loc(1) @@ -214,25 +214,25 @@ contains physical_loc = [x_cc(j), y_cc(k), 0._wp] end if - !Interpolate primitive variables at image point associated w/ GP - if (bubbles_euler .and. .not. qbmm) then - call s_interpolate_image_point(q_prim_vf, gp, & - alpha_rho_IP, alpha_IP, pres_IP, vel_IP, c_IP, & - r_IP, v_IP, pb_IP, mv_IP) - else if (qbmm .and. polytropic) then - call s_interpolate_image_point(q_prim_vf, gp, & - alpha_rho_IP, alpha_IP, pres_IP, vel_IP, c_IP, & - r_IP, v_IP, pb_IP, mv_IP, nmom_IP) - else if (qbmm .and. .not. polytropic) then - call s_interpolate_image_point(q_prim_vf, gp, & - alpha_rho_IP, alpha_IP, pres_IP, vel_IP, c_IP, & - r_IP, v_IP, pb_IP, mv_IP, nmom_IP, pb_in, mv_in, presb_IP, massv_IP) - else - call s_interpolate_image_point(q_prim_vf, gp, & - alpha_rho_IP, alpha_IP, pres_IP, vel_IP, c_IP) - end if + !Interpolate primitive variables at image point associated w/ GP + if (bubbles_euler .and. .not. qbmm) then + call s_interpolate_image_point(q_prim_vf, gp, & + alpha_rho_IP, alpha_IP, pres_IP, vel_IP, c_IP, & + r_IP, v_IP, pb_IP, mv_IP) + else if (qbmm .and. polytropic) then + call s_interpolate_image_point(q_prim_vf, gp, & + alpha_rho_IP, alpha_IP, pres_IP, vel_IP, c_IP, & + r_IP, v_IP, pb_IP, mv_IP, nmom_IP) + else if (qbmm .and. .not. polytropic) then + call s_interpolate_image_point(q_prim_vf, gp, & + alpha_rho_IP, alpha_IP, pres_IP, vel_IP, c_IP, & + r_IP, v_IP, pb_IP, mv_IP, nmom_IP, pb_in, mv_in, presb_IP, massv_IP) + else + call s_interpolate_image_point(q_prim_vf, gp, & + alpha_rho_IP, alpha_IP, pres_IP, vel_IP, c_IP) + end if - dyn_pres = 0._wp + dyn_pres = 0._wp ! Set q_prim_vf params at GP so that mixture vars calculated properly $:GPU_LOOP(parallelism='[seq]') diff --git a/src/simulation/m_igr.fpp b/src/simulation/m_igr.fpp index c45593a45..60555b554 100644 --- a/src/simulation/m_igr.fpp +++ b/src/simulation/m_igr.fpp @@ -205,14 +205,14 @@ contains #endif $:GPU_PARALLEL_LOOP(private='[j,k,l]', collapse=3) - do l = idwbuff(3)%beg, idwbuff(3)%end - do k = idwbuff(2)%beg, idwbuff(2)%end - do j = idwbuff(1)%beg, idwbuff(1)%end - jac(j, k, l) = 0._stp - if (igr_iter_solver == 1) jac_old(j, k, l) = 0._stp - end do + do l = idwbuff(3)%beg, idwbuff(3)%end + do k = idwbuff(2)%beg, idwbuff(2)%end + do j = idwbuff(1)%beg, idwbuff(1)%end + jac(j, k, l) = 0._stp + if (igr_iter_solver == 1) jac_old(j, k, l) = 0._stp end do end do + end do $:END_GPU_PARALLEL_LOOP() if (p == 0) then @@ -290,81 +290,81 @@ contains do q = 1, num_iters $:GPU_PARALLEL_LOOP(collapse=3, private='[j,k,l,rho_lx, rho_rx, rho_ly, rho_ry, rho_lz, rho_rz, fd_coeff]') - do l = 0, p - do k = 0, n - do j = 0, m - rho_lx = 0._wp - rho_rx = 0._wp - rho_ly = 0._wp - rho_ry = 0._wp - rho_lz = 0._wp - rho_rz = 0._wp - fd_coeff = 0._wp + do l = 0, p + do k = 0, n + do j = 0, m + rho_lx = 0._wp + rho_rx = 0._wp + rho_ly = 0._wp + rho_ry = 0._wp + rho_lz = 0._wp + rho_rz = 0._wp + fd_coeff = 0._wp - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - rho_lx = rho_lx + real(q_cons_vf(i)%sf(j, k, l) + q_cons_vf(i)%sf(j - 1, k, l), kind=wp)/2._wp - rho_rx = rho_rx + real(q_cons_vf(i)%sf(j, k, l) + q_cons_vf(i)%sf(j + 1, k, l), kind=wp)/2._wp - rho_ly = rho_ly + real(q_cons_vf(i)%sf(j, k, l) + q_cons_vf(i)%sf(j, k - 1, l), kind=wp)/2._wp - rho_ry = rho_ry + real(q_cons_vf(i)%sf(j, k, l) + q_cons_vf(i)%sf(j, k + 1, l), kind=wp)/2._wp - if (p > 0) then - rho_lz = rho_lz + real(q_cons_vf(i)%sf(j, k, l) + q_cons_vf(i)%sf(j, k, l - 1), kind=wp)/2._wp - rho_rz = rho_rz + real(q_cons_vf(i)%sf(j, k, l) + q_cons_vf(i)%sf(j, k, l + 1), kind=wp)/2._wp - end if - fd_coeff = fd_coeff + q_cons_vf(i)%sf(j, k, l) - end do + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids + rho_lx = rho_lx + real(q_cons_vf(i)%sf(j, k, l) + q_cons_vf(i)%sf(j - 1, k, l), kind=wp)/2._wp + rho_rx = rho_rx + real(q_cons_vf(i)%sf(j, k, l) + q_cons_vf(i)%sf(j + 1, k, l), kind=wp)/2._wp + rho_ly = rho_ly + real(q_cons_vf(i)%sf(j, k, l) + q_cons_vf(i)%sf(j, k - 1, l), kind=wp)/2._wp + rho_ry = rho_ry + real(q_cons_vf(i)%sf(j, k, l) + q_cons_vf(i)%sf(j, k + 1, l), kind=wp)/2._wp + if (p > 0) then + rho_lz = rho_lz + real(q_cons_vf(i)%sf(j, k, l) + q_cons_vf(i)%sf(j, k, l - 1), kind=wp)/2._wp + rho_rz = rho_rz + real(q_cons_vf(i)%sf(j, k, l) + q_cons_vf(i)%sf(j, k, l + 1), kind=wp)/2._wp + end if + fd_coeff = fd_coeff + q_cons_vf(i)%sf(j, k, l) + end do + + fd_coeff = 1._wp/fd_coeff + alf_igr* & + ((1._wp/dx(j)**2._wp)*(1._wp/rho_lx + 1._wp/rho_rx) + & + (1._wp/dy(k)**2._wp)*(1._wp/rho_ly + 1._wp/rho_ry)) - fd_coeff = 1._wp/fd_coeff + alf_igr* & - ((1._wp/dx(j)**2._wp)*(1._wp/rho_lx + 1._wp/rho_rx) + & - (1._wp/dy(k)**2._wp)*(1._wp/rho_ly + 1._wp/rho_ry)) + if (num_dims == 3) then + fd_coeff = fd_coeff + alf_igr*(1._wp/dz(l)**2._wp)*(1._wp/rho_lz + 1._wp/rho_rz) + end if + if (igr_iter_solver == 1) then ! Jacobi iteration if (num_dims == 3) then - fd_coeff = fd_coeff + alf_igr*(1._wp/dz(l)**2._wp)*(1._wp/rho_lz + 1._wp/rho_rz) + jac(j, k, l) = real((alf_igr/fd_coeff)* & + ((1._wp/dx(j)**2._wp)*(jac_old(j - 1, k, l)/rho_lx + jac_old(j + 1, k, l)/rho_rx) + & + (1._wp/dy(k)**2._wp)*(jac_old(j, k - 1, l)/rho_ly + jac_old(j, k + 1, l)/rho_ry) + & + (1._wp/dz(l)**2._wp)*(jac_old(j, k, l - 1)/rho_lz + jac_old(j, k, l + 1)/rho_rz)) + & + real(jac_rhs(j, k, l), kind=wp)/fd_coeff, kind=stp) + else + jac(j, k, l) = real((alf_igr/fd_coeff)* & + ((1._wp/dx(j)**2._wp)*(real(jac_old(j - 1, k, l), kind=wp)/rho_lx + real(jac_old(j + 1, k, l), kind=wp)/rho_rx) + & + (1._wp/dy(k)**2._wp)*(real(jac_old(j, k - 1, l), kind=wp)/rho_ly + real(jac_old(j, k + 1, l), kind=wp)/rho_ry)) + & + real(jac_rhs(j, k, l), kind=wp)/fd_coeff, kind=stp) end if - - if (igr_iter_solver == 1) then ! Jacobi iteration - if (num_dims == 3) then - jac(j, k, l) = real((alf_igr/fd_coeff)* & - ((1._wp/dx(j)**2._wp)*(jac_old(j - 1, k, l)/rho_lx + jac_old(j + 1, k, l)/rho_rx) + & - (1._wp/dy(k)**2._wp)*(jac_old(j, k - 1, l)/rho_ly + jac_old(j, k + 1, l)/rho_ry) + & - (1._wp/dz(l)**2._wp)*(jac_old(j, k, l - 1)/rho_lz + jac_old(j, k, l + 1)/rho_rz)) + & - real(jac_rhs(j, k, l), kind=wp)/fd_coeff, kind=stp) - else - jac(j, k, l) = real((alf_igr/fd_coeff)* & - ((1._wp/dx(j)**2._wp)*(real(jac_old(j - 1, k, l), kind=wp)/rho_lx + real(jac_old(j + 1, k, l), kind=wp)/rho_rx) + & - (1._wp/dy(k)**2._wp)*(real(jac_old(j, k - 1, l), kind=wp)/rho_ly + real(jac_old(j, k + 1, l), kind=wp)/rho_ry)) + & - real(jac_rhs(j, k, l), kind=wp)/fd_coeff, kind=stp) - end if - else ! Gauss Seidel iteration - if (num_dims == 3) then - jac(j, k, l) = real((alf_igr/fd_coeff)* & - ((1._wp/dx(j)**2._wp)*(jac(j - 1, k, l)/rho_lx + jac(j + 1, k, l)/rho_rx) + & - (1._wp/dy(k)**2._wp)*(jac(j, k - 1, l)/rho_ly + jac(j, k + 1, l)/rho_ry) + & - (1._wp/dz(l)**2._wp)*(jac(j, k, l - 1)/rho_lz + jac(j, k, l + 1)/rho_rz)) + & - real(jac_rhs(j, k, l), kind=wp)/fd_coeff, kind=stp) - else - jac(j, k, l) = real((alf_igr/fd_coeff)* & - ((1._wp/dx(j)**2._wp)*(jac(j - 1, k, l)/rho_lx + jac(j + 1, k, l)/rho_rx) + & - (1._wp/dy(k)**2._wp)*(jac(j, k - 1, l)/rho_ly + jac(j, k + 1, l)/rho_ry)) + & - real(jac_rhs(j, k, l), kind=wp)/fd_coeff, kind=stp) - end if + else ! Gauss Seidel iteration + if (num_dims == 3) then + jac(j, k, l) = real((alf_igr/fd_coeff)* & + ((1._wp/dx(j)**2._wp)*(jac(j - 1, k, l)/rho_lx + jac(j + 1, k, l)/rho_rx) + & + (1._wp/dy(k)**2._wp)*(jac(j, k - 1, l)/rho_ly + jac(j, k + 1, l)/rho_ry) + & + (1._wp/dz(l)**2._wp)*(jac(j, k, l - 1)/rho_lz + jac(j, k, l + 1)/rho_rz)) + & + real(jac_rhs(j, k, l), kind=wp)/fd_coeff, kind=stp) + else + jac(j, k, l) = real((alf_igr/fd_coeff)* & + ((1._wp/dx(j)**2._wp)*(jac(j - 1, k, l)/rho_lx + jac(j + 1, k, l)/rho_rx) + & + (1._wp/dy(k)**2._wp)*(jac(j, k - 1, l)/rho_ly + jac(j, k + 1, l)/rho_ry)) + & + real(jac_rhs(j, k, l), kind=wp)/fd_coeff, kind=stp) end if - end do + end if end do end do + end do $:END_GPU_PARALLEL_LOOP() call s_populate_F_igr_buffers(bc_type, jac_sf) if (igr_iter_solver == 1) then ! Jacobi iteration $:GPU_PARALLEL_LOOP(private='[j,k,l]', collapse=3) - do l = idwbuff(3)%beg, idwbuff(3)%end - do k = idwbuff(2)%beg, idwbuff(2)%end - do j = idwbuff(1)%beg, idwbuff(1)%end - jac_old(j, k, l) = jac(j, k, l) - end do + do l = idwbuff(3)%beg, idwbuff(3)%end + do k = idwbuff(2)%beg, idwbuff(2)%end + do j = idwbuff(1)%beg, idwbuff(1)%end + jac_old(j, k, l) = jac(j, k, l) end do end do + end do $:END_GPU_PARALLEL_LOOP() end if end do @@ -390,68 +390,68 @@ contains #:endif $:GPU_PARALLEL_LOOP(collapse=3, private='[j,k,l,F_L, vel_L, alpha_rho_L, F_R, vel_R, alpha_rho_R, rho_L, rho_R]') - do l = 0, p - do k = 0, n - do j = -1, m - - F_L = 0._wp; F_R = 0._wp - vel_L = 0._wp; vel_R = 0._wp - rho_L = 0._wp; rho_R = 0._wp + do l = 0, p + do k = 0, n + do j = -1, m + + F_L = 0._wp; F_R = 0._wp + vel_L = 0._wp; vel_R = 0._wp + rho_L = 0._wp; rho_R = 0._wp + + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids + alpha_rho_L(i) = 0._wp + alpha_rho_R(i) = 0._wp + end do + $:GPU_LOOP(parallelism='[seq]') + do q = vidxb + 1, vidxe $:GPU_LOOP(parallelism='[seq]') do i = 1, num_fluids - alpha_rho_L(i) = 0._wp - alpha_rho_R(i) = 0._wp - end do - - $:GPU_LOOP(parallelism='[seq]') - do q = vidxb + 1, vidxe - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - alpha_rho_L(i) = alpha_rho_L(i) + coeff_L(q + offxL)*q_cons_vf(i)%sf(j + q, k, l) - end do - - vel_L = vel_L + coeff_L(q + offxL)*q_cons_vf(momxb)%sf(j + q, k, l) - F_L = F_L + coeff_L(q + offxL)*jac(j + q, k, l) + alpha_rho_L(i) = alpha_rho_L(i) + coeff_L(q + offxL)*q_cons_vf(i)%sf(j + q, k, l) end do - $:GPU_LOOP(parallelism='[seq]') - do q = vidxb, vidxe - 1 - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - alpha_rho_R(i) = alpha_rho_R(i) + coeff_R(q + offxR)*q_cons_vf(i)%sf(j + q, k, l) - end do - - vel_R = vel_R + coeff_R(q + offxR)*q_cons_vf(momxb)%sf(j + q, k, l) - F_R = F_R + coeff_R(q + offxR)*jac(j + q, k, l) - end do + vel_L = vel_L + coeff_L(q + offxL)*q_cons_vf(momxb)%sf(j + q, k, l) + F_L = F_L + coeff_L(q + offxL)*jac(j + q, k, l) + end do + $:GPU_LOOP(parallelism='[seq]') + do q = vidxb, vidxe - 1 $:GPU_LOOP(parallelism='[seq]') do i = 1, num_fluids - rho_L = rho_L + alpha_rho_L(i) - rho_R = rho_R + alpha_rho_R(i) + alpha_rho_R(i) = alpha_rho_R(i) + coeff_R(q + offxR)*q_cons_vf(i)%sf(j + q, k, l) end do - vel_L = vel_L/rho_L - vel_R = vel_R/rho_R + vel_R = vel_R + coeff_R(q + offxR)*q_cons_vf(momxb)%sf(j + q, k, l) + F_R = F_R + coeff_R(q + offxR)*jac(j + q, k, l) + end do - #:for LR in ['L', 'R'] - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb)%sf(j + 1, k, l) = rhs_vf(momxb)%sf(j + 1, k, l) + & - real(0.5_wp*dt*F_${LR}$*(1._wp/dx(j + 1)), kind=stp) - $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j + 1, k, l) = rhs_vf(E_idx)%sf(j + 1, k, l) + & - real(0.5_wp*dt*vel_${LR}$*F_${LR}$*(1._wp/dx(j + 1)), kind=stp) - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb)%sf(j, k, l) = rhs_vf(momxb)%sf(j, k, l) - & - real(0.5_wp*dt*F_${LR}$*(1._wp/dx(j)), kind=stp) - $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) - & - real(0.5_wp*dt*vel_${LR}$*F_${LR}$*(1._wp/dx(j)), kind=stp) - #:endfor + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids + rho_L = rho_L + alpha_rho_L(i) + rho_R = rho_R + alpha_rho_R(i) end do + + vel_L = vel_L/rho_L + vel_R = vel_R/rho_R + + #:for LR in ['L', 'R'] + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb)%sf(j + 1, k, l) = rhs_vf(momxb)%sf(j + 1, k, l) + & + real(0.5_wp*dt*F_${LR}$*(1._wp/dx(j + 1)), kind=stp) + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j + 1, k, l) = rhs_vf(E_idx)%sf(j + 1, k, l) + & + real(0.5_wp*dt*vel_${LR}$*F_${LR}$*(1._wp/dx(j + 1)), kind=stp) + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb)%sf(j, k, l) = rhs_vf(momxb)%sf(j, k, l) - & + real(0.5_wp*dt*F_${LR}$*(1._wp/dx(j)), kind=stp) + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) - & + real(0.5_wp*dt*vel_${LR}$*F_${LR}$*(1._wp/dx(j)), kind=stp) + #:endfor end do end do + end do $:END_GPU_PARALLEL_LOOP() end subroutine s_igr_sigma_x @@ -488,215 +488,679 @@ contains if (idir == 1) then if (p == 0) then $:GPU_PARALLEL_LOOP(collapse=3, private='[j,k,l,rho_L, rho_R, gamma_L, gamma_R, pi_inf_L, pi_inf_R, mu_L, mu_R, vel_L, vel_R, pres_L, pres_R, alpha_L, alpha_R, alpha_rho_L, alpha_rho_R, F_L, F_R, E_L, E_R, cfl, dvel, dvel_small, rho_sf_small, vflux_L_arr, vflux_R_arr]') - do l = 0, p - do k = 0, n - do j = -1, m + do l = 0, p + do k = 0, n + do j = -1, m - vflux_L_arr = 0._wp - vflux_R_arr = 0._wp + vflux_L_arr = 0._wp + vflux_R_arr = 0._wp - #:if MFC_CASE_OPTIMIZATION - #:if igr_order == 5 - !DIR$ unroll 6 - #:elif igr_order == 3 - !DIR$ unroll 4 - #:endif + #:if MFC_CASE_OPTIMIZATION + #:if igr_order == 5 + !DIR$ unroll 6 + #:elif igr_order == 3 + !DIR$ unroll 4 #:endif + #:endif + $:GPU_LOOP(parallelism='[seq]') + do q = vidxb, vidxe + !x-direction contributions $:GPU_LOOP(parallelism='[seq]') - do q = vidxb, vidxe - !x-direction contributions + do i = -1, 1 + rho_L = 0._wp $:GPU_LOOP(parallelism='[seq]') - do i = -1, 1 - rho_L = 0._wp - $:GPU_LOOP(parallelism='[seq]') - do r = 1, num_fluids - rho_L = rho_L + q_cons_vf(r)%sf(j + i + q, k, l) - end do - rho_sf_small(i) = rho_L + do r = 1, num_fluids + rho_L = rho_L + q_cons_vf(r)%sf(j + i + q, k, l) end do + rho_sf_small(i) = rho_L + end do - dvel_small(1) = (1/(2._wp*dx(j)))*( & - 1._wp*q_cons_vf(momxb)%sf(j + 1 + q, k, l)/rho_sf_small(1) - & - 1._wp*q_cons_vf(momxb)%sf(j - 1 + q, k, l)/rho_sf_small(-1)) - dvel_small(2) = (1/(2._wp*dx(j)))*( & - q_cons_vf(momxb + 1)%sf(j + 1 + q, k, l)/rho_sf_small(1) - & - q_cons_vf(momxb + 1)%sf(j - 1 + q, k, l)/rho_sf_small(-1)) - - if (q == 0) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_dims - dvel(i, 1) = dvel_small(i) - end do - end if - - if (q > vidxb) then - vflux_L_arr(1) = vflux_L_arr(1) + coeff_L(q + offxL)*(dvel_small(2)) - vflux_L_arr(3) = vflux_L_arr(3) + coeff_L(q + offxL)*(4._wp*dvel_small(1))/3._wp - end if - if (q < vidxe) then - vflux_R_arr(1) = vflux_R_arr(1) + coeff_R(q + offxR)*(dvel_small(2)) - vflux_R_arr(3) = vflux_R_arr(3) + coeff_R(q + offxR)*(4._wp*dvel_small(1))/3._wp - end if + dvel_small(1) = (1/(2._wp*dx(j)))*( & + 1._wp*q_cons_vf(momxb)%sf(j + 1 + q, k, l)/rho_sf_small(1) - & + 1._wp*q_cons_vf(momxb)%sf(j - 1 + q, k, l)/rho_sf_small(-1)) + dvel_small(2) = (1/(2._wp*dx(j)))*( & + q_cons_vf(momxb + 1)%sf(j + 1 + q, k, l)/rho_sf_small(1) - & + q_cons_vf(momxb + 1)%sf(j - 1 + q, k, l)/rho_sf_small(-1)) - !y-direction contributions + if (q == 0) then $:GPU_LOOP(parallelism='[seq]') - do i = -1, 1 - rho_L = 0._wp - $:GPU_LOOP(parallelism='[seq]') - do r = 1, num_fluids - rho_L = rho_L + q_cons_vf(r)%sf(j + q, k + i, l) - end do - rho_sf_small(i) = rho_L + do i = 1, num_dims + dvel(i, 1) = dvel_small(i) end do + end if - dvel_small(1) = (1/(2._wp*dy(k)))*( & - q_cons_vf(momxb)%sf(j + q, k + 1, l)/rho_sf_small(1) - & - q_cons_vf(momxb)%sf(j + q, k - 1, l)/rho_sf_small(-1)) - dvel_small(2) = (1/(2._wp*dy(k)))*( & - q_cons_vf(momxb + 1)%sf(j + q, k + 1, l)/rho_sf_small(1) - & - q_cons_vf(momxb + 1)%sf(j + q, k - 1, l)/rho_sf_small(-1)) - - if (q == 0) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_dims - dvel(i, 2) = dvel_small(i) - end do - end if - - if (q > vidxb) then - vflux_L_arr(1) = vflux_L_arr(1) + coeff_L(q + offxL)*(dvel_small(1)) - vflux_L_arr(3) = vflux_L_arr(3) + coeff_L(q + offxL)*(-2._wp*dvel_small(2))/3._wp - end if - if (q < vidxe) then - vflux_R_arr(1) = vflux_R_arr(1) + coeff_R(q + offxR)*(dvel_small(1)) - vflux_R_arr(3) = vflux_R_arr(3) + coeff_R(q + offxR)*(-2._wp*dvel_small(2))/3._wp - end if - - if (q == 0) then - jac_rhs(j, k, l) = real(alf_igr*(2._wp*(dvel(1, 2)*dvel(2, 1)) & - + dvel(1, 1)**2._wp + dvel(2, 2)**2._wp & - + (dvel(1, 1) + dvel(2, 2))**2._wp), kind=stp) - end if - end do - - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - alpha_rho_L(i) = 0._wp - alpha_rho_R(i) = 0._wp - alpha_L(i) = 0._wp - alpha_R(i) = 0._wp - end do - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_dims - vel_L(i) = 0._wp - vel_R(i) = 0._wp - end do + if (q > vidxb) then + vflux_L_arr(1) = vflux_L_arr(1) + coeff_L(q + offxL)*(dvel_small(2)) + vflux_L_arr(3) = vflux_L_arr(3) + coeff_L(q + offxL)*(4._wp*dvel_small(1))/3._wp + end if + if (q < vidxe) then + vflux_R_arr(1) = vflux_R_arr(1) + coeff_R(q + offxR)*(dvel_small(2)) + vflux_R_arr(3) = vflux_R_arr(3) + coeff_R(q + offxR)*(4._wp*dvel_small(1))/3._wp + end if + !y-direction contributions $:GPU_LOOP(parallelism='[seq]') - do q = vidxb + 1, vidxe + do i = -1, 1 + rho_L = 0._wp $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - alpha_rho_L(i) = alpha_rho_L(i) + coeff_L(q + offxL)*q_cons_vf(i)%sf(j + q, k, l) + do r = 1, num_fluids + rho_L = rho_L + q_cons_vf(r)%sf(j + q, k + i, l) end do + rho_sf_small(i) = rho_L + end do - if (num_fluids > 1) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - 1 - alpha_L(i) = alpha_L(i) + coeff_L(q + offxL)*q_cons_vf(E_idx + i)%sf(j + q, k, l) - end do - else - alpha_L(1) = 1._wp - end if + dvel_small(1) = (1/(2._wp*dy(k)))*( & + q_cons_vf(momxb)%sf(j + q, k + 1, l)/rho_sf_small(1) - & + q_cons_vf(momxb)%sf(j + q, k - 1, l)/rho_sf_small(-1)) + dvel_small(2) = (1/(2._wp*dy(k)))*( & + q_cons_vf(momxb + 1)%sf(j + q, k + 1, l)/rho_sf_small(1) - & + q_cons_vf(momxb + 1)%sf(j + q, k - 1, l)/rho_sf_small(-1)) + if (q == 0) then $:GPU_LOOP(parallelism='[seq]') do i = 1, num_dims - vel_L(i) = vel_L(i) + coeff_L(q + offxL)*q_cons_vf(momxb + i - 1)%sf(j + q, k, l) + dvel(i, 2) = dvel_small(i) end do - end do + end if - $:GPU_LOOP(parallelism='[seq]') - do q = vidxb, vidxe - 1 - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - alpha_rho_R(i) = alpha_rho_R(i) + coeff_R(q + offxR)*q_cons_vf(i)%sf(j + q, k, l) - end do + if (q > vidxb) then + vflux_L_arr(1) = vflux_L_arr(1) + coeff_L(q + offxL)*(dvel_small(1)) + vflux_L_arr(3) = vflux_L_arr(3) + coeff_L(q + offxL)*(-2._wp*dvel_small(2))/3._wp + end if + if (q < vidxe) then + vflux_R_arr(1) = vflux_R_arr(1) + coeff_R(q + offxR)*(dvel_small(1)) + vflux_R_arr(3) = vflux_R_arr(3) + coeff_R(q + offxR)*(-2._wp*dvel_small(2))/3._wp + end if - if (num_fluids > 1) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - 1 - alpha_R(i) = alpha_R(i) + coeff_R(q + offxR)*q_cons_vf(E_idx + i)%sf(j + q, k, l) - end do - else - alpha_R(1) = 1._wp - end if + if (q == 0) then + jac_rhs(j, k, l) = real(alf_igr*(2._wp*(dvel(1, 2)*dvel(2, 1)) & + + dvel(1, 1)**2._wp + dvel(2, 2)**2._wp & + + (dvel(1, 1) + dvel(2, 2))**2._wp), kind=stp) + end if + end do - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_dims - vel_R(i) = vel_R(i) + coeff_R(q + offxR)*q_cons_vf(momxb + i - 1)%sf(j + q, k, l) - end do + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids + alpha_rho_L(i) = 0._wp + alpha_rho_R(i) = 0._wp + alpha_L(i) = 0._wp + alpha_R(i) = 0._wp + end do + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_dims + vel_L(i) = 0._wp + vel_R(i) = 0._wp + end do + + $:GPU_LOOP(parallelism='[seq]') + do q = vidxb + 1, vidxe + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids + alpha_rho_L(i) = alpha_rho_L(i) + coeff_L(q + offxL)*q_cons_vf(i)%sf(j + q, k, l) end do if (num_fluids > 1) then - - alpha_L(num_fluids) = 1._wp - alpha_R(num_fluids) = 1._wp - $:GPU_LOOP(parallelism='[seq]') do i = 1, num_fluids - 1 - alpha_L(num_fluids) = alpha_L(num_fluids) - alpha_L(i) - alpha_R(num_fluids) = alpha_R(num_fluids) - alpha_R(i) + alpha_L(i) = alpha_L(i) + coeff_L(q + offxL)*q_cons_vf(E_idx + i)%sf(j + q, k, l) end do + else + alpha_L(1) = 1._wp end if - rho_L = 0._wp; rho_R = 0._wp - gamma_L = 0._wp; gamma_R = 0._wp - pi_inf_L = 0._wp; pi_inf_R = 0._wp + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_dims + vel_L(i) = vel_L(i) + coeff_L(q + offxL)*q_cons_vf(momxb + i - 1)%sf(j + q, k, l) + end do + end do + $:GPU_LOOP(parallelism='[seq]') + do q = vidxb, vidxe - 1 $:GPU_LOOP(parallelism='[seq]') do i = 1, num_fluids - rho_L = rho_L + alpha_rho_L(i) - gamma_L = gamma_L + alpha_L(i)*gammas(i) - pi_inf_L = pi_inf_L + alpha_L(i)*pi_infs(i) - - rho_R = rho_R + alpha_rho_R(i) - gamma_R = gamma_R + alpha_R(i)*gammas(i) - pi_inf_R = pi_inf_R + alpha_R(i)*pi_infs(i) + alpha_rho_R(i) = alpha_rho_R(i) + coeff_R(q + offxR)*q_cons_vf(i)%sf(j + q, k, l) end do + if (num_fluids > 1) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids - 1 + alpha_R(i) = alpha_R(i) + coeff_R(q + offxR)*q_cons_vf(E_idx + i)%sf(j + q, k, l) + end do + else + alpha_R(1) = 1._wp + end if + $:GPU_LOOP(parallelism='[seq]') do i = 1, num_dims - vel_L(i) = vel_L(i)/rho_L - vel_R(i) = vel_R(i)/rho_R + vel_R(i) = vel_R(i) + coeff_R(q + offxR)*q_cons_vf(momxb + i - 1)%sf(j + q, k, l) end do + end do - if (viscous) then - mu_L = 0._wp; mu_R = 0._wp - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - mu_L = alpha_L(i)/Res_igr(1, i) + mu_L - mu_R = alpha_R(i)/Res_igr(1, i) + mu_R - end do + if (num_fluids > 1) then - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 1)%sf(j + 1, k, l) = rhs_vf(momxb + 1)%sf(j + 1, k, l) - & - real(0.5_wp*dt*mu_L*vflux_L_arr(1)*(1._wp/dx(j + 1)), kind=stp) - $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j + 1, k, l) = rhs_vf(E_idx)%sf(j + 1, k, l) - & - real(0.5_wp*dt*mu_L*vflux_L_arr(1)*vel_L(2)*(1._wp/dx(j + 1)), kind=stp) + alpha_L(num_fluids) = 1._wp + alpha_R(num_fluids) = 1._wp - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 1)%sf(j, k, l) = rhs_vf(momxb + 1)%sf(j, k, l) + & - real(0.5_wp*dt*mu_L*vflux_L_arr(1)*(1._wp/dx(j)), kind=stp) - $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) + & - real(0.5_wp*dt*mu_L*vflux_L_arr(1)*vel_L(2)*(1._wp/dx(j)), kind=stp) + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids - 1 + alpha_L(num_fluids) = alpha_L(num_fluids) - alpha_L(i) + alpha_R(num_fluids) = alpha_R(num_fluids) - alpha_R(i) + end do + end if - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 1)%sf(j + 1, k, l) = rhs_vf(momxb + 1)%sf(j + 1, k, l) - & - real(0.5_wp*dt*mu_R*vflux_R_arr(1)*(1._wp/dx(j + 1)), kind=stp) - $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j + 1, k, l) = rhs_vf(E_idx)%sf(j + 1, k, l) - & - real(0.5_wp*dt*mu_R*vflux_R_arr(1)*vel_R(2)*(1._wp/dx(j + 1)), kind=stp) + rho_L = 0._wp; rho_R = 0._wp + gamma_L = 0._wp; gamma_R = 0._wp + pi_inf_L = 0._wp; pi_inf_R = 0._wp + + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids + rho_L = rho_L + alpha_rho_L(i) + gamma_L = gamma_L + alpha_L(i)*gammas(i) + pi_inf_L = pi_inf_L + alpha_L(i)*pi_infs(i) + + rho_R = rho_R + alpha_rho_R(i) + gamma_R = gamma_R + alpha_R(i)*gammas(i) + pi_inf_R = pi_inf_R + alpha_R(i)*pi_infs(i) + end do + + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_dims + vel_L(i) = vel_L(i)/rho_L + vel_R(i) = vel_R(i)/rho_R + end do + + if (viscous) then + mu_L = 0._wp; mu_R = 0._wp + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids + mu_L = alpha_L(i)/Res_igr(1, i) + mu_L + mu_R = alpha_R(i)/Res_igr(1, i) + mu_R + end do + + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb + 1)%sf(j + 1, k, l) = rhs_vf(momxb + 1)%sf(j + 1, k, l) - & + real(0.5_wp*dt*mu_L*vflux_L_arr(1)*(1._wp/dx(j + 1)), kind=stp) + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j + 1, k, l) = rhs_vf(E_idx)%sf(j + 1, k, l) - & + real(0.5_wp*dt*mu_L*vflux_L_arr(1)*vel_L(2)*(1._wp/dx(j + 1)), kind=stp) + + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb + 1)%sf(j, k, l) = rhs_vf(momxb + 1)%sf(j, k, l) + & + real(0.5_wp*dt*mu_L*vflux_L_arr(1)*(1._wp/dx(j)), kind=stp) + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) + & + real(0.5_wp*dt*mu_L*vflux_L_arr(1)*vel_L(2)*(1._wp/dx(j)), kind=stp) + + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb + 1)%sf(j + 1, k, l) = rhs_vf(momxb + 1)%sf(j + 1, k, l) - & + real(0.5_wp*dt*mu_R*vflux_R_arr(1)*(1._wp/dx(j + 1)), kind=stp) + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j + 1, k, l) = rhs_vf(E_idx)%sf(j + 1, k, l) - & + real(0.5_wp*dt*mu_R*vflux_R_arr(1)*vel_R(2)*(1._wp/dx(j + 1)), kind=stp) + + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb + 1)%sf(j, k, l) = rhs_vf(momxb + 1)%sf(j, k, l) + & + real(0.5_wp*dt*mu_R*vflux_R_arr(1)*(1._wp/dx(j)), kind=stp) + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) + & + real(0.5_wp*dt*mu_R*vflux_R_arr(1)*vel_R(2)*(1._wp/dx(j)), kind=stp) + + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb)%sf(j + 1, k, l) = rhs_vf(momxb)%sf(j + 1, k, l) - & + real(0.5_wp*dt*mu_L*vflux_L_arr(3)*(1._wp/dx(j + 1)), kind=stp) + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j + 1, k, l) = rhs_vf(E_idx)%sf(j + 1, k, l) - & + real(0.5_wp*dt*mu_L*vflux_L_arr(3)*vel_L(1)*(1._wp/dx(j + 1)), kind=stp) + + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb)%sf(j, k, l) = rhs_vf(momxb)%sf(j, k, l) + & + real(0.5_wp*dt*mu_L*vflux_L_arr(3)*(1._wp/dx(j)), kind=stp) + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) + & + real(0.5_wp*dt*mu_L*vflux_L_arr(3)*vel_L(1)*(1._wp/dx(j)), kind=stp) + + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb)%sf(j + 1, k, l) = rhs_vf(momxb)%sf(j + 1, k, l) - & + real(0.5_wp*dt*mu_R*vflux_R_arr(3)*(1._wp/dx(j + 1)), kind=stp) + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j + 1, k, l) = rhs_vf(E_idx)%sf(j + 1, k, l) - & + real(0.5_wp*dt*mu_R*vflux_R_arr(3)*vel_R(1)*(1._wp/dx(j + 1)), kind=stp) + + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb)%sf(j, k, l) = rhs_vf(momxb)%sf(j, k, l) + & + real(0.5_wp*dt*mu_R*vflux_R_arr(3)*(1._wp/dx(j)), kind=stp) + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) + & + real(0.5_wp*dt*mu_R*vflux_R_arr(3)*vel_R(1)*(1._wp/dx(j)), kind=stp) + end if + + E_L = 0._wp; E_R = 0._wp + + $:GPU_LOOP(parallelism='[seq]') + do q = vidxb + 1, vidxe + E_L = E_L + coeff_L(q + offxL)*q_cons_vf(E_idx)%sf(j + q, k, l) + end do + + $:GPU_LOOP(parallelism='[seq]') + do q = vidxb, vidxe - 1 + E_R = E_R + coeff_R(q + offxR)*q_cons_vf(E_idx)%sf(j + q, k, l) + end do + + call s_get_derived_states(E_L, gamma_L, pi_inf_L, rho_L, vel_L, & + E_R, gamma_R, pi_inf_R, rho_R, vel_R, & + pres_L, pres_R, cfl) + + do i = 1, num_fluids + $:GPU_ATOMIC(atomic='update') + rhs_vf(i)%sf(j + 1, k, l) = rhs_vf(i)%sf(j + 1, k, l) + & + real((0.5_wp*dt*(alpha_rho_L(i)* & + vel_L(1))*(1._wp/dx(j + 1)) - & + 0.5_wp*dt*cfl*(alpha_rho_L(i))*(1._wp/dx(j + 1))), kind=stp) + + $:GPU_ATOMIC(atomic='update') + rhs_vf(i)%sf(j, k, l) = rhs_vf(i)%sf(j, k, l) - & + real((0.5_wp*dt*(alpha_rho_L(i)* & + vel_L(1))*(1._wp/dx(j)) - & + 0.5_wp*dt*cfl*(alpha_rho_L(i))*(1._wp/dx(j))), kind=stp) + end do + + if (num_fluids > 1) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids - 1 + $:GPU_ATOMIC(atomic='update') + rhs_vf(advxb + i - 1)%sf(j + 1, k, l) = rhs_vf(advxb + i - 1)%sf(j + 1, k, l) + & + real((0.5_wp*dt*(alpha_L(i)* & + vel_L(1))*(1._wp/dx(j + 1)) - & + 0.5_wp*dt*cfl*(alpha_L(i))*(1._wp/dx(j + 1))), kind=stp) + + $:GPU_ATOMIC(atomic='update') + rhs_vf(advxb + i - 1)%sf(j + 1, k, l) = rhs_vf(advxb + i - 1)%sf(j + 1, k, l) & + - real((0.5_wp*dt*q_cons_vf(advxb + i - 1)%sf(j + 1, k, l)*vel_L(1)*(1._wp/dx(j + 1))), kind=stp) + + $:GPU_ATOMIC(atomic='update') + rhs_vf(advxb + i - 1)%sf(j, k, l) = rhs_vf(advxb + i - 1)%sf(j, k, l) - & + real((0.5_wp*dt*(alpha_L(i)* & + vel_L(1))*(1._wp/dx(j)) - & + 0.5_wp*dt*cfl*(alpha_L(i))*(1._wp/dx(j))), kind=stp) + + $:GPU_ATOMIC(atomic='update') + rhs_vf(advxb + i - 1)%sf(j, k, l) = rhs_vf(advxb + i - 1)%sf(j, k, l) & + + real((0.5_wp*dt*q_cons_vf(advxb + i - 1)%sf(j, k, l)*vel_L(1)*(1._wp/dx(j))), kind=stp) + end do + end if + + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb)%sf(j + 1, k, l) = rhs_vf(momxb)%sf(j + 1, k, l) + & + real((0.5_wp*dt*(rho_L*(vel_L(1))**2.0 + & + pres_L)*(1._wp/dx(j + 1)) - & + 0.5_wp*dt*cfl*(rho_L*vel_L(1))*(1._wp/dx(j + 1))), kind=stp) + + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb + 1)%sf(j + 1, k, l) = rhs_vf(momxb + 1)%sf(j + 1, k, l) + & + real((0.5_wp*dt*rho_L*vel_L(1)*vel_L(2)*(1._wp/dx(j + 1)) - & + 0.5_wp*dt*cfl*(rho_L*vel_L(2))*(1._wp/dx(j + 1))), kind=stp) + + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j + 1, k, l) = rhs_vf(E_idx)%sf(j + 1, k, l) + & + real((0.5_wp*dt*(vel_L(1)*(E_L + & + pres_L))*(1._wp/dx(j + 1)) - & + 0.5_wp*dt*cfl*(E_L)*(1._wp/dx(j + 1))), kind=stp) + + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb)%sf(j, k, l) = rhs_vf(momxb)%sf(j, k, l) - & + real((0.5_wp*dt*(rho_L*(vel_L(1))**2.0 + & + pres_L)*(1._wp/dx(j)) - & + 0.5_wp*dt*cfl*(rho_L*vel_L(1))*(1._wp/dx(j))), kind=stp) + + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb + 1)%sf(j, k, l) = rhs_vf(momxb + 1)%sf(j, k, l) - & + real((0.5_wp*dt*rho_L*vel_L(1)*vel_L(2)*(1._wp/dx(j)) - & + 0.5_wp*dt*cfl*(rho_L*vel_L(2))*(1._wp/dx(j))), kind=stp) + + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) - & + real((0.5_wp*dt*(vel_L(1)*(E_L + & + pres_L))*(1._wp/dx(j)) - & + 0.5_wp*dt*cfl*(E_L)*(1._wp/dx(j))), kind=stp) + + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids + $:GPU_ATOMIC(atomic='update') + rhs_vf(i)%sf(j + 1, k, l) = rhs_vf(i)%sf(j + 1, k, l) + & + real((0.5_wp*dt*(alpha_rho_R(i)* & + vel_R(1))*(1._wp/dx(j + 1)) + & + 0.5_wp*dt*cfl*(alpha_rho_R(i))*(1._wp/dx(j + 1))), kind=stp) + + $:GPU_ATOMIC(atomic='update') + rhs_vf(i)%sf(j, k, l) = rhs_vf(i)%sf(j, k, l) - & + real((0.5_wp*dt*(alpha_rho_R(i)* & + vel_R(1))*(1._wp/dx(j)) + & + 0.5_wp*dt*cfl*(alpha_rho_R(i))*(1._wp/dx(j))), kind=stp) + end do + + if (num_fluids > 1) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids - 1 + $:GPU_ATOMIC(atomic='update') + rhs_vf(advxb + i - 1)%sf(j + 1, k, l) = rhs_vf(advxb + i - 1)%sf(j + 1, k, l) + & + real((0.5_wp*dt*(alpha_R(i)* & + vel_R(1))*(1._wp/dx(j + 1)) + & + 0.5_wp*dt*cfl*(alpha_R(i))*(1._wp/dx(j + 1))), kind=stp) + + $:GPU_ATOMIC(atomic='update') + rhs_vf(advxb + i - 1)%sf(j + 1, k, l) = rhs_vf(advxb + i - 1)%sf(j + 1, k, l) & + - real((0.5_wp*dt*q_cons_vf(advxb + i - 1)%sf(j + 1, k, l)*vel_R(1)*(1._wp/dx(j + 1))), kind=stp) + + $:GPU_ATOMIC(atomic='update') + rhs_vf(advxb + i - 1)%sf(j, k, l) = rhs_vf(advxb + i - 1)%sf(j, k, l) - & + real((0.5_wp*dt*(alpha_R(i)* & + vel_R(1))*(1._wp/dx(j)) + & + 0.5_wp*dt*cfl*(alpha_R(i))*(1._wp/dx(j))), kind=stp) + + $:GPU_ATOMIC(atomic='update') + rhs_vf(advxb + i - 1)%sf(j, k, l) = rhs_vf(advxb + i - 1)%sf(j, k, l) & + + real((0.5_wp*dt*q_cons_vf(advxb + i - 1)%sf(j, k, l)*vel_R(1)*(1._wp/dx(j))), kind=stp) + end do + end if + + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb)%sf(j + 1, k, l) = rhs_vf(momxb)%sf(j + 1, k, l) + & + real((0.5_wp*dt*(rho_R*(vel_R(1))**2.0 + & + pres_R)*(1._wp/dx(j + 1)) + & + 0.5_wp*dt*cfl*(rho_R*vel_R(1))*(1._wp/dx(j + 1))), kind=stp) + + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb + 1)%sf(j + 1, k, l) = rhs_vf(momxb + 1)%sf(j + 1, k, l) + & + real((0.5_wp*dt*rho_R*vel_R(1)*vel_R(2)*(1._wp/dx(j + 1)) + & + 0.5_wp*dt*cfl*(rho_R*vel_R(2))*(1._wp/dx(j + 1))), kind=stp) + + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j + 1, k, l) = rhs_vf(E_idx)%sf(j + 1, k, l) + & + real((0.5_wp*dt*(vel_R(1)*(E_R + & + pres_R))*(1._wp/dx(j + 1)) + & + 0.5_wp*dt*cfl*(E_R)*(1._wp/dx(j + 1))), kind=stp) + + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb)%sf(j, k, l) = rhs_vf(momxb)%sf(j, k, l) - & + real((0.5_wp*dt*(rho_R*(vel_R(1))**2.0 + & + pres_R)*(1._wp/dx(j)) + & + 0.5_wp*dt*cfl*(rho_R*vel_R(1))*(1._wp/dx(j))), kind=stp) + + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb + 1)%sf(j, k, l) = rhs_vf(momxb + 1)%sf(j, k, l) - & + real((0.5_wp*dt*rho_R*vel_R(1)*vel_R(2)*(1._wp/dx(j)) + & + 0.5_wp*dt*cfl*(rho_R*vel_R(2))*(1._wp/dx(j))), kind=stp) + + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) - & + real((0.5_wp*dt*(vel_R(1)*(E_R + & + pres_R))*(1._wp/dx(j)) + & + 0.5_wp*dt*cfl*(E_R)*(1._wp/dx(j))), kind=stp) + + end do + end do + end do + $:END_GPU_PARALLEL_LOOP() + else + #:if not MFC_CASE_OPTIMIZATION or num_dims > 2 + $:GPU_PARALLEL_LOOP(collapse=3, private='[j,k,l,rho_L, rho_R, gamma_L, gamma_R, pi_inf_L, pi_inf_R, mu_L, mu_R, vel_L, vel_R, pres_L, pres_R, alpha_L, alpha_R, alpha_rho_L, alpha_rho_R, F_L, F_R, E_L, E_R, cfl, dvel, dvel_small, rho_sf_small, vflux_L_arr, vflux_R_arr]') + do l = 0, p + do k = 0, n + do j = -1, m + + vflux_L_arr = 0._wp + vflux_R_arr = 0._wp + + #:if MFC_CASE_OPTIMIZATION + #:if igr_order == 5 + !DIR$ unroll 6 + #:elif igr_order == 3 + !DIR$ unroll 4 + #:endif + #:endif + $:GPU_LOOP(parallelism='[seq]') + do q = vidxb, vidxe + + !x-direction contributions + $:GPU_LOOP(parallelism='[seq]') + do i = -1, 1 + rho_L = 0._wp + $:GPU_LOOP(parallelism='[seq]') + do r = 1, num_fluids + rho_L = rho_L + q_cons_vf(r)%sf(j + i + q, k, l) + end do + rho_sf_small(i) = rho_L + end do + + dvel_small(1) = (1/(2._wp*dx(j)))*( & + q_cons_vf(momxb)%sf(j + 1 + q, k, l)/rho_sf_small(1) - & + q_cons_vf(momxb)%sf(j - 1 + q, k, l)/rho_sf_small(-1)) + dvel_small(2) = (1/(2._wp*dx(j)))*( & + q_cons_vf(momxb + 1)%sf(j + 1 + q, k, l)/rho_sf_small(1) - & + q_cons_vf(momxb + 1)%sf(j - 1 + q, k, l)/rho_sf_small(-1)) + dvel_small(3) = (1/(2._wp*dx(j)))*( & + q_cons_vf(momxb + 2)%sf(j + 1 + q, k, l)/rho_sf_small(1) - & + q_cons_vf(momxb + 2)%sf(j - 1 + q, k, l)/rho_sf_small(-1)) + + if (q == 0) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_dims + dvel(i, 1) = dvel_small(i) + end do + end if + + if (q > vidxb) then + vflux_L_arr(1) = vflux_L_arr(1) + coeff_L(q + offxL)*(dvel_small(2)) + vflux_L_arr(2) = vflux_L_arr(2) + coeff_L(q + offxL)*(dvel_small(3)) + vflux_L_arr(3) = vflux_L_arr(3) + coeff_L(q + offxL)*(4._wp*dvel_small(1))/3._wp + end if + if (q < vidxe) then + vflux_R_arr(1) = vflux_R_arr(1) + coeff_R(q + offxR)*(dvel_small(2)) + vflux_R_arr(2) = vflux_R_arr(2) + coeff_R(q + offxR)*(dvel_small(3)) + vflux_R_arr(3) = vflux_R_arr(3) + coeff_R(q + offxR)*(4._wp*dvel_small(1))/3._wp + end if + + !y-direction contributions + $:GPU_LOOP(parallelism='[seq]') + do i = -1, 1 + rho_L = 0._wp + $:GPU_LOOP(parallelism='[seq]') + do r = 1, num_fluids + rho_L = rho_L + q_cons_vf(r)%sf(j + q, k + i, l) + end do + rho_sf_small(i) = rho_L + end do + + dvel_small(1) = (1/(2._wp*dy(k)))*( & + q_cons_vf(momxb)%sf(j + q, k + 1, l)/rho_sf_small(1) - & + q_cons_vf(momxb)%sf(j + q, k - 1, l)/rho_sf_small(-1)) + dvel_small(2) = (1/(2._wp*dy(k)))*( & + q_cons_vf(momxb + 1)%sf(j + q, k + 1, l)/rho_sf_small(1) - & + q_cons_vf(momxb + 1)%sf(j + q, k - 1, l)/rho_sf_small(-1)) + if (q == 0) dvel_small(3) = (1/(2._wp*dy(k)))*( & + q_cons_vf(momxb + 2)%sf(j + q, k + 1, l)/rho_sf_small(1) - & + q_cons_vf(momxb + 2)%sf(j + q, k - 1, l)/rho_sf_small(-1)) + if (q == 0) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_dims + dvel(i, 2) = dvel_small(i) + end do + end if + + if (q > vidxb) then + vflux_L_arr(1) = vflux_L_arr(1) + coeff_L(q + offxL)*(dvel_small(1)) + vflux_L_arr(3) = vflux_L_arr(3) + coeff_L(q + offxL)*(-2._wp*dvel_small(2))/3._wp + end if + if (q < vidxe) then + vflux_R_arr(1) = vflux_R_arr(1) + coeff_R(q + offxR)*(dvel_small(1)) + vflux_R_arr(3) = vflux_R_arr(3) + coeff_R(q + offxR)*(-2._wp*dvel_small(2))/3._wp + end if + + !z-direction contributions + $:GPU_LOOP(parallelism='[seq]') + do i = -1, 1 + rho_L = 0._wp + $:GPU_LOOP(parallelism='[seq]') + do r = 1, num_fluids + rho_L = rho_L + q_cons_vf(r)%sf(j + q, k, l + i) + end do + rho_sf_small(i) = rho_L + end do + + dvel_small(1) = (1/(2._wp*dz(l)))*( & + q_cons_vf(momxb)%sf(j + q, k, l + 1)/rho_sf_small(1) - & + q_cons_vf(momxb)%sf(j + q, k, l - 1)/rho_sf_small(-1)) + if (q == 0) dvel_small(2) = (1/(2._wp*dz(l)))*( & + q_cons_vf(momxb + 1)%sf(j + q, k, l + 1)/rho_sf_small(1) - & + q_cons_vf(momxb + 1)%sf(j + q, k, l - 1)/rho_sf_small(-1)) + dvel_small(3) = (1/(2._wp*dz(l)))*( & + q_cons_vf(momxb + 2)%sf(j + q, k, l + 1)/rho_sf_small(1) - & + q_cons_vf(momxb + 2)%sf(j + q, k, l - 1)/rho_sf_small(-1)) + if (q == 0) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_dims + dvel(i, 3) = dvel_small(i) + end do + end if + + if (q > vidxb) then + vflux_L_arr(2) = vflux_L_arr(2) + coeff_L(q + offxL)*(dvel_small(1)) + vflux_L_arr(3) = vflux_L_arr(3) + coeff_L(q + offxL)*(-2._wp*dvel_small(3))/3._wp + end if + if (q < vidxe) then + vflux_R_arr(2) = vflux_R_arr(2) + coeff_R(q + offxR)*(dvel_small(1)) + vflux_R_arr(3) = vflux_R_arr(3) + coeff_R(q + offxR)*(-2._wp*dvel_small(3))/3._wp + end if + + if (q == 0) then + jac_rhs(j, k, l) = real(alf_igr*(2._wp*(dvel(1, 2)*dvel(2, 1) & + + dvel(1, 3)*dvel(3, 1) & + + dvel(2, 3)*dvel(3, 2)) & + + dvel(1, 1)**2._wp + dvel(2, 2)**2._wp & + + dvel(3, 3)**2._wp & + + (dvel(1, 1) + dvel(2, 2) + dvel(3, 3))**2._wp), kind=stp) + end if + end do + + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids + alpha_rho_L(i) = 0._wp + alpha_rho_R(i) = 0._wp + alpha_L(i) = 0._wp + alpha_R(i) = 0._wp + end do + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_dims + vel_L(i) = 0._wp + vel_R(i) = 0._wp + end do + + $:GPU_LOOP(parallelism='[seq]') + do q = vidxb + 1, vidxe + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids + alpha_rho_L(i) = alpha_rho_L(i) + coeff_L(q + offxL)*q_cons_vf(i)%sf(j + q, k, l) + end do + + if (num_fluids > 1) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids - 1 + alpha_L(i) = alpha_L(i) + coeff_L(q + offxL)*q_cons_vf(E_idx + i)%sf(j + q, k, l) + end do + else + alpha_L(1) = 1._wp + end if + + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_dims + vel_L(i) = vel_L(i) + coeff_L(q + offxL)*q_cons_vf(momxb + i - 1)%sf(j + q, k, l) + end do + end do + + $:GPU_LOOP(parallelism='[seq]') + do q = vidxb, vidxe - 1 + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids + alpha_rho_R(i) = alpha_rho_R(i) + coeff_R(q + offxR)*q_cons_vf(i)%sf(j + q, k, l) + end do + + if (num_fluids > 1) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids - 1 + alpha_R(i) = alpha_R(i) + coeff_R(q + offxR)*q_cons_vf(E_idx + i)%sf(j + q, k, l) + end do + else + alpha_R(1) = 1._wp + end if + + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_dims + vel_R(i) = vel_R(i) + coeff_R(q + offxR)*q_cons_vf(momxb + i - 1)%sf(j + q, k, l) + end do + end do + + if (num_fluids > 1) then + + alpha_L(num_fluids) = 1._wp + alpha_R(num_fluids) = 1._wp + + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids - 1 + alpha_L(num_fluids) = alpha_L(num_fluids) - alpha_L(i) + alpha_R(num_fluids) = alpha_R(num_fluids) - alpha_R(i) + end do + end if + + rho_L = 0._wp; rho_R = 0._wp + gamma_L = 0._wp; gamma_R = 0._wp + pi_inf_L = 0._wp; pi_inf_R = 0._wp + + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids + rho_L = rho_L + alpha_rho_L(i) + gamma_L = gamma_L + alpha_L(i)*gammas(i) + pi_inf_L = pi_inf_L + alpha_L(i)*pi_infs(i) + + rho_R = rho_R + alpha_rho_R(i) + gamma_R = gamma_R + alpha_R(i)*gammas(i) + pi_inf_R = pi_inf_R + alpha_R(i)*pi_infs(i) + end do + + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_dims + vel_L(i) = vel_L(i)/rho_L + vel_R(i) = vel_R(i)/rho_R + end do + + if (viscous) then + mu_L = 0._wp + mu_R = 0._wp + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids + mu_L = alpha_L(i)/Res_igr(1, i) + mu_L + mu_R = alpha_R(i)/Res_igr(1, i) + mu_R + end do + + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb + 1)%sf(j + 1, k, l) = rhs_vf(momxb + 1)%sf(j + 1, k, l) - & + real(0.5_wp*dt*mu_L*vflux_L_arr(1)*(1._wp/dx(j + 1)), kind=stp) + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j + 1, k, l) = rhs_vf(E_idx)%sf(j + 1, k, l) - & + real(0.5_wp*dt*mu_L*vflux_L_arr(1)*vel_L(2)*(1._wp/dx(j + 1)), kind=stp) + + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb + 1)%sf(j, k, l) = rhs_vf(momxb + 1)%sf(j, k, l) + & + real(0.5_wp*dt*mu_L*vflux_L_arr(1)*(1._wp/dx(j)), kind=stp) + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) + & + real(0.5_wp*dt*mu_L*vflux_L_arr(1)*vel_L(2)*(1._wp/dx(j)), kind=stp) + + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb + 1)%sf(j + 1, k, l) = rhs_vf(momxb + 1)%sf(j + 1, k, l) - & + real(0.5_wp*dt*mu_R*vflux_R_arr(1)*(1._wp/dx(j + 1)), kind=stp) + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j + 1, k, l) = rhs_vf(E_idx)%sf(j + 1, k, l) - & + real(0.5_wp*dt*mu_R*vflux_R_arr(1)*vel_R(2)*(1._wp/dx(j + 1)), kind=stp) $:GPU_ATOMIC(atomic='update') rhs_vf(momxb + 1)%sf(j, k, l) = rhs_vf(momxb + 1)%sf(j, k, l) + & @@ -705,6 +1169,34 @@ contains rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) + & real(0.5_wp*dt*mu_R*vflux_R_arr(1)*vel_R(2)*(1._wp/dx(j)), kind=stp) + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb + 2)%sf(j + 1, k, l) = rhs_vf(momxb + 2)%sf(j + 1, k, l) - & + real(0.5_wp*dt*mu_L*vflux_L_arr(2)*(1._wp/dx(j + 1)), kind=stp) + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j + 1, k, l) = rhs_vf(E_idx)%sf(j + 1, k, l) - & + real(0.5_wp*dt*mu_L*vflux_L_arr(2)*vel_L(3)*(1._wp/dx(j + 1)), kind=stp) + + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb + 2)%sf(j, k, l) = rhs_vf(momxb + 2)%sf(j, k, l) + & + real(0.5_wp*dt*mu_L*vflux_L_arr(2)*(1._wp/dx(j)), kind=stp) + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) + & + real(0.5_wp*dt*mu_L*vflux_L_arr(2)*vel_L(3)*(1._wp/dx(j)), kind=stp) + + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb + 2)%sf(j + 1, k, l) = rhs_vf(momxb + 2)%sf(j + 1, k, l) - & + real(0.5_wp*dt*mu_R*vflux_R_arr(2)*(1._wp/dx(j + 1)), kind=stp) + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j + 1, k, l) = rhs_vf(E_idx)%sf(j + 1, k, l) - & + real(0.5_wp*dt*mu_R*vflux_R_arr(2)*vel_R(3)*(1._wp/dx(j + 1)), kind=stp) + + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb + 2)%sf(j, k, l) = rhs_vf(momxb + 2)%sf(j, k, l) + & + real(0.5_wp*dt*mu_R*vflux_R_arr(2)*(1._wp/dx(j)), kind=stp) + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) + & + real(0.5_wp*dt*mu_R*vflux_R_arr(2)*vel_R(3)*(1._wp/dx(j)), kind=stp) + $:GPU_ATOMIC(atomic='update') rhs_vf(momxb)%sf(j + 1, k, l) = rhs_vf(momxb)%sf(j + 1, k, l) - & real(0.5_wp*dt*mu_L*vflux_L_arr(3)*(1._wp/dx(j + 1)), kind=stp) @@ -750,6 +1242,7 @@ contains E_R, gamma_R, pi_inf_R, rho_R, vel_R, & pres_L, pres_R, cfl) + $:GPU_LOOP(parallelism='[seq]') do i = 1, num_fluids $:GPU_ATOMIC(atomic='update') rhs_vf(i)%sf(j + 1, k, l) = rhs_vf(i)%sf(j + 1, k, l) + & @@ -775,17 +1268,17 @@ contains $:GPU_ATOMIC(atomic='update') rhs_vf(advxb + i - 1)%sf(j + 1, k, l) = rhs_vf(advxb + i - 1)%sf(j + 1, k, l) & - - real((0.5_wp*dt*q_cons_vf(advxb + i - 1)%sf(j + 1, k, l)*vel_L(1)*(1._wp/dx(j + 1))), kind=stp) + - real(0.5_wp*dt*q_cons_vf(advxb + i - 1)%sf(j + 1, k, l)*vel_L(1)*(1._wp/dx(j + 1)), kind=stp) $:GPU_ATOMIC(atomic='update') rhs_vf(advxb + i - 1)%sf(j, k, l) = rhs_vf(advxb + i - 1)%sf(j, k, l) - & - real((0.5_wp*dt*(alpha_L(i)* & - vel_L(1))*(1._wp/dx(j)) - & - 0.5_wp*dt*cfl*(alpha_L(i))*(1._wp/dx(j))), kind=stp) + real(0.5_wp*dt*(alpha_L(i)* & + vel_L(1))*(1._wp/dx(j)) - & + 0.5_wp*dt*cfl*(alpha_L(i))*(1._wp/dx(j)), kind=stp) $:GPU_ATOMIC(atomic='update') rhs_vf(advxb + i - 1)%sf(j, k, l) = rhs_vf(advxb + i - 1)%sf(j, k, l) & - + real((0.5_wp*dt*q_cons_vf(advxb + i - 1)%sf(j, k, l)*vel_L(1)*(1._wp/dx(j))), kind=stp) + + real(0.5_wp*dt*q_cons_vf(advxb + i - 1)%sf(j, k, l)*vel_L(1)*(1._wp/dx(j)), kind=stp) end do end if @@ -800,6 +1293,11 @@ contains real((0.5_wp*dt*rho_L*vel_L(1)*vel_L(2)*(1._wp/dx(j + 1)) - & 0.5_wp*dt*cfl*(rho_L*vel_L(2))*(1._wp/dx(j + 1))), kind=stp) + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb + 2)%sf(j + 1, k, l) = rhs_vf(momxb + 2)%sf(j + 1, k, l) + & + real((0.5_wp*dt*rho_L*vel_L(1)*vel_L(3)*(1._wp/dx(j + 1)) - & + 0.5_wp*dt*cfl*(rho_L*vel_L(3))*(1._wp/dx(j + 1))), kind=stp) + $:GPU_ATOMIC(atomic='update') rhs_vf(E_idx)%sf(j + 1, k, l) = rhs_vf(E_idx)%sf(j + 1, k, l) + & real((0.5_wp*dt*(vel_L(1)*(E_L + & @@ -817,6 +1315,11 @@ contains real((0.5_wp*dt*rho_L*vel_L(1)*vel_L(2)*(1._wp/dx(j)) - & 0.5_wp*dt*cfl*(rho_L*vel_L(2))*(1._wp/dx(j))), kind=stp) + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb + 2)%sf(j, k, l) = rhs_vf(momxb + 2)%sf(j, k, l) - & + real((0.5_wp*dt*rho_L*vel_L(1)*vel_L(3)*(1._wp/dx(j)) - & + 0.5_wp*dt*cfl*(rho_L*vel_L(3))*(1._wp/dx(j))), kind=stp) + $:GPU_ATOMIC(atomic='update') rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) - & real((0.5_wp*dt*(vel_L(1)*(E_L + & @@ -874,6 +1377,11 @@ contains real((0.5_wp*dt*rho_R*vel_R(1)*vel_R(2)*(1._wp/dx(j + 1)) + & 0.5_wp*dt*cfl*(rho_R*vel_R(2))*(1._wp/dx(j + 1))), kind=stp) + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb + 2)%sf(j + 1, k, l) = rhs_vf(momxb + 2)%sf(j + 1, k, l) + & + real((0.5_wp*dt*rho_R*vel_R(1)*vel_R(3)*(1._wp/dx(j + 1)) + & + 0.5_wp*dt*cfl*(rho_R*vel_R(3))*(1._wp/dx(j + 1))), kind=stp) + $:GPU_ATOMIC(atomic='update') rhs_vf(E_idx)%sf(j + 1, k, l) = rhs_vf(E_idx)%sf(j + 1, k, l) + & real((0.5_wp*dt*(vel_R(1)*(E_R + & @@ -891,6 +1399,11 @@ contains real((0.5_wp*dt*rho_R*vel_R(1)*vel_R(2)*(1._wp/dx(j)) + & 0.5_wp*dt*cfl*(rho_R*vel_R(2))*(1._wp/dx(j))), kind=stp) + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb + 2)%sf(j, k, l) = rhs_vf(momxb + 2)%sf(j, k, l) - & + real((0.5_wp*dt*rho_R*vel_R(1)*vel_R(3)*(1._wp/dx(j)) + & + 0.5_wp*dt*cfl*(rho_R*vel_R(3))*(1._wp/dx(j))), kind=stp) + $:GPU_ATOMIC(atomic='update') rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) - & real((0.5_wp*dt*(vel_R(1)*(E_R + & @@ -900,525 +1413,408 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP() - else - #:if not MFC_CASE_OPTIMIZATION or num_dims > 2 - $:GPU_PARALLEL_LOOP(collapse=3, private='[j,k,l,rho_L, rho_R, gamma_L, gamma_R, pi_inf_L, pi_inf_R, mu_L, mu_R, vel_L, vel_R, pres_L, pres_R, alpha_L, alpha_R, alpha_rho_L, alpha_rho_R, F_L, F_R, E_L, E_R, cfl, dvel, dvel_small, rho_sf_small, vflux_L_arr, vflux_R_arr]') - do l = 0, p - do k = 0, n - do j = -1, m + $:END_GPU_PARALLEL_LOOP() + #:endif + end if + else if (idir == 2) then + if (p == 0) then + $:GPU_PARALLEL_LOOP(collapse=3, private='[j,k,l,rho_L, rho_R, gamma_L, gamma_R, pi_inf_L, pi_inf_R, mu_L, mu_R, vel_L, vel_R, pres_L, pres_R, alpha_L, alpha_R, alpha_rho_L, alpha_rho_R, F_L, F_R, E_L, E_R, cfl, dvel_small, rho_sf_small, vflux_L_arr, vflux_R_arr]') + do l = 0, p + do k = -1, n + do j = 0, m - vflux_L_arr = 0._wp - vflux_R_arr = 0._wp + if (viscous) then + vflux_L_arr = 0._wp + vflux_R_arr = 0._wp - #:if MFC_CASE_OPTIMIZATION - #:if igr_order == 5 - !DIR$ unroll 6 - #:elif igr_order == 3 - !DIR$ unroll 4 - #:endif + #:if MFC_CASE_OPTIMIZATION + #:if igr_order == 5 + !DIR$ unroll 6 + #:elif igr_order == 3 + !DIR$ unroll 4 #:endif - $:GPU_LOOP(parallelism='[seq]') - do q = vidxb, vidxe - - !x-direction contributions - $:GPU_LOOP(parallelism='[seq]') - do i = -1, 1 - rho_L = 0._wp - $:GPU_LOOP(parallelism='[seq]') - do r = 1, num_fluids - rho_L = rho_L + q_cons_vf(r)%sf(j + i + q, k, l) - end do - rho_sf_small(i) = rho_L - end do - - dvel_small(1) = (1/(2._wp*dx(j)))*( & - q_cons_vf(momxb)%sf(j + 1 + q, k, l)/rho_sf_small(1) - & - q_cons_vf(momxb)%sf(j - 1 + q, k, l)/rho_sf_small(-1)) - dvel_small(2) = (1/(2._wp*dx(j)))*( & - q_cons_vf(momxb + 1)%sf(j + 1 + q, k, l)/rho_sf_small(1) - & - q_cons_vf(momxb + 1)%sf(j - 1 + q, k, l)/rho_sf_small(-1)) - dvel_small(3) = (1/(2._wp*dx(j)))*( & - q_cons_vf(momxb + 2)%sf(j + 1 + q, k, l)/rho_sf_small(1) - & - q_cons_vf(momxb + 2)%sf(j - 1 + q, k, l)/rho_sf_small(-1)) - - if (q == 0) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_dims - dvel(i, 1) = dvel_small(i) - end do - end if - - if (q > vidxb) then - vflux_L_arr(1) = vflux_L_arr(1) + coeff_L(q + offxL)*(dvel_small(2)) - vflux_L_arr(2) = vflux_L_arr(2) + coeff_L(q + offxL)*(dvel_small(3)) - vflux_L_arr(3) = vflux_L_arr(3) + coeff_L(q + offxL)*(4._wp*dvel_small(1))/3._wp - end if - if (q < vidxe) then - vflux_R_arr(1) = vflux_R_arr(1) + coeff_R(q + offxR)*(dvel_small(2)) - vflux_R_arr(2) = vflux_R_arr(2) + coeff_R(q + offxR)*(dvel_small(3)) - vflux_R_arr(3) = vflux_R_arr(3) + coeff_R(q + offxR)*(4._wp*dvel_small(1))/3._wp - end if - - !y-direction contributions - $:GPU_LOOP(parallelism='[seq]') - do i = -1, 1 - rho_L = 0._wp - $:GPU_LOOP(parallelism='[seq]') - do r = 1, num_fluids - rho_L = rho_L + q_cons_vf(r)%sf(j + q, k + i, l) - end do - rho_sf_small(i) = rho_L - end do - - dvel_small(1) = (1/(2._wp*dy(k)))*( & - q_cons_vf(momxb)%sf(j + q, k + 1, l)/rho_sf_small(1) - & - q_cons_vf(momxb)%sf(j + q, k - 1, l)/rho_sf_small(-1)) - dvel_small(2) = (1/(2._wp*dy(k)))*( & - q_cons_vf(momxb + 1)%sf(j + q, k + 1, l)/rho_sf_small(1) - & - q_cons_vf(momxb + 1)%sf(j + q, k - 1, l)/rho_sf_small(-1)) - if (q == 0) dvel_small(3) = (1/(2._wp*dy(k)))*( & - q_cons_vf(momxb + 2)%sf(j + q, k + 1, l)/rho_sf_small(1) - & - q_cons_vf(momxb + 2)%sf(j + q, k - 1, l)/rho_sf_small(-1)) - if (q == 0) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_dims - dvel(i, 2) = dvel_small(i) - end do - end if - - if (q > vidxb) then - vflux_L_arr(1) = vflux_L_arr(1) + coeff_L(q + offxL)*(dvel_small(1)) - vflux_L_arr(3) = vflux_L_arr(3) + coeff_L(q + offxL)*(-2._wp*dvel_small(2))/3._wp - end if - if (q < vidxe) then - vflux_R_arr(1) = vflux_R_arr(1) + coeff_R(q + offxR)*(dvel_small(1)) - vflux_R_arr(3) = vflux_R_arr(3) + coeff_R(q + offxR)*(-2._wp*dvel_small(2))/3._wp - end if - - !z-direction contributions - $:GPU_LOOP(parallelism='[seq]') - do i = -1, 1 - rho_L = 0._wp - $:GPU_LOOP(parallelism='[seq]') - do r = 1, num_fluids - rho_L = rho_L + q_cons_vf(r)%sf(j + q, k, l + i) - end do - rho_sf_small(i) = rho_L - end do - - dvel_small(1) = (1/(2._wp*dz(l)))*( & - q_cons_vf(momxb)%sf(j + q, k, l + 1)/rho_sf_small(1) - & - q_cons_vf(momxb)%sf(j + q, k, l - 1)/rho_sf_small(-1)) - if (q == 0) dvel_small(2) = (1/(2._wp*dz(l)))*( & - q_cons_vf(momxb + 1)%sf(j + q, k, l + 1)/rho_sf_small(1) - & - q_cons_vf(momxb + 1)%sf(j + q, k, l - 1)/rho_sf_small(-1)) - dvel_small(3) = (1/(2._wp*dz(l)))*( & - q_cons_vf(momxb + 2)%sf(j + q, k, l + 1)/rho_sf_small(1) - & - q_cons_vf(momxb + 2)%sf(j + q, k, l - 1)/rho_sf_small(-1)) - if (q == 0) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_dims - dvel(i, 3) = dvel_small(i) - end do - end if - - if (q > vidxb) then - vflux_L_arr(2) = vflux_L_arr(2) + coeff_L(q + offxL)*(dvel_small(1)) - vflux_L_arr(3) = vflux_L_arr(3) + coeff_L(q + offxL)*(-2._wp*dvel_small(3))/3._wp - end if - if (q < vidxe) then - vflux_R_arr(2) = vflux_R_arr(2) + coeff_R(q + offxR)*(dvel_small(1)) - vflux_R_arr(3) = vflux_R_arr(3) + coeff_R(q + offxR)*(-2._wp*dvel_small(3))/3._wp - end if - - if (q == 0) then - jac_rhs(j, k, l) = real(alf_igr*(2._wp*(dvel(1, 2)*dvel(2, 1) & - + dvel(1, 3)*dvel(3, 1) & - + dvel(2, 3)*dvel(3, 2)) & - + dvel(1, 1)**2._wp + dvel(2, 2)**2._wp & - + dvel(3, 3)**2._wp & - + (dvel(1, 1) + dvel(2, 2) + dvel(3, 3))**2._wp), kind=stp) - end if - end do - - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - alpha_rho_L(i) = 0._wp - alpha_rho_R(i) = 0._wp - alpha_L(i) = 0._wp - alpha_R(i) = 0._wp - end do - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_dims - vel_L(i) = 0._wp - vel_R(i) = 0._wp - end do + #:endif + $:GPU_LOOP(parallelism='[seq]') + do q = vidxb, vidxe + !x-direction contributions $:GPU_LOOP(parallelism='[seq]') - do q = vidxb + 1, vidxe - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - alpha_rho_L(i) = alpha_rho_L(i) + coeff_L(q + offxL)*q_cons_vf(i)%sf(j + q, k, l) - end do - - if (num_fluids > 1) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - 1 - alpha_L(i) = alpha_L(i) + coeff_L(q + offxL)*q_cons_vf(E_idx + i)%sf(j + q, k, l) - end do - else - alpha_L(1) = 1._wp - end if - + do i = -1, 1 + rho_L = 0._wp $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_dims - vel_L(i) = vel_L(i) + coeff_L(q + offxL)*q_cons_vf(momxb + i - 1)%sf(j + q, k, l) + do r = 1, num_fluids + rho_L = rho_L + q_cons_vf(r)%sf(j + i, k + q, l) end do + rho_sf_small(i) = rho_L end do - $:GPU_LOOP(parallelism='[seq]') - do q = vidxb, vidxe - 1 - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - alpha_rho_R(i) = alpha_rho_R(i) + coeff_R(q + offxR)*q_cons_vf(i)%sf(j + q, k, l) - end do + dvel_small(1) = (1/(2._wp*dx(j)))*( & + q_cons_vf(momxb)%sf(j + 1, k + q, l)/rho_sf_small(1) - & + q_cons_vf(momxb)%sf(j - 1, k + q, l)/rho_sf_small(-1)) + dvel_small(2) = (1/(2._wp*dx(j)))*( & + q_cons_vf(momxb + 1)%sf(j + 1, k + q, l)/rho_sf_small(1) - & + q_cons_vf(momxb + 1)%sf(j - 1, k + q, l)/rho_sf_small(-1)) - if (num_fluids > 1) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - 1 - alpha_R(i) = alpha_R(i) + coeff_R(q + offxR)*q_cons_vf(E_idx + i)%sf(j + q, k, l) - end do - else - alpha_R(1) = 1._wp - end if + if (q > vidxb) then + vflux_L_arr(1) = vflux_L_arr(1) + coeff_L(q + offxL)*(dvel_small(2)) + vflux_L_arr(3) = vflux_L_arr(3) + coeff_L(q + offxL)*(-2._wp*dvel_small(1))/3._wp + end if + if (q < vidxe) then + vflux_R_arr(1) = vflux_R_arr(1) + coeff_R(q + offxR)*(dvel_small(2)) + vflux_R_arr(3) = vflux_R_arr(3) + coeff_R(q + offxR)*(-2._wp*dvel_small(1))/3._wp + end if + !y-direction contributions + $:GPU_LOOP(parallelism='[seq]') + do i = -1, 1 + rho_L = 0._wp $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_dims - vel_R(i) = vel_R(i) + coeff_R(q + offxR)*q_cons_vf(momxb + i - 1)%sf(j + q, k, l) + do r = 1, num_fluids + rho_L = rho_L + q_cons_vf(r)%sf(j, k + i + q, l) end do + rho_sf_small(i) = rho_L end do - if (num_fluids > 1) then - - alpha_L(num_fluids) = 1._wp - alpha_R(num_fluids) = 1._wp + dvel_small(1) = (1/(2._wp*dy(k)))*( & + q_cons_vf(momxb)%sf(j, k + 1 + q, l)/rho_sf_small(1) - & + q_cons_vf(momxb)%sf(j, k - 1 + q, l)/rho_sf_small(-1)) + dvel_small(2) = (1/(2._wp*dy(k)))*( & + q_cons_vf(momxb + 1)%sf(j, k + 1 + q, l)/rho_sf_small(1) - & + q_cons_vf(momxb + 1)%sf(j, k - 1 + q, l)/rho_sf_small(-1)) - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - 1 - alpha_L(num_fluids) = alpha_L(num_fluids) - alpha_L(i) - alpha_R(num_fluids) = alpha_R(num_fluids) - alpha_R(i) - end do + if (q > vidxb) then + vflux_L_arr(1) = vflux_L_arr(1) + coeff_L(q + offxL)*(dvel_small(1)) + vflux_L_arr(3) = vflux_L_arr(3) + coeff_L(q + offxL)*(4._wp*dvel_small(2))/3._wp end if + if (q < vidxe) then + vflux_R_arr(1) = vflux_R_arr(1) + coeff_R(q + offxR)*(dvel_small(1)) + vflux_R_arr(3) = vflux_R_arr(3) + coeff_R(q + offxR)*(4._wp*dvel_small(2))/3._wp + end if + end do + end if - rho_L = 0._wp; rho_R = 0._wp - gamma_L = 0._wp; gamma_R = 0._wp - pi_inf_L = 0._wp; pi_inf_R = 0._wp - - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - rho_L = rho_L + alpha_rho_L(i) - gamma_L = gamma_L + alpha_L(i)*gammas(i) - pi_inf_L = pi_inf_L + alpha_L(i)*pi_infs(i) + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids + alpha_rho_L(i) = 0._wp + alpha_rho_R(i) = 0._wp + alpha_L(i) = 0._wp + alpha_R(i) = 0._wp + end do + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_dims + vel_L(i) = 0._wp + vel_R(i) = 0._wp + end do - rho_R = rho_R + alpha_rho_R(i) - gamma_R = gamma_R + alpha_R(i)*gammas(i) - pi_inf_R = pi_inf_R + alpha_R(i)*pi_infs(i) - end do + $:GPU_LOOP(parallelism='[seq]') + do q = vidxb + 1, vidxe + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids + alpha_rho_L(i) = alpha_rho_L(i) + coeff_L(q + offxL)*q_cons_vf(i)%sf(j, k + q, l) + end do + if (num_fluids > 1) then $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_dims - vel_L(i) = vel_L(i)/rho_L - vel_R(i) = vel_R(i)/rho_R + do i = 1, num_fluids - 1 + alpha_L(i) = alpha_L(i) + coeff_L(q + offxL)*q_cons_vf(E_idx + i)%sf(j, k + q, l) end do + else + alpha_L(1) = 1._wp + end if - if (viscous) then - mu_L = 0._wp - mu_R = 0._wp - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - mu_L = alpha_L(i)/Res_igr(1, i) + mu_L - mu_R = alpha_R(i)/Res_igr(1, i) + mu_R - end do + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_dims + vel_L(i) = vel_L(i) + coeff_L(q + offxL)*q_cons_vf(momxb + i - 1)%sf(j, k + q, l) + end do + end do - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 1)%sf(j + 1, k, l) = rhs_vf(momxb + 1)%sf(j + 1, k, l) - & - real(0.5_wp*dt*mu_L*vflux_L_arr(1)*(1._wp/dx(j + 1)), kind=stp) - $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j + 1, k, l) = rhs_vf(E_idx)%sf(j + 1, k, l) - & - real(0.5_wp*dt*mu_L*vflux_L_arr(1)*vel_L(2)*(1._wp/dx(j + 1)), kind=stp) + $:GPU_LOOP(parallelism='[seq]') + do q = vidxb, vidxe - 1 + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids + alpha_rho_R(i) = alpha_rho_R(i) + coeff_R(q + offxR)*q_cons_vf(i)%sf(j, k + q, l) + end do - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 1)%sf(j, k, l) = rhs_vf(momxb + 1)%sf(j, k, l) + & - real(0.5_wp*dt*mu_L*vflux_L_arr(1)*(1._wp/dx(j)), kind=stp) - $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) + & - real(0.5_wp*dt*mu_L*vflux_L_arr(1)*vel_L(2)*(1._wp/dx(j)), kind=stp) + if (num_fluids > 1) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids - 1 + alpha_R(i) = alpha_R(i) + coeff_R(q + offxR)*q_cons_vf(E_idx + i)%sf(j, k + q, l) + end do + else + alpha_R(1) = 1._wp + end if - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 1)%sf(j + 1, k, l) = rhs_vf(momxb + 1)%sf(j + 1, k, l) - & - real(0.5_wp*dt*mu_R*vflux_R_arr(1)*(1._wp/dx(j + 1)), kind=stp) - $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j + 1, k, l) = rhs_vf(E_idx)%sf(j + 1, k, l) - & - real(0.5_wp*dt*mu_R*vflux_R_arr(1)*vel_R(2)*(1._wp/dx(j + 1)), kind=stp) + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_dims + vel_R(i) = vel_R(i) + coeff_R(q + offxR)*q_cons_vf(momxb + i - 1)%sf(j, k + q, l) + end do + end do - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 1)%sf(j, k, l) = rhs_vf(momxb + 1)%sf(j, k, l) + & - real(0.5_wp*dt*mu_R*vflux_R_arr(1)*(1._wp/dx(j)), kind=stp) - $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) + & - real(0.5_wp*dt*mu_R*vflux_R_arr(1)*vel_R(2)*(1._wp/dx(j)), kind=stp) + if (num_fluids > 1) then - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 2)%sf(j + 1, k, l) = rhs_vf(momxb + 2)%sf(j + 1, k, l) - & - real(0.5_wp*dt*mu_L*vflux_L_arr(2)*(1._wp/dx(j + 1)), kind=stp) - $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j + 1, k, l) = rhs_vf(E_idx)%sf(j + 1, k, l) - & - real(0.5_wp*dt*mu_L*vflux_L_arr(2)*vel_L(3)*(1._wp/dx(j + 1)), kind=stp) + alpha_L(num_fluids) = 1._wp + alpha_R(num_fluids) = 1._wp - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 2)%sf(j, k, l) = rhs_vf(momxb + 2)%sf(j, k, l) + & - real(0.5_wp*dt*mu_L*vflux_L_arr(2)*(1._wp/dx(j)), kind=stp) - $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) + & - real(0.5_wp*dt*mu_L*vflux_L_arr(2)*vel_L(3)*(1._wp/dx(j)), kind=stp) + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids - 1 + alpha_L(num_fluids) = alpha_L(num_fluids) - alpha_L(i) + alpha_R(num_fluids) = alpha_R(num_fluids) - alpha_R(i) + end do + end if - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 2)%sf(j + 1, k, l) = rhs_vf(momxb + 2)%sf(j + 1, k, l) - & - real(0.5_wp*dt*mu_R*vflux_R_arr(2)*(1._wp/dx(j + 1)), kind=stp) - $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j + 1, k, l) = rhs_vf(E_idx)%sf(j + 1, k, l) - & - real(0.5_wp*dt*mu_R*vflux_R_arr(2)*vel_R(3)*(1._wp/dx(j + 1)), kind=stp) + rho_L = 0._wp; rho_R = 0._wp + gamma_L = 0._wp; gamma_R = 0._wp + pi_inf_L = 0._wp; pi_inf_R = 0._wp - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 2)%sf(j, k, l) = rhs_vf(momxb + 2)%sf(j, k, l) + & - real(0.5_wp*dt*mu_R*vflux_R_arr(2)*(1._wp/dx(j)), kind=stp) - $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) + & - real(0.5_wp*dt*mu_R*vflux_R_arr(2)*vel_R(3)*(1._wp/dx(j)), kind=stp) + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids + rho_L = rho_L + alpha_rho_L(i) + gamma_L = gamma_L + alpha_L(i)*gammas(i) + pi_inf_L = pi_inf_L + alpha_L(i)*pi_infs(i) - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb)%sf(j + 1, k, l) = rhs_vf(momxb)%sf(j + 1, k, l) - & - real(0.5_wp*dt*mu_L*vflux_L_arr(3)*(1._wp/dx(j + 1)), kind=stp) - $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j + 1, k, l) = rhs_vf(E_idx)%sf(j + 1, k, l) - & - real(0.5_wp*dt*mu_L*vflux_L_arr(3)*vel_L(1)*(1._wp/dx(j + 1)), kind=stp) + rho_R = rho_R + alpha_rho_R(i) + gamma_R = gamma_R + alpha_R(i)*gammas(i) + pi_inf_R = pi_inf_R + alpha_R(i)*pi_infs(i) + end do - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb)%sf(j, k, l) = rhs_vf(momxb)%sf(j, k, l) + & - real(0.5_wp*dt*mu_L*vflux_L_arr(3)*(1._wp/dx(j)), kind=stp) - $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) + & - real(0.5_wp*dt*mu_L*vflux_L_arr(3)*vel_L(1)*(1._wp/dx(j)), kind=stp) + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_dims + vel_L(i) = vel_L(i)/rho_L + vel_R(i) = vel_R(i)/rho_R + end do - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb)%sf(j + 1, k, l) = rhs_vf(momxb)%sf(j + 1, k, l) - & - real(0.5_wp*dt*mu_R*vflux_R_arr(3)*(1._wp/dx(j + 1)), kind=stp) - $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j + 1, k, l) = rhs_vf(E_idx)%sf(j + 1, k, l) - & - real(0.5_wp*dt*mu_R*vflux_R_arr(3)*vel_R(1)*(1._wp/dx(j + 1)), kind=stp) + if (viscous) then + mu_L = 0._wp + mu_R = 0._wp + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids + mu_L = alpha_L(i)/Res_igr(1, i) + mu_L + mu_R = alpha_R(i)/Res_igr(1, i) + mu_R + end do - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb)%sf(j, k, l) = rhs_vf(momxb)%sf(j, k, l) + & - real(0.5_wp*dt*mu_R*vflux_R_arr(3)*(1._wp/dx(j)), kind=stp) - $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) + & - real(0.5_wp*dt*mu_R*vflux_R_arr(3)*vel_R(1)*(1._wp/dx(j)), kind=stp) - end if + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb)%sf(j, k + 1, l) = rhs_vf(momxb)%sf(j, k + 1, l) - & + real(0.5_wp*dt*mu_L*vflux_L_arr(1)*(1._wp/dy(k + 1)), kind=stp) + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j, k + 1, l) = rhs_vf(E_idx)%sf(j, k + 1, l) - & + real(0.5_wp*dt*mu_L*vflux_L_arr(1)*vel_L(1)*(1._wp/dy(k + 1)), kind=stp) - E_L = 0._wp; E_R = 0._wp + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb)%sf(j, k, l) = rhs_vf(momxb)%sf(j, k, l) + & + real(0.5_wp*dt*mu_L*vflux_L_arr(1)*(1._wp/dy(k)), kind=stp) + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) + & + real(0.5_wp*dt*mu_L*vflux_L_arr(1)*vel_L(1)*(1._wp/dy(k)), kind=stp) - $:GPU_LOOP(parallelism='[seq]') - do q = vidxb + 1, vidxe - E_L = E_L + coeff_L(q + offxL)*q_cons_vf(E_idx)%sf(j + q, k, l) - end do + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb)%sf(j, k + 1, l) = rhs_vf(momxb)%sf(j, k + 1, l) - & + real(0.5_wp*dt*mu_R*vflux_R_arr(1)*(1._wp/dy(k + 1)), kind=stp) + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j, k + 1, l) = rhs_vf(E_idx)%sf(j, k + 1, l) - & + real(0.5_wp*dt*mu_R*vflux_R_arr(1)*vel_R(1)*(1._wp/dy(k + 1)), kind=stp) - $:GPU_LOOP(parallelism='[seq]') - do q = vidxb, vidxe - 1 - E_R = E_R + coeff_R(q + offxR)*q_cons_vf(E_idx)%sf(j + q, k, l) - end do + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb)%sf(j, k, l) = rhs_vf(momxb)%sf(j, k, l) + & + real(0.5_wp*dt*mu_R*vflux_R_arr(1)*(1._wp/dy(k)), kind=stp) + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) + & + real(0.5_wp*dt*mu_R*vflux_R_arr(1)*vel_R(1)*(1._wp/dy(k)), kind=stp) - call s_get_derived_states(E_L, gamma_L, pi_inf_L, rho_L, vel_L, & - E_R, gamma_R, pi_inf_R, rho_R, vel_R, & - pres_L, pres_R, cfl) + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb + 1)%sf(j, k + 1, l) = rhs_vf(momxb + 1)%sf(j, k + 1, l) - & + real(0.5_wp*dt*mu_L*vflux_L_arr(3)*(1._wp/dy(k + 1)), kind=stp) + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j, k + 1, l) = rhs_vf(E_idx)%sf(j, k + 1, l) - & + real(0.5_wp*dt*mu_L*vflux_L_arr(3)*vel_L(2)*(1._wp/dy(k + 1)), kind=stp) - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - $:GPU_ATOMIC(atomic='update') - rhs_vf(i)%sf(j + 1, k, l) = rhs_vf(i)%sf(j + 1, k, l) + & - real((0.5_wp*dt*(alpha_rho_L(i)* & - vel_L(1))*(1._wp/dx(j + 1)) - & - 0.5_wp*dt*cfl*(alpha_rho_L(i))*(1._wp/dx(j + 1))), kind=stp) + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb + 1)%sf(j, k, l) = rhs_vf(momxb + 1)%sf(j, k, l) + & + real(0.5_wp*dt*mu_L*vflux_L_arr(3)*(1._wp/dy(k)), kind=stp) + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) + & + real(0.5_wp*dt*mu_L*vflux_L_arr(3)*vel_L(2)*(1._wp/dy(k)), kind=stp) - $:GPU_ATOMIC(atomic='update') - rhs_vf(i)%sf(j, k, l) = rhs_vf(i)%sf(j, k, l) - & - real((0.5_wp*dt*(alpha_rho_L(i)* & - vel_L(1))*(1._wp/dx(j)) - & - 0.5_wp*dt*cfl*(alpha_rho_L(i))*(1._wp/dx(j))), kind=stp) - end do + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb + 1)%sf(j, k + 1, l) = rhs_vf(momxb + 1)%sf(j, k + 1, l) - & + real(0.5_wp*dt*mu_R*vflux_R_arr(3)*(1._wp/dy(k + 1)), kind=stp) + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j, k + 1, l) = rhs_vf(E_idx)%sf(j, k + 1, l) - & + real(0.5_wp*dt*mu_R*vflux_R_arr(3)*vel_R(2)*(1._wp/dy(k + 1)), kind=stp) - if (num_fluids > 1) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - 1 - $:GPU_ATOMIC(atomic='update') - rhs_vf(advxb + i - 1)%sf(j + 1, k, l) = rhs_vf(advxb + i - 1)%sf(j + 1, k, l) + & - real((0.5_wp*dt*(alpha_L(i)* & - vel_L(1))*(1._wp/dx(j + 1)) - & - 0.5_wp*dt*cfl*(alpha_L(i))*(1._wp/dx(j + 1))), kind=stp) - - $:GPU_ATOMIC(atomic='update') - rhs_vf(advxb + i - 1)%sf(j + 1, k, l) = rhs_vf(advxb + i - 1)%sf(j + 1, k, l) & - - real(0.5_wp*dt*q_cons_vf(advxb + i - 1)%sf(j + 1, k, l)*vel_L(1)*(1._wp/dx(j + 1)), kind=stp) - - $:GPU_ATOMIC(atomic='update') - rhs_vf(advxb + i - 1)%sf(j, k, l) = rhs_vf(advxb + i - 1)%sf(j, k, l) - & - real(0.5_wp*dt*(alpha_L(i)* & - vel_L(1))*(1._wp/dx(j)) - & - 0.5_wp*dt*cfl*(alpha_L(i))*(1._wp/dx(j)), kind=stp) + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb + 1)%sf(j, k, l) = rhs_vf(momxb + 1)%sf(j, k, l) + & + real(0.5_wp*dt*mu_R*vflux_R_arr(3)*(1._wp/dy(k)), kind=stp) + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) + & + real(0.5_wp*dt*mu_R*vflux_R_arr(3)*vel_R(2)*(1._wp/dy(k)), kind=stp) + end if - $:GPU_ATOMIC(atomic='update') - rhs_vf(advxb + i - 1)%sf(j, k, l) = rhs_vf(advxb + i - 1)%sf(j, k, l) & - + real(0.5_wp*dt*q_cons_vf(advxb + i - 1)%sf(j, k, l)*vel_L(1)*(1._wp/dx(j)), kind=stp) - end do - end if + E_L = 0._wp; E_R = 0._wp + F_L = 0._wp; F_R = 0._wp - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb)%sf(j + 1, k, l) = rhs_vf(momxb)%sf(j + 1, k, l) + & - real((0.5_wp*dt*(rho_L*(vel_L(1))**2.0 + & - pres_L)*(1._wp/dx(j + 1)) - & - 0.5_wp*dt*cfl*(rho_L*vel_L(1))*(1._wp/dx(j + 1))), kind=stp) + $:GPU_LOOP(parallelism='[seq]') + do q = vidxb + 1, vidxe + E_L = E_L + coeff_L(q + offxL)*q_cons_vf(E_idx)%sf(j, k + q, l) + F_L = F_L + coeff_L(q + offxL)*jac(j, k + q, l) + end do - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 1)%sf(j + 1, k, l) = rhs_vf(momxb + 1)%sf(j + 1, k, l) + & - real((0.5_wp*dt*rho_L*vel_L(1)*vel_L(2)*(1._wp/dx(j + 1)) - & - 0.5_wp*dt*cfl*(rho_L*vel_L(2))*(1._wp/dx(j + 1))), kind=stp) + $:GPU_LOOP(parallelism='[seq]') + do q = vidxb, vidxe - 1 + E_R = E_R + coeff_R(q + offxR)*q_cons_vf(E_idx)%sf(j, k + q, l) + F_R = F_R + coeff_R(q + offxR)*jac(j, k + q, l) + end do - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 2)%sf(j + 1, k, l) = rhs_vf(momxb + 2)%sf(j + 1, k, l) + & - real((0.5_wp*dt*rho_L*vel_L(1)*vel_L(3)*(1._wp/dx(j + 1)) - & - 0.5_wp*dt*cfl*(rho_L*vel_L(3))*(1._wp/dx(j + 1))), kind=stp) + call s_get_derived_states(E_L, gamma_L, pi_inf_L, rho_L, vel_L, & + E_R, gamma_R, pi_inf_R, rho_R, vel_R, & + pres_L, pres_R, cfl) - $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j + 1, k, l) = rhs_vf(E_idx)%sf(j + 1, k, l) + & - real((0.5_wp*dt*(vel_L(1)*(E_L + & - pres_L))*(1._wp/dx(j + 1)) - & - 0.5_wp*dt*cfl*(E_L)*(1._wp/dx(j + 1))), kind=stp) + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids + $:GPU_ATOMIC(atomic='update') + rhs_vf(i)%sf(j, k + 1, l) = rhs_vf(i)%sf(j, k + 1, l) + & + real(0.5_wp*dt*(alpha_rho_L(i)* & + vel_L(2))*(1._wp/dy(k + 1)) - & + 0.5_wp*dt*cfl*(alpha_rho_L(i))*(1._wp/dy(k + 1)), kind=stp) - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb)%sf(j, k, l) = rhs_vf(momxb)%sf(j, k, l) - & - real((0.5_wp*dt*(rho_L*(vel_L(1))**2.0 + & - pres_L)*(1._wp/dx(j)) - & - 0.5_wp*dt*cfl*(rho_L*vel_L(1))*(1._wp/dx(j))), kind=stp) + $:GPU_ATOMIC(atomic='update') + rhs_vf(i)%sf(j, k, l) = rhs_vf(i)%sf(j, k, l) - & + real(0.5_wp*dt*(alpha_rho_L(i)* & + vel_L(2))*(1._wp/dy(k)) - & + 0.5_wp*dt*cfl*(alpha_rho_L(i))*(1._wp/dy(k)), kind=stp) + end do + if (num_fluids > 1) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids - 1 $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 1)%sf(j, k, l) = rhs_vf(momxb + 1)%sf(j, k, l) - & - real((0.5_wp*dt*rho_L*vel_L(1)*vel_L(2)*(1._wp/dx(j)) - & - 0.5_wp*dt*cfl*(rho_L*vel_L(2))*(1._wp/dx(j))), kind=stp) + rhs_vf(advxb + i - 1)%sf(j, k + 1, l) = rhs_vf(advxb + i - 1)%sf(j, k + 1, l) + & + real(0.5_wp*dt*(alpha_L(i)* & + vel_L(2))*(1._wp/dy(k + 1)) - & + 0.5_wp*dt*cfl*(alpha_L(i))*(1._wp/dy(k + 1)), kind=stp) $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 2)%sf(j, k, l) = rhs_vf(momxb + 2)%sf(j, k, l) - & - real((0.5_wp*dt*rho_L*vel_L(1)*vel_L(3)*(1._wp/dx(j)) - & - 0.5_wp*dt*cfl*(rho_L*vel_L(3))*(1._wp/dx(j))), kind=stp) + rhs_vf(advxb + i - 1)%sf(j, k + 1, l) = rhs_vf(advxb + i - 1)%sf(j, k + 1, l) & + - real(0.5_wp*dt*q_cons_vf(advxb + i - 1)%sf(j, k + 1, l)*vel_L(2)*(1._wp/dy(k + 1)), kind=stp) $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) - & - real((0.5_wp*dt*(vel_L(1)*(E_L + & - pres_L))*(1._wp/dx(j)) - & - 0.5_wp*dt*cfl*(E_L)*(1._wp/dx(j))), kind=stp) + rhs_vf(advxb + i - 1)%sf(j, k, l) = rhs_vf(advxb + i - 1)%sf(j, k, l) - & + real(0.5_wp*dt*(alpha_L(i)* & + vel_L(2))*(1._wp/dy(k)) - & + 0.5_wp*dt*cfl*(alpha_L(i))*(1._wp/dy(k)), kind=stp) - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - $:GPU_ATOMIC(atomic='update') - rhs_vf(i)%sf(j + 1, k, l) = rhs_vf(i)%sf(j + 1, k, l) + & - real((0.5_wp*dt*(alpha_rho_R(i)* & - vel_R(1))*(1._wp/dx(j + 1)) + & - 0.5_wp*dt*cfl*(alpha_rho_R(i))*(1._wp/dx(j + 1))), kind=stp) + $:GPU_ATOMIC(atomic='update') + rhs_vf(advxb + i - 1)%sf(j, k, l) = rhs_vf(advxb + i - 1)%sf(j, k, l) & + + real(0.5_wp*dt*q_cons_vf(advxb + i - 1)%sf(j, k, l)*vel_L(2)*(1._wp/dy(k)), kind=stp) + end do + end if - $:GPU_ATOMIC(atomic='update') - rhs_vf(i)%sf(j, k, l) = rhs_vf(i)%sf(j, k, l) - & - real((0.5_wp*dt*(alpha_rho_R(i)* & - vel_R(1))*(1._wp/dx(j)) + & - 0.5_wp*dt*cfl*(alpha_rho_R(i))*(1._wp/dx(j))), kind=stp) - end do + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb + 1)%sf(j, k + 1, l) = rhs_vf(momxb + 1)%sf(j, k + 1, l) + & + real(0.5_wp*dt*(rho_L*(vel_L(2))**2.0 + & + pres_L + F_L)*(1._wp/dy(k + 1)) - & + 0.5_wp*dt*cfl*(rho_L*vel_L(2))*(1._wp/dy(k + 1)), kind=stp) - if (num_fluids > 1) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - 1 - $:GPU_ATOMIC(atomic='update') - rhs_vf(advxb + i - 1)%sf(j + 1, k, l) = rhs_vf(advxb + i - 1)%sf(j + 1, k, l) + & - real((0.5_wp*dt*(alpha_R(i)* & - vel_R(1))*(1._wp/dx(j + 1)) + & - 0.5_wp*dt*cfl*(alpha_R(i))*(1._wp/dx(j + 1))), kind=stp) - - $:GPU_ATOMIC(atomic='update') - rhs_vf(advxb + i - 1)%sf(j + 1, k, l) = rhs_vf(advxb + i - 1)%sf(j + 1, k, l) & - - real((0.5_wp*dt*q_cons_vf(advxb + i - 1)%sf(j + 1, k, l)*vel_R(1)*(1._wp/dx(j + 1))), kind=stp) - - $:GPU_ATOMIC(atomic='update') - rhs_vf(advxb + i - 1)%sf(j, k, l) = rhs_vf(advxb + i - 1)%sf(j, k, l) - & - real((0.5_wp*dt*(alpha_R(i)* & - vel_R(1))*(1._wp/dx(j)) + & - 0.5_wp*dt*cfl*(alpha_R(i))*(1._wp/dx(j))), kind=stp) + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb)%sf(j, k + 1, l) = rhs_vf(momxb)%sf(j, k + 1, l) + & + real(0.5_wp*dt*rho_L*vel_L(1)*vel_L(2)*(1._wp/dy(k + 1)) - & + 0.5_wp*dt*cfl*(rho_L*vel_L(1))*(1._wp/dy(k + 1)), kind=stp) - $:GPU_ATOMIC(atomic='update') - rhs_vf(advxb + i - 1)%sf(j, k, l) = rhs_vf(advxb + i - 1)%sf(j, k, l) & - + real((0.5_wp*dt*q_cons_vf(advxb + i - 1)%sf(j, k, l)*vel_R(1)*(1._wp/dx(j))), kind=stp) - end do - end if + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j, k + 1, l) = rhs_vf(E_idx)%sf(j, k + 1, l) + & + real(0.5_wp*dt*(vel_L(2)*(E_L + & + pres_L + F_L))*(1._wp/dy(k + 1)) - & + 0.5_wp*dt*cfl*(E_L)*(1._wp/dy(k + 1)), kind=stp) - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb)%sf(j + 1, k, l) = rhs_vf(momxb)%sf(j + 1, k, l) + & - real((0.5_wp*dt*(rho_R*(vel_R(1))**2.0 + & - pres_R)*(1._wp/dx(j + 1)) + & - 0.5_wp*dt*cfl*(rho_R*vel_R(1))*(1._wp/dx(j + 1))), kind=stp) + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb + 1)%sf(j, k, l) = rhs_vf(momxb + 1)%sf(j, k, l) - & + real(0.5_wp*dt*(rho_L*(vel_L(2))**2.0 + & + pres_L + F_L)*(1._wp/dy(k)) - & + 0.5_wp*dt*cfl*(rho_L*vel_L(2))*(1._wp/dy(k)), kind=stp) - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 1)%sf(j + 1, k, l) = rhs_vf(momxb + 1)%sf(j + 1, k, l) + & - real((0.5_wp*dt*rho_R*vel_R(1)*vel_R(2)*(1._wp/dx(j + 1)) + & - 0.5_wp*dt*cfl*(rho_R*vel_R(2))*(1._wp/dx(j + 1))), kind=stp) + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb)%sf(j, k, l) = rhs_vf(momxb)%sf(j, k, l) - & + real(0.5_wp*dt*rho_L*vel_L(1)*vel_L(2)*(1._wp/dy(k)) - & + 0.5_wp*dt*cfl*(rho_L*vel_L(1))*(1._wp/dy(k)), kind=stp) - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 2)%sf(j + 1, k, l) = rhs_vf(momxb + 2)%sf(j + 1, k, l) + & - real((0.5_wp*dt*rho_R*vel_R(1)*vel_R(3)*(1._wp/dx(j + 1)) + & - 0.5_wp*dt*cfl*(rho_R*vel_R(3))*(1._wp/dx(j + 1))), kind=stp) + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) - & + real(0.5_wp*dt*(vel_L(2)*(E_L + & + pres_L + F_L))*(1._wp/dy(k)) - & + 0.5_wp*dt*cfl*(E_L)*(1._wp/dy(k)), kind=stp) - $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j + 1, k, l) = rhs_vf(E_idx)%sf(j + 1, k, l) + & - real((0.5_wp*dt*(vel_R(1)*(E_R + & - pres_R))*(1._wp/dx(j + 1)) + & - 0.5_wp*dt*cfl*(E_R)*(1._wp/dx(j + 1))), kind=stp) + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids + $:GPU_ATOMIC(atomic='update') + rhs_vf(i)%sf(j, k + 1, l) = rhs_vf(i)%sf(j, k + 1, l) + & + real(0.5_wp*dt*(alpha_rho_R(i)* & + vel_R(2))*(1._wp/dy(k + 1)) + & + 0.5_wp*dt*cfl*(alpha_rho_R(i))*(1._wp/dy(k + 1)), kind=stp) + $:GPU_ATOMIC(atomic='update') + rhs_vf(i)%sf(j, k, l) = rhs_vf(i)%sf(j, k, l) - & + real(0.5_wp*dt*(alpha_rho_R(i)* & + vel_R(2))*(1._wp/dy(k)) + & + 0.5_wp*dt*cfl*(alpha_rho_R(i))*(1._wp/dy(k)), kind=stp) + end do + if (num_fluids > 1) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids - 1 $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb)%sf(j, k, l) = rhs_vf(momxb)%sf(j, k, l) - & - real((0.5_wp*dt*(rho_R*(vel_R(1))**2.0 + & - pres_R)*(1._wp/dx(j)) + & - 0.5_wp*dt*cfl*(rho_R*vel_R(1))*(1._wp/dx(j))), kind=stp) + rhs_vf(advxb + i - 1)%sf(j, k + 1, l) = rhs_vf(advxb + i - 1)%sf(j, k + 1, l) + & + real(0.5_wp*dt*(alpha_R(i)* & + vel_R(2))*(1._wp/dy(k + 1)) + & + 0.5_wp*dt*cfl*(alpha_R(i))*(1._wp/dy(k + 1)), kind=stp) $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 1)%sf(j, k, l) = rhs_vf(momxb + 1)%sf(j, k, l) - & - real((0.5_wp*dt*rho_R*vel_R(1)*vel_R(2)*(1._wp/dx(j)) + & - 0.5_wp*dt*cfl*(rho_R*vel_R(2))*(1._wp/dx(j))), kind=stp) + rhs_vf(advxb + i - 1)%sf(j, k + 1, l) = rhs_vf(advxb + i - 1)%sf(j, k + 1, l) & + - real(0.5_wp*dt*q_cons_vf(advxb + i - 1)%sf(j, k + 1, l)*vel_R(2)*(1._wp/dy(k + 1)), kind=stp) $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 2)%sf(j, k, l) = rhs_vf(momxb + 2)%sf(j, k, l) - & - real((0.5_wp*dt*rho_R*vel_R(1)*vel_R(3)*(1._wp/dx(j)) + & - 0.5_wp*dt*cfl*(rho_R*vel_R(3))*(1._wp/dx(j))), kind=stp) + rhs_vf(advxb + i - 1)%sf(j, k, l) = rhs_vf(advxb + i - 1)%sf(j, k, l) - & + real(0.5_wp*dt*(alpha_R(i)* & + vel_R(2))*(1._wp/dy(k)) + & + 0.5_wp*dt*cfl*(alpha_R(i))*(1._wp/dy(k)), kind=stp) $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) - & - real((0.5_wp*dt*(vel_R(1)*(E_R + & - pres_R))*(1._wp/dx(j)) + & - 0.5_wp*dt*cfl*(E_R)*(1._wp/dx(j))), kind=stp) - + rhs_vf(advxb + i - 1)%sf(j, k, l) = rhs_vf(advxb + i - 1)%sf(j, k, l) & + + real(0.5_wp*dt*q_cons_vf(advxb + i - 1)%sf(j, k, l)*vel_R(2)*(1._wp/dy(k)), kind=stp) end do - end do + end if + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb + 1)%sf(j, k + 1, l) = rhs_vf(momxb + 1)%sf(j, k + 1, l) + & + real(0.5_wp*dt*(rho_R*(vel_R(2))**2.0 + & + pres_R + F_R)*(1._wp/dy(k + 1)) + & + 0.5_wp*dt*cfl*(rho_R*vel_R(2))*(1._wp/dy(k + 1)), kind=stp) + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb)%sf(j, k + 1, l) = rhs_vf(momxb)%sf(j, k + 1, l) + & + real(0.5_wp*dt*rho_R*vel_R(2)*vel_R(1)*(1._wp/dy(k + 1)) + & + 0.5_wp*dt*cfl*(rho_R*vel_R(1))*(1._wp/dy(k + 1)), kind=stp) + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j, k + 1, l) = rhs_vf(E_idx)%sf(j, k + 1, l) + & + real(0.5_wp*dt*(vel_R(2)*(E_R + & + pres_R + F_R))*(1._wp/dy(k + 1)) + & + 0.5_wp*dt*cfl*(E_R)*(1._wp/dy(k + 1)), kind=stp) + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb + 1)%sf(j, k, l) = rhs_vf(momxb + 1)%sf(j, k, l) - & + real(0.5_wp*dt*(rho_R*(vel_R(2))**2.0 + & + pres_R + F_R)*(1._wp/dy(k)) + & + 0.5_wp*dt*cfl*(rho_R*vel_R(2))*(1._wp/dy(k)), kind=stp) + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb)%sf(j, k, l) = rhs_vf(momxb)%sf(j, k, l) - & + real(0.5_wp*dt*rho_R*vel_R(2)*vel_R(1)*(1._wp/dy(k)) + & + 0.5_wp*dt*cfl*(rho_R*vel_R(1))*(1._wp/dy(k)), kind=stp) + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) - & + real(0.5_wp*dt*(vel_R(2)*(E_R + & + pres_R + F_R))*(1._wp/dy(k)) + & + 0.5_wp*dt*cfl*(E_R)*(1._wp/dy(k)), kind=stp) end do - $:END_GPU_PARALLEL_LOOP() - #:endif - end if - else if (idir == 2) then - if (p == 0) then - $:GPU_PARALLEL_LOOP(collapse=3, private='[j,k,l,rho_L, rho_R, gamma_L, gamma_R, pi_inf_L, pi_inf_R, mu_L, mu_R, vel_L, vel_R, pres_L, pres_R, alpha_L, alpha_R, alpha_rho_L, alpha_rho_R, F_L, F_R, E_L, E_R, cfl, dvel_small, rho_sf_small, vflux_L_arr, vflux_R_arr]') + end do + end do + $:END_GPU_PARALLEL_LOOP() + else + #:if not MFC_CASE_OPTIMIZATION or num_dims > 2 + $:GPU_PARALLEL_LOOP(collapse=3, private='[j,k,l,rho_L, rho_R, gamma_L, gamma_R, pi_inf_L, pi_inf_R, mu_L, mu_R, vel_L, vel_R, pres_L, pres_R, alpha_L, alpha_R, alpha_rho_L, alpha_rho_R, F_L, F_R, E_L, E_R, cfl, dvel_small, rho_sf_small, vflux_L_arr, vflux_R_arr]') do l = 0, p do k = -1, n do j = 0, m @@ -1475,20 +1871,51 @@ contains rho_sf_small(i) = rho_L end do - dvel_small(1) = (1/(2._wp*dy(k)))*( & - q_cons_vf(momxb)%sf(j, k + 1 + q, l)/rho_sf_small(1) - & - q_cons_vf(momxb)%sf(j, k - 1 + q, l)/rho_sf_small(-1)) - dvel_small(2) = (1/(2._wp*dy(k)))*( & - q_cons_vf(momxb + 1)%sf(j, k + 1 + q, l)/rho_sf_small(1) - & - q_cons_vf(momxb + 1)%sf(j, k - 1 + q, l)/rho_sf_small(-1)) - + dvel_small(1) = (1/(2._wp*dy(k)))*( & + q_cons_vf(momxb)%sf(j, k + 1 + q, l)/rho_sf_small(1) - & + q_cons_vf(momxb)%sf(j, k - 1 + q, l)/rho_sf_small(-1)) + dvel_small(2) = (1/(2._wp*dy(k)))*( & + q_cons_vf(momxb + 1)%sf(j, k + 1 + q, l)/rho_sf_small(1) - & + q_cons_vf(momxb + 1)%sf(j, k - 1 + q, l)/rho_sf_small(-1)) + dvel_small(3) = (1/(2._wp*dy(k)))*( & + q_cons_vf(momxb + 2)%sf(j, k + 1 + q, l)/rho_sf_small(1) - & + q_cons_vf(momxb + 2)%sf(j, k - 1 + q, l)/rho_sf_small(-1)) + + if (q > vidxb) then + vflux_L_arr(1) = vflux_L_arr(1) + coeff_L(q + offxL)*(dvel_small(1)) + vflux_L_arr(2) = vflux_L_arr(2) + coeff_L(q + offxL)*(dvel_small(3)) + vflux_L_arr(3) = vflux_L_arr(3) + coeff_L(q + offxL)*(4._wp*dvel_small(2))/3._wp + end if + if (q < vidxe) then + vflux_R_arr(1) = vflux_R_arr(1) + coeff_R(q + offxR)*(dvel_small(1)) + vflux_R_arr(2) = vflux_R_arr(2) + coeff_R(q + offxR)*(dvel_small(3)) + vflux_R_arr(3) = vflux_R_arr(3) + coeff_R(q + offxR)*(4._wp*dvel_small(2))/3._wp + end if + + !z-direction contributions + $:GPU_LOOP(parallelism='[seq]') + do i = -1, 1 + rho_L = 0._wp + $:GPU_LOOP(parallelism='[seq]') + do r = 1, num_fluids + rho_L = rho_L + q_cons_vf(r)%sf(j, k + q, l + i) + end do + rho_sf_small(i) = rho_L + end do + + dvel_small(2) = (1/(2._wp*dz(l)))*( & + q_cons_vf(momxb + 1)%sf(j, k + q, l + 1)/rho_sf_small(1) - & + q_cons_vf(momxb + 1)%sf(j, k + q, l - 1)/rho_sf_small(-1)) + dvel_small(3) = (1/(2._wp*dz(l)))*( & + q_cons_vf(momxb + 2)%sf(j, k + q, l + 1)/rho_sf_small(1) - & + q_cons_vf(momxb + 2)%sf(j, k + q, l - 1)/rho_sf_small(-1)) if (q > vidxb) then - vflux_L_arr(1) = vflux_L_arr(1) + coeff_L(q + offxL)*(dvel_small(1)) - vflux_L_arr(3) = vflux_L_arr(3) + coeff_L(q + offxL)*(4._wp*dvel_small(2))/3._wp + vflux_L_arr(2) = vflux_L_arr(2) + coeff_L(q + offxL)*(dvel_small(2)) + vflux_L_arr(3) = vflux_L_arr(3) + coeff_L(q + offxL)*(-2._wp*dvel_small(3))/3._wp end if if (q < vidxe) then - vflux_R_arr(1) = vflux_R_arr(1) + coeff_R(q + offxR)*(dvel_small(1)) - vflux_R_arr(3) = vflux_R_arr(3) + coeff_R(q + offxR)*(4._wp*dvel_small(2))/3._wp + vflux_R_arr(2) = vflux_R_arr(2) + coeff_R(q + offxR)*(dvel_small(2)) + vflux_R_arr(3) = vflux_R_arr(3) + coeff_R(q + offxR)*(-2._wp*dvel_small(3))/3._wp end if end do end if @@ -1620,6 +2047,34 @@ contains rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) + & real(0.5_wp*dt*mu_R*vflux_R_arr(1)*vel_R(1)*(1._wp/dy(k)), kind=stp) + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb + 2)%sf(j, k + 1, l) = rhs_vf(momxb + 2)%sf(j, k + 1, l) - & + real(0.5_wp*dt*mu_L*vflux_L_arr(2)*(1._wp/dy(k + 1)), kind=stp) + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j, k + 1, l) = rhs_vf(E_idx)%sf(j, k + 1, l) - & + real(0.5_wp*dt*mu_L*vflux_L_arr(2)*vel_L(3)*(1._wp/dy(k + 1)), kind=stp) + + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb + 2)%sf(j, k, l) = rhs_vf(momxb + 2)%sf(j, k, l) + & + real(0.5_wp*dt*mu_L*vflux_L_arr(2)*(1._wp/dy(k)), kind=stp) + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) + & + real(0.5_wp*dt*mu_L*vflux_L_arr(2)*vel_L(3)*(1._wp/dy(k)), kind=stp) + + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb + 2)%sf(j, k + 1, l) = rhs_vf(momxb + 2)%sf(j, k + 1, l) - & + real(0.5_wp*dt*mu_R*vflux_R_arr(2)*(1._wp/dy(k + 1)), kind=stp) + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j, k + 1, l) = rhs_vf(E_idx)%sf(j, k + 1, l) - & + real(0.5_wp*dt*mu_R*vflux_R_arr(2)*vel_R(3)*(1._wp/dy(k + 1)), kind=stp) + + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb + 2)%sf(j, k, l) = rhs_vf(momxb + 2)%sf(j, k, l) + & + real(0.5_wp*dt*mu_R*vflux_R_arr(2)*(1._wp/dy(k)), kind=stp) + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) + & + real(0.5_wp*dt*mu_R*vflux_R_arr(2)*vel_R(3)*(1._wp/dy(k)), kind=stp) + $:GPU_ATOMIC(atomic='update') rhs_vf(momxb + 1)%sf(j, k + 1, l) = rhs_vf(momxb + 1)%sf(j, k + 1, l) - & real(0.5_wp*dt*mu_L*vflux_L_arr(3)*(1._wp/dy(k + 1)), kind=stp) @@ -1719,6 +2174,11 @@ contains real(0.5_wp*dt*rho_L*vel_L(1)*vel_L(2)*(1._wp/dy(k + 1)) - & 0.5_wp*dt*cfl*(rho_L*vel_L(1))*(1._wp/dy(k + 1)), kind=stp) + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb + 2)%sf(j, k + 1, l) = rhs_vf(momxb + 2)%sf(j, k + 1, l) + & + real(0.5_wp*dt*rho_L*vel_L(3)*vel_L(2)*(1._wp/dy(k + 1)) - & + 0.5_wp*dt*cfl*(rho_L*vel_L(3))*(1._wp/dy(k + 1)), kind=stp) + $:GPU_ATOMIC(atomic='update') rhs_vf(E_idx)%sf(j, k + 1, l) = rhs_vf(E_idx)%sf(j, k + 1, l) + & real(0.5_wp*dt*(vel_L(2)*(E_L + & @@ -1737,1048 +2197,588 @@ contains 0.5_wp*dt*cfl*(rho_L*vel_L(1))*(1._wp/dy(k)), kind=stp) $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) - & - real(0.5_wp*dt*(vel_L(2)*(E_L + & - pres_L + F_L))*(1._wp/dy(k)) - & - 0.5_wp*dt*cfl*(E_L)*(1._wp/dy(k)), kind=stp) - - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - $:GPU_ATOMIC(atomic='update') - rhs_vf(i)%sf(j, k + 1, l) = rhs_vf(i)%sf(j, k + 1, l) + & - real(0.5_wp*dt*(alpha_rho_R(i)* & - vel_R(2))*(1._wp/dy(k + 1)) + & - 0.5_wp*dt*cfl*(alpha_rho_R(i))*(1._wp/dy(k + 1)), kind=stp) - $:GPU_ATOMIC(atomic='update') - rhs_vf(i)%sf(j, k, l) = rhs_vf(i)%sf(j, k, l) - & - real(0.5_wp*dt*(alpha_rho_R(i)* & - vel_R(2))*(1._wp/dy(k)) + & - 0.5_wp*dt*cfl*(alpha_rho_R(i))*(1._wp/dy(k)), kind=stp) - end do - - if (num_fluids > 1) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - 1 - $:GPU_ATOMIC(atomic='update') - rhs_vf(advxb + i - 1)%sf(j, k + 1, l) = rhs_vf(advxb + i - 1)%sf(j, k + 1, l) + & - real(0.5_wp*dt*(alpha_R(i)* & - vel_R(2))*(1._wp/dy(k + 1)) + & - 0.5_wp*dt*cfl*(alpha_R(i))*(1._wp/dy(k + 1)), kind=stp) - - $:GPU_ATOMIC(atomic='update') - rhs_vf(advxb + i - 1)%sf(j, k + 1, l) = rhs_vf(advxb + i - 1)%sf(j, k + 1, l) & - - real(0.5_wp*dt*q_cons_vf(advxb + i - 1)%sf(j, k + 1, l)*vel_R(2)*(1._wp/dy(k + 1)), kind=stp) - - $:GPU_ATOMIC(atomic='update') - rhs_vf(advxb + i - 1)%sf(j, k, l) = rhs_vf(advxb + i - 1)%sf(j, k, l) - & - real(0.5_wp*dt*(alpha_R(i)* & - vel_R(2))*(1._wp/dy(k)) + & - 0.5_wp*dt*cfl*(alpha_R(i))*(1._wp/dy(k)), kind=stp) - - $:GPU_ATOMIC(atomic='update') - rhs_vf(advxb + i - 1)%sf(j, k, l) = rhs_vf(advxb + i - 1)%sf(j, k, l) & - + real(0.5_wp*dt*q_cons_vf(advxb + i - 1)%sf(j, k, l)*vel_R(2)*(1._wp/dy(k)), kind=stp) - end do - end if - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 1)%sf(j, k + 1, l) = rhs_vf(momxb + 1)%sf(j, k + 1, l) + & - real(0.5_wp*dt*(rho_R*(vel_R(2))**2.0 + & - pres_R + F_R)*(1._wp/dy(k + 1)) + & - 0.5_wp*dt*cfl*(rho_R*vel_R(2))*(1._wp/dy(k + 1)), kind=stp) - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb)%sf(j, k + 1, l) = rhs_vf(momxb)%sf(j, k + 1, l) + & - real(0.5_wp*dt*rho_R*vel_R(2)*vel_R(1)*(1._wp/dy(k + 1)) + & - 0.5_wp*dt*cfl*(rho_R*vel_R(1))*(1._wp/dy(k + 1)), kind=stp) - $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j, k + 1, l) = rhs_vf(E_idx)%sf(j, k + 1, l) + & - real(0.5_wp*dt*(vel_R(2)*(E_R + & - pres_R + F_R))*(1._wp/dy(k + 1)) + & - 0.5_wp*dt*cfl*(E_R)*(1._wp/dy(k + 1)), kind=stp) - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 1)%sf(j, k, l) = rhs_vf(momxb + 1)%sf(j, k, l) - & - real(0.5_wp*dt*(rho_R*(vel_R(2))**2.0 + & - pres_R + F_R)*(1._wp/dy(k)) + & - 0.5_wp*dt*cfl*(rho_R*vel_R(2))*(1._wp/dy(k)), kind=stp) - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb)%sf(j, k, l) = rhs_vf(momxb)%sf(j, k, l) - & - real(0.5_wp*dt*rho_R*vel_R(2)*vel_R(1)*(1._wp/dy(k)) + & - 0.5_wp*dt*cfl*(rho_R*vel_R(1))*(1._wp/dy(k)), kind=stp) - $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) - & - real(0.5_wp*dt*(vel_R(2)*(E_R + & - pres_R + F_R))*(1._wp/dy(k)) + & - 0.5_wp*dt*cfl*(E_R)*(1._wp/dy(k)), kind=stp) - end do - end do - end do - $:END_GPU_PARALLEL_LOOP() - else - #:if not MFC_CASE_OPTIMIZATION or num_dims > 2 - $:GPU_PARALLEL_LOOP(collapse=3, private='[j,k,l,rho_L, rho_R, gamma_L, gamma_R, pi_inf_L, pi_inf_R, mu_L, mu_R, vel_L, vel_R, pres_L, pres_R, alpha_L, alpha_R, alpha_rho_L, alpha_rho_R, F_L, F_R, E_L, E_R, cfl, dvel_small, rho_sf_small, vflux_L_arr, vflux_R_arr]') - do l = 0, p - do k = -1, n - do j = 0, m - - if (viscous) then - vflux_L_arr = 0._wp - vflux_R_arr = 0._wp - - #:if MFC_CASE_OPTIMIZATION - #:if igr_order == 5 - !DIR$ unroll 6 - #:elif igr_order == 3 - !DIR$ unroll 4 - #:endif - #:endif - $:GPU_LOOP(parallelism='[seq]') - do q = vidxb, vidxe - - !x-direction contributions - $:GPU_LOOP(parallelism='[seq]') - do i = -1, 1 - rho_L = 0._wp - $:GPU_LOOP(parallelism='[seq]') - do r = 1, num_fluids - rho_L = rho_L + q_cons_vf(r)%sf(j + i, k + q, l) - end do - rho_sf_small(i) = rho_L - end do - - dvel_small(1) = (1/(2._wp*dx(j)))*( & - q_cons_vf(momxb)%sf(j + 1, k + q, l)/rho_sf_small(1) - & - q_cons_vf(momxb)%sf(j - 1, k + q, l)/rho_sf_small(-1)) - dvel_small(2) = (1/(2._wp*dx(j)))*( & - q_cons_vf(momxb + 1)%sf(j + 1, k + q, l)/rho_sf_small(1) - & - q_cons_vf(momxb + 1)%sf(j - 1, k + q, l)/rho_sf_small(-1)) - - if (q > vidxb) then - vflux_L_arr(1) = vflux_L_arr(1) + coeff_L(q + offxL)*(dvel_small(2)) - vflux_L_arr(3) = vflux_L_arr(3) + coeff_L(q + offxL)*(-2._wp*dvel_small(1))/3._wp - end if - if (q < vidxe) then - vflux_R_arr(1) = vflux_R_arr(1) + coeff_R(q + offxR)*(dvel_small(2)) - vflux_R_arr(3) = vflux_R_arr(3) + coeff_R(q + offxR)*(-2._wp*dvel_small(1))/3._wp - end if - - !y-direction contributions - $:GPU_LOOP(parallelism='[seq]') - do i = -1, 1 - rho_L = 0._wp - $:GPU_LOOP(parallelism='[seq]') - do r = 1, num_fluids - rho_L = rho_L + q_cons_vf(r)%sf(j, k + i + q, l) - end do - rho_sf_small(i) = rho_L - end do - - dvel_small(1) = (1/(2._wp*dy(k)))*( & - q_cons_vf(momxb)%sf(j, k + 1 + q, l)/rho_sf_small(1) - & - q_cons_vf(momxb)%sf(j, k - 1 + q, l)/rho_sf_small(-1)) - dvel_small(2) = (1/(2._wp*dy(k)))*( & - q_cons_vf(momxb + 1)%sf(j, k + 1 + q, l)/rho_sf_small(1) - & - q_cons_vf(momxb + 1)%sf(j, k - 1 + q, l)/rho_sf_small(-1)) - dvel_small(3) = (1/(2._wp*dy(k)))*( & - q_cons_vf(momxb + 2)%sf(j, k + 1 + q, l)/rho_sf_small(1) - & - q_cons_vf(momxb + 2)%sf(j, k - 1 + q, l)/rho_sf_small(-1)) - - if (q > vidxb) then - vflux_L_arr(1) = vflux_L_arr(1) + coeff_L(q + offxL)*(dvel_small(1)) - vflux_L_arr(2) = vflux_L_arr(2) + coeff_L(q + offxL)*(dvel_small(3)) - vflux_L_arr(3) = vflux_L_arr(3) + coeff_L(q + offxL)*(4._wp*dvel_small(2))/3._wp - end if - if (q < vidxe) then - vflux_R_arr(1) = vflux_R_arr(1) + coeff_R(q + offxR)*(dvel_small(1)) - vflux_R_arr(2) = vflux_R_arr(2) + coeff_R(q + offxR)*(dvel_small(3)) - vflux_R_arr(3) = vflux_R_arr(3) + coeff_R(q + offxR)*(4._wp*dvel_small(2))/3._wp - end if - - !z-direction contributions - $:GPU_LOOP(parallelism='[seq]') - do i = -1, 1 - rho_L = 0._wp - $:GPU_LOOP(parallelism='[seq]') - do r = 1, num_fluids - rho_L = rho_L + q_cons_vf(r)%sf(j, k + q, l + i) - end do - rho_sf_small(i) = rho_L - end do - - dvel_small(2) = (1/(2._wp*dz(l)))*( & - q_cons_vf(momxb + 1)%sf(j, k + q, l + 1)/rho_sf_small(1) - & - q_cons_vf(momxb + 1)%sf(j, k + q, l - 1)/rho_sf_small(-1)) - dvel_small(3) = (1/(2._wp*dz(l)))*( & - q_cons_vf(momxb + 2)%sf(j, k + q, l + 1)/rho_sf_small(1) - & - q_cons_vf(momxb + 2)%sf(j, k + q, l - 1)/rho_sf_small(-1)) - if (q > vidxb) then - vflux_L_arr(2) = vflux_L_arr(2) + coeff_L(q + offxL)*(dvel_small(2)) - vflux_L_arr(3) = vflux_L_arr(3) + coeff_L(q + offxL)*(-2._wp*dvel_small(3))/3._wp - end if - if (q < vidxe) then - vflux_R_arr(2) = vflux_R_arr(2) + coeff_R(q + offxR)*(dvel_small(2)) - vflux_R_arr(3) = vflux_R_arr(3) + coeff_R(q + offxR)*(-2._wp*dvel_small(3))/3._wp - end if - end do - end if - - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - alpha_rho_L(i) = 0._wp - alpha_rho_R(i) = 0._wp - alpha_L(i) = 0._wp - alpha_R(i) = 0._wp - end do - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_dims - vel_L(i) = 0._wp - vel_R(i) = 0._wp - end do - - $:GPU_LOOP(parallelism='[seq]') - do q = vidxb + 1, vidxe - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - alpha_rho_L(i) = alpha_rho_L(i) + coeff_L(q + offxL)*q_cons_vf(i)%sf(j, k + q, l) - end do - - if (num_fluids > 1) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - 1 - alpha_L(i) = alpha_L(i) + coeff_L(q + offxL)*q_cons_vf(E_idx + i)%sf(j, k + q, l) - end do - else - alpha_L(1) = 1._wp - end if - - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_dims - vel_L(i) = vel_L(i) + coeff_L(q + offxL)*q_cons_vf(momxb + i - 1)%sf(j, k + q, l) - end do - end do - - $:GPU_LOOP(parallelism='[seq]') - do q = vidxb, vidxe - 1 - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - alpha_rho_R(i) = alpha_rho_R(i) + coeff_R(q + offxR)*q_cons_vf(i)%sf(j, k + q, l) - end do - - if (num_fluids > 1) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - 1 - alpha_R(i) = alpha_R(i) + coeff_R(q + offxR)*q_cons_vf(E_idx + i)%sf(j, k + q, l) - end do - else - alpha_R(1) = 1._wp - end if - - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_dims - vel_R(i) = vel_R(i) + coeff_R(q + offxR)*q_cons_vf(momxb + i - 1)%sf(j, k + q, l) - end do - end do - - if (num_fluids > 1) then - - alpha_L(num_fluids) = 1._wp - alpha_R(num_fluids) = 1._wp - - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - 1 - alpha_L(num_fluids) = alpha_L(num_fluids) - alpha_L(i) - alpha_R(num_fluids) = alpha_R(num_fluids) - alpha_R(i) - end do - end if - - rho_L = 0._wp; rho_R = 0._wp - gamma_L = 0._wp; gamma_R = 0._wp - pi_inf_L = 0._wp; pi_inf_R = 0._wp - - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - rho_L = rho_L + alpha_rho_L(i) - gamma_L = gamma_L + alpha_L(i)*gammas(i) - pi_inf_L = pi_inf_L + alpha_L(i)*pi_infs(i) - - rho_R = rho_R + alpha_rho_R(i) - gamma_R = gamma_R + alpha_R(i)*gammas(i) - pi_inf_R = pi_inf_R + alpha_R(i)*pi_infs(i) - end do - - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_dims - vel_L(i) = vel_L(i)/rho_L - vel_R(i) = vel_R(i)/rho_R - end do - - if (viscous) then - mu_L = 0._wp - mu_R = 0._wp - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - mu_L = alpha_L(i)/Res_igr(1, i) + mu_L - mu_R = alpha_R(i)/Res_igr(1, i) + mu_R - end do - - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb)%sf(j, k + 1, l) = rhs_vf(momxb)%sf(j, k + 1, l) - & - real(0.5_wp*dt*mu_L*vflux_L_arr(1)*(1._wp/dy(k + 1)), kind=stp) - $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j, k + 1, l) = rhs_vf(E_idx)%sf(j, k + 1, l) - & - real(0.5_wp*dt*mu_L*vflux_L_arr(1)*vel_L(1)*(1._wp/dy(k + 1)), kind=stp) - - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb)%sf(j, k, l) = rhs_vf(momxb)%sf(j, k, l) + & - real(0.5_wp*dt*mu_L*vflux_L_arr(1)*(1._wp/dy(k)), kind=stp) - $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) + & - real(0.5_wp*dt*mu_L*vflux_L_arr(1)*vel_L(1)*(1._wp/dy(k)), kind=stp) - - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb)%sf(j, k + 1, l) = rhs_vf(momxb)%sf(j, k + 1, l) - & - real(0.5_wp*dt*mu_R*vflux_R_arr(1)*(1._wp/dy(k + 1)), kind=stp) - $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j, k + 1, l) = rhs_vf(E_idx)%sf(j, k + 1, l) - & - real(0.5_wp*dt*mu_R*vflux_R_arr(1)*vel_R(1)*(1._wp/dy(k + 1)), kind=stp) - - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb)%sf(j, k, l) = rhs_vf(momxb)%sf(j, k, l) + & - real(0.5_wp*dt*mu_R*vflux_R_arr(1)*(1._wp/dy(k)), kind=stp) - $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) + & - real(0.5_wp*dt*mu_R*vflux_R_arr(1)*vel_R(1)*(1._wp/dy(k)), kind=stp) - - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 2)%sf(j, k + 1, l) = rhs_vf(momxb + 2)%sf(j, k + 1, l) - & - real(0.5_wp*dt*mu_L*vflux_L_arr(2)*(1._wp/dy(k + 1)), kind=stp) - $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j, k + 1, l) = rhs_vf(E_idx)%sf(j, k + 1, l) - & - real(0.5_wp*dt*mu_L*vflux_L_arr(2)*vel_L(3)*(1._wp/dy(k + 1)), kind=stp) - - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 2)%sf(j, k, l) = rhs_vf(momxb + 2)%sf(j, k, l) + & - real(0.5_wp*dt*mu_L*vflux_L_arr(2)*(1._wp/dy(k)), kind=stp) - $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) + & - real(0.5_wp*dt*mu_L*vflux_L_arr(2)*vel_L(3)*(1._wp/dy(k)), kind=stp) - - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 2)%sf(j, k + 1, l) = rhs_vf(momxb + 2)%sf(j, k + 1, l) - & - real(0.5_wp*dt*mu_R*vflux_R_arr(2)*(1._wp/dy(k + 1)), kind=stp) - $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j, k + 1, l) = rhs_vf(E_idx)%sf(j, k + 1, l) - & - real(0.5_wp*dt*mu_R*vflux_R_arr(2)*vel_R(3)*(1._wp/dy(k + 1)), kind=stp) - - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 2)%sf(j, k, l) = rhs_vf(momxb + 2)%sf(j, k, l) + & - real(0.5_wp*dt*mu_R*vflux_R_arr(2)*(1._wp/dy(k)), kind=stp) - $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) + & - real(0.5_wp*dt*mu_R*vflux_R_arr(2)*vel_R(3)*(1._wp/dy(k)), kind=stp) - - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 1)%sf(j, k + 1, l) = rhs_vf(momxb + 1)%sf(j, k + 1, l) - & - real(0.5_wp*dt*mu_L*vflux_L_arr(3)*(1._wp/dy(k + 1)), kind=stp) - $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j, k + 1, l) = rhs_vf(E_idx)%sf(j, k + 1, l) - & - real(0.5_wp*dt*mu_L*vflux_L_arr(3)*vel_L(2)*(1._wp/dy(k + 1)), kind=stp) - - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 1)%sf(j, k, l) = rhs_vf(momxb + 1)%sf(j, k, l) + & - real(0.5_wp*dt*mu_L*vflux_L_arr(3)*(1._wp/dy(k)), kind=stp) - $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) + & - real(0.5_wp*dt*mu_L*vflux_L_arr(3)*vel_L(2)*(1._wp/dy(k)), kind=stp) - - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 1)%sf(j, k + 1, l) = rhs_vf(momxb + 1)%sf(j, k + 1, l) - & - real(0.5_wp*dt*mu_R*vflux_R_arr(3)*(1._wp/dy(k + 1)), kind=stp) - $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j, k + 1, l) = rhs_vf(E_idx)%sf(j, k + 1, l) - & - real(0.5_wp*dt*mu_R*vflux_R_arr(3)*vel_R(2)*(1._wp/dy(k + 1)), kind=stp) - - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 1)%sf(j, k, l) = rhs_vf(momxb + 1)%sf(j, k, l) + & - real(0.5_wp*dt*mu_R*vflux_R_arr(3)*(1._wp/dy(k)), kind=stp) - $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) + & - real(0.5_wp*dt*mu_R*vflux_R_arr(3)*vel_R(2)*(1._wp/dy(k)), kind=stp) - end if - - E_L = 0._wp; E_R = 0._wp - F_L = 0._wp; F_R = 0._wp - - $:GPU_LOOP(parallelism='[seq]') - do q = vidxb + 1, vidxe - E_L = E_L + coeff_L(q + offxL)*q_cons_vf(E_idx)%sf(j, k + q, l) - F_L = F_L + coeff_L(q + offxL)*jac(j, k + q, l) - end do - - $:GPU_LOOP(parallelism='[seq]') - do q = vidxb, vidxe - 1 - E_R = E_R + coeff_R(q + offxR)*q_cons_vf(E_idx)%sf(j, k + q, l) - F_R = F_R + coeff_R(q + offxR)*jac(j, k + q, l) - end do - - call s_get_derived_states(E_L, gamma_L, pi_inf_L, rho_L, vel_L, & - E_R, gamma_R, pi_inf_R, rho_R, vel_R, & - pres_L, pres_R, cfl) - - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - $:GPU_ATOMIC(atomic='update') - rhs_vf(i)%sf(j, k + 1, l) = rhs_vf(i)%sf(j, k + 1, l) + & - real(0.5_wp*dt*(alpha_rho_L(i)* & - vel_L(2))*(1._wp/dy(k + 1)) - & - 0.5_wp*dt*cfl*(alpha_rho_L(i))*(1._wp/dy(k + 1)), kind=stp) - - $:GPU_ATOMIC(atomic='update') - rhs_vf(i)%sf(j, k, l) = rhs_vf(i)%sf(j, k, l) - & - real(0.5_wp*dt*(alpha_rho_L(i)* & - vel_L(2))*(1._wp/dy(k)) - & - 0.5_wp*dt*cfl*(alpha_rho_L(i))*(1._wp/dy(k)), kind=stp) - end do - - if (num_fluids > 1) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - 1 - $:GPU_ATOMIC(atomic='update') - rhs_vf(advxb + i - 1)%sf(j, k + 1, l) = rhs_vf(advxb + i - 1)%sf(j, k + 1, l) + & - real(0.5_wp*dt*(alpha_L(i)* & - vel_L(2))*(1._wp/dy(k + 1)) - & - 0.5_wp*dt*cfl*(alpha_L(i))*(1._wp/dy(k + 1)), kind=stp) - - $:GPU_ATOMIC(atomic='update') - rhs_vf(advxb + i - 1)%sf(j, k + 1, l) = rhs_vf(advxb + i - 1)%sf(j, k + 1, l) & - - real(0.5_wp*dt*q_cons_vf(advxb + i - 1)%sf(j, k + 1, l)*vel_L(2)*(1._wp/dy(k + 1)), kind=stp) - - $:GPU_ATOMIC(atomic='update') - rhs_vf(advxb + i - 1)%sf(j, k, l) = rhs_vf(advxb + i - 1)%sf(j, k, l) - & - real(0.5_wp*dt*(alpha_L(i)* & - vel_L(2))*(1._wp/dy(k)) - & - 0.5_wp*dt*cfl*(alpha_L(i))*(1._wp/dy(k)), kind=stp) - - $:GPU_ATOMIC(atomic='update') - rhs_vf(advxb + i - 1)%sf(j, k, l) = rhs_vf(advxb + i - 1)%sf(j, k, l) & - + real(0.5_wp*dt*q_cons_vf(advxb + i - 1)%sf(j, k, l)*vel_L(2)*(1._wp/dy(k)), kind=stp) - end do - end if - - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 1)%sf(j, k + 1, l) = rhs_vf(momxb + 1)%sf(j, k + 1, l) + & - real(0.5_wp*dt*(rho_L*(vel_L(2))**2.0 + & - pres_L + F_L)*(1._wp/dy(k + 1)) - & - 0.5_wp*dt*cfl*(rho_L*vel_L(2))*(1._wp/dy(k + 1)), kind=stp) - - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb)%sf(j, k + 1, l) = rhs_vf(momxb)%sf(j, k + 1, l) + & - real(0.5_wp*dt*rho_L*vel_L(1)*vel_L(2)*(1._wp/dy(k + 1)) - & - 0.5_wp*dt*cfl*(rho_L*vel_L(1))*(1._wp/dy(k + 1)), kind=stp) - - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 2)%sf(j, k + 1, l) = rhs_vf(momxb + 2)%sf(j, k + 1, l) + & - real(0.5_wp*dt*rho_L*vel_L(3)*vel_L(2)*(1._wp/dy(k + 1)) - & - 0.5_wp*dt*cfl*(rho_L*vel_L(3))*(1._wp/dy(k + 1)), kind=stp) - - $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j, k + 1, l) = rhs_vf(E_idx)%sf(j, k + 1, l) + & - real(0.5_wp*dt*(vel_L(2)*(E_L + & - pres_L + F_L))*(1._wp/dy(k + 1)) - & - 0.5_wp*dt*cfl*(E_L)*(1._wp/dy(k + 1)), kind=stp) - - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 1)%sf(j, k, l) = rhs_vf(momxb + 1)%sf(j, k, l) - & - real(0.5_wp*dt*(rho_L*(vel_L(2))**2.0 + & - pres_L + F_L)*(1._wp/dy(k)) - & - 0.5_wp*dt*cfl*(rho_L*vel_L(2))*(1._wp/dy(k)), kind=stp) + rhs_vf(momxb + 2)%sf(j, k, l) = rhs_vf(momxb + 2)%sf(j, k, l) - & + real(0.5_wp*dt*rho_L*vel_L(3)*vel_L(2)*(1._wp/dy(k)) - & + 0.5_wp*dt*cfl*(rho_L*vel_L(3))*(1._wp/dy(k)), kind=stp) - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb)%sf(j, k, l) = rhs_vf(momxb)%sf(j, k, l) - & - real(0.5_wp*dt*rho_L*vel_L(1)*vel_L(2)*(1._wp/dy(k)) - & - 0.5_wp*dt*cfl*(rho_L*vel_L(1))*(1._wp/dy(k)), kind=stp) + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) - & + real(0.5_wp*dt*(vel_L(2)*(E_L + & + pres_L + F_L))*(1._wp/dy(k)) - & + 0.5_wp*dt*cfl*(E_L)*(1._wp/dy(k)), kind=stp) + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 2)%sf(j, k, l) = rhs_vf(momxb + 2)%sf(j, k, l) - & - real(0.5_wp*dt*rho_L*vel_L(3)*vel_L(2)*(1._wp/dy(k)) - & - 0.5_wp*dt*cfl*(rho_L*vel_L(3))*(1._wp/dy(k)), kind=stp) + rhs_vf(i)%sf(j, k + 1, l) = rhs_vf(i)%sf(j, k + 1, l) + & + real(0.5_wp*dt*(alpha_rho_R(i)* & + vel_R(2))*(1._wp/dy(k + 1)) + & + 0.5_wp*dt*cfl*(alpha_rho_R(i))*(1._wp/dy(k + 1)), kind=stp) $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) - & - real(0.5_wp*dt*(vel_L(2)*(E_L + & - pres_L + F_L))*(1._wp/dy(k)) - & - 0.5_wp*dt*cfl*(E_L)*(1._wp/dy(k)), kind=stp) + rhs_vf(i)%sf(j, k, l) = rhs_vf(i)%sf(j, k, l) - & + real(0.5_wp*dt*(alpha_rho_R(i)* & + vel_R(2))*(1._wp/dy(k)) + & + 0.5_wp*dt*cfl*(alpha_rho_R(i))*(1._wp/dy(k)), kind=stp) + end do + if (num_fluids > 1) then $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids + do i = 1, num_fluids - 1 $:GPU_ATOMIC(atomic='update') - rhs_vf(i)%sf(j, k + 1, l) = rhs_vf(i)%sf(j, k + 1, l) + & - real(0.5_wp*dt*(alpha_rho_R(i)* & - vel_R(2))*(1._wp/dy(k + 1)) + & - 0.5_wp*dt*cfl*(alpha_rho_R(i))*(1._wp/dy(k + 1)), kind=stp) + rhs_vf(advxb + i - 1)%sf(j, k + 1, l) = rhs_vf(advxb + i - 1)%sf(j, k + 1, l) + & + real(0.5_wp*dt*(alpha_R(i)* & + vel_R(2))*(1._wp/dy(k + 1)) + & + 0.5_wp*dt*cfl*(alpha_R(i))*(1._wp/dy(k + 1)), kind=stp) $:GPU_ATOMIC(atomic='update') - rhs_vf(i)%sf(j, k, l) = rhs_vf(i)%sf(j, k, l) - & - real(0.5_wp*dt*(alpha_rho_R(i)* & - vel_R(2))*(1._wp/dy(k)) + & - 0.5_wp*dt*cfl*(alpha_rho_R(i))*(1._wp/dy(k)), kind=stp) - end do + rhs_vf(advxb + i - 1)%sf(j, k + 1, l) = rhs_vf(advxb + i - 1)%sf(j, k + 1, l) & + - real(0.5_wp*dt*q_cons_vf(advxb + i - 1)%sf(j, k + 1, l)*vel_R(2)*(1._wp/dy(k + 1)), kind=stp) - if (num_fluids > 1) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - 1 - $:GPU_ATOMIC(atomic='update') - rhs_vf(advxb + i - 1)%sf(j, k + 1, l) = rhs_vf(advxb + i - 1)%sf(j, k + 1, l) + & - real(0.5_wp*dt*(alpha_R(i)* & - vel_R(2))*(1._wp/dy(k + 1)) + & - 0.5_wp*dt*cfl*(alpha_R(i))*(1._wp/dy(k + 1)), kind=stp) - - $:GPU_ATOMIC(atomic='update') - rhs_vf(advxb + i - 1)%sf(j, k + 1, l) = rhs_vf(advxb + i - 1)%sf(j, k + 1, l) & - - real(0.5_wp*dt*q_cons_vf(advxb + i - 1)%sf(j, k + 1, l)*vel_R(2)*(1._wp/dy(k + 1)), kind=stp) - - $:GPU_ATOMIC(atomic='update') - rhs_vf(advxb + i - 1)%sf(j, k, l) = rhs_vf(advxb + i - 1)%sf(j, k, l) - & - real(0.5_wp*dt*(alpha_R(i)* & - vel_R(2))*(1._wp/dy(k)) + & - 0.5_wp*dt*cfl*(alpha_R(i))*(1._wp/dy(k)), kind=stp) + $:GPU_ATOMIC(atomic='update') + rhs_vf(advxb + i - 1)%sf(j, k, l) = rhs_vf(advxb + i - 1)%sf(j, k, l) - & + real(0.5_wp*dt*(alpha_R(i)* & + vel_R(2))*(1._wp/dy(k)) + & + 0.5_wp*dt*cfl*(alpha_R(i))*(1._wp/dy(k)), kind=stp) - $:GPU_ATOMIC(atomic='update') - rhs_vf(advxb + i - 1)%sf(j, k, l) = rhs_vf(advxb + i - 1)%sf(j, k, l) & - + real(0.5_wp*dt*q_cons_vf(advxb + i - 1)%sf(j, k, l)*vel_R(2)*(1._wp/dy(k)), kind=stp) - end do - end if + $:GPU_ATOMIC(atomic='update') + rhs_vf(advxb + i - 1)%sf(j, k, l) = rhs_vf(advxb + i - 1)%sf(j, k, l) & + + real(0.5_wp*dt*q_cons_vf(advxb + i - 1)%sf(j, k, l)*vel_R(2)*(1._wp/dy(k)), kind=stp) + end do + end if - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 1)%sf(j, k + 1, l) = rhs_vf(momxb + 1)%sf(j, k + 1, l) + & - real(0.5_wp*dt*(rho_R*(vel_R(2))**2.0 + & - pres_R + F_R)*(1._wp/dy(k + 1)) + & - 0.5_wp*dt*cfl*(rho_R*vel_R(2))*(1._wp/dy(k + 1)), kind=stp) + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb + 1)%sf(j, k + 1, l) = rhs_vf(momxb + 1)%sf(j, k + 1, l) + & + real(0.5_wp*dt*(rho_R*(vel_R(2))**2.0 + & + pres_R + F_R)*(1._wp/dy(k + 1)) + & + 0.5_wp*dt*cfl*(rho_R*vel_R(2))*(1._wp/dy(k + 1)), kind=stp) - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb)%sf(j, k + 1, l) = rhs_vf(momxb)%sf(j, k + 1, l) + & - real(0.5_wp*dt*rho_R*vel_R(2)*vel_R(1)*(1._wp/dy(k + 1)) + & - 0.5_wp*dt*cfl*(rho_R*vel_R(1))*(1._wp/dy(k + 1)), kind=stp) + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb)%sf(j, k + 1, l) = rhs_vf(momxb)%sf(j, k + 1, l) + & + real(0.5_wp*dt*rho_R*vel_R(2)*vel_R(1)*(1._wp/dy(k + 1)) + & + 0.5_wp*dt*cfl*(rho_R*vel_R(1))*(1._wp/dy(k + 1)), kind=stp) - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 2)%sf(j, k + 1, l) = rhs_vf(momxb + 2)%sf(j, k + 1, l) + & - real(0.5_wp*dt*rho_R*vel_R(2)*vel_R(3)*(1._wp/dy(k + 1)) + & - 0.5_wp*dt*cfl*(rho_R*vel_R(3))*(1._wp/dy(k + 1)), kind=stp) + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb + 2)%sf(j, k + 1, l) = rhs_vf(momxb + 2)%sf(j, k + 1, l) + & + real(0.5_wp*dt*rho_R*vel_R(2)*vel_R(3)*(1._wp/dy(k + 1)) + & + 0.5_wp*dt*cfl*(rho_R*vel_R(3))*(1._wp/dy(k + 1)), kind=stp) - $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j, k + 1, l) = rhs_vf(E_idx)%sf(j, k + 1, l) + & - real(0.5_wp*dt*(vel_R(2)*(E_R + & - pres_R + F_R))*(1._wp/dy(k + 1)) + & - 0.5_wp*dt*cfl*(E_R)*(1._wp/dy(k + 1)), kind=stp) + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j, k + 1, l) = rhs_vf(E_idx)%sf(j, k + 1, l) + & + real(0.5_wp*dt*(vel_R(2)*(E_R + & + pres_R + F_R))*(1._wp/dy(k + 1)) + & + 0.5_wp*dt*cfl*(E_R)*(1._wp/dy(k + 1)), kind=stp) - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 1)%sf(j, k, l) = rhs_vf(momxb + 1)%sf(j, k, l) - & - real(0.5_wp*dt*(rho_R*(vel_R(2))**2.0 + & - pres_R + F_R)*(1._wp/dy(k)) + & - 0.5_wp*dt*cfl*(rho_R*vel_R(2))*(1._wp/dy(k)), kind=stp) + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb + 1)%sf(j, k, l) = rhs_vf(momxb + 1)%sf(j, k, l) - & + real(0.5_wp*dt*(rho_R*(vel_R(2))**2.0 + & + pres_R + F_R)*(1._wp/dy(k)) + & + 0.5_wp*dt*cfl*(rho_R*vel_R(2))*(1._wp/dy(k)), kind=stp) - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb)%sf(j, k, l) = rhs_vf(momxb)%sf(j, k, l) - & - real(0.5_wp*dt*rho_R*vel_R(2)*vel_R(1)*(1._wp/dy(k)) + & - 0.5_wp*dt*cfl*(rho_R*vel_R(1))*(1._wp/dy(k)), kind=stp) + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb)%sf(j, k, l) = rhs_vf(momxb)%sf(j, k, l) - & + real(0.5_wp*dt*rho_R*vel_R(2)*vel_R(1)*(1._wp/dy(k)) + & + 0.5_wp*dt*cfl*(rho_R*vel_R(1))*(1._wp/dy(k)), kind=stp) - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 2)%sf(j, k, l) = rhs_vf(momxb + 2)%sf(j, k, l) - & - real(0.5_wp*dt*rho_R*vel_R(2)*vel_R(3)*(1._wp/dy(k)) + & - 0.5_wp*dt*cfl*(rho_R*vel_R(3))*(1._wp/dy(k)), kind=stp) + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb + 2)%sf(j, k, l) = rhs_vf(momxb + 2)%sf(j, k, l) - & + real(0.5_wp*dt*rho_R*vel_R(2)*vel_R(3)*(1._wp/dy(k)) + & + 0.5_wp*dt*cfl*(rho_R*vel_R(3))*(1._wp/dy(k)), kind=stp) - $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) - & - real(0.5_wp*dt*(vel_R(2)*(E_R + & - pres_R + F_R))*(1._wp/dy(k)) + & - 0.5_wp*dt*cfl*(E_R)*(1._wp/dy(k)), kind=stp) + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) - & + real(0.5_wp*dt*(vel_R(2)*(E_R + & + pres_R + F_R))*(1._wp/dy(k)) + & + 0.5_wp*dt*cfl*(E_R)*(1._wp/dy(k)), kind=stp) - end do end do end do + end do $:END_GPU_PARALLEL_LOOP() #:endif end if elseif (idir == 3) then #:if not MFC_CASE_OPTIMIZATION or num_dims > 2 $:GPU_PARALLEL_LOOP(collapse=3, private='[j,k,l,rho_L, rho_R, gamma_L, gamma_R, pi_inf_L, pi_inf_R, mu_L, mu_R, vel_L, vel_R, pres_L, pres_R, alpha_L, alpha_R, alpha_rho_L, alpha_rho_R, F_L, F_R, E_L, E_R, cfl, dvel_small, rho_sf_small, vflux_L_arr, vflux_R_arr]') - do l = -1, p - do k = 0, n - do j = 0, m + do l = -1, p + do k = 0, n + do j = 0, m - if (viscous) then - vflux_L_arr = 0._wp - vflux_R_arr = 0._wp + if (viscous) then + vflux_L_arr = 0._wp + vflux_R_arr = 0._wp - #:if MFC_CASE_OPTIMIZATION - #:if igr_order == 5 - !DIR$ unroll 6 - #:elif igr_order == 3 - !DIR$ unroll 4 - #:endif + #:if MFC_CASE_OPTIMIZATION + #:if igr_order == 5 + !DIR$ unroll 6 + #:elif igr_order == 3 + !DIR$ unroll 4 #:endif - $:GPU_LOOP(parallelism='[seq]') - do q = vidxb, vidxe - - !x-direction contributions - $:GPU_LOOP(parallelism='[seq]') - do i = -1, 1 - rho_L = 0._wp - $:GPU_LOOP(parallelism='[seq]') - do r = 1, num_fluids - rho_L = rho_L + q_cons_vf(r)%sf(j + i, k, l + q) - end do - rho_sf_small(i) = rho_L - end do - - dvel_small(1) = (1/(2._wp*dx(j)))*( & - q_cons_vf(momxb)%sf(j + 1, k, l + q)/rho_sf_small(1) - & - q_cons_vf(momxb)%sf(j - 1, k, l + q)/rho_sf_small(-1)) - dvel_small(3) = (1/(2._wp*dx(j)))*( & - q_cons_vf(momxb + 2)%sf(j + 1, k, l + q)/rho_sf_small(1) - & - q_cons_vf(momxb + 2)%sf(j - 1, k, l + q)/rho_sf_small(-1)) - - if (q > vidxb) then - vflux_L_arr(1) = vflux_L_arr(1) + coeff_L(q + offxL)*(dvel_small(3)) - vflux_L_arr(3) = vflux_L_arr(3) + coeff_L(q + offxL)*(-2._wp*dvel_small(1))/3._wp - end if - if (q < vidxe) then - vflux_R_arr(1) = vflux_R_arr(1) + coeff_R(q + offxR)*(dvel_small(3)) - vflux_R_arr(3) = vflux_R_arr(3) + coeff_R(q + offxR)*(-2._wp*dvel_small(1))/3._wp - end if + #:endif + $:GPU_LOOP(parallelism='[seq]') + do q = vidxb, vidxe - !y-direction contributions + !x-direction contributions + $:GPU_LOOP(parallelism='[seq]') + do i = -1, 1 + rho_L = 0._wp $:GPU_LOOP(parallelism='[seq]') - do i = -1, 1 - rho_L = 0._wp - $:GPU_LOOP(parallelism='[seq]') - do r = 1, num_fluids - rho_L = rho_L + q_cons_vf(r)%sf(j, k + i, l + q) - end do - rho_sf_small(i) = rho_L + do r = 1, num_fluids + rho_L = rho_L + q_cons_vf(r)%sf(j + i, k, l + q) end do + rho_sf_small(i) = rho_L + end do - dvel_small(2) = (1/(2._wp*dy(k)))*( & - q_cons_vf(momxb + 1)%sf(j, k + 1, l + q)/rho_sf_small(1) - & - q_cons_vf(momxb + 1)%sf(j, k - 1, l + q)/rho_sf_small(-1)) - dvel_small(3) = (1/(2._wp*dy(k)))*( & - q_cons_vf(momxb + 2)%sf(j, k + 1, l + q)/rho_sf_small(1) - & - q_cons_vf(momxb + 2)%sf(j, k - 1, l + q)/rho_sf_small(-1)) + dvel_small(1) = (1/(2._wp*dx(j)))*( & + q_cons_vf(momxb)%sf(j + 1, k, l + q)/rho_sf_small(1) - & + q_cons_vf(momxb)%sf(j - 1, k, l + q)/rho_sf_small(-1)) + dvel_small(3) = (1/(2._wp*dx(j)))*( & + q_cons_vf(momxb + 2)%sf(j + 1, k, l + q)/rho_sf_small(1) - & + q_cons_vf(momxb + 2)%sf(j - 1, k, l + q)/rho_sf_small(-1)) - if (q > vidxb) then - vflux_L_arr(2) = vflux_L_arr(2) + coeff_L(q + offxL)*(dvel_small(3)) - vflux_L_arr(3) = vflux_L_arr(3) + coeff_L(q + offxL)*(-2._wp*dvel_small(2))/3._wp - end if - if (q < vidxe) then - vflux_R_arr(2) = vflux_R_arr(2) + coeff_R(q + offxR)*(dvel_small(3)) - vflux_R_arr(3) = vflux_R_arr(3) + coeff_R(q + offxR)*(-2._wp*dvel_small(2))/3._wp - end if + if (q > vidxb) then + vflux_L_arr(1) = vflux_L_arr(1) + coeff_L(q + offxL)*(dvel_small(3)) + vflux_L_arr(3) = vflux_L_arr(3) + coeff_L(q + offxL)*(-2._wp*dvel_small(1))/3._wp + end if + if (q < vidxe) then + vflux_R_arr(1) = vflux_R_arr(1) + coeff_R(q + offxR)*(dvel_small(3)) + vflux_R_arr(3) = vflux_R_arr(3) + coeff_R(q + offxR)*(-2._wp*dvel_small(1))/3._wp + end if - !z-direction contributions + !y-direction contributions + $:GPU_LOOP(parallelism='[seq]') + do i = -1, 1 + rho_L = 0._wp $:GPU_LOOP(parallelism='[seq]') - do i = -1, 1 - rho_L = 0._wp - $:GPU_LOOP(parallelism='[seq]') - do r = 1, num_fluids - rho_L = rho_L + q_cons_vf(r)%sf(j, k, l + i + q) - end do - rho_sf_small(i) = rho_L + do r = 1, num_fluids + rho_L = rho_L + q_cons_vf(r)%sf(j, k + i, l + q) end do - dvel_small(1) = (1/(2._wp*dz(l)))*( & - q_cons_vf(momxb)%sf(j, k, l + 1 + q)/rho_sf_small(1) - & - q_cons_vf(momxb)%sf(j, k, l - 1 + q)/rho_sf_small(-1)) - dvel_small(2) = (1/(2._wp*dz(l)))*( & - q_cons_vf(momxb + 1)%sf(j, k, l + 1 + q)/rho_sf_small(1) - & - q_cons_vf(momxb + 1)%sf(j, k, l - 1 + q)/rho_sf_small(-1)) - dvel_small(3) = (1/(2._wp*dz(l)))*( & - q_cons_vf(momxb + 2)%sf(j, k, l + 1 + q)/rho_sf_small(1) - & - q_cons_vf(momxb + 2)%sf(j, k, l - 1 + q)/rho_sf_small(-1)) - if (q > vidxb) then - vflux_L_arr(1) = vflux_L_arr(1) + coeff_L(q + offxL)*(dvel_small(1)) - vflux_L_arr(2) = vflux_L_arr(2) + coeff_L(q + offxL)*(dvel_small(2)) - vflux_L_arr(3) = vflux_L_arr(3) + coeff_L(q + offxL)*(4._wp*dvel_small(3))/3._wp - end if - if (q < vidxe) then - vflux_R_arr(1) = vflux_R_arr(1) + coeff_R(q + offxR)*(dvel_small(1)) - vflux_R_arr(2) = vflux_R_arr(2) + coeff_R(q + offxR)*(dvel_small(2)) - vflux_R_arr(3) = vflux_R_arr(3) + coeff_R(q + offxR)*(4._wp*dvel_small(3))/3._wp - end if + rho_sf_small(i) = rho_L end do - end if - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - alpha_rho_L(i) = 0._wp - alpha_rho_R(i) = 0._wp - alpha_L(i) = 0._wp - alpha_R(i) = 0._wp - end do + dvel_small(2) = (1/(2._wp*dy(k)))*( & + q_cons_vf(momxb + 1)%sf(j, k + 1, l + q)/rho_sf_small(1) - & + q_cons_vf(momxb + 1)%sf(j, k - 1, l + q)/rho_sf_small(-1)) + dvel_small(3) = (1/(2._wp*dy(k)))*( & + q_cons_vf(momxb + 2)%sf(j, k + 1, l + q)/rho_sf_small(1) - & + q_cons_vf(momxb + 2)%sf(j, k - 1, l + q)/rho_sf_small(-1)) - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_dims - vel_L(i) = 0._wp - vel_R(i) = 0._wp - end do + if (q > vidxb) then + vflux_L_arr(2) = vflux_L_arr(2) + coeff_L(q + offxL)*(dvel_small(3)) + vflux_L_arr(3) = vflux_L_arr(3) + coeff_L(q + offxL)*(-2._wp*dvel_small(2))/3._wp + end if + if (q < vidxe) then + vflux_R_arr(2) = vflux_R_arr(2) + coeff_R(q + offxR)*(dvel_small(3)) + vflux_R_arr(3) = vflux_R_arr(3) + coeff_R(q + offxR)*(-2._wp*dvel_small(2))/3._wp + end if - $:GPU_LOOP(parallelism='[seq]') - do q = vidxb + 1, vidxe + !z-direction contributions $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - alpha_rho_L(i) = alpha_rho_L(i) + coeff_L(q + offxL)*q_cons_vf(i)%sf(j, k, l + q) - end do - - if (num_fluids > 1) then + do i = -1, 1 + rho_L = 0._wp $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - 1 - alpha_L(i) = alpha_L(i) + coeff_L(q + offxL)*q_cons_vf(E_idx + i)%sf(j, k, l + q) + do r = 1, num_fluids + rho_L = rho_L + q_cons_vf(r)%sf(j, k, l + i + q) end do - else - alpha_L(1) = 1._wp - end if - - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_dims - vel_L(i) = vel_L(i) + coeff_L(q + offxL)*q_cons_vf(momxb + i - 1)%sf(j, k, l + q) + rho_sf_small(i) = rho_L end do + dvel_small(1) = (1/(2._wp*dz(l)))*( & + q_cons_vf(momxb)%sf(j, k, l + 1 + q)/rho_sf_small(1) - & + q_cons_vf(momxb)%sf(j, k, l - 1 + q)/rho_sf_small(-1)) + dvel_small(2) = (1/(2._wp*dz(l)))*( & + q_cons_vf(momxb + 1)%sf(j, k, l + 1 + q)/rho_sf_small(1) - & + q_cons_vf(momxb + 1)%sf(j, k, l - 1 + q)/rho_sf_small(-1)) + dvel_small(3) = (1/(2._wp*dz(l)))*( & + q_cons_vf(momxb + 2)%sf(j, k, l + 1 + q)/rho_sf_small(1) - & + q_cons_vf(momxb + 2)%sf(j, k, l - 1 + q)/rho_sf_small(-1)) + if (q > vidxb) then + vflux_L_arr(1) = vflux_L_arr(1) + coeff_L(q + offxL)*(dvel_small(1)) + vflux_L_arr(2) = vflux_L_arr(2) + coeff_L(q + offxL)*(dvel_small(2)) + vflux_L_arr(3) = vflux_L_arr(3) + coeff_L(q + offxL)*(4._wp*dvel_small(3))/3._wp + end if + if (q < vidxe) then + vflux_R_arr(1) = vflux_R_arr(1) + coeff_R(q + offxR)*(dvel_small(1)) + vflux_R_arr(2) = vflux_R_arr(2) + coeff_R(q + offxR)*(dvel_small(2)) + vflux_R_arr(3) = vflux_R_arr(3) + coeff_R(q + offxR)*(4._wp*dvel_small(3))/3._wp + end if end do + end if - $:GPU_LOOP(parallelism='[seq]') - do q = vidxb, vidxe - 1 - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - alpha_rho_R(i) = alpha_rho_R(i) + coeff_R(q + offxR)*q_cons_vf(i)%sf(j, k, l + q) - end do + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids + alpha_rho_L(i) = 0._wp + alpha_rho_R(i) = 0._wp + alpha_L(i) = 0._wp + alpha_R(i) = 0._wp + end do - if (num_fluids > 1) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - 1 - alpha_R(i) = alpha_R(i) + coeff_R(q + offxR)*q_cons_vf(E_idx + i)%sf(j, k, l + q) - end do - else - alpha_R(1) = 1._wp - end if + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_dims + vel_L(i) = 0._wp + vel_R(i) = 0._wp + end do - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_dims - vel_R(i) = vel_R(i) + coeff_R(q + offxR)*q_cons_vf(momxb + i - 1)%sf(j, k, l + q) - end do + $:GPU_LOOP(parallelism='[seq]') + do q = vidxb + 1, vidxe + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids + alpha_rho_L(i) = alpha_rho_L(i) + coeff_L(q + offxL)*q_cons_vf(i)%sf(j, k, l + q) end do if (num_fluids > 1) then - - alpha_L(num_fluids) = 1._wp - alpha_R(num_fluids) = 1._wp - $:GPU_LOOP(parallelism='[seq]') do i = 1, num_fluids - 1 - alpha_L(num_fluids) = alpha_L(num_fluids) - alpha_L(i) - alpha_R(num_fluids) = alpha_R(num_fluids) - alpha_R(i) + alpha_L(i) = alpha_L(i) + coeff_L(q + offxL)*q_cons_vf(E_idx + i)%sf(j, k, l + q) end do + else + alpha_L(1) = 1._wp end if - rho_L = 0._wp; rho_R = 0._wp - gamma_L = 0._wp; gamma_R = 0._wp - pi_inf_L = 0._wp; pi_inf_R = 0._wp - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - rho_L = rho_L + alpha_rho_L(i) - gamma_L = gamma_L + alpha_L(i)*gammas(i) - pi_inf_L = pi_inf_L + alpha_L(i)*pi_infs(i) - - rho_R = rho_R + alpha_rho_R(i) - gamma_R = gamma_R + alpha_R(i)*gammas(i) - pi_inf_R = pi_inf_R + alpha_R(i)*pi_infs(i) + do i = 1, num_dims + vel_L(i) = vel_L(i) + coeff_L(q + offxL)*q_cons_vf(momxb + i - 1)%sf(j, k, l + q) end do + end do + $:GPU_LOOP(parallelism='[seq]') + do q = vidxb, vidxe - 1 $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_dims - vel_L(i) = vel_L(i)/rho_L - vel_R(i) = vel_R(i)/rho_R + do i = 1, num_fluids + alpha_rho_R(i) = alpha_rho_R(i) + coeff_R(q + offxR)*q_cons_vf(i)%sf(j, k, l + q) end do - if (viscous) then - mu_L = 0._wp - mu_R = 0._wp + if (num_fluids > 1) then $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - mu_L = alpha_L(i)/Res_igr(1, i) + mu_L - mu_R = alpha_R(i)/Res_igr(1, i) + mu_R + do i = 1, num_fluids - 1 + alpha_R(i) = alpha_R(i) + coeff_R(q + offxR)*q_cons_vf(E_idx + i)%sf(j, k, l + q) end do + else + alpha_R(1) = 1._wp + end if - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb)%sf(j, k, l + 1) = rhs_vf(momxb)%sf(j, k, l + 1) - & - real(0.5_wp*dt*mu_L*vflux_L_arr(1)*(1._wp/dz(l + 1)), kind=stp) - $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j, k, l + 1) = rhs_vf(E_idx)%sf(j, k, l + 1) - & - real(0.5_wp*dt*mu_L*vflux_L_arr(1)*vel_L(1)*(1._wp/dz(l + 1)), kind=stp) - - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb)%sf(j, k, l) = rhs_vf(momxb)%sf(j, k, l) + & - real(0.5_wp*dt*mu_L*vflux_L_arr(1)*(1._wp/dz(l)), kind=stp) - $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) + & - real(0.5_wp*dt*mu_L*vflux_L_arr(1)*vel_L(1)*(1._wp/dz(l)), kind=stp) - - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb)%sf(j, k, l + 1) = rhs_vf(momxb)%sf(j, k, l + 1) - & - real(0.5_wp*dt*mu_R*vflux_R_arr(1)*(1._wp/dz(l + 1)), kind=stp) - $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j, k, l + 1) = rhs_vf(E_idx)%sf(j, k, l + 1) - & - real(0.5_wp*dt*mu_R*vflux_R_arr(1)*vel_R(1)*(1._wp/dz(l + 1)), kind=stp) - - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb)%sf(j, k, l) = rhs_vf(momxb)%sf(j, k, l) + & - real(0.5_wp*dt*mu_R*vflux_R_arr(1)*(1._wp/dz(l)), kind=stp) - $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) + & - real(0.5_wp*dt*mu_R*vflux_R_arr(1)*vel_R(1)*(1._wp/dz(l)), kind=stp) - - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 1)%sf(j, k, l + 1) = rhs_vf(momxb + 1)%sf(j, k, l + 1) - & - real(0.5_wp*dt*mu_L*vflux_L_arr(2)*(1._wp/dz(l + 1)), kind=stp) - $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j, k, l + 1) = rhs_vf(E_idx)%sf(j, k, l + 1) - & - real(0.5_wp*dt*mu_L*vflux_L_arr(2)*vel_L(2)*(1._wp/dz(l + 1)), kind=stp) - - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 1)%sf(j, k, l) = rhs_vf(momxb + 1)%sf(j, k, l) + & - real(0.5_wp*dt*mu_L*vflux_L_arr(2)*(1._wp/dz(l)), kind=stp) - $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) + & - real(0.5_wp*dt*mu_L*vflux_L_arr(2)*vel_L(2)*(1._wp/dz(l)), kind=stp) - - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 1)%sf(j, k, l + 1) = rhs_vf(momxb + 1)%sf(j, k, l + 1) - & - real(0.5_wp*dt*mu_R*vflux_R_arr(2)*(1._wp/dz(l + 1)), kind=stp) - $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j, k, l + 1) = rhs_vf(E_idx)%sf(j, k, l + 1) - & - real(0.5_wp*dt*mu_R*vflux_R_arr(2)*vel_R(2)*(1._wp/dz(l + 1)), kind=stp) + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_dims + vel_R(i) = vel_R(i) + coeff_R(q + offxR)*q_cons_vf(momxb + i - 1)%sf(j, k, l + q) + end do + end do - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 1)%sf(j, k, l) = rhs_vf(momxb + 1)%sf(j, k, l) + & - real(0.5_wp*dt*mu_R*vflux_R_arr(2)*(1._wp/dz(l)), kind=stp) - $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) + & - real(0.5_wp*dt*mu_R*vflux_R_arr(2)*vel_R(2)*(1._wp/dz(l)), kind=stp) + if (num_fluids > 1) then - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 2)%sf(j, k, l + 1) = rhs_vf(momxb + 2)%sf(j, k, l + 1) - & - real(0.5_wp*dt*mu_L*vflux_L_arr(3)*(1._wp/dz(l + 1)), kind=stp) - $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j, k, l + 1) = rhs_vf(E_idx)%sf(j, k, l + 1) - & - real(0.5_wp*dt*mu_L*vflux_L_arr(3)*vel_L(3)*(1._wp/dz(l + 1)), kind=stp) + alpha_L(num_fluids) = 1._wp + alpha_R(num_fluids) = 1._wp - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 2)%sf(j, k, l) = rhs_vf(momxb + 2)%sf(j, k, l) + & - real(0.5_wp*dt*mu_L*vflux_L_arr(3)*(1._wp/dz(l)), kind=stp) - $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) + & - real(0.5_wp*dt*mu_L*vflux_L_arr(3)*vel_L(3)*(1._wp/dz(l)), kind=stp) + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids - 1 + alpha_L(num_fluids) = alpha_L(num_fluids) - alpha_L(i) + alpha_R(num_fluids) = alpha_R(num_fluids) - alpha_R(i) + end do + end if - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 2)%sf(j, k, l + 1) = rhs_vf(momxb + 2)%sf(j, k, l + 1) - & - real(0.5_wp*dt*mu_R*vflux_R_arr(3)*(1._wp/dz(l + 1)), kind=stp) - $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j, k, l + 1) = rhs_vf(E_idx)%sf(j, k, l + 1) - & - real(0.5_wp*dt*mu_R*vflux_R_arr(3)*vel_R(3)*(1._wp/dz(l + 1)), kind=stp) + rho_L = 0._wp; rho_R = 0._wp + gamma_L = 0._wp; gamma_R = 0._wp + pi_inf_L = 0._wp; pi_inf_R = 0._wp - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 2)%sf(j, k, l) = rhs_vf(momxb + 2)%sf(j, k, l) + & - real(0.5_wp*dt*mu_R*vflux_R_arr(3)*(1._wp/dz(l)), kind=stp) - $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) + & - real(0.5_wp*dt*mu_R*vflux_R_arr(3)*vel_R(3)*(1._wp/dz(l)), kind=stp) - end if + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids + rho_L = rho_L + alpha_rho_L(i) + gamma_L = gamma_L + alpha_L(i)*gammas(i) + pi_inf_L = pi_inf_L + alpha_L(i)*pi_infs(i) - E_L = 0._wp; E_R = 0._wp - F_L = 0._wp; F_R = 0._wp + rho_R = rho_R + alpha_rho_R(i) + gamma_R = gamma_R + alpha_R(i)*gammas(i) + pi_inf_R = pi_inf_R + alpha_R(i)*pi_infs(i) + end do - $:GPU_LOOP(parallelism='[seq]') - do q = vidxb + 1, vidxe - E_L = E_L + coeff_L(q + offxL)*q_cons_vf(E_idx)%sf(j, k, l + q) - F_L = F_L + coeff_L(q + offxL)*jac(j, k, l + q) - end do + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_dims + vel_L(i) = vel_L(i)/rho_L + vel_R(i) = vel_R(i)/rho_R + end do + if (viscous) then + mu_L = 0._wp + mu_R = 0._wp $:GPU_LOOP(parallelism='[seq]') - do q = vidxb, vidxe - 1 - E_R = E_R + coeff_R(q + offxR)*q_cons_vf(E_idx)%sf(j, k, l + q) - F_R = F_R + coeff_R(q + offxR)*jac(j, k, l + q) + do i = 1, num_fluids + mu_L = alpha_L(i)/Res_igr(1, i) + mu_L + mu_R = alpha_R(i)/Res_igr(1, i) + mu_R end do - call s_get_derived_states(E_L, gamma_L, pi_inf_L, rho_L, vel_L, & - E_R, gamma_R, pi_inf_R, rho_R, vel_R, & - pres_L, pres_R, cfl) - - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - $:GPU_ATOMIC(atomic='update') - rhs_vf(i)%sf(j, k, l + 1) = rhs_vf(i)%sf(j, k, l + 1) + & - real(0.5_wp*dt*(alpha_rho_L(i)* & - vel_L(3))*(1._wp/dz(l + 1)) - & - 0.5_wp*dt*cfl*(alpha_rho_L(i))*(1._wp/dz(l + 1)), kind=stp) + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb)%sf(j, k, l + 1) = rhs_vf(momxb)%sf(j, k, l + 1) - & + real(0.5_wp*dt*mu_L*vflux_L_arr(1)*(1._wp/dz(l + 1)), kind=stp) + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j, k, l + 1) = rhs_vf(E_idx)%sf(j, k, l + 1) - & + real(0.5_wp*dt*mu_L*vflux_L_arr(1)*vel_L(1)*(1._wp/dz(l + 1)), kind=stp) - $:GPU_ATOMIC(atomic='update') - rhs_vf(i)%sf(j, k, l) = rhs_vf(i)%sf(j, k, l) - & - real(0.5_wp*dt*(alpha_rho_L(i)* & - vel_L(3))*(1._wp/dz(l)) - & - 0.5_wp*dt*cfl*(alpha_rho_L(i))*(1._wp/dz(l)), kind=stp) - end do + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb)%sf(j, k, l) = rhs_vf(momxb)%sf(j, k, l) + & + real(0.5_wp*dt*mu_L*vflux_L_arr(1)*(1._wp/dz(l)), kind=stp) + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) + & + real(0.5_wp*dt*mu_L*vflux_L_arr(1)*vel_L(1)*(1._wp/dz(l)), kind=stp) - if (num_fluids > 1) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - 1 - $:GPU_ATOMIC(atomic='update') - rhs_vf(advxb + i - 1)%sf(j, k, l + 1) = rhs_vf(advxb + i - 1)%sf(j, k, l + 1) + & - real(0.5_wp*dt*(alpha_L(i)* & - vel_L(3))*(1._wp/dz(l + 1)) - & - 0.5_wp*dt*cfl*(alpha_L(i))*(1._wp/dz(l + 1)), kind=stp) + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb)%sf(j, k, l + 1) = rhs_vf(momxb)%sf(j, k, l + 1) - & + real(0.5_wp*dt*mu_R*vflux_R_arr(1)*(1._wp/dz(l + 1)), kind=stp) + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j, k, l + 1) = rhs_vf(E_idx)%sf(j, k, l + 1) - & + real(0.5_wp*dt*mu_R*vflux_R_arr(1)*vel_R(1)*(1._wp/dz(l + 1)), kind=stp) - $:GPU_ATOMIC(atomic='update') - rhs_vf(advxb + i - 1)%sf(j, k, l + 1) = rhs_vf(advxb + i - 1)%sf(j, k, l + 1) & - - real(0.5_wp*dt*q_cons_vf(advxb + i - 1)%sf(j, k, l + 1)*vel_L(3)*(1._wp/dz(l + 1)), kind=stp) + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb)%sf(j, k, l) = rhs_vf(momxb)%sf(j, k, l) + & + real(0.5_wp*dt*mu_R*vflux_R_arr(1)*(1._wp/dz(l)), kind=stp) + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) + & + real(0.5_wp*dt*mu_R*vflux_R_arr(1)*vel_R(1)*(1._wp/dz(l)), kind=stp) - $:GPU_ATOMIC(atomic='update') - rhs_vf(advxb + i - 1)%sf(j, k, l) = rhs_vf(advxb + i - 1)%sf(j, k, l) - & - real(0.5_wp*dt*(alpha_L(i)* & - vel_L(3))*(1._wp/dz(l)) - & - 0.5_wp*dt*cfl*(alpha_L(i))*(1._wp/dz(l)), kind=stp) + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb + 1)%sf(j, k, l + 1) = rhs_vf(momxb + 1)%sf(j, k, l + 1) - & + real(0.5_wp*dt*mu_L*vflux_L_arr(2)*(1._wp/dz(l + 1)), kind=stp) + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j, k, l + 1) = rhs_vf(E_idx)%sf(j, k, l + 1) - & + real(0.5_wp*dt*mu_L*vflux_L_arr(2)*vel_L(2)*(1._wp/dz(l + 1)), kind=stp) - $:GPU_ATOMIC(atomic='update') - rhs_vf(advxb + i - 1)%sf(j, k, l) = rhs_vf(advxb + i - 1)%sf(j, k, l) & - + real(0.5_wp*dt*q_cons_vf(advxb + i - 1)%sf(j, k, l)*vel_L(3)*(1._wp/dz(l)), kind=stp) - end do - end if + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb + 1)%sf(j, k, l) = rhs_vf(momxb + 1)%sf(j, k, l) + & + real(0.5_wp*dt*mu_L*vflux_L_arr(2)*(1._wp/dz(l)), kind=stp) + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) + & + real(0.5_wp*dt*mu_L*vflux_L_arr(2)*vel_L(2)*(1._wp/dz(l)), kind=stp) $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 2)%sf(j, k, l + 1) = rhs_vf(momxb + 2)%sf(j, k, l + 1) + & - real(0.5_wp*dt*(rho_L*(vel_L(3))**2.0 + & - pres_L + F_L)*(1._wp/dz(l + 1)) - & - 0.5_wp*dt*cfl*(rho_L*vel_L(3))*(1._wp/dz(l + 1)), kind=stp) + rhs_vf(momxb + 1)%sf(j, k, l + 1) = rhs_vf(momxb + 1)%sf(j, k, l + 1) - & + real(0.5_wp*dt*mu_R*vflux_R_arr(2)*(1._wp/dz(l + 1)), kind=stp) + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j, k, l + 1) = rhs_vf(E_idx)%sf(j, k, l + 1) - & + real(0.5_wp*dt*mu_R*vflux_R_arr(2)*vel_R(2)*(1._wp/dz(l + 1)), kind=stp) $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb)%sf(j, k, l + 1) = rhs_vf(momxb)%sf(j, k, l + 1) + & - real(0.5_wp*dt*rho_L*vel_L(1)*vel_L(3)*(1._wp/dz(l + 1)) - & - 0.5_wp*dt*cfl*(rho_L*vel_L(1))*(1._wp/dz(l + 1)), kind=stp) + rhs_vf(momxb + 1)%sf(j, k, l) = rhs_vf(momxb + 1)%sf(j, k, l) + & + real(0.5_wp*dt*mu_R*vflux_R_arr(2)*(1._wp/dz(l)), kind=stp) + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) + & + real(0.5_wp*dt*mu_R*vflux_R_arr(2)*vel_R(2)*(1._wp/dz(l)), kind=stp) $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 1)%sf(j, k, l + 1) = rhs_vf(momxb + 1)%sf(j, k, l + 1) + & - real(0.5_wp*dt*rho_L*vel_L(2)*vel_L(3)*(1._wp/dz(l + 1)) - & - 0.5_wp*dt*cfl*(rho_L*vel_L(2))*(1._wp/dz(l + 1)), kind=stp) + rhs_vf(momxb + 2)%sf(j, k, l + 1) = rhs_vf(momxb + 2)%sf(j, k, l + 1) - & + real(0.5_wp*dt*mu_L*vflux_L_arr(3)*(1._wp/dz(l + 1)), kind=stp) + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j, k, l + 1) = rhs_vf(E_idx)%sf(j, k, l + 1) - & + real(0.5_wp*dt*mu_L*vflux_L_arr(3)*vel_L(3)*(1._wp/dz(l + 1)), kind=stp) $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j, k, l + 1) = rhs_vf(E_idx)%sf(j, k, l + 1) + & - real(0.5_wp*dt*(vel_L(3)*(E_L + & - pres_L + F_L))*(1._wp/dz(l + 1)) - & - 0.5_wp*dt*cfl*(E_L)*(1._wp/dz(l + 1)), kind=stp) + rhs_vf(momxb + 2)%sf(j, k, l) = rhs_vf(momxb + 2)%sf(j, k, l) + & + real(0.5_wp*dt*mu_L*vflux_L_arr(3)*(1._wp/dz(l)), kind=stp) + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) + & + real(0.5_wp*dt*mu_L*vflux_L_arr(3)*vel_L(3)*(1._wp/dz(l)), kind=stp) $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 2)%sf(j, k, l) = rhs_vf(momxb + 2)%sf(j, k, l) - & - real(0.5_wp*dt*(rho_L*(vel_L(3))**2.0 + & - pres_L + F_L)*(1._wp/dz(l)) - & - 0.5_wp*dt*cfl*(rho_L*vel_L(3))*(1._wp/dz(l)), kind=stp) + rhs_vf(momxb + 2)%sf(j, k, l + 1) = rhs_vf(momxb + 2)%sf(j, k, l + 1) - & + real(0.5_wp*dt*mu_R*vflux_R_arr(3)*(1._wp/dz(l + 1)), kind=stp) + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j, k, l + 1) = rhs_vf(E_idx)%sf(j, k, l + 1) - & + real(0.5_wp*dt*mu_R*vflux_R_arr(3)*vel_R(3)*(1._wp/dz(l + 1)), kind=stp) $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb)%sf(j, k, l) = rhs_vf(momxb)%sf(j, k, l) - & - real(0.5_wp*dt*rho_L*vel_L(1)*vel_L(3)*(1._wp/dz(l)) - & - 0.5_wp*dt*cfl*(rho_L*vel_L(1))*(1._wp/dz(l)), kind=stp) + rhs_vf(momxb + 2)%sf(j, k, l) = rhs_vf(momxb + 2)%sf(j, k, l) + & + real(0.5_wp*dt*mu_R*vflux_R_arr(3)*(1._wp/dz(l)), kind=stp) + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) + & + real(0.5_wp*dt*mu_R*vflux_R_arr(3)*vel_R(3)*(1._wp/dz(l)), kind=stp) + end if + + E_L = 0._wp; E_R = 0._wp + F_L = 0._wp; F_R = 0._wp + + $:GPU_LOOP(parallelism='[seq]') + do q = vidxb + 1, vidxe + E_L = E_L + coeff_L(q + offxL)*q_cons_vf(E_idx)%sf(j, k, l + q) + F_L = F_L + coeff_L(q + offxL)*jac(j, k, l + q) + end do + + $:GPU_LOOP(parallelism='[seq]') + do q = vidxb, vidxe - 1 + E_R = E_R + coeff_R(q + offxR)*q_cons_vf(E_idx)%sf(j, k, l + q) + F_R = F_R + coeff_R(q + offxR)*jac(j, k, l + q) + end do + + call s_get_derived_states(E_L, gamma_L, pi_inf_L, rho_L, vel_L, & + E_R, gamma_R, pi_inf_R, rho_R, vel_R, & + pres_L, pres_R, cfl) + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 1)%sf(j, k, l) = rhs_vf(momxb + 1)%sf(j, k, l) - & - real(0.5_wp*dt*rho_L*vel_L(2)*vel_L(3)*(1._wp/dz(l)) - & - 0.5_wp*dt*cfl*(rho_L*vel_L(2))*(1._wp/dz(l)), kind=stp) + rhs_vf(i)%sf(j, k, l + 1) = rhs_vf(i)%sf(j, k, l + 1) + & + real(0.5_wp*dt*(alpha_rho_L(i)* & + vel_L(3))*(1._wp/dz(l + 1)) - & + 0.5_wp*dt*cfl*(alpha_rho_L(i))*(1._wp/dz(l + 1)), kind=stp) $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) - & - real(0.5_wp*dt*(vel_L(3)*(E_L + & - pres_L + F_L))*(1._wp/dz(l)) - & - 0.5_wp*dt*cfl*(E_L)*(1._wp/dz(l)), kind=stp) + rhs_vf(i)%sf(j, k, l) = rhs_vf(i)%sf(j, k, l) - & + real(0.5_wp*dt*(alpha_rho_L(i)* & + vel_L(3))*(1._wp/dz(l)) - & + 0.5_wp*dt*cfl*(alpha_rho_L(i))*(1._wp/dz(l)), kind=stp) + end do + if (num_fluids > 1) then $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids + do i = 1, num_fluids - 1 $:GPU_ATOMIC(atomic='update') - rhs_vf(i)%sf(j, k, l + 1) = rhs_vf(i)%sf(j, k, l + 1) + & - real(0.5_wp*dt*(alpha_rho_R(i)* & - vel_R(3))*(1._wp/dz(l + 1)) + & - 0.5_wp*dt*cfl*(alpha_rho_R(i))*(1._wp/dz(l + 1)), kind=stp) + rhs_vf(advxb + i - 1)%sf(j, k, l + 1) = rhs_vf(advxb + i - 1)%sf(j, k, l + 1) + & + real(0.5_wp*dt*(alpha_L(i)* & + vel_L(3))*(1._wp/dz(l + 1)) - & + 0.5_wp*dt*cfl*(alpha_L(i))*(1._wp/dz(l + 1)), kind=stp) $:GPU_ATOMIC(atomic='update') - rhs_vf(i)%sf(j, k, l) = rhs_vf(i)%sf(j, k, l) - & - real(0.5_wp*dt*(alpha_rho_R(i)* & - vel_R(3))*(1._wp/dz(l)) + & - 0.5_wp*dt*cfl*(alpha_rho_R(i))*(1._wp/dz(l)), kind=stp) + rhs_vf(advxb + i - 1)%sf(j, k, l + 1) = rhs_vf(advxb + i - 1)%sf(j, k, l + 1) & + - real(0.5_wp*dt*q_cons_vf(advxb + i - 1)%sf(j, k, l + 1)*vel_L(3)*(1._wp/dz(l + 1)), kind=stp) + + $:GPU_ATOMIC(atomic='update') + rhs_vf(advxb + i - 1)%sf(j, k, l) = rhs_vf(advxb + i - 1)%sf(j, k, l) - & + real(0.5_wp*dt*(alpha_L(i)* & + vel_L(3))*(1._wp/dz(l)) - & + 0.5_wp*dt*cfl*(alpha_L(i))*(1._wp/dz(l)), kind=stp) + + $:GPU_ATOMIC(atomic='update') + rhs_vf(advxb + i - 1)%sf(j, k, l) = rhs_vf(advxb + i - 1)%sf(j, k, l) & + + real(0.5_wp*dt*q_cons_vf(advxb + i - 1)%sf(j, k, l)*vel_L(3)*(1._wp/dz(l)), kind=stp) end do + end if - if (num_fluids > 1) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - 1 - $:GPU_ATOMIC(atomic='update') - rhs_vf(advxb + i - 1)%sf(j, k, l + 1) = rhs_vf(advxb + i - 1)%sf(j, k, l + 1) + & - real(0.5_wp*dt*(alpha_R(i)* & - vel_R(3))*(1._wp/dz(l + 1)) + & - 0.5_wp*dt*cfl*(alpha_R(i))*(1._wp/dz(l + 1)), kind=stp) + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb + 2)%sf(j, k, l + 1) = rhs_vf(momxb + 2)%sf(j, k, l + 1) + & + real(0.5_wp*dt*(rho_L*(vel_L(3))**2.0 + & + pres_L + F_L)*(1._wp/dz(l + 1)) - & + 0.5_wp*dt*cfl*(rho_L*vel_L(3))*(1._wp/dz(l + 1)), kind=stp) - $:GPU_ATOMIC(atomic='update') - rhs_vf(advxb + i - 1)%sf(j, k, l + 1) = rhs_vf(advxb + i - 1)%sf(j, k, l + 1) & - - real(0.5_wp*dt*q_cons_vf(advxb + i - 1)%sf(j, k, l + 1)*vel_R(3)*(1._wp/dz(l + 1)), kind=stp) + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb)%sf(j, k, l + 1) = rhs_vf(momxb)%sf(j, k, l + 1) + & + real(0.5_wp*dt*rho_L*vel_L(1)*vel_L(3)*(1._wp/dz(l + 1)) - & + 0.5_wp*dt*cfl*(rho_L*vel_L(1))*(1._wp/dz(l + 1)), kind=stp) - $:GPU_ATOMIC(atomic='update') - rhs_vf(advxb + i - 1)%sf(j, k, l) = rhs_vf(advxb + i - 1)%sf(j, k, l) - & - real(0.5_wp*dt*(alpha_R(i)* & - vel_R(3))*(1._wp/dz(l)) + & - 0.5_wp*dt*cfl*(alpha_R(i))*(1._wp/dz(l)), kind=stp) + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb + 1)%sf(j, k, l + 1) = rhs_vf(momxb + 1)%sf(j, k, l + 1) + & + real(0.5_wp*dt*rho_L*vel_L(2)*vel_L(3)*(1._wp/dz(l + 1)) - & + 0.5_wp*dt*cfl*(rho_L*vel_L(2))*(1._wp/dz(l + 1)), kind=stp) - $:GPU_ATOMIC(atomic='update') - rhs_vf(advxb + i - 1)%sf(j, k, l) = rhs_vf(advxb + i - 1)%sf(j, k, l) & - + real(0.5_wp*dt*q_cons_vf(advxb + i - 1)%sf(j, k, l)*vel_R(3)*(1._wp/dz(l)), kind=stp) - end do - end if + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j, k, l + 1) = rhs_vf(E_idx)%sf(j, k, l + 1) + & + real(0.5_wp*dt*(vel_L(3)*(E_L + & + pres_L + F_L))*(1._wp/dz(l + 1)) - & + 0.5_wp*dt*cfl*(E_L)*(1._wp/dz(l + 1)), kind=stp) - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 2)%sf(j, k, l + 1) = rhs_vf(momxb + 2)%sf(j, k, l + 1) + & - real(0.5_wp*dt*(rho_R*(vel_R(3))**2.0 + & - pres_R + F_R)*(1._wp/dz(l + 1)) + & - 0.5_wp*dt*cfl*(rho_R*vel_R(3))*(1._wp/dz(l + 1)), kind=stp) + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb + 2)%sf(j, k, l) = rhs_vf(momxb + 2)%sf(j, k, l) - & + real(0.5_wp*dt*(rho_L*(vel_L(3))**2.0 + & + pres_L + F_L)*(1._wp/dz(l)) - & + 0.5_wp*dt*cfl*(rho_L*vel_L(3))*(1._wp/dz(l)), kind=stp) - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb)%sf(j, k, l + 1) = rhs_vf(momxb)%sf(j, k, l + 1) + & - real(0.5_wp*dt*rho_R*vel_R(1)*vel_R(3)*(1._wp/dz(l + 1)) + & - 0.5_wp*dt*cfl*(rho_R*vel_R(1))*(1._wp/dz(l + 1)), kind=stp) + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb)%sf(j, k, l) = rhs_vf(momxb)%sf(j, k, l) - & + real(0.5_wp*dt*rho_L*vel_L(1)*vel_L(3)*(1._wp/dz(l)) - & + 0.5_wp*dt*cfl*(rho_L*vel_L(1))*(1._wp/dz(l)), kind=stp) - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 1)%sf(j, k, l + 1) = rhs_vf(momxb + 1)%sf(j, k, l + 1) + & - real(0.5_wp*dt*rho_R*vel_R(2)*vel_R(3)*(1._wp/dz(l + 1)) + & - 0.5_wp*dt*cfl*(rho_R*vel_R(2))*(1._wp/dz(l + 1)), kind=stp) + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb + 1)%sf(j, k, l) = rhs_vf(momxb + 1)%sf(j, k, l) - & + real(0.5_wp*dt*rho_L*vel_L(2)*vel_L(3)*(1._wp/dz(l)) - & + 0.5_wp*dt*cfl*(rho_L*vel_L(2))*(1._wp/dz(l)), kind=stp) + + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) - & + real(0.5_wp*dt*(vel_L(3)*(E_L + & + pres_L + F_L))*(1._wp/dz(l)) - & + 0.5_wp*dt*cfl*(E_L)*(1._wp/dz(l)), kind=stp) + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j, k, l + 1) = rhs_vf(E_idx)%sf(j, k, l + 1) + & - real(0.5_wp*dt*(vel_R(3)*(E_R + & - pres_R + F_R))*(1._wp/dz(l + 1)) + & - 0.5_wp*dt*cfl*(E_R)*(1._wp/dz(l + 1)), kind=stp) + rhs_vf(i)%sf(j, k, l + 1) = rhs_vf(i)%sf(j, k, l + 1) + & + real(0.5_wp*dt*(alpha_rho_R(i)* & + vel_R(3))*(1._wp/dz(l + 1)) + & + 0.5_wp*dt*cfl*(alpha_rho_R(i))*(1._wp/dz(l + 1)), kind=stp) $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 2)%sf(j, k, l) = rhs_vf(momxb + 2)%sf(j, k, l) - & + rhs_vf(i)%sf(j, k, l) = rhs_vf(i)%sf(j, k, l) - & + real(0.5_wp*dt*(alpha_rho_R(i)* & + vel_R(3))*(1._wp/dz(l)) + & + 0.5_wp*dt*cfl*(alpha_rho_R(i))*(1._wp/dz(l)), kind=stp) + end do + + if (num_fluids > 1) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids - 1 + $:GPU_ATOMIC(atomic='update') + rhs_vf(advxb + i - 1)%sf(j, k, l + 1) = rhs_vf(advxb + i - 1)%sf(j, k, l + 1) + & + real(0.5_wp*dt*(alpha_R(i)* & + vel_R(3))*(1._wp/dz(l + 1)) + & + 0.5_wp*dt*cfl*(alpha_R(i))*(1._wp/dz(l + 1)), kind=stp) + + $:GPU_ATOMIC(atomic='update') + rhs_vf(advxb + i - 1)%sf(j, k, l + 1) = rhs_vf(advxb + i - 1)%sf(j, k, l + 1) & + - real(0.5_wp*dt*q_cons_vf(advxb + i - 1)%sf(j, k, l + 1)*vel_R(3)*(1._wp/dz(l + 1)), kind=stp) + + $:GPU_ATOMIC(atomic='update') + rhs_vf(advxb + i - 1)%sf(j, k, l) = rhs_vf(advxb + i - 1)%sf(j, k, l) - & + real(0.5_wp*dt*(alpha_R(i)* & + vel_R(3))*(1._wp/dz(l)) + & + 0.5_wp*dt*cfl*(alpha_R(i))*(1._wp/dz(l)), kind=stp) + + $:GPU_ATOMIC(atomic='update') + rhs_vf(advxb + i - 1)%sf(j, k, l) = rhs_vf(advxb + i - 1)%sf(j, k, l) & + + real(0.5_wp*dt*q_cons_vf(advxb + i - 1)%sf(j, k, l)*vel_R(3)*(1._wp/dz(l)), kind=stp) + end do + end if + + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb + 2)%sf(j, k, l + 1) = rhs_vf(momxb + 2)%sf(j, k, l + 1) + & real(0.5_wp*dt*(rho_R*(vel_R(3))**2.0 + & - pres_R + F_R)*(1._wp/dz(l)) + & - 0.5_wp*dt*cfl*(rho_R*vel_R(3))*(1._wp/dz(l)), kind=stp) + pres_R + F_R)*(1._wp/dz(l + 1)) + & + 0.5_wp*dt*cfl*(rho_R*vel_R(3))*(1._wp/dz(l + 1)), kind=stp) - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb)%sf(j, k, l) = rhs_vf(momxb)%sf(j, k, l) - & - real(0.5_wp*dt*rho_R*vel_R(1)*vel_R(3)*(1._wp/dz(l)) + & - 0.5_wp*dt*cfl*(rho_R*vel_R(1))*(1._wp/dz(l)), kind=stp) + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb)%sf(j, k, l + 1) = rhs_vf(momxb)%sf(j, k, l + 1) + & + real(0.5_wp*dt*rho_R*vel_R(1)*vel_R(3)*(1._wp/dz(l + 1)) + & + 0.5_wp*dt*cfl*(rho_R*vel_R(1))*(1._wp/dz(l + 1)), kind=stp) - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 1)%sf(j, k, l) = rhs_vf(momxb + 1)%sf(j, k, l) - & - real(0.5_wp*dt*rho_R*vel_R(2)*vel_R(3)*(1._wp/dz(l)) + & - 0.5_wp*dt*cfl*(rho_R*vel_R(2))*(1._wp/dz(l)), kind=stp) + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb + 1)%sf(j, k, l + 1) = rhs_vf(momxb + 1)%sf(j, k, l + 1) + & + real(0.5_wp*dt*rho_R*vel_R(2)*vel_R(3)*(1._wp/dz(l + 1)) + & + 0.5_wp*dt*cfl*(rho_R*vel_R(2))*(1._wp/dz(l + 1)), kind=stp) - $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) - & + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j, k, l + 1) = rhs_vf(E_idx)%sf(j, k, l + 1) + & real(0.5_wp*dt*(vel_R(3)*(E_R + & - pres_R + F_R))*(1._wp/dz(l)) + & - 0.5_wp*dt*cfl*(E_R)*(1._wp/dz(l)), kind=stp) + pres_R + F_R))*(1._wp/dz(l + 1)) + & + 0.5_wp*dt*cfl*(E_R)*(1._wp/dz(l + 1)), kind=stp) + + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb + 2)%sf(j, k, l) = rhs_vf(momxb + 2)%sf(j, k, l) - & + real(0.5_wp*dt*(rho_R*(vel_R(3))**2.0 + & + pres_R + F_R)*(1._wp/dz(l)) + & + 0.5_wp*dt*cfl*(rho_R*vel_R(3))*(1._wp/dz(l)), kind=stp) + + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb)%sf(j, k, l) = rhs_vf(momxb)%sf(j, k, l) - & + real(0.5_wp*dt*rho_R*vel_R(1)*vel_R(3)*(1._wp/dz(l)) + & + 0.5_wp*dt*cfl*(rho_R*vel_R(1))*(1._wp/dz(l)), kind=stp) + + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb + 1)%sf(j, k, l) = rhs_vf(momxb + 1)%sf(j, k, l) - & + real(0.5_wp*dt*rho_R*vel_R(2)*vel_R(3)*(1._wp/dz(l)) + & + 0.5_wp*dt*cfl*(rho_R*vel_R(2))*(1._wp/dz(l)), kind=stp) + + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) - & + real(0.5_wp*dt*(vel_R(3)*(E_R + & + pres_R + F_R))*(1._wp/dz(l)) + & + 0.5_wp*dt*cfl*(E_R)*(1._wp/dz(l)), kind=stp) - end do end do end do + end do $:END_GPU_PARALLEL_LOOP() #:endif end if @@ -2843,47 +2843,47 @@ contains if (idir == 1) then $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=4) - do i = 1, sys_size - do l = 0, p - do k = 0, n - do j = 0, m - rhs_vf(i)%sf(j, k, l) = 1._wp/dx(j)* & - (flux_vf(i)%sf(j - 1, k, l) & - - flux_vf(i)%sf(j, k, l)) - end do + do i = 1, sys_size + do l = 0, p + do k = 0, n + do j = 0, m + rhs_vf(i)%sf(j, k, l) = 1._wp/dx(j)* & + (flux_vf(i)%sf(j - 1, k, l) & + - flux_vf(i)%sf(j, k, l)) end do end do end do + end do $:END_GPU_PARALLEL_LOOP() elseif (idir == 2) then $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=4) - do i = 1, sys_size - do l = 0, p - do k = 0, n - do j = 0, m - rhs_vf(i)%sf(j, k, l) = & - rhs_vf(i)%sf(j, k, l) + 1._wp/dy(k)* & - (flux_vf(i)%sf(j, k - 1, l) & - - flux_vf(i)%sf(j, k, l)) - end do + do i = 1, sys_size + do l = 0, p + do k = 0, n + do j = 0, m + rhs_vf(i)%sf(j, k, l) = & + rhs_vf(i)%sf(j, k, l) + 1._wp/dy(k)* & + (flux_vf(i)%sf(j, k - 1, l) & + - flux_vf(i)%sf(j, k, l)) end do end do end do + end do $:END_GPU_PARALLEL_LOOP() elseif (idir == 3) then $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=4) - do i = 1, sys_size - do l = 0, p - do k = 0, n - do j = 0, m - rhs_vf(i)%sf(j, k, l) = & - rhs_vf(i)%sf(j, k, l) + 1._wp/dz(l)* & - (flux_vf(i)%sf(j, k, l - 1) & - - flux_vf(i)%sf(j, k, l)) - end do + do i = 1, sys_size + do l = 0, p + do k = 0, n + do j = 0, m + rhs_vf(i)%sf(j, k, l) = & + rhs_vf(i)%sf(j, k, l) + 1._wp/dz(l)* & + (flux_vf(i)%sf(j, k, l - 1) & + - flux_vf(i)%sf(j, k, l)) end do end do end do + end do $:END_GPU_PARALLEL_LOOP() end if @@ -2932,4 +2932,4 @@ contains end subroutine s_finalize_igr_module -end module m_igr \ No newline at end of file +end module m_igr diff --git a/src/simulation/m_mhd.fpp b/src/simulation/m_mhd.fpp index d0933c500..3a588839c 100644 --- a/src/simulation/m_mhd.fpp +++ b/src/simulation/m_mhd.fpp @@ -77,54 +77,54 @@ contains real(wp) :: divB, vdotB $:GPU_PARALLEL_LOOP(collapse=3, private='[k,l,q,v,B,r,divB,vdotB]') - do q = 0, p - do l = 0, n - do k = 0, m - - divB = 0._wp - $:GPU_LOOP(parallelism='[seq]') - do r = -fd_number, fd_number - divB = divB + q_prim_vf(B_idx%beg)%sf(k + r, l, q)*fd_coeff_x_h(r, k) - end do + do q = 0, p + do l = 0, n + do k = 0, m + + divB = 0._wp + $:GPU_LOOP(parallelism='[seq]') + do r = -fd_number, fd_number + divB = divB + q_prim_vf(B_idx%beg)%sf(k + r, l, q)*fd_coeff_x_h(r, k) + end do + $:GPU_LOOP(parallelism='[seq]') + do r = -fd_number, fd_number + divB = divB + q_prim_vf(B_idx%beg + 1)%sf(k, l + r, q)*fd_coeff_y_h(r, l) + end do + if (p > 0) then $:GPU_LOOP(parallelism='[seq]') do r = -fd_number, fd_number - divB = divB + q_prim_vf(B_idx%beg + 1)%sf(k, l + r, q)*fd_coeff_y_h(r, l) + divB = divB + q_prim_vf(B_idx%beg + 2)%sf(k, l, q + r)*fd_coeff_z_h(r, q) end do - if (p > 0) then - $:GPU_LOOP(parallelism='[seq]') - do r = -fd_number, fd_number - divB = divB + q_prim_vf(B_idx%beg + 2)%sf(k, l, q + r)*fd_coeff_z_h(r, q) - end do - end if - - v(1) = q_prim_vf(momxb)%sf(k, l, q) - v(2) = q_prim_vf(momxb + 1)%sf(k, l, q) - v(3) = q_prim_vf(momxb + 2)%sf(k, l, q) - - B(1) = q_prim_vf(B_idx%beg)%sf(k, l, q) - B(2) = q_prim_vf(B_idx%beg + 1)%sf(k, l, q) - B(3) = q_prim_vf(B_idx%beg + 2)%sf(k, l, q) - - vdotB = sum(v*B) - - ! 1: rho -> unchanged - ! 2: vx -> - (divB) * Bx - ! 3: vy -> - (divB) * By - ! 4: vz -> - (divB) * Bz - ! 5: E -> - (divB) * (vdotB) - ! 6: Bx -> - (divB) * vx - ! 7: By -> - (divB) * vy - ! 8: Bz -> - (divB) * vz - - rhs_vf(momxb)%sf(k, l, q) = rhs_vf(momxb)%sf(k, l, q) - divB*B(1) - rhs_vf(momxb + 1)%sf(k, l, q) = rhs_vf(momxb + 1)%sf(k, l, q) - divB*B(2) - rhs_vf(momxb + 2)%sf(k, l, q) = rhs_vf(momxb + 2)%sf(k, l, q) - divB*B(3) - - rhs_vf(E_idx)%sf(k, l, q) = rhs_vf(E_idx)%sf(k, l, q) - divB*vdotB - - rhs_vf(B_idx%beg)%sf(k, l, q) = rhs_vf(B_idx%beg)%sf(k, l, q) - divB*v(1) - rhs_vf(B_idx%beg + 1)%sf(k, l, q) = rhs_vf(B_idx%beg + 1)%sf(k, l, q) - divB*v(2) - rhs_vf(B_idx%beg + 2)%sf(k, l, q) = rhs_vf(B_idx%beg + 2)%sf(k, l, q) - divB*v(3) + end if + + v(1) = q_prim_vf(momxb)%sf(k, l, q) + v(2) = q_prim_vf(momxb + 1)%sf(k, l, q) + v(3) = q_prim_vf(momxb + 2)%sf(k, l, q) + + B(1) = q_prim_vf(B_idx%beg)%sf(k, l, q) + B(2) = q_prim_vf(B_idx%beg + 1)%sf(k, l, q) + B(3) = q_prim_vf(B_idx%beg + 2)%sf(k, l, q) + + vdotB = sum(v*B) + + ! 1: rho -> unchanged + ! 2: vx -> - (divB) * Bx + ! 3: vy -> - (divB) * By + ! 4: vz -> - (divB) * Bz + ! 5: E -> - (divB) * (vdotB) + ! 6: Bx -> - (divB) * vx + ! 7: By -> - (divB) * vy + ! 8: Bz -> - (divB) * vz + + rhs_vf(momxb)%sf(k, l, q) = rhs_vf(momxb)%sf(k, l, q) - divB*B(1) + rhs_vf(momxb + 1)%sf(k, l, q) = rhs_vf(momxb + 1)%sf(k, l, q) - divB*B(2) + rhs_vf(momxb + 2)%sf(k, l, q) = rhs_vf(momxb + 2)%sf(k, l, q) - divB*B(3) + + rhs_vf(E_idx)%sf(k, l, q) = rhs_vf(E_idx)%sf(k, l, q) - divB*vdotB + + rhs_vf(B_idx%beg)%sf(k, l, q) = rhs_vf(B_idx%beg)%sf(k, l, q) - divB*v(1) + rhs_vf(B_idx%beg + 1)%sf(k, l, q) = rhs_vf(B_idx%beg + 1)%sf(k, l, q) - divB*v(2) + rhs_vf(B_idx%beg + 2)%sf(k, l, q) = rhs_vf(B_idx%beg + 2)%sf(k, l, q) - divB*v(3) end do end do diff --git a/src/simulation/m_muscl.fpp b/src/simulation/m_muscl.fpp index 9936e4e6b..c9d5b4bff 100644 --- a/src/simulation/m_muscl.fpp +++ b/src/simulation/m_muscl.fpp @@ -132,251 +132,251 @@ contains $:END_GPU_PARALLEL_LOOP() else if (muscl_dir == 2) then $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=4) - do i = 1, ubound(v_vf, 1) - do l = is3_muscl%beg, is3_muscl%end - do k = is2_muscl%beg, is2_muscl%end - do j = is1_muscl%beg, is1_muscl%end - vL_rs_vf_y(j, k, l, i) = v_vf(i)%sf(k, j, l) - vR_rs_vf_y(j, k, l, i) = v_vf(i)%sf(k, j, l) - end do + do i = 1, ubound(v_vf, 1) + do l = is3_muscl%beg, is3_muscl%end + do k = is2_muscl%beg, is2_muscl%end + do j = is1_muscl%beg, is1_muscl%end + vL_rs_vf_y(j, k, l, i) = v_vf(i)%sf(k, j, l) + vR_rs_vf_y(j, k, l, i) = v_vf(i)%sf(k, j, l) end do end do end do + end do $:END_GPU_PARALLEL_LOOP() else if (muscl_dir == 3) then $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=4) - do i = 1, ubound(v_vf, 1) - do l = is3_muscl%beg, is3_muscl%end - do k = is2_muscl%beg, is2_muscl%end - do j = is1_muscl%beg, is1_muscl%end - vL_rs_vf_z(j, k, l, i) = v_vf(i)%sf(l, k, j) - vR_rs_vf_z(j, k, l, i) = v_vf(i)%sf(l, k, j) - end do + do i = 1, ubound(v_vf, 1) + do l = is3_muscl%beg, is3_muscl%end + do k = is2_muscl%beg, is2_muscl%end + do j = is1_muscl%beg, is1_muscl%end + vL_rs_vf_z(j, k, l, i) = v_vf(i)%sf(l, k, j) + vR_rs_vf_z(j, k, l, i) = v_vf(i)%sf(l, k, j) end do end do end do + end do $:END_GPU_PARALLEL_LOOP() end if - else if (muscl_order == 2) then + else if (muscl_order == 2) then ! MUSCL Reconstruction #:for MUSCL_DIR, XYZ in [(1, 'x'), (2, 'y'), (3, 'z')] if (muscl_dir == ${MUSCL_DIR}$) then $:GPU_PARALLEL_LOOP(collapse=4,private='[i,j,k,l,slopeL,slopeR,slope]') - do l = is3_muscl%beg, is3_muscl%end - do k = is2_muscl%beg, is2_muscl%end - do j = is1_muscl%beg, is1_muscl%end - do i = 1, v_size - - slopeL = v_rs_ws_${XYZ}$_muscl(j + 1, k, l, i) - & - v_rs_ws_${XYZ}$_muscl(j, k, l, i) - slopeR = v_rs_ws_${XYZ}$_muscl(j, k, l, i) - & - v_rs_ws_${XYZ}$_muscl(j - 1, k, l, i) - slope = 0._wp - - if (muscl_lim == 1) then ! minmod - if (slopeL*slopeR > 1e-9_wp) then - slope = min(abs(slopeL), abs(slopeR)) - end if - if (slopeL < 0._wp) slope = -slope - elseif (muscl_lim == 2) then ! MC - if (slopeL*slopeR > 1e-9_wp) then - slope = min(2._wp*abs(slopeL), 2._wp*abs(slopeR)) - slope = min(slope, 5e-1_wp*(abs(slopeL) + abs(slopeR))) - end if - if (slopeL < 0._wp) slope = -slope - elseif (muscl_lim == 3) then ! Van Albada - if (abs(slopeL) > 1e-6_wp .and. abs(slopeR) > 1e-6_wp .and. & - abs(slopeL + slopeR) > 1e-6_wp .and. slopeL*slopeR > 1e-6_wp) then - slope = ((slopeL + slopeR)*slopeL*slopeR)/(slopeL**2._wp + slopeR**2._wp) - end if - elseif (muscl_lim == 4) then ! Van Leer - if (abs(slopeL + slopeR) > 1.e-6_wp .and. slopeL*slopeR > 1.e-6_wp) then - slope = 2._wp*slopeL*slopeR/(slopeL + slopeR) - end if - elseif (muscl_lim == 5) then ! SUPERBEE - if (slopeL*slopeR > 1e-6_wp) then - slope = -1._wp*min(-min(2._wp*abs(slopeL), abs(slopeR)), -min(abs(slopeL), 2._wp*abs(slopeR))) - end if + do l = is3_muscl%beg, is3_muscl%end + do k = is2_muscl%beg, is2_muscl%end + do j = is1_muscl%beg, is1_muscl%end + do i = 1, v_size + + slopeL = v_rs_ws_${XYZ}$_muscl(j + 1, k, l, i) - & + v_rs_ws_${XYZ}$_muscl(j, k, l, i) + slopeR = v_rs_ws_${XYZ}$_muscl(j, k, l, i) - & + v_rs_ws_${XYZ}$_muscl(j - 1, k, l, i) + slope = 0._wp + + if (muscl_lim == 1) then ! minmod + if (slopeL*slopeR > 1e-9_wp) then + slope = min(abs(slopeL), abs(slopeR)) + end if + if (slopeL < 0._wp) slope = -slope + elseif (muscl_lim == 2) then ! MC + if (slopeL*slopeR > 1e-9_wp) then + slope = min(2._wp*abs(slopeL), 2._wp*abs(slopeR)) + slope = min(slope, 5e-1_wp*(abs(slopeL) + abs(slopeR))) + end if + if (slopeL < 0._wp) slope = -slope + elseif (muscl_lim == 3) then ! Van Albada + if (abs(slopeL) > 1e-6_wp .and. abs(slopeR) > 1e-6_wp .and. & + abs(slopeL + slopeR) > 1e-6_wp .and. slopeL*slopeR > 1e-6_wp) then + slope = ((slopeL + slopeR)*slopeL*slopeR)/(slopeL**2._wp + slopeR**2._wp) + end if + elseif (muscl_lim == 4) then ! Van Leer + if (abs(slopeL + slopeR) > 1.e-6_wp .and. slopeL*slopeR > 1.e-6_wp) then + slope = 2._wp*slopeL*slopeR/(slopeL + slopeR) + end if + elseif (muscl_lim == 5) then ! SUPERBEE + if (slopeL*slopeR > 1e-6_wp) then + slope = -1._wp*min(-min(2._wp*abs(slopeL), abs(slopeR)), -min(abs(slopeL), 2._wp*abs(slopeR))) end if + end if - ! reconstruct from left side - vL_rs_vf_${XYZ}$ (j, k, l, i) = & - v_rs_ws_${XYZ}$_muscl(j, k, l, i) - (5.e-1_wp*slope) + ! reconstruct from left side + vL_rs_vf_${XYZ}$ (j, k, l, i) = & + v_rs_ws_${XYZ}$_muscl(j, k, l, i) - (5.e-1_wp*slope) - ! reconstruct from the right side - vR_rs_vf_${XYZ}$ (j, k, l, i) = & - v_rs_ws_${XYZ}$_muscl(j, k, l, i) + (5.e-1_wp*slope) + ! reconstruct from the right side + vR_rs_vf_${XYZ}$ (j, k, l, i) = & + v_rs_ws_${XYZ}$_muscl(j, k, l, i) + (5.e-1_wp*slope) - end do end do end do end do + end do $:END_GPU_PARALLEL_LOOP() end if #:endfor - end if + end if + + if (int_comp) then + call s_interface_compression(vL_rs_vf_x, vL_rs_vf_y, vL_rs_vf_z, & + vR_rs_vf_x, vR_rs_vf_y, vR_rs_vf_z, & + muscl_dir, is1_muscl_d, is2_muscl_d, is3_muscl_d) + end if + + end subroutine s_muscl + + subroutine s_interface_compression(vL_rs_vf_x, vL_rs_vf_y, vL_rs_vf_z, vR_rs_vf_x, vR_rs_vf_y, vR_rs_vf_z, & + muscl_dir, & + is1_muscl_d, is2_muscl_d, is3_muscl_d) + + real(wp), dimension(idwbuff(1)%beg:, idwbuff(2)%beg:, idwbuff(3)%beg:, 1:), intent(inout) :: & + vL_rs_vf_x, vL_rs_vf_y, & + vL_rs_vf_z, vR_rs_vf_x, & + vR_rs_vf_y, vR_rs_vf_z + integer, intent(in) :: muscl_dir + type(int_bounds_info), intent(in) :: is1_muscl_d, is2_muscl_d, is3_muscl_d + + integer :: j, k, l + + real(wp) :: aCL, aCR, aC, aTHINC, qmin, qmax, A, B, C, sign, moncon + + #:for MUSCL_DIR, XYZ in [(1, 'x'), (2, 'y'), (3, 'z')] + if (muscl_dir == ${MUSCL_DIR}$) then + + $:GPU_PARALLEL_LOOP(collapse=3,private='[j,k,l,aCL,aC,aCR,aTHINC,moncon,sign,qmin,qmax]') + do l = is3_muscl%beg, is3_muscl%end + do k = is2_muscl%beg, is2_muscl%end + do j = is1_muscl%beg, is1_muscl%end + + aCL = v_rs_ws_${XYZ}$_muscl(j - 1, k, l, advxb) + aC = v_rs_ws_${XYZ}$_muscl(j, k, l, advxb) + aCR = v_rs_ws_${XYZ}$_muscl(j + 1, k, l, advxb) + + moncon = (aCR - aC)*(aC - aCL) + + if (aC >= ic_eps .and. aC <= 1._wp - ic_eps .and. moncon > moncon_cutoff) then ! Interface cell - if (int_comp) then - call s_interface_compression(vL_rs_vf_x, vL_rs_vf_y, vL_rs_vf_z, & - vR_rs_vf_x, vR_rs_vf_y, vR_rs_vf_z, & - muscl_dir, is1_muscl_d, is2_muscl_d, is3_muscl_d) + if (aCR - aCL > 0._wp) then + sign = 1._wp + else + sign = -1._wp + end if + + qmin = min(aCR, aCL) + qmax = max(aCR, aCL) - qmin + + C = (aC - qmin + sgm_eps)/(qmax + sgm_eps) + B = exp(sign*ic_beta*(2._wp*C - 1._wp)) + A = (B/cosh(ic_beta) - 1._wp)/tanh(ic_beta) + + ! Left reconstruction + aTHINC = qmin + 5e-1_wp*qmax*(1._wp + sign*A) + if (aTHINC < ic_eps) aTHINC = ic_eps + if (aTHINC > 1 - ic_eps) aTHINC = 1 - ic_eps + vL_rs_vf_${XYZ}$ (j, k, l, contxb) = vL_rs_vf_${XYZ}$ (j, k, l, contxb)/ & + vL_rs_vf_${XYZ}$ (j, k, l, advxb)*aTHINC + vL_rs_vf_${XYZ}$ (j, k, l, contxe) = vL_rs_vf_${XYZ}$ (j, k, l, contxe)/ & + (1._wp - vL_rs_vf_${XYZ}$ (j, k, l, advxb))*(1._wp - aTHINC) + vL_rs_vf_${XYZ}$ (j, k, l, advxb) = aTHINC + vL_rs_vf_${XYZ}$ (j, k, l, advxe) = 1 - aTHINC + + ! Right reconstruction + aTHINC = qmin + 5e-1_wp*qmax*(1._wp + sign*(tanh(ic_beta) + A)/(1._wp + A*tanh(ic_beta))) + if (aTHINC < ic_eps) aTHINC = ic_eps + if (aTHINC > 1 - ic_eps) aTHINC = 1 - ic_eps + vR_rs_vf_${XYZ}$ (j, k, l, contxb) = vL_rs_vf_${XYZ}$ (j, k, l, contxb)/ & + vL_rs_vf_${XYZ}$ (j, k, l, advxb)*aTHINC + vR_rs_vf_${XYZ}$ (j, k, l, contxe) = vL_rs_vf_${XYZ}$ (j, k, l, contxe)/ & + (1._wp - vL_rs_vf_${XYZ}$ (j, k, l, advxb))*(1._wp - aTHINC) + vR_rs_vf_${XYZ}$ (j, k, l, advxb) = aTHINC + vR_rs_vf_${XYZ}$ (j, k, l, advxe) = 1 - aTHINC + + end if + + end do + end do + end do + $:END_GPU_PARALLEL_LOOP() end if + #:endfor - end subroutine s_muscl - - subroutine s_interface_compression(vL_rs_vf_x, vL_rs_vf_y, vL_rs_vf_z, vR_rs_vf_x, vR_rs_vf_y, vR_rs_vf_z, & - muscl_dir, & - is1_muscl_d, is2_muscl_d, is3_muscl_d) - - real(wp), dimension(idwbuff(1)%beg:, idwbuff(2)%beg:, idwbuff(3)%beg:, 1:), intent(inout) :: & - vL_rs_vf_x, vL_rs_vf_y, & - vL_rs_vf_z, vR_rs_vf_x, & - vR_rs_vf_y, vR_rs_vf_z - integer, intent(in) :: muscl_dir - type(int_bounds_info), intent(in) :: is1_muscl_d, is2_muscl_d, is3_muscl_d - - integer :: j, k, l - - real(wp) :: aCL, aCR, aC, aTHINC, qmin, qmax, A, B, C, sign, moncon - - #:for MUSCL_DIR, XYZ in [(1, 'x'), (2, 'y'), (3, 'z')] - if (muscl_dir == ${MUSCL_DIR}$) then - - $:GPU_PARALLEL_LOOP(collapse=3,private='[j,k,l,aCL,aC,aCR,aTHINC,moncon,sign,qmin,qmax]') - do l = is3_muscl%beg, is3_muscl%end - do k = is2_muscl%beg, is2_muscl%end - do j = is1_muscl%beg, is1_muscl%end - - aCL = v_rs_ws_${XYZ}$_muscl(j - 1, k, l, advxb) - aC = v_rs_ws_${XYZ}$_muscl(j, k, l, advxb) - aCR = v_rs_ws_${XYZ}$_muscl(j + 1, k, l, advxb) - - moncon = (aCR - aC)*(aC - aCL) - - if (aC >= ic_eps .and. aC <= 1._wp - ic_eps .and. moncon > moncon_cutoff) then ! Interface cell - - if (aCR - aCL > 0._wp) then - sign = 1._wp - else - sign = -1._wp - end if - - qmin = min(aCR, aCL) - qmax = max(aCR, aCL) - qmin - - C = (aC - qmin + sgm_eps)/(qmax + sgm_eps) - B = exp(sign*ic_beta*(2._wp*C - 1._wp)) - A = (B/cosh(ic_beta) - 1._wp)/tanh(ic_beta) - - ! Left reconstruction - aTHINC = qmin + 5e-1_wp*qmax*(1._wp + sign*A) - if (aTHINC < ic_eps) aTHINC = ic_eps - if (aTHINC > 1 - ic_eps) aTHINC = 1 - ic_eps - vL_rs_vf_${XYZ}$ (j, k, l, contxb) = vL_rs_vf_${XYZ}$ (j, k, l, contxb)/ & - vL_rs_vf_${XYZ}$ (j, k, l, advxb)*aTHINC - vL_rs_vf_${XYZ}$ (j, k, l, contxe) = vL_rs_vf_${XYZ}$ (j, k, l, contxe)/ & - (1._wp - vL_rs_vf_${XYZ}$ (j, k, l, advxb))*(1._wp - aTHINC) - vL_rs_vf_${XYZ}$ (j, k, l, advxb) = aTHINC - vL_rs_vf_${XYZ}$ (j, k, l, advxe) = 1 - aTHINC - - ! Right reconstruction - aTHINC = qmin + 5e-1_wp*qmax*(1._wp + sign*(tanh(ic_beta) + A)/(1._wp + A*tanh(ic_beta))) - if (aTHINC < ic_eps) aTHINC = ic_eps - if (aTHINC > 1 - ic_eps) aTHINC = 1 - ic_eps - vR_rs_vf_${XYZ}$ (j, k, l, contxb) = vL_rs_vf_${XYZ}$ (j, k, l, contxb)/ & - vL_rs_vf_${XYZ}$ (j, k, l, advxb)*aTHINC - vR_rs_vf_${XYZ}$ (j, k, l, contxe) = vL_rs_vf_${XYZ}$ (j, k, l, contxe)/ & - (1._wp - vL_rs_vf_${XYZ}$ (j, k, l, advxb))*(1._wp - aTHINC) - vR_rs_vf_${XYZ}$ (j, k, l, advxb) = aTHINC - vR_rs_vf_${XYZ}$ (j, k, l, advxe) = 1 - aTHINC + end subroutine s_interface_compression - end if + subroutine s_initialize_muscl(v_vf, muscl_dir) - end do - end do - end do - $:END_GPU_PARALLEL_LOOP() - end if - #:endfor - - end subroutine s_interface_compression - - subroutine s_initialize_muscl(v_vf, muscl_dir) - - type(scalar_field), dimension(:), intent(in) :: v_vf - integer, intent(in) :: muscl_dir - - integer :: j, k, l, q !< Generic loop iterators - - ! Determining the number of cell-average variables which will be - ! muscl-reconstructed and mapping their indical bounds in the x-, - ! y- and z-directions to those in the s1-, s2- and s3-directions - ! as to reshape the inputted data in the coordinate direction of - ! the muscl reconstruction - v_size = ubound(v_vf, 1) - $:GPU_UPDATE(device='[v_size]') - - if (muscl_dir == 1) then - $:GPU_PARALLEL_LOOP(private='[j,k,l,q]', collapse=4) - do j = 1, v_size - do q = is3_muscl%beg, is3_muscl%end - do l = is2_muscl%beg, is2_muscl%end - do k = is1_muscl%beg - muscl_polyn, is1_muscl%end + muscl_polyn - v_rs_ws_x_muscl(k, l, q, j) = v_vf(j)%sf(k, l, q) - end do - end do - end do + type(scalar_field), dimension(:), intent(in) :: v_vf + integer, intent(in) :: muscl_dir + + integer :: j, k, l, q !< Generic loop iterators + + ! Determining the number of cell-average variables which will be + ! muscl-reconstructed and mapping their indical bounds in the x-, + ! y- and z-directions to those in the s1-, s2- and s3-directions + ! as to reshape the inputted data in the coordinate direction of + ! the muscl reconstruction + v_size = ubound(v_vf, 1) + $:GPU_UPDATE(device='[v_size]') + + if (muscl_dir == 1) then + $:GPU_PARALLEL_LOOP(private='[j,k,l,q]', collapse=4) + do j = 1, v_size + do q = is3_muscl%beg, is3_muscl%end + do l = is2_muscl%beg, is2_muscl%end + do k = is1_muscl%beg - muscl_polyn, is1_muscl%end + muscl_polyn + v_rs_ws_x_muscl(k, l, q, j) = v_vf(j)%sf(k, l, q) end do - $:END_GPU_PARALLEL_LOOP() - end if + end do + end do + end do + $:END_GPU_PARALLEL_LOOP() + end if - ! Reshaping/Projecting onto Characteristic Fields in y-direction - if (n == 0) return - - if (muscl_dir == 2) then - $:GPU_PARALLEL_LOOP(private='[j,k,l,q]', collapse=4) - do j = 1, v_size - do q = is3_muscl%beg, is3_muscl%end - do l = is2_muscl%beg, is2_muscl%end - do k = is1_muscl%beg - muscl_polyn, is1_muscl%end + muscl_polyn - v_rs_ws_y_muscl(k, l, q, j) = v_vf(j)%sf(l, k, q) - end do - end do - end do + ! Reshaping/Projecting onto Characteristic Fields in y-direction + if (n == 0) return + + if (muscl_dir == 2) then + $:GPU_PARALLEL_LOOP(private='[j,k,l,q]', collapse=4) + do j = 1, v_size + do q = is3_muscl%beg, is3_muscl%end + do l = is2_muscl%beg, is2_muscl%end + do k = is1_muscl%beg - muscl_polyn, is1_muscl%end + muscl_polyn + v_rs_ws_y_muscl(k, l, q, j) = v_vf(j)%sf(l, k, q) end do - $:END_GPU_PARALLEL_LOOP() - end if + end do + end do + end do + $:END_GPU_PARALLEL_LOOP() + end if - ! Reshaping/Projecting onto Characteristic Fields in z-direction - if (p == 0) return - if (muscl_dir == 3) then - $:GPU_PARALLEL_LOOP(private='[j,k,l,q]', collapse=4) - do j = 1, v_size - do q = is3_muscl%beg, is3_muscl%end - do l = is2_muscl%beg, is2_muscl%end - do k = is1_muscl%beg - muscl_polyn, is1_muscl%end + muscl_polyn - v_rs_ws_z_muscl(k, l, q, j) = v_vf(j)%sf(q, l, k) - end do - end do - end do + ! Reshaping/Projecting onto Characteristic Fields in z-direction + if (p == 0) return + if (muscl_dir == 3) then + $:GPU_PARALLEL_LOOP(private='[j,k,l,q]', collapse=4) + do j = 1, v_size + do q = is3_muscl%beg, is3_muscl%end + do l = is2_muscl%beg, is2_muscl%end + do k = is1_muscl%beg - muscl_polyn, is1_muscl%end + muscl_polyn + v_rs_ws_z_muscl(k, l, q, j) = v_vf(j)%sf(q, l, k) end do - $:END_GPU_PARALLEL_LOOP() - end if + end do + end do + end do + $:END_GPU_PARALLEL_LOOP() + end if - end subroutine s_initialize_muscl + end subroutine s_initialize_muscl - subroutine s_finalize_muscl_module() + subroutine s_finalize_muscl_module() - @:DEALLOCATE(v_rs_ws_x_muscl) + @:DEALLOCATE(v_rs_ws_x_muscl) - if (n == 0) return + if (n == 0) return - @:DEALLOCATE(v_rs_ws_y_muscl) + @:DEALLOCATE(v_rs_ws_y_muscl) - if (p == 0) return + if (p == 0) return - @:DEALLOCATE(v_rs_ws_z_muscl) + @:DEALLOCATE(v_rs_ws_z_muscl) - end subroutine s_finalize_muscl_module - end module m_muscl \ No newline at end of file + end subroutine s_finalize_muscl_module +end module m_muscl diff --git a/src/simulation/m_qbmm.fpp b/src/simulation/m_qbmm.fpp index 3abb23d00..408348a1c 100644 --- a/src/simulation/m_qbmm.fpp +++ b/src/simulation/m_qbmm.fpp @@ -726,9 +726,9 @@ contains $:GPU_UPDATE(device='[is1_qbmm,is2_qbmm,is3_qbmm]') $:GPU_PARALLEL_LOOP(collapse=3, private='[id1,id2,id3,moms, msum, wght, abscX, abscY, wght_pb, wght_mv, wght_ht, coeff, ht, r, q, n_tait, B_tait, pres, rho, nbub, c, alf, momsum, drdt, drdt2, chi_vw, x_vw, rho_mw, k_mw, T_bar, grad_T, i1, i2, j]') - do id3 = is3_qbmm%beg, is3_qbmm%end - do id2 = is2_qbmm%beg, is2_qbmm%end - do id1 = is1_qbmm%beg, is1_qbmm%end + do id3 = is3_qbmm%beg, is3_qbmm%end + do id2 = is2_qbmm%beg, is2_qbmm%end + do id1 = is1_qbmm%beg, is1_qbmm%end alf = q_prim_vf(alf_idx)%sf(id1, id2, id3) pres = q_prim_vf(E_idx)%sf(id1, id2, id3) diff --git a/src/simulation/m_riemann_solvers.fpp b/src/simulation/m_riemann_solvers.fpp index a4bc08c9c..77c8d4e49 100644 --- a/src/simulation/m_riemann_solvers.fpp +++ b/src/simulation/m_riemann_solvers.fpp @@ -359,628 +359,628 @@ contains if (norm_dir == ${NORM_DIR}$) then $:GPU_PARALLEL_LOOP(collapse=3, private='[i,j,k,l,q,alpha_rho_L,alpha_rho_R,vel_L,vel_R,alpha_L,alpha_R,tau_e_L,tau_e_R,Re_L,Re_R,s_L,s_R,s_S,Ys_L,Ys_R,xi_field_L, xi_field_R, Cp_iL, Cp_iR, Xs_L, Xs_R, Gamma_iL, Gamma_iR, Yi_avg, Phi_avg, h_iL, h_iR, h_avg_2, c_fast, pres_mag, B, Ga, vdotB, B2, b4, cm, pcorr, zcoef, vel_L_tmp, vel_R_tmp, rho_L, rho_R, pres_L, pres_R, E_L, E_R, H_L, H_R, Cp_avg, Cv_avg, T_avg, eps, c_sum_Yi_Phi, T_L, T_R, Y_L, Y_R, MW_L, MW_R, R_gas_L, R_gas_R, Cp_L, Cp_R, Cv_L, Cv_R, Gamm_L, Gamm_R, gamma_L, gamma_R, pi_inf_L, pi_inf_R, qv_L, qv_R, c_L, c_R, G_L, G_R, rho_avg, H_avg, c_avg, gamma_avg, ptilde_L, ptilde_R, vel_L_rms, vel_R_rms, vel_avg_rms, Ms_L, Ms_R, pres_SL, pres_SR, alpha_L_sum, alpha_R_sum, flux_tau_L, flux_tau_R]', copyin='[norm_dir]') - do l = is3%beg, is3%end - do k = is2%beg, is2%end - do j = is1%beg, is1%end - $:GPU_LOOP(parallelism='[seq]') - do i = 1, contxe - alpha_rho_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, i) - alpha_rho_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) - end do + do l = is3%beg, is3%end + do k = is2%beg, is2%end + do j = is1%beg, is1%end + $:GPU_LOOP(parallelism='[seq]') + do i = 1, contxe + alpha_rho_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, i) + alpha_rho_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) + end do - vel_L_rms = 0._wp; vel_R_rms = 0._wp + vel_L_rms = 0._wp; vel_R_rms = 0._wp - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_vels - vel_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, contxe + i) - vel_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, contxe + i) - vel_L_rms = vel_L_rms + vel_L(i)**2._wp - vel_R_rms = vel_R_rms + vel_R(i)**2._wp - end do - - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - alpha_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i) - alpha_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i) - end do + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_vels + vel_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, contxe + i) + vel_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, contxe + i) + vel_L_rms = vel_L_rms + vel_L(i)**2._wp + vel_R_rms = vel_R_rms + vel_R(i)**2._wp + end do - pres_L = qL_prim_rs${XYZ}$_vf(j, k, l, E_idx) - pres_R = qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx) + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids + alpha_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i) + alpha_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i) + end do - if (mhd) then - if (n == 0) then ! 1D: constant Bx; By, Bz as variables - B%L(1) = Bx0 - B%R(1) = Bx0 - B%L(2) = qL_prim_rs${XYZ}$_vf(j, k, l, B_idx%beg) - B%R(2) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, B_idx%beg) - B%L(3) = qL_prim_rs${XYZ}$_vf(j, k, l, B_idx%beg + 1) - B%R(3) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, B_idx%beg + 1) - else ! 2D/3D: Bx, By, Bz as variables - B%L(1) = qL_prim_rs${XYZ}$_vf(j, k, l, B_idx%beg) - B%R(1) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, B_idx%beg) - B%L(2) = qL_prim_rs${XYZ}$_vf(j, k, l, B_idx%beg + 1) - B%R(2) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, B_idx%beg + 1) - B%L(3) = qL_prim_rs${XYZ}$_vf(j, k, l, B_idx%beg + 2) - B%R(3) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, B_idx%beg + 2) - end if + pres_L = qL_prim_rs${XYZ}$_vf(j, k, l, E_idx) + pres_R = qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx) + + if (mhd) then + if (n == 0) then ! 1D: constant Bx; By, Bz as variables + B%L(1) = Bx0 + B%R(1) = Bx0 + B%L(2) = qL_prim_rs${XYZ}$_vf(j, k, l, B_idx%beg) + B%R(2) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, B_idx%beg) + B%L(3) = qL_prim_rs${XYZ}$_vf(j, k, l, B_idx%beg + 1) + B%R(3) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, B_idx%beg + 1) + else ! 2D/3D: Bx, By, Bz as variables + B%L(1) = qL_prim_rs${XYZ}$_vf(j, k, l, B_idx%beg) + B%R(1) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, B_idx%beg) + B%L(2) = qL_prim_rs${XYZ}$_vf(j, k, l, B_idx%beg + 1) + B%R(2) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, B_idx%beg + 1) + B%L(3) = qL_prim_rs${XYZ}$_vf(j, k, l, B_idx%beg + 2) + B%R(3) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, B_idx%beg + 2) end if + end if - rho_L = 0._wp - gamma_L = 0._wp - pi_inf_L = 0._wp - qv_L = 0._wp - - rho_R = 0._wp - gamma_R = 0._wp - pi_inf_R = 0._wp - qv_R = 0._wp - - alpha_L_sum = 0._wp - alpha_R_sum = 0._wp + rho_L = 0._wp + gamma_L = 0._wp + pi_inf_L = 0._wp + qv_L = 0._wp - pres_mag%L = 0._wp - pres_mag%R = 0._wp + rho_R = 0._wp + gamma_R = 0._wp + pi_inf_R = 0._wp + qv_R = 0._wp - if (mpp_lim) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - alpha_rho_L(i) = max(0._wp, alpha_rho_L(i)) - alpha_L(i) = min(max(0._wp, alpha_L(i)), 1._wp) - alpha_L_sum = alpha_L_sum + alpha_L(i) - alpha_rho_R(i) = max(0._wp, alpha_rho_R(i)) - alpha_R(i) = min(max(0._wp, alpha_R(i)), 1._wp) - alpha_R_sum = alpha_R_sum + alpha_R(i) - end do + alpha_L_sum = 0._wp + alpha_R_sum = 0._wp - alpha_L = alpha_L/max(alpha_L_sum, sgm_eps) - alpha_R = alpha_R/max(alpha_R_sum, sgm_eps) - end if + pres_mag%L = 0._wp + pres_mag%R = 0._wp + if (mpp_lim) then $:GPU_LOOP(parallelism='[seq]') do i = 1, num_fluids - rho_L = rho_L + alpha_rho_L(i) - gamma_L = gamma_L + alpha_L(i)*gammas(i) - pi_inf_L = pi_inf_L + alpha_L(i)*pi_infs(i) - qv_L = qv_L + alpha_rho_L(i)*qvs(i) - - rho_R = rho_R + alpha_rho_R(i) - gamma_R = gamma_R + alpha_R(i)*gammas(i) - pi_inf_R = pi_inf_R + alpha_R(i)*pi_infs(i) - qv_R = qv_R + alpha_rho_R(i)*qvs(i) + alpha_rho_L(i) = max(0._wp, alpha_rho_L(i)) + alpha_L(i) = min(max(0._wp, alpha_L(i)), 1._wp) + alpha_L_sum = alpha_L_sum + alpha_L(i) + alpha_rho_R(i) = max(0._wp, alpha_rho_R(i)) + alpha_R(i) = min(max(0._wp, alpha_R(i)), 1._wp) + alpha_R_sum = alpha_R_sum + alpha_R(i) end do - if (viscous) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, 2 - Re_L(i) = dflt_real - Re_R(i) = dflt_real + alpha_L = alpha_L/max(alpha_L_sum, sgm_eps) + alpha_R = alpha_R/max(alpha_R_sum, sgm_eps) + end if - if (Re_size(i) > 0) Re_L(i) = 0._wp - if (Re_size(i) > 0) Re_R(i) = 0._wp + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids + rho_L = rho_L + alpha_rho_L(i) + gamma_L = gamma_L + alpha_L(i)*gammas(i) + pi_inf_L = pi_inf_L + alpha_L(i)*pi_infs(i) + qv_L = qv_L + alpha_rho_L(i)*qvs(i) + + rho_R = rho_R + alpha_rho_R(i) + gamma_R = gamma_R + alpha_R(i)*gammas(i) + pi_inf_R = pi_inf_R + alpha_R(i)*pi_infs(i) + qv_R = qv_R + alpha_rho_R(i)*qvs(i) + end do - $:GPU_LOOP(parallelism='[seq]') - do q = 1, Re_size(i) - Re_L(i) = alpha_L(Re_idx(i, q))/Res_gs(i, q) & - + Re_L(i) - Re_R(i) = alpha_R(Re_idx(i, q))/Res_gs(i, q) & - + Re_R(i) - end do + if (viscous) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, 2 + Re_L(i) = dflt_real + Re_R(i) = dflt_real - Re_L(i) = 1._wp/max(Re_L(i), sgm_eps) - Re_R(i) = 1._wp/max(Re_R(i), sgm_eps) - end do - end if + if (Re_size(i) > 0) Re_L(i) = 0._wp + if (Re_size(i) > 0) Re_R(i) = 0._wp - if (chemistry) then $:GPU_LOOP(parallelism='[seq]') - do i = chemxb, chemxe - Ys_L(i - chemxb + 1) = qL_prim_rs${XYZ}$_vf(j, k, l, i) - Ys_R(i - chemxb + 1) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) + do q = 1, Re_size(i) + Re_L(i) = alpha_L(Re_idx(i, q))/Res_gs(i, q) & + + Re_L(i) + Re_R(i) = alpha_R(Re_idx(i, q))/Res_gs(i, q) & + + Re_R(i) end do - call get_mixture_molecular_weight(Ys_L, MW_L) - call get_mixture_molecular_weight(Ys_R, MW_R) - - Xs_L(:) = Ys_L(:)*MW_L/molecular_weights(:) - Xs_R(:) = Ys_R(:)*MW_R/molecular_weights(:) + Re_L(i) = 1._wp/max(Re_L(i), sgm_eps) + Re_R(i) = 1._wp/max(Re_R(i), sgm_eps) + end do + end if - R_gas_L = gas_constant/MW_L - R_gas_R = gas_constant/MW_R - T_L = pres_L/rho_L/R_gas_L - T_R = pres_R/rho_R/R_gas_R + if (chemistry) then + $:GPU_LOOP(parallelism='[seq]') + do i = chemxb, chemxe + Ys_L(i - chemxb + 1) = qL_prim_rs${XYZ}$_vf(j, k, l, i) + Ys_R(i - chemxb + 1) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) + end do - call get_species_specific_heats_r(T_L, Cp_iL) - call get_species_specific_heats_r(T_R, Cp_iR) + call get_mixture_molecular_weight(Ys_L, MW_L) + call get_mixture_molecular_weight(Ys_R, MW_R) + + Xs_L(:) = Ys_L(:)*MW_L/molecular_weights(:) + Xs_R(:) = Ys_R(:)*MW_R/molecular_weights(:) + + R_gas_L = gas_constant/MW_L + R_gas_R = gas_constant/MW_R + T_L = pres_L/rho_L/R_gas_L + T_R = pres_R/rho_R/R_gas_R + + call get_species_specific_heats_r(T_L, Cp_iL) + call get_species_specific_heats_r(T_R, Cp_iR) + + if (chem_params%gamma_method == 1) then + ! gamma_method = 1: Ref. Section 2.3.1 Formulation of doi:10.7907/ZKW8-ES97. + Gamma_iL = Cp_iL/(Cp_iL - 1.0_wp) + Gamma_iR = Cp_iR/(Cp_iR - 1.0_wp) + + gamma_L = sum(Xs_L(:)/(Gamma_iL(:) - 1.0_wp)) + gamma_R = sum(Xs_R(:)/(Gamma_iR(:) - 1.0_wp)) + else if (chem_params%gamma_method == 2) then + ! gamma_method = 2: c_p / c_v where c_p, c_v are specific heats. + call get_mixture_specific_heat_cp_mass(T_L, Ys_L, Cp_L) + call get_mixture_specific_heat_cp_mass(T_R, Ys_R, Cp_R) + call get_mixture_specific_heat_cv_mass(T_L, Ys_L, Cv_L) + call get_mixture_specific_heat_cv_mass(T_R, Ys_R, Cv_R) + + Gamm_L = Cp_L/Cv_L + gamma_L = 1.0_wp/(Gamm_L - 1.0_wp) + Gamm_R = Cp_R/Cv_R + gamma_R = 1.0_wp/(Gamm_R - 1.0_wp) + end if - if (chem_params%gamma_method == 1) then - ! gamma_method = 1: Ref. Section 2.3.1 Formulation of doi:10.7907/ZKW8-ES97. - Gamma_iL = Cp_iL/(Cp_iL - 1.0_wp) - Gamma_iR = Cp_iR/(Cp_iR - 1.0_wp) + call get_mixture_energy_mass(T_L, Ys_L, E_L) + call get_mixture_energy_mass(T_R, Ys_R, E_R) - gamma_L = sum(Xs_L(:)/(Gamma_iL(:) - 1.0_wp)) - gamma_R = sum(Xs_R(:)/(Gamma_iR(:) - 1.0_wp)) - else if (chem_params%gamma_method == 2) then - ! gamma_method = 2: c_p / c_v where c_p, c_v are specific heats. - call get_mixture_specific_heat_cp_mass(T_L, Ys_L, Cp_L) - call get_mixture_specific_heat_cp_mass(T_R, Ys_R, Cp_R) - call get_mixture_specific_heat_cv_mass(T_L, Ys_L, Cv_L) - call get_mixture_specific_heat_cv_mass(T_R, Ys_R, Cv_R) + E_L = rho_L*E_L + 5.e-1*rho_L*vel_L_rms + E_R = rho_R*E_R + 5.e-1*rho_R*vel_R_rms + H_L = (E_L + pres_L)/rho_L + H_R = (E_R + pres_R)/rho_R + elseif (mhd .and. relativity) then + Ga%L = 1._wp/sqrt(1._wp - vel_L_rms) + Ga%R = 1._wp/sqrt(1._wp - vel_R_rms) + #:if not MFC_CASE_OPTIMIZATION or num_dims > 2 + vdotB%L = vel_L(1)*B%L(1) + vel_L(2)*B%L(2) + vel_L(3)*B%L(3) + vdotB%R = vel_R(1)*B%R(1) + vel_R(2)*B%R(2) + vel_R(3)*B%R(3) - Gamm_L = Cp_L/Cv_L - gamma_L = 1.0_wp/(Gamm_L - 1.0_wp) - Gamm_R = Cp_R/Cv_R - gamma_R = 1.0_wp/(Gamm_R - 1.0_wp) - end if + b4%L(1:3) = B%L(1:3)/Ga%L + Ga%L*vel_L(1:3)*vdotB%L + b4%R(1:3) = B%R(1:3)/Ga%R + Ga%R*vel_R(1:3)*vdotB%R + B2%L = B%L(1)**2._wp + B%L(2)**2._wp + B%L(3)**2._wp + B2%R = B%R(1)**2._wp + B%R(2)**2._wp + B%R(3)**2._wp + #:endif - call get_mixture_energy_mass(T_L, Ys_L, E_L) - call get_mixture_energy_mass(T_R, Ys_R, E_R) + pres_mag%L = 0.5_wp*(B2%L/Ga%L**2._wp + vdotB%L**2._wp) + pres_mag%R = 0.5_wp*(B2%R/Ga%R**2._wp + vdotB%R**2._wp) - E_L = rho_L*E_L + 5.e-1*rho_L*vel_L_rms - E_R = rho_R*E_R + 5.e-1*rho_R*vel_R_rms - H_L = (E_L + pres_L)/rho_L - H_R = (E_R + pres_R)/rho_R - elseif (mhd .and. relativity) then - Ga%L = 1._wp/sqrt(1._wp - vel_L_rms) - Ga%R = 1._wp/sqrt(1._wp - vel_R_rms) - #:if not MFC_CASE_OPTIMIZATION or num_dims > 2 - vdotB%L = vel_L(1)*B%L(1) + vel_L(2)*B%L(2) + vel_L(3)*B%L(3) - vdotB%R = vel_R(1)*B%R(1) + vel_R(2)*B%R(2) + vel_R(3)*B%R(3) + ! Hard-coded EOS + H_L = 1._wp + (gamma_L + 1)*pres_L/rho_L + H_R = 1._wp + (gamma_R + 1)*pres_R/rho_R + #:if not MFC_CASE_OPTIMIZATION or num_dims > 2 + cm%L(1:3) = (rho_L*H_L*Ga%L**2 + B2%L)*vel_L(1:3) - vdotB%L*B%L(1:3) + cm%R(1:3) = (rho_R*H_R*Ga%R**2 + B2%R)*vel_R(1:3) - vdotB%R*B%R(1:3) + #:endif - b4%L(1:3) = B%L(1:3)/Ga%L + Ga%L*vel_L(1:3)*vdotB%L - b4%R(1:3) = B%R(1:3)/Ga%R + Ga%R*vel_R(1:3)*vdotB%R - B2%L = B%L(1)**2._wp + B%L(2)**2._wp + B%L(3)**2._wp - B2%R = B%R(1)**2._wp + B%R(2)**2._wp + B%R(3)**2._wp - #:endif + E_L = rho_L*H_L*Ga%L**2 - pres_L + 0.5_wp*(B2%L + vel_L_rms*B2%L - vdotB%L**2._wp) - rho_L*Ga%L + E_R = rho_R*H_R*Ga%R**2 - pres_R + 0.5_wp*(B2%R + vel_R_rms*B2%R - vdotB%R**2._wp) - rho_R*Ga%R + elseif (mhd .and. .not. relativity) then + #:if not MFC_CASE_OPTIMIZATION or num_dims > 2 + pres_mag%L = 0.5_wp*(B%L(1)**2._wp + B%L(2)**2._wp + B%L(3)**2._wp) + pres_mag%R = 0.5_wp*(B%R(1)**2._wp + B%R(2)**2._wp + B%R(3)**2._wp) + #:endif + E_L = gamma_L*pres_L + pi_inf_L + 0.5_wp*rho_L*vel_L_rms + qv_L + pres_mag%L + E_R = gamma_R*pres_R + pi_inf_R + 0.5_wp*rho_R*vel_R_rms + qv_R + pres_mag%R ! includes magnetic energy + H_L = (E_L + pres_L - pres_mag%L)/rho_L + H_R = (E_R + pres_R - pres_mag%R)/rho_R ! stagnation enthalpy here excludes magnetic energy (only used to find speed of sound) + else + E_L = gamma_L*pres_L + pi_inf_L + 5.e-1*rho_L*vel_L_rms + qv_L + E_R = gamma_R*pres_R + pi_inf_R + 5.e-1*rho_R*vel_R_rms + qv_R + H_L = (E_L + pres_L)/rho_L + H_R = (E_R + pres_R)/rho_R + end if - pres_mag%L = 0.5_wp*(B2%L/Ga%L**2._wp + vdotB%L**2._wp) - pres_mag%R = 0.5_wp*(B2%R/Ga%R**2._wp + vdotB%R**2._wp) + ! elastic energy update + if (hypoelasticity) then + G_L = 0._wp; G_R = 0._wp - ! Hard-coded EOS - H_L = 1._wp + (gamma_L + 1)*pres_L/rho_L - H_R = 1._wp + (gamma_R + 1)*pres_R/rho_R - #:if not MFC_CASE_OPTIMIZATION or num_dims > 2 - cm%L(1:3) = (rho_L*H_L*Ga%L**2 + B2%L)*vel_L(1:3) - vdotB%L*B%L(1:3) - cm%R(1:3) = (rho_R*H_R*Ga%R**2 + B2%R)*vel_R(1:3) - vdotB%R*B%R(1:3) - #:endif + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids + G_L = G_L + alpha_L(i)*Gs_rs(i) + G_R = G_R + alpha_R(i)*Gs_rs(i) + end do - E_L = rho_L*H_L*Ga%L**2 - pres_L + 0.5_wp*(B2%L + vel_L_rms*B2%L - vdotB%L**2._wp) - rho_L*Ga%L - E_R = rho_R*H_R*Ga%R**2 - pres_R + 0.5_wp*(B2%R + vel_R_rms*B2%R - vdotB%R**2._wp) - rho_R*Ga%R - elseif (mhd .and. .not. relativity) then - #:if not MFC_CASE_OPTIMIZATION or num_dims > 2 - pres_mag%L = 0.5_wp*(B%L(1)**2._wp + B%L(2)**2._wp + B%L(3)**2._wp) - pres_mag%R = 0.5_wp*(B%R(1)**2._wp + B%R(2)**2._wp + B%R(3)**2._wp) - #:endif - E_L = gamma_L*pres_L + pi_inf_L + 0.5_wp*rho_L*vel_L_rms + qv_L + pres_mag%L - E_R = gamma_R*pres_R + pi_inf_R + 0.5_wp*rho_R*vel_R_rms + qv_R + pres_mag%R ! includes magnetic energy - H_L = (E_L + pres_L - pres_mag%L)/rho_L - H_R = (E_R + pres_R - pres_mag%R)/rho_R ! stagnation enthalpy here excludes magnetic energy (only used to find speed of sound) - else - E_L = gamma_L*pres_L + pi_inf_L + 5.e-1*rho_L*vel_L_rms + qv_L - E_R = gamma_R*pres_R + pi_inf_R + 5.e-1*rho_R*vel_R_rms + qv_R - H_L = (E_L + pres_L)/rho_L - H_R = (E_R + pres_R)/rho_R + if (cont_damage) then + G_L = G_L*max((1._wp - qL_prim_rs${XYZ}$_vf(j, k, l, damage_idx)), 0._wp) + G_R = G_R*max((1._wp - qR_prim_rs${XYZ}$_vf(j, k, l, damage_idx)), 0._wp) end if - ! elastic energy update - if (hypoelasticity) then - G_L = 0._wp; G_R = 0._wp - - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - G_L = G_L + alpha_L(i)*Gs_rs(i) - G_R = G_R + alpha_R(i)*Gs_rs(i) - end do - - if (cont_damage) then - G_L = G_L*max((1._wp - qL_prim_rs${XYZ}$_vf(j, k, l, damage_idx)), 0._wp) - G_R = G_R*max((1._wp - qR_prim_rs${XYZ}$_vf(j, k, l, damage_idx)), 0._wp) - end if - - $:GPU_LOOP(parallelism='[seq]') - do i = 1, strxe - strxb + 1 - tau_e_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, strxb - 1 + i) - tau_e_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, strxb - 1 + i) - ! Elastic contribution to energy if G large enough - !TODO take out if statement if stable without - if ((G_L > 1000) .and. (G_R > 1000)) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, strxe - strxb + 1 + tau_e_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, strxb - 1 + i) + tau_e_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, strxb - 1 + i) + ! Elastic contribution to energy if G large enough + !TODO take out if statement if stable without + if ((G_L > 1000) .and. (G_R > 1000)) then + E_L = E_L + (tau_e_L(i)*tau_e_L(i))/(4._wp*G_L) + E_R = E_R + (tau_e_R(i)*tau_e_R(i))/(4._wp*G_R) + ! Double for shear stresses + if (any(strxb - 1 + i == shear_indices)) then E_L = E_L + (tau_e_L(i)*tau_e_L(i))/(4._wp*G_L) E_R = E_R + (tau_e_R(i)*tau_e_R(i))/(4._wp*G_R) - ! Double for shear stresses - if (any(strxb - 1 + i == shear_indices)) then - E_L = E_L + (tau_e_L(i)*tau_e_L(i))/(4._wp*G_L) - E_R = E_R + (tau_e_R(i)*tau_e_R(i))/(4._wp*G_R) - end if end if - end do - end if - - ! elastic energy update - !if ( hyperelasticity ) then - ! G_L = 0._wp - ! G_R = 0._wp - ! - ! $:GPU_LOOP(parallelism='[seq]') - ! do i = 1, num_fluids - ! G_L = G_L + alpha_L(i)*Gs_rs(i) - ! G_R = G_R + alpha_R(i)*Gs_rs(i) - ! end do - ! ! Elastic contribution to energy if G large enough - ! if ((G_L > 1.e-3_wp) .and. (G_R > 1.e-3_wp)) then - ! E_L = E_L + G_L*qL_prim_rs${XYZ}$_vf(j, k, l, xiend + 1) - ! E_R = E_R + G_R*qR_prim_rs${XYZ}$_vf(j + 1, k, l, xiend + 1) - ! $:GPU_LOOP(parallelism='[seq]') - ! do i = 1, b_size-1 - ! tau_e_L(i) = G_L*qL_prim_rs${XYZ}$_vf(j, k, l, strxb - 1 + i) - ! tau_e_R(i) = G_R*qR_prim_rs${XYZ}$_vf(j + 1, k, l, strxb - 1 + i) - ! end do - ! $:GPU_LOOP(parallelism='[seq]') - ! do i = 1, b_size-1 - ! tau_e_L(i) = 0._wp - ! tau_e_R(i) = 0._wp - ! end do - ! $:GPU_LOOP(parallelism='[seq]') - ! do i = 1, num_dims - ! xi_field_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, xibeg - 1 + i) - ! xi_field_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, xibeg - 1 + i) - ! end do - ! end if - !end if - - @:compute_average_state() - - call s_compute_speed_of_sound(pres_L, rho_L, gamma_L, pi_inf_L, H_L, alpha_L, & - vel_L_rms, 0._wp, c_L) - - call s_compute_speed_of_sound(pres_R, rho_R, gamma_R, pi_inf_R, H_R, alpha_R, & - vel_R_rms, 0._wp, c_R) + end if + end do + end if - !> The computation of c_avg does not require all the variables, and therefore the non '_avg' - ! variables are placeholders to call the subroutine. + ! elastic energy update + !if ( hyperelasticity ) then + ! G_L = 0._wp + ! G_R = 0._wp + ! + ! $:GPU_LOOP(parallelism='[seq]') + ! do i = 1, num_fluids + ! G_L = G_L + alpha_L(i)*Gs_rs(i) + ! G_R = G_R + alpha_R(i)*Gs_rs(i) + ! end do + ! ! Elastic contribution to energy if G large enough + ! if ((G_L > 1.e-3_wp) .and. (G_R > 1.e-3_wp)) then + ! E_L = E_L + G_L*qL_prim_rs${XYZ}$_vf(j, k, l, xiend + 1) + ! E_R = E_R + G_R*qR_prim_rs${XYZ}$_vf(j + 1, k, l, xiend + 1) + ! $:GPU_LOOP(parallelism='[seq]') + ! do i = 1, b_size-1 + ! tau_e_L(i) = G_L*qL_prim_rs${XYZ}$_vf(j, k, l, strxb - 1 + i) + ! tau_e_R(i) = G_R*qR_prim_rs${XYZ}$_vf(j + 1, k, l, strxb - 1 + i) + ! end do + ! $:GPU_LOOP(parallelism='[seq]') + ! do i = 1, b_size-1 + ! tau_e_L(i) = 0._wp + ! tau_e_R(i) = 0._wp + ! end do + ! $:GPU_LOOP(parallelism='[seq]') + ! do i = 1, num_dims + ! xi_field_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, xibeg - 1 + i) + ! xi_field_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, xibeg - 1 + i) + ! end do + ! end if + !end if + + @:compute_average_state() + + call s_compute_speed_of_sound(pres_L, rho_L, gamma_L, pi_inf_L, H_L, alpha_L, & + vel_L_rms, 0._wp, c_L) + + call s_compute_speed_of_sound(pres_R, rho_R, gamma_R, pi_inf_R, H_R, alpha_R, & + vel_R_rms, 0._wp, c_R) + + !> The computation of c_avg does not require all the variables, and therefore the non '_avg' + ! variables are placeholders to call the subroutine. + + call s_compute_speed_of_sound(pres_R, rho_avg, gamma_avg, pi_inf_R, H_avg, alpha_R, & + vel_avg_rms, c_sum_Yi_Phi, c_avg) + + if (mhd) then + call s_compute_fast_magnetosonic_speed(rho_L, c_L, B%L, norm_dir, c_fast%L, H_L) + call s_compute_fast_magnetosonic_speed(rho_R, c_R, B%R, norm_dir, c_fast%R, H_R) + end if - call s_compute_speed_of_sound(pres_R, rho_avg, gamma_avg, pi_inf_R, H_avg, alpha_R, & - vel_avg_rms, c_sum_Yi_Phi, c_avg) + if (viscous) then + if (chemistry) then + call compute_viscosity_and_inversion(T_L, Ys_L, T_R, Ys_R, Re_L(1), Re_R(1)) + end if + $:GPU_LOOP(parallelism='[seq]') + do i = 1, 2 + Re_avg_rs${XYZ}$_vf(j, k, l, i) = 2._wp/(1._wp/Re_L(i) + 1._wp/Re_R(i)) + end do + end if + if (wave_speeds == 1) then if (mhd) then - call s_compute_fast_magnetosonic_speed(rho_L, c_L, B%L, norm_dir, c_fast%L, H_L) - call s_compute_fast_magnetosonic_speed(rho_R, c_R, B%R, norm_dir, c_fast%R, H_R) + s_L = min(vel_L(dir_idx(1)) - c_fast%L, vel_R(dir_idx(1)) - c_fast%R) + s_R = max(vel_R(dir_idx(1)) + c_fast%R, vel_L(dir_idx(1)) + c_fast%L) + elseif (hypoelasticity) then + s_L = min(vel_L(dir_idx(1)) - sqrt(c_L*c_L + & + (((4._wp*G_L)/3._wp) + & + tau_e_L(dir_idx_tau(1)))/rho_L) & + , vel_R(dir_idx(1)) - sqrt(c_R*c_R + & + (((4._wp*G_R)/3._wp) + & + tau_e_R(dir_idx_tau(1)))/rho_R)) + s_R = max(vel_R(dir_idx(1)) + sqrt(c_R*c_R + & + (((4._wp*G_R)/3._wp) + & + tau_e_R(dir_idx_tau(1)))/rho_R) & + , vel_L(dir_idx(1)) + sqrt(c_L*c_L + & + (((4._wp*G_L)/3._wp) + & + tau_e_L(dir_idx_tau(1)))/rho_L)) + else if (hyperelasticity) then + s_L = min(vel_L(dir_idx(1)) - sqrt(c_L*c_L + (4._wp*G_L/3._wp)/rho_L) & + , vel_R(dir_idx(1)) - sqrt(c_R*c_R + (4._wp*G_R/3._wp)/rho_R)) + s_R = max(vel_R(dir_idx(1)) + sqrt(c_R*c_R + (4._wp*G_R/3._wp)/rho_R) & + , vel_L(dir_idx(1)) + sqrt(c_L*c_L + (4._wp*G_L/3._wp)/rho_L)) + else + s_L = min(vel_L(dir_idx(1)) - c_L, vel_R(dir_idx(1)) - c_R) + s_R = max(vel_R(dir_idx(1)) + c_R, vel_L(dir_idx(1)) + c_L) end if - if (viscous) then - if (chemistry) then - call compute_viscosity_and_inversion(T_L, Ys_L, T_R, Ys_R, Re_L(1), Re_R(1)) - end if - $:GPU_LOOP(parallelism='[seq]') - do i = 1, 2 - Re_avg_rs${XYZ}$_vf(j, k, l, i) = 2._wp/(1._wp/Re_L(i) + 1._wp/Re_R(i)) - end do - end if + s_S = (pres_R - pres_L + rho_L*vel_L(dir_idx(1))* & + (s_L - vel_L(dir_idx(1))) - & + rho_R*vel_R(dir_idx(1))* & + (s_R - vel_R(dir_idx(1)))) & + /(rho_L*(s_L - vel_L(dir_idx(1))) - & + rho_R*(s_R - vel_R(dir_idx(1)))) + elseif (wave_speeds == 2) then + pres_SL = 5.e-1_wp*(pres_L + pres_R + rho_avg*c_avg* & + (vel_L(dir_idx(1)) - & + vel_R(dir_idx(1)))) + + pres_SR = pres_SL + + Ms_L = max(1._wp, sqrt(1._wp + ((5.e-1_wp + gamma_L)/(1._wp + gamma_L))* & + (pres_SL/pres_L - 1._wp)*pres_L/ & + ((pres_L + pi_inf_L/(1._wp + gamma_L))))) + Ms_R = max(1._wp, sqrt(1._wp + ((5.e-1_wp + gamma_R)/(1._wp + gamma_R))* & + (pres_SR/pres_R - 1._wp)*pres_R/ & + ((pres_R + pi_inf_R/(1._wp + gamma_R))))) + + s_L = vel_L(dir_idx(1)) - c_L*Ms_L + s_R = vel_R(dir_idx(1)) + c_R*Ms_R + + s_S = 5.e-1_wp*((vel_L(dir_idx(1)) + vel_R(dir_idx(1))) + & + (pres_L - pres_R)/ & + (rho_avg*c_avg)) + end if - if (wave_speeds == 1) then - if (mhd) then - s_L = min(vel_L(dir_idx(1)) - c_fast%L, vel_R(dir_idx(1)) - c_fast%R) - s_R = max(vel_R(dir_idx(1)) + c_fast%R, vel_L(dir_idx(1)) + c_fast%L) - elseif (hypoelasticity) then - s_L = min(vel_L(dir_idx(1)) - sqrt(c_L*c_L + & - (((4._wp*G_L)/3._wp) + & - tau_e_L(dir_idx_tau(1)))/rho_L) & - , vel_R(dir_idx(1)) - sqrt(c_R*c_R + & - (((4._wp*G_R)/3._wp) + & - tau_e_R(dir_idx_tau(1)))/rho_R)) - s_R = max(vel_R(dir_idx(1)) + sqrt(c_R*c_R + & - (((4._wp*G_R)/3._wp) + & - tau_e_R(dir_idx_tau(1)))/rho_R) & - , vel_L(dir_idx(1)) + sqrt(c_L*c_L + & - (((4._wp*G_L)/3._wp) + & - tau_e_L(dir_idx_tau(1)))/rho_L)) - else if (hyperelasticity) then - s_L = min(vel_L(dir_idx(1)) - sqrt(c_L*c_L + (4._wp*G_L/3._wp)/rho_L) & - , vel_R(dir_idx(1)) - sqrt(c_R*c_R + (4._wp*G_R/3._wp)/rho_R)) - s_R = max(vel_R(dir_idx(1)) + sqrt(c_R*c_R + (4._wp*G_R/3._wp)/rho_R) & - , vel_L(dir_idx(1)) + sqrt(c_L*c_L + (4._wp*G_L/3._wp)/rho_L)) - else - s_L = min(vel_L(dir_idx(1)) - c_L, vel_R(dir_idx(1)) - c_R) - s_R = max(vel_R(dir_idx(1)) + c_R, vel_L(dir_idx(1)) + c_L) - end if + s_M = min(0._wp, s_L); s_P = max(0._wp, s_R) - s_S = (pres_R - pres_L + rho_L*vel_L(dir_idx(1))* & - (s_L - vel_L(dir_idx(1))) - & - rho_R*vel_R(dir_idx(1))* & - (s_R - vel_R(dir_idx(1)))) & - /(rho_L*(s_L - vel_L(dir_idx(1))) - & - rho_R*(s_R - vel_R(dir_idx(1)))) - elseif (wave_speeds == 2) then - pres_SL = 5.e-1_wp*(pres_L + pres_R + rho_avg*c_avg* & - (vel_L(dir_idx(1)) - & - vel_R(dir_idx(1)))) + xi_M = (5.e-1_wp + sign(5.e-1_wp, s_L)) & + + (5.e-1_wp - sign(5.e-1_wp, s_L)) & + *(5.e-1_wp + sign(5.e-1_wp, s_R)) + xi_P = (5.e-1_wp - sign(5.e-1_wp, s_R)) & + + (5.e-1_wp - sign(5.e-1_wp, s_L)) & + *(5.e-1_wp + sign(5.e-1_wp, s_R)) - pres_SR = pres_SL + ! Low Mach correction + if (low_Mach == 1) then + @:compute_low_Mach_correction() + else + pcorr = 0._wp + end if - Ms_L = max(1._wp, sqrt(1._wp + ((5.e-1_wp + gamma_L)/(1._wp + gamma_L))* & - (pres_SL/pres_L - 1._wp)*pres_L/ & - ((pres_L + pi_inf_L/(1._wp + gamma_L))))) - Ms_R = max(1._wp, sqrt(1._wp + ((5.e-1_wp + gamma_R)/(1._wp + gamma_R))* & - (pres_SR/pres_R - 1._wp)*pres_R/ & - ((pres_R + pi_inf_R/(1._wp + gamma_R))))) + ! Mass + if (.not. relativity) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, contxe + flux_rs${XYZ}$_vf(j, k, l, i) = & + (s_M*alpha_rho_R(i)*vel_R(norm_dir) & + - s_P*alpha_rho_L(i)*vel_L(norm_dir) & + + s_M*s_P*(alpha_rho_L(i) & + - alpha_rho_R(i))) & + /(s_M - s_P) + end do + elseif (relativity) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, contxe + flux_rs${XYZ}$_vf(j, k, l, i) = & + (s_M*Ga%R*alpha_rho_R(i)*vel_R(norm_dir) & + - s_P*Ga%L*alpha_rho_L(i)*vel_L(norm_dir) & + + s_M*s_P*(Ga%L*alpha_rho_L(i) & + - Ga%R*alpha_rho_R(i))) & + /(s_M - s_P) + end do + end if - s_L = vel_L(dir_idx(1)) - c_L*Ms_L - s_R = vel_R(dir_idx(1)) + c_R*Ms_R + ! Momentum + if (mhd .and. (.not. relativity)) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, 3 + ! Flux of rho*v_i in the ${XYZ}$ direction + ! = rho * v_i * v_${XYZ}$ - B_i * B_${XYZ}$ + delta_(${XYZ}$,i) * p_tot + flux_rs${XYZ}$_vf(j, k, l, contxe + i) = & + (s_M*(rho_R*vel_R(i)*vel_R(norm_dir) & + - B%R(i)*B%R(norm_dir) & + + dir_flg(i)*(pres_R + pres_mag%R)) & + - s_P*(rho_L*vel_L(i)*vel_L(norm_dir) & + - B%L(i)*B%L(norm_dir) & + + dir_flg(i)*(pres_L + pres_mag%L)) & + + s_M*s_P*(rho_L*vel_L(i) - rho_R*vel_R(i))) & + /(s_M - s_P) + end do + elseif (mhd .and. relativity) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, 3 + ! Flux of m_i in the ${XYZ}$ direction + ! = m_i * v_${XYZ}$ - b_i/Gamma * B_${XYZ}$ + delta_(${XYZ}$,i) * p_tot + flux_rs${XYZ}$_vf(j, k, l, contxe + i) = & + (s_M*(cm%R(i)*vel_R(norm_dir) & + - b4%R(i)/Ga%R*B%R(norm_dir) & + + dir_flg(i)*(pres_R + pres_mag%R)) & + - s_P*(cm%L(i)*vel_L(norm_dir) & + - b4%L(i)/Ga%L*B%L(norm_dir) & + + dir_flg(i)*(pres_L + pres_mag%L)) & + + s_M*s_P*(cm%L(i) - cm%R(i))) & + /(s_M - s_P) + end do + elseif (bubbles_euler) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_vels + flux_rs${XYZ}$_vf(j, k, l, contxe + dir_idx(i)) = & + (s_M*(rho_R*vel_R(dir_idx(1)) & + *vel_R(dir_idx(i)) & + + dir_flg(dir_idx(i))*(pres_R - ptilde_R)) & + - s_P*(rho_L*vel_L(dir_idx(1)) & + *vel_L(dir_idx(i)) & + + dir_flg(dir_idx(i))*(pres_L - ptilde_L)) & + + s_M*s_P*(rho_L*vel_L(dir_idx(i)) & + - rho_R*vel_R(dir_idx(i)))) & + /(s_M - s_P) & + + (s_M/s_L)*(s_P/s_R)*pcorr*(vel_R(dir_idx(i)) - vel_L(dir_idx(i))) + end do + else if (hypoelasticity) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_vels + flux_rs${XYZ}$_vf(j, k, l, contxe + dir_idx(i)) = & + (s_M*(rho_R*vel_R(dir_idx(1)) & + *vel_R(dir_idx(i)) & + + dir_flg(dir_idx(i))*pres_R & + - tau_e_R(dir_idx_tau(i))) & + - s_P*(rho_L*vel_L(dir_idx(1)) & + *vel_L(dir_idx(i)) & + + dir_flg(dir_idx(i))*pres_L & + - tau_e_L(dir_idx_tau(i))) & + + s_M*s_P*(rho_L*vel_L(dir_idx(i)) & + - rho_R*vel_R(dir_idx(i)))) & + /(s_M - s_P) + end do + else + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_vels + flux_rs${XYZ}$_vf(j, k, l, contxe + dir_idx(i)) = & + (s_M*(rho_R*vel_R(dir_idx(1)) & + *vel_R(dir_idx(i)) & + + dir_flg(dir_idx(i))*pres_R) & + - s_P*(rho_L*vel_L(dir_idx(1)) & + *vel_L(dir_idx(i)) & + + dir_flg(dir_idx(i))*pres_L) & + + s_M*s_P*(rho_L*vel_L(dir_idx(i)) & + - rho_R*vel_R(dir_idx(i)))) & + /(s_M - s_P) & + + (s_M/s_L)*(s_P/s_R)*pcorr*(vel_R(dir_idx(i)) - vel_L(dir_idx(i))) + end do + end if - s_S = 5.e-1_wp*((vel_L(dir_idx(1)) + vel_R(dir_idx(1))) + & - (pres_L - pres_R)/ & - (rho_avg*c_avg)) - end if + ! Energy + if (mhd .and. (.not. relativity)) then + ! energy flux = (E + p + p_mag) * v_${XYZ}$ - B_${XYZ}$ * (v_x*B_x + v_y*B_y + v_z*B_z) + #:if not MFC_CASE_OPTIMIZATION or num_dims > 2 + flux_rs${XYZ}$_vf(j, k, l, E_idx) = & + (s_M*(vel_R(norm_dir)*(E_R + pres_R + pres_mag%R) - B%R(norm_dir)*(vel_R(1)*B%R(1) + vel_R(2)*B%R(2) + vel_R(3)*B%R(3))) & + - s_P*(vel_L(norm_dir)*(E_L + pres_L + pres_mag%L) - B%L(norm_dir)*(vel_L(1)*B%L(1) + vel_L(2)*B%L(2) + vel_L(3)*B%L(3))) & + + s_M*s_P*(E_L - E_R)) & + /(s_M - s_P) + #:endif + elseif (mhd .and. relativity) then + ! energy flux = m_${XYZ}$ - mass flux + ! Hard-coded for single-component for now + flux_rs${XYZ}$_vf(j, k, l, E_idx) = & + (s_M*(cm%R(norm_dir) - Ga%R*alpha_rho_R(1)*vel_R(norm_dir)) & + - s_P*(cm%L(norm_dir) - Ga%L*alpha_rho_L(1)*vel_L(norm_dir)) & + + s_M*s_P*(E_L - E_R)) & + /(s_M - s_P) + else if (bubbles_euler) then + flux_rs${XYZ}$_vf(j, k, l, E_idx) = & + (s_M*vel_R(dir_idx(1))*(E_R + pres_R - ptilde_R) & + - s_P*vel_L(dir_idx(1))*(E_L + pres_L - ptilde_L) & + + s_M*s_P*(E_L - E_R)) & + /(s_M - s_P) & + + (s_M/s_L)*(s_P/s_R)*pcorr*(vel_R_rms - vel_L_rms)/2._wp + else if (hypoelasticity) then + flux_tau_L = 0._wp; flux_tau_R = 0._wp + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_dims + flux_tau_L = flux_tau_L + tau_e_L(dir_idx_tau(i))*vel_L(dir_idx(i)) + flux_tau_R = flux_tau_R + tau_e_R(dir_idx_tau(i))*vel_R(dir_idx(i)) + end do + flux_rs${XYZ}$_vf(j, k, l, E_idx) = & + (s_M*(vel_R(dir_idx(1))*(E_R + pres_R) - flux_tau_R) & + - s_P*(vel_L(dir_idx(1))*(E_L + pres_L) - flux_tau_L) & + + s_M*s_P*(E_L - E_R))/(s_M - s_P) + else + flux_rs${XYZ}$_vf(j, k, l, E_idx) = & + (s_M*vel_R(dir_idx(1))*(E_R + pres_R) & + - s_P*vel_L(dir_idx(1))*(E_L + pres_L) & + + s_M*s_P*(E_L - E_R)) & + /(s_M - s_P) & + + (s_M/s_L)*(s_P/s_R)*pcorr*(vel_R_rms - vel_L_rms)/2._wp + end if - s_M = min(0._wp, s_L); s_P = max(0._wp, s_R) + ! Elastic Stresses + if (hypoelasticity) then + do i = 1, strxe - strxb + 1 !TODO: this indexing may be slow + flux_rs${XYZ}$_vf(j, k, l, strxb - 1 + i) = & + (s_M*(rho_R*vel_R(dir_idx(1)) & + *tau_e_R(i)) & + - s_P*(rho_L*vel_L(dir_idx(1)) & + *tau_e_L(i)) & + + s_M*s_P*(rho_L*tau_e_L(i) & + - rho_R*tau_e_R(i))) & + /(s_M - s_P) + end do + end if - xi_M = (5.e-1_wp + sign(5.e-1_wp, s_L)) & - + (5.e-1_wp - sign(5.e-1_wp, s_L)) & - *(5.e-1_wp + sign(5.e-1_wp, s_R)) - xi_P = (5.e-1_wp - sign(5.e-1_wp, s_R)) & - + (5.e-1_wp - sign(5.e-1_wp, s_L)) & - *(5.e-1_wp + sign(5.e-1_wp, s_R)) + ! Advection + $:GPU_LOOP(parallelism='[seq]') + do i = advxb, advxe + flux_rs${XYZ}$_vf(j, k, l, i) = & + (qL_prim_rs${XYZ}$_vf(j, k, l, i) & + - qR_prim_rs${XYZ}$_vf(j + 1, k, l, i)) & + *s_M*s_P/(s_M - s_P) + flux_src_rs${XYZ}$_vf(j, k, l, i) = & + (s_M*qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) & + - s_P*qL_prim_rs${XYZ}$_vf(j, k, l, i)) & + /(s_M - s_P) + end do - ! Low Mach correction - if (low_Mach == 1) then - @:compute_low_Mach_correction() - else - pcorr = 0._wp + if (bubbles_euler) then + ! From HLLC: Kills mass transport @ bubble gas density + if (num_fluids > 1) then + flux_rs${XYZ}$_vf(j, k, l, contxe) = 0._wp end if + end if - ! Mass - if (.not. relativity) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, contxe - flux_rs${XYZ}$_vf(j, k, l, i) = & - (s_M*alpha_rho_R(i)*vel_R(norm_dir) & - - s_P*alpha_rho_L(i)*vel_L(norm_dir) & - + s_M*s_P*(alpha_rho_L(i) & - - alpha_rho_R(i))) & - /(s_M - s_P) - end do - elseif (relativity) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, contxe - flux_rs${XYZ}$_vf(j, k, l, i) = & - (s_M*Ga%R*alpha_rho_R(i)*vel_R(norm_dir) & - - s_P*Ga%L*alpha_rho_L(i)*vel_L(norm_dir) & - + s_M*s_P*(Ga%L*alpha_rho_L(i) & - - Ga%R*alpha_rho_R(i))) & - /(s_M - s_P) - end do - end if + if (chemistry) then + $:GPU_LOOP(parallelism='[seq]') + do i = chemxb, chemxe + Y_L = qL_prim_rs${XYZ}$_vf(j, k, l, i) + Y_R = qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) + + flux_rs${XYZ}$_vf(j, k, l, i) = (s_M*Y_R*rho_R*vel_R(dir_idx(1)) & + - s_P*Y_L*rho_L*vel_L(dir_idx(1)) & + + s_M*s_P*(Y_L*rho_L - Y_R*rho_R)) & + /(s_M - s_P) + flux_src_rs${XYZ}$_vf(j, k, l, i) = 0._wp + end do + end if - ! Momentum - if (mhd .and. (.not. relativity)) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, 3 - ! Flux of rho*v_i in the ${XYZ}$ direction - ! = rho * v_i * v_${XYZ}$ - B_i * B_${XYZ}$ + delta_(${XYZ}$,i) * p_tot - flux_rs${XYZ}$_vf(j, k, l, contxe + i) = & - (s_M*(rho_R*vel_R(i)*vel_R(norm_dir) & - - B%R(i)*B%R(norm_dir) & - + dir_flg(i)*(pres_R + pres_mag%R)) & - - s_P*(rho_L*vel_L(i)*vel_L(norm_dir) & - - B%L(i)*B%L(norm_dir) & - + dir_flg(i)*(pres_L + pres_mag%L)) & - + s_M*s_P*(rho_L*vel_L(i) - rho_R*vel_R(i))) & - /(s_M - s_P) - end do - elseif (mhd .and. relativity) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, 3 - ! Flux of m_i in the ${XYZ}$ direction - ! = m_i * v_${XYZ}$ - b_i/Gamma * B_${XYZ}$ + delta_(${XYZ}$,i) * p_tot - flux_rs${XYZ}$_vf(j, k, l, contxe + i) = & - (s_M*(cm%R(i)*vel_R(norm_dir) & - - b4%R(i)/Ga%R*B%R(norm_dir) & - + dir_flg(i)*(pres_R + pres_mag%R)) & - - s_P*(cm%L(i)*vel_L(norm_dir) & - - b4%L(i)/Ga%L*B%L(norm_dir) & - + dir_flg(i)*(pres_L + pres_mag%L)) & - + s_M*s_P*(cm%L(i) - cm%R(i))) & - /(s_M - s_P) - end do - elseif (bubbles_euler) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_vels - flux_rs${XYZ}$_vf(j, k, l, contxe + dir_idx(i)) = & - (s_M*(rho_R*vel_R(dir_idx(1)) & - *vel_R(dir_idx(i)) & - + dir_flg(dir_idx(i))*(pres_R - ptilde_R)) & - - s_P*(rho_L*vel_L(dir_idx(1)) & - *vel_L(dir_idx(i)) & - + dir_flg(dir_idx(i))*(pres_L - ptilde_L)) & - + s_M*s_P*(rho_L*vel_L(dir_idx(i)) & - - rho_R*vel_R(dir_idx(i)))) & - /(s_M - s_P) & - + (s_M/s_L)*(s_P/s_R)*pcorr*(vel_R(dir_idx(i)) - vel_L(dir_idx(i))) - end do - else if (hypoelasticity) then + if (mhd) then + if (n == 0) then ! 1D: d/dx flux only & Bx = Bx0 = const. + ! B_y flux = v_x * B_y - v_y * Bx0 + ! B_z flux = v_x * B_z - v_z * Bx0 $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_vels - flux_rs${XYZ}$_vf(j, k, l, contxe + dir_idx(i)) = & - (s_M*(rho_R*vel_R(dir_idx(1)) & - *vel_R(dir_idx(i)) & - + dir_flg(dir_idx(i))*pres_R & - - tau_e_R(dir_idx_tau(i))) & - - s_P*(rho_L*vel_L(dir_idx(1)) & - *vel_L(dir_idx(i)) & - + dir_flg(dir_idx(i))*pres_L & - - tau_e_L(dir_idx_tau(i))) & - + s_M*s_P*(rho_L*vel_L(dir_idx(i)) & - - rho_R*vel_R(dir_idx(i)))) & - /(s_M - s_P) + do i = 0, 1 + flux_rsx_vf(j, k, l, B_idx%beg + i) = (s_M*(vel_R(1)*B%R(2 + i) - vel_R(2 + i)*Bx0) & + - s_P*(vel_L(1)*B%L(2 + i) - vel_L(2 + i)*Bx0) & + + s_M*s_P*(B%L(2 + i) - B%R(2 + i)))/(s_M - s_P) end do - else + else ! 2D/3D: Bx, By, Bz /= const. but zero flux component in the same direction + ! B_x d/d${XYZ}$ flux = (1 - delta(x,${XYZ}$)) * (v_${XYZ}$ * B_x - v_x * B_${XYZ}$) + ! B_y d/d${XYZ}$ flux = (1 - delta(y,${XYZ}$)) * (v_${XYZ}$ * B_y - v_y * B_${XYZ}$) + ! B_z d/d${XYZ}$ flux = (1 - delta(z,${XYZ}$)) * (v_${XYZ}$ * B_z - v_z * B_${XYZ}$) $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_vels - flux_rs${XYZ}$_vf(j, k, l, contxe + dir_idx(i)) = & - (s_M*(rho_R*vel_R(dir_idx(1)) & - *vel_R(dir_idx(i)) & - + dir_flg(dir_idx(i))*pres_R) & - - s_P*(rho_L*vel_L(dir_idx(1)) & - *vel_L(dir_idx(i)) & - + dir_flg(dir_idx(i))*pres_L) & - + s_M*s_P*(rho_L*vel_L(dir_idx(i)) & - - rho_R*vel_R(dir_idx(i)))) & - /(s_M - s_P) & - + (s_M/s_L)*(s_P/s_R)*pcorr*(vel_R(dir_idx(i)) - vel_L(dir_idx(i))) + do i = 0, 2 + flux_rs${XYZ}$_vf(j, k, l, B_idx%beg + i) = (1 - dir_flg(i + 1))*( & + s_M*(vel_R(dir_idx(1))*B%R(i + 1) - vel_R(i + 1)*B%R(norm_dir)) - & + s_P*(vel_L(dir_idx(1))*B%L(i + 1) - vel_L(i + 1)*B%L(norm_dir)) + & + s_M*s_P*(B%L(i + 1) - B%R(i + 1)))/(s_M - s_P) end do end if + flux_src_rs${XYZ}$_vf(j, k, l, advxb) = 0._wp + end if - ! Energy - if (mhd .and. (.not. relativity)) then - ! energy flux = (E + p + p_mag) * v_${XYZ}$ - B_${XYZ}$ * (v_x*B_x + v_y*B_y + v_z*B_z) - #:if not MFC_CASE_OPTIMIZATION or num_dims > 2 - flux_rs${XYZ}$_vf(j, k, l, E_idx) = & - (s_M*(vel_R(norm_dir)*(E_R + pres_R + pres_mag%R) - B%R(norm_dir)*(vel_R(1)*B%R(1) + vel_R(2)*B%R(2) + vel_R(3)*B%R(3))) & - - s_P*(vel_L(norm_dir)*(E_L + pres_L + pres_mag%L) - B%L(norm_dir)*(vel_L(1)*B%L(1) + vel_L(2)*B%L(2) + vel_L(3)*B%L(3))) & - + s_M*s_P*(E_L - E_R)) & - /(s_M - s_P) - #:endif - elseif (mhd .and. relativity) then - ! energy flux = m_${XYZ}$ - mass flux - ! Hard-coded for single-component for now - flux_rs${XYZ}$_vf(j, k, l, E_idx) = & - (s_M*(cm%R(norm_dir) - Ga%R*alpha_rho_R(1)*vel_R(norm_dir)) & - - s_P*(cm%L(norm_dir) - Ga%L*alpha_rho_L(1)*vel_L(norm_dir)) & - + s_M*s_P*(E_L - E_R)) & - /(s_M - s_P) - else if (bubbles_euler) then - flux_rs${XYZ}$_vf(j, k, l, E_idx) = & - (s_M*vel_R(dir_idx(1))*(E_R + pres_R - ptilde_R) & - - s_P*vel_L(dir_idx(1))*(E_L + pres_L - ptilde_L) & - + s_M*s_P*(E_L - E_R)) & - /(s_M - s_P) & - + (s_M/s_L)*(s_P/s_R)*pcorr*(vel_R_rms - vel_L_rms)/2._wp - else if (hypoelasticity) then - flux_tau_L = 0._wp; flux_tau_R = 0._wp + #:if (NORM_DIR == 2) + if (cyl_coord) then + !Substituting the advective flux into the inviscid geometrical source flux $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_dims - flux_tau_L = flux_tau_L + tau_e_L(dir_idx_tau(i))*vel_L(dir_idx(i)) - flux_tau_R = flux_tau_R + tau_e_R(dir_idx_tau(i))*vel_R(dir_idx(i)) + do i = 1, E_idx + flux_gsrc_rs${XYZ}$_vf(j, k, l, i) = flux_rs${XYZ}$_vf(j, k, l, i) end do - flux_rs${XYZ}$_vf(j, k, l, E_idx) = & - (s_M*(vel_R(dir_idx(1))*(E_R + pres_R) - flux_tau_R) & - - s_P*(vel_L(dir_idx(1))*(E_L + pres_L) - flux_tau_L) & - + s_M*s_P*(E_L - E_R))/(s_M - s_P) - else - flux_rs${XYZ}$_vf(j, k, l, E_idx) = & - (s_M*vel_R(dir_idx(1))*(E_R + pres_R) & - - s_P*vel_L(dir_idx(1))*(E_L + pres_L) & - + s_M*s_P*(E_L - E_R)) & - /(s_M - s_P) & - + (s_M/s_L)*(s_P/s_R)*pcorr*(vel_R_rms - vel_L_rms)/2._wp - end if - - ! Elastic Stresses - if (hypoelasticity) then - do i = 1, strxe - strxb + 1 !TODO: this indexing may be slow - flux_rs${XYZ}$_vf(j, k, l, strxb - 1 + i) = & - (s_M*(rho_R*vel_R(dir_idx(1)) & - *tau_e_R(i)) & - - s_P*(rho_L*vel_L(dir_idx(1)) & - *tau_e_L(i)) & - + s_M*s_P*(rho_L*tau_e_L(i) & - - rho_R*tau_e_R(i))) & - /(s_M - s_P) + ! Recalculating the radial momentum geometric source flux + flux_gsrc_rs${XYZ}$_vf(j, k, l, contxe + 2) = & + flux_rs${XYZ}$_vf(j, k, l, contxe + 2) & + - (s_M*pres_R - s_P*pres_L)/(s_M - s_P) + ! Geometrical source of the void fraction(s) is zero + $:GPU_LOOP(parallelism='[seq]') + do i = advxb, advxe + flux_gsrc_rs${XYZ}$_vf(j, k, l, i) = flux_rs${XYZ}$_vf(j, k, l, i) end do end if - ! Advection - $:GPU_LOOP(parallelism='[seq]') - do i = advxb, advxe - flux_rs${XYZ}$_vf(j, k, l, i) = & - (qL_prim_rs${XYZ}$_vf(j, k, l, i) & - - qR_prim_rs${XYZ}$_vf(j + 1, k, l, i)) & - *s_M*s_P/(s_M - s_P) - flux_src_rs${XYZ}$_vf(j, k, l, i) = & - (s_M*qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) & - - s_P*qL_prim_rs${XYZ}$_vf(j, k, l, i)) & + if (cyl_coord .and. hypoelasticity) then + ! += tau_sigmasigma using HLL + flux_gsrc_rs${XYZ}$_vf(j, k, l, contxe + 2) = & + flux_gsrc_rs${XYZ}$_vf(j, k, l, contxe + 2) + & + (s_M*tau_e_R(4) - s_P*tau_e_L(4)) & /(s_M - s_P) - end do - - if (bubbles_euler) then - ! From HLLC: Kills mass transport @ bubble gas density - if (num_fluids > 1) then - flux_rs${XYZ}$_vf(j, k, l, contxe) = 0._wp - end if - end if - if (chemistry) then $:GPU_LOOP(parallelism='[seq]') - do i = chemxb, chemxe - Y_L = qL_prim_rs${XYZ}$_vf(j, k, l, i) - Y_R = qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) - - flux_rs${XYZ}$_vf(j, k, l, i) = (s_M*Y_R*rho_R*vel_R(dir_idx(1)) & - - s_P*Y_L*rho_L*vel_L(dir_idx(1)) & - + s_M*s_P*(Y_L*rho_L - Y_R*rho_R)) & - /(s_M - s_P) - flux_src_rs${XYZ}$_vf(j, k, l, i) = 0._wp + do i = strxb, strxe + flux_gsrc_rs${XYZ}$_vf(j, k, l, i) = flux_rs${XYZ}$_vf(j, k, l, i) end do end if + #:endif - if (mhd) then - if (n == 0) then ! 1D: d/dx flux only & Bx = Bx0 = const. - ! B_y flux = v_x * B_y - v_y * Bx0 - ! B_z flux = v_x * B_z - v_z * Bx0 - $:GPU_LOOP(parallelism='[seq]') - do i = 0, 1 - flux_rsx_vf(j, k, l, B_idx%beg + i) = (s_M*(vel_R(1)*B%R(2 + i) - vel_R(2 + i)*Bx0) & - - s_P*(vel_L(1)*B%L(2 + i) - vel_L(2 + i)*Bx0) & - + s_M*s_P*(B%L(2 + i) - B%R(2 + i)))/(s_M - s_P) - end do - else ! 2D/3D: Bx, By, Bz /= const. but zero flux component in the same direction - ! B_x d/d${XYZ}$ flux = (1 - delta(x,${XYZ}$)) * (v_${XYZ}$ * B_x - v_x * B_${XYZ}$) - ! B_y d/d${XYZ}$ flux = (1 - delta(y,${XYZ}$)) * (v_${XYZ}$ * B_y - v_y * B_${XYZ}$) - ! B_z d/d${XYZ}$ flux = (1 - delta(z,${XYZ}$)) * (v_${XYZ}$ * B_z - v_z * B_${XYZ}$) - $:GPU_LOOP(parallelism='[seq]') - do i = 0, 2 - flux_rs${XYZ}$_vf(j, k, l, B_idx%beg + i) = (1 - dir_flg(i + 1))*( & - s_M*(vel_R(dir_idx(1))*B%R(i + 1) - vel_R(i + 1)*B%R(norm_dir)) - & - s_P*(vel_L(dir_idx(1))*B%L(i + 1) - vel_L(i + 1)*B%L(norm_dir)) + & - s_M*s_P*(B%L(i + 1) - B%R(i + 1)))/(s_M - s_P) - end do - end if - flux_src_rs${XYZ}$_vf(j, k, l, advxb) = 0._wp - end if - - #:if (NORM_DIR == 2) - if (cyl_coord) then - !Substituting the advective flux into the inviscid geometrical source flux - $:GPU_LOOP(parallelism='[seq]') - do i = 1, E_idx - flux_gsrc_rs${XYZ}$_vf(j, k, l, i) = flux_rs${XYZ}$_vf(j, k, l, i) - end do - ! Recalculating the radial momentum geometric source flux - flux_gsrc_rs${XYZ}$_vf(j, k, l, contxe + 2) = & - flux_rs${XYZ}$_vf(j, k, l, contxe + 2) & - - (s_M*pres_R - s_P*pres_L)/(s_M - s_P) - ! Geometrical source of the void fraction(s) is zero - $:GPU_LOOP(parallelism='[seq]') - do i = advxb, advxe - flux_gsrc_rs${XYZ}$_vf(j, k, l, i) = flux_rs${XYZ}$_vf(j, k, l, i) - end do - end if - - if (cyl_coord .and. hypoelasticity) then - ! += tau_sigmasigma using HLL - flux_gsrc_rs${XYZ}$_vf(j, k, l, contxe + 2) = & - flux_gsrc_rs${XYZ}$_vf(j, k, l, contxe + 2) + & - (s_M*tau_e_R(4) - s_P*tau_e_L(4)) & - /(s_M - s_P) - - $:GPU_LOOP(parallelism='[seq]') - do i = strxb, strxe - flux_gsrc_rs${XYZ}$_vf(j, k, l, i) = flux_rs${XYZ}$_vf(j, k, l, i) - end do - end if - #:endif - - end do end do end do + end do $:END_GPU_PARALLEL_LOOP() end if @@ -1126,719 +1126,719 @@ contains if (norm_dir == ${NORM_DIR}$) then $:GPU_PARALLEL_LOOP(collapse=3, private='[i,j,k,l, q, alpha_rho_L,alpha_rho_R,vel_L,vel_R,alpha_L,alpha_R,tau_e_L,tau_e_R,G_L,G_R,Re_L,Re_R,rho_avg,h_avg,gamma_avg,s_L,s_R,s_S,Ys_L,Ys_R,xi_field_L,xi_field_R,Cp_iL,Cp_iR,Xs_L,Xs_R,Gamma_iL,Gamma_iR,Yi_avg,Phi_avg,h_iL,h_iR,h_avg_2,c_fast,pres_mag,B,Ga,vdotB,B2,b4,cm,pcorr,zcoef,vel_grad_L,vel_grad_R,idx_right_phys,vel_L_rms,vel_R_rms,vel_avg_rms,vel_L_tmp,vel_R_tmp,Ms_L,Ms_R,pres_SL,pres_SR,alpha_L_sum,alpha_R_sum,c_avg,pres_L,pres_R,rho_L,rho_R,gamma_L,gamma_R,pi_inf_L,pi_inf_R,qv_L,qv_R,c_L,c_R,E_L,E_R,H_L,H_R,ptilde_L,ptilde_R,s_M,s_P,xi_M,xi_P,Cp_avg,Cv_avg,T_avg,eps,c_sum_Yi_Phi,Cp_L,Cp_R,Cv_L,Cv_R,R_gas_L,R_gas_R,MW_L,MW_R,T_L,T_R,Y_L,Y_R]') - do l = is3%beg, is3%end - do k = is2%beg, is2%end - do j = is1%beg, is1%end - $:GPU_LOOP(parallelism='[seq]') - do i = 1, contxe - alpha_rho_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, i) - alpha_rho_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) - end do - - vel_L_rms = 0._wp; vel_R_rms = 0._wp - - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_vels - vel_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, contxe + i) - vel_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, contxe + i) - vel_L_rms = vel_L_rms + vel_L(i)**2._wp - vel_R_rms = vel_R_rms + vel_R(i)**2._wp - end do - - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - alpha_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i) - alpha_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i) - end do - - pres_L = qL_prim_rs${XYZ}$_vf(j, k, l, E_idx) - pres_R = qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx) - - if (mhd) then - if (n == 0) then ! 1D: constant Bx; By, Bz as variables - B%L(1) = Bx0 - B%R(1) = Bx0 - B%L(2) = qL_prim_rs${XYZ}$_vf(j, k, l, B_idx%beg) - B%R(2) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, B_idx%beg) - B%L(3) = qL_prim_rs${XYZ}$_vf(j, k, l, B_idx%beg + 1) - B%R(3) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, B_idx%beg + 1) - else ! 2D/3D: Bx, By, Bz as variables - B%L(1) = qL_prim_rs${XYZ}$_vf(j, k, l, B_idx%beg) - B%R(1) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, B_idx%beg) - B%L(2) = qL_prim_rs${XYZ}$_vf(j, k, l, B_idx%beg + 1) - B%R(2) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, B_idx%beg + 1) - B%L(3) = qL_prim_rs${XYZ}$_vf(j, k, l, B_idx%beg + 2) - B%R(3) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, B_idx%beg + 2) - end if - end if - - rho_L = 0._wp - gamma_L = 0._wp - pi_inf_L = 0._wp - qv_L = 0._wp - - rho_R = 0._wp - gamma_R = 0._wp - pi_inf_R = 0._wp - qv_R = 0._wp - - alpha_L_sum = 0._wp - alpha_R_sum = 0._wp - - pres_mag%L = 0._wp - pres_mag%R = 0._wp - - if (mpp_lim) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - alpha_rho_L(i) = max(0._wp, alpha_rho_L(i)) - alpha_L(i) = min(max(0._wp, alpha_L(i)), 1._wp) - alpha_L_sum = alpha_L_sum + alpha_L(i) - alpha_rho_R(i) = max(0._wp, alpha_rho_R(i)) - alpha_R(i) = min(max(0._wp, alpha_R(i)), 1._wp) - alpha_R_sum = alpha_R_sum + alpha_R(i) - end do - - alpha_L = alpha_L/max(alpha_L_sum, sgm_eps) - alpha_R = alpha_R/max(alpha_R_sum, sgm_eps) - end if - - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - rho_L = rho_L + alpha_rho_L(i) - gamma_L = gamma_L + alpha_L(i)*gammas(i) - pi_inf_L = pi_inf_L + alpha_L(i)*pi_infs(i) - qv_L = qv_L + alpha_rho_L(i)*qvs(i) - - rho_R = rho_R + alpha_rho_R(i) - gamma_R = gamma_R + alpha_R(i)*gammas(i) - pi_inf_R = pi_inf_R + alpha_R(i)*pi_infs(i) - qv_R = qv_R + alpha_rho_R(i)*qvs(i) - end do - - if (viscous) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, 2 - Re_L(i) = dflt_real - Re_R(i) = dflt_real - - if (Re_size(i) > 0) Re_L(i) = 0._wp - if (Re_size(i) > 0) Re_R(i) = 0._wp - - $:GPU_LOOP(parallelism='[seq]') - do q = 1, Re_size(i) - Re_L(i) = alpha_L(Re_idx(i, q))/Res_gs(i, q) & - + Re_L(i) - Re_R(i) = alpha_R(Re_idx(i, q))/Res_gs(i, q) & - + Re_R(i) - end do - - Re_L(i) = 1._wp/max(Re_L(i), sgm_eps) - Re_R(i) = 1._wp/max(Re_R(i), sgm_eps) - end do - end if - - if (chemistry) then - $:GPU_LOOP(parallelism='[seq]') - do i = chemxb, chemxe - Ys_L(i - chemxb + 1) = qL_prim_rs${XYZ}$_vf(j, k, l, i) - Ys_R(i - chemxb + 1) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) - end do - - call get_mixture_molecular_weight(Ys_L, MW_L) - call get_mixture_molecular_weight(Ys_R, MW_R) - - Xs_L(:) = Ys_L(:)*MW_L/molecular_weights(:) - Xs_R(:) = Ys_R(:)*MW_R/molecular_weights(:) - - R_gas_L = gas_constant/MW_L - R_gas_R = gas_constant/MW_R - T_L = pres_L/rho_L/R_gas_L - T_R = pres_R/rho_R/R_gas_R - - call get_species_specific_heats_r(T_L, Cp_iL) - call get_species_specific_heats_r(T_R, Cp_iR) - - if (chem_params%gamma_method == 1) then - ! gamma_method = 1: Ref. Section 2.3.1 Formulation of doi:10.7907/ZKW8-ES97. - Gamma_iL = Cp_iL/(Cp_iL - 1.0_wp) - Gamma_iR = Cp_iR/(Cp_iR - 1.0_wp) - - gamma_L = sum(Xs_L(:)/(Gamma_iL(:) - 1.0_wp)) - gamma_R = sum(Xs_R(:)/(Gamma_iR(:) - 1.0_wp)) - else if (chem_params%gamma_method == 2) then - ! gamma_method = 2: c_p / c_v where c_p, c_v are specific heats. - call get_mixture_specific_heat_cp_mass(T_L, Ys_L, Cp_L) - call get_mixture_specific_heat_cp_mass(T_R, Ys_R, Cp_R) - call get_mixture_specific_heat_cv_mass(T_L, Ys_L, Cv_L) - call get_mixture_specific_heat_cv_mass(T_R, Ys_R, Cv_R) - - Gamm_L = Cp_L/Cv_L - gamma_L = 1.0_wp/(Gamm_L - 1.0_wp) - Gamm_R = Cp_R/Cv_R - gamma_R = 1.0_wp/(Gamm_R - 1.0_wp) - end if - - call get_mixture_energy_mass(T_L, Ys_L, E_L) - call get_mixture_energy_mass(T_R, Ys_R, E_R) - - E_L = rho_L*E_L + 5.e-1*rho_L*vel_L_rms - E_R = rho_R*E_R + 5.e-1*rho_R*vel_R_rms - H_L = (E_L + pres_L)/rho_L - H_R = (E_R + pres_R)/rho_R - elseif (mhd .and. relativity) then - Ga%L = 1._wp/sqrt(1._wp - vel_L_rms) - Ga%R = 1._wp/sqrt(1._wp - vel_R_rms) - vdotB%L = vel_L(1)*B%L(1) + vel_L(2)*B%L(2) + vel_L(3)*B%L(3) - vdotB%R = vel_R(1)*B%R(1) + vel_R(2)*B%R(2) + vel_R(3)*B%R(3) - - b4%L(1:3) = B%L(1:3)/Ga%L + Ga%L*vel_L(1:3)*vdotB%L - b4%R(1:3) = B%R(1:3)/Ga%R + Ga%R*vel_R(1:3)*vdotB%R - B2%L = B%L(1)**2._wp + B%L(2)**2._wp + B%L(3)**2._wp - B2%R = B%R(1)**2._wp + B%R(2)**2._wp + B%R(3)**2._wp - - pres_mag%L = 0.5_wp*(B2%L/Ga%L**2._wp + vdotB%L**2._wp) - pres_mag%R = 0.5_wp*(B2%R/Ga%R**2._wp + vdotB%R**2._wp) - - ! Hard-coded EOS - H_L = 1._wp + (gamma_L + 1)*pres_L/rho_L - H_R = 1._wp + (gamma_R + 1)*pres_R/rho_R - - cm%L(1:3) = (rho_L*H_L*Ga%L**2 + B2%L)*vel_L(1:3) - vdotB%L*B%L(1:3) - cm%R(1:3) = (rho_R*H_R*Ga%R**2 + B2%R)*vel_R(1:3) - vdotB%R*B%R(1:3) - - E_L = rho_L*H_L*Ga%L**2 - pres_L + 0.5_wp*(B2%L + vel_L_rms*B2%L - vdotB%L**2._wp) - rho_L*Ga%L - E_R = rho_R*H_R*Ga%R**2 - pres_R + 0.5_wp*(B2%R + vel_R_rms*B2%R - vdotB%R**2._wp) - rho_R*Ga%R - elseif (mhd .and. .not. relativity) then - pres_mag%L = 0.5_wp*(B%L(1)**2._wp + B%L(2)**2._wp + B%L(3)**2._wp) - pres_mag%R = 0.5_wp*(B%R(1)**2._wp + B%R(2)**2._wp + B%R(3)**2._wp) - E_L = gamma_L*pres_L + pi_inf_L + 0.5_wp*rho_L*vel_L_rms + qv_L + pres_mag%L - E_R = gamma_R*pres_R + pi_inf_R + 0.5_wp*rho_R*vel_R_rms + qv_R + pres_mag%R ! includes magnetic energy - H_L = (E_L + pres_L - pres_mag%L)/rho_L - H_R = (E_R + pres_R - pres_mag%R)/rho_R ! stagnation enthalpy here excludes magnetic energy (only used to find speed of sound) - else - E_L = gamma_L*pres_L + pi_inf_L + 5.e-1*rho_L*vel_L_rms + qv_L - E_R = gamma_R*pres_R + pi_inf_R + 5.e-1*rho_R*vel_R_rms + qv_R - H_L = (E_L + pres_L)/rho_L - H_R = (E_R + pres_R)/rho_R - end if + do l = is3%beg, is3%end + do k = is2%beg, is2%end + do j = is1%beg, is1%end + $:GPU_LOOP(parallelism='[seq]') + do i = 1, contxe + alpha_rho_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, i) + alpha_rho_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) + end do - ! elastic energy update - if (hypoelasticity) then - G_L = 0._wp; G_R = 0._wp + vel_L_rms = 0._wp; vel_R_rms = 0._wp - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - G_L = G_L + alpha_L(i)*Gs_rs(i) - G_R = G_R + alpha_R(i)*Gs_rs(i) - end do + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_vels + vel_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, contxe + i) + vel_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, contxe + i) + vel_L_rms = vel_L_rms + vel_L(i)**2._wp + vel_R_rms = vel_R_rms + vel_R(i)**2._wp + end do - if (cont_damage) then - G_L = G_L*max((1._wp - qL_prim_rs${XYZ}$_vf(j, k, l, damage_idx)), 0._wp) - G_R = G_R*max((1._wp - qR_prim_rs${XYZ}$_vf(j, k, l, damage_idx)), 0._wp) - end if + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids + alpha_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i) + alpha_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i) + end do - do i = 1, strxe - strxb + 1 - tau_e_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, strxb - 1 + i) - tau_e_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, strxb - 1 + i) - ! Elastic contribution to energy if G large enough - !TODO take out if statement if stable without - if ((G_L > 1000) .and. (G_R > 1000)) then - E_L = E_L + (tau_e_L(i)*tau_e_L(i))/(4._wp*G_L) - E_R = E_R + (tau_e_R(i)*tau_e_R(i))/(4._wp*G_R) - ! Double for shear stresses - if (any(strxb - 1 + i == shear_indices)) then - E_L = E_L + (tau_e_L(i)*tau_e_L(i))/(4._wp*G_L) - E_R = E_R + (tau_e_R(i)*tau_e_R(i))/(4._wp*G_R) - end if - end if - end do + pres_L = qL_prim_rs${XYZ}$_vf(j, k, l, E_idx) + pres_R = qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx) + + if (mhd) then + if (n == 0) then ! 1D: constant Bx; By, Bz as variables + B%L(1) = Bx0 + B%R(1) = Bx0 + B%L(2) = qL_prim_rs${XYZ}$_vf(j, k, l, B_idx%beg) + B%R(2) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, B_idx%beg) + B%L(3) = qL_prim_rs${XYZ}$_vf(j, k, l, B_idx%beg + 1) + B%R(3) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, B_idx%beg + 1) + else ! 2D/3D: Bx, By, Bz as variables + B%L(1) = qL_prim_rs${XYZ}$_vf(j, k, l, B_idx%beg) + B%R(1) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, B_idx%beg) + B%L(2) = qL_prim_rs${XYZ}$_vf(j, k, l, B_idx%beg + 1) + B%R(2) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, B_idx%beg + 1) + B%L(3) = qL_prim_rs${XYZ}$_vf(j, k, l, B_idx%beg + 2) + B%R(3) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, B_idx%beg + 2) end if + end if - call s_compute_speed_of_sound(pres_L, rho_L, gamma_L, pi_inf_L, H_L, alpha_L, & - vel_L_rms, 0._wp, c_L) + rho_L = 0._wp + gamma_L = 0._wp + pi_inf_L = 0._wp + qv_L = 0._wp - call s_compute_speed_of_sound(pres_R, rho_R, gamma_R, pi_inf_R, H_R, alpha_R, & - vel_R_rms, 0._wp, c_R) + rho_R = 0._wp + gamma_R = 0._wp + pi_inf_R = 0._wp + qv_R = 0._wp - if (mhd) then - call s_compute_fast_magnetosonic_speed(rho_L, c_L, B%L, norm_dir, c_fast%L, H_L) - call s_compute_fast_magnetosonic_speed(rho_R, c_R, B%R, norm_dir, c_fast%R, H_R) - end if + alpha_L_sum = 0._wp + alpha_R_sum = 0._wp - s_L = 0._wp; s_R = 0._wp + pres_mag%L = 0._wp + pres_mag%R = 0._wp + if (mpp_lim) then $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_dims - s_L = s_L + vel_L(i)**2._wp - s_R = s_R + vel_R(i)**2._wp + do i = 1, num_fluids + alpha_rho_L(i) = max(0._wp, alpha_rho_L(i)) + alpha_L(i) = min(max(0._wp, alpha_L(i)), 1._wp) + alpha_L_sum = alpha_L_sum + alpha_L(i) + alpha_rho_R(i) = max(0._wp, alpha_rho_R(i)) + alpha_R(i) = min(max(0._wp, alpha_R(i)), 1._wp) + alpha_R_sum = alpha_R_sum + alpha_R(i) end do - s_L = sqrt(s_L) - s_R = sqrt(s_R) + alpha_L = alpha_L/max(alpha_L_sum, sgm_eps) + alpha_R = alpha_R/max(alpha_R_sum, sgm_eps) + end if - s_P = max(s_L, s_R) + max(c_L, c_R) - s_M = -s_P + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids + rho_L = rho_L + alpha_rho_L(i) + gamma_L = gamma_L + alpha_L(i)*gammas(i) + pi_inf_L = pi_inf_L + alpha_L(i)*pi_infs(i) + qv_L = qv_L + alpha_rho_L(i)*qvs(i) + + rho_R = rho_R + alpha_rho_R(i) + gamma_R = gamma_R + alpha_R(i)*gammas(i) + pi_inf_R = pi_inf_R + alpha_R(i)*pi_infs(i) + qv_R = qv_R + alpha_rho_R(i)*qvs(i) + end do - s_L = s_M - s_R = s_P + if (viscous) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, 2 + Re_L(i) = dflt_real + Re_R(i) = dflt_real - ! Low Mach correction - if (low_Mach == 1) then - @:compute_low_Mach_correction() - else - pcorr = 0._wp - end if + if (Re_size(i) > 0) Re_L(i) = 0._wp + if (Re_size(i) > 0) Re_R(i) = 0._wp - ! Mass - if (.not. relativity) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, contxe - flux_rs${XYZ}$_vf(j, k, l, i) = & - (s_M*alpha_rho_R(i)*vel_R(norm_dir) & - - s_P*alpha_rho_L(i)*vel_L(norm_dir) & - + s_M*s_P*(alpha_rho_L(i) & - - alpha_rho_R(i))) & - /(s_M - s_P) - end do - elseif (relativity) then $:GPU_LOOP(parallelism='[seq]') - do i = 1, contxe - flux_rs${XYZ}$_vf(j, k, l, i) = & - (s_M*Ga%R*alpha_rho_R(i)*vel_R(norm_dir) & - - s_P*Ga%L*alpha_rho_L(i)*vel_L(norm_dir) & - + s_M*s_P*(Ga%L*alpha_rho_L(i) & - - Ga%R*alpha_rho_R(i))) & - /(s_M - s_P) + do q = 1, Re_size(i) + Re_L(i) = alpha_L(Re_idx(i, q))/Res_gs(i, q) & + + Re_L(i) + Re_R(i) = alpha_R(Re_idx(i, q))/Res_gs(i, q) & + + Re_R(i) end do - end if - ! Momentum - if (mhd .and. (.not. relativity)) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, 3 - ! Flux of rho*v_i in the ${XYZ}$ direction - ! = rho * v_i * v_${XYZ}$ - B_i * B_${XYZ}$ + delta_(${XYZ}$,i) * p_tot - flux_rs${XYZ}$_vf(j, k, l, contxe + i) = & - (s_M*(rho_R*vel_R(i)*vel_R(norm_dir) & - - B%R(i)*B%R(norm_dir) & - + dir_flg(i)*(pres_R + pres_mag%R)) & - - s_P*(rho_L*vel_L(i)*vel_L(norm_dir) & - - B%L(i)*B%L(norm_dir) & - + dir_flg(i)*(pres_L + pres_mag%L)) & - + s_M*s_P*(rho_L*vel_L(i) - rho_R*vel_R(i))) & - /(s_M - s_P) - end do - elseif (mhd .and. relativity) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, 3 - ! Flux of m_i in the ${XYZ}$ direction - ! = m_i * v_${XYZ}$ - b_i/Gamma * B_${XYZ}$ + delta_(${XYZ}$,i) * p_tot - flux_rs${XYZ}$_vf(j, k, l, contxe + i) = & - (s_M*(cm%R(i)*vel_R(norm_dir) & - - b4%R(i)/Ga%R*B%R(norm_dir) & - + dir_flg(i)*(pres_R + pres_mag%R)) & - - s_P*(cm%L(i)*vel_L(norm_dir) & - - b4%L(i)/Ga%L*B%L(norm_dir) & - + dir_flg(i)*(pres_L + pres_mag%L)) & - + s_M*s_P*(cm%L(i) - cm%R(i))) & - /(s_M - s_P) - end do - elseif (bubbles_euler) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_vels - flux_rs${XYZ}$_vf(j, k, l, contxe + dir_idx(i)) = & - (s_M*(rho_R*vel_R(dir_idx(1)) & - *vel_R(dir_idx(i)) & - + dir_flg(dir_idx(i))*(pres_R - ptilde_R)) & - - s_P*(rho_L*vel_L(dir_idx(1)) & - *vel_L(dir_idx(i)) & - + dir_flg(dir_idx(i))*(pres_L - ptilde_L)) & - + s_M*s_P*(rho_L*vel_L(dir_idx(i)) & - - rho_R*vel_R(dir_idx(i)))) & - /(s_M - s_P) & - + (s_M/s_L)*(s_P/s_R)*pcorr*(vel_R(dir_idx(i)) - vel_L(dir_idx(i))) - end do - else if (hypoelasticity) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_vels - flux_rs${XYZ}$_vf(j, k, l, contxe + dir_idx(i)) = & - (s_M*(rho_R*vel_R(dir_idx(1)) & - *vel_R(dir_idx(i)) & - + dir_flg(dir_idx(i))*pres_R & - - tau_e_R(dir_idx_tau(i))) & - - s_P*(rho_L*vel_L(dir_idx(1)) & - *vel_L(dir_idx(i)) & - + dir_flg(dir_idx(i))*pres_L & - - tau_e_L(dir_idx_tau(i))) & - + s_M*s_P*(rho_L*vel_L(dir_idx(i)) & - - rho_R*vel_R(dir_idx(i)))) & - /(s_M - s_P) - end do - else - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_vels - flux_rs${XYZ}$_vf(j, k, l, contxe + dir_idx(i)) = & - (s_M*(rho_R*vel_R(dir_idx(1)) & - *vel_R(dir_idx(i)) & - + dir_flg(dir_idx(i))*pres_R) & - - s_P*(rho_L*vel_L(dir_idx(1)) & - *vel_L(dir_idx(i)) & - + dir_flg(dir_idx(i))*pres_L) & - + s_M*s_P*(rho_L*vel_L(dir_idx(i)) & - - rho_R*vel_R(dir_idx(i)))) & - /(s_M - s_P) & - + (s_M/s_L)*(s_P/s_R)*pcorr*(vel_R(dir_idx(i)) - vel_L(dir_idx(i))) - end do - end if + Re_L(i) = 1._wp/max(Re_L(i), sgm_eps) + Re_R(i) = 1._wp/max(Re_R(i), sgm_eps) + end do + end if - ! Energy - if (mhd .and. (.not. relativity)) then - ! energy flux = (E + p + p_mag) * v_${XYZ}$ - B_${XYZ}$ * (v_x*B_x + v_y*B_y + v_z*B_z) - flux_rs${XYZ}$_vf(j, k, l, E_idx) = & - (s_M*(vel_R(norm_dir)*(E_R + pres_R + pres_mag%R) - B%R(norm_dir)*(vel_R(1)*B%R(1) + vel_R(2)*B%R(2) + vel_R(3)*B%R(3))) & - - s_P*(vel_L(norm_dir)*(E_L + pres_L + pres_mag%L) - B%L(norm_dir)*(vel_L(1)*B%L(1) + vel_L(2)*B%L(2) + vel_L(3)*B%L(3))) & - + s_M*s_P*(E_L - E_R)) & - /(s_M - s_P) - elseif (mhd .and. relativity) then - ! energy flux = m_${XYZ}$ - mass flux - ! Hard-coded for single-component for now - flux_rs${XYZ}$_vf(j, k, l, E_idx) = & - (s_M*(cm%R(norm_dir) - Ga%R*alpha_rho_R(1)*vel_R(norm_dir)) & - - s_P*(cm%L(norm_dir) - Ga%L*alpha_rho_L(1)*vel_L(norm_dir)) & - + s_M*s_P*(E_L - E_R)) & - /(s_M - s_P) - else if (bubbles_euler) then - flux_rs${XYZ}$_vf(j, k, l, E_idx) = & - (s_M*vel_R(dir_idx(1))*(E_R + pres_R - ptilde_R) & - - s_P*vel_L(dir_idx(1))*(E_L + pres_L - ptilde_L) & - + s_M*s_P*(E_L - E_R)) & - /(s_M - s_P) & - + (s_M/s_L)*(s_P/s_R)*pcorr*(vel_R_rms - vel_L_rms)/2._wp - else if (hypoelasticity) then - flux_tau_L = 0._wp; flux_tau_R = 0._wp - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_dims - flux_tau_L = flux_tau_L + tau_e_L(dir_idx_tau(i))*vel_L(dir_idx(i)) - flux_tau_R = flux_tau_R + tau_e_R(dir_idx_tau(i))*vel_R(dir_idx(i)) - end do - flux_rs${XYZ}$_vf(j, k, l, E_idx) = & - (s_M*(vel_R(dir_idx(1))*(E_R + pres_R) - flux_tau_R) & - - s_P*(vel_L(dir_idx(1))*(E_L + pres_L) - flux_tau_L) & - + s_M*s_P*(E_L - E_R))/(s_M - s_P) - else - flux_rs${XYZ}$_vf(j, k, l, E_idx) = & - (s_M*vel_R(dir_idx(1))*(E_R + pres_R) & - - s_P*vel_L(dir_idx(1))*(E_L + pres_L) & - + s_M*s_P*(E_L - E_R)) & - /(s_M - s_P) & - + (s_M/s_L)*(s_P/s_R)*pcorr*(vel_R_rms - vel_L_rms)/2._wp - end if + if (chemistry) then + $:GPU_LOOP(parallelism='[seq]') + do i = chemxb, chemxe + Ys_L(i - chemxb + 1) = qL_prim_rs${XYZ}$_vf(j, k, l, i) + Ys_R(i - chemxb + 1) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) + end do - ! Elastic Stresses - if (hypoelasticity) then - do i = 1, strxe - strxb + 1 !TODO: this indexing may be slow - flux_rs${XYZ}$_vf(j, k, l, strxb - 1 + i) = & - (s_M*(rho_R*vel_R(dir_idx(1)) & - *tau_e_R(i)) & - - s_P*(rho_L*vel_L(dir_idx(1)) & - *tau_e_L(i)) & - + s_M*s_P*(rho_L*tau_e_L(i) & - - rho_R*tau_e_R(i))) & - /(s_M - s_P) - end do + call get_mixture_molecular_weight(Ys_L, MW_L) + call get_mixture_molecular_weight(Ys_R, MW_R) + + Xs_L(:) = Ys_L(:)*MW_L/molecular_weights(:) + Xs_R(:) = Ys_R(:)*MW_R/molecular_weights(:) + + R_gas_L = gas_constant/MW_L + R_gas_R = gas_constant/MW_R + T_L = pres_L/rho_L/R_gas_L + T_R = pres_R/rho_R/R_gas_R + + call get_species_specific_heats_r(T_L, Cp_iL) + call get_species_specific_heats_r(T_R, Cp_iR) + + if (chem_params%gamma_method == 1) then + ! gamma_method = 1: Ref. Section 2.3.1 Formulation of doi:10.7907/ZKW8-ES97. + Gamma_iL = Cp_iL/(Cp_iL - 1.0_wp) + Gamma_iR = Cp_iR/(Cp_iR - 1.0_wp) + + gamma_L = sum(Xs_L(:)/(Gamma_iL(:) - 1.0_wp)) + gamma_R = sum(Xs_R(:)/(Gamma_iR(:) - 1.0_wp)) + else if (chem_params%gamma_method == 2) then + ! gamma_method = 2: c_p / c_v where c_p, c_v are specific heats. + call get_mixture_specific_heat_cp_mass(T_L, Ys_L, Cp_L) + call get_mixture_specific_heat_cp_mass(T_R, Ys_R, Cp_R) + call get_mixture_specific_heat_cv_mass(T_L, Ys_L, Cv_L) + call get_mixture_specific_heat_cv_mass(T_R, Ys_R, Cv_R) + + Gamm_L = Cp_L/Cv_L + gamma_L = 1.0_wp/(Gamm_L - 1.0_wp) + Gamm_R = Cp_R/Cv_R + gamma_R = 1.0_wp/(Gamm_R - 1.0_wp) end if - ! Advection + call get_mixture_energy_mass(T_L, Ys_L, E_L) + call get_mixture_energy_mass(T_R, Ys_R, E_R) + + E_L = rho_L*E_L + 5.e-1*rho_L*vel_L_rms + E_R = rho_R*E_R + 5.e-1*rho_R*vel_R_rms + H_L = (E_L + pres_L)/rho_L + H_R = (E_R + pres_R)/rho_R + elseif (mhd .and. relativity) then + Ga%L = 1._wp/sqrt(1._wp - vel_L_rms) + Ga%R = 1._wp/sqrt(1._wp - vel_R_rms) + vdotB%L = vel_L(1)*B%L(1) + vel_L(2)*B%L(2) + vel_L(3)*B%L(3) + vdotB%R = vel_R(1)*B%R(1) + vel_R(2)*B%R(2) + vel_R(3)*B%R(3) + + b4%L(1:3) = B%L(1:3)/Ga%L + Ga%L*vel_L(1:3)*vdotB%L + b4%R(1:3) = B%R(1:3)/Ga%R + Ga%R*vel_R(1:3)*vdotB%R + B2%L = B%L(1)**2._wp + B%L(2)**2._wp + B%L(3)**2._wp + B2%R = B%R(1)**2._wp + B%R(2)**2._wp + B%R(3)**2._wp + + pres_mag%L = 0.5_wp*(B2%L/Ga%L**2._wp + vdotB%L**2._wp) + pres_mag%R = 0.5_wp*(B2%R/Ga%R**2._wp + vdotB%R**2._wp) + + ! Hard-coded EOS + H_L = 1._wp + (gamma_L + 1)*pres_L/rho_L + H_R = 1._wp + (gamma_R + 1)*pres_R/rho_R + + cm%L(1:3) = (rho_L*H_L*Ga%L**2 + B2%L)*vel_L(1:3) - vdotB%L*B%L(1:3) + cm%R(1:3) = (rho_R*H_R*Ga%R**2 + B2%R)*vel_R(1:3) - vdotB%R*B%R(1:3) + + E_L = rho_L*H_L*Ga%L**2 - pres_L + 0.5_wp*(B2%L + vel_L_rms*B2%L - vdotB%L**2._wp) - rho_L*Ga%L + E_R = rho_R*H_R*Ga%R**2 - pres_R + 0.5_wp*(B2%R + vel_R_rms*B2%R - vdotB%R**2._wp) - rho_R*Ga%R + elseif (mhd .and. .not. relativity) then + pres_mag%L = 0.5_wp*(B%L(1)**2._wp + B%L(2)**2._wp + B%L(3)**2._wp) + pres_mag%R = 0.5_wp*(B%R(1)**2._wp + B%R(2)**2._wp + B%R(3)**2._wp) + E_L = gamma_L*pres_L + pi_inf_L + 0.5_wp*rho_L*vel_L_rms + qv_L + pres_mag%L + E_R = gamma_R*pres_R + pi_inf_R + 0.5_wp*rho_R*vel_R_rms + qv_R + pres_mag%R ! includes magnetic energy + H_L = (E_L + pres_L - pres_mag%L)/rho_L + H_R = (E_R + pres_R - pres_mag%R)/rho_R ! stagnation enthalpy here excludes magnetic energy (only used to find speed of sound) + else + E_L = gamma_L*pres_L + pi_inf_L + 5.e-1*rho_L*vel_L_rms + qv_L + E_R = gamma_R*pres_R + pi_inf_R + 5.e-1*rho_R*vel_R_rms + qv_R + H_L = (E_L + pres_L)/rho_L + H_R = (E_R + pres_R)/rho_R + end if + + ! elastic energy update + if (hypoelasticity) then + G_L = 0._wp; G_R = 0._wp + $:GPU_LOOP(parallelism='[seq]') - do i = advxb, advxe - flux_rs${XYZ}$_vf(j, k, l, i) = & - (qL_prim_rs${XYZ}$_vf(j, k, l, i) & - - qR_prim_rs${XYZ}$_vf(j + 1, k, l, i)) & - *s_M*s_P/(s_M - s_P) - flux_src_rs${XYZ}$_vf(j, k, l, i) = & - (s_M*qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) & - - s_P*qL_prim_rs${XYZ}$_vf(j, k, l, i)) & - /(s_M - s_P) + do i = 1, num_fluids + G_L = G_L + alpha_L(i)*Gs_rs(i) + G_R = G_R + alpha_R(i)*Gs_rs(i) end do - if (bubbles_euler) then - ! From HLLC: Kills mass transport @ bubble gas density - if (num_fluids > 1) then - flux_rs${XYZ}$_vf(j, k, l, contxe) = 0._wp - end if + if (cont_damage) then + G_L = G_L*max((1._wp - qL_prim_rs${XYZ}$_vf(j, k, l, damage_idx)), 0._wp) + G_R = G_R*max((1._wp - qR_prim_rs${XYZ}$_vf(j, k, l, damage_idx)), 0._wp) end if - if (chemistry) then - $:GPU_LOOP(parallelism='[seq]') - do i = chemxb, chemxe - Y_L = qL_prim_rs${XYZ}$_vf(j, k, l, i) - Y_R = qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) + do i = 1, strxe - strxb + 1 + tau_e_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, strxb - 1 + i) + tau_e_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, strxb - 1 + i) + ! Elastic contribution to energy if G large enough + !TODO take out if statement if stable without + if ((G_L > 1000) .and. (G_R > 1000)) then + E_L = E_L + (tau_e_L(i)*tau_e_L(i))/(4._wp*G_L) + E_R = E_R + (tau_e_R(i)*tau_e_R(i))/(4._wp*G_R) + ! Double for shear stresses + if (any(strxb - 1 + i == shear_indices)) then + E_L = E_L + (tau_e_L(i)*tau_e_L(i))/(4._wp*G_L) + E_R = E_R + (tau_e_R(i)*tau_e_R(i))/(4._wp*G_R) + end if + end if + end do + end if - flux_rs${XYZ}$_vf(j, k, l, i) = (s_M*Y_R*rho_R*vel_R(dir_idx(1)) & - - s_P*Y_L*rho_L*vel_L(dir_idx(1)) & - + s_M*s_P*(Y_L*rho_L - Y_R*rho_R)) & - /(s_M - s_P) - flux_src_rs${XYZ}$_vf(j, k, l, i) = 0._wp - end do - end if + call s_compute_speed_of_sound(pres_L, rho_L, gamma_L, pi_inf_L, H_L, alpha_L, & + vel_L_rms, 0._wp, c_L) - if (mhd) then - if (n == 0) then ! 1D: d/dx flux only & Bx = Bx0 = const. - ! B_y flux = v_x * B_y - v_y * Bx0 - ! B_z flux = v_x * B_z - v_z * Bx0 - $:GPU_LOOP(parallelism='[seq]') - do i = 0, 1 - flux_rsx_vf(j, k, l, B_idx%beg + i) = (s_M*(vel_R(1)*B%R(2 + i) - vel_R(2 + i)*Bx0) & - - s_P*(vel_L(1)*B%L(2 + i) - vel_L(2 + i)*Bx0) & - + s_M*s_P*(B%L(2 + i) - B%R(2 + i)))/(s_M - s_P) - end do - else ! 2D/3D: Bx, By, Bz /= const. but zero flux component in the same direction - ! B_x d/d${XYZ}$ flux = (1 - delta(x,${XYZ}$)) * (v_${XYZ}$ * B_x - v_x * B_${XYZ}$) - ! B_y d/d${XYZ}$ flux = (1 - delta(y,${XYZ}$)) * (v_${XYZ}$ * B_y - v_y * B_${XYZ}$) - ! B_z d/d${XYZ}$ flux = (1 - delta(z,${XYZ}$)) * (v_${XYZ}$ * B_z - v_z * B_${XYZ}$) - $:GPU_LOOP(parallelism='[seq]') - do i = 0, 2 - flux_rs${XYZ}$_vf(j, k, l, B_idx%beg + i) = (1 - dir_flg(i + 1))*( & - s_M*(vel_R(dir_idx(1))*B%R(i + 1) - vel_R(i + 1)*B%R(norm_dir)) - & - s_P*(vel_L(dir_idx(1))*B%L(i + 1) - vel_L(i + 1)*B%L(norm_dir)) + & - s_M*s_P*(B%L(i + 1) - B%R(i + 1)))/(s_M - s_P) - end do - end if - flux_src_rs${XYZ}$_vf(j, k, l, advxb) = 0._wp - end if + call s_compute_speed_of_sound(pres_R, rho_R, gamma_R, pi_inf_R, H_R, alpha_R, & + vel_R_rms, 0._wp, c_R) - #:if (NORM_DIR == 2) - if (cyl_coord) then - !Substituting the advective flux into the inviscid geometrical source flux - $:GPU_LOOP(parallelism='[seq]') - do i = 1, E_idx - flux_gsrc_rs${XYZ}$_vf(j, k, l, i) = flux_rs${XYZ}$_vf(j, k, l, i) - end do - ! Recalculating the radial momentum geometric source flux - flux_gsrc_rs${XYZ}$_vf(j, k, l, contxe + 2) = & - flux_rs${XYZ}$_vf(j, k, l, contxe + 2) & - - (s_M*pres_R - s_P*pres_L)/(s_M - s_P) - ! Geometrical source of the void fraction(s) is zero - $:GPU_LOOP(parallelism='[seq]') - do i = advxb, advxe - flux_gsrc_rs${XYZ}$_vf(j, k, l, i) = flux_rs${XYZ}$_vf(j, k, l, i) - end do - end if + if (mhd) then + call s_compute_fast_magnetosonic_speed(rho_L, c_L, B%L, norm_dir, c_fast%L, H_L) + call s_compute_fast_magnetosonic_speed(rho_R, c_R, B%R, norm_dir, c_fast%R, H_R) + end if - if (cyl_coord .and. hypoelasticity) then - ! += tau_sigmasigma using HLL - flux_gsrc_rs${XYZ}$_vf(j, k, l, contxe + 2) = & - flux_gsrc_rs${XYZ}$_vf(j, k, l, contxe + 2) + & - (s_M*tau_e_R(4) - s_P*tau_e_L(4)) & - /(s_M - s_P) + s_L = 0._wp; s_R = 0._wp - $:GPU_LOOP(parallelism='[seq]') - do i = strxb, strxe - flux_gsrc_rs${XYZ}$_vf(j, k, l, i) = flux_rs${XYZ}$_vf(j, k, l, i) - end do - end if - #:endif + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_dims + s_L = s_L + vel_L(i)**2._wp + s_R = s_R + vel_R(i)**2._wp end do - end do - end do - $:END_GPU_PARALLEL_LOOP() - end if - #:endfor + s_L = sqrt(s_L) + s_R = sqrt(s_R) - if (viscous) then - $:GPU_PARALLEL_LOOP(collapse=3, private='[i,j,k,l, idx_right_phys, vel_grad_L, vel_grad_R, alpha_L, alpha_R, vel_L, vel_R, Re_L, Re_R]', copyin='[norm_dir]') - do l = isz%beg, isz%end - do k = isy%beg, isy%end - do j = isx%beg, isx%end - idx_right_phys(1) = j - idx_right_phys(2) = k - idx_right_phys(3) = l - idx_right_phys(norm_dir) = idx_right_phys(norm_dir) + 1 + s_P = max(s_L, s_R) + max(c_L, c_R) + s_M = -s_P - if (norm_dir == 1) then + s_L = s_M + s_R = s_P + + ! Low Mach correction + if (low_Mach == 1) then + @:compute_low_Mach_correction() + else + pcorr = 0._wp + end if + + ! Mass + if (.not. relativity) then $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - alpha_L(i) = qL_prim_rsx_vf(j, k, l, E_idx + i) - alpha_R(i) = qR_prim_rsx_vf(j + 1, k, l, E_idx + i) + do i = 1, contxe + flux_rs${XYZ}$_vf(j, k, l, i) = & + (s_M*alpha_rho_R(i)*vel_R(norm_dir) & + - s_P*alpha_rho_L(i)*vel_L(norm_dir) & + + s_M*s_P*(alpha_rho_L(i) & + - alpha_rho_R(i))) & + /(s_M - s_P) + end do + elseif (relativity) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, contxe + flux_rs${XYZ}$_vf(j, k, l, i) = & + (s_M*Ga%R*alpha_rho_R(i)*vel_R(norm_dir) & + - s_P*Ga%L*alpha_rho_L(i)*vel_L(norm_dir) & + + s_M*s_P*(Ga%L*alpha_rho_L(i) & + - Ga%R*alpha_rho_R(i))) & + /(s_M - s_P) end do + end if + ! Momentum + if (mhd .and. (.not. relativity)) then $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_dims - vel_L(i) = qL_prim_rsx_vf(j, k, l, momxb + i - 1) - vel_R(i) = qR_prim_rsx_vf(j + 1, k, l, momxb + i - 1) + do i = 1, 3 + ! Flux of rho*v_i in the ${XYZ}$ direction + ! = rho * v_i * v_${XYZ}$ - B_i * B_${XYZ}$ + delta_(${XYZ}$,i) * p_tot + flux_rs${XYZ}$_vf(j, k, l, contxe + i) = & + (s_M*(rho_R*vel_R(i)*vel_R(norm_dir) & + - B%R(i)*B%R(norm_dir) & + + dir_flg(i)*(pres_R + pres_mag%R)) & + - s_P*(rho_L*vel_L(i)*vel_L(norm_dir) & + - B%L(i)*B%L(norm_dir) & + + dir_flg(i)*(pres_L + pres_mag%L)) & + + s_M*s_P*(rho_L*vel_L(i) - rho_R*vel_R(i))) & + /(s_M - s_P) end do - else if (norm_dir == 2) then + elseif (mhd .and. relativity) then $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - alpha_L(i) = qL_prim_rsy_vf(k, j, l, E_idx + i) - alpha_R(i) = qR_prim_rsy_vf(k + 1, j, l, E_idx + i) + do i = 1, 3 + ! Flux of m_i in the ${XYZ}$ direction + ! = m_i * v_${XYZ}$ - b_i/Gamma * B_${XYZ}$ + delta_(${XYZ}$,i) * p_tot + flux_rs${XYZ}$_vf(j, k, l, contxe + i) = & + (s_M*(cm%R(i)*vel_R(norm_dir) & + - b4%R(i)/Ga%R*B%R(norm_dir) & + + dir_flg(i)*(pres_R + pres_mag%R)) & + - s_P*(cm%L(i)*vel_L(norm_dir) & + - b4%L(i)/Ga%L*B%L(norm_dir) & + + dir_flg(i)*(pres_L + pres_mag%L)) & + + s_M*s_P*(cm%L(i) - cm%R(i))) & + /(s_M - s_P) end do + elseif (bubbles_euler) then $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_dims - vel_L(i) = qL_prim_rsy_vf(k, j, l, momxb + i - 1) - vel_R(i) = qR_prim_rsy_vf(k + 1, j, l, momxb + i - 1) + do i = 1, num_vels + flux_rs${XYZ}$_vf(j, k, l, contxe + dir_idx(i)) = & + (s_M*(rho_R*vel_R(dir_idx(1)) & + *vel_R(dir_idx(i)) & + + dir_flg(dir_idx(i))*(pres_R - ptilde_R)) & + - s_P*(rho_L*vel_L(dir_idx(1)) & + *vel_L(dir_idx(i)) & + + dir_flg(dir_idx(i))*(pres_L - ptilde_L)) & + + s_M*s_P*(rho_L*vel_L(dir_idx(i)) & + - rho_R*vel_R(dir_idx(i)))) & + /(s_M - s_P) & + + (s_M/s_L)*(s_P/s_R)*pcorr*(vel_R(dir_idx(i)) - vel_L(dir_idx(i))) + end do + else if (hypoelasticity) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_vels + flux_rs${XYZ}$_vf(j, k, l, contxe + dir_idx(i)) = & + (s_M*(rho_R*vel_R(dir_idx(1)) & + *vel_R(dir_idx(i)) & + + dir_flg(dir_idx(i))*pres_R & + - tau_e_R(dir_idx_tau(i))) & + - s_P*(rho_L*vel_L(dir_idx(1)) & + *vel_L(dir_idx(i)) & + + dir_flg(dir_idx(i))*pres_L & + - tau_e_L(dir_idx_tau(i))) & + + s_M*s_P*(rho_L*vel_L(dir_idx(i)) & + - rho_R*vel_R(dir_idx(i)))) & + /(s_M - s_P) end do else $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - alpha_L(i) = qL_prim_rsz_vf(l, k, j, E_idx + i) - alpha_R(i) = qR_prim_rsz_vf(l + 1, k, j, E_idx + i) + do i = 1, num_vels + flux_rs${XYZ}$_vf(j, k, l, contxe + dir_idx(i)) = & + (s_M*(rho_R*vel_R(dir_idx(1)) & + *vel_R(dir_idx(i)) & + + dir_flg(dir_idx(i))*pres_R) & + - s_P*(rho_L*vel_L(dir_idx(1)) & + *vel_L(dir_idx(i)) & + + dir_flg(dir_idx(i))*pres_L) & + + s_M*s_P*(rho_L*vel_L(dir_idx(i)) & + - rho_R*vel_R(dir_idx(i)))) & + /(s_M - s_P) & + + (s_M/s_L)*(s_P/s_R)*pcorr*(vel_R(dir_idx(i)) - vel_L(dir_idx(i))) end do + end if + ! Energy + if (mhd .and. (.not. relativity)) then + ! energy flux = (E + p + p_mag) * v_${XYZ}$ - B_${XYZ}$ * (v_x*B_x + v_y*B_y + v_z*B_z) + flux_rs${XYZ}$_vf(j, k, l, E_idx) = & + (s_M*(vel_R(norm_dir)*(E_R + pres_R + pres_mag%R) - B%R(norm_dir)*(vel_R(1)*B%R(1) + vel_R(2)*B%R(2) + vel_R(3)*B%R(3))) & + - s_P*(vel_L(norm_dir)*(E_L + pres_L + pres_mag%L) - B%L(norm_dir)*(vel_L(1)*B%L(1) + vel_L(2)*B%L(2) + vel_L(3)*B%L(3))) & + + s_M*s_P*(E_L - E_R)) & + /(s_M - s_P) + elseif (mhd .and. relativity) then + ! energy flux = m_${XYZ}$ - mass flux + ! Hard-coded for single-component for now + flux_rs${XYZ}$_vf(j, k, l, E_idx) = & + (s_M*(cm%R(norm_dir) - Ga%R*alpha_rho_R(1)*vel_R(norm_dir)) & + - s_P*(cm%L(norm_dir) - Ga%L*alpha_rho_L(1)*vel_L(norm_dir)) & + + s_M*s_P*(E_L - E_R)) & + /(s_M - s_P) + else if (bubbles_euler) then + flux_rs${XYZ}$_vf(j, k, l, E_idx) = & + (s_M*vel_R(dir_idx(1))*(E_R + pres_R - ptilde_R) & + - s_P*vel_L(dir_idx(1))*(E_L + pres_L - ptilde_L) & + + s_M*s_P*(E_L - E_R)) & + /(s_M - s_P) & + + (s_M/s_L)*(s_P/s_R)*pcorr*(vel_R_rms - vel_L_rms)/2._wp + else if (hypoelasticity) then + flux_tau_L = 0._wp; flux_tau_R = 0._wp $:GPU_LOOP(parallelism='[seq]') do i = 1, num_dims - vel_L(i) = qL_prim_rsz_vf(l, k, j, momxb + i - 1) - vel_R(i) = qR_prim_rsz_vf(l + 1, k, j, momxb + i - 1) + flux_tau_L = flux_tau_L + tau_e_L(dir_idx_tau(i))*vel_L(dir_idx(i)) + flux_tau_R = flux_tau_R + tau_e_R(dir_idx_tau(i))*vel_R(dir_idx(i)) + end do + flux_rs${XYZ}$_vf(j, k, l, E_idx) = & + (s_M*(vel_R(dir_idx(1))*(E_R + pres_R) - flux_tau_R) & + - s_P*(vel_L(dir_idx(1))*(E_L + pres_L) - flux_tau_L) & + + s_M*s_P*(E_L - E_R))/(s_M - s_P) + else + flux_rs${XYZ}$_vf(j, k, l, E_idx) = & + (s_M*vel_R(dir_idx(1))*(E_R + pres_R) & + - s_P*vel_L(dir_idx(1))*(E_L + pres_L) & + + s_M*s_P*(E_L - E_R)) & + /(s_M - s_P) & + + (s_M/s_L)*(s_P/s_R)*pcorr*(vel_R_rms - vel_L_rms)/2._wp + end if + + ! Elastic Stresses + if (hypoelasticity) then + do i = 1, strxe - strxb + 1 !TODO: this indexing may be slow + flux_rs${XYZ}$_vf(j, k, l, strxb - 1 + i) = & + (s_M*(rho_R*vel_R(dir_idx(1)) & + *tau_e_R(i)) & + - s_P*(rho_L*vel_L(dir_idx(1)) & + *tau_e_L(i)) & + + s_M*s_P*(rho_L*tau_e_L(i) & + - rho_R*tau_e_R(i))) & + /(s_M - s_P) end do end if + ! Advection $:GPU_LOOP(parallelism='[seq]') - do i = 1, 2 - Re_L(i) = dflt_real - Re_R(i) = dflt_real + do i = advxb, advxe + flux_rs${XYZ}$_vf(j, k, l, i) = & + (qL_prim_rs${XYZ}$_vf(j, k, l, i) & + - qR_prim_rs${XYZ}$_vf(j + 1, k, l, i)) & + *s_M*s_P/(s_M - s_P) + flux_src_rs${XYZ}$_vf(j, k, l, i) = & + (s_M*qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) & + - s_P*qL_prim_rs${XYZ}$_vf(j, k, l, i)) & + /(s_M - s_P) + end do - if (Re_size(i) > 0) Re_L(i) = 0._wp - if (Re_size(i) > 0) Re_R(i) = 0._wp + if (bubbles_euler) then + ! From HLLC: Kills mass transport @ bubble gas density + if (num_fluids > 1) then + flux_rs${XYZ}$_vf(j, k, l, contxe) = 0._wp + end if + end if + if (chemistry) then $:GPU_LOOP(parallelism='[seq]') - do q = 1, Re_size(i) - Re_L(i) = alpha_L(Re_idx(i, q))/Res_gs(i, q) & - + Re_L(i) - Re_R(i) = alpha_R(Re_idx(i, q))/Res_gs(i, q) & - + Re_R(i) + do i = chemxb, chemxe + Y_L = qL_prim_rs${XYZ}$_vf(j, k, l, i) + Y_R = qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) + + flux_rs${XYZ}$_vf(j, k, l, i) = (s_M*Y_R*rho_R*vel_R(dir_idx(1)) & + - s_P*Y_L*rho_L*vel_L(dir_idx(1)) & + + s_M*s_P*(Y_L*rho_L - Y_R*rho_R)) & + /(s_M - s_P) + flux_src_rs${XYZ}$_vf(j, k, l, i) = 0._wp end do + end if + + if (mhd) then + if (n == 0) then ! 1D: d/dx flux only & Bx = Bx0 = const. + ! B_y flux = v_x * B_y - v_y * Bx0 + ! B_z flux = v_x * B_z - v_z * Bx0 + $:GPU_LOOP(parallelism='[seq]') + do i = 0, 1 + flux_rsx_vf(j, k, l, B_idx%beg + i) = (s_M*(vel_R(1)*B%R(2 + i) - vel_R(2 + i)*Bx0) & + - s_P*(vel_L(1)*B%L(2 + i) - vel_L(2 + i)*Bx0) & + + s_M*s_P*(B%L(2 + i) - B%R(2 + i)))/(s_M - s_P) + end do + else ! 2D/3D: Bx, By, Bz /= const. but zero flux component in the same direction + ! B_x d/d${XYZ}$ flux = (1 - delta(x,${XYZ}$)) * (v_${XYZ}$ * B_x - v_x * B_${XYZ}$) + ! B_y d/d${XYZ}$ flux = (1 - delta(y,${XYZ}$)) * (v_${XYZ}$ * B_y - v_y * B_${XYZ}$) + ! B_z d/d${XYZ}$ flux = (1 - delta(z,${XYZ}$)) * (v_${XYZ}$ * B_z - v_z * B_${XYZ}$) + $:GPU_LOOP(parallelism='[seq]') + do i = 0, 2 + flux_rs${XYZ}$_vf(j, k, l, B_idx%beg + i) = (1 - dir_flg(i + 1))*( & + s_M*(vel_R(dir_idx(1))*B%R(i + 1) - vel_R(i + 1)*B%R(norm_dir)) - & + s_P*(vel_L(dir_idx(1))*B%L(i + 1) - vel_L(i + 1)*B%L(norm_dir)) + & + s_M*s_P*(B%L(i + 1) - B%R(i + 1)))/(s_M - s_P) + end do + end if + flux_src_rs${XYZ}$_vf(j, k, l, advxb) = 0._wp + end if + + #:if (NORM_DIR == 2) + if (cyl_coord) then + !Substituting the advective flux into the inviscid geometrical source flux + $:GPU_LOOP(parallelism='[seq]') + do i = 1, E_idx + flux_gsrc_rs${XYZ}$_vf(j, k, l, i) = flux_rs${XYZ}$_vf(j, k, l, i) + end do + ! Recalculating the radial momentum geometric source flux + flux_gsrc_rs${XYZ}$_vf(j, k, l, contxe + 2) = & + flux_rs${XYZ}$_vf(j, k, l, contxe + 2) & + - (s_M*pres_R - s_P*pres_L)/(s_M - s_P) + ! Geometrical source of the void fraction(s) is zero + $:GPU_LOOP(parallelism='[seq]') + do i = advxb, advxe + flux_gsrc_rs${XYZ}$_vf(j, k, l, i) = flux_rs${XYZ}$_vf(j, k, l, i) + end do + end if + + if (cyl_coord .and. hypoelasticity) then + ! += tau_sigmasigma using HLL + flux_gsrc_rs${XYZ}$_vf(j, k, l, contxe + 2) = & + flux_gsrc_rs${XYZ}$_vf(j, k, l, contxe + 2) + & + (s_M*tau_e_R(4) - s_P*tau_e_L(4)) & + /(s_M - s_P) + + $:GPU_LOOP(parallelism='[seq]') + do i = strxb, strxe + flux_gsrc_rs${XYZ}$_vf(j, k, l, i) = flux_rs${XYZ}$_vf(j, k, l, i) + end do + end if + #:endif + end do + end do + end do + $:END_GPU_PARALLEL_LOOP() + end if + + #:endfor + + if (viscous) then + $:GPU_PARALLEL_LOOP(collapse=3, private='[i,j,k,l, idx_right_phys, vel_grad_L, vel_grad_R, alpha_L, alpha_R, vel_L, vel_R, Re_L, Re_R]', copyin='[norm_dir]') + do l = isz%beg, isz%end + do k = isy%beg, isy%end + do j = isx%beg, isx%end + idx_right_phys(1) = j + idx_right_phys(2) = k + idx_right_phys(3) = l + idx_right_phys(norm_dir) = idx_right_phys(norm_dir) + 1 + + if (norm_dir == 1) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids + alpha_L(i) = qL_prim_rsx_vf(j, k, l, E_idx + i) + alpha_R(i) = qR_prim_rsx_vf(j + 1, k, l, E_idx + i) + end do - Re_L(i) = 1._wp/max(Re_L(i), sgm_eps) - Re_R(i) = 1._wp/max(Re_R(i), sgm_eps) + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_dims + vel_L(i) = qL_prim_rsx_vf(j, k, l, momxb + i - 1) + vel_R(i) = qR_prim_rsx_vf(j + 1, k, l, momxb + i - 1) + end do + else if (norm_dir == 2) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids + alpha_L(i) = qL_prim_rsy_vf(k, j, l, E_idx + i) + alpha_R(i) = qR_prim_rsy_vf(k + 1, j, l, E_idx + i) + end do + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_dims + vel_L(i) = qL_prim_rsy_vf(k, j, l, momxb + i - 1) + vel_R(i) = qR_prim_rsy_vf(k + 1, j, l, momxb + i - 1) + end do + else + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids + alpha_L(i) = qL_prim_rsz_vf(l, k, j, E_idx + i) + alpha_R(i) = qR_prim_rsz_vf(l + 1, k, j, E_idx + i) end do - if (shear_stress) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_dims + vel_L(i) = qL_prim_rsz_vf(l, k, j, momxb + i - 1) + vel_R(i) = qR_prim_rsz_vf(l + 1, k, j, momxb + i - 1) + end do + end if - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_dims - vel_grad_L(i, 1) = (dqL_prim_dx_vf(momxb + i - 1)%sf(j, k, l)/Re_L(1)) - vel_grad_R(i, 1) = (dqR_prim_dx_vf(momxb + i - 1)%sf(idx_right_phys(1), idx_right_phys(2), idx_right_phys(3))/Re_R(1)) - if (num_dims > 1) then - vel_grad_L(i, 2) = (dqL_prim_dy_vf(momxb + i - 1)%sf(j, k, l)/Re_L(1)) - vel_grad_R(i, 2) = (dqR_prim_dy_vf(momxb + i - 1)%sf(idx_right_phys(1), idx_right_phys(2), idx_right_phys(3))/Re_R(1)) - end if - if (num_dims > 2) then - vel_grad_L(i, 3) = (dqL_prim_dz_vf(momxb + i - 1)%sf(j, k, l)/Re_L(1)) - vel_grad_R(i, 3) = (dqR_prim_dz_vf(momxb + i - 1)%sf(idx_right_phys(1), idx_right_phys(2), idx_right_phys(3))/Re_R(1)) - end if - end do + $:GPU_LOOP(parallelism='[seq]') + do i = 1, 2 + Re_L(i) = dflt_real + Re_R(i) = dflt_real - if (norm_dir == 1) then - flux_src_vf(momxb)%sf(j, k, l) = flux_src_vf(momxb)%sf(j, k, l) - (4._wp/3._wp)*0.5_wp*(vel_grad_L(1, 1) + vel_grad_R(1, 1)) - flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) - (4._wp/3._wp)*0.5_wp*(vel_grad_L(1, 1)*vel_L(1) + vel_grad_R(1, 1)*vel_R(1)) - if (num_dims > 1) then - flux_src_vf(momxb)%sf(j, k, l) = flux_src_vf(momxb)%sf(j, k, l) - (-2._wp/3._wp)*0.5_wp*(vel_grad_L(2, 2) + vel_grad_R(2, 2)) - flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) - (-2._wp/3._wp)*0.5_wp*(vel_grad_L(2, 2)*vel_L(1) + vel_grad_R(2, 2)*vel_R(1)) - - flux_src_vf(momxb + 1)%sf(j, k, l) = flux_src_vf(momxb + 1)%sf(j, k, l) - 0.5_wp*(vel_grad_L(1, 2) + vel_grad_R(1, 2)) - 0.5_wp*(vel_grad_L(2, 1) + vel_grad_R(2, 1)) - flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) - 0.5_wp*(vel_grad_L(1, 2)*vel_L(2) + vel_grad_R(1, 2)*vel_R(2)) - 0.5_wp*(vel_grad_L(2, 1)*vel_L(2) + vel_grad_R(2, 1)*vel_R(2)) - if (num_dims > 2) then - flux_src_vf(momxb)%sf(j, k, l) = flux_src_vf(momxb)%sf(j, k, l) - (-2._wp/3._wp)*0.5_wp*(vel_grad_L(3, 3) + vel_grad_R(3, 3)) - flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) - (-2._wp/3._wp)*0.5_wp*(vel_grad_L(3, 3)*vel_L(1) + vel_grad_R(3, 3)*vel_R(1)) - - flux_src_vf(momxb + 2)%sf(j, k, l) = flux_src_vf(momxb + 2)%sf(j, k, l) - 0.5_wp*(vel_grad_L(1, 3) + vel_grad_R(1, 3)) - 0.5_wp*(vel_grad_L(3, 1) + vel_grad_R(3, 1)) - flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) - 0.5_wp*(vel_grad_L(1, 3)*vel_L(3) + vel_grad_R(1, 3)*vel_R(3)) - 0.5_wp*(vel_grad_L(3, 1)*vel_L(3) + vel_grad_R(3, 1)*vel_R(3)) - end if - end if + if (Re_size(i) > 0) Re_L(i) = 0._wp + if (Re_size(i) > 0) Re_R(i) = 0._wp + + $:GPU_LOOP(parallelism='[seq]') + do q = 1, Re_size(i) + Re_L(i) = alpha_L(Re_idx(i, q))/Res_gs(i, q) & + + Re_L(i) + Re_R(i) = alpha_R(Re_idx(i, q))/Res_gs(i, q) & + + Re_R(i) + end do - else if (norm_dir == 2) then - flux_src_vf(momxb + 1)%sf(j, k, l) = flux_src_vf(momxb + 1)%sf(j, k, l) - (-2._wp/3._wp)*0.5_wp*(vel_grad_L(1, 1) + vel_grad_R(1, 1)) - flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) - (-2._wp/3._wp)*0.5_wp*(vel_grad_L(1, 1)*vel_L(2) + vel_grad_R(1, 1)*vel_R(2)) + Re_L(i) = 1._wp/max(Re_L(i), sgm_eps) + Re_R(i) = 1._wp/max(Re_R(i), sgm_eps) + end do + + if (shear_stress) then + + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_dims + vel_grad_L(i, 1) = (dqL_prim_dx_vf(momxb + i - 1)%sf(j, k, l)/Re_L(1)) + vel_grad_R(i, 1) = (dqR_prim_dx_vf(momxb + i - 1)%sf(idx_right_phys(1), idx_right_phys(2), idx_right_phys(3))/Re_R(1)) + if (num_dims > 1) then + vel_grad_L(i, 2) = (dqL_prim_dy_vf(momxb + i - 1)%sf(j, k, l)/Re_L(1)) + vel_grad_R(i, 2) = (dqR_prim_dy_vf(momxb + i - 1)%sf(idx_right_phys(1), idx_right_phys(2), idx_right_phys(3))/Re_R(1)) + end if + if (num_dims > 2) then + vel_grad_L(i, 3) = (dqL_prim_dz_vf(momxb + i - 1)%sf(j, k, l)/Re_L(1)) + vel_grad_R(i, 3) = (dqR_prim_dz_vf(momxb + i - 1)%sf(idx_right_phys(1), idx_right_phys(2), idx_right_phys(3))/Re_R(1)) + end if + end do - flux_src_vf(momxb + 1)%sf(j, k, l) = flux_src_vf(momxb + 1)%sf(j, k, l) - (4._wp/3._wp)*0.5_wp*(vel_grad_L(2, 2) + vel_grad_R(2, 2)) - flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) - (4._wp/3._wp)*0.5_wp*(vel_grad_L(2, 2)*vel_L(2) + vel_grad_R(2, 2)*vel_R(2)) + if (norm_dir == 1) then + flux_src_vf(momxb)%sf(j, k, l) = flux_src_vf(momxb)%sf(j, k, l) - (4._wp/3._wp)*0.5_wp*(vel_grad_L(1, 1) + vel_grad_R(1, 1)) + flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) - (4._wp/3._wp)*0.5_wp*(vel_grad_L(1, 1)*vel_L(1) + vel_grad_R(1, 1)*vel_R(1)) + if (num_dims > 1) then + flux_src_vf(momxb)%sf(j, k, l) = flux_src_vf(momxb)%sf(j, k, l) - (-2._wp/3._wp)*0.5_wp*(vel_grad_L(2, 2) + vel_grad_R(2, 2)) + flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) - (-2._wp/3._wp)*0.5_wp*(vel_grad_L(2, 2)*vel_L(1) + vel_grad_R(2, 2)*vel_R(1)) - flux_src_vf(momxb)%sf(j, k, l) = flux_src_vf(momxb)%sf(j, k, l) - 0.5_wp*(vel_grad_L(1, 2) + vel_grad_R(1, 2)) - 0.5_wp*(vel_grad_L(2, 1) + vel_grad_R(2, 1)) - flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) - 0.5_wp*(vel_grad_L(1, 2)*vel_L(1) + vel_grad_R(1, 2)*vel_R(1)) - 0.5_wp*(vel_grad_L(2, 1)*vel_L(1) + vel_grad_R(2, 1)*vel_R(1)) + flux_src_vf(momxb + 1)%sf(j, k, l) = flux_src_vf(momxb + 1)%sf(j, k, l) - 0.5_wp*(vel_grad_L(1, 2) + vel_grad_R(1, 2)) - 0.5_wp*(vel_grad_L(2, 1) + vel_grad_R(2, 1)) + flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) - 0.5_wp*(vel_grad_L(1, 2)*vel_L(2) + vel_grad_R(1, 2)*vel_R(2)) - 0.5_wp*(vel_grad_L(2, 1)*vel_L(2) + vel_grad_R(2, 1)*vel_R(2)) if (num_dims > 2) then - flux_src_vf(momxb + 1)%sf(j, k, l) = flux_src_vf(momxb + 1)%sf(j, k, l) - (-2._wp/3._wp)*0.5_wp*(vel_grad_L(3, 3) + vel_grad_R(3, 3)) - flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) - (-2._wp/3._wp)*0.5_wp*(vel_grad_L(3, 3)*vel_L(2) + vel_grad_R(3, 3)*vel_R(2)) + flux_src_vf(momxb)%sf(j, k, l) = flux_src_vf(momxb)%sf(j, k, l) - (-2._wp/3._wp)*0.5_wp*(vel_grad_L(3, 3) + vel_grad_R(3, 3)) + flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) - (-2._wp/3._wp)*0.5_wp*(vel_grad_L(3, 3)*vel_L(1) + vel_grad_R(3, 3)*vel_R(1)) - flux_src_vf(momxb + 2)%sf(j, k, l) = flux_src_vf(momxb + 2)%sf(j, k, l) - 0.5_wp*(vel_grad_L(2, 3) + vel_grad_R(2, 3)) - 0.5_wp*(vel_grad_L(3, 2) + vel_grad_R(3, 2)) - flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) - 0.5_wp*(vel_grad_L(2, 3)*vel_L(3) + vel_grad_R(2, 3)*vel_R(3)) - 0.5_wp*(vel_grad_L(3, 2)*vel_L(3) + vel_grad_R(3, 2)*vel_R(3)) + flux_src_vf(momxb + 2)%sf(j, k, l) = flux_src_vf(momxb + 2)%sf(j, k, l) - 0.5_wp*(vel_grad_L(1, 3) + vel_grad_R(1, 3)) - 0.5_wp*(vel_grad_L(3, 1) + vel_grad_R(3, 1)) + flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) - 0.5_wp*(vel_grad_L(1, 3)*vel_L(3) + vel_grad_R(1, 3)*vel_R(3)) - 0.5_wp*(vel_grad_L(3, 1)*vel_L(3) + vel_grad_R(3, 1)*vel_R(3)) end if - else - flux_src_vf(momxb + 2)%sf(j, k, l) = flux_src_vf(momxb + 2)%sf(j, k, l) - (-2._wp/3._wp)*0.5_wp*(vel_grad_L(1, 1) + vel_grad_R(1, 1)) - flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) - (-2._wp/3._wp)*0.5_wp*(vel_grad_L(1, 1)*vel_L(3) + vel_grad_R(1, 1)*vel_R(3)) - - flux_src_vf(momxb + 2)%sf(j, k, l) = flux_src_vf(momxb + 2)%sf(j, k, l) - (-2._wp/3._wp)*0.5_wp*(vel_grad_L(2, 2) + vel_grad_R(2, 2)) - flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) - (-2._wp/3._wp)*0.5_wp*(vel_grad_L(2, 2)*vel_L(3) + vel_grad_R(2, 2)*vel_R(3)) + end if - flux_src_vf(momxb)%sf(j, k, l) = flux_src_vf(momxb)%sf(j, k, l) - 0.5_wp*(vel_grad_L(1, 3) + vel_grad_R(1, 3)) - 0.5_wp*(vel_grad_L(3, 1) + vel_grad_R(3, 1)) - flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) - 0.5_wp*(vel_grad_L(1, 3)*vel_L(1) + vel_grad_R(1, 3)*vel_R(1)) - 0.5_wp*(vel_grad_L(3, 1)*vel_L(1) + vel_grad_R(3, 1)*vel_R(1)) + else if (norm_dir == 2) then + flux_src_vf(momxb + 1)%sf(j, k, l) = flux_src_vf(momxb + 1)%sf(j, k, l) - (-2._wp/3._wp)*0.5_wp*(vel_grad_L(1, 1) + vel_grad_R(1, 1)) + flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) - (-2._wp/3._wp)*0.5_wp*(vel_grad_L(1, 1)*vel_L(2) + vel_grad_R(1, 1)*vel_R(2)) - flux_src_vf(momxb + 2)%sf(j, k, l) = flux_src_vf(momxb + 2)%sf(j, k, l) - (4._wp/3._wp)*0.5_wp*(vel_grad_L(3, 3) + vel_grad_R(3, 3)) - flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) - (4._wp/3._wp)*0.5_wp*(vel_grad_L(3, 3)*vel_L(3) + vel_grad_R(3, 3)*vel_R(3)) + flux_src_vf(momxb + 1)%sf(j, k, l) = flux_src_vf(momxb + 1)%sf(j, k, l) - (4._wp/3._wp)*0.5_wp*(vel_grad_L(2, 2) + vel_grad_R(2, 2)) + flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) - (4._wp/3._wp)*0.5_wp*(vel_grad_L(2, 2)*vel_L(2) + vel_grad_R(2, 2)*vel_R(2)) - flux_src_vf(momxb + 1)%sf(j, k, l) = flux_src_vf(momxb + 1)%sf(j, k, l) - 0.5_wp*(vel_grad_L(2, 3) + vel_grad_R(2, 3)) - 0.5_wp*(vel_grad_L(3, 2) + vel_grad_R(3, 2)) - flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) - 0.5_wp*(vel_grad_L(2, 3)*vel_L(2) + vel_grad_R(2, 3)*vel_R(2)) - 0.5_wp*(vel_grad_L(3, 2)*vel_L(2) + vel_grad_R(3, 2)*vel_R(2)) + flux_src_vf(momxb)%sf(j, k, l) = flux_src_vf(momxb)%sf(j, k, l) - 0.5_wp*(vel_grad_L(1, 2) + vel_grad_R(1, 2)) - 0.5_wp*(vel_grad_L(2, 1) + vel_grad_R(2, 1)) + flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) - 0.5_wp*(vel_grad_L(1, 2)*vel_L(1) + vel_grad_R(1, 2)*vel_R(1)) - 0.5_wp*(vel_grad_L(2, 1)*vel_L(1) + vel_grad_R(2, 1)*vel_R(1)) + if (num_dims > 2) then + flux_src_vf(momxb + 1)%sf(j, k, l) = flux_src_vf(momxb + 1)%sf(j, k, l) - (-2._wp/3._wp)*0.5_wp*(vel_grad_L(3, 3) + vel_grad_R(3, 3)) + flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) - (-2._wp/3._wp)*0.5_wp*(vel_grad_L(3, 3)*vel_L(2) + vel_grad_R(3, 3)*vel_R(2)) + flux_src_vf(momxb + 2)%sf(j, k, l) = flux_src_vf(momxb + 2)%sf(j, k, l) - 0.5_wp*(vel_grad_L(2, 3) + vel_grad_R(2, 3)) - 0.5_wp*(vel_grad_L(3, 2) + vel_grad_R(3, 2)) + flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) - 0.5_wp*(vel_grad_L(2, 3)*vel_L(3) + vel_grad_R(2, 3)*vel_R(3)) - 0.5_wp*(vel_grad_L(3, 2)*vel_L(3) + vel_grad_R(3, 2)*vel_R(3)) end if - end if + else + flux_src_vf(momxb + 2)%sf(j, k, l) = flux_src_vf(momxb + 2)%sf(j, k, l) - (-2._wp/3._wp)*0.5_wp*(vel_grad_L(1, 1) + vel_grad_R(1, 1)) + flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) - (-2._wp/3._wp)*0.5_wp*(vel_grad_L(1, 1)*vel_L(3) + vel_grad_R(1, 1)*vel_R(3)) - if (bulk_stress) then + flux_src_vf(momxb + 2)%sf(j, k, l) = flux_src_vf(momxb + 2)%sf(j, k, l) - (-2._wp/3._wp)*0.5_wp*(vel_grad_L(2, 2) + vel_grad_R(2, 2)) + flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) - (-2._wp/3._wp)*0.5_wp*(vel_grad_L(2, 2)*vel_L(3) + vel_grad_R(2, 2)*vel_R(3)) - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_dims - vel_grad_L(i, 1) = (dqL_prim_dx_vf(momxb + i - 1)%sf(j, k, l)/Re_L(2)) - vel_grad_R(i, 1) = (dqR_prim_dx_vf(momxb + i - 1)%sf(idx_right_phys(1), idx_right_phys(2), idx_right_phys(3))/Re_R(2)) - if (num_dims > 1) then - vel_grad_L(i, 2) = (dqL_prim_dy_vf(momxb + i - 1)%sf(j, k, l)/Re_L(2)) - vel_grad_R(i, 2) = (dqR_prim_dy_vf(momxb + i - 1)%sf(idx_right_phys(1), idx_right_phys(2), idx_right_phys(3))/Re_R(2)) - end if - if (num_dims > 2) then - vel_grad_L(i, 3) = (dqL_prim_dz_vf(momxb + i - 1)%sf(j, k, l)/Re_L(2)) - vel_grad_R(i, 3) = (dqR_prim_dz_vf(momxb + i - 1)%sf(idx_right_phys(1), idx_right_phys(2), idx_right_phys(3))/Re_R(2)) - end if - end do + flux_src_vf(momxb)%sf(j, k, l) = flux_src_vf(momxb)%sf(j, k, l) - 0.5_wp*(vel_grad_L(1, 3) + vel_grad_R(1, 3)) - 0.5_wp*(vel_grad_L(3, 1) + vel_grad_R(3, 1)) + flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) - 0.5_wp*(vel_grad_L(1, 3)*vel_L(1) + vel_grad_R(1, 3)*vel_R(1)) - 0.5_wp*(vel_grad_L(3, 1)*vel_L(1) + vel_grad_R(3, 1)*vel_R(1)) - if (norm_dir == 1) then - flux_src_vf(momxb)%sf(j, k, l) = flux_src_vf(momxb)%sf(j, k, l) - 0.5_wp*(vel_grad_L(1, 1) + vel_grad_R(1, 1)) - flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) - 0.5_wp*(vel_grad_L(1, 1)*vel_L(1) + vel_grad_R(1, 1)*vel_R(1)) - if (num_dims > 1) then - flux_src_vf(momxb)%sf(j, k, l) = flux_src_vf(momxb)%sf(j, k, l) - 0.5_wp*(vel_grad_L(2, 2) + vel_grad_R(2, 2)) - flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) - 0.5_wp*(vel_grad_L(2, 2)*vel_L(1) + vel_grad_R(2, 2)*vel_R(1)) + flux_src_vf(momxb + 2)%sf(j, k, l) = flux_src_vf(momxb + 2)%sf(j, k, l) - (4._wp/3._wp)*0.5_wp*(vel_grad_L(3, 3) + vel_grad_R(3, 3)) + flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) - (4._wp/3._wp)*0.5_wp*(vel_grad_L(3, 3)*vel_L(3) + vel_grad_R(3, 3)*vel_R(3)) - if (num_dims > 2) then - flux_src_vf(momxb)%sf(j, k, l) = flux_src_vf(momxb)%sf(j, k, l) - 0.5_wp*(vel_grad_L(3, 3) + vel_grad_R(3, 3)) - flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) - 0.5_wp*(vel_grad_L(3, 3)*vel_L(1) + vel_grad_R(3, 3)*vel_R(1)) - end if - end if + flux_src_vf(momxb + 1)%sf(j, k, l) = flux_src_vf(momxb + 1)%sf(j, k, l) - 0.5_wp*(vel_grad_L(2, 3) + vel_grad_R(2, 3)) - 0.5_wp*(vel_grad_L(3, 2) + vel_grad_R(3, 2)) + flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) - 0.5_wp*(vel_grad_L(2, 3)*vel_L(2) + vel_grad_R(2, 3)*vel_R(2)) - 0.5_wp*(vel_grad_L(3, 2)*vel_L(2) + vel_grad_R(3, 2)*vel_R(2)) + + end if + end if + + if (bulk_stress) then - else if (norm_dir == 2) then - flux_src_vf(momxb + 1)%sf(j, k, l) = flux_src_vf(momxb + 1)%sf(j, k, l) - 0.5_wp*(vel_grad_L(1, 1) + vel_grad_R(1, 1)) - flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) - 0.5_wp*(vel_grad_L(1, 1)*vel_L(2) + vel_grad_R(1, 1)*vel_R(2)) + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_dims + vel_grad_L(i, 1) = (dqL_prim_dx_vf(momxb + i - 1)%sf(j, k, l)/Re_L(2)) + vel_grad_R(i, 1) = (dqR_prim_dx_vf(momxb + i - 1)%sf(idx_right_phys(1), idx_right_phys(2), idx_right_phys(3))/Re_R(2)) + if (num_dims > 1) then + vel_grad_L(i, 2) = (dqL_prim_dy_vf(momxb + i - 1)%sf(j, k, l)/Re_L(2)) + vel_grad_R(i, 2) = (dqR_prim_dy_vf(momxb + i - 1)%sf(idx_right_phys(1), idx_right_phys(2), idx_right_phys(3))/Re_R(2)) + end if + if (num_dims > 2) then + vel_grad_L(i, 3) = (dqL_prim_dz_vf(momxb + i - 1)%sf(j, k, l)/Re_L(2)) + vel_grad_R(i, 3) = (dqR_prim_dz_vf(momxb + i - 1)%sf(idx_right_phys(1), idx_right_phys(2), idx_right_phys(3))/Re_R(2)) + end if + end do - flux_src_vf(momxb + 1)%sf(j, k, l) = flux_src_vf(momxb + 1)%sf(j, k, l) - 0.5_wp*(vel_grad_L(2, 2) + vel_grad_R(2, 2)) - flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) - 0.5_wp*(vel_grad_L(2, 2)*vel_L(2) + vel_grad_R(2, 2)*vel_R(2)) + if (norm_dir == 1) then + flux_src_vf(momxb)%sf(j, k, l) = flux_src_vf(momxb)%sf(j, k, l) - 0.5_wp*(vel_grad_L(1, 1) + vel_grad_R(1, 1)) + flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) - 0.5_wp*(vel_grad_L(1, 1)*vel_L(1) + vel_grad_R(1, 1)*vel_R(1)) + if (num_dims > 1) then + flux_src_vf(momxb)%sf(j, k, l) = flux_src_vf(momxb)%sf(j, k, l) - 0.5_wp*(vel_grad_L(2, 2) + vel_grad_R(2, 2)) + flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) - 0.5_wp*(vel_grad_L(2, 2)*vel_L(1) + vel_grad_R(2, 2)*vel_R(1)) if (num_dims > 2) then - flux_src_vf(momxb + 1)%sf(j, k, l) = flux_src_vf(momxb + 1)%sf(j, k, l) - 0.5_wp*(vel_grad_L(3, 3) + vel_grad_R(3, 3)) - flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) - 0.5_wp*(vel_grad_L(3, 3)*vel_L(2) + vel_grad_R(3, 3)*vel_R(2)) + flux_src_vf(momxb)%sf(j, k, l) = flux_src_vf(momxb)%sf(j, k, l) - 0.5_wp*(vel_grad_L(3, 3) + vel_grad_R(3, 3)) + flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) - 0.5_wp*(vel_grad_L(3, 3)*vel_L(1) + vel_grad_R(3, 3)*vel_R(1)) end if - else - flux_src_vf(momxb + 2)%sf(j, k, l) = flux_src_vf(momxb + 2)%sf(j, k, l) - 0.5_wp*(vel_grad_L(1, 1) + vel_grad_R(1, 1)) - flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) - 0.5_wp*(vel_grad_L(1, 1)*vel_L(3) + vel_grad_R(1, 1)*vel_R(3)) + end if - flux_src_vf(momxb + 2)%sf(j, k, l) = flux_src_vf(momxb + 2)%sf(j, k, l) - 0.5_wp*(vel_grad_L(2, 2) + vel_grad_R(2, 2)) - flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) - 0.5_wp*(vel_grad_L(2, 2)*vel_L(3) + vel_grad_R(2, 2)*vel_R(3)) + else if (norm_dir == 2) then + flux_src_vf(momxb + 1)%sf(j, k, l) = flux_src_vf(momxb + 1)%sf(j, k, l) - 0.5_wp*(vel_grad_L(1, 1) + vel_grad_R(1, 1)) + flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) - 0.5_wp*(vel_grad_L(1, 1)*vel_L(2) + vel_grad_R(1, 1)*vel_R(2)) - flux_src_vf(momxb + 2)%sf(j, k, l) = flux_src_vf(momxb + 2)%sf(j, k, l) - 0.5_wp*(vel_grad_L(3, 3) + vel_grad_R(3, 3)) - flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) - 0.5_wp*(vel_grad_L(3, 3)*vel_L(3) + vel_grad_R(3, 3)*vel_R(3)) + flux_src_vf(momxb + 1)%sf(j, k, l) = flux_src_vf(momxb + 1)%sf(j, k, l) - 0.5_wp*(vel_grad_L(2, 2) + vel_grad_R(2, 2)) + flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) - 0.5_wp*(vel_grad_L(2, 2)*vel_L(2) + vel_grad_R(2, 2)*vel_R(2)) + if (num_dims > 2) then + flux_src_vf(momxb + 1)%sf(j, k, l) = flux_src_vf(momxb + 1)%sf(j, k, l) - 0.5_wp*(vel_grad_L(3, 3) + vel_grad_R(3, 3)) + flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) - 0.5_wp*(vel_grad_L(3, 3)*vel_L(2) + vel_grad_R(3, 3)*vel_R(2)) end if + else + flux_src_vf(momxb + 2)%sf(j, k, l) = flux_src_vf(momxb + 2)%sf(j, k, l) - 0.5_wp*(vel_grad_L(1, 1) + vel_grad_R(1, 1)) + flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) - 0.5_wp*(vel_grad_L(1, 1)*vel_L(3) + vel_grad_R(1, 1)*vel_R(3)) + + flux_src_vf(momxb + 2)%sf(j, k, l) = flux_src_vf(momxb + 2)%sf(j, k, l) - 0.5_wp*(vel_grad_L(2, 2) + vel_grad_R(2, 2)) + flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) - 0.5_wp*(vel_grad_L(2, 2)*vel_L(3) + vel_grad_R(2, 2)*vel_R(3)) + + flux_src_vf(momxb + 2)%sf(j, k, l) = flux_src_vf(momxb + 2)%sf(j, k, l) - 0.5_wp*(vel_grad_L(3, 3) + vel_grad_R(3, 3)) + flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) - 0.5_wp*(vel_grad_L(3, 3)*vel_L(3) + vel_grad_R(3, 3)*vel_R(3)) end if - end do + + end if end do end do + end do $:END_GPU_PARALLEL_LOOP() end if @@ -1999,1576 +1999,1576 @@ contains if (model_eqns == 3) then !ME3 $:GPU_PARALLEL_LOOP(collapse=3, private='[i,j,k,l, q, vel_L, vel_R, Re_L, Re_R, alpha_L, alpha_R, Ys_L, Ys_R, Xs_L, Xs_R, Gamma_iL, Gamma_iR, Cp_iL, Cp_iR, Yi_avg, Phi_avg, h_iL, h_iR, h_avg_2, tau_e_L, tau_e_R, flux_ene_e, xi_field_L, xi_field_R, pcorr, zcoef, rho_L, rho_R, pres_L, pres_R, E_L, E_R, H_L, H_R, Cp_avg, Cv_avg, T_avg, eps, c_sum_Yi_Phi, T_L, T_R, Y_L, Y_R, MW_L, MW_R, R_gas_L, R_gas_R, Cp_L, Cp_R, Cv_L, Cv_R, Gamm_L, Gamm_R, gamma_L, gamma_R, pi_inf_L, pi_inf_R, qv_L, qv_R, c_L, c_R, G_L, G_R, rho_avg, H_avg, c_avg, gamma_avg, ptilde_L, ptilde_R, vel_L_rms, vel_R_rms, vel_avg_rms, vel_L_tmp, vel_R_tmp, Ms_L, Ms_R, pres_SL, pres_SR, alpha_L_sum, alpha_R_sum, rho_Star, E_Star, p_Star, p_K_Star, vel_K_star, s_L, s_R, s_M, s_P, s_S, xi_M, xi_P, xi_L, xi_R, xi_MP, xi_PP, idx1, idxi]') - do l = is3%beg, is3%end - do k = is2%beg, is2%end - do j = is1%beg, is1%end - - idx1 = dir_idx(1) - - vel_L_rms = 0._wp; vel_R_rms = 0._wp - rho_L = 0._wp; rho_R = 0._wp - gamma_L = 0._wp; gamma_R = 0._wp - pi_inf_L = 0._wp; pi_inf_R = 0._wp - qv_L = 0._wp; qv_R = 0._wp - alpha_L_sum = 0._wp; alpha_R_sum = 0._wp - - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_dims - vel_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, contxe + i) - vel_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, contxe + i) - vel_L_rms = vel_L_rms + vel_L(i)**2._wp - vel_R_rms = vel_R_rms + vel_R(i)**2._wp - end do + do l = is3%beg, is3%end + do k = is2%beg, is2%end + do j = is1%beg, is1%end - pres_L = qL_prim_rs${XYZ}$_vf(j, k, l, E_idx) - pres_R = qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx) + idx1 = dir_idx(1) - rho_L = 0._wp - gamma_L = 0._wp - pi_inf_L = 0._wp - qv_L = 0._wp + vel_L_rms = 0._wp; vel_R_rms = 0._wp + rho_L = 0._wp; rho_R = 0._wp + gamma_L = 0._wp; gamma_R = 0._wp + pi_inf_L = 0._wp; pi_inf_R = 0._wp + qv_L = 0._wp; qv_R = 0._wp + alpha_L_sum = 0._wp; alpha_R_sum = 0._wp - rho_R = 0._wp - gamma_R = 0._wp - pi_inf_R = 0._wp - qv_R = 0._wp + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_dims + vel_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, contxe + i) + vel_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, contxe + i) + vel_L_rms = vel_L_rms + vel_L(i)**2._wp + vel_R_rms = vel_R_rms + vel_R(i)**2._wp + end do - alpha_L_sum = 0._wp - alpha_R_sum = 0._wp + pres_L = qL_prim_rs${XYZ}$_vf(j, k, l, E_idx) + pres_R = qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx) - if (mpp_lim) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - qL_prim_rs${XYZ}$_vf(j, k, l, i) = max(0._wp, qL_prim_rs${XYZ}$_vf(j, k, l, i)) - qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i) = min(max(0._wp, qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i)), 1._wp) - alpha_L_sum = alpha_L_sum + qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i) - end do + rho_L = 0._wp + gamma_L = 0._wp + pi_inf_L = 0._wp + qv_L = 0._wp - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) = max(0._wp, qR_prim_rs${XYZ}$_vf(j + 1, k, l, i)) - qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i) = min(max(0._wp, qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i)), 1._wp) - alpha_R_sum = alpha_R_sum + qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i) - end do + rho_R = 0._wp + gamma_R = 0._wp + pi_inf_R = 0._wp + qv_R = 0._wp - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i) = qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i)/max(alpha_L_sum, sgm_eps) - qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i)/max(alpha_R_sum, sgm_eps) - end do - end if + alpha_L_sum = 0._wp + alpha_R_sum = 0._wp + if (mpp_lim) then $:GPU_LOOP(parallelism='[seq]') do i = 1, num_fluids - rho_L = rho_L + qL_prim_rs${XYZ}$_vf(j, k, l, i) - gamma_L = gamma_L + qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i)*gammas(i) - pi_inf_L = pi_inf_L + qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i)*pi_infs(i) - qv_L = qv_L + qL_prim_rs${XYZ}$_vf(j, k, l, i)*qvs(i) + qL_prim_rs${XYZ}$_vf(j, k, l, i) = max(0._wp, qL_prim_rs${XYZ}$_vf(j, k, l, i)) + qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i) = min(max(0._wp, qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i)), 1._wp) + alpha_L_sum = alpha_L_sum + qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i) + end do - rho_R = rho_R + qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) - gamma_R = gamma_R + qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i)*gammas(i) - pi_inf_R = pi_inf_R + qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i)*pi_infs(i) - qv_R = qv_R + qR_prim_rs${XYZ}$_vf(j + 1, k, l, i)*qvs(i) + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids + qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) = max(0._wp, qR_prim_rs${XYZ}$_vf(j + 1, k, l, i)) + qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i) = min(max(0._wp, qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i)), 1._wp) + alpha_R_sum = alpha_R_sum + qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i) + end do - alpha_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, advxb + i - 1) - alpha_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, advxb + i - 1) + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids + qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i) = qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i)/max(alpha_L_sum, sgm_eps) + qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i)/max(alpha_R_sum, sgm_eps) end do + end if + + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids + rho_L = rho_L + qL_prim_rs${XYZ}$_vf(j, k, l, i) + gamma_L = gamma_L + qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i)*gammas(i) + pi_inf_L = pi_inf_L + qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i)*pi_infs(i) + qv_L = qv_L + qL_prim_rs${XYZ}$_vf(j, k, l, i)*qvs(i) + + rho_R = rho_R + qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) + gamma_R = gamma_R + qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i)*gammas(i) + pi_inf_R = pi_inf_R + qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i)*pi_infs(i) + qv_R = qv_R + qR_prim_rs${XYZ}$_vf(j + 1, k, l, i)*qvs(i) + + alpha_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, advxb + i - 1) + alpha_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, advxb + i - 1) + end do - if (viscous) then + if (viscous) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, 2 + Re_L(i) = dflt_real + Re_R(i) = dflt_real + if (Re_size(i) > 0) Re_L(i) = 0._wp + if (Re_size(i) > 0) Re_R(i) = 0._wp $:GPU_LOOP(parallelism='[seq]') - do i = 1, 2 - Re_L(i) = dflt_real - Re_R(i) = dflt_real - if (Re_size(i) > 0) Re_L(i) = 0._wp - if (Re_size(i) > 0) Re_R(i) = 0._wp - $:GPU_LOOP(parallelism='[seq]') - do q = 1, Re_size(i) - Re_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + Re_idx(i, q))/Res_gs(i, q) & - + Re_L(i) - Re_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + Re_idx(i, q))/Res_gs(i, q) & - + Re_R(i) - end do - Re_L(i) = 1._wp/max(Re_L(i), sgm_eps) - Re_R(i) = 1._wp/max(Re_R(i), sgm_eps) + do q = 1, Re_size(i) + Re_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + Re_idx(i, q))/Res_gs(i, q) & + + Re_L(i) + Re_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + Re_idx(i, q))/Res_gs(i, q) & + + Re_R(i) end do - end if + Re_L(i) = 1._wp/max(Re_L(i), sgm_eps) + Re_R(i) = 1._wp/max(Re_R(i), sgm_eps) + end do + end if - E_L = gamma_L*pres_L + pi_inf_L + 5.e-1_wp*rho_L*vel_L_rms + qv_L - E_R = gamma_R*pres_R + pi_inf_R + 5.e-1_wp*rho_R*vel_R_rms + qv_R + E_L = gamma_L*pres_L + pi_inf_L + 5.e-1_wp*rho_L*vel_L_rms + qv_L + E_R = gamma_R*pres_R + pi_inf_R + 5.e-1_wp*rho_R*vel_R_rms + qv_R - ! ENERGY ADJUSTMENTS FOR HYPOELASTIC ENERGY - if (hypoelasticity) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, strxe - strxb + 1 - tau_e_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, strxb - 1 + i) - tau_e_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, strxb - 1 + i) - end do - G_L = 0._wp; G_R = 0._wp - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - G_L = G_L + alpha_L(i)*Gs_rs(i) - G_R = G_R + alpha_R(i)*Gs_rs(i) - end do - $:GPU_LOOP(parallelism='[seq]') - do i = 1, strxe - strxb + 1 - ! Elastic contribution to energy if G large enough - if ((G_L > verysmall) .and. (G_R > verysmall)) then + ! ENERGY ADJUSTMENTS FOR HYPOELASTIC ENERGY + if (hypoelasticity) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, strxe - strxb + 1 + tau_e_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, strxb - 1 + i) + tau_e_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, strxb - 1 + i) + end do + G_L = 0._wp; G_R = 0._wp + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids + G_L = G_L + alpha_L(i)*Gs_rs(i) + G_R = G_R + alpha_R(i)*Gs_rs(i) + end do + $:GPU_LOOP(parallelism='[seq]') + do i = 1, strxe - strxb + 1 + ! Elastic contribution to energy if G large enough + if ((G_L > verysmall) .and. (G_R > verysmall)) then + E_L = E_L + (tau_e_L(i)*tau_e_L(i))/(4._wp*G_L) + E_R = E_R + (tau_e_R(i)*tau_e_R(i))/(4._wp*G_R) + ! Additional terms in 2D and 3D + if ((i == 2) .or. (i == 4) .or. (i == 5)) then E_L = E_L + (tau_e_L(i)*tau_e_L(i))/(4._wp*G_L) E_R = E_R + (tau_e_R(i)*tau_e_R(i))/(4._wp*G_R) - ! Additional terms in 2D and 3D - if ((i == 2) .or. (i == 4) .or. (i == 5)) then - E_L = E_L + (tau_e_L(i)*tau_e_L(i))/(4._wp*G_L) - E_R = E_R + (tau_e_R(i)*tau_e_R(i))/(4._wp*G_R) - end if end if - end do - end if - - ! ENERGY ADJUSTMENTS FOR HYPERELASTIC ENERGY - if (hyperelasticity) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_dims - xi_field_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, xibeg - 1 + i) - xi_field_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, xibeg - 1 + i) - end do - G_L = 0._wp; G_R = 0._wp; - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - ! Mixture left and right shear modulus - G_L = G_L + alpha_L(i)*Gs_rs(i) - G_R = G_R + alpha_R(i)*Gs_rs(i) - end do - ! Elastic contribution to energy if G large enough - if (G_L > verysmall .and. G_R > verysmall) then - E_L = E_L + G_L*qL_prim_rs${XYZ}$_vf(j, k, l, xiend + 1) - E_R = E_R + G_R*qR_prim_rs${XYZ}$_vf(j + 1, k, l, xiend + 1) end if - $:GPU_LOOP(parallelism='[seq]') - do i = 1, b_size - 1 - tau_e_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, strxb - 1 + i) - tau_e_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, strxb - 1 + i) - end do + end do + end if + + ! ENERGY ADJUSTMENTS FOR HYPERELASTIC ENERGY + if (hyperelasticity) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_dims + xi_field_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, xibeg - 1 + i) + xi_field_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, xibeg - 1 + i) + end do + G_L = 0._wp; G_R = 0._wp; + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids + ! Mixture left and right shear modulus + G_L = G_L + alpha_L(i)*Gs_rs(i) + G_R = G_R + alpha_R(i)*Gs_rs(i) + end do + ! Elastic contribution to energy if G large enough + if (G_L > verysmall .and. G_R > verysmall) then + E_L = E_L + G_L*qL_prim_rs${XYZ}$_vf(j, k, l, xiend + 1) + E_R = E_R + G_R*qR_prim_rs${XYZ}$_vf(j + 1, k, l, xiend + 1) end if + $:GPU_LOOP(parallelism='[seq]') + do i = 1, b_size - 1 + tau_e_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, strxb - 1 + i) + tau_e_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, strxb - 1 + i) + end do + end if - H_L = (E_L + pres_L)/rho_L - H_R = (E_R + pres_R)/rho_R + H_L = (E_L + pres_L)/rho_L + H_R = (E_R + pres_R)/rho_R - @:compute_average_state() + @:compute_average_state() - call s_compute_speed_of_sound(pres_L, rho_L, gamma_L, pi_inf_L, H_L, alpha_L, & - vel_L_rms, 0._wp, c_L) + call s_compute_speed_of_sound(pres_L, rho_L, gamma_L, pi_inf_L, H_L, alpha_L, & + vel_L_rms, 0._wp, c_L) - call s_compute_speed_of_sound(pres_R, rho_R, gamma_R, pi_inf_R, H_R, alpha_R, & - vel_R_rms, 0._wp, c_R) + call s_compute_speed_of_sound(pres_R, rho_R, gamma_R, pi_inf_R, H_R, alpha_R, & + vel_R_rms, 0._wp, c_R) - !> The computation of c_avg does not require all the variables, and therefore the non '_avg' - ! variables are placeholders to call the subroutine. - call s_compute_speed_of_sound(pres_R, rho_avg, gamma_avg, pi_inf_R, H_avg, alpha_R, & - vel_avg_rms, 0._wp, c_avg) + !> The computation of c_avg does not require all the variables, and therefore the non '_avg' + ! variables are placeholders to call the subroutine. + call s_compute_speed_of_sound(pres_R, rho_avg, gamma_avg, pi_inf_R, H_avg, alpha_R, & + vel_avg_rms, 0._wp, c_avg) - if (viscous) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, 2 - Re_avg_rs${XYZ}$_vf(j, k, l, i) = 2._wp/(1._wp/Re_L(i) + 1._wp/Re_R(i)) - end do - end if + if (viscous) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, 2 + Re_avg_rs${XYZ}$_vf(j, k, l, i) = 2._wp/(1._wp/Re_L(i) + 1._wp/Re_R(i)) + end do + end if - ! Low Mach correction - if (low_Mach == 2) then - @:compute_low_Mach_correction() - end if + ! Low Mach correction + if (low_Mach == 2) then + @:compute_low_Mach_correction() + end if - ! COMPUTING THE DIRECT WAVE SPEEDS - if (wave_speeds == 1) then - if (elasticity) then - s_L = min(vel_L(dir_idx(1)) - sqrt(c_L*c_L + & - (((4._wp*G_L)/3._wp) + tau_e_L(dir_idx_tau(1)))/rho_L), vel_R(dir_idx(1)) - sqrt(c_R*c_R + & - (((4._wp*G_R)/3._wp) + tau_e_R(dir_idx_tau(1)))/rho_R)) - s_R = max(vel_R(dir_idx(1)) + sqrt(c_R*c_R + & - (((4._wp*G_R)/3._wp) + tau_e_R(dir_idx_tau(1)))/rho_R), vel_L(dir_idx(1)) + sqrt(c_L*c_L + & - (((4._wp*G_L)/3._wp) + tau_e_L(dir_idx_tau(1)))/rho_L)) - s_S = (pres_R - tau_e_R(dir_idx_tau(1)) - pres_L + & - tau_e_L(dir_idx_tau(1)) + rho_L*vel_L(idx1)*(s_L - vel_L(idx1)) - & - rho_R*vel_R(idx1)*(s_R - vel_R(idx1)))/(rho_L*(s_L - vel_L(idx1)) - & - rho_R*(s_R - vel_R(idx1))) - else - s_L = min(vel_L(dir_idx(1)) - c_L, vel_R(dir_idx(1)) - c_R) - s_R = max(vel_R(dir_idx(1)) + c_R, vel_L(dir_idx(1)) + c_L) - s_S = (pres_R - pres_L + rho_L*vel_L(dir_idx(1))* & - (s_L - vel_L(dir_idx(1))) - rho_R*vel_R(dir_idx(1))*(s_R - vel_R(dir_idx(1)))) & - /(rho_L*(s_L - vel_L(dir_idx(1))) - rho_R*(s_R - vel_R(dir_idx(1)))) + ! COMPUTING THE DIRECT WAVE SPEEDS + if (wave_speeds == 1) then + if (elasticity) then + s_L = min(vel_L(dir_idx(1)) - sqrt(c_L*c_L + & + (((4._wp*G_L)/3._wp) + tau_e_L(dir_idx_tau(1)))/rho_L), vel_R(dir_idx(1)) - sqrt(c_R*c_R + & + (((4._wp*G_R)/3._wp) + tau_e_R(dir_idx_tau(1)))/rho_R)) + s_R = max(vel_R(dir_idx(1)) + sqrt(c_R*c_R + & + (((4._wp*G_R)/3._wp) + tau_e_R(dir_idx_tau(1)))/rho_R), vel_L(dir_idx(1)) + sqrt(c_L*c_L + & + (((4._wp*G_L)/3._wp) + tau_e_L(dir_idx_tau(1)))/rho_L)) + s_S = (pres_R - tau_e_R(dir_idx_tau(1)) - pres_L + & + tau_e_L(dir_idx_tau(1)) + rho_L*vel_L(idx1)*(s_L - vel_L(idx1)) - & + rho_R*vel_R(idx1)*(s_R - vel_R(idx1)))/(rho_L*(s_L - vel_L(idx1)) - & + rho_R*(s_R - vel_R(idx1))) + else + s_L = min(vel_L(dir_idx(1)) - c_L, vel_R(dir_idx(1)) - c_R) + s_R = max(vel_R(dir_idx(1)) + c_R, vel_L(dir_idx(1)) + c_L) + s_S = (pres_R - pres_L + rho_L*vel_L(dir_idx(1))* & + (s_L - vel_L(dir_idx(1))) - rho_R*vel_R(dir_idx(1))*(s_R - vel_R(dir_idx(1)))) & + /(rho_L*(s_L - vel_L(dir_idx(1))) - rho_R*(s_R - vel_R(dir_idx(1)))) - end if - elseif (wave_speeds == 2) then - pres_SL = 5.e-1_wp*(pres_L + pres_R + rho_avg*c_avg* & - (vel_L(dir_idx(1)) - & - vel_R(dir_idx(1)))) - - pres_SR = pres_SL - - Ms_L = max(1._wp, sqrt(1._wp + ((5.e-1_wp + gamma_L)/(1._wp + gamma_L))* & - (pres_SL/pres_L - 1._wp)*pres_L/ & - ((pres_L + pi_inf_L/(1._wp + gamma_L))))) - Ms_R = max(1._wp, sqrt(1._wp + ((5.e-1_wp + gamma_R)/(1._wp + gamma_R))* & - (pres_SR/pres_R - 1._wp)*pres_R/ & - ((pres_R + pi_inf_R/(1._wp + gamma_R))))) - - s_L = vel_L(dir_idx(1)) - c_L*Ms_L - s_R = vel_R(dir_idx(1)) + c_R*Ms_R - - s_S = 5.e-1_wp*((vel_L(dir_idx(1)) + vel_R(dir_idx(1))) + & - (pres_L - pres_R)/ & - (rho_avg*c_avg)) end if + elseif (wave_speeds == 2) then + pres_SL = 5.e-1_wp*(pres_L + pres_R + rho_avg*c_avg* & + (vel_L(dir_idx(1)) - & + vel_R(dir_idx(1)))) - ! follows Einfeldt et al. - ! s_M/P = min/max(0.,s_L/R) - s_M = min(0._wp, s_L); s_P = max(0._wp, s_R) - - ! goes with q_star_L/R = xi_L/R * (variable) - ! xi_L/R = ( ( s_L/R - u_L/R )/(s_L/R - s_star) ) - xi_L = (s_L - vel_L(idx1))/(s_L - s_S) - xi_R = (s_R - vel_R(idx1))/(s_R - s_S) - - ! goes with numerical star velocity in x/y/z directions - ! xi_P/M = 0.5 +/m sgn(0.5,s_star) - xi_M = (5.e-1_wp + sign(0.5_wp, s_S)) - xi_P = (5.e-1_wp - sign(0.5_wp, s_S)) - - ! goes with the numerical velocity in x/y/z directions - ! xi_P/M (pressure) = min/max(0. sgn(1,sL/sR)) - xi_MP = -min(0._wp, sign(1._wp, s_L)) - xi_PP = max(0._wp, sign(1._wp, s_R)) - - E_star = xi_M*(E_L + xi_MP*(xi_L*(E_L + (s_S - vel_L(dir_idx(1)))* & - (rho_L*s_S + pres_L/(s_L - vel_L(dir_idx(1))))) - E_L)) + & - xi_P*(E_R + xi_PP*(xi_R*(E_R + (s_S - vel_R(dir_idx(1)))* & - (rho_R*s_S + pres_R/(s_R - vel_R(dir_idx(1))))) - E_R)) - p_Star = xi_M*(pres_L + xi_MP*(rho_L*(s_L - vel_L(dir_idx(1)))*(s_S - vel_L(dir_idx(1))))) + & - xi_P*(pres_R + xi_PP*(rho_R*(s_R - vel_R(dir_idx(1)))*(s_S - vel_R(dir_idx(1))))) - - rho_Star = xi_M*(rho_L*(xi_MP*xi_L + 1._wp - xi_MP)) + & - xi_P*(rho_R*(xi_PP*xi_R + 1._wp - xi_PP)) - - vel_K_Star = vel_L(idx1)*(1._wp - xi_MP) + xi_MP*vel_R(idx1) + & - xi_MP*xi_PP*(s_S - vel_R(idx1)) - - ! Low Mach correction - if (low_Mach == 1) then - @:compute_low_Mach_correction() - else - pcorr = 0._wp - end if + pres_SR = pres_SL + + Ms_L = max(1._wp, sqrt(1._wp + ((5.e-1_wp + gamma_L)/(1._wp + gamma_L))* & + (pres_SL/pres_L - 1._wp)*pres_L/ & + ((pres_L + pi_inf_L/(1._wp + gamma_L))))) + Ms_R = max(1._wp, sqrt(1._wp + ((5.e-1_wp + gamma_R)/(1._wp + gamma_R))* & + (pres_SR/pres_R - 1._wp)*pres_R/ & + ((pres_R + pi_inf_R/(1._wp + gamma_R))))) + + s_L = vel_L(dir_idx(1)) - c_L*Ms_L + s_R = vel_R(dir_idx(1)) + c_R*Ms_R + + s_S = 5.e-1_wp*((vel_L(dir_idx(1)) + vel_R(dir_idx(1))) + & + (pres_L - pres_R)/ & + (rho_avg*c_avg)) + end if + + ! follows Einfeldt et al. + ! s_M/P = min/max(0.,s_L/R) + s_M = min(0._wp, s_L); s_P = max(0._wp, s_R) + + ! goes with q_star_L/R = xi_L/R * (variable) + ! xi_L/R = ( ( s_L/R - u_L/R )/(s_L/R - s_star) ) + xi_L = (s_L - vel_L(idx1))/(s_L - s_S) + xi_R = (s_R - vel_R(idx1))/(s_R - s_S) + + ! goes with numerical star velocity in x/y/z directions + ! xi_P/M = 0.5 +/m sgn(0.5,s_star) + xi_M = (5.e-1_wp + sign(0.5_wp, s_S)) + xi_P = (5.e-1_wp - sign(0.5_wp, s_S)) + + ! goes with the numerical velocity in x/y/z directions + ! xi_P/M (pressure) = min/max(0. sgn(1,sL/sR)) + xi_MP = -min(0._wp, sign(1._wp, s_L)) + xi_PP = max(0._wp, sign(1._wp, s_R)) + + E_star = xi_M*(E_L + xi_MP*(xi_L*(E_L + (s_S - vel_L(dir_idx(1)))* & + (rho_L*s_S + pres_L/(s_L - vel_L(dir_idx(1))))) - E_L)) + & + xi_P*(E_R + xi_PP*(xi_R*(E_R + (s_S - vel_R(dir_idx(1)))* & + (rho_R*s_S + pres_R/(s_R - vel_R(dir_idx(1))))) - E_R)) + p_Star = xi_M*(pres_L + xi_MP*(rho_L*(s_L - vel_L(dir_idx(1)))*(s_S - vel_L(dir_idx(1))))) + & + xi_P*(pres_R + xi_PP*(rho_R*(s_R - vel_R(dir_idx(1)))*(s_S - vel_R(dir_idx(1))))) + + rho_Star = xi_M*(rho_L*(xi_MP*xi_L + 1._wp - xi_MP)) + & + xi_P*(rho_R*(xi_PP*xi_R + 1._wp - xi_PP)) + + vel_K_Star = vel_L(idx1)*(1._wp - xi_MP) + xi_MP*vel_R(idx1) + & + xi_MP*xi_PP*(s_S - vel_R(idx1)) + + ! Low Mach correction + if (low_Mach == 1) then + @:compute_low_Mach_correction() + else + pcorr = 0._wp + end if + + ! COMPUTING FLUXES + ! MASS FLUX. + $:GPU_LOOP(parallelism='[seq]') + do i = 1, contxe + flux_rs${XYZ}$_vf(j, k, l, i) = & + xi_M*qL_prim_rs${XYZ}$_vf(j, k, l, i)*(vel_L(idx1) + s_M*(xi_L - 1._wp)) + & + xi_P*qR_prim_rs${XYZ}$_vf(j + 1, k, l, i)*(vel_R(idx1) + s_P*(xi_R - 1._wp)) + end do + + ! MOMENTUM FLUX. + ! f = \rho u u - \sigma, q = \rho u, q_star = \xi * \rho*(s_star, v, w) + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_dims + idxi = dir_idx(i) + flux_rs${XYZ}$_vf(j, k, l, contxe + idxi) = rho_Star*vel_K_Star* & + (dir_flg(idxi)*vel_K_Star + (1._wp - dir_flg(idxi))*(xi_M*vel_L(idxi) + xi_P*vel_R(idxi))) + dir_flg(idxi)*p_Star & + + (s_M/s_L)*(s_P/s_R)*dir_flg(idxi)*pcorr + end do - ! COMPUTING FLUXES - ! MASS FLUX. - $:GPU_LOOP(parallelism='[seq]') - do i = 1, contxe - flux_rs${XYZ}$_vf(j, k, l, i) = & - xi_M*qL_prim_rs${XYZ}$_vf(j, k, l, i)*(vel_L(idx1) + s_M*(xi_L - 1._wp)) + & - xi_P*qR_prim_rs${XYZ}$_vf(j + 1, k, l, i)*(vel_R(idx1) + s_P*(xi_R - 1._wp)) - end do + ! ENERGY FLUX. + ! f = u*(E-\sigma), q = E, q_star = \xi*E+(s-u)(\rho s_star - \sigma/(s-u)) + flux_rs${XYZ}$_vf(j, k, l, E_idx) = (E_star + p_Star)*vel_K_Star & + + (s_M/s_L)*(s_P/s_R)*pcorr*s_S - ! MOMENTUM FLUX. - ! f = \rho u u - \sigma, q = \rho u, q_star = \xi * \rho*(s_star, v, w) + ! ELASTICITY. Elastic shear stress additions for the momentum and energy flux + if (elasticity) then + flux_ene_e = 0._wp; $:GPU_LOOP(parallelism='[seq]') do i = 1, num_dims idxi = dir_idx(i) - flux_rs${XYZ}$_vf(j, k, l, contxe + idxi) = rho_Star*vel_K_Star* & - (dir_flg(idxi)*vel_K_Star + (1._wp - dir_flg(idxi))*(xi_M*vel_L(idxi) + xi_P*vel_R(idxi))) + dir_flg(idxi)*p_Star & - + (s_M/s_L)*(s_P/s_R)*dir_flg(idxi)*pcorr + ! MOMENTUM ELASTIC FLUX. + flux_rs${XYZ}$_vf(j, k, l, contxe + idxi) = & + flux_rs${XYZ}$_vf(j, k, l, contxe + idxi) & + - xi_M*tau_e_L(dir_idx_tau(i)) - xi_P*tau_e_R(dir_idx_tau(i)) + ! ENERGY ELASTIC FLUX. + flux_ene_e = flux_ene_e - & + xi_M*(vel_L(idxi)*tau_e_L(dir_idx_tau(i)) + & + s_M*(xi_L*((s_S - vel_L(i))*(tau_e_L(dir_idx_tau(i))/(s_L - vel_L(i)))))) - & + xi_P*(vel_R(idxi)*tau_e_R(dir_idx_tau(i)) + & + s_P*(xi_R*((s_S - vel_R(i))*(tau_e_R(dir_idx_tau(i))/(s_R - vel_R(i)))))) end do + flux_rs${XYZ}$_vf(j, k, l, E_idx) = flux_rs${XYZ}$_vf(j, k, l, E_idx) + flux_ene_e + end if - ! ENERGY FLUX. - ! f = u*(E-\sigma), q = E, q_star = \xi*E+(s-u)(\rho s_star - \sigma/(s-u)) - flux_rs${XYZ}$_vf(j, k, l, E_idx) = (E_star + p_Star)*vel_K_Star & - + (s_M/s_L)*(s_P/s_R)*pcorr*s_S + ! VOLUME FRACTION FLUX. + $:GPU_LOOP(parallelism='[seq]') + do i = advxb, advxe + flux_rs${XYZ}$_vf(j, k, l, i) = & + xi_M*qL_prim_rs${XYZ}$_vf(j, k, l, i)*s_S + & + xi_P*qR_prim_rs${XYZ}$_vf(j + 1, k, l, i)*s_S + end do - ! ELASTICITY. Elastic shear stress additions for the momentum and energy flux - if (elasticity) then - flux_ene_e = 0._wp; - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_dims - idxi = dir_idx(i) - ! MOMENTUM ELASTIC FLUX. - flux_rs${XYZ}$_vf(j, k, l, contxe + idxi) = & - flux_rs${XYZ}$_vf(j, k, l, contxe + idxi) & - - xi_M*tau_e_L(dir_idx_tau(i)) - xi_P*tau_e_R(dir_idx_tau(i)) - ! ENERGY ELASTIC FLUX. - flux_ene_e = flux_ene_e - & - xi_M*(vel_L(idxi)*tau_e_L(dir_idx_tau(i)) + & - s_M*(xi_L*((s_S - vel_L(i))*(tau_e_L(dir_idx_tau(i))/(s_L - vel_L(i)))))) - & - xi_P*(vel_R(idxi)*tau_e_R(dir_idx_tau(i)) + & - s_P*(xi_R*((s_S - vel_R(i))*(tau_e_R(dir_idx_tau(i))/(s_R - vel_R(i)))))) - end do - flux_rs${XYZ}$_vf(j, k, l, E_idx) = flux_rs${XYZ}$_vf(j, k, l, E_idx) + flux_ene_e - end if + ! SOURCE TERM FOR VOLUME FRACTION ADVECTION FLUX. + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_dims + idxi = dir_idx(i) + vel_src_rs${XYZ}$_vf(j, k, l, idxi) = & + xi_M*(vel_L(idxi) + dir_flg(idxi)*(s_S*(xi_MP*(xi_L - 1) + 1) - vel_L(idxi))) + & + xi_P*(vel_R(idxi) + dir_flg(idxi)*(s_S*(xi_PP*(xi_R - 1) + 1) - vel_R(idxi))) + end do - ! VOLUME FRACTION FLUX. - $:GPU_LOOP(parallelism='[seq]') - do i = advxb, advxe - flux_rs${XYZ}$_vf(j, k, l, i) = & - xi_M*qL_prim_rs${XYZ}$_vf(j, k, l, i)*s_S + & - xi_P*qR_prim_rs${XYZ}$_vf(j + 1, k, l, i)*s_S - end do + ! INTERNAL ENERGIES ADVECTION FLUX. + ! K-th pressure and velocity in preparation for the internal energy flux + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids + p_K_Star = xi_M*(xi_MP*((pres_L + pi_infs(i)/(1._wp + gammas(i)))* & + xi_L**(1._wp/gammas(i) + 1._wp) - pi_infs(i)/(1._wp + gammas(i)) - pres_L) + pres_L) + & + xi_P*(xi_PP*((pres_R + pi_infs(i)/(1._wp + gammas(i)))* & + xi_R**(1._wp/gammas(i) + 1._wp) - pi_infs(i)/(1._wp + gammas(i)) - pres_R) + pres_R) + + flux_rs${XYZ}$_vf(j, k, l, i + intxb - 1) = & + ((xi_M*qL_prim_rs${XYZ}$_vf(j, k, l, i + advxb - 1) + xi_P*qR_prim_rs${XYZ}$_vf(j + 1, k, l, i + advxb - 1))* & + (gammas(i)*p_K_Star + pi_infs(i)) + & + (xi_M*qL_prim_rs${XYZ}$_vf(j, k, l, i + contxb - 1) + xi_P*qR_prim_rs${XYZ}$_vf(j + 1, k, l, i + contxb - 1))* & + qvs(i))*vel_K_Star & + + (s_M/s_L)*(s_P/s_R)*pcorr*s_S*(xi_M*qL_prim_rs${XYZ}$_vf(j, k, l, i + advxb - 1) + xi_P*qR_prim_rs${XYZ}$_vf(j + 1, k, l, i + advxb - 1)) + end do + + flux_src_rs${XYZ}$_vf(j, k, l, advxb) = vel_src_rs${XYZ}$_vf(j, k, l, idx1) - ! SOURCE TERM FOR VOLUME FRACTION ADVECTION FLUX. + ! HYPOELASTIC STRESS EVOLUTION FLUX. + if (hypoelasticity) then $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_dims - idxi = dir_idx(i) - vel_src_rs${XYZ}$_vf(j, k, l, idxi) = & - xi_M*(vel_L(idxi) + dir_flg(idxi)*(s_S*(xi_MP*(xi_L - 1) + 1) - vel_L(idxi))) + & - xi_P*(vel_R(idxi) + dir_flg(idxi)*(s_S*(xi_PP*(xi_R - 1) + 1) - vel_R(idxi))) + do i = 1, strxe - strxb + 1 + flux_rs${XYZ}$_vf(j, k, l, strxb - 1 + i) = & + xi_M*(s_S/(s_L - s_S))*(s_L*rho_L*tau_e_L(i) - rho_L*vel_L(idx1)*tau_e_L(i)) + & + xi_P*(s_S/(s_R - s_S))*(s_R*rho_R*tau_e_R(i) - rho_R*vel_R(idx1)*tau_e_R(i)) end do + end if - ! INTERNAL ENERGIES ADVECTION FLUX. - ! K-th pressure and velocity in preparation for the internal energy flux + ! REFERENCE MAP FLUX. + if (hyperelasticity) then $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - p_K_Star = xi_M*(xi_MP*((pres_L + pi_infs(i)/(1._wp + gammas(i)))* & - xi_L**(1._wp/gammas(i) + 1._wp) - pi_infs(i)/(1._wp + gammas(i)) - pres_L) + pres_L) + & - xi_P*(xi_PP*((pres_R + pi_infs(i)/(1._wp + gammas(i)))* & - xi_R**(1._wp/gammas(i) + 1._wp) - pi_infs(i)/(1._wp + gammas(i)) - pres_R) + pres_R) - - flux_rs${XYZ}$_vf(j, k, l, i + intxb - 1) = & - ((xi_M*qL_prim_rs${XYZ}$_vf(j, k, l, i + advxb - 1) + xi_P*qR_prim_rs${XYZ}$_vf(j + 1, k, l, i + advxb - 1))* & - (gammas(i)*p_K_Star + pi_infs(i)) + & - (xi_M*qL_prim_rs${XYZ}$_vf(j, k, l, i + contxb - 1) + xi_P*qR_prim_rs${XYZ}$_vf(j + 1, k, l, i + contxb - 1))* & - qvs(i))*vel_K_Star & - + (s_M/s_L)*(s_P/s_R)*pcorr*s_S*(xi_M*qL_prim_rs${XYZ}$_vf(j, k, l, i + advxb - 1) + xi_P*qR_prim_rs${XYZ}$_vf(j + 1, k, l, i + advxb - 1)) + do i = 1, num_dims + flux_rs${XYZ}$_vf(j, k, l, xibeg - 1 + i) = & + xi_M*(s_S/(s_L - s_S))*(s_L*rho_L*xi_field_L(i) & + - rho_L*vel_L(idx1)*xi_field_L(i)) + & + xi_P*(s_S/(s_R - s_S))*(s_R*rho_R*xi_field_R(i) & + - rho_R*vel_R(idx1)*xi_field_R(i)) end do + end if - flux_src_rs${XYZ}$_vf(j, k, l, advxb) = vel_src_rs${XYZ}$_vf(j, k, l, idx1) + ! COLOR FUNCTION FLUX + if (surface_tension) then + flux_rs${XYZ}$_vf(j, k, l, c_idx) = & + (xi_M*qL_prim_rs${XYZ}$_vf(j, k, l, c_idx) + & + xi_P*qR_prim_rs${XYZ}$_vf(j + 1, k, l, c_idx))*s_S + end if - ! HYPOELASTIC STRESS EVOLUTION FLUX. - if (hypoelasticity) then + ! Geometrical source flux for cylindrical coordinates + #:if (NORM_DIR == 2) + if (cyl_coord) then + !Substituting the advective flux into the inviscid geometrical source flux $:GPU_LOOP(parallelism='[seq]') - do i = 1, strxe - strxb + 1 - flux_rs${XYZ}$_vf(j, k, l, strxb - 1 + i) = & - xi_M*(s_S/(s_L - s_S))*(s_L*rho_L*tau_e_L(i) - rho_L*vel_L(idx1)*tau_e_L(i)) + & - xi_P*(s_S/(s_R - s_S))*(s_R*rho_R*tau_e_R(i) - rho_R*vel_R(idx1)*tau_e_R(i)) + do i = 1, E_idx + flux_gsrc_rs${XYZ}$_vf(j, k, l, i) = flux_rs${XYZ}$_vf(j, k, l, i) end do - end if - - ! REFERENCE MAP FLUX. - if (hyperelasticity) then $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_dims - flux_rs${XYZ}$_vf(j, k, l, xibeg - 1 + i) = & - xi_M*(s_S/(s_L - s_S))*(s_L*rho_L*xi_field_L(i) & - - rho_L*vel_L(idx1)*xi_field_L(i)) + & - xi_P*(s_S/(s_R - s_S))*(s_R*rho_R*xi_field_R(i) & - - rho_R*vel_R(idx1)*xi_field_R(i)) + do i = intxb, intxe + flux_gsrc_rs${XYZ}$_vf(j, k, l, i) = flux_rs${XYZ}$_vf(j, k, l, i) + end do + ! Recalculating the radial momentum geometric source flux + flux_gsrc_rs${XYZ}$_vf(j, k, l, momxb - 1 + dir_idx(1)) = & + flux_gsrc_rs${XYZ}$_vf(j, k, l, momxb - 1 + dir_idx(1)) - p_Star + ! Geometrical source of the void fraction(s) is zero + $:GPU_LOOP(parallelism='[seq]') + do i = advxb, advxe + flux_gsrc_rs${XYZ}$_vf(j, k, l, i) = 0._wp end do end if + #:endif + #:if (NORM_DIR == 3) + if (grid_geometry == 3) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, sys_size + flux_gsrc_rs${XYZ}$_vf(j, k, l, i) = 0._wp + end do + flux_gsrc_rs${XYZ}$_vf(j, k, l, momxb - 1 + dir_idx(1)) = & + flux_gsrc_rs${XYZ}$_vf(j, k, l, momxb - 1 + dir_idx(1)) - p_Star - ! COLOR FUNCTION FLUX - if (surface_tension) then - flux_rs${XYZ}$_vf(j, k, l, c_idx) = & - (xi_M*qL_prim_rs${XYZ}$_vf(j, k, l, c_idx) + & - xi_P*qR_prim_rs${XYZ}$_vf(j + 1, k, l, c_idx))*s_S + flux_gsrc_rs${XYZ}$_vf(j, k, l, momxe) = flux_rs${XYZ}$_vf(j, k, l, momxb + 1) end if + #:endif - ! Geometrical source flux for cylindrical coordinates - #:if (NORM_DIR == 2) - if (cyl_coord) then - !Substituting the advective flux into the inviscid geometrical source flux - $:GPU_LOOP(parallelism='[seq]') - do i = 1, E_idx - flux_gsrc_rs${XYZ}$_vf(j, k, l, i) = flux_rs${XYZ}$_vf(j, k, l, i) - end do - $:GPU_LOOP(parallelism='[seq]') - do i = intxb, intxe - flux_gsrc_rs${XYZ}$_vf(j, k, l, i) = flux_rs${XYZ}$_vf(j, k, l, i) - end do - ! Recalculating the radial momentum geometric source flux - flux_gsrc_rs${XYZ}$_vf(j, k, l, momxb - 1 + dir_idx(1)) = & - flux_gsrc_rs${XYZ}$_vf(j, k, l, momxb - 1 + dir_idx(1)) - p_Star - ! Geometrical source of the void fraction(s) is zero - $:GPU_LOOP(parallelism='[seq]') - do i = advxb, advxe - flux_gsrc_rs${XYZ}$_vf(j, k, l, i) = 0._wp - end do - end if - #:endif - #:if (NORM_DIR == 3) - if (grid_geometry == 3) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, sys_size - flux_gsrc_rs${XYZ}$_vf(j, k, l, i) = 0._wp - end do - flux_gsrc_rs${XYZ}$_vf(j, k, l, momxb - 1 + dir_idx(1)) = & - flux_gsrc_rs${XYZ}$_vf(j, k, l, momxb - 1 + dir_idx(1)) - p_Star - - flux_gsrc_rs${XYZ}$_vf(j, k, l, momxe) = flux_rs${XYZ}$_vf(j, k, l, momxb + 1) - end if - #:endif - - end do end do end do + end do $:END_GPU_PARALLEL_LOOP() elseif (model_eqns == 4) then !ME4 $:GPU_PARALLEL_LOOP(collapse=3, private='[i, q, alpha_rho_L, alpha_rho_R, vel_L, vel_R, alpha_L, alpha_R, nbub_L, nbub_R, rho_L, rho_R, pres_L, pres_R, E_L, E_R, H_L, H_R, Cp_avg, Cv_avg, T_avg, eps, c_sum_Yi_Phi, T_L, T_R, Y_L, Y_R, MW_L, MW_R, R_gas_L, R_gas_R, Cp_L, Cp_R, Gamm_L, Gamm_R, gamma_L, gamma_R, pi_inf_L, pi_inf_R, qv_L, qv_R, c_L, c_R, G_L, G_R, rho_avg, H_avg, c_avg, gamma_avg, ptilde_L, ptilde_R, vel_L_rms, vel_R_rms, vel_avg_rms, vel_L_tmp, vel_R_tmp, Ms_L, Ms_R, pres_SL, pres_SR, alpha_L_sum, alpha_R_sum, rho_Star, E_Star, p_Star, p_K_Star, vel_K_star, s_L, s_R, s_M, s_P, s_S, xi_M, xi_P, xi_L, xi_R, xi_MP, xi_PP]') - do l = is3%beg, is3%end - do k = is2%beg, is2%end - do j = is1%beg, is1%end + do l = is3%beg, is3%end + do k = is2%beg, is2%end + do j = is1%beg, is1%end - vel_L_rms = 0._wp; vel_R_rms = 0._wp - rho_L = 0._wp; rho_R = 0._wp - gamma_L = 0._wp; gamma_R = 0._wp - pi_inf_L = 0._wp; pi_inf_R = 0._wp - qv_L = 0._wp; qv_R = 0._wp + vel_L_rms = 0._wp; vel_R_rms = 0._wp + rho_L = 0._wp; rho_R = 0._wp + gamma_L = 0._wp; gamma_R = 0._wp + pi_inf_L = 0._wp; pi_inf_R = 0._wp + qv_L = 0._wp; qv_R = 0._wp - $:GPU_LOOP(parallelism='[seq]') - do i = 1, contxe - alpha_rho_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, i) - alpha_rho_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) - end do + $:GPU_LOOP(parallelism='[seq]') + do i = 1, contxe + alpha_rho_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, i) + alpha_rho_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) + end do - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_dims - vel_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, contxe + i) - vel_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, contxe + i) - vel_L_rms = vel_L_rms + vel_L(i)**2._wp - vel_R_rms = vel_R_rms + vel_R(i)**2._wp - end do + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_dims + vel_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, contxe + i) + vel_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, contxe + i) + vel_L_rms = vel_L_rms + vel_L(i)**2._wp + vel_R_rms = vel_R_rms + vel_R(i)**2._wp + end do - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - alpha_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i) - alpha_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i) - end do - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - alpha_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i) - alpha_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i) - end do + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids + alpha_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i) + alpha_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i) + end do + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids + alpha_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i) + alpha_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i) + end do - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - rho_L = rho_L + alpha_rho_L(i) - gamma_L = gamma_L + alpha_L(i)*gammas(i) - pi_inf_L = pi_inf_L + alpha_L(i)*pi_infs(i) - qv_L = qv_L + alpha_rho_L(i)*qvs(i) - - rho_R = rho_R + alpha_rho_R(i) - gamma_R = gamma_R + alpha_R(i)*gammas(i) - pi_inf_R = pi_inf_R + alpha_R(i)*pi_infs(i) - qv_R = qv_R + alpha_rho_R(i)*qvs(i) - end do + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids + rho_L = rho_L + alpha_rho_L(i) + gamma_L = gamma_L + alpha_L(i)*gammas(i) + pi_inf_L = pi_inf_L + alpha_L(i)*pi_infs(i) + qv_L = qv_L + alpha_rho_L(i)*qvs(i) + + rho_R = rho_R + alpha_rho_R(i) + gamma_R = gamma_R + alpha_R(i)*gammas(i) + pi_inf_R = pi_inf_R + alpha_R(i)*pi_infs(i) + qv_R = qv_R + alpha_rho_R(i)*qvs(i) + end do - pres_L = qL_prim_rs${XYZ}$_vf(j, k, l, E_idx) - pres_R = qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx) + pres_L = qL_prim_rs${XYZ}$_vf(j, k, l, E_idx) + pres_R = qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx) - E_L = gamma_L*pres_L + pi_inf_L + 5.e-1_wp*rho_L*vel_L_rms + qv_L - E_R = gamma_R*pres_R + pi_inf_R + 5.e-1_wp*rho_R*vel_R_rms + qv_R + E_L = gamma_L*pres_L + pi_inf_L + 5.e-1_wp*rho_L*vel_L_rms + qv_L + E_R = gamma_R*pres_R + pi_inf_R + 5.e-1_wp*rho_R*vel_R_rms + qv_R - H_L = (E_L + pres_L)/rho_L - H_R = (E_R + pres_R)/rho_R + H_L = (E_L + pres_L)/rho_L + H_R = (E_R + pres_R)/rho_R + + @:compute_average_state() + + call s_compute_speed_of_sound(pres_L, rho_L, gamma_L, pi_inf_L, H_L, alpha_L, & + vel_L_rms, 0._wp, c_L) + + call s_compute_speed_of_sound(pres_R, rho_R, gamma_R, pi_inf_R, H_R, alpha_R, & + vel_R_rms, 0._wp, c_R) - @:compute_average_state() + !> The computation of c_avg does not require all the variables, and therefore the non '_avg' + ! variables are placeholders to call the subroutine. - call s_compute_speed_of_sound(pres_L, rho_L, gamma_L, pi_inf_L, H_L, alpha_L, & - vel_L_rms, 0._wp, c_L) + call s_compute_speed_of_sound(pres_R, rho_avg, gamma_avg, pi_inf_R, H_avg, alpha_R, & + vel_avg_rms, 0._wp, c_avg) - call s_compute_speed_of_sound(pres_R, rho_R, gamma_R, pi_inf_R, H_R, alpha_R, & - vel_R_rms, 0._wp, c_R) + if (wave_speeds == 1) then + s_L = min(vel_L(dir_idx(1)) - c_L, vel_R(dir_idx(1)) - c_R) + s_R = max(vel_R(dir_idx(1)) + c_R, vel_L(dir_idx(1)) + c_L) - !> The computation of c_avg does not require all the variables, and therefore the non '_avg' - ! variables are placeholders to call the subroutine. + s_S = (pres_R - pres_L + rho_L*vel_L(dir_idx(1))* & + (s_L - vel_L(dir_idx(1))) - & + rho_R*vel_R(dir_idx(1))* & + (s_R - vel_R(dir_idx(1)))) & + /(rho_L*(s_L - vel_L(dir_idx(1))) - & + rho_R*(s_R - vel_R(dir_idx(1)))) + elseif (wave_speeds == 2) then + pres_SL = 5.e-1_wp*(pres_L + pres_R + rho_avg*c_avg* & + (vel_L(dir_idx(1)) - & + vel_R(dir_idx(1)))) - call s_compute_speed_of_sound(pres_R, rho_avg, gamma_avg, pi_inf_R, H_avg, alpha_R, & - vel_avg_rms, 0._wp, c_avg) + pres_SR = pres_SL - if (wave_speeds == 1) then - s_L = min(vel_L(dir_idx(1)) - c_L, vel_R(dir_idx(1)) - c_R) - s_R = max(vel_R(dir_idx(1)) + c_R, vel_L(dir_idx(1)) + c_L) + Ms_L = max(1._wp, sqrt(1._wp + ((5.e-1_wp + gamma_L)/(1._wp + gamma_L))* & + (pres_SL/pres_L - 1._wp)*pres_L/ & + ((pres_L + pi_inf_L/(1._wp + gamma_L))))) + Ms_R = max(1._wp, sqrt(1._wp + ((5.e-1_wp + gamma_R)/(1._wp + gamma_R))* & + (pres_SR/pres_R - 1._wp)*pres_R/ & + ((pres_R + pi_inf_R/(1._wp + gamma_R))))) - s_S = (pres_R - pres_L + rho_L*vel_L(dir_idx(1))* & - (s_L - vel_L(dir_idx(1))) - & - rho_R*vel_R(dir_idx(1))* & - (s_R - vel_R(dir_idx(1)))) & - /(rho_L*(s_L - vel_L(dir_idx(1))) - & - rho_R*(s_R - vel_R(dir_idx(1)))) - elseif (wave_speeds == 2) then - pres_SL = 5.e-1_wp*(pres_L + pres_R + rho_avg*c_avg* & - (vel_L(dir_idx(1)) - & - vel_R(dir_idx(1)))) - - pres_SR = pres_SL - - Ms_L = max(1._wp, sqrt(1._wp + ((5.e-1_wp + gamma_L)/(1._wp + gamma_L))* & - (pres_SL/pres_L - 1._wp)*pres_L/ & - ((pres_L + pi_inf_L/(1._wp + gamma_L))))) - Ms_R = max(1._wp, sqrt(1._wp + ((5.e-1_wp + gamma_R)/(1._wp + gamma_R))* & - (pres_SR/pres_R - 1._wp)*pres_R/ & - ((pres_R + pi_inf_R/(1._wp + gamma_R))))) - - s_L = vel_L(dir_idx(1)) - c_L*Ms_L - s_R = vel_R(dir_idx(1)) + c_R*Ms_R - - s_S = 5.e-1_wp*((vel_L(dir_idx(1)) + vel_R(dir_idx(1))) + & - (pres_L - pres_R)/ & - (rho_avg*c_avg)) - end if + s_L = vel_L(dir_idx(1)) - c_L*Ms_L + s_R = vel_R(dir_idx(1)) + c_R*Ms_R - ! follows Einfeldt et al. - ! s_M/P = min/max(0.,s_L/R) - s_M = min(0._wp, s_L); s_P = max(0._wp, s_R) + s_S = 5.e-1_wp*((vel_L(dir_idx(1)) + vel_R(dir_idx(1))) + & + (pres_L - pres_R)/ & + (rho_avg*c_avg)) + end if + + ! follows Einfeldt et al. + ! s_M/P = min/max(0.,s_L/R) + s_M = min(0._wp, s_L); s_P = max(0._wp, s_R) + + ! goes with q_star_L/R = xi_L/R * (variable) + ! xi_L/R = ( ( s_L/R - u_L/R )/(s_L/R - s_star) ) + xi_L = (s_L - vel_L(dir_idx(1)))/(s_L - s_S) + xi_R = (s_R - vel_R(dir_idx(1)))/(s_R - s_S) + + ! goes with numerical velocity in x/y/z directions + ! xi_P/M = 0.5 +/m sgn(0.5,s_star) + xi_M = (5.e-1_wp + sign(5.e-1_wp, s_S)) + xi_P = (5.e-1_wp - sign(5.e-1_wp, s_S)) + + $:GPU_LOOP(parallelism='[seq]') + do i = 1, contxe + flux_rs${XYZ}$_vf(j, k, l, i) = & + xi_M*alpha_rho_L(i) & + *(vel_L(dir_idx(1)) + s_M*(xi_L - 1._wp)) & + + xi_P*alpha_rho_R(i) & + *(vel_R(dir_idx(1)) + s_P*(xi_R - 1._wp)) + end do + + ! Momentum flux. + ! f = \rho u u + p I, q = \rho u, q_star = \xi * \rho*(s_star, v, w) + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_dims + flux_rs${XYZ}$_vf(j, k, l, contxe + dir_idx(i)) = & + xi_M*(rho_L*(vel_L(dir_idx(1))* & + vel_L(dir_idx(i)) + & + s_M*(xi_L*(dir_flg(dir_idx(i))*s_S + & + (1._wp - dir_flg(dir_idx(i)))* & + vel_L(dir_idx(i))) - vel_L(dir_idx(i)))) + & + dir_flg(dir_idx(i))*pres_L) & + + xi_P*(rho_R*(vel_R(dir_idx(1))* & + vel_R(dir_idx(i)) + & + s_P*(xi_R*(dir_flg(dir_idx(i))*s_S + & + (1._wp - dir_flg(dir_idx(i)))* & + vel_R(dir_idx(i))) - vel_R(dir_idx(i)))) + & + dir_flg(dir_idx(i))*pres_R) + end do + + if (bubbles_euler) then + ! Put p_tilde in + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_dims + flux_rs${XYZ}$_vf(j, k, l, contxe + dir_idx(i)) = & + flux_rs${XYZ}$_vf(j, k, l, contxe + dir_idx(i)) + & + xi_M*(dir_flg(dir_idx(i))*(-1._wp*ptilde_L)) & + + xi_P*(dir_flg(dir_idx(i))*(-1._wp*ptilde_R)) + end do + end if + + flux_rs${XYZ}$_vf(j, k, l, E_idx) = 0._wp + + $:GPU_LOOP(parallelism='[seq]') + do i = alf_idx, alf_idx !only advect the void fraction + flux_rs${XYZ}$_vf(j, k, l, i) = & + xi_M*qL_prim_rs${XYZ}$_vf(j, k, l, i) & + *(vel_L(dir_idx(1)) + s_M*(xi_L - 1._wp)) & + + xi_P*qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) & + *(vel_R(dir_idx(1)) + s_P*(xi_R - 1._wp)) + end do + + ! Source for volume fraction advection equation + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_dims - ! goes with q_star_L/R = xi_L/R * (variable) - ! xi_L/R = ( ( s_L/R - u_L/R )/(s_L/R - s_star) ) - xi_L = (s_L - vel_L(dir_idx(1)))/(s_L - s_S) - xi_R = (s_R - vel_R(dir_idx(1)))/(s_R - s_S) + vel_src_rs${XYZ}$_vf(j, k, l, dir_idx(i)) = 0._wp + !IF ( (model_eqns == 4) .or. (num_fluids==1) ) vel_src_rs_vf(dir_idx(i))%sf(j,k,l) = 0._wp + end do - ! goes with numerical velocity in x/y/z directions - ! xi_P/M = 0.5 +/m sgn(0.5,s_star) - xi_M = (5.e-1_wp + sign(5.e-1_wp, s_S)) - xi_P = (5.e-1_wp - sign(5.e-1_wp, s_S)) + flux_src_rs${XYZ}$_vf(j, k, l, advxb) = vel_src_rs${XYZ}$_vf(j, k, l, dir_idx(1)) + ! Add advection flux for bubble variables + if (bubbles_euler) then $:GPU_LOOP(parallelism='[seq]') - do i = 1, contxe + do i = bubxb, bubxe flux_rs${XYZ}$_vf(j, k, l, i) = & - xi_M*alpha_rho_L(i) & + xi_M*nbub_L*qL_prim_rs${XYZ}$_vf(j, k, l, i) & *(vel_L(dir_idx(1)) + s_M*(xi_L - 1._wp)) & - + xi_P*alpha_rho_R(i) & + + xi_P*nbub_R*qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) & *(vel_R(dir_idx(1)) + s_P*(xi_R - 1._wp)) end do + end if - ! Momentum flux. - ! f = \rho u u + p I, q = \rho u, q_star = \xi * \rho*(s_star, v, w) - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_dims - flux_rs${XYZ}$_vf(j, k, l, contxe + dir_idx(i)) = & + ! Geometrical source flux for cylindrical coordinates + + #:if (NORM_DIR == 2) + if (cyl_coord) then + ! Substituting the advective flux into the inviscid geometrical source flux + $:GPU_LOOP(parallelism='[seq]') + do i = 1, E_idx + flux_gsrc_rs${XYZ}$_vf(j, k, l, i) = flux_rs${XYZ}$_vf(j, k, l, i) + end do + ! Recalculating the radial momentum geometric source flux + flux_gsrc_rs${XYZ}$_vf(j, k, l, contxe + dir_idx(1)) = & xi_M*(rho_L*(vel_L(dir_idx(1))* & - vel_L(dir_idx(i)) + & - s_M*(xi_L*(dir_flg(dir_idx(i))*s_S + & - (1._wp - dir_flg(dir_idx(i)))* & - vel_L(dir_idx(i))) - vel_L(dir_idx(i)))) + & - dir_flg(dir_idx(i))*pres_L) & + vel_L(dir_idx(1)) + & + s_M*(xi_L*(dir_flg(dir_idx(1))*s_S + & + (1._wp - dir_flg(dir_idx(1)))* & + vel_L(dir_idx(1))) - vel_L(dir_idx(1))))) & + xi_P*(rho_R*(vel_R(dir_idx(1))* & - vel_R(dir_idx(i)) + & - s_P*(xi_R*(dir_flg(dir_idx(i))*s_S + & - (1._wp - dir_flg(dir_idx(i)))* & - vel_R(dir_idx(i))) - vel_R(dir_idx(i)))) + & - dir_flg(dir_idx(i))*pres_R) - end do - - if (bubbles_euler) then - ! Put p_tilde in + vel_R(dir_idx(1)) + & + s_P*(xi_R*(dir_flg(dir_idx(1))*s_S + & + (1._wp - dir_flg(dir_idx(1)))* & + vel_R(dir_idx(1))) - vel_R(dir_idx(1))))) + ! Geometrical source of the void fraction(s) is zero + $:GPU_LOOP(parallelism='[seq]') + do i = advxb, advxe + flux_gsrc_rs${XYZ}$_vf(j, k, l, i) = 0._wp + end do + end if + #:endif + #:if (NORM_DIR == 3) + if (grid_geometry == 3) then $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_dims - flux_rs${XYZ}$_vf(j, k, l, contxe + dir_idx(i)) = & - flux_rs${XYZ}$_vf(j, k, l, contxe + dir_idx(i)) + & - xi_M*(dir_flg(dir_idx(i))*(-1._wp*ptilde_L)) & - + xi_P*(dir_flg(dir_idx(i))*(-1._wp*ptilde_R)) + do i = 1, sys_size + flux_gsrc_rs${XYZ}$_vf(j, k, l, i) = 0._wp end do + flux_gsrc_rs${XYZ}$_vf(j, k, l, momxb + 1) = & + -xi_M*(rho_L*(vel_L(dir_idx(1))* & + vel_L(dir_idx(1)) + & + s_M*(xi_L*(dir_flg(dir_idx(1))*s_S + & + (1._wp - dir_flg(dir_idx(1)))* & + vel_L(dir_idx(1))) - vel_L(dir_idx(1))))) & + - xi_P*(rho_R*(vel_R(dir_idx(1))* & + vel_R(dir_idx(1)) + & + s_P*(xi_R*(dir_flg(dir_idx(1))*s_S + & + (1._wp - dir_flg(dir_idx(1)))* & + vel_R(dir_idx(1))) - vel_R(dir_idx(1))))) + flux_gsrc_rs${XYZ}$_vf(j, k, l, momxe) = flux_rs${XYZ}$_vf(j, k, l, momxb + 1) end if + #:endif + end do + end do + end do + $:END_GPU_PARALLEL_LOOP() + + elseif (model_eqns == 2 .and. bubbles_euler) then + $:GPU_PARALLEL_LOOP(collapse=3, private='[i, q, R0_L, R0_R, V0_L, V0_R, P0_L, P0_R, pbw_L, pbw_R, vel_L, vel_R, rho_avg, alpha_L, alpha_R, h_avg, gamma_avg, nbub_L, nbub_R, Re_L, Re_R, pcorr, zcoef, rho_L, rho_R, pres_L, pres_R, E_L, E_R, H_L, H_R, Cp_avg, Cv_avg, T_avg, eps, c_sum_Yi_Phi, T_L, T_R, Y_L, Y_R, MW_L, MW_R, R_gas_L, R_gas_R, Cp_L, Cp_R, Gamm_L, Gamm_R, gamma_L, gamma_R, pi_inf_L, pi_inf_R, qv_L, qv_R, c_L, c_R, c_avg, ptilde_L, ptilde_R, vel_L_rms, vel_R_rms, vel_avg_rms, vel_L_tmp, vel_R_tmp, Ms_L, Ms_R, pres_SL, pres_SR, alpha_L_sum, alpha_R_sum, rho_Star, E_Star, p_Star, p_K_Star, vel_K_star, s_L, s_R, s_M, s_P, s_S, xi_M, xi_P, xi_L, xi_R, xi_MP, xi_PP, nbub_L_denom, nbub_R_denom, Pbwr3Lbar, PbwR3Rbar, R3Lbar, R3Rbar, R3V2Lbar, R3V2Rbar]') + do l = is3%beg, is3%end + do k = is2%beg, is2%end + do j = is1%beg, is1%end + + vel_L_rms = 0._wp; vel_R_rms = 0._wp + rho_L = 0._wp; rho_R = 0._wp + gamma_L = 0._wp; gamma_R = 0._wp + pi_inf_L = 0._wp; pi_inf_R = 0._wp + qv_L = 0._wp; qv_R = 0._wp + + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids + alpha_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i) + alpha_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i) + end do - flux_rs${XYZ}$_vf(j, k, l, E_idx) = 0._wp + vel_L_rms = 0._wp; vel_R_rms = 0._wp + + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_dims + vel_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, contxe + i) + vel_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, contxe + i) + vel_L_rms = vel_L_rms + vel_L(i)**2._wp + vel_R_rms = vel_R_rms + vel_R(i)**2._wp + end do + ! Retain this in the refactor + if (mpp_lim .and. (num_fluids > 2)) then $:GPU_LOOP(parallelism='[seq]') - do i = alf_idx, alf_idx !only advect the void fraction - flux_rs${XYZ}$_vf(j, k, l, i) = & - xi_M*qL_prim_rs${XYZ}$_vf(j, k, l, i) & - *(vel_L(dir_idx(1)) + s_M*(xi_L - 1._wp)) & - + xi_P*qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) & - *(vel_R(dir_idx(1)) + s_P*(xi_R - 1._wp)) + do i = 1, num_fluids + rho_L = rho_L + qL_prim_rs${XYZ}$_vf(j, k, l, i) + gamma_L = gamma_L + qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i)*gammas(i) + pi_inf_L = pi_inf_L + qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i)*pi_infs(i) + qv_L = qv_L + qL_prim_rs${XYZ}$_vf(j, k, l, i)*qvs(i) + rho_R = rho_R + qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) + gamma_R = gamma_R + qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i)*gammas(i) + pi_inf_R = pi_inf_R + qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i)*pi_infs(i) + qv_R = qv_R + qR_prim_rs${XYZ}$_vf(j + 1, k, l, i)*qvs(i) end do - - ! Source for volume fraction advection equation + else if (num_fluids > 2) then $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_dims - - vel_src_rs${XYZ}$_vf(j, k, l, dir_idx(i)) = 0._wp - !IF ( (model_eqns == 4) .or. (num_fluids==1) ) vel_src_rs_vf(dir_idx(i))%sf(j,k,l) = 0._wp + do i = 1, num_fluids - 1 + rho_L = rho_L + qL_prim_rs${XYZ}$_vf(j, k, l, i) + gamma_L = gamma_L + qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i)*gammas(i) + pi_inf_L = pi_inf_L + qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i)*pi_infs(i) + qv_L = qv_L + qL_prim_rs${XYZ}$_vf(j, k, l, i)*qvs(i) + rho_R = rho_R + qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) + gamma_R = gamma_R + qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i)*gammas(i) + pi_inf_R = pi_inf_R + qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i)*pi_infs(i) + qv_R = qv_R + qR_prim_rs${XYZ}$_vf(j + 1, k, l, i)*qvs(i) end do + else + rho_L = qL_prim_rs${XYZ}$_vf(j, k, l, 1) + gamma_L = gammas(1) + pi_inf_L = pi_infs(1) + qv_L = qvs(1) + rho_R = qR_prim_rs${XYZ}$_vf(j + 1, k, l, 1) + gamma_R = gammas(1) + pi_inf_R = pi_infs(1) + qv_R = qvs(1) + end if - flux_src_rs${XYZ}$_vf(j, k, l, advxb) = vel_src_rs${XYZ}$_vf(j, k, l, dir_idx(1)) - - ! Add advection flux for bubble variables - if (bubbles_euler) then + if (viscous) then + if (num_fluids == 1) then ! Need to consider case with num_fluids >= 2 $:GPU_LOOP(parallelism='[seq]') - do i = bubxb, bubxe - flux_rs${XYZ}$_vf(j, k, l, i) = & - xi_M*nbub_L*qL_prim_rs${XYZ}$_vf(j, k, l, i) & - *(vel_L(dir_idx(1)) + s_M*(xi_L - 1._wp)) & - + xi_P*nbub_R*qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) & - *(vel_R(dir_idx(1)) + s_P*(xi_R - 1._wp)) - end do - end if + do i = 1, 2 + Re_L(i) = dflt_real + Re_R(i) = dflt_real - ! Geometrical source flux for cylindrical coordinates + if (Re_size(i) > 0) Re_L(i) = 0._wp + if (Re_size(i) > 0) Re_R(i) = 0._wp - #:if (NORM_DIR == 2) - if (cyl_coord) then - ! Substituting the advective flux into the inviscid geometrical source flux - $:GPU_LOOP(parallelism='[seq]') - do i = 1, E_idx - flux_gsrc_rs${XYZ}$_vf(j, k, l, i) = flux_rs${XYZ}$_vf(j, k, l, i) - end do - ! Recalculating the radial momentum geometric source flux - flux_gsrc_rs${XYZ}$_vf(j, k, l, contxe + dir_idx(1)) = & - xi_M*(rho_L*(vel_L(dir_idx(1))* & - vel_L(dir_idx(1)) + & - s_M*(xi_L*(dir_flg(dir_idx(1))*s_S + & - (1._wp - dir_flg(dir_idx(1)))* & - vel_L(dir_idx(1))) - vel_L(dir_idx(1))))) & - + xi_P*(rho_R*(vel_R(dir_idx(1))* & - vel_R(dir_idx(1)) + & - s_P*(xi_R*(dir_flg(dir_idx(1))*s_S + & - (1._wp - dir_flg(dir_idx(1)))* & - vel_R(dir_idx(1))) - vel_R(dir_idx(1))))) - ! Geometrical source of the void fraction(s) is zero - $:GPU_LOOP(parallelism='[seq]') - do i = advxb, advxe - flux_gsrc_rs${XYZ}$_vf(j, k, l, i) = 0._wp - end do - end if - #:endif - #:if (NORM_DIR == 3) - if (grid_geometry == 3) then $:GPU_LOOP(parallelism='[seq]') - do i = 1, sys_size - flux_gsrc_rs${XYZ}$_vf(j, k, l, i) = 0._wp + do q = 1, Re_size(i) + Re_L(i) = (1._wp - qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + Re_idx(i, q)))/Res_gs(i, q) & + + Re_L(i) + Re_R(i) = (1._wp - qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + Re_idx(i, q)))/Res_gs(i, q) & + + Re_R(i) end do - flux_gsrc_rs${XYZ}$_vf(j, k, l, momxb + 1) = & - -xi_M*(rho_L*(vel_L(dir_idx(1))* & - vel_L(dir_idx(1)) + & - s_M*(xi_L*(dir_flg(dir_idx(1))*s_S + & - (1._wp - dir_flg(dir_idx(1)))* & - vel_L(dir_idx(1))) - vel_L(dir_idx(1))))) & - - xi_P*(rho_R*(vel_R(dir_idx(1))* & - vel_R(dir_idx(1)) + & - s_P*(xi_R*(dir_flg(dir_idx(1))*s_S + & - (1._wp - dir_flg(dir_idx(1)))* & - vel_R(dir_idx(1))) - vel_R(dir_idx(1))))) - flux_gsrc_rs${XYZ}$_vf(j, k, l, momxe) = flux_rs${XYZ}$_vf(j, k, l, momxb + 1) - end if - #:endif - end do - end do - end do - $:END_GPU_PARALLEL_LOOP() - elseif (model_eqns == 2 .and. bubbles_euler) then - $:GPU_PARALLEL_LOOP(collapse=3, private='[i, q, R0_L, R0_R, V0_L, V0_R, P0_L, P0_R, pbw_L, pbw_R, vel_L, vel_R, rho_avg, alpha_L, alpha_R, h_avg, gamma_avg, nbub_L, nbub_R, Re_L, Re_R, pcorr, zcoef, rho_L, rho_R, pres_L, pres_R, E_L, E_R, H_L, H_R, Cp_avg, Cv_avg, T_avg, eps, c_sum_Yi_Phi, T_L, T_R, Y_L, Y_R, MW_L, MW_R, R_gas_L, R_gas_R, Cp_L, Cp_R, Gamm_L, Gamm_R, gamma_L, gamma_R, pi_inf_L, pi_inf_R, qv_L, qv_R, c_L, c_R, c_avg, ptilde_L, ptilde_R, vel_L_rms, vel_R_rms, vel_avg_rms, vel_L_tmp, vel_R_tmp, Ms_L, Ms_R, pres_SL, pres_SR, alpha_L_sum, alpha_R_sum, rho_Star, E_Star, p_Star, p_K_Star, vel_K_star, s_L, s_R, s_M, s_P, s_S, xi_M, xi_P, xi_L, xi_R, xi_MP, xi_PP, nbub_L_denom, nbub_R_denom, Pbwr3Lbar, PbwR3Rbar, R3Lbar, R3Rbar, R3V2Lbar, R3V2Rbar]') - do l = is3%beg, is3%end - do k = is2%beg, is2%end - do j = is1%beg, is1%end + Re_L(i) = 1._wp/max(Re_L(i), sgm_eps) + Re_R(i) = 1._wp/max(Re_R(i), sgm_eps) - vel_L_rms = 0._wp; vel_R_rms = 0._wp - rho_L = 0._wp; rho_R = 0._wp - gamma_L = 0._wp; gamma_R = 0._wp - pi_inf_L = 0._wp; pi_inf_R = 0._wp - qv_L = 0._wp; qv_R = 0._wp + end do + end if + end if - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - alpha_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i) - alpha_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i) - end do + pres_L = qL_prim_rs${XYZ}$_vf(j, k, l, E_idx) + pres_R = qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx) - vel_L_rms = 0._wp; vel_R_rms = 0._wp + E_L = gamma_L*pres_L + pi_inf_L + 5.e-1_wp*rho_L*vel_L_rms + E_R = gamma_R*pres_R + pi_inf_R + 5.e-1_wp*rho_R*vel_R_rms + H_L = (E_L + pres_L)/rho_L + H_R = (E_R + pres_R)/rho_R + + if (avg_state == 2) then $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_dims - vel_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, contxe + i) - vel_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, contxe + i) - vel_L_rms = vel_L_rms + vel_L(i)**2._wp - vel_R_rms = vel_R_rms + vel_R(i)**2._wp + do i = 1, nb + R0_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, rs(i)) + R0_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, rs(i)) + + V0_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, vs(i)) + V0_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, vs(i)) + if (.not. polytropic .and. .not. qbmm) then + P0_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, ps(i)) + P0_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, ps(i)) + end if end do - ! Retain this in the refactor - if (mpp_lim .and. (num_fluids > 2)) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - rho_L = rho_L + qL_prim_rs${XYZ}$_vf(j, k, l, i) - gamma_L = gamma_L + qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i)*gammas(i) - pi_inf_L = pi_inf_L + qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i)*pi_infs(i) - qv_L = qv_L + qL_prim_rs${XYZ}$_vf(j, k, l, i)*qvs(i) - rho_R = rho_R + qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) - gamma_R = gamma_R + qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i)*gammas(i) - pi_inf_R = pi_inf_R + qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i)*pi_infs(i) - qv_R = qv_R + qR_prim_rs${XYZ}$_vf(j + 1, k, l, i)*qvs(i) - end do - else if (num_fluids > 2) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - 1 - rho_L = rho_L + qL_prim_rs${XYZ}$_vf(j, k, l, i) - gamma_L = gamma_L + qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i)*gammas(i) - pi_inf_L = pi_inf_L + qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i)*pi_infs(i) - qv_L = qv_L + qL_prim_rs${XYZ}$_vf(j, k, l, i)*qvs(i) - rho_R = rho_R + qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) - gamma_R = gamma_R + qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i)*gammas(i) - pi_inf_R = pi_inf_R + qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i)*pi_infs(i) - qv_R = qv_R + qR_prim_rs${XYZ}$_vf(j + 1, k, l, i)*qvs(i) - end do + if (.not. qbmm) then + if (adv_n) then + nbub_L = qL_prim_rs${XYZ}$_vf(j, k, l, n_idx) + nbub_R = qR_prim_rs${XYZ}$_vf(j + 1, k, l, n_idx) + else + nbub_L_denom = 0._wp + nbub_R_denom = 0._wp + $:GPU_LOOP(parallelism='[seq]') + do i = 1, nb + nbub_L_denom = nbub_L_denom + (R0_L(i)**3._wp)*weight(i) + nbub_R_denom = nbub_R_denom + (R0_R(i)**3._wp)*weight(i) + end do + nbub_L = (3._wp/(4._wp*pi))*qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + num_fluids)/nbub_L_denom + nbub_R = (3._wp/(4._wp*pi))*qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + num_fluids)/nbub_R_denom + end if else - rho_L = qL_prim_rs${XYZ}$_vf(j, k, l, 1) - gamma_L = gammas(1) - pi_inf_L = pi_infs(1) - qv_L = qvs(1) - rho_R = qR_prim_rs${XYZ}$_vf(j + 1, k, l, 1) - gamma_R = gammas(1) - pi_inf_R = pi_infs(1) - qv_R = qvs(1) + !nb stored in 0th moment of first R0 bin in variable conversion module + nbub_L = qL_prim_rs${XYZ}$_vf(j, k, l, bubxb) + nbub_R = qR_prim_rs${XYZ}$_vf(j + 1, k, l, bubxb) end if - if (viscous) then - if (num_fluids == 1) then ! Need to consider case with num_fluids >= 2 - $:GPU_LOOP(parallelism='[seq]') - do i = 1, 2 - Re_L(i) = dflt_real - Re_R(i) = dflt_real - - if (Re_size(i) > 0) Re_L(i) = 0._wp - if (Re_size(i) > 0) Re_R(i) = 0._wp + $:GPU_LOOP(parallelism='[seq]') + do i = 1, nb + if (.not. qbmm) then + pbw_L(i) = f_cpbw_KM(R0(i), R0_L(i), V0_L(i), P0_L(i)) + pbw_R(i) = f_cpbw_KM(R0(i), R0_R(i), V0_R(i), P0_R(i)) + end if + end do - $:GPU_LOOP(parallelism='[seq]') - do q = 1, Re_size(i) - Re_L(i) = (1._wp - qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + Re_idx(i, q)))/Res_gs(i, q) & - + Re_L(i) - Re_R(i) = (1._wp - qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + Re_idx(i, q)))/Res_gs(i, q) & - + Re_R(i) - end do + if (qbmm) then + PbwR3Lbar = mom_sp_rs${XYZ}$_vf(j, k, l, 4) + PbwR3Rbar = mom_sp_rs${XYZ}$_vf(j + 1, k, l, 4) - Re_L(i) = 1._wp/max(Re_L(i), sgm_eps) - Re_R(i) = 1._wp/max(Re_R(i), sgm_eps) + R3Lbar = mom_sp_rs${XYZ}$_vf(j, k, l, 1) + R3Rbar = mom_sp_rs${XYZ}$_vf(j + 1, k, l, 1) - end do - end if - end if + R3V2Lbar = mom_sp_rs${XYZ}$_vf(j, k, l, 3) + R3V2Rbar = mom_sp_rs${XYZ}$_vf(j + 1, k, l, 3) + else - pres_L = qL_prim_rs${XYZ}$_vf(j, k, l, E_idx) - pres_R = qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx) + PbwR3Lbar = 0._wp + PbwR3Rbar = 0._wp - E_L = gamma_L*pres_L + pi_inf_L + 5.e-1_wp*rho_L*vel_L_rms - E_R = gamma_R*pres_R + pi_inf_R + 5.e-1_wp*rho_R*vel_R_rms + R3Lbar = 0._wp + R3Rbar = 0._wp - H_L = (E_L + pres_L)/rho_L - H_R = (E_R + pres_R)/rho_R + R3V2Lbar = 0._wp + R3V2Rbar = 0._wp - if (avg_state == 2) then $:GPU_LOOP(parallelism='[seq]') do i = 1, nb - R0_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, rs(i)) - R0_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, rs(i)) - - V0_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, vs(i)) - V0_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, vs(i)) - if (.not. polytropic .and. .not. qbmm) then - P0_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, ps(i)) - P0_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, ps(i)) - end if - end do + PbwR3Lbar = PbwR3Lbar + pbw_L(i)*(R0_L(i)**3._wp)*weight(i) + PbwR3Rbar = PbwR3Rbar + pbw_R(i)*(R0_R(i)**3._wp)*weight(i) - if (.not. qbmm) then - if (adv_n) then - nbub_L = qL_prim_rs${XYZ}$_vf(j, k, l, n_idx) - nbub_R = qR_prim_rs${XYZ}$_vf(j + 1, k, l, n_idx) - else - nbub_L_denom = 0._wp - nbub_R_denom = 0._wp - $:GPU_LOOP(parallelism='[seq]') - do i = 1, nb - nbub_L_denom = nbub_L_denom + (R0_L(i)**3._wp)*weight(i) - nbub_R_denom = nbub_R_denom + (R0_R(i)**3._wp)*weight(i) - end do - nbub_L = (3._wp/(4._wp*pi))*qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + num_fluids)/nbub_L_denom - nbub_R = (3._wp/(4._wp*pi))*qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + num_fluids)/nbub_R_denom - end if - else - !nb stored in 0th moment of first R0 bin in variable conversion module - nbub_L = qL_prim_rs${XYZ}$_vf(j, k, l, bubxb) - nbub_R = qR_prim_rs${XYZ}$_vf(j + 1, k, l, bubxb) - end if + R3Lbar = R3Lbar + (R0_L(i)**3._wp)*weight(i) + R3Rbar = R3Rbar + (R0_R(i)**3._wp)*weight(i) - $:GPU_LOOP(parallelism='[seq]') - do i = 1, nb - if (.not. qbmm) then - pbw_L(i) = f_cpbw_KM(R0(i), R0_L(i), V0_L(i), P0_L(i)) - pbw_R(i) = f_cpbw_KM(R0(i), R0_R(i), V0_R(i), P0_R(i)) - end if + R3V2Lbar = R3V2Lbar + (R0_L(i)**3._wp)*(V0_L(i)**2._wp)*weight(i) + R3V2Rbar = R3V2Rbar + (R0_R(i)**3._wp)*(V0_R(i)**2._wp)*weight(i) end do + end if - if (qbmm) then - PbwR3Lbar = mom_sp_rs${XYZ}$_vf(j, k, l, 4) - PbwR3Rbar = mom_sp_rs${XYZ}$_vf(j + 1, k, l, 4) - - R3Lbar = mom_sp_rs${XYZ}$_vf(j, k, l, 1) - R3Rbar = mom_sp_rs${XYZ}$_vf(j + 1, k, l, 1) - - R3V2Lbar = mom_sp_rs${XYZ}$_vf(j, k, l, 3) - R3V2Rbar = mom_sp_rs${XYZ}$_vf(j + 1, k, l, 3) - else - - PbwR3Lbar = 0._wp - PbwR3Rbar = 0._wp - - R3Lbar = 0._wp - R3Rbar = 0._wp + if (qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + num_fluids) < small_alf .or. R3Lbar < small_alf) then + ptilde_L = qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + num_fluids)*pres_L + else + ptilde_L = qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + num_fluids)*(pres_L - PbwR3Lbar/R3Lbar - & + rho_L*R3V2Lbar/R3Lbar) + end if - R3V2Lbar = 0._wp - R3V2Rbar = 0._wp + if (qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + num_fluids) < small_alf .or. R3Rbar < small_alf) then + ptilde_R = qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + num_fluids)*pres_R + else + ptilde_R = qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + num_fluids)*(pres_R - PbwR3Rbar/R3Rbar - & + rho_R*R3V2Rbar/R3Rbar) + end if - $:GPU_LOOP(parallelism='[seq]') - do i = 1, nb - PbwR3Lbar = PbwR3Lbar + pbw_L(i)*(R0_L(i)**3._wp)*weight(i) - PbwR3Rbar = PbwR3Rbar + pbw_R(i)*(R0_R(i)**3._wp)*weight(i) + if ((.not. f_approx_equal(ptilde_L, ptilde_L)) .or. (.not. f_approx_equal(ptilde_R, ptilde_R))) then + end if - R3Lbar = R3Lbar + (R0_L(i)**3._wp)*weight(i) - R3Rbar = R3Rbar + (R0_R(i)**3._wp)*weight(i) + rho_avg = 5.e-1_wp*(rho_L + rho_R) + H_avg = 5.e-1_wp*(H_L + H_R) + gamma_avg = 5.e-1_wp*(gamma_L + gamma_R) + vel_avg_rms = 0._wp - R3V2Lbar = R3V2Lbar + (R0_L(i)**3._wp)*(V0_L(i)**2._wp)*weight(i) - R3V2Rbar = R3V2Rbar + (R0_R(i)**3._wp)*(V0_R(i)**2._wp)*weight(i) - end do - end if + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_dims + vel_avg_rms = vel_avg_rms + (5.e-1_wp*(vel_L(i) + vel_R(i)))**2._wp + end do - if (qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + num_fluids) < small_alf .or. R3Lbar < small_alf) then - ptilde_L = qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + num_fluids)*pres_L - else - ptilde_L = qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + num_fluids)*(pres_L - PbwR3Lbar/R3Lbar - & - rho_L*R3V2Lbar/R3Lbar) - end if + end if - if (qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + num_fluids) < small_alf .or. R3Rbar < small_alf) then - ptilde_R = qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + num_fluids)*pres_R - else - ptilde_R = qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + num_fluids)*(pres_R - PbwR3Rbar/R3Rbar - & - rho_R*R3V2Rbar/R3Rbar) - end if + call s_compute_speed_of_sound(pres_L, rho_L, gamma_L, pi_inf_L, H_L, alpha_L, & + vel_L_rms, 0._wp, c_L) - if ((.not. f_approx_equal(ptilde_L, ptilde_L)) .or. (.not. f_approx_equal(ptilde_R, ptilde_R))) then - end if + call s_compute_speed_of_sound(pres_R, rho_R, gamma_R, pi_inf_R, H_R, alpha_R, & + vel_R_rms, 0._wp, c_R) - rho_avg = 5.e-1_wp*(rho_L + rho_R) - H_avg = 5.e-1_wp*(H_L + H_R) - gamma_avg = 5.e-1_wp*(gamma_L + gamma_R) - vel_avg_rms = 0._wp + !> The computation of c_avg does not require all the variables, and therefore the non '_avg' + ! variables are placeholders to call the subroutine. + call s_compute_speed_of_sound(pres_R, rho_avg, gamma_avg, pi_inf_R, H_avg, alpha_R, & + vel_avg_rms, 0._wp, c_avg) - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_dims - vel_avg_rms = vel_avg_rms + (5.e-1_wp*(vel_L(i) + vel_R(i)))**2._wp - end do + if (viscous) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, 2 + Re_avg_rs${XYZ}$_vf(j, k, l, i) = 2._wp/(1._wp/Re_L(i) + 1._wp/Re_R(i)) + end do + end if - end if + ! Low Mach correction + if (low_Mach == 2) then + @:compute_low_Mach_correction() + end if - call s_compute_speed_of_sound(pres_L, rho_L, gamma_L, pi_inf_L, H_L, alpha_L, & - vel_L_rms, 0._wp, c_L) + if (wave_speeds == 1) then + s_L = min(vel_L(dir_idx(1)) - c_L, vel_R(dir_idx(1)) - c_R) + s_R = max(vel_R(dir_idx(1)) + c_R, vel_L(dir_idx(1)) + c_L) - call s_compute_speed_of_sound(pres_R, rho_R, gamma_R, pi_inf_R, H_R, alpha_R, & - vel_R_rms, 0._wp, c_R) + s_S = (pres_R - pres_L + rho_L*vel_L(dir_idx(1))* & + (s_L - vel_L(dir_idx(1))) - & + rho_R*vel_R(dir_idx(1))* & + (s_R - vel_R(dir_idx(1)))) & + /(rho_L*(s_L - vel_L(dir_idx(1))) - & + rho_R*(s_R - vel_R(dir_idx(1)))) + elseif (wave_speeds == 2) then + pres_SL = 5.e-1_wp*(pres_L + pres_R + rho_avg*c_avg* & + (vel_L(dir_idx(1)) - & + vel_R(dir_idx(1)))) - !> The computation of c_avg does not require all the variables, and therefore the non '_avg' - ! variables are placeholders to call the subroutine. - call s_compute_speed_of_sound(pres_R, rho_avg, gamma_avg, pi_inf_R, H_avg, alpha_R, & - vel_avg_rms, 0._wp, c_avg) + pres_SR = pres_SL - if (viscous) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, 2 - Re_avg_rs${XYZ}$_vf(j, k, l, i) = 2._wp/(1._wp/Re_L(i) + 1._wp/Re_R(i)) - end do - end if + Ms_L = max(1._wp, sqrt(1._wp + ((5.e-1_wp + gamma_L)/(1._wp + gamma_L))* & + (pres_SL/pres_L - 1._wp)*pres_L/ & + ((pres_L + pi_inf_L/(1._wp + gamma_L))))) + Ms_R = max(1._wp, sqrt(1._wp + ((5.e-1_wp + gamma_R)/(1._wp + gamma_R))* & + (pres_SR/pres_R - 1._wp)*pres_R/ & + ((pres_R + pi_inf_R/(1._wp + gamma_R))))) - ! Low Mach correction - if (low_Mach == 2) then - @:compute_low_Mach_correction() - end if + s_L = vel_L(dir_idx(1)) - c_L*Ms_L + s_R = vel_R(dir_idx(1)) + c_R*Ms_R - if (wave_speeds == 1) then - s_L = min(vel_L(dir_idx(1)) - c_L, vel_R(dir_idx(1)) - c_R) - s_R = max(vel_R(dir_idx(1)) + c_R, vel_L(dir_idx(1)) + c_L) + s_S = 5.e-1_wp*((vel_L(dir_idx(1)) + vel_R(dir_idx(1))) + & + (pres_L - pres_R)/ & + (rho_avg*c_avg)) + end if - s_S = (pres_R - pres_L + rho_L*vel_L(dir_idx(1))* & - (s_L - vel_L(dir_idx(1))) - & - rho_R*vel_R(dir_idx(1))* & - (s_R - vel_R(dir_idx(1)))) & - /(rho_L*(s_L - vel_L(dir_idx(1))) - & - rho_R*(s_R - vel_R(dir_idx(1)))) - elseif (wave_speeds == 2) then - pres_SL = 5.e-1_wp*(pres_L + pres_R + rho_avg*c_avg* & - (vel_L(dir_idx(1)) - & - vel_R(dir_idx(1)))) - - pres_SR = pres_SL - - Ms_L = max(1._wp, sqrt(1._wp + ((5.e-1_wp + gamma_L)/(1._wp + gamma_L))* & - (pres_SL/pres_L - 1._wp)*pres_L/ & - ((pres_L + pi_inf_L/(1._wp + gamma_L))))) - Ms_R = max(1._wp, sqrt(1._wp + ((5.e-1_wp + gamma_R)/(1._wp + gamma_R))* & - (pres_SR/pres_R - 1._wp)*pres_R/ & - ((pres_R + pi_inf_R/(1._wp + gamma_R))))) - - s_L = vel_L(dir_idx(1)) - c_L*Ms_L - s_R = vel_R(dir_idx(1)) + c_R*Ms_R - - s_S = 5.e-1_wp*((vel_L(dir_idx(1)) + vel_R(dir_idx(1))) + & - (pres_L - pres_R)/ & - (rho_avg*c_avg)) - end if + ! follows Einfeldt et al. + ! s_M/P = min/max(0.,s_L/R) + s_M = min(0._wp, s_L); s_P = max(0._wp, s_R) - ! follows Einfeldt et al. - ! s_M/P = min/max(0.,s_L/R) - s_M = min(0._wp, s_L); s_P = max(0._wp, s_R) + ! goes with q_star_L/R = xi_L/R * (variable) + ! xi_L/R = ( ( s_L/R - u_L/R )/(s_L/R - s_star) ) + xi_L = (s_L - vel_L(dir_idx(1)))/(s_L - s_S) + xi_R = (s_R - vel_R(dir_idx(1)))/(s_R - s_S) - ! goes with q_star_L/R = xi_L/R * (variable) - ! xi_L/R = ( ( s_L/R - u_L/R )/(s_L/R - s_star) ) - xi_L = (s_L - vel_L(dir_idx(1)))/(s_L - s_S) - xi_R = (s_R - vel_R(dir_idx(1)))/(s_R - s_S) + ! goes with numerical velocity in x/y/z directions + ! xi_P/M = 0.5 +/m sgn(0.5,s_star) + xi_M = (5.e-1_wp + sign(5.e-1_wp, s_S)) + xi_P = (5.e-1_wp - sign(5.e-1_wp, s_S)) - ! goes with numerical velocity in x/y/z directions - ! xi_P/M = 0.5 +/m sgn(0.5,s_star) - xi_M = (5.e-1_wp + sign(5.e-1_wp, s_S)) - xi_P = (5.e-1_wp - sign(5.e-1_wp, s_S)) + ! Low Mach correction + if (low_Mach == 1) then + @:compute_low_Mach_correction() + else + pcorr = 0._wp + end if - ! Low Mach correction - if (low_Mach == 1) then - @:compute_low_Mach_correction() - else - pcorr = 0._wp - end if + $:GPU_LOOP(parallelism='[seq]') + do i = 1, contxe + flux_rs${XYZ}$_vf(j, k, l, i) = & + xi_M*qL_prim_rs${XYZ}$_vf(j, k, l, i) & + *(vel_L(dir_idx(1)) + s_M*(xi_L - 1._wp)) & + + xi_P*qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) & + *(vel_R(dir_idx(1)) + s_P*(xi_R - 1._wp)) + end do - $:GPU_LOOP(parallelism='[seq]') - do i = 1, contxe - flux_rs${XYZ}$_vf(j, k, l, i) = & - xi_M*qL_prim_rs${XYZ}$_vf(j, k, l, i) & - *(vel_L(dir_idx(1)) + s_M*(xi_L - 1._wp)) & - + xi_P*qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) & - *(vel_R(dir_idx(1)) + s_P*(xi_R - 1._wp)) - end do + if (bubbles_euler .and. (num_fluids > 1)) then + ! Kill mass transport @ gas density + flux_rs${XYZ}$_vf(j, k, l, contxe) = 0._wp + end if - if (bubbles_euler .and. (num_fluids > 1)) then - ! Kill mass transport @ gas density - flux_rs${XYZ}$_vf(j, k, l, contxe) = 0._wp - end if + ! Momentum flux. + ! f = \rho u u + p I, q = \rho u, q_star = \xi * \rho*(s_star, v, w) - ! Momentum flux. - ! f = \rho u u + p I, q = \rho u, q_star = \xi * \rho*(s_star, v, w) + ! Include p_tilde - ! Include p_tilde + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_dims + flux_rs${XYZ}$_vf(j, k, l, contxe + dir_idx(i)) = & + xi_M*(rho_L*(vel_L(dir_idx(1))* & + vel_L(dir_idx(i)) + & + s_M*(xi_L*(dir_flg(dir_idx(i))*s_S + & + (1._wp - dir_flg(dir_idx(i)))* & + vel_L(dir_idx(i))) - vel_L(dir_idx(i)))) + & + dir_flg(dir_idx(i))*(pres_L - ptilde_L)) & + + xi_P*(rho_R*(vel_R(dir_idx(1))* & + vel_R(dir_idx(i)) + & + s_P*(xi_R*(dir_flg(dir_idx(i))*s_S + & + (1._wp - dir_flg(dir_idx(i)))* & + vel_R(dir_idx(i))) - vel_R(dir_idx(i)))) + & + dir_flg(dir_idx(i))*(pres_R - ptilde_R)) & + + (s_M/s_L)*(s_P/s_R)*dir_flg(dir_idx(i))*pcorr + end do - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_dims - flux_rs${XYZ}$_vf(j, k, l, contxe + dir_idx(i)) = & - xi_M*(rho_L*(vel_L(dir_idx(1))* & - vel_L(dir_idx(i)) + & - s_M*(xi_L*(dir_flg(dir_idx(i))*s_S + & - (1._wp - dir_flg(dir_idx(i)))* & - vel_L(dir_idx(i))) - vel_L(dir_idx(i)))) + & - dir_flg(dir_idx(i))*(pres_L - ptilde_L)) & - + xi_P*(rho_R*(vel_R(dir_idx(1))* & - vel_R(dir_idx(i)) + & - s_P*(xi_R*(dir_flg(dir_idx(i))*s_S + & - (1._wp - dir_flg(dir_idx(i)))* & - vel_R(dir_idx(i))) - vel_R(dir_idx(i)))) + & - dir_flg(dir_idx(i))*(pres_R - ptilde_R)) & - + (s_M/s_L)*(s_P/s_R)*dir_flg(dir_idx(i))*pcorr - end do + ! Energy flux. + ! f = u*(E+p), q = E, q_star = \xi*E+(s-u)(\rho s_star + p/(s-u)) + flux_rs${XYZ}$_vf(j, k, l, E_idx) = & + xi_M*(vel_L(dir_idx(1))*(E_L + pres_L - ptilde_L) + & + s_M*(xi_L*(E_L + (s_S - vel_L(dir_idx(1)))* & + (rho_L*s_S + (pres_L - ptilde_L)/ & + (s_L - vel_L(dir_idx(1))))) - E_L)) & + + xi_P*(vel_R(dir_idx(1))*(E_R + pres_R - ptilde_R) + & + s_P*(xi_R*(E_R + (s_S - vel_R(dir_idx(1)))* & + (rho_R*s_S + (pres_R - ptilde_R)/ & + (s_R - vel_R(dir_idx(1))))) - E_R)) & + + (s_M/s_L)*(s_P/s_R)*pcorr*s_S + + ! Volume fraction flux + $:GPU_LOOP(parallelism='[seq]') + do i = advxb, advxe + flux_rs${XYZ}$_vf(j, k, l, i) = & + xi_M*qL_prim_rs${XYZ}$_vf(j, k, l, i) & + *(vel_L(dir_idx(1)) + s_M*(xi_L - 1._wp)) & + + xi_P*qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) & + *(vel_R(dir_idx(1)) + s_P*(xi_R - 1._wp)) + end do - ! Energy flux. - ! f = u*(E+p), q = E, q_star = \xi*E+(s-u)(\rho s_star + p/(s-u)) - flux_rs${XYZ}$_vf(j, k, l, E_idx) = & - xi_M*(vel_L(dir_idx(1))*(E_L + pres_L - ptilde_L) + & - s_M*(xi_L*(E_L + (s_S - vel_L(dir_idx(1)))* & - (rho_L*s_S + (pres_L - ptilde_L)/ & - (s_L - vel_L(dir_idx(1))))) - E_L)) & - + xi_P*(vel_R(dir_idx(1))*(E_R + pres_R - ptilde_R) + & - s_P*(xi_R*(E_R + (s_S - vel_R(dir_idx(1)))* & - (rho_R*s_S + (pres_R - ptilde_R)/ & - (s_R - vel_R(dir_idx(1))))) - E_R)) & - + (s_M/s_L)*(s_P/s_R)*pcorr*s_S - - ! Volume fraction flux - $:GPU_LOOP(parallelism='[seq]') - do i = advxb, advxe - flux_rs${XYZ}$_vf(j, k, l, i) = & - xi_M*qL_prim_rs${XYZ}$_vf(j, k, l, i) & - *(vel_L(dir_idx(1)) + s_M*(xi_L - 1._wp)) & - + xi_P*qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) & - *(vel_R(dir_idx(1)) + s_P*(xi_R - 1._wp)) - end do + ! Source for volume fraction advection equation + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_dims + vel_src_rs${XYZ}$_vf(j, k, l, dir_idx(i)) = & + xi_M*(vel_L(dir_idx(i)) + & + dir_flg(dir_idx(i))* & + s_M*(xi_L - 1._wp)) & + + xi_P*(vel_R(dir_idx(i)) + & + dir_flg(dir_idx(i))* & + s_P*(xi_R - 1._wp)) + + !IF ( (model_eqns == 4) .or. (num_fluids==1) ) vel_src_rs_vf(idxi)%sf(j,k,l) = 0._wp + end do - ! Source for volume fraction advection equation - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_dims - vel_src_rs${XYZ}$_vf(j, k, l, dir_idx(i)) = & - xi_M*(vel_L(dir_idx(i)) + & - dir_flg(dir_idx(i))* & - s_M*(xi_L - 1._wp)) & - + xi_P*(vel_R(dir_idx(i)) + & - dir_flg(dir_idx(i))* & - s_P*(xi_R - 1._wp)) - - !IF ( (model_eqns == 4) .or. (num_fluids==1) ) vel_src_rs_vf(idxi)%sf(j,k,l) = 0._wp - end do + flux_src_rs${XYZ}$_vf(j, k, l, advxb) = vel_src_rs${XYZ}$_vf(j, k, l, dir_idx(1)) - flux_src_rs${XYZ}$_vf(j, k, l, advxb) = vel_src_rs${XYZ}$_vf(j, k, l, dir_idx(1)) + ! Add advection flux for bubble variables + $:GPU_LOOP(parallelism='[seq]') + do i = bubxb, bubxe + flux_rs${XYZ}$_vf(j, k, l, i) = & + xi_M*nbub_L*qL_prim_rs${XYZ}$_vf(j, k, l, i) & + *(vel_L(dir_idx(1)) + s_M*(xi_L - 1._wp)) & + + xi_P*nbub_R*qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) & + *(vel_R(dir_idx(1)) + s_P*(xi_R - 1._wp)) + end do - ! Add advection flux for bubble variables - $:GPU_LOOP(parallelism='[seq]') - do i = bubxb, bubxe - flux_rs${XYZ}$_vf(j, k, l, i) = & - xi_M*nbub_L*qL_prim_rs${XYZ}$_vf(j, k, l, i) & - *(vel_L(dir_idx(1)) + s_M*(xi_L - 1._wp)) & - + xi_P*nbub_R*qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) & - *(vel_R(dir_idx(1)) + s_P*(xi_R - 1._wp)) - end do + if (qbmm) then + flux_rs${XYZ}$_vf(j, k, l, bubxb) = & + xi_M*nbub_L & + *(vel_L(dir_idx(1)) + s_M*(xi_L - 1._wp)) & + + xi_P*nbub_R & + *(vel_R(dir_idx(1)) + s_P*(xi_R - 1._wp)) + end if - if (qbmm) then - flux_rs${XYZ}$_vf(j, k, l, bubxb) = & - xi_M*nbub_L & - *(vel_L(dir_idx(1)) + s_M*(xi_L - 1._wp)) & - + xi_P*nbub_R & - *(vel_R(dir_idx(1)) + s_P*(xi_R - 1._wp)) - end if + if (adv_n) then + flux_rs${XYZ}$_vf(j, k, l, n_idx) = & + xi_M*nbub_L & + *(vel_L(dir_idx(1)) + s_M*(xi_L - 1._wp)) & + + xi_P*nbub_R & + *(vel_R(dir_idx(1)) + s_P*(xi_R - 1._wp)) + end if - if (adv_n) then - flux_rs${XYZ}$_vf(j, k, l, n_idx) = & - xi_M*nbub_L & - *(vel_L(dir_idx(1)) + s_M*(xi_L - 1._wp)) & - + xi_P*nbub_R & - *(vel_R(dir_idx(1)) + s_P*(xi_R - 1._wp)) + ! Geometrical source flux for cylindrical coordinates + #:if (NORM_DIR == 2) + if (cyl_coord) then + ! Substituting the advective flux into the inviscid geometrical source flux + $:GPU_LOOP(parallelism='[seq]') + do i = 1, E_idx + flux_gsrc_rs${XYZ}$_vf(j, k, l, i) = flux_rs${XYZ}$_vf(j, k, l, i) + end do + ! Recalculating the radial momentum geometric source flux + flux_gsrc_rs${XYZ}$_vf(j, k, l, contxe + dir_idx(1)) = & + xi_M*(rho_L*(vel_L(dir_idx(1))* & + vel_L(dir_idx(1)) + & + s_M*(xi_L*(dir_flg(dir_idx(1))*s_S + & + (1._wp - dir_flg(dir_idx(1)))* & + vel_L(dir_idx(1))) - vel_L(dir_idx(1))))) & + + xi_P*(rho_R*(vel_R(dir_idx(1))* & + vel_R(dir_idx(1)) + & + s_P*(xi_R*(dir_flg(dir_idx(1))*s_S + & + (1._wp - dir_flg(dir_idx(1)))* & + vel_R(dir_idx(1))) - vel_R(dir_idx(1))))) + ! Geometrical source of the void fraction(s) is zero + $:GPU_LOOP(parallelism='[seq]') + do i = advxb, advxe + flux_gsrc_rs${XYZ}$_vf(j, k, l, i) = 0._wp + end do end if + #:endif + #:if (NORM_DIR == 3) + if (grid_geometry == 3) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, sys_size + flux_gsrc_rs${XYZ}$_vf(j, k, l, i) = 0._wp + end do - ! Geometrical source flux for cylindrical coordinates - #:if (NORM_DIR == 2) - if (cyl_coord) then - ! Substituting the advective flux into the inviscid geometrical source flux - $:GPU_LOOP(parallelism='[seq]') - do i = 1, E_idx - flux_gsrc_rs${XYZ}$_vf(j, k, l, i) = flux_rs${XYZ}$_vf(j, k, l, i) - end do - ! Recalculating the radial momentum geometric source flux - flux_gsrc_rs${XYZ}$_vf(j, k, l, contxe + dir_idx(1)) = & - xi_M*(rho_L*(vel_L(dir_idx(1))* & - vel_L(dir_idx(1)) + & - s_M*(xi_L*(dir_flg(dir_idx(1))*s_S + & - (1._wp - dir_flg(dir_idx(1)))* & - vel_L(dir_idx(1))) - vel_L(dir_idx(1))))) & - + xi_P*(rho_R*(vel_R(dir_idx(1))* & - vel_R(dir_idx(1)) + & - s_P*(xi_R*(dir_flg(dir_idx(1))*s_S + & - (1._wp - dir_flg(dir_idx(1)))* & - vel_R(dir_idx(1))) - vel_R(dir_idx(1))))) - ! Geometrical source of the void fraction(s) is zero - $:GPU_LOOP(parallelism='[seq]') - do i = advxb, advxe - flux_gsrc_rs${XYZ}$_vf(j, k, l, i) = 0._wp - end do - end if - #:endif - #:if (NORM_DIR == 3) - if (grid_geometry == 3) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, sys_size - flux_gsrc_rs${XYZ}$_vf(j, k, l, i) = 0._wp - end do - - flux_gsrc_rs${XYZ}$_vf(j, k, l, momxb + 1) = & - -xi_M*(rho_L*(vel_L(dir_idx(1))* & - vel_L(dir_idx(1)) + & - s_M*(xi_L*(dir_flg(dir_idx(1))*s_S + & - (1._wp - dir_flg(dir_idx(1)))* & - vel_L(dir_idx(1))) - vel_L(dir_idx(1))))) & - - xi_P*(rho_R*(vel_R(dir_idx(1))* & - vel_R(dir_idx(1)) + & - s_P*(xi_R*(dir_flg(dir_idx(1))*s_S + & - (1._wp - dir_flg(dir_idx(1)))* & - vel_R(dir_idx(1))) - vel_R(dir_idx(1))))) - flux_gsrc_rs${XYZ}$_vf(j, k, l, momxe) = flux_rs${XYZ}$_vf(j, k, l, momxb + 1) - - end if - #:endif - end do + flux_gsrc_rs${XYZ}$_vf(j, k, l, momxb + 1) = & + -xi_M*(rho_L*(vel_L(dir_idx(1))* & + vel_L(dir_idx(1)) + & + s_M*(xi_L*(dir_flg(dir_idx(1))*s_S + & + (1._wp - dir_flg(dir_idx(1)))* & + vel_L(dir_idx(1))) - vel_L(dir_idx(1))))) & + - xi_P*(rho_R*(vel_R(dir_idx(1))* & + vel_R(dir_idx(1)) + & + s_P*(xi_R*(dir_flg(dir_idx(1))*s_S + & + (1._wp - dir_flg(dir_idx(1)))* & + vel_R(dir_idx(1))) - vel_R(dir_idx(1))))) + flux_gsrc_rs${XYZ}$_vf(j, k, l, momxe) = flux_rs${XYZ}$_vf(j, k, l, momxb + 1) + + end if + #:endif end do end do + end do $:END_GPU_PARALLEL_LOOP() else ! 5-EQUATION MODEL WITH HLLC $:GPU_PARALLEL_LOOP(collapse=3, private='[Re_max, i, q, T_L, T_R, idx1, idxi, vel_L_rms, vel_R_rms, pres_L, pres_R, rho_L, gamma_L, pi_inf_L, qv_L, rho_R, gamma_R, pi_inf_R, qv_R, alpha_L_sum, alpha_R_sum, E_L, E_R, MW_L, MW_R, R_gas_L, R_gas_R, Cp_L, Cp_R, Cv_L, Cv_R, Gamm_L, Gamm_R, Y_L, Y_R, H_L, H_R, rho_avg, gamma_avg, H_avg, c_L, c_R, c_avg, s_P, s_M, xi_P, xi_M, xi_L, xi_R, Ms_L, Ms_R, pres_SL, pres_SR, vel_L, vel_R, Re_L, Re_R, alpha_L, alpha_R, s_L, s_R, s_S, vel_avg_rms, pcorr, zcoef, vel_L_tmp, vel_R_tmp, Ys_L, Ys_R, Xs_L, Xs_R, Gamma_iL, Gamma_iR, Cp_iL, Cp_iR, tau_e_L, tau_e_R, xi_field_L, xi_field_R, Yi_avg,Phi_avg, h_iL, h_iR, h_avg_2, G_L, G_R]', copyin='[is1, is2, is3]') - do l = is3%beg, is3%end - do k = is2%beg, is2%end - do j = is1%beg, is1%end + do l = is3%beg, is3%end + do k = is2%beg, is2%end + do j = is1%beg, is1%end + + idx1 = 1; if (dir_idx(1) == 2) idx1 = 2; if (dir_idx(1) == 3) idx1 = 3 + + vel_L_rms = 0._wp; vel_R_rms = 0._wp + rho_L = 0._wp; rho_R = 0._wp + gamma_L = 0._wp; gamma_R = 0._wp + pi_inf_L = 0._wp; pi_inf_R = 0._wp + qv_L = 0._wp; qv_R = 0._wp + alpha_L_sum = 0._wp; alpha_R_sum = 0._wp + + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids + alpha_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i) + alpha_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i) + end do - idx1 = 1; if (dir_idx(1) == 2) idx1 = 2; if (dir_idx(1) == 3) idx1 = 3 + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_dims + vel_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, contxe + i) + vel_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, contxe + i) + vel_L_rms = vel_L_rms + vel_L(i)**2._wp + vel_R_rms = vel_R_rms + vel_R(i)**2._wp + end do - vel_L_rms = 0._wp; vel_R_rms = 0._wp - rho_L = 0._wp; rho_R = 0._wp - gamma_L = 0._wp; gamma_R = 0._wp - pi_inf_L = 0._wp; pi_inf_R = 0._wp - qv_L = 0._wp; qv_R = 0._wp - alpha_L_sum = 0._wp; alpha_R_sum = 0._wp + pres_L = qL_prim_rs${XYZ}$_vf(j, k, l, E_idx) + pres_R = qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx) + ! Change this by splitting it into the cases + ! present in the bubbles_euler + if (mpp_lim) then $:GPU_LOOP(parallelism='[seq]') do i = 1, num_fluids - alpha_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i) - alpha_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i) + qL_prim_rs${XYZ}$_vf(j, k, l, i) = max(0._wp, qL_prim_rs${XYZ}$_vf(j, k, l, i)) + qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i) = min(max(0._wp, qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i)), 1._wp) + qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) = max(0._wp, qR_prim_rs${XYZ}$_vf(j + 1, k, l, i)) + qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i) = min(max(0._wp, qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i)), 1._wp) + alpha_L_sum = alpha_L_sum + qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i) + alpha_R_sum = alpha_R_sum + qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i) end do $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_dims - vel_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, contxe + i) - vel_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, contxe + i) - vel_L_rms = vel_L_rms + vel_L(i)**2._wp - vel_R_rms = vel_R_rms + vel_R(i)**2._wp + do i = 1, num_fluids + qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i) = qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i)/max(alpha_L_sum, sgm_eps) + qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i)/max(alpha_R_sum, sgm_eps) end do + end if - pres_L = qL_prim_rs${XYZ}$_vf(j, k, l, E_idx) - pres_R = qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx) - - ! Change this by splitting it into the cases - ! present in the bubbles_euler - if (mpp_lim) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - qL_prim_rs${XYZ}$_vf(j, k, l, i) = max(0._wp, qL_prim_rs${XYZ}$_vf(j, k, l, i)) - qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i) = min(max(0._wp, qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i)), 1._wp) - qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) = max(0._wp, qR_prim_rs${XYZ}$_vf(j + 1, k, l, i)) - qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i) = min(max(0._wp, qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i)), 1._wp) - alpha_L_sum = alpha_L_sum + qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i) - alpha_R_sum = alpha_R_sum + qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i) - end do + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids + rho_L = rho_L + qL_prim_rs${XYZ}$_vf(j, k, l, i) + gamma_L = gamma_L + qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i)*gammas(i) + pi_inf_L = pi_inf_L + qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i)*pi_infs(i) + qv_L = qv_L + qL_prim_rs${XYZ}$_vf(j, k, l, i)*qvs(i) + + rho_R = rho_R + qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) + gamma_R = gamma_R + qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i)*gammas(i) + pi_inf_R = pi_inf_R + qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i)*pi_infs(i) + qv_R = qv_R + qR_prim_rs${XYZ}$_vf(j + 1, k, l, i)*qvs(i) + end do - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i) = qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i)/max(alpha_L_sum, sgm_eps) - qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i)/max(alpha_R_sum, sgm_eps) - end do - end if + Re_max = 0 + if (Re_size(1) > 0) Re_max = 1 + if (Re_size(2) > 0) Re_max = 2 + if (viscous) then $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - rho_L = rho_L + qL_prim_rs${XYZ}$_vf(j, k, l, i) - gamma_L = gamma_L + qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i)*gammas(i) - pi_inf_L = pi_inf_L + qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i)*pi_infs(i) - qv_L = qv_L + qL_prim_rs${XYZ}$_vf(j, k, l, i)*qvs(i) - - rho_R = rho_R + qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) - gamma_R = gamma_R + qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i)*gammas(i) - pi_inf_R = pi_inf_R + qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i)*pi_infs(i) - qv_R = qv_R + qR_prim_rs${XYZ}$_vf(j + 1, k, l, i)*qvs(i) - end do + do i = 1, Re_max + Re_L(i) = 0._wp + Re_R(i) = 0._wp - Re_max = 0 - if (Re_size(1) > 0) Re_max = 1 - if (Re_size(2) > 0) Re_max = 2 - - if (viscous) then $:GPU_LOOP(parallelism='[seq]') - do i = 1, Re_max - Re_L(i) = 0._wp - Re_R(i) = 0._wp - - $:GPU_LOOP(parallelism='[seq]') - do q = 1, Re_size(i) - Re_L(i) = alpha_L(Re_idx(i, q))/Res_gs(i, q) & - + Re_L(i) - Re_R(i) = alpha_R(Re_idx(i, q))/Res_gs(i, q) & - + Re_R(i) - end do - - Re_L(i) = 1._wp/max(Re_L(i), sgm_eps) - Re_R(i) = 1._wp/max(Re_R(i), sgm_eps) + do q = 1, Re_size(i) + Re_L(i) = alpha_L(Re_idx(i, q))/Res_gs(i, q) & + + Re_L(i) + Re_R(i) = alpha_R(Re_idx(i, q))/Res_gs(i, q) & + + Re_R(i) end do - end if - if (chemistry) then - c_sum_Yi_Phi = 0.0_wp - $:GPU_LOOP(parallelism='[seq]') - do i = chemxb, chemxe - Ys_L(i - chemxb + 1) = qL_prim_rs${XYZ}$_vf(j, k, l, i) - Ys_R(i - chemxb + 1) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) - end do + Re_L(i) = 1._wp/max(Re_L(i), sgm_eps) + Re_R(i) = 1._wp/max(Re_R(i), sgm_eps) + end do + end if - call get_mixture_molecular_weight(Ys_L, MW_L) - call get_mixture_molecular_weight(Ys_R, MW_R) + if (chemistry) then + c_sum_Yi_Phi = 0.0_wp + $:GPU_LOOP(parallelism='[seq]') + do i = chemxb, chemxe + Ys_L(i - chemxb + 1) = qL_prim_rs${XYZ}$_vf(j, k, l, i) + Ys_R(i - chemxb + 1) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) + end do - Xs_L(:) = Ys_L(:)*MW_L/molecular_weights(:) - Xs_R(:) = Ys_R(:)*MW_R/molecular_weights(:) + call get_mixture_molecular_weight(Ys_L, MW_L) + call get_mixture_molecular_weight(Ys_R, MW_R) - R_gas_L = gas_constant/MW_L - R_gas_R = gas_constant/MW_R + Xs_L(:) = Ys_L(:)*MW_L/molecular_weights(:) + Xs_R(:) = Ys_R(:)*MW_R/molecular_weights(:) - T_L = pres_L/rho_L/R_gas_L - T_R = pres_R/rho_R/R_gas_R + R_gas_L = gas_constant/MW_L + R_gas_R = gas_constant/MW_R - call get_species_specific_heats_r(T_L, Cp_iL) - call get_species_specific_heats_r(T_R, Cp_iR) + T_L = pres_L/rho_L/R_gas_L + T_R = pres_R/rho_R/R_gas_R - if (chem_params%gamma_method == 1) then - !> gamma_method = 1: Ref. Section 2.3.1 Formulation of doi:10.7907/ZKW8-ES97. - Gamma_iL = Cp_iL/(Cp_iL - 1.0_wp) - Gamma_iR = Cp_iR/(Cp_iR - 1.0_wp) + call get_species_specific_heats_r(T_L, Cp_iL) + call get_species_specific_heats_r(T_R, Cp_iR) - gamma_L = sum(Xs_L(:)/(Gamma_iL(:) - 1.0_wp)) - gamma_R = sum(Xs_R(:)/(Gamma_iR(:) - 1.0_wp)) - else if (chem_params%gamma_method == 2) then - !> gamma_method = 2: c_p / c_v where c_p, c_v are specific heats. - call get_mixture_specific_heat_cp_mass(T_L, Ys_L, Cp_L) - call get_mixture_specific_heat_cp_mass(T_R, Ys_R, Cp_R) - call get_mixture_specific_heat_cv_mass(T_L, Ys_L, Cv_L) - call get_mixture_specific_heat_cv_mass(T_R, Ys_R, Cv_R) + if (chem_params%gamma_method == 1) then + !> gamma_method = 1: Ref. Section 2.3.1 Formulation of doi:10.7907/ZKW8-ES97. + Gamma_iL = Cp_iL/(Cp_iL - 1.0_wp) + Gamma_iR = Cp_iR/(Cp_iR - 1.0_wp) - Gamm_L = Cp_L/Cv_L; Gamm_R = Cp_R/Cv_R - gamma_L = 1.0_wp/(Gamm_L - 1.0_wp); gamma_R = 1.0_wp/(Gamm_R - 1.0_wp) - end if + gamma_L = sum(Xs_L(:)/(Gamma_iL(:) - 1.0_wp)) + gamma_R = sum(Xs_R(:)/(Gamma_iR(:) - 1.0_wp)) + else if (chem_params%gamma_method == 2) then + !> gamma_method = 2: c_p / c_v where c_p, c_v are specific heats. + call get_mixture_specific_heat_cp_mass(T_L, Ys_L, Cp_L) + call get_mixture_specific_heat_cp_mass(T_R, Ys_R, Cp_R) + call get_mixture_specific_heat_cv_mass(T_L, Ys_L, Cv_L) + call get_mixture_specific_heat_cv_mass(T_R, Ys_R, Cv_R) - call get_mixture_energy_mass(T_L, Ys_L, E_L) - call get_mixture_energy_mass(T_R, Ys_R, E_R) + Gamm_L = Cp_L/Cv_L; Gamm_R = Cp_R/Cv_R + gamma_L = 1.0_wp/(Gamm_L - 1.0_wp); gamma_R = 1.0_wp/(Gamm_R - 1.0_wp) + end if - E_L = rho_L*E_L + 5.e-1*rho_L*vel_L_rms - E_R = rho_R*E_R + 5.e-1*rho_R*vel_R_rms - H_L = (E_L + pres_L)/rho_L - H_R = (E_R + pres_R)/rho_R - else - E_L = gamma_L*pres_L + pi_inf_L + 5.e-1*rho_L*vel_L_rms + qv_L - E_R = gamma_R*pres_R + pi_inf_R + 5.e-1*rho_R*vel_R_rms + qv_R + call get_mixture_energy_mass(T_L, Ys_L, E_L) + call get_mixture_energy_mass(T_R, Ys_R, E_R) - H_L = (E_L + pres_L)/rho_L - H_R = (E_R + pres_R)/rho_R - end if + E_L = rho_L*E_L + 5.e-1*rho_L*vel_L_rms + E_R = rho_R*E_R + 5.e-1*rho_R*vel_R_rms + H_L = (E_L + pres_L)/rho_L + H_R = (E_R + pres_R)/rho_R + else + E_L = gamma_L*pres_L + pi_inf_L + 5.e-1*rho_L*vel_L_rms + qv_L + E_R = gamma_R*pres_R + pi_inf_R + 5.e-1*rho_R*vel_R_rms + qv_R - ! ENERGY ADJUSTMENTS FOR HYPOELASTIC ENERGY - if (hypoelasticity) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, strxe - strxb + 1 - tau_e_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, strxb - 1 + i) - tau_e_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, strxb - 1 + i) - end do - G_L = 0._wp - G_R = 0._wp - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - G_L = G_L + alpha_L(i)*Gs_rs(i) - G_R = G_R + alpha_R(i)*Gs_rs(i) - end do - $:GPU_LOOP(parallelism='[seq]') - do i = 1, strxe - strxb + 1 - ! Elastic contribution to energy if G large enough - if ((G_L > verysmall) .and. (G_R > verysmall)) then + H_L = (E_L + pres_L)/rho_L + H_R = (E_R + pres_R)/rho_R + end if + + ! ENERGY ADJUSTMENTS FOR HYPOELASTIC ENERGY + if (hypoelasticity) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, strxe - strxb + 1 + tau_e_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, strxb - 1 + i) + tau_e_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, strxb - 1 + i) + end do + G_L = 0._wp + G_R = 0._wp + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids + G_L = G_L + alpha_L(i)*Gs_rs(i) + G_R = G_R + alpha_R(i)*Gs_rs(i) + end do + $:GPU_LOOP(parallelism='[seq]') + do i = 1, strxe - strxb + 1 + ! Elastic contribution to energy if G large enough + if ((G_L > verysmall) .and. (G_R > verysmall)) then + E_L = E_L + (tau_e_L(i)*tau_e_L(i))/(4._wp*G_L) + E_R = E_R + (tau_e_R(i)*tau_e_R(i))/(4._wp*G_R) + ! Additional terms in 2D and 3D + if ((i == 2) .or. (i == 4) .or. (i == 5)) then E_L = E_L + (tau_e_L(i)*tau_e_L(i))/(4._wp*G_L) E_R = E_R + (tau_e_R(i)*tau_e_R(i))/(4._wp*G_R) - ! Additional terms in 2D and 3D - if ((i == 2) .or. (i == 4) .or. (i == 5)) then - E_L = E_L + (tau_e_L(i)*tau_e_L(i))/(4._wp*G_L) - E_R = E_R + (tau_e_R(i)*tau_e_R(i))/(4._wp*G_R) - end if end if - end do - end if - - ! ENERGY ADJUSTMENTS FOR HYPERELASTIC ENERGY - if (hyperelasticity) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_dims - xi_field_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, xibeg - 1 + i) - xi_field_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, xibeg - 1 + i) - end do - G_L = 0._wp - G_R = 0._wp - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - ! Mixture left and right shear modulus - G_L = G_L + alpha_L(i)*Gs_rs(i) - G_R = G_R + alpha_R(i)*Gs_rs(i) - end do - ! Elastic contribution to energy if G large enough - if (G_L > verysmall .and. G_R > verysmall) then - E_L = E_L + G_L*qL_prim_rs${XYZ}$_vf(j, k, l, xiend + 1) - E_R = E_R + G_R*qR_prim_rs${XYZ}$_vf(j + 1, k, l, xiend + 1) end if - $:GPU_LOOP(parallelism='[seq]') - do i = 1, b_size - 1 - tau_e_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, strxb - 1 + i) - tau_e_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, strxb - 1 + i) - end do + end do + end if + + ! ENERGY ADJUSTMENTS FOR HYPERELASTIC ENERGY + if (hyperelasticity) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_dims + xi_field_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, xibeg - 1 + i) + xi_field_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, xibeg - 1 + i) + end do + G_L = 0._wp + G_R = 0._wp + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids + ! Mixture left and right shear modulus + G_L = G_L + alpha_L(i)*Gs_rs(i) + G_R = G_R + alpha_R(i)*Gs_rs(i) + end do + ! Elastic contribution to energy if G large enough + if (G_L > verysmall .and. G_R > verysmall) then + E_L = E_L + G_L*qL_prim_rs${XYZ}$_vf(j, k, l, xiend + 1) + E_R = E_R + G_R*qR_prim_rs${XYZ}$_vf(j + 1, k, l, xiend + 1) end if + $:GPU_LOOP(parallelism='[seq]') + do i = 1, b_size - 1 + tau_e_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, strxb - 1 + i) + tau_e_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, strxb - 1 + i) + end do + end if - H_L = (E_L + pres_L)/rho_L - H_R = (E_R + pres_R)/rho_R + H_L = (E_L + pres_L)/rho_L + H_R = (E_R + pres_R)/rho_R - @:compute_average_state() + @:compute_average_state() - call s_compute_speed_of_sound(pres_L, rho_L, gamma_L, pi_inf_L, H_L, alpha_L, & - vel_L_rms, 0._wp, c_L) + call s_compute_speed_of_sound(pres_L, rho_L, gamma_L, pi_inf_L, H_L, alpha_L, & + vel_L_rms, 0._wp, c_L) - call s_compute_speed_of_sound(pres_R, rho_R, gamma_R, pi_inf_R, H_R, alpha_R, & - vel_R_rms, 0._wp, c_R) + call s_compute_speed_of_sound(pres_R, rho_R, gamma_R, pi_inf_R, H_R, alpha_R, & + vel_R_rms, 0._wp, c_R) - !> The computation of c_avg does not require all the variables, and therefore the non '_avg' - ! variables are placeholders to call the subroutine. - call s_compute_speed_of_sound(pres_R, rho_avg, gamma_avg, pi_inf_R, H_avg, alpha_R, & - vel_avg_rms, c_sum_Yi_Phi, c_avg) + !> The computation of c_avg does not require all the variables, and therefore the non '_avg' + ! variables are placeholders to call the subroutine. + call s_compute_speed_of_sound(pres_R, rho_avg, gamma_avg, pi_inf_R, H_avg, alpha_R, & + vel_avg_rms, c_sum_Yi_Phi, c_avg) - if (viscous) then - if (chemistry) then - call compute_viscosity_and_inversion(T_L, Ys_L, T_R, Ys_R, Re_L(1), Re_R(1)) - end if - $:GPU_LOOP(parallelism='[seq]') - do i = 1, 2 - Re_avg_rs${XYZ}$_vf(j, k, l, i) = 2._wp/(1._wp/Re_L(i) + 1._wp/Re_R(i)) - end do + if (viscous) then + if (chemistry) then + call compute_viscosity_and_inversion(T_L, Ys_L, T_R, Ys_R, Re_L(1), Re_R(1)) end if + $:GPU_LOOP(parallelism='[seq]') + do i = 1, 2 + Re_avg_rs${XYZ}$_vf(j, k, l, i) = 2._wp/(1._wp/Re_L(i) + 1._wp/Re_R(i)) + end do + end if - ! Low Mach correction - if (low_Mach == 2) then - @:compute_low_Mach_correction() - end if + ! Low Mach correction + if (low_Mach == 2) then + @:compute_low_Mach_correction() + end if - if (wave_speeds == 1) then - if (elasticity) then - s_L = min(vel_L(dir_idx(1)) - sqrt(c_L*c_L + & - (((4._wp*G_L)/3._wp) + tau_e_L(dir_idx_tau(1)))/rho_L), vel_R(dir_idx(1)) - sqrt(c_R*c_R + & - (((4._wp*G_R)/3._wp) + tau_e_R(dir_idx_tau(1)))/rho_R)) - s_R = max(vel_R(dir_idx(1)) + sqrt(c_R*c_R + & - (((4._wp*G_R)/3._wp) + tau_e_R(dir_idx_tau(1)))/rho_R), vel_L(dir_idx(1)) + sqrt(c_L*c_L + & - (((4._wp*G_L)/3._wp) + tau_e_L(dir_idx_tau(1)))/rho_L)) - s_S = (pres_R - tau_e_R(dir_idx_tau(1)) - pres_L + & - tau_e_L(dir_idx_tau(1)) + rho_L*vel_L(idx1)*(s_L - vel_L(idx1)) - & - rho_R*vel_R(idx1)*(s_R - vel_R(idx1)))/(rho_L*(s_L - vel_L(idx1)) - & - rho_R*(s_R - vel_R(idx1))) - else - s_L = min(vel_L(dir_idx(1)) - c_L, vel_R(dir_idx(1)) - c_R) - s_R = max(vel_R(dir_idx(1)) + c_R, vel_L(dir_idx(1)) + c_L) - s_S = (pres_R - pres_L + rho_L*vel_L(dir_idx(1))* & - (s_L - vel_L(dir_idx(1))) - rho_R*vel_R(dir_idx(1))*(s_R - vel_R(dir_idx(1)))) & - /(rho_L*(s_L - vel_L(dir_idx(1))) - rho_R*(s_R - vel_R(dir_idx(1)))) + if (wave_speeds == 1) then + if (elasticity) then + s_L = min(vel_L(dir_idx(1)) - sqrt(c_L*c_L + & + (((4._wp*G_L)/3._wp) + tau_e_L(dir_idx_tau(1)))/rho_L), vel_R(dir_idx(1)) - sqrt(c_R*c_R + & + (((4._wp*G_R)/3._wp) + tau_e_R(dir_idx_tau(1)))/rho_R)) + s_R = max(vel_R(dir_idx(1)) + sqrt(c_R*c_R + & + (((4._wp*G_R)/3._wp) + tau_e_R(dir_idx_tau(1)))/rho_R), vel_L(dir_idx(1)) + sqrt(c_L*c_L + & + (((4._wp*G_L)/3._wp) + tau_e_L(dir_idx_tau(1)))/rho_L)) + s_S = (pres_R - tau_e_R(dir_idx_tau(1)) - pres_L + & + tau_e_L(dir_idx_tau(1)) + rho_L*vel_L(idx1)*(s_L - vel_L(idx1)) - & + rho_R*vel_R(idx1)*(s_R - vel_R(idx1)))/(rho_L*(s_L - vel_L(idx1)) - & + rho_R*(s_R - vel_R(idx1))) + else + s_L = min(vel_L(dir_idx(1)) - c_L, vel_R(dir_idx(1)) - c_R) + s_R = max(vel_R(dir_idx(1)) + c_R, vel_L(dir_idx(1)) + c_L) + s_S = (pres_R - pres_L + rho_L*vel_L(dir_idx(1))* & + (s_L - vel_L(dir_idx(1))) - rho_R*vel_R(dir_idx(1))*(s_R - vel_R(dir_idx(1)))) & + /(rho_L*(s_L - vel_L(dir_idx(1))) - rho_R*(s_R - vel_R(dir_idx(1)))) - end if - elseif (wave_speeds == 2) then - pres_SL = 5.e-1_wp*(pres_L + pres_R + rho_avg*c_avg* & - (vel_L(idx1) - & - vel_R(idx1))) - - pres_SR = pres_SL - - Ms_L = max(1._wp, sqrt(1._wp + ((5.e-1_wp + gamma_L)/(1._wp + gamma_L))* & - (pres_SL/pres_L - 1._wp)*pres_L/ & - ((pres_L + pi_inf_L/(1._wp + gamma_L))))) - Ms_R = max(1._wp, sqrt(1._wp + ((5.e-1_wp + gamma_R)/(1._wp + gamma_R))* & - (pres_SR/pres_R - 1._wp)*pres_R/ & - ((pres_R + pi_inf_R/(1._wp + gamma_R))))) - - s_L = vel_L(idx1) - c_L*Ms_L - s_R = vel_R(idx1) + c_R*Ms_R - - s_S = 5.e-1_wp*((vel_L(idx1) + vel_R(idx1)) + & - (pres_L - pres_R)/ & - (rho_avg*c_avg)) end if + elseif (wave_speeds == 2) then + pres_SL = 5.e-1_wp*(pres_L + pres_R + rho_avg*c_avg* & + (vel_L(idx1) - & + vel_R(idx1))) + + pres_SR = pres_SL + + Ms_L = max(1._wp, sqrt(1._wp + ((5.e-1_wp + gamma_L)/(1._wp + gamma_L))* & + (pres_SL/pres_L - 1._wp)*pres_L/ & + ((pres_L + pi_inf_L/(1._wp + gamma_L))))) + Ms_R = max(1._wp, sqrt(1._wp + ((5.e-1_wp + gamma_R)/(1._wp + gamma_R))* & + (pres_SR/pres_R - 1._wp)*pres_R/ & + ((pres_R + pi_inf_R/(1._wp + gamma_R))))) - ! follows Einfeldt et al. - ! s_M/P = min/max(0.,s_L/R) - s_M = min(0._wp, s_L); s_P = max(0._wp, s_R) + s_L = vel_L(idx1) - c_L*Ms_L + s_R = vel_R(idx1) + c_R*Ms_R + + s_S = 5.e-1_wp*((vel_L(idx1) + vel_R(idx1)) + & + (pres_L - pres_R)/ & + (rho_avg*c_avg)) + end if - ! goes with q_star_L/R = xi_L/R * (variable) - ! xi_L/R = ( ( s_L/R - u_L/R )/(s_L/R - s_star) ) - xi_L = (s_L - vel_L(idx1))/(s_L - s_S) - xi_R = (s_R - vel_R(idx1))/(s_R - s_S) + ! follows Einfeldt et al. + ! s_M/P = min/max(0.,s_L/R) + s_M = min(0._wp, s_L); s_P = max(0._wp, s_R) - ! goes with numerical velocity in x/y/z directions - ! xi_P/M = 0.5 +/m sgn(0.5,s_star) - xi_M = (5.e-1_wp + sign(5.e-1_wp, s_S)) - xi_P = (5.e-1_wp - sign(5.e-1_wp, s_S)) + ! goes with q_star_L/R = xi_L/R * (variable) + ! xi_L/R = ( ( s_L/R - u_L/R )/(s_L/R - s_star) ) + xi_L = (s_L - vel_L(idx1))/(s_L - s_S) + xi_R = (s_R - vel_R(idx1))/(s_R - s_S) - ! Low Mach correction - if (low_Mach == 1) then - @:compute_low_Mach_correction() - else - pcorr = 0._wp - end if + ! goes with numerical velocity in x/y/z directions + ! xi_P/M = 0.5 +/m sgn(0.5,s_star) + xi_M = (5.e-1_wp + sign(5.e-1_wp, s_S)) + xi_P = (5.e-1_wp - sign(5.e-1_wp, s_S)) - ! COMPUTING THE HLLC FLUXES - ! MASS FLUX. - $:GPU_LOOP(parallelism='[seq]') - do i = 1, contxe - flux_rs${XYZ}$_vf(j, k, l, i) = & - xi_M*qL_prim_rs${XYZ}$_vf(j, k, l, i) & - *(vel_L(idx1) + s_M*(xi_L - 1._wp)) & - + xi_P*qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) & - *(vel_R(idx1) + s_P*(xi_R - 1._wp)) - end do + ! Low Mach correction + if (low_Mach == 1) then + @:compute_low_Mach_correction() + else + pcorr = 0._wp + end if + + ! COMPUTING THE HLLC FLUXES + ! MASS FLUX. + $:GPU_LOOP(parallelism='[seq]') + do i = 1, contxe + flux_rs${XYZ}$_vf(j, k, l, i) = & + xi_M*qL_prim_rs${XYZ}$_vf(j, k, l, i) & + *(vel_L(idx1) + s_M*(xi_L - 1._wp)) & + + xi_P*qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) & + *(vel_R(idx1) + s_P*(xi_R - 1._wp)) + end do + + ! MOMENTUM FLUX. + ! f = \rho u u - \sigma, q = \rho u, q_star = \xi * \rho*(s_star, v, w) + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_dims + idxi = dir_idx(i) + flux_rs${XYZ}$_vf(j, k, l, contxe + idxi) = & + xi_M*(rho_L*(vel_L(idx1)* & + vel_L(idxi) + & + s_M*(xi_L*(dir_flg(idxi)*s_S + & + (1._wp - dir_flg(idxi))* & + vel_L(idxi)) - vel_L(idxi))) + & + dir_flg(idxi)*(pres_L)) & + + xi_P*(rho_R*(vel_R(idx1)* & + vel_R(idxi) + & + s_P*(xi_R*(dir_flg(idxi)*s_S + & + (1._wp - dir_flg(idxi))* & + vel_R(idxi)) - vel_R(idxi))) + & + dir_flg(idxi)*(pres_R)) & + + (s_M/s_L)*(s_P/s_R)*dir_flg(idxi)*pcorr + end do - ! MOMENTUM FLUX. - ! f = \rho u u - \sigma, q = \rho u, q_star = \xi * \rho*(s_star, v, w) + ! ENERGY FLUX. + ! f = u*(E-\sigma), q = E, q_star = \xi*E+(s-u)(\rho s_star - \sigma/(s-u)) + flux_rs${XYZ}$_vf(j, k, l, E_idx) = & + xi_M*(vel_L(idx1)*(E_L + pres_L) + & + s_M*(xi_L*(E_L + (s_S - vel_L(idx1))* & + (rho_L*s_S + pres_L/ & + (s_L - vel_L(idx1)))) - E_L)) & + + xi_P*(vel_R(idx1)*(E_R + pres_R) + & + s_P*(xi_R*(E_R + (s_S - vel_R(idx1))* & + (rho_R*s_S + pres_R/ & + (s_R - vel_R(idx1)))) - E_R)) & + + (s_M/s_L)*(s_P/s_R)*pcorr*s_S + + ! ELASTICITY. Elastic shear stress additions for the momentum and energy flux + if (elasticity) then + flux_ene_e = 0._wp $:GPU_LOOP(parallelism='[seq]') do i = 1, num_dims idxi = dir_idx(i) + ! MOMENTUM ELASTIC FLUX. flux_rs${XYZ}$_vf(j, k, l, contxe + idxi) = & - xi_M*(rho_L*(vel_L(idx1)* & - vel_L(idxi) + & - s_M*(xi_L*(dir_flg(idxi)*s_S + & - (1._wp - dir_flg(idxi))* & - vel_L(idxi)) - vel_L(idxi))) + & - dir_flg(idxi)*(pres_L)) & - + xi_P*(rho_R*(vel_R(idx1)* & - vel_R(idxi) + & - s_P*(xi_R*(dir_flg(idxi)*s_S + & - (1._wp - dir_flg(idxi))* & - vel_R(idxi)) - vel_R(idxi))) + & - dir_flg(idxi)*(pres_R)) & - + (s_M/s_L)*(s_P/s_R)*dir_flg(idxi)*pcorr + flux_rs${XYZ}$_vf(j, k, l, contxe + idxi) & + - xi_M*tau_e_L(dir_idx_tau(i)) - xi_P*tau_e_R(dir_idx_tau(i)) + ! ENERGY ELASTIC FLUX. + flux_ene_e = flux_ene_e - & + xi_M*(vel_L(idxi)*tau_e_L(dir_idx_tau(i)) + & + s_M*(xi_L*((s_S - vel_L(i))*(tau_e_L(dir_idx_tau(i))/(s_L - vel_L(i)))))) - & + xi_P*(vel_R(idxi)*tau_e_R(dir_idx_tau(i)) + & + s_P*(xi_R*((s_S - vel_R(i))*(tau_e_R(dir_idx_tau(i))/(s_R - vel_R(i)))))) + end do + flux_rs${XYZ}$_vf(j, k, l, E_idx) = flux_rs${XYZ}$_vf(j, k, l, E_idx) + flux_ene_e + end if + + ! HYPOELASTIC STRESS EVOLUTION FLUX. + if (hypoelasticity) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, strxe - strxb + 1 + flux_rs${XYZ}$_vf(j, k, l, strxb - 1 + i) = & + xi_M*(s_S/(s_L - s_S))*(s_L*rho_L*tau_e_L(i) - rho_L*vel_L(idx1)*tau_e_L(i)) + & + xi_P*(s_S/(s_R - s_S))*(s_R*rho_R*tau_e_R(i) - rho_R*vel_R(idx1)*tau_e_R(i)) end do + end if - ! ENERGY FLUX. - ! f = u*(E-\sigma), q = E, q_star = \xi*E+(s-u)(\rho s_star - \sigma/(s-u)) - flux_rs${XYZ}$_vf(j, k, l, E_idx) = & - xi_M*(vel_L(idx1)*(E_L + pres_L) + & - s_M*(xi_L*(E_L + (s_S - vel_L(idx1))* & - (rho_L*s_S + pres_L/ & - (s_L - vel_L(idx1)))) - E_L)) & - + xi_P*(vel_R(idx1)*(E_R + pres_R) + & - s_P*(xi_R*(E_R + (s_S - vel_R(idx1))* & - (rho_R*s_S + pres_R/ & - (s_R - vel_R(idx1)))) - E_R)) & - + (s_M/s_L)*(s_P/s_R)*pcorr*s_S - - ! ELASTICITY. Elastic shear stress additions for the momentum and energy flux - if (elasticity) then - flux_ene_e = 0._wp - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_dims - idxi = dir_idx(i) - ! MOMENTUM ELASTIC FLUX. - flux_rs${XYZ}$_vf(j, k, l, contxe + idxi) = & - flux_rs${XYZ}$_vf(j, k, l, contxe + idxi) & - - xi_M*tau_e_L(dir_idx_tau(i)) - xi_P*tau_e_R(dir_idx_tau(i)) - ! ENERGY ELASTIC FLUX. - flux_ene_e = flux_ene_e - & - xi_M*(vel_L(idxi)*tau_e_L(dir_idx_tau(i)) + & - s_M*(xi_L*((s_S - vel_L(i))*(tau_e_L(dir_idx_tau(i))/(s_L - vel_L(i)))))) - & - xi_P*(vel_R(idxi)*tau_e_R(dir_idx_tau(i)) + & - s_P*(xi_R*((s_S - vel_R(i))*(tau_e_R(dir_idx_tau(i))/(s_R - vel_R(i)))))) - end do - flux_rs${XYZ}$_vf(j, k, l, E_idx) = flux_rs${XYZ}$_vf(j, k, l, E_idx) + flux_ene_e - end if + ! VOLUME FRACTION FLUX. + $:GPU_LOOP(parallelism='[seq]') + do i = advxb, advxe + flux_rs${XYZ}$_vf(j, k, l, i) = & + xi_M*qL_prim_rs${XYZ}$_vf(j, k, l, i) & + *(vel_L(idx1) + s_M*(xi_L - 1._wp)) & + + xi_P*qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) & + *(vel_R(idx1) + s_P*(xi_R - 1._wp)) + end do - ! HYPOELASTIC STRESS EVOLUTION FLUX. - if (hypoelasticity) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, strxe - strxb + 1 - flux_rs${XYZ}$_vf(j, k, l, strxb - 1 + i) = & - xi_M*(s_S/(s_L - s_S))*(s_L*rho_L*tau_e_L(i) - rho_L*vel_L(idx1)*tau_e_L(i)) + & - xi_P*(s_S/(s_R - s_S))*(s_R*rho_R*tau_e_R(i) - rho_R*vel_R(idx1)*tau_e_R(i)) - end do - end if + ! VOLUME FRACTION SOURCE FLUX. + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_dims + idxi = dir_idx(i) + vel_src_rs${XYZ}$_vf(j, k, l, idxi) = & + xi_M*(vel_L(idxi) + & + dir_flg(idxi)* & + s_M*(xi_L - 1._wp)) & + + xi_P*(vel_R(idxi) + & + dir_flg(idxi)* & + s_P*(xi_R - 1._wp)) + end do - ! VOLUME FRACTION FLUX. - $:GPU_LOOP(parallelism='[seq]') - do i = advxb, advxe - flux_rs${XYZ}$_vf(j, k, l, i) = & - xi_M*qL_prim_rs${XYZ}$_vf(j, k, l, i) & - *(vel_L(idx1) + s_M*(xi_L - 1._wp)) & - + xi_P*qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) & - *(vel_R(idx1) + s_P*(xi_R - 1._wp)) - end do + ! COLOR FUNCTION FLUX + if (surface_tension) then + flux_rs${XYZ}$_vf(j, k, l, c_idx) = & + xi_M*qL_prim_rs${XYZ}$_vf(j, k, l, c_idx) & + *(vel_L(idx1) + s_M*(xi_L - 1._wp)) & + + xi_P*qR_prim_rs${XYZ}$_vf(j + 1, k, l, c_idx) & + *(vel_R(idx1) + s_P*(xi_R - 1._wp)) + end if - ! VOLUME FRACTION SOURCE FLUX. + ! REFERENCE MAP FLUX. + if (hyperelasticity) then $:GPU_LOOP(parallelism='[seq]') do i = 1, num_dims - idxi = dir_idx(i) - vel_src_rs${XYZ}$_vf(j, k, l, idxi) = & - xi_M*(vel_L(idxi) + & - dir_flg(idxi)* & - s_M*(xi_L - 1._wp)) & - + xi_P*(vel_R(idxi) + & - dir_flg(idxi)* & - s_P*(xi_R - 1._wp)) + flux_rs${XYZ}$_vf(j, k, l, xibeg - 1 + i) = & + xi_M*(s_S/(s_L - s_S))*(s_L*rho_L*xi_field_L(i) & + - rho_L*vel_L(idx1)*xi_field_L(i)) + & + xi_P*(s_S/(s_R - s_S))*(s_R*rho_R*xi_field_R(i) & + - rho_R*vel_R(idx1)*xi_field_R(i)) end do + end if - ! COLOR FUNCTION FLUX - if (surface_tension) then - flux_rs${XYZ}$_vf(j, k, l, c_idx) = & - xi_M*qL_prim_rs${XYZ}$_vf(j, k, l, c_idx) & - *(vel_L(idx1) + s_M*(xi_L - 1._wp)) & - + xi_P*qR_prim_rs${XYZ}$_vf(j + 1, k, l, c_idx) & - *(vel_R(idx1) + s_P*(xi_R - 1._wp)) - end if + flux_src_rs${XYZ}$_vf(j, k, l, advxb) = vel_src_rs${XYZ}$_vf(j, k, l, idx1) - ! REFERENCE MAP FLUX. - if (hyperelasticity) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_dims - flux_rs${XYZ}$_vf(j, k, l, xibeg - 1 + i) = & - xi_M*(s_S/(s_L - s_S))*(s_L*rho_L*xi_field_L(i) & - - rho_L*vel_L(idx1)*xi_field_L(i)) + & - xi_P*(s_S/(s_R - s_S))*(s_R*rho_R*xi_field_R(i) & - - rho_R*vel_R(idx1)*xi_field_R(i)) - end do - end if + if (chemistry) then + $:GPU_LOOP(parallelism='[seq]') + do i = chemxb, chemxe + Y_L = qL_prim_rs${XYZ}$_vf(j, k, l, i) + Y_R = qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) - flux_src_rs${XYZ}$_vf(j, k, l, advxb) = vel_src_rs${XYZ}$_vf(j, k, l, idx1) + flux_rs${XYZ}$_vf(j, k, l, i) = xi_M*rho_L*Y_L*(vel_L(idx1) + s_M*(xi_L - 1._wp)) & + + xi_P*rho_R*Y_R*(vel_R(idx1) + s_P*(xi_R - 1._wp)) + flux_src_rs${XYZ}$_vf(j, k, l, i) = 0.0_wp + end do + end if - if (chemistry) then + ! Geometrical source flux for cylindrical coordinates + #:if (NORM_DIR == 2) + if (cyl_coord) then + !Substituting the advective flux into the inviscid geometrical source flux $:GPU_LOOP(parallelism='[seq]') - do i = chemxb, chemxe - Y_L = qL_prim_rs${XYZ}$_vf(j, k, l, i) - Y_R = qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) - - flux_rs${XYZ}$_vf(j, k, l, i) = xi_M*rho_L*Y_L*(vel_L(idx1) + s_M*(xi_L - 1._wp)) & - + xi_P*rho_R*Y_R*(vel_R(idx1) + s_P*(xi_R - 1._wp)) - flux_src_rs${XYZ}$_vf(j, k, l, i) = 0.0_wp + do i = 1, E_idx + flux_gsrc_rs${XYZ}$_vf(j, k, l, i) = flux_rs${XYZ}$_vf(j, k, l, i) + end do + ! Recalculating the radial momentum geometric source flux + flux_gsrc_rs${XYZ}$_vf(j, k, l, contxe + idx1) = & + xi_M*(rho_L*(vel_L(idx1)* & + vel_L(idx1) + & + s_M*(xi_L*(dir_flg(idx1)*s_S + & + (1._wp - dir_flg(idx1))* & + vel_L(idx1)) - vel_L(idx1)))) & + + xi_P*(rho_R*(vel_R(idx1)* & + vel_R(idx1) + & + s_P*(xi_R*(dir_flg(idx1)*s_S + & + (1._wp - dir_flg(idx1))* & + vel_R(idx1)) - vel_R(idx1)))) + ! Geometrical source of the void fraction(s) is zero + $:GPU_LOOP(parallelism='[seq]') + do i = advxb, advxe + flux_gsrc_rs${XYZ}$_vf(j, k, l, i) = 0._wp end do end if + #:endif + #:if (NORM_DIR == 3) + if (grid_geometry == 3) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, sys_size + flux_gsrc_rs${XYZ}$_vf(j, k, l, i) = 0._wp + end do - ! Geometrical source flux for cylindrical coordinates - #:if (NORM_DIR == 2) - if (cyl_coord) then - !Substituting the advective flux into the inviscid geometrical source flux - $:GPU_LOOP(parallelism='[seq]') - do i = 1, E_idx - flux_gsrc_rs${XYZ}$_vf(j, k, l, i) = flux_rs${XYZ}$_vf(j, k, l, i) - end do - ! Recalculating the radial momentum geometric source flux - flux_gsrc_rs${XYZ}$_vf(j, k, l, contxe + idx1) = & - xi_M*(rho_L*(vel_L(idx1)* & - vel_L(idx1) + & - s_M*(xi_L*(dir_flg(idx1)*s_S + & - (1._wp - dir_flg(idx1))* & - vel_L(idx1)) - vel_L(idx1)))) & - + xi_P*(rho_R*(vel_R(idx1)* & - vel_R(idx1) + & - s_P*(xi_R*(dir_flg(idx1)*s_S + & - (1._wp - dir_flg(idx1))* & - vel_R(idx1)) - vel_R(idx1)))) - ! Geometrical source of the void fraction(s) is zero - $:GPU_LOOP(parallelism='[seq]') - do i = advxb, advxe - flux_gsrc_rs${XYZ}$_vf(j, k, l, i) = 0._wp - end do - end if - #:endif - #:if (NORM_DIR == 3) - if (grid_geometry == 3) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, sys_size - flux_gsrc_rs${XYZ}$_vf(j, k, l, i) = 0._wp - end do - - flux_gsrc_rs${XYZ}$_vf(j, k, l, momxb + 1) = & - -xi_M*(rho_L*(vel_L(idx1)* & - vel_L(idx1) + & - s_M*(xi_L*(dir_flg(idx1)*s_S + & - (1._wp - dir_flg(idx1))* & - vel_L(idx1)) - vel_L(idx1)))) & - - xi_P*(rho_R*(vel_R(idx1)* & - vel_R(idx1) + & - s_P*(xi_R*(dir_flg(idx1)*s_S + & - (1._wp - dir_flg(idx1))* & - vel_R(idx1)) - vel_R(idx1)))) - flux_gsrc_rs${XYZ}$_vf(j, k, l, momxe) = flux_rs${XYZ}$_vf(j, k, l, momxb + 1) + flux_gsrc_rs${XYZ}$_vf(j, k, l, momxb + 1) = & + -xi_M*(rho_L*(vel_L(idx1)* & + vel_L(idx1) + & + s_M*(xi_L*(dir_flg(idx1)*s_S + & + (1._wp - dir_flg(idx1))* & + vel_L(idx1)) - vel_L(idx1)))) & + - xi_P*(rho_R*(vel_R(idx1)* & + vel_R(idx1) + & + s_P*(xi_R*(dir_flg(idx1)*s_S + & + (1._wp - dir_flg(idx1))* & + vel_R(idx1)) - vel_R(idx1)))) + flux_gsrc_rs${XYZ}$_vf(j, k, l, momxe) = flux_rs${XYZ}$_vf(j, k, l, momxb + 1) - end if - #:endif + end if + #:endif - end do end do end do + end do $:END_GPU_PARALLEL_LOOP() end if end if @@ -3684,177 +3684,177 @@ contains if (norm_dir == ${NORM_DIR}$) then #:block UNDEF_AMD $:GPU_PARALLEL_LOOP(collapse=3, private='[alpha_rho_L, alpha_rho_R, vel, alpha_L, alpha_R, rho, pres,E, H_no_mag, gamma, pi_inf, qv, vel_rms, B, c, c_fast, pres_mag, U_L, U_R, U_starL, U_starR, U_doubleL, U_doubleR, F_L, F_R, F_starL, F_starR, F_hlld, s_L, s_R, s_M, s_starL, s_starR, pTot_L, pTot_R, p_star, rhoL_star, rhoR_star, E_starL, E_starR, sqrt_rhoL_star, sqrt_rhoR_star, denom_ds, sign_Bx, vL_star, vR_star, wL_star, wR_star, v_double, w_double, By_double, Bz_double, E_doubleL, E_doubleR, E_double]', copyin='[norm_dir]') - do l = is3%beg, is3%end - do k = is2%beg, is2%end - do j = is1%beg, is1%end - - ! (1) Extract the left/right primitive states - do i = 1, contxe - alpha_rho_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, i) - alpha_rho_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) - end do + do l = is3%beg, is3%end + do k = is2%beg, is2%end + do j = is1%beg, is1%end - ! NOTE: unlike HLL & HLLC, vel_L here is permutated by dir_idx for simpler logic - do i = 1, num_vels - vel%L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, contxe + dir_idx(i)) - vel%R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, contxe + dir_idx(i)) - end do + ! (1) Extract the left/right primitive states + do i = 1, contxe + alpha_rho_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, i) + alpha_rho_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) + end do - vel_rms%L = sum(vel%L**2._wp) - vel_rms%R = sum(vel%R**2._wp) + ! NOTE: unlike HLL & HLLC, vel_L here is permutated by dir_idx for simpler logic + do i = 1, num_vels + vel%L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, contxe + dir_idx(i)) + vel%R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, contxe + dir_idx(i)) + end do - do i = 1, num_fluids - alpha_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i) - alpha_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i) - end do + vel_rms%L = sum(vel%L**2._wp) + vel_rms%R = sum(vel%R**2._wp) - pres%L = qL_prim_rs${XYZ}$_vf(j, k, l, E_idx) - pres%R = qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx) - - ! NOTE: unlike HLL, Bx, By, Bz are permutated by dir_idx for simpler logic - if (mhd) then - if (n == 0) then ! 1D: constant Bx; By, Bz as variables; only in x so not permutated - B%L = [Bx0, qL_prim_rs${XYZ}$_vf(j, k, l, B_idx%beg), qL_prim_rs${XYZ}$_vf(j, k, l, B_idx%beg + 1)] - B%R = [Bx0, qR_prim_rs${XYZ}$_vf(j + 1, k, l, B_idx%beg), qR_prim_rs${XYZ}$_vf(j + 1, k, l, B_idx%beg + 1)] - else ! 2D/3D: Bx, By, Bz as variables - B%L = [qL_prim_rs${XYZ}$_vf(j, k, l, B_idx%beg + dir_idx(1) - 1), & - qL_prim_rs${XYZ}$_vf(j, k, l, B_idx%beg + dir_idx(2) - 1), & - qL_prim_rs${XYZ}$_vf(j, k, l, B_idx%beg + dir_idx(3) - 1)] - B%R = [qR_prim_rs${XYZ}$_vf(j + 1, k, l, B_idx%beg + dir_idx(1) - 1), & - qR_prim_rs${XYZ}$_vf(j + 1, k, l, B_idx%beg + dir_idx(2) - 1), & - qR_prim_rs${XYZ}$_vf(j + 1, k, l, B_idx%beg + dir_idx(3) - 1)] - end if - end if + do i = 1, num_fluids + alpha_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i) + alpha_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i) + end do - ! Sum properties of all fluid components - rho%L = 0._wp; gamma%L = 0._wp; pi_inf%L = 0._wp; qv%L = 0._wp - rho%R = 0._wp; gamma%R = 0._wp; pi_inf%R = 0._wp; qv%R = 0._wp - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - rho%L = rho%L + alpha_rho_L(i) - gamma%L = gamma%L + alpha_L(i)*gammas(i) - pi_inf%L = pi_inf%L + alpha_L(i)*pi_infs(i) - qv%L = qv%L + alpha_rho_L(i)*qvs(i) - - rho%R = rho%R + alpha_rho_R(i) - gamma%R = gamma%R + alpha_R(i)*gammas(i) - pi_inf%R = pi_inf%R + alpha_R(i)*pi_infs(i) - qv%R = qv%R + alpha_rho_R(i)*qvs(i) - end do + pres%L = qL_prim_rs${XYZ}$_vf(j, k, l, E_idx) + pres%R = qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx) - pres_mag%L = 0.5_wp*sum(B%L**2._wp) - pres_mag%R = 0.5_wp*sum(B%R**2._wp) - E%L = gamma%L*pres%L + pi_inf%L + 0.5_wp*rho%L*vel_rms%L + qv%L + pres_mag%L - E%R = gamma%R*pres%R + pi_inf%R + 0.5_wp*rho%R*vel_rms%R + qv%R + pres_mag%R ! includes magnetic energy - H_no_mag%L = (E%L + pres%L - pres_mag%L)/rho%L - H_no_mag%R = (E%R + pres%R - pres_mag%R)/rho%R ! stagnation enthalpy here excludes magnetic energy (only used to find speed of sound) - - ! (2) Compute fast wave speeds - call s_compute_speed_of_sound(pres%L, rho%L, gamma%L, pi_inf%L, H_no_mag%L, alpha_L, vel_rms%L, 0._wp, c%L) - call s_compute_speed_of_sound(pres%R, rho%R, gamma%R, pi_inf%R, H_no_mag%R, alpha_R, vel_rms%R, 0._wp, c%R) - call s_compute_fast_magnetosonic_speed(rho%L, c%L, B%L, norm_dir, c_fast%L, H_no_mag%L) - call s_compute_fast_magnetosonic_speed(rho%R, c%R, B%R, norm_dir, c_fast%R, H_no_mag%R) - - ! (3) Compute contact speed s_M [Miyoshi Equ. (38)] - s_L = min(vel%L(1) - c_fast%L, vel%R(1) - c_fast%R) - s_R = max(vel%R(1) + c_fast%R, vel%L(1) + c_fast%L) - - pTot_L = pres%L + pres_mag%L - pTot_R = pres%R + pres_mag%R - - s_M = (((s_R - vel%R(1))*rho%R*vel%R(1) - & - (s_L - vel%L(1))*rho%L*vel%L(1) - pTot_R + pTot_L)/ & - ((s_R - vel%R(1))*rho%R - (s_L - vel%L(1))*rho%L)) - - ! (4) Compute star state variables - rhoL_star = rho%L*(s_L - vel%L(1))/(s_L - s_M) - rhoR_star = rho%R*(s_R - vel%R(1))/(s_R - s_M) - p_star = pTot_L + rho%L*(s_L - vel%L(1))*(s_M - vel%L(1))/(s_L - s_M) - E_starL = ((s_L - vel%L(1))*E%L - pTot_L*vel%L(1) + p_star*s_M)/(s_L - s_M) - E_starR = ((s_R - vel%R(1))*E%R - pTot_R*vel%R(1) + p_star*s_M)/(s_R - s_M) - - ! (5) Compute left/right state vectors and fluxes - U_L = [rho%L, rho%L*vel%L(1:3), B%L(2:3), E%L] - U_starL = [rhoL_star, rhoL_star*s_M, rhoL_star*vel%L(2:3), B%L(2:3), E_starL] - U_R = [rho%R, rho%R*vel%R(1:3), B%R(2:3), E%R] - U_starR = [rhoR_star, rhoR_star*s_M, rhoR_star*vel%R(2:3), B%R(2:3), E_starR] - - ! Compute the left/right fluxes - F_L(1) = U_L(2) - F_L(2) = U_L(2)*vel%L(1) - B%L(1)*B%L(1) + pTot_L - F_L(3:4) = U_L(2)*vel%L(2:3) - B%L(1)*B%L(2:3) - F_L(5:6) = vel%L(1)*B%L(2:3) - vel%L(2:3)*B%L(1) - F_L(7) = (E%L + pTot_L)*vel%L(1) - B%L(1)*(vel%L(1)*B%L(1) + vel%L(2)*B%L(2) + vel%L(3)*B%L(3)) - - F_R(1) = U_R(2) - F_R(2) = U_R(2)*vel%R(1) - B%R(1)*B%R(1) + pTot_R - F_R(3:4) = U_R(2)*vel%R(2:3) - B%R(1)*B%R(2:3) - F_R(5:6) = vel%R(1)*B%R(2:3) - vel%R(2:3)*B%R(1) - F_R(7) = (E%R + pTot_R)*vel%R(1) - B%R(1)*(vel%R(1)*B%R(1) + vel%R(2)*B%R(2) + vel%R(3)*B%R(3)) - ! Compute the star flux using HLL relation - F_starL = F_L + s_L*(U_starL - U_L) - F_starR = F_R + s_R*(U_starR - U_R) - ! Compute the rotational (Alfvén) speeds - s_starL = s_M - abs(B%L(1))/sqrt(rhoL_star) - s_starR = s_M + abs(B%L(1))/sqrt(rhoR_star) - ! Compute the double–star states [Miyoshi Eqns. (59)-(62)] - sqrt_rhoL_star = sqrt(rhoL_star); sqrt_rhoR_star = sqrt(rhoR_star) - vL_star = vel%L(2); wL_star = vel%L(3) - vR_star = vel%R(2); wR_star = vel%R(3) - - ! (6) Compute the double–star states [Miyoshi Eqns. (59)-(62)] - denom_ds = sqrt_rhoL_star + sqrt_rhoR_star - sign_Bx = sign(1._wp, B%L(1)) - v_double = (sqrt_rhoL_star*vL_star + sqrt_rhoR_star*vR_star + (B%R(2) - B%L(2))*sign_Bx)/denom_ds - w_double = (sqrt_rhoL_star*wL_star + sqrt_rhoR_star*wR_star + (B%R(3) - B%L(3))*sign_Bx)/denom_ds - By_double = (sqrt_rhoL_star*B%R(2) + sqrt_rhoR_star*B%L(2) + sqrt_rhoL_star*sqrt_rhoR_star*(vR_star - vL_star)*sign_Bx)/denom_ds - Bz_double = (sqrt_rhoL_star*B%R(3) + sqrt_rhoR_star*B%L(3) + sqrt_rhoL_star*sqrt_rhoR_star*(wR_star - wL_star)*sign_Bx)/denom_ds - - E_doubleL = E_starL - sqrt_rhoL_star*((vL_star*B%L(2) + wL_star*B%L(3)) - (v_double*By_double + w_double*Bz_double))*sign_Bx - E_doubleR = E_starR + sqrt_rhoR_star*((vR_star*B%R(2) + wR_star*B%R(3)) - (v_double*By_double + w_double*Bz_double))*sign_Bx - E_double = 0.5_wp*(E_doubleL + E_doubleR) - - U_doubleL = [rhoL_star, rhoL_star*s_M, rhoL_star*v_double, rhoL_star*w_double, By_double, Bz_double, E_double] - U_doubleR = [rhoR_star, rhoR_star*s_M, rhoR_star*v_double, rhoR_star*w_double, By_double, Bz_double, E_double] - - ! (11) Choose HLLD flux based on wave-speed regions - if (0.0_wp <= s_L) then - F_hlld = F_L - else if (0.0_wp <= s_starL) then - F_hlld = F_L + s_L*(U_starL - U_L) - else if (0.0_wp <= s_M) then - F_hlld = F_starL + s_starL*(U_doubleL - U_starL) - else if (0.0_wp <= s_starR) then - F_hlld = F_starR + s_starR*(U_doubleR - U_starR) - else if (0.0_wp <= s_R) then - F_hlld = F_R + s_R*(U_starR - U_R) - else - F_hlld = F_R + ! NOTE: unlike HLL, Bx, By, Bz are permutated by dir_idx for simpler logic + if (mhd) then + if (n == 0) then ! 1D: constant Bx; By, Bz as variables; only in x so not permutated + B%L = [Bx0, qL_prim_rs${XYZ}$_vf(j, k, l, B_idx%beg), qL_prim_rs${XYZ}$_vf(j, k, l, B_idx%beg + 1)] + B%R = [Bx0, qR_prim_rs${XYZ}$_vf(j + 1, k, l, B_idx%beg), qR_prim_rs${XYZ}$_vf(j + 1, k, l, B_idx%beg + 1)] + else ! 2D/3D: Bx, By, Bz as variables + B%L = [qL_prim_rs${XYZ}$_vf(j, k, l, B_idx%beg + dir_idx(1) - 1), & + qL_prim_rs${XYZ}$_vf(j, k, l, B_idx%beg + dir_idx(2) - 1), & + qL_prim_rs${XYZ}$_vf(j, k, l, B_idx%beg + dir_idx(3) - 1)] + B%R = [qR_prim_rs${XYZ}$_vf(j + 1, k, l, B_idx%beg + dir_idx(1) - 1), & + qR_prim_rs${XYZ}$_vf(j + 1, k, l, B_idx%beg + dir_idx(2) - 1), & + qR_prim_rs${XYZ}$_vf(j + 1, k, l, B_idx%beg + dir_idx(3) - 1)] end if + end if - ! (12) Reorder and write temporary variables to the flux array - ! Mass - flux_rs${XYZ}$_vf(j, k, l, 1) = F_hlld(1) ! TODO multi-component - ! Momentum - flux_rs${XYZ}$_vf(j, k, l, [contxe + dir_idx(1), contxe + dir_idx(2), contxe + dir_idx(3)]) = F_hlld([2, 3, 4]) - ! Magnetic field - if (n == 0) then - flux_rs${XYZ}$_vf(j, k, l, [B_idx%beg, B_idx%beg + 1]) = F_hlld([5, 6]) - else - flux_rs${XYZ}$_vf(j, k, l, [B_idx%beg + dir_idx(2) - 1, B_idx%beg + dir_idx(3) - 1]) = F_hlld([5, 6]) - end if - ! Energy - flux_rs${XYZ}$_vf(j, k, l, E_idx) = F_hlld(7) - ! Partial fraction - $:GPU_LOOP(parallelism='[seq]') - do i = advxb, advxe - flux_rs${XYZ}$_vf(j, k, l, i) = 0._wp ! TODO multi-component (zero for now) - end do + ! Sum properties of all fluid components + rho%L = 0._wp; gamma%L = 0._wp; pi_inf%L = 0._wp; qv%L = 0._wp + rho%R = 0._wp; gamma%R = 0._wp; pi_inf%R = 0._wp; qv%R = 0._wp + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids + rho%L = rho%L + alpha_rho_L(i) + gamma%L = gamma%L + alpha_L(i)*gammas(i) + pi_inf%L = pi_inf%L + alpha_L(i)*pi_infs(i) + qv%L = qv%L + alpha_rho_L(i)*qvs(i) + + rho%R = rho%R + alpha_rho_R(i) + gamma%R = gamma%R + alpha_R(i)*gammas(i) + pi_inf%R = pi_inf%R + alpha_R(i)*pi_infs(i) + qv%R = qv%R + alpha_rho_R(i)*qvs(i) + end do + + pres_mag%L = 0.5_wp*sum(B%L**2._wp) + pres_mag%R = 0.5_wp*sum(B%R**2._wp) + E%L = gamma%L*pres%L + pi_inf%L + 0.5_wp*rho%L*vel_rms%L + qv%L + pres_mag%L + E%R = gamma%R*pres%R + pi_inf%R + 0.5_wp*rho%R*vel_rms%R + qv%R + pres_mag%R ! includes magnetic energy + H_no_mag%L = (E%L + pres%L - pres_mag%L)/rho%L + H_no_mag%R = (E%R + pres%R - pres_mag%R)/rho%R ! stagnation enthalpy here excludes magnetic energy (only used to find speed of sound) + + ! (2) Compute fast wave speeds + call s_compute_speed_of_sound(pres%L, rho%L, gamma%L, pi_inf%L, H_no_mag%L, alpha_L, vel_rms%L, 0._wp, c%L) + call s_compute_speed_of_sound(pres%R, rho%R, gamma%R, pi_inf%R, H_no_mag%R, alpha_R, vel_rms%R, 0._wp, c%R) + call s_compute_fast_magnetosonic_speed(rho%L, c%L, B%L, norm_dir, c_fast%L, H_no_mag%L) + call s_compute_fast_magnetosonic_speed(rho%R, c%R, B%R, norm_dir, c_fast%R, H_no_mag%R) + + ! (3) Compute contact speed s_M [Miyoshi Equ. (38)] + s_L = min(vel%L(1) - c_fast%L, vel%R(1) - c_fast%R) + s_R = max(vel%R(1) + c_fast%R, vel%L(1) + c_fast%L) + + pTot_L = pres%L + pres_mag%L + pTot_R = pres%R + pres_mag%R + + s_M = (((s_R - vel%R(1))*rho%R*vel%R(1) - & + (s_L - vel%L(1))*rho%L*vel%L(1) - pTot_R + pTot_L)/ & + ((s_R - vel%R(1))*rho%R - (s_L - vel%L(1))*rho%L)) + + ! (4) Compute star state variables + rhoL_star = rho%L*(s_L - vel%L(1))/(s_L - s_M) + rhoR_star = rho%R*(s_R - vel%R(1))/(s_R - s_M) + p_star = pTot_L + rho%L*(s_L - vel%L(1))*(s_M - vel%L(1))/(s_L - s_M) + E_starL = ((s_L - vel%L(1))*E%L - pTot_L*vel%L(1) + p_star*s_M)/(s_L - s_M) + E_starR = ((s_R - vel%R(1))*E%R - pTot_R*vel%R(1) + p_star*s_M)/(s_R - s_M) + + ! (5) Compute left/right state vectors and fluxes + U_L = [rho%L, rho%L*vel%L(1:3), B%L(2:3), E%L] + U_starL = [rhoL_star, rhoL_star*s_M, rhoL_star*vel%L(2:3), B%L(2:3), E_starL] + U_R = [rho%R, rho%R*vel%R(1:3), B%R(2:3), E%R] + U_starR = [rhoR_star, rhoR_star*s_M, rhoR_star*vel%R(2:3), B%R(2:3), E_starR] + + ! Compute the left/right fluxes + F_L(1) = U_L(2) + F_L(2) = U_L(2)*vel%L(1) - B%L(1)*B%L(1) + pTot_L + F_L(3:4) = U_L(2)*vel%L(2:3) - B%L(1)*B%L(2:3) + F_L(5:6) = vel%L(1)*B%L(2:3) - vel%L(2:3)*B%L(1) + F_L(7) = (E%L + pTot_L)*vel%L(1) - B%L(1)*(vel%L(1)*B%L(1) + vel%L(2)*B%L(2) + vel%L(3)*B%L(3)) + + F_R(1) = U_R(2) + F_R(2) = U_R(2)*vel%R(1) - B%R(1)*B%R(1) + pTot_R + F_R(3:4) = U_R(2)*vel%R(2:3) - B%R(1)*B%R(2:3) + F_R(5:6) = vel%R(1)*B%R(2:3) - vel%R(2:3)*B%R(1) + F_R(7) = (E%R + pTot_R)*vel%R(1) - B%R(1)*(vel%R(1)*B%R(1) + vel%R(2)*B%R(2) + vel%R(3)*B%R(3)) + ! Compute the star flux using HLL relation + F_starL = F_L + s_L*(U_starL - U_L) + F_starR = F_R + s_R*(U_starR - U_R) + ! Compute the rotational (Alfvén) speeds + s_starL = s_M - abs(B%L(1))/sqrt(rhoL_star) + s_starR = s_M + abs(B%L(1))/sqrt(rhoR_star) + ! Compute the double–star states [Miyoshi Eqns. (59)-(62)] + sqrt_rhoL_star = sqrt(rhoL_star); sqrt_rhoR_star = sqrt(rhoR_star) + vL_star = vel%L(2); wL_star = vel%L(3) + vR_star = vel%R(2); wR_star = vel%R(3) + + ! (6) Compute the double–star states [Miyoshi Eqns. (59)-(62)] + denom_ds = sqrt_rhoL_star + sqrt_rhoR_star + sign_Bx = sign(1._wp, B%L(1)) + v_double = (sqrt_rhoL_star*vL_star + sqrt_rhoR_star*vR_star + (B%R(2) - B%L(2))*sign_Bx)/denom_ds + w_double = (sqrt_rhoL_star*wL_star + sqrt_rhoR_star*wR_star + (B%R(3) - B%L(3))*sign_Bx)/denom_ds + By_double = (sqrt_rhoL_star*B%R(2) + sqrt_rhoR_star*B%L(2) + sqrt_rhoL_star*sqrt_rhoR_star*(vR_star - vL_star)*sign_Bx)/denom_ds + Bz_double = (sqrt_rhoL_star*B%R(3) + sqrt_rhoR_star*B%L(3) + sqrt_rhoL_star*sqrt_rhoR_star*(wR_star - wL_star)*sign_Bx)/denom_ds + + E_doubleL = E_starL - sqrt_rhoL_star*((vL_star*B%L(2) + wL_star*B%L(3)) - (v_double*By_double + w_double*Bz_double))*sign_Bx + E_doubleR = E_starR + sqrt_rhoR_star*((vR_star*B%R(2) + wR_star*B%R(3)) - (v_double*By_double + w_double*Bz_double))*sign_Bx + E_double = 0.5_wp*(E_doubleL + E_doubleR) + + U_doubleL = [rhoL_star, rhoL_star*s_M, rhoL_star*v_double, rhoL_star*w_double, By_double, Bz_double, E_double] + U_doubleR = [rhoR_star, rhoR_star*s_M, rhoR_star*v_double, rhoR_star*w_double, By_double, Bz_double, E_double] + + ! (11) Choose HLLD flux based on wave-speed regions + if (0.0_wp <= s_L) then + F_hlld = F_L + else if (0.0_wp <= s_starL) then + F_hlld = F_L + s_L*(U_starL - U_L) + else if (0.0_wp <= s_M) then + F_hlld = F_starL + s_starL*(U_doubleL - U_starL) + else if (0.0_wp <= s_starR) then + F_hlld = F_starR + s_starR*(U_doubleR - U_starR) + else if (0.0_wp <= s_R) then + F_hlld = F_R + s_R*(U_starR - U_R) + else + F_hlld = F_R + end if - flux_src_rs${XYZ}$_vf(j, k, l, advxb) = 0._wp + ! (12) Reorder and write temporary variables to the flux array + ! Mass + flux_rs${XYZ}$_vf(j, k, l, 1) = F_hlld(1) ! TODO multi-component + ! Momentum + flux_rs${XYZ}$_vf(j, k, l, [contxe + dir_idx(1), contxe + dir_idx(2), contxe + dir_idx(3)]) = F_hlld([2, 3, 4]) + ! Magnetic field + if (n == 0) then + flux_rs${XYZ}$_vf(j, k, l, [B_idx%beg, B_idx%beg + 1]) = F_hlld([5, 6]) + else + flux_rs${XYZ}$_vf(j, k, l, [B_idx%beg + dir_idx(2) - 1, B_idx%beg + dir_idx(3) - 1]) = F_hlld([5, 6]) + end if + ! Energy + flux_rs${XYZ}$_vf(j, k, l, E_idx) = F_hlld(7) + ! Partial fraction + $:GPU_LOOP(parallelism='[seq]') + do i = advxb, advxe + flux_rs${XYZ}$_vf(j, k, l, i) = 0._wp ! TODO multi-component (zero for now) end do + + flux_src_rs${XYZ}$_vf(j, k, l, advxb) = 0._wp end do end do + end do $:END_GPU_PARALLEL_LOOP() #:endblock UNDEF_AMD end if @@ -4060,53 +4060,53 @@ contains if (bc_x%beg == BC_RIEMANN_EXTRAP) then ! Riemann state extrap. BC at beginning $:GPU_PARALLEL_LOOP(collapse=3) - do i = 1, sys_size - do l = is3%beg, is3%end - do k = is2%beg, is2%end - qL_prim_rsx_vf(-1, k, l, i) = & - qR_prim_rsx_vf(0, k, l, i) - end do + do i = 1, sys_size + do l = is3%beg, is3%end + do k = is2%beg, is2%end + qL_prim_rsx_vf(-1, k, l, i) = & + qR_prim_rsx_vf(0, k, l, i) end do end do + end do $:END_GPU_PARALLEL_LOOP() if (viscous) then $:GPU_PARALLEL_LOOP(collapse=3) - do i = momxb, momxe - do l = isz%beg, isz%end - do k = isy%beg, isy%end + do i = momxb, momxe + do l = isz%beg, isz%end + do k = isy%beg, isy%end - dqL_prim_dx_vf(i)%sf(-1, k, l) = & - dqR_prim_dx_vf(i)%sf(0, k, l) - end do + dqL_prim_dx_vf(i)%sf(-1, k, l) = & + dqR_prim_dx_vf(i)%sf(0, k, l) end do end do + end do $:END_GPU_PARALLEL_LOOP() if (n > 0) then $:GPU_PARALLEL_LOOP(collapse=3) - do i = momxb, momxe - do l = isz%beg, isz%end - do k = isy%beg, isy%end + do i = momxb, momxe + do l = isz%beg, isz%end + do k = isy%beg, isy%end - dqL_prim_dy_vf(i)%sf(-1, k, l) = & - dqR_prim_dy_vf(i)%sf(0, k, l) - end do + dqL_prim_dy_vf(i)%sf(-1, k, l) = & + dqR_prim_dy_vf(i)%sf(0, k, l) end do end do + end do $:END_GPU_PARALLEL_LOOP() if (p > 0) then $:GPU_PARALLEL_LOOP(collapse=3) - do i = momxb, momxe - do l = isz%beg, isz%end - do k = isy%beg, isy%end + do i = momxb, momxe + do l = isz%beg, isz%end + do k = isy%beg, isy%end - dqL_prim_dz_vf(i)%sf(-1, k, l) = & - dqR_prim_dz_vf(i)%sf(0, k, l) - end do + dqL_prim_dz_vf(i)%sf(-1, k, l) = & + dqR_prim_dz_vf(i)%sf(0, k, l) end do end do + end do $:END_GPU_PARALLEL_LOOP() end if @@ -4119,54 +4119,54 @@ contains if (bc_x%end == BC_RIEMANN_EXTRAP) then ! Riemann state extrap. BC at end $:GPU_PARALLEL_LOOP(collapse=3) - do i = 1, sys_size - do l = is3%beg, is3%end - do k = is2%beg, is2%end - qR_prim_rsx_vf(m + 1, k, l, i) = & - qL_prim_rsx_vf(m, k, l, i) - end do + do i = 1, sys_size + do l = is3%beg, is3%end + do k = is2%beg, is2%end + qR_prim_rsx_vf(m + 1, k, l, i) = & + qL_prim_rsx_vf(m, k, l, i) end do end do + end do $:END_GPU_PARALLEL_LOOP() if (viscous) then $:GPU_PARALLEL_LOOP(collapse=3) - do i = momxb, momxe - do l = isz%beg, isz%end - do k = isy%beg, isy%end + do i = momxb, momxe + do l = isz%beg, isz%end + do k = isy%beg, isy%end - dqR_prim_dx_vf(i)%sf(m + 1, k, l) = & - dqL_prim_dx_vf(i)%sf(m, k, l) - end do + dqR_prim_dx_vf(i)%sf(m + 1, k, l) = & + dqL_prim_dx_vf(i)%sf(m, k, l) end do end do + end do $:END_GPU_PARALLEL_LOOP() if (n > 0) then $:GPU_PARALLEL_LOOP(collapse=3) - do i = momxb, momxe - do l = isz%beg, isz%end - do k = isy%beg, isy%end + do i = momxb, momxe + do l = isz%beg, isz%end + do k = isy%beg, isy%end - dqR_prim_dy_vf(i)%sf(m + 1, k, l) = & - dqL_prim_dy_vf(i)%sf(m, k, l) - end do + dqR_prim_dy_vf(i)%sf(m + 1, k, l) = & + dqL_prim_dy_vf(i)%sf(m, k, l) end do end do + end do $:END_GPU_PARALLEL_LOOP() if (p > 0) then $:GPU_PARALLEL_LOOP(collapse=3) - do i = momxb, momxe - do l = isz%beg, isz%end - do k = isy%beg, isy%end + do i = momxb, momxe + do l = isz%beg, isz%end + do k = isy%beg, isy%end - dqR_prim_dz_vf(i)%sf(m + 1, k, l) = & - dqL_prim_dz_vf(i)%sf(m, k, l) - end do + dqR_prim_dz_vf(i)%sf(m + 1, k, l) = & + dqL_prim_dz_vf(i)%sf(m, k, l) end do end do + end do $:END_GPU_PARALLEL_LOOP() end if @@ -4182,50 +4182,50 @@ contains if (bc_y%beg == BC_RIEMANN_EXTRAP) then ! Riemann state extrap. BC at beginning $:GPU_PARALLEL_LOOP(collapse=3) - do i = 1, sys_size - do l = is3%beg, is3%end - do k = is2%beg, is2%end - qL_prim_rsy_vf(-1, k, l, i) = & - qR_prim_rsy_vf(0, k, l, i) - end do + do i = 1, sys_size + do l = is3%beg, is3%end + do k = is2%beg, is2%end + qL_prim_rsy_vf(-1, k, l, i) = & + qR_prim_rsy_vf(0, k, l, i) end do end do + end do $:END_GPU_PARALLEL_LOOP() if (viscous) then $:GPU_PARALLEL_LOOP(collapse=3) - do i = momxb, momxe - do l = isz%beg, isz%end - do j = isx%beg, isx%end - dqL_prim_dx_vf(i)%sf(j, -1, l) = & - dqR_prim_dx_vf(i)%sf(j, 0, l) - end do + do i = momxb, momxe + do l = isz%beg, isz%end + do j = isx%beg, isx%end + dqL_prim_dx_vf(i)%sf(j, -1, l) = & + dqR_prim_dx_vf(i)%sf(j, 0, l) end do end do + end do $:END_GPU_PARALLEL_LOOP() $:GPU_PARALLEL_LOOP(collapse=3) - do i = momxb, momxe - do l = isz%beg, isz%end - do j = isx%beg, isx%end - dqL_prim_dy_vf(i)%sf(j, -1, l) = & - dqR_prim_dy_vf(i)%sf(j, 0, l) - end do + do i = momxb, momxe + do l = isz%beg, isz%end + do j = isx%beg, isx%end + dqL_prim_dy_vf(i)%sf(j, -1, l) = & + dqR_prim_dy_vf(i)%sf(j, 0, l) end do end do + end do $:END_GPU_PARALLEL_LOOP() if (p > 0) then $:GPU_PARALLEL_LOOP(collapse=3) - do i = momxb, momxe - do l = isz%beg, isz%end - do j = isx%beg, isx%end - dqL_prim_dz_vf(i)%sf(j, -1, l) = & - dqR_prim_dz_vf(i)%sf(j, 0, l) - end do + do i = momxb, momxe + do l = isz%beg, isz%end + do j = isx%beg, isx%end + dqL_prim_dz_vf(i)%sf(j, -1, l) = & + dqR_prim_dz_vf(i)%sf(j, 0, l) end do end do + end do $:END_GPU_PARALLEL_LOOP() end if @@ -4236,50 +4236,50 @@ contains if (bc_y%end == BC_RIEMANN_EXTRAP) then ! Riemann state extrap. BC at end $:GPU_PARALLEL_LOOP(collapse=3) - do i = 1, sys_size - do l = is3%beg, is3%end - do k = is2%beg, is2%end - qR_prim_rsy_vf(n + 1, k, l, i) = & - qL_prim_rsy_vf(n, k, l, i) - end do + do i = 1, sys_size + do l = is3%beg, is3%end + do k = is2%beg, is2%end + qR_prim_rsy_vf(n + 1, k, l, i) = & + qL_prim_rsy_vf(n, k, l, i) end do end do + end do $:END_GPU_PARALLEL_LOOP() if (viscous) then $:GPU_PARALLEL_LOOP(collapse=3) - do i = momxb, momxe - do l = isz%beg, isz%end - do j = isx%beg, isx%end - dqR_prim_dx_vf(i)%sf(j, n + 1, l) = & - dqL_prim_dx_vf(i)%sf(j, n, l) - end do + do i = momxb, momxe + do l = isz%beg, isz%end + do j = isx%beg, isx%end + dqR_prim_dx_vf(i)%sf(j, n + 1, l) = & + dqL_prim_dx_vf(i)%sf(j, n, l) end do end do + end do $:END_GPU_PARALLEL_LOOP() $:GPU_PARALLEL_LOOP(collapse=3) - do i = momxb, momxe - do l = isz%beg, isz%end - do j = isx%beg, isx%end - dqR_prim_dy_vf(i)%sf(j, n + 1, l) = & - dqL_prim_dy_vf(i)%sf(j, n, l) - end do + do i = momxb, momxe + do l = isz%beg, isz%end + do j = isx%beg, isx%end + dqR_prim_dy_vf(i)%sf(j, n + 1, l) = & + dqL_prim_dy_vf(i)%sf(j, n, l) end do end do + end do $:END_GPU_PARALLEL_LOOP() if (p > 0) then $:GPU_PARALLEL_LOOP(collapse=3) - do i = momxb, momxe - do l = isz%beg, isz%end - do j = isx%beg, isx%end - dqR_prim_dz_vf(i)%sf(j, n + 1, l) = & - dqL_prim_dz_vf(i)%sf(j, n, l) - end do + do i = momxb, momxe + do l = isz%beg, isz%end + do j = isx%beg, isx%end + dqR_prim_dz_vf(i)%sf(j, n + 1, l) = & + dqL_prim_dz_vf(i)%sf(j, n, l) end do end do + end do $:END_GPU_PARALLEL_LOOP() end if @@ -4293,46 +4293,46 @@ contains if (bc_z%beg == BC_RIEMANN_EXTRAP) then ! Riemann state extrap. BC at beginning $:GPU_PARALLEL_LOOP(collapse=3) - do i = 1, sys_size - do l = is3%beg, is3%end - do k = is2%beg, is2%end - qL_prim_rsz_vf(-1, k, l, i) = & - qR_prim_rsz_vf(0, k, l, i) - end do + do i = 1, sys_size + do l = is3%beg, is3%end + do k = is2%beg, is2%end + qL_prim_rsz_vf(-1, k, l, i) = & + qR_prim_rsz_vf(0, k, l, i) end do end do + end do $:END_GPU_PARALLEL_LOOP() if (viscous) then $:GPU_PARALLEL_LOOP(collapse=3) - do i = momxb, momxe - do k = isy%beg, isy%end - do j = isx%beg, isx%end - dqL_prim_dx_vf(i)%sf(j, k, -1) = & - dqR_prim_dx_vf(i)%sf(j, k, 0) - end do + do i = momxb, momxe + do k = isy%beg, isy%end + do j = isx%beg, isx%end + dqL_prim_dx_vf(i)%sf(j, k, -1) = & + dqR_prim_dx_vf(i)%sf(j, k, 0) end do end do + end do $:END_GPU_PARALLEL_LOOP() $:GPU_PARALLEL_LOOP(collapse=3) - do i = momxb, momxe - do k = isy%beg, isy%end - do j = isx%beg, isx%end - dqL_prim_dy_vf(i)%sf(j, k, -1) = & - dqR_prim_dy_vf(i)%sf(j, k, 0) - end do + do i = momxb, momxe + do k = isy%beg, isy%end + do j = isx%beg, isx%end + dqL_prim_dy_vf(i)%sf(j, k, -1) = & + dqR_prim_dy_vf(i)%sf(j, k, 0) end do end do + end do $:END_GPU_PARALLEL_LOOP() $:GPU_PARALLEL_LOOP(collapse=3) - do i = momxb, momxe - do k = isy%beg, isy%end - do j = isx%beg, isx%end - dqL_prim_dz_vf(i)%sf(j, k, -1) = & - dqR_prim_dz_vf(i)%sf(j, k, 0) - end do + do i = momxb, momxe + do k = isy%beg, isy%end + do j = isx%beg, isx%end + dqL_prim_dz_vf(i)%sf(j, k, -1) = & + dqR_prim_dz_vf(i)%sf(j, k, 0) end do end do + end do $:END_GPU_PARALLEL_LOOP() end if @@ -4341,48 +4341,48 @@ contains if (bc_z%end == BC_RIEMANN_EXTRAP) then ! Riemann state extrap. BC at end $:GPU_PARALLEL_LOOP(collapse=3) - do i = 1, sys_size - do l = is3%beg, is3%end - do k = is2%beg, is2%end - qR_prim_rsz_vf(p + 1, k, l, i) = & - qL_prim_rsz_vf(p, k, l, i) - end do + do i = 1, sys_size + do l = is3%beg, is3%end + do k = is2%beg, is2%end + qR_prim_rsz_vf(p + 1, k, l, i) = & + qL_prim_rsz_vf(p, k, l, i) end do end do + end do $:END_GPU_PARALLEL_LOOP() if (viscous) then $:GPU_PARALLEL_LOOP(collapse=3) - do i = momxb, momxe - do k = isy%beg, isy%end - do j = isx%beg, isx%end - dqR_prim_dx_vf(i)%sf(j, k, p + 1) = & - dqL_prim_dx_vf(i)%sf(j, k, p) - end do + do i = momxb, momxe + do k = isy%beg, isy%end + do j = isx%beg, isx%end + dqR_prim_dx_vf(i)%sf(j, k, p + 1) = & + dqL_prim_dx_vf(i)%sf(j, k, p) end do end do + end do $:END_GPU_PARALLEL_LOOP() $:GPU_PARALLEL_LOOP(collapse=3) - do i = momxb, momxe - do k = isy%beg, isy%end - do j = isx%beg, isx%end - dqR_prim_dy_vf(i)%sf(j, k, p + 1) = & - dqL_prim_dy_vf(i)%sf(j, k, p) - end do + do i = momxb, momxe + do k = isy%beg, isy%end + do j = isx%beg, isx%end + dqR_prim_dy_vf(i)%sf(j, k, p + 1) = & + dqL_prim_dy_vf(i)%sf(j, k, p) end do end do + end do $:END_GPU_PARALLEL_LOOP() $:GPU_PARALLEL_LOOP(collapse=3) - do i = momxb, momxe - do k = isy%beg, isy%end - do j = isx%beg, isx%end - dqR_prim_dz_vf(i)%sf(j, k, p + 1) = & - dqL_prim_dz_vf(i)%sf(j, k, p) - end do + do i = momxb, momxe + do k = isy%beg, isy%end + do j = isx%beg, isx%end + dqR_prim_dz_vf(i)%sf(j, k, p + 1) = & + dqL_prim_dz_vf(i)%sf(j, k, p) end do end do + end do $:END_GPU_PARALLEL_LOOP() end if @@ -4425,48 +4425,48 @@ contains if (norm_dir == 1) then - if (viscous .or. (surface_tension)) then - - $:GPU_PARALLEL_LOOP(collapse=4) - do i = momxb, E_idx - do l = is3%beg, is3%end - do k = is2%beg, is2%end - do j = is1%beg, is1%end - flux_src_vf(i)%sf(j, k, l) = 0._wp - end do + if (viscous .or. (surface_tension)) then + + $:GPU_PARALLEL_LOOP(collapse=4) + do i = momxb, E_idx + do l = is3%beg, is3%end + do k = is2%beg, is2%end + do j = is1%beg, is1%end + flux_src_vf(i)%sf(j, k, l) = 0._wp end do end do end do + end do $:END_GPU_PARALLEL_LOOP() end if if (chem_params%diffusion) then $:GPU_PARALLEL_LOOP(collapse=4) - do i = E_idx, chemxe - do l = is3%beg, is3%end - do k = is2%beg, is2%end - do j = is1%beg, is1%end - if (i == E_idx .or. i >= chemxb) then - flux_src_vf(i)%sf(j, k, l) = 0._wp - end if - end do + do i = E_idx, chemxe + do l = is3%beg, is3%end + do k = is2%beg, is2%end + do j = is1%beg, is1%end + if (i == E_idx .or. i >= chemxb) then + flux_src_vf(i)%sf(j, k, l) = 0._wp + end if end do end do end do + end do $:END_GPU_PARALLEL_LOOP() end if if (qbmm) then $:GPU_PARALLEL_LOOP(collapse=4) - do i = 1, 4 - do l = is3%beg, is3%end - do k = is2%beg, is2%end - do j = is1%beg, is1%end + 1 - mom_sp_rsx_vf(j, k, l, i) = mom_sp(i)%sf(j, k, l) - end do + do i = 1, 4 + do l = is3%beg, is3%end + do k = is2%beg, is2%end + do j = is1%beg, is1%end + 1 + mom_sp_rsx_vf(j, k, l, i) = mom_sp(i)%sf(j, k, l) end do end do end do + end do $:END_GPU_PARALLEL_LOOP() end if @@ -4475,45 +4475,45 @@ contains if (viscous .or. (surface_tension)) then $:GPU_PARALLEL_LOOP(collapse=4) - do i = momxb, E_idx - do l = is3%beg, is3%end - do j = is1%beg, is1%end - do k = is2%beg, is2%end - flux_src_vf(i)%sf(k, j, l) = 0._wp - end do + do i = momxb, E_idx + do l = is3%beg, is3%end + do j = is1%beg, is1%end + do k = is2%beg, is2%end + flux_src_vf(i)%sf(k, j, l) = 0._wp end do end do end do + end do $:END_GPU_PARALLEL_LOOP() end if if (chem_params%diffusion) then $:GPU_PARALLEL_LOOP(collapse=4) - do i = E_idx, chemxe - do l = is3%beg, is3%end - do j = is1%beg, is1%end - do k = is2%beg, is2%end - if (i == E_idx .or. i >= chemxb) then - flux_src_vf(i)%sf(k, j, l) = 0._wp - end if - end do + do i = E_idx, chemxe + do l = is3%beg, is3%end + do j = is1%beg, is1%end + do k = is2%beg, is2%end + if (i == E_idx .or. i >= chemxb) then + flux_src_vf(i)%sf(k, j, l) = 0._wp + end if end do end do end do + end do $:END_GPU_PARALLEL_LOOP() end if if (qbmm) then $:GPU_PARALLEL_LOOP(collapse=4) - do i = 1, 4 - do l = is3%beg, is3%end - do k = is2%beg, is2%end - do j = is1%beg, is1%end + 1 - mom_sp_rsy_vf(j, k, l, i) = mom_sp(i)%sf(k, j, l) - end do + do i = 1, 4 + do l = is3%beg, is3%end + do k = is2%beg, is2%end + do j = is1%beg, is1%end + 1 + mom_sp_rsy_vf(j, k, l, i) = mom_sp(i)%sf(k, j, l) end do end do end do + end do $:END_GPU_PARALLEL_LOOP() end if @@ -4522,45 +4522,45 @@ contains if (viscous .or. (surface_tension)) then $:GPU_PARALLEL_LOOP(collapse=4) - do i = momxb, E_idx - do j = is1%beg, is1%end - do k = is2%beg, is2%end - do l = is3%beg, is3%end - flux_src_vf(i)%sf(l, k, j) = 0._wp - end do + do i = momxb, E_idx + do j = is1%beg, is1%end + do k = is2%beg, is2%end + do l = is3%beg, is3%end + flux_src_vf(i)%sf(l, k, j) = 0._wp end do end do end do + end do $:END_GPU_PARALLEL_LOOP() end if if (chem_params%diffusion) then $:GPU_PARALLEL_LOOP(collapse=4) - do i = E_idx, chemxe - do j = is1%beg, is1%end - do k = is2%beg, is2%end - do l = is3%beg, is3%end - if (i == E_idx .or. i >= chemxb) then - flux_src_vf(i)%sf(l, k, j) = 0._wp - end if - end do + do i = E_idx, chemxe + do j = is1%beg, is1%end + do k = is2%beg, is2%end + do l = is3%beg, is3%end + if (i == E_idx .or. i >= chemxb) then + flux_src_vf(i)%sf(l, k, j) = 0._wp + end if end do end do end do + end do $:END_GPU_PARALLEL_LOOP() end if if (qbmm) then $:GPU_PARALLEL_LOOP(collapse=4) - do i = 1, 4 - do l = is3%beg, is3%end - do k = is2%beg, is2%end - do j = is1%beg, is1%end + 1 - mom_sp_rsz_vf(j, k, l, i) = mom_sp(i)%sf(l, k, j) - end do + do i = 1, 4 + do l = is3%beg, is3%end + do k = is2%beg, is2%end + do j = is1%beg, is1%end + 1 + mom_sp_rsz_vf(j, k, l, i) = mom_sp(i)%sf(l, k, j) end do end do end do + end do $:END_GPU_PARALLEL_LOOP() end if @@ -4619,119 +4619,119 @@ contains integer :: idx_rp(3) !!< Indices $(j,k,l)$ of 'right' point for averaging. $:GPU_PARALLEL_LOOP(collapse=3, private='[idx_rp, avg_v_int, avg_dvdx_int, avg_dvdy_int, avg_dvdz_int, Re_s, Re_b, vel_src_int, r_eff, divergence_cyl, stress_vector_shear, stress_normal_bulk, div_v_term_const]') - do l = iz%beg, iz%end - do k = iy%beg, iy%end - do j = ix%beg, ix%end + do l = iz%beg, iz%end + do k = iy%beg, iy%end + do j = ix%beg, ix%end + + ! Determine indices for the 'right' state for averaging across the interface + idx_rp = [j, k, l] + idx_rp(norm_dir) = idx_rp(norm_dir) + 1 + + ! Average velocities and their derivatives at the interface + ! For cylindrical: x-dir ~ axial (z_cyl), y-dir ~ radial (r_cyl), z-dir ~ azimuthal (theta_cyl) + $:GPU_LOOP(parallelism='[seq]') + do i_vel = 1, num_dims + avg_v_int(i_vel) = 0.5_wp*(velL_vf(i_vel)%sf(j, k, l) + velR_vf(i_vel)%sf(idx_rp(1), idx_rp(2), idx_rp(3))) + + avg_dvdx_int(i_vel) = 0.5_wp*(dvelL_dx_vf(i_vel)%sf(j, k, l) + & + dvelR_dx_vf(i_vel)%sf(idx_rp(1), idx_rp(2), idx_rp(3))) + if (num_dims > 1) then + avg_dvdy_int(i_vel) = 0.5_wp*(dvelL_dy_vf(i_vel)%sf(j, k, l) + & + dvelR_dy_vf(i_vel)%sf(idx_rp(1), idx_rp(2), idx_rp(3))) + else + avg_dvdy_int(i_vel) = 0.0_wp + end if + if (num_dims > 2) then + avg_dvdz_int(i_vel) = 0.5_wp*(dvelL_dz_vf(i_vel)%sf(j, k, l) + & + dvelR_dz_vf(i_vel)%sf(idx_rp(1), idx_rp(2), idx_rp(3))) + else + avg_dvdz_int(i_vel) = 0.0_wp + end if + end do - ! Determine indices for the 'right' state for averaging across the interface - idx_rp = [j, k, l] - idx_rp(norm_dir) = idx_rp(norm_dir) + 1 + ! Get Re numbers and interface velocity for viscous work + select case (norm_dir) + case (1) ! x-face (axial face in z_cyl direction) + Re_s = Re_avg_rsx_vf(j, k, l, 1) + Re_b = Re_avg_rsx_vf(j, k, l, 2) + vel_src_int = vel_src_rsx_vf(j, k, l, 1:num_dims) + r_eff = y_cc(k) + case (2) ! y-face (radial face in r_cyl direction) + Re_s = Re_avg_rsy_vf(k, j, l, 1) + Re_b = Re_avg_rsy_vf(k, j, l, 2) + vel_src_int = vel_src_rsy_vf(k, j, l, 1:num_dims) + r_eff = y_cb(k) + case (3) ! z-face (azimuthal face in theta_cyl direction) + Re_s = Re_avg_rsz_vf(l, k, j, 1) + Re_b = Re_avg_rsz_vf(l, k, j, 2) + vel_src_int = vel_src_rsz_vf(l, k, j, 1:num_dims) + r_eff = y_cc(k) + end select + + ! Divergence in cylindrical coordinates (vx=vz_cyl, vy=vr_cyl, vz=vtheta_cyl) + divergence_cyl = avg_dvdx_int(1) + avg_dvdy_int(2) + avg_v_int(2)/r_eff + if (num_dims > 2) then + #:if not MFC_CASE_OPTIMIZATION or num_dims > 2 + divergence_cyl = divergence_cyl + avg_dvdz_int(3)/r_eff + #:endif + end if - ! Average velocities and their derivatives at the interface - ! For cylindrical: x-dir ~ axial (z_cyl), y-dir ~ radial (r_cyl), z-dir ~ azimuthal (theta_cyl) - $:GPU_LOOP(parallelism='[seq]') - do i_vel = 1, num_dims - avg_v_int(i_vel) = 0.5_wp*(velL_vf(i_vel)%sf(j, k, l) + velR_vf(i_vel)%sf(idx_rp(1), idx_rp(2), idx_rp(3))) + stress_vector_shear = 0.0_wp + stress_normal_bulk = 0.0_wp - avg_dvdx_int(i_vel) = 0.5_wp*(dvelL_dx_vf(i_vel)%sf(j, k, l) + & - dvelR_dx_vf(i_vel)%sf(idx_rp(1), idx_rp(2), idx_rp(3))) + if (shear_stress) then + div_v_term_const = -(2.0_wp/3.0_wp)*divergence_cyl/Re_s + + select case (norm_dir) + case (1) ! X-face (axial normal, z_cyl) + stress_vector_shear(1) = (2.0_wp*avg_dvdx_int(1))/Re_s + div_v_term_const if (num_dims > 1) then - avg_dvdy_int(i_vel) = 0.5_wp*(dvelL_dy_vf(i_vel)%sf(j, k, l) + & - dvelR_dy_vf(i_vel)%sf(idx_rp(1), idx_rp(2), idx_rp(3))) - else - avg_dvdy_int(i_vel) = 0.0_wp + stress_vector_shear(2) = (avg_dvdy_int(1) + avg_dvdx_int(2))/Re_s end if if (num_dims > 2) then - avg_dvdz_int(i_vel) = 0.5_wp*(dvelL_dz_vf(i_vel)%sf(j, k, l) + & - dvelR_dz_vf(i_vel)%sf(idx_rp(1), idx_rp(2), idx_rp(3))) - else - avg_dvdz_int(i_vel) = 0.0_wp + #:if not MFC_CASE_OPTIMIZATION or num_dims > 2 + stress_vector_shear(3) = (avg_dvdz_int(1)/r_eff + avg_dvdx_int(3))/Re_s + #:endif end if - end do - - ! Get Re numbers and interface velocity for viscous work - select case (norm_dir) - case (1) ! x-face (axial face in z_cyl direction) - Re_s = Re_avg_rsx_vf(j, k, l, 1) - Re_b = Re_avg_rsx_vf(j, k, l, 2) - vel_src_int = vel_src_rsx_vf(j, k, l, 1:num_dims) - r_eff = y_cc(k) - case (2) ! y-face (radial face in r_cyl direction) - Re_s = Re_avg_rsy_vf(k, j, l, 1) - Re_b = Re_avg_rsy_vf(k, j, l, 2) - vel_src_int = vel_src_rsy_vf(k, j, l, 1:num_dims) - r_eff = y_cb(k) - case (3) ! z-face (azimuthal face in theta_cyl direction) - Re_s = Re_avg_rsz_vf(l, k, j, 1) - Re_b = Re_avg_rsz_vf(l, k, j, 2) - vel_src_int = vel_src_rsz_vf(l, k, j, 1:num_dims) - r_eff = y_cc(k) - end select - - ! Divergence in cylindrical coordinates (vx=vz_cyl, vy=vr_cyl, vz=vtheta_cyl) - divergence_cyl = avg_dvdx_int(1) + avg_dvdy_int(2) + avg_v_int(2)/r_eff - if (num_dims > 2) then - #:if not MFC_CASE_OPTIMIZATION or num_dims > 2 - divergence_cyl = divergence_cyl + avg_dvdz_int(3)/r_eff - #:endif - end if - - stress_vector_shear = 0.0_wp - stress_normal_bulk = 0.0_wp - - if (shear_stress) then - div_v_term_const = -(2.0_wp/3.0_wp)*divergence_cyl/Re_s - - select case (norm_dir) - case (1) ! X-face (axial normal, z_cyl) - stress_vector_shear(1) = (2.0_wp*avg_dvdx_int(1))/Re_s + div_v_term_const - if (num_dims > 1) then - stress_vector_shear(2) = (avg_dvdy_int(1) + avg_dvdx_int(2))/Re_s - end if - if (num_dims > 2) then - #:if not MFC_CASE_OPTIMIZATION or num_dims > 2 - stress_vector_shear(3) = (avg_dvdz_int(1)/r_eff + avg_dvdx_int(3))/Re_s - #:endif - end if - case (2) ! Y-face (radial normal, r_cyl) - if (num_dims > 1) then - stress_vector_shear(1) = (avg_dvdy_int(1) + avg_dvdx_int(2))/Re_s - stress_vector_shear(2) = (2.0_wp*avg_dvdy_int(2))/Re_s + div_v_term_const - if (num_dims > 2) then - #:if not MFC_CASE_OPTIMIZATION or num_dims > 2 - stress_vector_shear(3) = (avg_dvdz_int(2)/r_eff - avg_v_int(3)/r_eff + avg_dvdy_int(3))/Re_s - #:endif - end if - else - stress_vector_shear(1) = (2.0_wp*avg_dvdx_int(1))/Re_s + div_v_term_const - end if - case (3) ! Z-face (azimuthal normal, theta_cyl) + case (2) ! Y-face (radial normal, r_cyl) + if (num_dims > 1) then + stress_vector_shear(1) = (avg_dvdy_int(1) + avg_dvdx_int(2))/Re_s + stress_vector_shear(2) = (2.0_wp*avg_dvdy_int(2))/Re_s + div_v_term_const if (num_dims > 2) then #:if not MFC_CASE_OPTIMIZATION or num_dims > 2 - stress_vector_shear(1) = (avg_dvdz_int(1)/r_eff + avg_dvdx_int(3))/Re_s - stress_vector_shear(2) = (avg_dvdz_int(2)/r_eff - avg_v_int(3)/r_eff + avg_dvdy_int(3))/Re_s - stress_vector_shear(3) = (2.0_wp*(avg_dvdz_int(3)/r_eff + avg_v_int(2)/r_eff))/Re_s + div_v_term_const + stress_vector_shear(3) = (avg_dvdz_int(2)/r_eff - avg_v_int(3)/r_eff + avg_dvdy_int(3))/Re_s #:endif end if - end select + else + stress_vector_shear(1) = (2.0_wp*avg_dvdx_int(1))/Re_s + div_v_term_const + end if + case (3) ! Z-face (azimuthal normal, theta_cyl) + if (num_dims > 2) then + #:if not MFC_CASE_OPTIMIZATION or num_dims > 2 + stress_vector_shear(1) = (avg_dvdz_int(1)/r_eff + avg_dvdx_int(3))/Re_s + stress_vector_shear(2) = (avg_dvdz_int(2)/r_eff - avg_v_int(3)/r_eff + avg_dvdy_int(3))/Re_s + stress_vector_shear(3) = (2.0_wp*(avg_dvdz_int(3)/r_eff + avg_v_int(2)/r_eff))/Re_s + div_v_term_const + #:endif + end if + end select - $:GPU_LOOP(parallelism='[seq]') - do i_vel = 1, num_dims - flux_src_vf(momxb + i_vel - 1)%sf(j, k, l) = flux_src_vf(momxb + i_vel - 1)%sf(j, k, l) - stress_vector_shear(i_vel) - flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) - vel_src_int(i_vel)*stress_vector_shear(i_vel) - end do - end if + $:GPU_LOOP(parallelism='[seq]') + do i_vel = 1, num_dims + flux_src_vf(momxb + i_vel - 1)%sf(j, k, l) = flux_src_vf(momxb + i_vel - 1)%sf(j, k, l) - stress_vector_shear(i_vel) + flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) - vel_src_int(i_vel)*stress_vector_shear(i_vel) + end do + end if - if (bulk_stress) then - stress_normal_bulk = divergence_cyl/Re_b + if (bulk_stress) then + stress_normal_bulk = divergence_cyl/Re_b - flux_src_vf(momxb + norm_dir - 1)%sf(j, k, l) = flux_src_vf(momxb + norm_dir - 1)%sf(j, k, l) - stress_normal_bulk - flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) - vel_src_int(norm_dir)*stress_normal_bulk - end if + flux_src_vf(momxb + norm_dir - 1)%sf(j, k, l) = flux_src_vf(momxb + norm_dir - 1)%sf(j, k, l) - stress_normal_bulk + flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) - vel_src_int(norm_dir)*stress_normal_bulk + end if - end do end do end do + end do $:END_GPU_PARALLEL_LOOP() end subroutine s_compute_cylindrical_viscous_source_flux @@ -4787,88 +4787,88 @@ contains real(wp) :: divergence_v !< Velocity divergence at interface. $:GPU_PARALLEL_LOOP(collapse=3, private='[idx_right_phys, vel_grad_avg, current_tau_shear, current_tau_bulk, vel_src_at_interface, Re_shear, Re_bulk, divergence_v, i_dim, vel_comp_idx]') - do l_loop = isz%beg, isz%end - do k_loop = isy%beg, isy%end - do j_loop = isx%beg, isx%end + do l_loop = isz%beg, isz%end + do k_loop = isy%beg, isy%end + do j_loop = isx%beg, isx%end + + idx_right_phys(1) = j_loop + idx_right_phys(2) = k_loop + idx_right_phys(3) = l_loop + idx_right_phys(norm_dir) = idx_right_phys(norm_dir) + 1 + + vel_grad_avg = 0.0_wp + do vel_comp_idx = 1, num_dims + vel_grad_avg(vel_comp_idx, 1) = 0.5_wp*(dvelL_dx_vf(vel_comp_idx)%sf(j_loop, k_loop, l_loop) + & + dvelR_dx_vf(vel_comp_idx)%sf(idx_right_phys(1), idx_right_phys(2), idx_right_phys(3))) + if (num_dims > 1) then + vel_grad_avg(vel_comp_idx, 2) = 0.5_wp*(dvelL_dy_vf(vel_comp_idx)%sf(j_loop, k_loop, l_loop) + & + dvelR_dy_vf(vel_comp_idx)%sf(idx_right_phys(1), idx_right_phys(2), idx_right_phys(3))) + end if + if (num_dims > 2) then + #:if not MFC_CASE_OPTIMIZATION or num_dims > 2 + vel_grad_avg(vel_comp_idx, 3) = 0.5_wp*(dvelL_dz_vf(vel_comp_idx)%sf(j_loop, k_loop, l_loop) + & + dvelR_dz_vf(vel_comp_idx)%sf(idx_right_phys(1), idx_right_phys(2), idx_right_phys(3))) + #:endif + end if + end do - idx_right_phys(1) = j_loop - idx_right_phys(2) = k_loop - idx_right_phys(3) = l_loop - idx_right_phys(norm_dir) = idx_right_phys(norm_dir) + 1 + divergence_v = 0.0_wp + do i_dim = 1, num_dims + divergence_v = divergence_v + vel_grad_avg(i_dim, i_dim) + end do - vel_grad_avg = 0.0_wp - do vel_comp_idx = 1, num_dims - vel_grad_avg(vel_comp_idx, 1) = 0.5_wp*(dvelL_dx_vf(vel_comp_idx)%sf(j_loop, k_loop, l_loop) + & - dvelR_dx_vf(vel_comp_idx)%sf(idx_right_phys(1), idx_right_phys(2), idx_right_phys(3))) - if (num_dims > 1) then - vel_grad_avg(vel_comp_idx, 2) = 0.5_wp*(dvelL_dy_vf(vel_comp_idx)%sf(j_loop, k_loop, l_loop) + & - dvelR_dy_vf(vel_comp_idx)%sf(idx_right_phys(1), idx_right_phys(2), idx_right_phys(3))) - end if - if (num_dims > 2) then - #:if not MFC_CASE_OPTIMIZATION or num_dims > 2 - vel_grad_avg(vel_comp_idx, 3) = 0.5_wp*(dvelL_dz_vf(vel_comp_idx)%sf(j_loop, k_loop, l_loop) + & - dvelR_dz_vf(vel_comp_idx)%sf(idx_right_phys(1), idx_right_phys(2), idx_right_phys(3))) - #:endif - end if + vel_src_at_interface = 0.0_wp + if (norm_dir == 1) then + Re_shear = Re_avg_rsx_vf(j_loop, k_loop, l_loop, 1) + Re_bulk = Re_avg_rsx_vf(j_loop, k_loop, l_loop, 2) + do i_dim = 1, num_dims + vel_src_at_interface(i_dim) = vel_src_rsx_vf(j_loop, k_loop, l_loop, i_dim) end do - - divergence_v = 0.0_wp + else if (norm_dir == 2) then + Re_shear = Re_avg_rsy_vf(k_loop, j_loop, l_loop, 1) + Re_bulk = Re_avg_rsy_vf(k_loop, j_loop, l_loop, 2) do i_dim = 1, num_dims - divergence_v = divergence_v + vel_grad_avg(i_dim, i_dim) + vel_src_at_interface(i_dim) = vel_src_rsy_vf(k_loop, j_loop, l_loop, i_dim) end do + else + Re_shear = Re_avg_rsz_vf(l_loop, k_loop, j_loop, 1) + Re_bulk = Re_avg_rsz_vf(l_loop, k_loop, j_loop, 2) + do i_dim = 1, num_dims + vel_src_at_interface(i_dim) = vel_src_rsz_vf(l_loop, k_loop, j_loop, i_dim) + end do + end if - vel_src_at_interface = 0.0_wp - if (norm_dir == 1) then - Re_shear = Re_avg_rsx_vf(j_loop, k_loop, l_loop, 1) - Re_bulk = Re_avg_rsx_vf(j_loop, k_loop, l_loop, 2) - do i_dim = 1, num_dims - vel_src_at_interface(i_dim) = vel_src_rsx_vf(j_loop, k_loop, l_loop, i_dim) - end do - else if (norm_dir == 2) then - Re_shear = Re_avg_rsy_vf(k_loop, j_loop, l_loop, 1) - Re_bulk = Re_avg_rsy_vf(k_loop, j_loop, l_loop, 2) - do i_dim = 1, num_dims - vel_src_at_interface(i_dim) = vel_src_rsy_vf(k_loop, j_loop, l_loop, i_dim) - end do - else - Re_shear = Re_avg_rsz_vf(l_loop, k_loop, j_loop, 1) - Re_bulk = Re_avg_rsz_vf(l_loop, k_loop, j_loop, 2) - do i_dim = 1, num_dims - vel_src_at_interface(i_dim) = vel_src_rsz_vf(l_loop, k_loop, j_loop, i_dim) - end do - end if - - if (shear_stress) then - ! current_tau_shear = 0.0_wp - call s_calculate_shear_stress_tensor(vel_grad_avg, Re_shear, divergence_v, current_tau_shear) + if (shear_stress) then + ! current_tau_shear = 0.0_wp + call s_calculate_shear_stress_tensor(vel_grad_avg, Re_shear, divergence_v, current_tau_shear) - do i_dim = 1, num_dims - flux_src_vf(momxb + i_dim - 1)%sf(j_loop, k_loop, l_loop) = & - flux_src_vf(momxb + i_dim - 1)%sf(j_loop, k_loop, l_loop) - current_tau_shear(norm_dir, i_dim) + do i_dim = 1, num_dims + flux_src_vf(momxb + i_dim - 1)%sf(j_loop, k_loop, l_loop) = & + flux_src_vf(momxb + i_dim - 1)%sf(j_loop, k_loop, l_loop) - current_tau_shear(norm_dir, i_dim) - flux_src_vf(E_idx)%sf(j_loop, k_loop, l_loop) = & - flux_src_vf(E_idx)%sf(j_loop, k_loop, l_loop) - & - vel_src_at_interface(i_dim)*current_tau_shear(norm_dir, i_dim) - end do - end if + flux_src_vf(E_idx)%sf(j_loop, k_loop, l_loop) = & + flux_src_vf(E_idx)%sf(j_loop, k_loop, l_loop) - & + vel_src_at_interface(i_dim)*current_tau_shear(norm_dir, i_dim) + end do + end if - if (bulk_stress) then - ! current_tau_bulk = 0.0_wp - call s_calculate_bulk_stress_tensor(Re_bulk, divergence_v, current_tau_bulk) + if (bulk_stress) then + ! current_tau_bulk = 0.0_wp + call s_calculate_bulk_stress_tensor(Re_bulk, divergence_v, current_tau_bulk) - do i_dim = 1, num_dims - flux_src_vf(momxb + i_dim - 1)%sf(j_loop, k_loop, l_loop) = & - flux_src_vf(momxb + i_dim - 1)%sf(j_loop, k_loop, l_loop) - current_tau_bulk(norm_dir, i_dim) + do i_dim = 1, num_dims + flux_src_vf(momxb + i_dim - 1)%sf(j_loop, k_loop, l_loop) = & + flux_src_vf(momxb + i_dim - 1)%sf(j_loop, k_loop, l_loop) - current_tau_bulk(norm_dir, i_dim) - flux_src_vf(E_idx)%sf(j_loop, k_loop, l_loop) = & - flux_src_vf(E_idx)%sf(j_loop, k_loop, l_loop) - & - vel_src_at_interface(i_dim)*current_tau_bulk(norm_dir, i_dim) - end do - end if + flux_src_vf(E_idx)%sf(j_loop, k_loop, l_loop) = & + flux_src_vf(E_idx)%sf(j_loop, k_loop, l_loop) - & + vel_src_at_interface(i_dim)*current_tau_bulk(norm_dir, i_dim) + end do + end if - end do end do end do + end do $:END_GPU_PARALLEL_LOOP() end subroutine s_compute_cartesian_viscous_source_flux @@ -4951,153 +4951,153 @@ contains ! Reshaping Outputted Data in y-direction if (norm_dir == 2) then $:GPU_PARALLEL_LOOP(collapse=4) - do i = 1, sys_size - do l = is3%beg, is3%end - do j = is1%beg, is1%end - do k = is2%beg, is2%end - flux_vf(i)%sf(k, j, l) = & - flux_rsy_vf(j, k, l, i) - end do + do i = 1, sys_size + do l = is3%beg, is3%end + do j = is1%beg, is1%end + do k = is2%beg, is2%end + flux_vf(i)%sf(k, j, l) = & + flux_rsy_vf(j, k, l, i) end do end do end do + end do $:END_GPU_PARALLEL_LOOP() if (cyl_coord) then $:GPU_PARALLEL_LOOP(collapse=4) - do i = 1, sys_size - do l = is3%beg, is3%end - do j = is1%beg, is1%end - do k = is2%beg, is2%end - flux_gsrc_vf(i)%sf(k, j, l) = & - flux_gsrc_rsy_vf(j, k, l, i) - end do + do i = 1, sys_size + do l = is3%beg, is3%end + do j = is1%beg, is1%end + do k = is2%beg, is2%end + flux_gsrc_vf(i)%sf(k, j, l) = & + flux_gsrc_rsy_vf(j, k, l, i) end do end do end do + end do $:END_GPU_PARALLEL_LOOP() end if $:GPU_PARALLEL_LOOP(collapse=3) - do l = is3%beg, is3%end - do j = is1%beg, is1%end - do k = is2%beg, is2%end - flux_src_vf(advxb)%sf(k, j, l) = & - flux_src_rsy_vf(j, k, l, advxb) - end do + do l = is3%beg, is3%end + do j = is1%beg, is1%end + do k = is2%beg, is2%end + flux_src_vf(advxb)%sf(k, j, l) = & + flux_src_rsy_vf(j, k, l, advxb) end do end do + end do $:END_GPU_PARALLEL_LOOP() if (riemann_solver == 1 .or. riemann_solver == 4) then $:GPU_PARALLEL_LOOP(collapse=4) - do i = advxb + 1, advxe - do l = is3%beg, is3%end - do j = is1%beg, is1%end - do k = is2%beg, is2%end - flux_src_vf(i)%sf(k, j, l) = & - flux_src_rsy_vf(j, k, l, i) - end do + do i = advxb + 1, advxe + do l = is3%beg, is3%end + do j = is1%beg, is1%end + do k = is2%beg, is2%end + flux_src_vf(i)%sf(k, j, l) = & + flux_src_rsy_vf(j, k, l, i) end do end do end do + end do $:END_GPU_PARALLEL_LOOP() end if ! Reshaping Outputted Data in z-direction elseif (norm_dir == 3) then $:GPU_PARALLEL_LOOP(collapse=4) - do i = 1, sys_size - do j = is1%beg, is1%end - do k = is2%beg, is2%end - do l = is3%beg, is3%end + do i = 1, sys_size + do j = is1%beg, is1%end + do k = is2%beg, is2%end + do l = is3%beg, is3%end - flux_vf(i)%sf(l, k, j) = & - flux_rsz_vf(j, k, l, i) - end do + flux_vf(i)%sf(l, k, j) = & + flux_rsz_vf(j, k, l, i) end do end do end do + end do $:END_GPU_PARALLEL_LOOP() if (grid_geometry == 3) then $:GPU_PARALLEL_LOOP(collapse=4) - do i = 1, sys_size - do j = is1%beg, is1%end - do k = is2%beg, is2%end - do l = is3%beg, is3%end + do i = 1, sys_size + do j = is1%beg, is1%end + do k = is2%beg, is2%end + do l = is3%beg, is3%end - flux_gsrc_vf(i)%sf(l, k, j) = & - flux_gsrc_rsz_vf(j, k, l, i) - end do + flux_gsrc_vf(i)%sf(l, k, j) = & + flux_gsrc_rsz_vf(j, k, l, i) end do end do end do + end do $:END_GPU_PARALLEL_LOOP() end if $:GPU_PARALLEL_LOOP(collapse=3) - do j = is1%beg, is1%end - do k = is2%beg, is2%end - do l = is3%beg, is3%end - flux_src_vf(advxb)%sf(l, k, j) = & - flux_src_rsz_vf(j, k, l, advxb) - end do + do j = is1%beg, is1%end + do k = is2%beg, is2%end + do l = is3%beg, is3%end + flux_src_vf(advxb)%sf(l, k, j) = & + flux_src_rsz_vf(j, k, l, advxb) end do end do + end do $:END_GPU_PARALLEL_LOOP() if (riemann_solver == 1 .or. riemann_solver == 4) then $:GPU_PARALLEL_LOOP(collapse=4) - do i = advxb + 1, advxe - do j = is1%beg, is1%end - do k = is2%beg, is2%end - do l = is3%beg, is3%end - flux_src_vf(i)%sf(l, k, j) = & - flux_src_rsz_vf(j, k, l, i) - end do + do i = advxb + 1, advxe + do j = is1%beg, is1%end + do k = is2%beg, is2%end + do l = is3%beg, is3%end + flux_src_vf(i)%sf(l, k, j) = & + flux_src_rsz_vf(j, k, l, i) end do end do end do + end do $:END_GPU_PARALLEL_LOOP() end if elseif (norm_dir == 1) then $:GPU_PARALLEL_LOOP(collapse=4) - do i = 1, sys_size - do l = is3%beg, is3%end - do k = is2%beg, is2%end - do j = is1%beg, is1%end - flux_vf(i)%sf(j, k, l) = & - flux_rsx_vf(j, k, l, i) - end do + do i = 1, sys_size + do l = is3%beg, is3%end + do k = is2%beg, is2%end + do j = is1%beg, is1%end + flux_vf(i)%sf(j, k, l) = & + flux_rsx_vf(j, k, l, i) end do end do end do + end do $:END_GPU_PARALLEL_LOOP() $:GPU_PARALLEL_LOOP(collapse=3) - do l = is3%beg, is3%end - do k = is2%beg, is2%end - do j = is1%beg, is1%end - flux_src_vf(advxb)%sf(j, k, l) = & - flux_src_rsx_vf(j, k, l, advxb) - end do + do l = is3%beg, is3%end + do k = is2%beg, is2%end + do j = is1%beg, is1%end + flux_src_vf(advxb)%sf(j, k, l) = & + flux_src_rsx_vf(j, k, l, advxb) end do end do + end do $:END_GPU_PARALLEL_LOOP() if (riemann_solver == 1 .or. riemann_solver == 4) then $:GPU_PARALLEL_LOOP(collapse=4) - do i = advxb + 1, advxe - do l = is3%beg, is3%end - do k = is2%beg, is2%end - do j = is1%beg, is1%end - flux_src_vf(i)%sf(j, k, l) = & - flux_src_rsx_vf(j, k, l, i) - end do + do i = advxb + 1, advxe + do l = is3%beg, is3%end + do k = is2%beg, is2%end + do j = is1%beg, is1%end + flux_src_vf(i)%sf(j, k, l) = & + flux_src_rsx_vf(j, k, l, i) end do end do end do + end do $:END_GPU_PARALLEL_LOOP() end if end if @@ -5146,4 +5146,4 @@ contains end subroutine s_finalize_riemann_solvers_module -end module m_riemann_solvers \ No newline at end of file +end module m_riemann_solvers diff --git a/src/simulation/m_start_up.fpp b/src/simulation/m_start_up.fpp index cebe8fd25..5e56dd156 100644 --- a/src/simulation/m_start_up.fpp +++ b/src/simulation/m_start_up.fpp @@ -1221,16 +1221,16 @@ contains if (time_stepper /= 1) then $:GPU_PARALLEL_LOOP(collapse=4, copyin='[idwbuff]') - do i = 1, sys_size - do l = idwbuff(3)%beg, idwbuff(3)%end - do k = idwbuff(2)%beg, idwbuff(2)%end - do j = idwbuff(1)%beg, idwbuff(1)%end - q_cons_ts(2)%vf(i)%sf(j, k, l) = & - q_cons_ts(1)%vf(i)%sf(j, k, l) - end do + do i = 1, sys_size + do l = idwbuff(3)%beg, idwbuff(3)%end + do k = idwbuff(2)%beg, idwbuff(2)%end + do j = idwbuff(1)%beg, idwbuff(1)%end + q_cons_ts(2)%vf(i)%sf(j, k, l) = & + q_cons_ts(1)%vf(i)%sf(j, k, l) end do end do end do + end do $:END_GPU_PARALLEL_LOOP() stor = 2 end if diff --git a/src/simulation/m_surface_tension.fpp b/src/simulation/m_surface_tension.fpp index 1e28a6f7a..86e187dca 100644 --- a/src/simulation/m_surface_tension.fpp +++ b/src/simulation/m_surface_tension.fpp @@ -90,140 +90,140 @@ contains if (id == 1) then $:GPU_PARALLEL_LOOP(collapse=3, private='[Omega, w1L, w2L, w3L, w1R, w2R, w3R, w1, w2, w3, normWL, normWR, normW]') - do l = isz%beg, isz%end - do k = isy%beg, isy%end - do j = isx%beg, isx%end + do l = isz%beg, isz%end + do k = isy%beg, isy%end + do j = isx%beg, isx%end - w1L = gL_x(j, k, l, 1) - w2L = gL_x(j, k, l, 2) - w3L = 0._wp - if (p > 0) w3L = gL_x(j, k, l, 3) + w1L = gL_x(j, k, l, 1) + w2L = gL_x(j, k, l, 2) + w3L = 0._wp + if (p > 0) w3L = gL_x(j, k, l, 3) - w1R = gR_x(j + 1, k, l, 1) - w2R = gR_x(j + 1, k, l, 2) - w3R = 0._wp - if (p > 0) w3R = gR_x(j + 1, k, l, 3) + w1R = gR_x(j + 1, k, l, 1) + w2R = gR_x(j + 1, k, l, 2) + w3R = 0._wp + if (p > 0) w3R = gR_x(j + 1, k, l, 3) - normWL = gL_x(j, k, l, num_dims + 1) - normWR = gR_x(j + 1, k, l, num_dims + 1) + normWL = gL_x(j, k, l, num_dims + 1) + normWR = gR_x(j + 1, k, l, num_dims + 1) - w1 = (w1L + w1R)/2._wp - w2 = (w2L + w2R)/2._wp - w3 = (w3L + w3R)/2._wp - normW = (normWL + normWR)/2._wp + w1 = (w1L + w1R)/2._wp + w2 = (w2L + w2R)/2._wp + w3 = (w3L + w3R)/2._wp + normW = (normWL + normWR)/2._wp - if (normW > capillary_cutoff) then - @:compute_capillary_stress_tensor() + if (normW > capillary_cutoff) then + @:compute_capillary_stress_tensor() - do i = 1, num_dims + do i = 1, num_dims - flux_src_vf(momxb + i - 1)%sf(j, k, l) = & - flux_src_vf(momxb + i - 1)%sf(j, k, l) + Omega(1, i) + flux_src_vf(momxb + i - 1)%sf(j, k, l) = & + flux_src_vf(momxb + i - 1)%sf(j, k, l) + Omega(1, i) - flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) + & - Omega(1, i)*vSrc_rsx_vf(j, k, l, i) + flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) + & + Omega(1, i)*vSrc_rsx_vf(j, k, l, i) - end do + end do - flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) + & - sigma*c_divs(num_dims + 1)%sf(j, k, l)*vSrc_rsx_vf(j, k, l, 1) - end if - end do + flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) + & + sigma*c_divs(num_dims + 1)%sf(j, k, l)*vSrc_rsx_vf(j, k, l, 1) + end if end do end do + end do $:END_GPU_PARALLEL_LOOP() elseif (id == 2) then $:GPU_PARALLEL_LOOP(collapse=3, private='[Omega, w1L, w2L, w3L, w1R, w2R, w3R, w1, w2, w3, normWL, normWR, normW]') - do l = isz%beg, isz%end - do k = isy%beg, isy%end - do j = isx%beg, isx%end + do l = isz%beg, isz%end + do k = isy%beg, isy%end + do j = isx%beg, isx%end - w1L = gL_y(k, j, l, 1) - w2L = gL_y(k, j, l, 2) - w3L = 0._wp - if (p > 0) w3L = gL_y(k, j, l, 3) + w1L = gL_y(k, j, l, 1) + w2L = gL_y(k, j, l, 2) + w3L = 0._wp + if (p > 0) w3L = gL_y(k, j, l, 3) - w1R = gR_y(k + 1, j, l, 1) - w2R = gR_y(k + 1, j, l, 2) - w3R = 0._wp - if (p > 0) w3R = gR_y(k + 1, j, l, 3) + w1R = gR_y(k + 1, j, l, 1) + w2R = gR_y(k + 1, j, l, 2) + w3R = 0._wp + if (p > 0) w3R = gR_y(k + 1, j, l, 3) - normWL = gL_y(k, j, l, num_dims + 1) - normWR = gR_y(k + 1, j, l, num_dims + 1) + normWL = gL_y(k, j, l, num_dims + 1) + normWR = gR_y(k + 1, j, l, num_dims + 1) - w1 = (w1L + w1R)/2._wp - w2 = (w2L + w2R)/2._wp - w3 = (w3L + w3R)/2._wp - normW = (normWL + normWR)/2._wp + w1 = (w1L + w1R)/2._wp + w2 = (w2L + w2R)/2._wp + w3 = (w3L + w3R)/2._wp + normW = (normWL + normWR)/2._wp - if (normW > capillary_cutoff) then - @:compute_capillary_stress_tensor() + if (normW > capillary_cutoff) then + @:compute_capillary_stress_tensor() - do i = 1, num_dims + do i = 1, num_dims - flux_src_vf(momxb + i - 1)%sf(j, k, l) = & - flux_src_vf(momxb + i - 1)%sf(j, k, l) + Omega(2, i) + flux_src_vf(momxb + i - 1)%sf(j, k, l) = & + flux_src_vf(momxb + i - 1)%sf(j, k, l) + Omega(2, i) - flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) + & - Omega(2, i)*vSrc_rsy_vf(k, j, l, i) + flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) + & + Omega(2, i)*vSrc_rsy_vf(k, j, l, i) - end do + end do - flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) + & - sigma*c_divs(num_dims + 1)%sf(j, k, l)*vSrc_rsy_vf(k, j, l, 2) - end if - end do + flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) + & + sigma*c_divs(num_dims + 1)%sf(j, k, l)*vSrc_rsy_vf(k, j, l, 2) + end if end do end do + end do $:END_GPU_PARALLEL_LOOP() elseif (id == 3) then #:if not MFC_CASE_OPTIMIZATION or num_dims > 2 $:GPU_PARALLEL_LOOP(collapse=3, private='[Omega, w1L, w2L, w3L, w1R, w2R, w3R, w1, w2, w3, normWL, normWR, normW]') - do l = isz%beg, isz%end - do k = isy%beg, isy%end - do j = isx%beg, isx%end + do l = isz%beg, isz%end + do k = isy%beg, isy%end + do j = isx%beg, isx%end - w1L = gL_z(l, k, j, 1) - w2L = gL_z(l, k, j, 2) - w3L = 0._wp - if (p > 0) w3L = gL_z(l, k, j, 3) + w1L = gL_z(l, k, j, 1) + w2L = gL_z(l, k, j, 2) + w3L = 0._wp + if (p > 0) w3L = gL_z(l, k, j, 3) - w1R = gR_z(l + 1, k, j, 1) - w2R = gR_z(l + 1, k, j, 2) - w3R = 0._wp - if (p > 0) w3R = gR_z(l + 1, k, j, 3) + w1R = gR_z(l + 1, k, j, 1) + w2R = gR_z(l + 1, k, j, 2) + w3R = 0._wp + if (p > 0) w3R = gR_z(l + 1, k, j, 3) - normWL = gL_z(l, k, j, num_dims + 1) - normWR = gR_z(l + 1, k, j, num_dims + 1) + normWL = gL_z(l, k, j, num_dims + 1) + normWR = gR_z(l + 1, k, j, num_dims + 1) - w1 = (w1L + w1R)/2._wp - w2 = (w2L + w2R)/2._wp - w3 = (w3L + w3R)/2._wp - normW = (normWL + normWR)/2._wp + w1 = (w1L + w1R)/2._wp + w2 = (w2L + w2R)/2._wp + w3 = (w3L + w3R)/2._wp + normW = (normWL + normWR)/2._wp - if (normW > capillary_cutoff) then - @:compute_capillary_stress_tensor() + if (normW > capillary_cutoff) then + @:compute_capillary_stress_tensor() - do i = 1, num_dims + do i = 1, num_dims - flux_src_vf(momxb + i - 1)%sf(j, k, l) = & - flux_src_vf(momxb + i - 1)%sf(j, k, l) + Omega(3, i) + flux_src_vf(momxb + i - 1)%sf(j, k, l) = & + flux_src_vf(momxb + i - 1)%sf(j, k, l) + Omega(3, i) - flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) + & - Omega(3, i)*vSrc_rsz_vf(l, k, j, i) + flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) + & + Omega(3, i)*vSrc_rsz_vf(l, k, j, i) - end do + end do - flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) + & - sigma*c_divs(num_dims + 1)%sf(j, k, l)*vSrc_rsz_vf(l, k, j, 3) - end if - end do + flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) + & + sigma*c_divs(num_dims + 1)%sf(j, k, l)*vSrc_rsz_vf(l, k, j, 3) + end if end do end do + end do $:END_GPU_PARALLEL_LOOP() #:endif end if @@ -246,58 +246,58 @@ contains ! compute gradient components $:GPU_PARALLEL_LOOP(collapse=3) - do l = 0, p - do k = 0, n - do j = 0, m - c_divs(1)%sf(j, k, l) = 1._wp/(x_cc(j + 1) - x_cc(j - 1))* & - (q_prim_vf(c_idx)%sf(j + 1, k, l) - q_prim_vf(c_idx)%sf(j - 1, k, l)) - end do + do l = 0, p + do k = 0, n + do j = 0, m + c_divs(1)%sf(j, k, l) = 1._wp/(x_cc(j + 1) - x_cc(j - 1))* & + (q_prim_vf(c_idx)%sf(j + 1, k, l) - q_prim_vf(c_idx)%sf(j - 1, k, l)) end do end do + end do $:END_GPU_PARALLEL_LOOP() $:GPU_PARALLEL_LOOP(collapse=3) - do l = 0, p - do k = 0, n - do j = 0, m - c_divs(2)%sf(j, k, l) = 1._wp/(y_cc(k + 1) - y_cc(k - 1))* & - (q_prim_vf(c_idx)%sf(j, k + 1, l) - q_prim_vf(c_idx)%sf(j, k - 1, l)) - end do + do l = 0, p + do k = 0, n + do j = 0, m + c_divs(2)%sf(j, k, l) = 1._wp/(y_cc(k + 1) - y_cc(k - 1))* & + (q_prim_vf(c_idx)%sf(j, k + 1, l) - q_prim_vf(c_idx)%sf(j, k - 1, l)) end do end do + end do $:END_GPU_PARALLEL_LOOP() if (p > 0) then $:GPU_PARALLEL_LOOP(collapse=3) - do l = 0, p - do k = 0, n - do j = 0, m - c_divs(3)%sf(j, k, l) = 1._wp/(z_cc(l + 1) - z_cc(l - 1))* & - (q_prim_vf(c_idx)%sf(j, k, l + 1) - q_prim_vf(c_idx)%sf(j, k, l - 1)) - end do + do l = 0, p + do k = 0, n + do j = 0, m + c_divs(3)%sf(j, k, l) = 1._wp/(z_cc(l + 1) - z_cc(l - 1))* & + (q_prim_vf(c_idx)%sf(j, k, l + 1) - q_prim_vf(c_idx)%sf(j, k, l - 1)) end do end do + end do $:END_GPU_PARALLEL_LOOP() end if $:GPU_PARALLEL_LOOP(collapse=3) - do l = 0, p - do k = 0, n - do j = 0, m - c_divs(num_dims + 1)%sf(j, k, l) = 0._wp - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_dims - c_divs(num_dims + 1)%sf(j, k, l) = & - c_divs(num_dims + 1)%sf(j, k, l) + & - c_divs(i)%sf(j, k, l)**2._wp - end do - !c_divs(num_dims + 1)%sf(j, k, l) = & - !sqrt(c_divs(num_dims + 1)%sf(j, k, l)) + do l = 0, p + do k = 0, n + do j = 0, m + c_divs(num_dims + 1)%sf(j, k, l) = 0._wp + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_dims c_divs(num_dims + 1)%sf(j, k, l) = & - sqrt(real(c_divs(num_dims + 1)%sf(j, k, l), kind=wp)) + c_divs(num_dims + 1)%sf(j, k, l) + & + c_divs(i)%sf(j, k, l)**2._wp end do + !c_divs(num_dims + 1)%sf(j, k, l) = & + !sqrt(c_divs(num_dims + 1)%sf(j, k, l)) + c_divs(num_dims + 1)%sf(j, k, l) = & + sqrt(real(c_divs(num_dims + 1)%sf(j, k, l), kind=wp)) end do end do + end do $:END_GPU_PARALLEL_LOOP() call s_populate_capillary_buffers(c_divs, bc_type) @@ -348,42 +348,42 @@ contains if (recon_dir == 1) then $:GPU_PARALLEL_LOOP(collapse=4) - do i = iv%beg, iv%end - do l = is3%beg, is3%end - do k = is2%beg, is2%end - do j = is1%beg, is1%end - vL_x(j, k, l, i) = v_vf(i)%sf(j, k, l) - vR_x(j, k, l, i) = v_vf(i)%sf(j, k, l) - end do + do i = iv%beg, iv%end + do l = is3%beg, is3%end + do k = is2%beg, is2%end + do j = is1%beg, is1%end + vL_x(j, k, l, i) = v_vf(i)%sf(j, k, l) + vR_x(j, k, l, i) = v_vf(i)%sf(j, k, l) end do end do end do + end do $:END_GPU_PARALLEL_LOOP() else if (recon_dir == 2) then $:GPU_PARALLEL_LOOP(collapse=4) - do i = iv%beg, iv%end - do l = is3%beg, is3%end - do k = is2%beg, is2%end - do j = is1%beg, is1%end - vL_y(j, k, l, i) = v_vf(i)%sf(k, j, l) - vR_y(j, k, l, i) = v_vf(i)%sf(k, j, l) - end do + do i = iv%beg, iv%end + do l = is3%beg, is3%end + do k = is2%beg, is2%end + do j = is1%beg, is1%end + vL_y(j, k, l, i) = v_vf(i)%sf(k, j, l) + vR_y(j, k, l, i) = v_vf(i)%sf(k, j, l) end do end do end do + end do $:END_GPU_PARALLEL_LOOP() else if (recon_dir == 3) then $:GPU_PARALLEL_LOOP(collapse=4) - do i = iv%beg, iv%end - do l = is3%beg, is3%end - do k = is2%beg, is2%end - do j = is1%beg, is1%end - vL_z(j, k, l, i) = v_vf(i)%sf(l, k, j) - vR_z(j, k, l, i) = v_vf(i)%sf(l, k, j) - end do + do i = iv%beg, iv%end + do l = is3%beg, is3%end + do k = is2%beg, is2%end + do j = is1%beg, is1%end + vL_z(j, k, l, i) = v_vf(i)%sf(l, k, j) + vR_z(j, k, l, i) = v_vf(i)%sf(l, k, j) end do end do end do + end do $:END_GPU_PARALLEL_LOOP() end if end if @@ -409,4 +409,4 @@ contains end subroutine s_finalize_surface_tension_module -end module m_surface_tension \ No newline at end of file +end module m_surface_tension diff --git a/src/simulation/m_time_steppers.fpp b/src/simulation/m_time_steppers.fpp index 41553540f..8a852bb40 100644 --- a/src/simulation/m_time_steppers.fpp +++ b/src/simulation/m_time_steppers.fpp @@ -544,57 +544,57 @@ contains if (bubbles_lagrange .and. .not. adap_dt) call s_update_lagrange_tdv_rk(stage=s) $:GPU_PARALLEL_LOOP(collapse=4) - do i = 1, sys_size - do l = 0, p - do k = 0, n - do j = 0, m - if (s == 1 .and. nstage > 1) then - q_cons_ts(stor)%vf(i)%sf(j, k, l) = & - q_cons_ts(1)%vf(i)%sf(j, k, l) - end if - if (igr) then - q_cons_ts(1)%vf(i)%sf(j, k, l) = & - (rk_coef(s, 1)*q_cons_ts(1)%vf(i)%sf(j, k, l) & - + rk_coef(s, 2)*q_cons_ts(stor)%vf(i)%sf(j, k, l) & - + rk_coef(s, 3)*rhs_vf(i)%sf(j, k, l))/rk_coef(s, 4) - else - q_cons_ts(1)%vf(i)%sf(j, k, l) = & - (rk_coef(s, 1)*q_cons_ts(1)%vf(i)%sf(j, k, l) & - + rk_coef(s, 2)*q_cons_ts(stor)%vf(i)%sf(j, k, l) & - + rk_coef(s, 3)*dt*rhs_vf(i)%sf(j, k, l))/rk_coef(s, 4) - end if - end do + do i = 1, sys_size + do l = 0, p + do k = 0, n + do j = 0, m + if (s == 1 .and. nstage > 1) then + q_cons_ts(stor)%vf(i)%sf(j, k, l) = & + q_cons_ts(1)%vf(i)%sf(j, k, l) + end if + if (igr) then + q_cons_ts(1)%vf(i)%sf(j, k, l) = & + (rk_coef(s, 1)*q_cons_ts(1)%vf(i)%sf(j, k, l) & + + rk_coef(s, 2)*q_cons_ts(stor)%vf(i)%sf(j, k, l) & + + rk_coef(s, 3)*rhs_vf(i)%sf(j, k, l))/rk_coef(s, 4) + else + q_cons_ts(1)%vf(i)%sf(j, k, l) = & + (rk_coef(s, 1)*q_cons_ts(1)%vf(i)%sf(j, k, l) & + + rk_coef(s, 2)*q_cons_ts(stor)%vf(i)%sf(j, k, l) & + + rk_coef(s, 3)*dt*rhs_vf(i)%sf(j, k, l))/rk_coef(s, 4) + end if end do end do end do + end do $:END_GPU_PARALLEL_LOOP() !Evolve pb and mv for non-polytropic qbmm if (qbmm .and. (.not. polytropic)) then $:GPU_PARALLEL_LOOP(collapse=5) - do i = 1, nb - do l = 0, p - do k = 0, n - do j = 0, m - do q = 1, nnode - if (s == 1 .and. nstage > 1) then - pb_ts(stor)%sf(j, k, l, q, i) = & - pb_ts(1)%sf(j, k, l, q, i) - mv_ts(stor)%sf(j, k, l, q, i) = & - mv_ts(1)%sf(j, k, l, q, i) - end if - pb_ts(1)%sf(j, k, l, q, i) = & - (rk_coef(s, 1)*pb_ts(1)%sf(j, k, l, q, i) & - + rk_coef(s, 2)*pb_ts(stor)%sf(j, k, l, q, i) & - + rk_coef(s, 3)*dt*rhs_pb(j, k, l, q, i))/rk_coef(s, 4) - mv_ts(1)%sf(j, k, l, q, i) = & - (rk_coef(s, 1)*mv_ts(1)%sf(j, k, l, q, i) & - + rk_coef(s, 2)*mv_ts(stor)%sf(j, k, l, q, i) & - + rk_coef(s, 3)*dt*rhs_mv(j, k, l, q, i))/rk_coef(s, 4) - end do + do i = 1, nb + do l = 0, p + do k = 0, n + do j = 0, m + do q = 1, nnode + if (s == 1 .and. nstage > 1) then + pb_ts(stor)%sf(j, k, l, q, i) = & + pb_ts(1)%sf(j, k, l, q, i) + mv_ts(stor)%sf(j, k, l, q, i) = & + mv_ts(1)%sf(j, k, l, q, i) + end if + pb_ts(1)%sf(j, k, l, q, i) = & + (rk_coef(s, 1)*pb_ts(1)%sf(j, k, l, q, i) & + + rk_coef(s, 2)*pb_ts(stor)%sf(j, k, l, q, i) & + + rk_coef(s, 3)*dt*rhs_pb(j, k, l, q, i))/rk_coef(s, 4) + mv_ts(1)%sf(j, k, l, q, i) = & + (rk_coef(s, 1)*mv_ts(1)%sf(j, k, l, q, i) & + + rk_coef(s, 2)*mv_ts(stor)%sf(j, k, l, q, i) & + + rk_coef(s, 3)*dt*rhs_mv(j, k, l, q, i))/rk_coef(s, 4) end do end do end do end do + end do $:END_GPU_PARALLEL_LOOP() end if @@ -726,22 +726,22 @@ contains end if $:GPU_PARALLEL_LOOP(collapse=3, private='[vel, alpha, Re, rho, vel_sum, pres, gamma, pi_inf, c, H]') - do l = 0, p - do k = 0, n - do j = 0, m - if (igr) then - call s_compute_enthalpy(q_cons_ts(1)%vf, pres, rho, gamma, pi_inf, Re, H, alpha, vel, vel_sum, j, k, l) - else - call s_compute_enthalpy(q_prim_vf, pres, rho, gamma, pi_inf, Re, H, alpha, vel, vel_sum, j, k, l) - end if + do l = 0, p + do k = 0, n + do j = 0, m + if (igr) then + call s_compute_enthalpy(q_cons_ts(1)%vf, pres, rho, gamma, pi_inf, Re, H, alpha, vel, vel_sum, j, k, l) + else + call s_compute_enthalpy(q_prim_vf, pres, rho, gamma, pi_inf, Re, H, alpha, vel, vel_sum, j, k, l) + end if - ! Compute mixture sound speed - call s_compute_speed_of_sound(pres, rho, gamma, pi_inf, H, alpha, vel_sum, 0._wp, c) + ! Compute mixture sound speed + call s_compute_speed_of_sound(pres, rho, gamma, pi_inf, H, alpha, vel_sum, 0._wp, c) - call s_compute_dt_from_cfl(vel, c, max_dt, rho, Re, j, k, l) - end do + call s_compute_dt_from_cfl(vel, c, max_dt, rho, Re, j, k, l) end do end do + end do $:END_GPU_PARALLEL_LOOP() #:call GPU_PARALLEL(copyout='[dt_local]', copyin='[max_dt]') @@ -774,16 +774,16 @@ contains call s_compute_body_forces_rhs(q_prim_vf_in, q_cons_vf, rhs_vf_in) $:GPU_PARALLEL_LOOP(collapse=4) - do i = momxb, E_idx - do l = 0, p - do k = 0, n - do j = 0, m - q_cons_vf(i)%sf(j, k, l) = q_cons_vf(i)%sf(j, k, l) + & - ldt*rhs_vf_in(i)%sf(j, k, l) - end do + do i = momxb, E_idx + do l = 0, p + do k = 0, n + do j = 0, m + q_cons_vf(i)%sf(j, k, l) = q_cons_vf(i)%sf(j, k, l) + & + ldt*rhs_vf_in(i)%sf(j, k, l) end do end do end do + end do $:END_GPU_PARALLEL_LOOP() call nvtxEndRange @@ -801,66 +801,66 @@ contains if (t_step == t_step_start) then $:GPU_PARALLEL_LOOP(collapse=4) - do i = 1, sys_size - do l = 0, p - do k = 0, n - do j = 0, m - q_prim_ts2(2)%vf(i)%sf(j, k, l) = q_prim_vf(i)%sf(j, k, l) - end do + do i = 1, sys_size + do l = 0, p + do k = 0, n + do j = 0, m + q_prim_ts2(2)%vf(i)%sf(j, k, l) = q_prim_vf(i)%sf(j, k, l) end do end do end do + end do $:END_GPU_PARALLEL_LOOP() elseif (t_step == t_step_start + 1) then $:GPU_PARALLEL_LOOP(collapse=4) - do i = 1, sys_size - do l = 0, p - do k = 0, n - do j = 0, m - q_prim_ts2(1)%vf(i)%sf(j, k, l) = q_prim_vf(i)%sf(j, k, l) - end do + do i = 1, sys_size + do l = 0, p + do k = 0, n + do j = 0, m + q_prim_ts2(1)%vf(i)%sf(j, k, l) = q_prim_vf(i)%sf(j, k, l) end do end do end do + end do $:END_GPU_PARALLEL_LOOP() elseif (t_step == t_step_start + 2) then $:GPU_PARALLEL_LOOP(collapse=4) - do i = 1, sys_size - do l = 0, p - do k = 0, n - do j = 0, m - q_prim_ts1(2)%vf(i)%sf(j, k, l) = q_prim_vf(i)%sf(j, k, l) - end do + do i = 1, sys_size + do l = 0, p + do k = 0, n + do j = 0, m + q_prim_ts1(2)%vf(i)%sf(j, k, l) = q_prim_vf(i)%sf(j, k, l) end do end do end do + end do $:END_GPU_PARALLEL_LOOP() elseif (t_step == t_step_start + 3) then $:GPU_PARALLEL_LOOP(collapse=4) - do i = 1, sys_size - do l = 0, p - do k = 0, n - do j = 0, m - q_prim_ts1(1)%vf(i)%sf(j, k, l) = q_prim_vf(i)%sf(j, k, l) - end do + do i = 1, sys_size + do l = 0, p + do k = 0, n + do j = 0, m + q_prim_ts1(1)%vf(i)%sf(j, k, l) = q_prim_vf(i)%sf(j, k, l) end do end do end do + end do $:END_GPU_PARALLEL_LOOP() else ! All other timesteps $:GPU_PARALLEL_LOOP(collapse=4) - do i = 1, sys_size - do l = 0, p - do k = 0, n - do j = 0, m - q_prim_ts2(2)%vf(i)%sf(j, k, l) = q_prim_ts2(1)%vf(i)%sf(j, k, l) - q_prim_ts2(1)%vf(i)%sf(j, k, l) = q_prim_ts1(2)%vf(i)%sf(j, k, l) - q_prim_ts1(2)%vf(i)%sf(j, k, l) = q_prim_ts1(1)%vf(i)%sf(j, k, l) - q_prim_ts1(1)%vf(i)%sf(j, k, l) = q_prim_vf(i)%sf(j, k, l) - end do + do i = 1, sys_size + do l = 0, p + do k = 0, n + do j = 0, m + q_prim_ts2(2)%vf(i)%sf(j, k, l) = q_prim_ts2(1)%vf(i)%sf(j, k, l) + q_prim_ts2(1)%vf(i)%sf(j, k, l) = q_prim_ts1(2)%vf(i)%sf(j, k, l) + q_prim_ts1(2)%vf(i)%sf(j, k, l) = q_prim_ts1(1)%vf(i)%sf(j, k, l) + q_prim_ts1(1)%vf(i)%sf(j, k, l) = q_prim_vf(i)%sf(j, k, l) end do end do end do + end do $:END_GPU_PARALLEL_LOOP() end if @@ -986,4 +986,4 @@ contains end subroutine s_finalize_time_steppers_module -end module m_time_steppers \ No newline at end of file +end module m_time_steppers diff --git a/src/simulation/m_viscous.fpp b/src/simulation/m_viscous.fpp index aee819607..1b01e0940 100644 --- a/src/simulation/m_viscous.fpp +++ b/src/simulation/m_viscous.fpp @@ -82,20 +82,234 @@ contains $:GPU_UPDATE(device='[is1_viscous,is2_viscous,is3_viscous]') $:GPU_PARALLEL_LOOP(collapse=3) + do l = is3_viscous%beg, is3_viscous%end + do k = is2_viscous%beg, is2_viscous%end + do j = is1_viscous%beg, is1_viscous%end + $:GPU_LOOP(parallelism='[seq]') + do i = momxb, E_idx + tau_Re_vf(i)%sf(j, k, l) = 0._wp + end do + end do + end do + end do + $:END_GPU_PARALLEL_LOOP() + + if (shear_stress) then ! Shear stresses + $:GPU_PARALLEL_LOOP(collapse=3, private='[alpha_visc, alpha_rho_visc, Re_visc, tau_Re]') do l = is3_viscous%beg, is3_viscous%end - do k = is2_viscous%beg, is2_viscous%end + do k = -1, 1 do j = is1_viscous%beg, is1_viscous%end + + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids + alpha_rho_visc(i) = q_prim_vf(i)%sf(j, k, l) + if (bubbles_euler .and. num_fluids == 1) then + alpha_visc(i) = 1._wp - q_prim_vf(E_idx + i)%sf(j, k, l) + else + alpha_visc(i) = q_prim_vf(E_idx + i)%sf(j, k, l) + end if + end do + + if (bubbles_euler) then + rho_visc = 0._wp + gamma_visc = 0._wp + pi_inf_visc = 0._wp + + if (mpp_lim .and. (model_eqns == 2) .and. (num_fluids > 2)) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids + rho_visc = rho_visc + alpha_rho_visc(i) + gamma_visc = gamma_visc + alpha_visc(i)*gammas(i) + pi_inf_visc = pi_inf_visc + alpha_visc(i)*pi_infs(i) + end do + else if ((model_eqns == 2) .and. (num_fluids > 2)) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids - 1 + rho_visc = rho_visc + alpha_rho_visc(i) + gamma_visc = gamma_visc + alpha_visc(i)*gammas(i) + pi_inf_visc = pi_inf_visc + alpha_visc(i)*pi_infs(i) + end do + else + rho_visc = alpha_rho_visc(1) + gamma_visc = gammas(1) + pi_inf_visc = pi_infs(1) + end if + else + rho_visc = 0._wp + gamma_visc = 0._wp + pi_inf_visc = 0._wp + + alpha_visc_sum = 0._wp + + if (mpp_lim) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids + alpha_rho_visc(i) = max(0._wp, alpha_rho_visc(i)) + alpha_visc(i) = min(max(0._wp, alpha_visc(i)), 1._wp) + alpha_visc_sum = alpha_visc_sum + alpha_visc(i) + end do + + alpha_visc = alpha_visc/max(alpha_visc_sum, sgm_eps) + + end if + + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids + rho_visc = rho_visc + alpha_rho_visc(i) + gamma_visc = gamma_visc + alpha_visc(i)*gammas(i) + pi_inf_visc = pi_inf_visc + alpha_visc(i)*pi_infs(i) + end do + + if (viscous) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, 2 + Re_visc(i) = dflt_real + + if (Re_size(i) > 0) Re_visc(i) = 0._wp + $:GPU_LOOP(parallelism='[seq]') + do q = 1, Re_size(i) + Re_visc(i) = alpha_visc(Re_idx(i, q))/Res_viscous(i, q) & + + Re_visc(i) + end do + + Re_visc(i) = 1._wp/max(Re_visc(i), sgm_eps) + + end do + end if + end if + + tau_Re(2, 1) = (grad_y_vf(1)%sf(j, k, l) + & + grad_x_vf(2)%sf(j, k, l))/ & + Re_visc(1) + + tau_Re(2, 2) = (4._wp*grad_y_vf(2)%sf(j, k, l) & + - 2._wp*grad_x_vf(1)%sf(j, k, l) & + - 2._wp*q_prim_vf(momxb + 1)%sf(j, k, l)/y_cc(k))/ & + (3._wp*Re_visc(1)) $:GPU_LOOP(parallelism='[seq]') - do i = momxb, E_idx - tau_Re_vf(i)%sf(j, k, l) = 0._wp + do i = 1, 2 + tau_Re_vf(contxe + i)%sf(j, k, l) = & + tau_Re_vf(contxe + i)%sf(j, k, l) - & + tau_Re(2, i) + + tau_Re_vf(E_idx)%sf(j, k, l) = & + tau_Re_vf(E_idx)%sf(j, k, l) - & + q_prim_vf(contxe + i)%sf(j, k, l)*tau_Re(2, i) end do end do end do end do - $:END_GPU_PARALLEL_LOOP() + $:END_GPU_PARALLEL_LOOP() + end if - if (shear_stress) then ! Shear stresses + if (bulk_stress) then ! Bulk stresses $:GPU_PARALLEL_LOOP(collapse=3, private='[alpha_visc, alpha_rho_visc, Re_visc, tau_Re]') + do l = is3_viscous%beg, is3_viscous%end + do k = -1, 1 + do j = is1_viscous%beg, is1_viscous%end + + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids + alpha_rho_visc(i) = q_prim_vf(i)%sf(j, k, l) + if (bubbles_euler .and. num_fluids == 1) then + alpha_visc(i) = 1._wp - q_prim_vf(E_idx + i)%sf(j, k, l) + else + alpha_visc(i) = q_prim_vf(E_idx + i)%sf(j, k, l) + end if + end do + + if (bubbles_euler) then + rho_visc = 0._wp + gamma_visc = 0._wp + pi_inf_visc = 0._wp + + if (mpp_lim .and. (model_eqns == 2) .and. (num_fluids > 2)) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids + rho_visc = rho_visc + alpha_rho_visc(i) + gamma_visc = gamma_visc + alpha_visc(i)*gammas(i) + pi_inf_visc = pi_inf_visc + alpha_visc(i)*pi_infs(i) + end do + else if ((model_eqns == 2) .and. (num_fluids > 2)) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids - 1 + rho_visc = rho_visc + alpha_rho_visc(i) + gamma_visc = gamma_visc + alpha_visc(i)*gammas(i) + pi_inf_visc = pi_inf_visc + alpha_visc(i)*pi_infs(i) + end do + else + rho_visc = alpha_rho_visc(1) + gamma_visc = gammas(1) + pi_inf_visc = pi_infs(1) + end if + else + rho_visc = 0._wp + gamma_visc = 0._wp + pi_inf_visc = 0._wp + + alpha_visc_sum = 0._wp + + if (mpp_lim) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids + alpha_rho_visc(i) = max(0._wp, alpha_rho_visc(i)) + alpha_visc(i) = min(max(0._wp, alpha_visc(i)), 1._wp) + alpha_visc_sum = alpha_visc_sum + alpha_visc(i) + end do + + alpha_visc = alpha_visc/max(alpha_visc_sum, sgm_eps) + + end if + + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids + rho_visc = rho_visc + alpha_rho_visc(i) + gamma_visc = gamma_visc + alpha_visc(i)*gammas(i) + pi_inf_visc = pi_inf_visc + alpha_visc(i)*pi_infs(i) + end do + + if (viscous) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, 2 + Re_visc(i) = dflt_real + + if (Re_size(i) > 0) Re_visc(i) = 0._wp + $:GPU_LOOP(parallelism='[seq]') + do q = 1, Re_size(i) + Re_visc(i) = alpha_visc(Re_idx(i, q))/Res_viscous(i, q) & + + Re_visc(i) + end do + + Re_visc(i) = 1._wp/max(Re_visc(i), sgm_eps) + + end do + end if + end if + + tau_Re(2, 2) = (grad_x_vf(1)%sf(j, k, l) + & + grad_y_vf(2)%sf(j, k, l) + & + q_prim_vf(momxb + 1)%sf(j, k, l)/y_cc(k))/ & + Re_visc(2) + + tau_Re_vf(momxb + 1)%sf(j, k, l) = & + tau_Re_vf(momxb + 1)%sf(j, k, l) - & + tau_Re(2, 2) + + tau_Re_vf(E_idx)%sf(j, k, l) = & + tau_Re_vf(E_idx)%sf(j, k, l) - & + q_prim_vf(momxb + 1)%sf(j, k, l)*tau_Re(2, 2) + + end do + end do + end do + $:END_GPU_PARALLEL_LOOP() + end if + + if (p == 0) return + #:if not MFC_CASE_OPTIMIZATION or num_dims > 2 + + if (shear_stress) then ! Shear stresses + $:GPU_PARALLEL_LOOP(collapse=3, private='[alpha_visc, alpha_rho_visc, Re_visc, tau_Re]') do l = is3_viscous%beg, is3_viscous%end do k = -1, 1 do j = is1_viscous%beg, is1_viscous%end @@ -178,16 +392,16 @@ contains end if end if - tau_Re(2, 1) = (grad_y_vf(1)%sf(j, k, l) + & - grad_x_vf(2)%sf(j, k, l))/ & + tau_Re(2, 2) = -(2._wp/3._wp)*grad_z_vf(3)%sf(j, k, l)/y_cc(k)/ & + Re_visc(1) + + tau_Re(2, 3) = ((grad_z_vf(2)%sf(j, k, l) - & + q_prim_vf(momxe)%sf(j, k, l))/ & + y_cc(k) + grad_y_vf(3)%sf(j, k, l))/ & Re_visc(1) - tau_Re(2, 2) = (4._wp*grad_y_vf(2)%sf(j, k, l) & - - 2._wp*grad_x_vf(1)%sf(j, k, l) & - - 2._wp*q_prim_vf(momxb + 1)%sf(j, k, l)/y_cc(k))/ & - (3._wp*Re_visc(1)) $:GPU_LOOP(parallelism='[seq]') - do i = 1, 2 + do i = 2, 3 tau_Re_vf(contxe + i)%sf(j, k, l) = & tau_Re_vf(contxe + i)%sf(j, k, l) - & tau_Re(2, i) @@ -196,14 +410,15 @@ contains tau_Re_vf(E_idx)%sf(j, k, l) - & q_prim_vf(contxe + i)%sf(j, k, l)*tau_Re(2, i) end do + end do end do end do - $:END_GPU_PARALLEL_LOOP() - end if + $:END_GPU_PARALLEL_LOOP() + end if - if (bulk_stress) then ! Bulk stresses - $:GPU_PARALLEL_LOOP(collapse=3, private='[alpha_visc, alpha_rho_visc, Re_visc, tau_Re]') + if (bulk_stress) then ! Bulk stresses + $:GPU_PARALLEL_LOOP(collapse=3, private='[alpha_visc, alpha_rho_visc, Re_visc, tau_Re]') do l = is3_viscous%beg, is3_viscous%end do k = -1, 1 do j = is1_viscous%beg, is1_viscous%end @@ -286,9 +501,7 @@ contains end if end if - tau_Re(2, 2) = (grad_x_vf(1)%sf(j, k, l) + & - grad_y_vf(2)%sf(j, k, l) + & - q_prim_vf(momxb + 1)%sf(j, k, l)/y_cc(k))/ & + tau_Re(2, 2) = grad_z_vf(3)%sf(j, k, l)/y_cc(k)/ & Re_visc(2) tau_Re_vf(momxb + 1)%sf(j, k, l) = & @@ -302,219 +515,6 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP() - end if - - if (p == 0) return - #:if not MFC_CASE_OPTIMIZATION or num_dims > 2 - - if (shear_stress) then ! Shear stresses - $:GPU_PARALLEL_LOOP(collapse=3, private='[alpha_visc, alpha_rho_visc, Re_visc, tau_Re]') - do l = is3_viscous%beg, is3_viscous%end - do k = -1, 1 - do j = is1_viscous%beg, is1_viscous%end - - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - alpha_rho_visc(i) = q_prim_vf(i)%sf(j, k, l) - if (bubbles_euler .and. num_fluids == 1) then - alpha_visc(i) = 1._wp - q_prim_vf(E_idx + i)%sf(j, k, l) - else - alpha_visc(i) = q_prim_vf(E_idx + i)%sf(j, k, l) - end if - end do - - if (bubbles_euler) then - rho_visc = 0._wp - gamma_visc = 0._wp - pi_inf_visc = 0._wp - - if (mpp_lim .and. (model_eqns == 2) .and. (num_fluids > 2)) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - rho_visc = rho_visc + alpha_rho_visc(i) - gamma_visc = gamma_visc + alpha_visc(i)*gammas(i) - pi_inf_visc = pi_inf_visc + alpha_visc(i)*pi_infs(i) - end do - else if ((model_eqns == 2) .and. (num_fluids > 2)) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - 1 - rho_visc = rho_visc + alpha_rho_visc(i) - gamma_visc = gamma_visc + alpha_visc(i)*gammas(i) - pi_inf_visc = pi_inf_visc + alpha_visc(i)*pi_infs(i) - end do - else - rho_visc = alpha_rho_visc(1) - gamma_visc = gammas(1) - pi_inf_visc = pi_infs(1) - end if - else - rho_visc = 0._wp - gamma_visc = 0._wp - pi_inf_visc = 0._wp - - alpha_visc_sum = 0._wp - - if (mpp_lim) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - alpha_rho_visc(i) = max(0._wp, alpha_rho_visc(i)) - alpha_visc(i) = min(max(0._wp, alpha_visc(i)), 1._wp) - alpha_visc_sum = alpha_visc_sum + alpha_visc(i) - end do - - alpha_visc = alpha_visc/max(alpha_visc_sum, sgm_eps) - - end if - - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - rho_visc = rho_visc + alpha_rho_visc(i) - gamma_visc = gamma_visc + alpha_visc(i)*gammas(i) - pi_inf_visc = pi_inf_visc + alpha_visc(i)*pi_infs(i) - end do - - if (viscous) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, 2 - Re_visc(i) = dflt_real - - if (Re_size(i) > 0) Re_visc(i) = 0._wp - $:GPU_LOOP(parallelism='[seq]') - do q = 1, Re_size(i) - Re_visc(i) = alpha_visc(Re_idx(i, q))/Res_viscous(i, q) & - + Re_visc(i) - end do - - Re_visc(i) = 1._wp/max(Re_visc(i), sgm_eps) - - end do - end if - end if - - tau_Re(2, 2) = -(2._wp/3._wp)*grad_z_vf(3)%sf(j, k, l)/y_cc(k)/ & - Re_visc(1) - - tau_Re(2, 3) = ((grad_z_vf(2)%sf(j, k, l) - & - q_prim_vf(momxe)%sf(j, k, l))/ & - y_cc(k) + grad_y_vf(3)%sf(j, k, l))/ & - Re_visc(1) - - $:GPU_LOOP(parallelism='[seq]') - do i = 2, 3 - tau_Re_vf(contxe + i)%sf(j, k, l) = & - tau_Re_vf(contxe + i)%sf(j, k, l) - & - tau_Re(2, i) - - tau_Re_vf(E_idx)%sf(j, k, l) = & - tau_Re_vf(E_idx)%sf(j, k, l) - & - q_prim_vf(contxe + i)%sf(j, k, l)*tau_Re(2, i) - end do - - end do - end do - end do - $:END_GPU_PARALLEL_LOOP() - end if - - if (bulk_stress) then ! Bulk stresses - $:GPU_PARALLEL_LOOP(collapse=3, private='[alpha_visc, alpha_rho_visc, Re_visc, tau_Re]') - do l = is3_viscous%beg, is3_viscous%end - do k = -1, 1 - do j = is1_viscous%beg, is1_viscous%end - - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - alpha_rho_visc(i) = q_prim_vf(i)%sf(j, k, l) - if (bubbles_euler .and. num_fluids == 1) then - alpha_visc(i) = 1._wp - q_prim_vf(E_idx + i)%sf(j, k, l) - else - alpha_visc(i) = q_prim_vf(E_idx + i)%sf(j, k, l) - end if - end do - - if (bubbles_euler) then - rho_visc = 0._wp - gamma_visc = 0._wp - pi_inf_visc = 0._wp - - if (mpp_lim .and. (model_eqns == 2) .and. (num_fluids > 2)) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - rho_visc = rho_visc + alpha_rho_visc(i) - gamma_visc = gamma_visc + alpha_visc(i)*gammas(i) - pi_inf_visc = pi_inf_visc + alpha_visc(i)*pi_infs(i) - end do - else if ((model_eqns == 2) .and. (num_fluids > 2)) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - 1 - rho_visc = rho_visc + alpha_rho_visc(i) - gamma_visc = gamma_visc + alpha_visc(i)*gammas(i) - pi_inf_visc = pi_inf_visc + alpha_visc(i)*pi_infs(i) - end do - else - rho_visc = alpha_rho_visc(1) - gamma_visc = gammas(1) - pi_inf_visc = pi_infs(1) - end if - else - rho_visc = 0._wp - gamma_visc = 0._wp - pi_inf_visc = 0._wp - - alpha_visc_sum = 0._wp - - if (mpp_lim) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - alpha_rho_visc(i) = max(0._wp, alpha_rho_visc(i)) - alpha_visc(i) = min(max(0._wp, alpha_visc(i)), 1._wp) - alpha_visc_sum = alpha_visc_sum + alpha_visc(i) - end do - - alpha_visc = alpha_visc/max(alpha_visc_sum, sgm_eps) - - end if - - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - rho_visc = rho_visc + alpha_rho_visc(i) - gamma_visc = gamma_visc + alpha_visc(i)*gammas(i) - pi_inf_visc = pi_inf_visc + alpha_visc(i)*pi_infs(i) - end do - - if (viscous) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, 2 - Re_visc(i) = dflt_real - - if (Re_size(i) > 0) Re_visc(i) = 0._wp - $:GPU_LOOP(parallelism='[seq]') - do q = 1, Re_size(i) - Re_visc(i) = alpha_visc(Re_idx(i, q))/Res_viscous(i, q) & - + Re_visc(i) - end do - - Re_visc(i) = 1._wp/max(Re_visc(i), sgm_eps) - - end do - end if - end if - - tau_Re(2, 2) = grad_z_vf(3)%sf(j, k, l)/y_cc(k)/ & - Re_visc(2) - - tau_Re_vf(momxb + 1)%sf(j, k, l) = & - tau_Re_vf(momxb + 1)%sf(j, k, l) - & - tau_Re(2, 2) - - tau_Re_vf(E_idx)%sf(j, k, l) = & - tau_Re_vf(E_idx)%sf(j, k, l) - & - q_prim_vf(momxb + 1)%sf(j, k, l)*tau_Re(2, 2) - - end do - end do - end do $:END_GPU_PARALLEL_LOOP() end if #:endif @@ -602,361 +602,361 @@ contains $:GPU_UPDATE(device='[is1_viscous,is2_viscous,is3_viscous]') $:GPU_PARALLEL_LOOP(collapse=3) - do l = is3_viscous%beg, is3_viscous%end - do k = iy%beg, iy%end - do j = is1_viscous%beg + 1, is1_viscous%end - $:GPU_LOOP(parallelism='[seq]') - do i = iv%beg, iv%end - dqL_prim_dx_n(1)%vf(i)%sf(j, k, l) = & - (q_prim_qp%vf(i)%sf(j, k, l) - & - q_prim_qp%vf(i)%sf(j - 1, k, l))/ & - (x_cc(j) - x_cc(j - 1)) - end do + do l = is3_viscous%beg, is3_viscous%end + do k = iy%beg, iy%end + do j = is1_viscous%beg + 1, is1_viscous%end + $:GPU_LOOP(parallelism='[seq]') + do i = iv%beg, iv%end + dqL_prim_dx_n(1)%vf(i)%sf(j, k, l) = & + (q_prim_qp%vf(i)%sf(j, k, l) - & + q_prim_qp%vf(i)%sf(j - 1, k, l))/ & + (x_cc(j) - x_cc(j - 1)) end do end do end do + end do $:END_GPU_PARALLEL_LOOP() $:GPU_PARALLEL_LOOP(collapse=3) - do l = is3_viscous%beg, is3_viscous%end - do k = is2_viscous%beg, is2_viscous%end - do j = is1_viscous%beg, is1_viscous%end - 1 - $:GPU_LOOP(parallelism='[seq]') - do i = iv%beg, iv%end - dqR_prim_dx_n(1)%vf(i)%sf(j, k, l) = & - (q_prim_qp%vf(i)%sf(j + 1, k, l) - & - q_prim_qp%vf(i)%sf(j, k, l))/ & - (x_cc(j + 1) - x_cc(j)) - end do + do l = is3_viscous%beg, is3_viscous%end + do k = is2_viscous%beg, is2_viscous%end + do j = is1_viscous%beg, is1_viscous%end - 1 + $:GPU_LOOP(parallelism='[seq]') + do i = iv%beg, iv%end + dqR_prim_dx_n(1)%vf(i)%sf(j, k, l) = & + (q_prim_qp%vf(i)%sf(j + 1, k, l) - & + q_prim_qp%vf(i)%sf(j, k, l))/ & + (x_cc(j + 1) - x_cc(j)) end do end do end do + end do $:END_GPU_PARALLEL_LOOP() if (n > 0) then $:GPU_PARALLEL_LOOP(collapse=3) - do l = is3_viscous%beg, is3_viscous%end - do j = is2_viscous%beg + 1, is2_viscous%end - do k = is1_viscous%beg, is1_viscous%end - $:GPU_LOOP(parallelism='[seq]') - do i = iv%beg, iv%end - dqL_prim_dy_n(2)%vf(i)%sf(k, j, l) = & - (q_prim_qp%vf(i)%sf(k, j, l) - & - q_prim_qp%vf(i)%sf(k, j - 1, l))/ & - (y_cc(j) - y_cc(j - 1)) - end do + do l = is3_viscous%beg, is3_viscous%end + do j = is2_viscous%beg + 1, is2_viscous%end + do k = is1_viscous%beg, is1_viscous%end + $:GPU_LOOP(parallelism='[seq]') + do i = iv%beg, iv%end + dqL_prim_dy_n(2)%vf(i)%sf(k, j, l) = & + (q_prim_qp%vf(i)%sf(k, j, l) - & + q_prim_qp%vf(i)%sf(k, j - 1, l))/ & + (y_cc(j) - y_cc(j - 1)) end do end do end do + end do $:END_GPU_PARALLEL_LOOP() $:GPU_PARALLEL_LOOP(collapse=3) - do l = is3_viscous%beg, is3_viscous%end - do j = is2_viscous%beg, is2_viscous%end - 1 - do k = is1_viscous%beg, is1_viscous%end - $:GPU_LOOP(parallelism='[seq]') - do i = iv%beg, iv%end - dqR_prim_dy_n(2)%vf(i)%sf(k, j, l) = & - (q_prim_qp%vf(i)%sf(k, j + 1, l) - & - q_prim_qp%vf(i)%sf(k, j, l))/ & - (y_cc(j + 1) - y_cc(j)) - end do + do l = is3_viscous%beg, is3_viscous%end + do j = is2_viscous%beg, is2_viscous%end - 1 + do k = is1_viscous%beg, is1_viscous%end + $:GPU_LOOP(parallelism='[seq]') + do i = iv%beg, iv%end + dqR_prim_dy_n(2)%vf(i)%sf(k, j, l) = & + (q_prim_qp%vf(i)%sf(k, j + 1, l) - & + q_prim_qp%vf(i)%sf(k, j, l))/ & + (y_cc(j + 1) - y_cc(j)) end do end do end do + end do $:END_GPU_PARALLEL_LOOP() $:GPU_PARALLEL_LOOP(collapse=3) - do l = is3_viscous%beg, is3_viscous%end - do j = is2_viscous%beg + 1, is2_viscous%end - do k = is1_viscous%beg + 1, is1_viscous%end - 1 - $:GPU_LOOP(parallelism='[seq]') - do i = iv%beg, iv%end - dqL_prim_dx_n(2)%vf(i)%sf(k, j, l) = & - (dqL_prim_dx_n(1)%vf(i)%sf(k, j, l) + & - dqR_prim_dx_n(1)%vf(i)%sf(k, j, l) + & - dqL_prim_dx_n(1)%vf(i)%sf(k, j - 1, l) + & - dqR_prim_dx_n(1)%vf(i)%sf(k, j - 1, l)) - - dqL_prim_dx_n(2)%vf(i)%sf(k, j, l) = 25.e-2_wp* & - dqL_prim_dx_n(2)%vf(i)%sf(k, j, l) - end do + do l = is3_viscous%beg, is3_viscous%end + do j = is2_viscous%beg + 1, is2_viscous%end + do k = is1_viscous%beg + 1, is1_viscous%end - 1 + $:GPU_LOOP(parallelism='[seq]') + do i = iv%beg, iv%end + dqL_prim_dx_n(2)%vf(i)%sf(k, j, l) = & + (dqL_prim_dx_n(1)%vf(i)%sf(k, j, l) + & + dqR_prim_dx_n(1)%vf(i)%sf(k, j, l) + & + dqL_prim_dx_n(1)%vf(i)%sf(k, j - 1, l) + & + dqR_prim_dx_n(1)%vf(i)%sf(k, j - 1, l)) + + dqL_prim_dx_n(2)%vf(i)%sf(k, j, l) = 25.e-2_wp* & + dqL_prim_dx_n(2)%vf(i)%sf(k, j, l) end do end do end do + end do $:END_GPU_PARALLEL_LOOP() $:GPU_PARALLEL_LOOP(collapse=3) - do l = is3_viscous%beg, is3_viscous%end - do j = is2_viscous%beg, is2_viscous%end - 1 - do k = is1_viscous%beg + 1, is1_viscous%end - 1 - $:GPU_LOOP(parallelism='[seq]') - do i = iv%beg, iv%end - dqR_prim_dx_n(2)%vf(i)%sf(k, j, l) = & - (dqL_prim_dx_n(1)%vf(i)%sf(k, j + 1, l) + & - dqR_prim_dx_n(1)%vf(i)%sf(k, j + 1, l) + & - dqL_prim_dx_n(1)%vf(i)%sf(k, j, l) + & - dqR_prim_dx_n(1)%vf(i)%sf(k, j, l)) + do l = is3_viscous%beg, is3_viscous%end + do j = is2_viscous%beg, is2_viscous%end - 1 + do k = is1_viscous%beg + 1, is1_viscous%end - 1 + $:GPU_LOOP(parallelism='[seq]') + do i = iv%beg, iv%end + dqR_prim_dx_n(2)%vf(i)%sf(k, j, l) = & + (dqL_prim_dx_n(1)%vf(i)%sf(k, j + 1, l) + & + dqR_prim_dx_n(1)%vf(i)%sf(k, j + 1, l) + & + dqL_prim_dx_n(1)%vf(i)%sf(k, j, l) + & + dqR_prim_dx_n(1)%vf(i)%sf(k, j, l)) - dqR_prim_dx_n(2)%vf(i)%sf(k, j, l) = 25.e-2_wp* & - dqR_prim_dx_n(2)%vf(i)%sf(k, j, l) + dqR_prim_dx_n(2)%vf(i)%sf(k, j, l) = 25.e-2_wp* & + dqR_prim_dx_n(2)%vf(i)%sf(k, j, l) - end do end do end do end do + end do $:END_GPU_PARALLEL_LOOP() $:GPU_PARALLEL_LOOP(collapse=3) - do l = is3_viscous%beg, is3_viscous%end - do k = is2_viscous%beg + 1, is2_viscous%end - 1 - do j = is1_viscous%beg + 1, is1_viscous%end - $:GPU_LOOP(parallelism='[seq]') - do i = iv%beg, iv%end - dqL_prim_dy_n(1)%vf(i)%sf(j, k, l) = & - (dqL_prim_dy_n(2)%vf(i)%sf(j, k, l) + & - dqR_prim_dy_n(2)%vf(i)%sf(j, k, l) + & - dqL_prim_dy_n(2)%vf(i)%sf(j - 1, k, l) + & - dqR_prim_dy_n(2)%vf(i)%sf(j - 1, k, l)) + do l = is3_viscous%beg, is3_viscous%end + do k = is2_viscous%beg + 1, is2_viscous%end - 1 + do j = is1_viscous%beg + 1, is1_viscous%end + $:GPU_LOOP(parallelism='[seq]') + do i = iv%beg, iv%end + dqL_prim_dy_n(1)%vf(i)%sf(j, k, l) = & + (dqL_prim_dy_n(2)%vf(i)%sf(j, k, l) + & + dqR_prim_dy_n(2)%vf(i)%sf(j, k, l) + & + dqL_prim_dy_n(2)%vf(i)%sf(j - 1, k, l) + & + dqR_prim_dy_n(2)%vf(i)%sf(j - 1, k, l)) - dqL_prim_dy_n(1)%vf(i)%sf(j, k, l) = 25.e-2_wp* & - dqL_prim_dy_n(1)%vf(i)%sf(j, k, l) + dqL_prim_dy_n(1)%vf(i)%sf(j, k, l) = 25.e-2_wp* & + dqL_prim_dy_n(1)%vf(i)%sf(j, k, l) - end do end do end do end do + end do $:END_GPU_PARALLEL_LOOP() $:GPU_PARALLEL_LOOP(collapse=3) - do l = is3_viscous%beg, is3_viscous%end - do k = is2_viscous%beg + 1, is2_viscous%end - 1 - do j = is1_viscous%beg, is1_viscous%end - 1 - $:GPU_LOOP(parallelism='[seq]') - do i = iv%beg, iv%end - dqR_prim_dy_n(1)%vf(i)%sf(j, k, l) = & - (dqL_prim_dy_n(2)%vf(i)%sf(j + 1, k, l) + & - dqR_prim_dy_n(2)%vf(i)%sf(j + 1, k, l) + & - dqL_prim_dy_n(2)%vf(i)%sf(j, k, l) + & - dqR_prim_dy_n(2)%vf(i)%sf(j, k, l)) + do l = is3_viscous%beg, is3_viscous%end + do k = is2_viscous%beg + 1, is2_viscous%end - 1 + do j = is1_viscous%beg, is1_viscous%end - 1 + $:GPU_LOOP(parallelism='[seq]') + do i = iv%beg, iv%end + dqR_prim_dy_n(1)%vf(i)%sf(j, k, l) = & + (dqL_prim_dy_n(2)%vf(i)%sf(j + 1, k, l) + & + dqR_prim_dy_n(2)%vf(i)%sf(j + 1, k, l) + & + dqL_prim_dy_n(2)%vf(i)%sf(j, k, l) + & + dqR_prim_dy_n(2)%vf(i)%sf(j, k, l)) - dqR_prim_dy_n(1)%vf(i)%sf(j, k, l) = 25.e-2_wp* & - dqR_prim_dy_n(1)%vf(i)%sf(j, k, l) + dqR_prim_dy_n(1)%vf(i)%sf(j, k, l) = 25.e-2_wp* & + dqR_prim_dy_n(1)%vf(i)%sf(j, k, l) - end do end do end do end do + end do $:END_GPU_PARALLEL_LOOP() if (p > 0) then #:if not MFC_CASE_OPTIMIZATION or num_dims > 2 $:GPU_PARALLEL_LOOP(collapse=3) - do j = is3_viscous%beg + 1, is3_viscous%end - do l = is2_viscous%beg, is2_viscous%end - do k = is1_viscous%beg, is1_viscous%end - $:GPU_LOOP(parallelism='[seq]') - do i = iv%beg, iv%end + do j = is3_viscous%beg + 1, is3_viscous%end + do l = is2_viscous%beg, is2_viscous%end + do k = is1_viscous%beg, is1_viscous%end + $:GPU_LOOP(parallelism='[seq]') + do i = iv%beg, iv%end - dqL_prim_dz_n(3)%vf(i)%sf(k, l, j) = & - (q_prim_qp%vf(i)%sf(k, l, j) - & - q_prim_qp%vf(i)%sf(k, l, j - 1))/ & - (z_cc(j) - z_cc(j - 1)) - end do + dqL_prim_dz_n(3)%vf(i)%sf(k, l, j) = & + (q_prim_qp%vf(i)%sf(k, l, j) - & + q_prim_qp%vf(i)%sf(k, l, j - 1))/ & + (z_cc(j) - z_cc(j - 1)) end do end do end do + end do $:END_GPU_PARALLEL_LOOP() $:GPU_PARALLEL_LOOP(collapse=3) - do j = is3_viscous%beg, is3_viscous%end - 1 - do l = is2_viscous%beg, is2_viscous%end - do k = is1_viscous%beg, is1_viscous%end - $:GPU_LOOP(parallelism='[seq]') - do i = iv%beg, iv%end + do j = is3_viscous%beg, is3_viscous%end - 1 + do l = is2_viscous%beg, is2_viscous%end + do k = is1_viscous%beg, is1_viscous%end + $:GPU_LOOP(parallelism='[seq]') + do i = iv%beg, iv%end - dqR_prim_dz_n(3)%vf(i)%sf(k, l, j) = & - (q_prim_qp%vf(i)%sf(k, l, j + 1) - & - q_prim_qp%vf(i)%sf(k, l, j))/ & - (z_cc(j + 1) - z_cc(j)) - end do + dqR_prim_dz_n(3)%vf(i)%sf(k, l, j) = & + (q_prim_qp%vf(i)%sf(k, l, j + 1) - & + q_prim_qp%vf(i)%sf(k, l, j))/ & + (z_cc(j + 1) - z_cc(j)) end do end do end do + end do $:END_GPU_PARALLEL_LOOP() $:GPU_PARALLEL_LOOP(collapse=3) - do l = is3_viscous%beg + 1, is3_viscous%end - 1 - do k = is2_viscous%beg, is2_viscous%end - do j = is1_viscous%beg + 1, is1_viscous%end - $:GPU_LOOP(parallelism='[seq]') - do i = iv%beg, iv%end + do l = is3_viscous%beg + 1, is3_viscous%end - 1 + do k = is2_viscous%beg, is2_viscous%end + do j = is1_viscous%beg + 1, is1_viscous%end + $:GPU_LOOP(parallelism='[seq]') + do i = iv%beg, iv%end - dqL_prim_dz_n(1)%vf(i)%sf(j, k, l) = & - (dqL_prim_dz_n(3)%vf(i)%sf(j, k, l) + & - dqR_prim_dz_n(3)%vf(i)%sf(j, k, l) + & - dqL_prim_dz_n(3)%vf(i)%sf(j - 1, k, l) + & - dqR_prim_dz_n(3)%vf(i)%sf(j - 1, k, l)) + dqL_prim_dz_n(1)%vf(i)%sf(j, k, l) = & + (dqL_prim_dz_n(3)%vf(i)%sf(j, k, l) + & + dqR_prim_dz_n(3)%vf(i)%sf(j, k, l) + & + dqL_prim_dz_n(3)%vf(i)%sf(j - 1, k, l) + & + dqR_prim_dz_n(3)%vf(i)%sf(j - 1, k, l)) - dqL_prim_dz_n(1)%vf(i)%sf(j, k, l) = 25.e-2_wp* & - dqL_prim_dz_n(1)%vf(i)%sf(j, k, l) + dqL_prim_dz_n(1)%vf(i)%sf(j, k, l) = 25.e-2_wp* & + dqL_prim_dz_n(1)%vf(i)%sf(j, k, l) - end do end do end do end do + end do $:END_GPU_PARALLEL_LOOP() $:GPU_PARALLEL_LOOP(collapse=3) - do l = is3_viscous%beg + 1, is3_viscous%end - 1 - do k = is2_viscous%beg, is2_viscous%end - do j = is1_viscous%beg, is1_viscous%end - 1 - $:GPU_LOOP(parallelism='[seq]') - do i = iv%beg, iv%end + do l = is3_viscous%beg + 1, is3_viscous%end - 1 + do k = is2_viscous%beg, is2_viscous%end + do j = is1_viscous%beg, is1_viscous%end - 1 + $:GPU_LOOP(parallelism='[seq]') + do i = iv%beg, iv%end - dqR_prim_dz_n(1)%vf(i)%sf(j, k, l) = & - (dqL_prim_dz_n(3)%vf(i)%sf(j + 1, k, l) + & - dqR_prim_dz_n(3)%vf(i)%sf(j + 1, k, l) + & - dqL_prim_dz_n(3)%vf(i)%sf(j, k, l) + & - dqR_prim_dz_n(3)%vf(i)%sf(j, k, l)) + dqR_prim_dz_n(1)%vf(i)%sf(j, k, l) = & + (dqL_prim_dz_n(3)%vf(i)%sf(j + 1, k, l) + & + dqR_prim_dz_n(3)%vf(i)%sf(j + 1, k, l) + & + dqL_prim_dz_n(3)%vf(i)%sf(j, k, l) + & + dqR_prim_dz_n(3)%vf(i)%sf(j, k, l)) - dqR_prim_dz_n(1)%vf(i)%sf(j, k, l) = 25.e-2_wp* & - dqR_prim_dz_n(1)%vf(i)%sf(j, k, l) + dqR_prim_dz_n(1)%vf(i)%sf(j, k, l) = 25.e-2_wp* & + dqR_prim_dz_n(1)%vf(i)%sf(j, k, l) - end do end do end do end do + end do $:END_GPU_PARALLEL_LOOP() $:GPU_PARALLEL_LOOP(collapse=3) - do l = is3_viscous%beg + 1, is3_viscous%end - 1 - do j = is2_viscous%beg + 1, is2_viscous%end - do k = is1_viscous%beg, is1_viscous%end - $:GPU_LOOP(parallelism='[seq]') - do i = iv%beg, iv%end + do l = is3_viscous%beg + 1, is3_viscous%end - 1 + do j = is2_viscous%beg + 1, is2_viscous%end + do k = is1_viscous%beg, is1_viscous%end + $:GPU_LOOP(parallelism='[seq]') + do i = iv%beg, iv%end - dqL_prim_dz_n(2)%vf(i)%sf(k, j, l) = & - (dqL_prim_dz_n(3)%vf(i)%sf(k, j, l) + & - dqR_prim_dz_n(3)%vf(i)%sf(k, j, l) + & - dqL_prim_dz_n(3)%vf(i)%sf(k, j - 1, l) + & - dqR_prim_dz_n(3)%vf(i)%sf(k, j - 1, l)) + dqL_prim_dz_n(2)%vf(i)%sf(k, j, l) = & + (dqL_prim_dz_n(3)%vf(i)%sf(k, j, l) + & + dqR_prim_dz_n(3)%vf(i)%sf(k, j, l) + & + dqL_prim_dz_n(3)%vf(i)%sf(k, j - 1, l) + & + dqR_prim_dz_n(3)%vf(i)%sf(k, j - 1, l)) - dqL_prim_dz_n(2)%vf(i)%sf(k, j, l) = 25.e-2_wp* & - dqL_prim_dz_n(2)%vf(i)%sf(k, j, l) + dqL_prim_dz_n(2)%vf(i)%sf(k, j, l) = 25.e-2_wp* & + dqL_prim_dz_n(2)%vf(i)%sf(k, j, l) - end do end do end do end do + end do $:END_GPU_PARALLEL_LOOP() $:GPU_PARALLEL_LOOP(collapse=3) - do l = is3_viscous%beg + 1, is3_viscous%end - 1 - do j = is2_viscous%beg, is2_viscous%end - 1 - do k = is1_viscous%beg, is1_viscous%end - $:GPU_LOOP(parallelism='[seq]') - do i = iv%beg, iv%end + do l = is3_viscous%beg + 1, is3_viscous%end - 1 + do j = is2_viscous%beg, is2_viscous%end - 1 + do k = is1_viscous%beg, is1_viscous%end + $:GPU_LOOP(parallelism='[seq]') + do i = iv%beg, iv%end - dqR_prim_dz_n(2)%vf(i)%sf(k, j, l) = & - (dqL_prim_dz_n(3)%vf(i)%sf(k, j + 1, l) + & - dqR_prim_dz_n(3)%vf(i)%sf(k, j + 1, l) + & - dqL_prim_dz_n(3)%vf(i)%sf(k, j, l) + & - dqR_prim_dz_n(3)%vf(i)%sf(k, j, l)) + dqR_prim_dz_n(2)%vf(i)%sf(k, j, l) = & + (dqL_prim_dz_n(3)%vf(i)%sf(k, j + 1, l) + & + dqR_prim_dz_n(3)%vf(i)%sf(k, j + 1, l) + & + dqL_prim_dz_n(3)%vf(i)%sf(k, j, l) + & + dqR_prim_dz_n(3)%vf(i)%sf(k, j, l)) - dqR_prim_dz_n(2)%vf(i)%sf(k, j, l) = 25.e-2_wp* & - dqR_prim_dz_n(2)%vf(i)%sf(k, j, l) + dqR_prim_dz_n(2)%vf(i)%sf(k, j, l) = 25.e-2_wp* & + dqR_prim_dz_n(2)%vf(i)%sf(k, j, l) - end do end do end do end do + end do $:END_GPU_PARALLEL_LOOP() $:GPU_PARALLEL_LOOP(collapse=3) - do j = is3_viscous%beg + 1, is3_viscous%end - do l = is2_viscous%beg + 1, is2_viscous%end - 1 - do k = is1_viscous%beg, is1_viscous%end - $:GPU_LOOP(parallelism='[seq]') - do i = iv%beg, iv%end + do j = is3_viscous%beg + 1, is3_viscous%end + do l = is2_viscous%beg + 1, is2_viscous%end - 1 + do k = is1_viscous%beg, is1_viscous%end + $:GPU_LOOP(parallelism='[seq]') + do i = iv%beg, iv%end - dqL_prim_dy_n(3)%vf(i)%sf(k, l, j) = & - (dqL_prim_dy_n(2)%vf(i)%sf(k, l, j) + & - dqR_prim_dy_n(2)%vf(i)%sf(k, l, j) + & - dqL_prim_dy_n(2)%vf(i)%sf(k, l, j - 1) + & - dqR_prim_dy_n(2)%vf(i)%sf(k, l, j - 1)) + dqL_prim_dy_n(3)%vf(i)%sf(k, l, j) = & + (dqL_prim_dy_n(2)%vf(i)%sf(k, l, j) + & + dqR_prim_dy_n(2)%vf(i)%sf(k, l, j) + & + dqL_prim_dy_n(2)%vf(i)%sf(k, l, j - 1) + & + dqR_prim_dy_n(2)%vf(i)%sf(k, l, j - 1)) - dqL_prim_dy_n(3)%vf(i)%sf(k, l, j) = 25.e-2_wp* & - dqL_prim_dy_n(3)%vf(i)%sf(k, l, j) + dqL_prim_dy_n(3)%vf(i)%sf(k, l, j) = 25.e-2_wp* & + dqL_prim_dy_n(3)%vf(i)%sf(k, l, j) - end do end do end do end do + end do $:END_GPU_PARALLEL_LOOP() $:GPU_PARALLEL_LOOP(collapse=3) - do j = is3_viscous%beg, is3_viscous%end - 1 - do l = is2_viscous%beg + 1, is2_viscous%end - 1 - do k = is1_viscous%beg, is1_viscous%end - $:GPU_LOOP(parallelism='[seq]') - do i = iv%beg, iv%end + do j = is3_viscous%beg, is3_viscous%end - 1 + do l = is2_viscous%beg + 1, is2_viscous%end - 1 + do k = is1_viscous%beg, is1_viscous%end + $:GPU_LOOP(parallelism='[seq]') + do i = iv%beg, iv%end - dqR_prim_dy_n(3)%vf(i)%sf(k, l, j) = & - (dqL_prim_dy_n(2)%vf(i)%sf(k, l, j + 1) + & - dqR_prim_dy_n(2)%vf(i)%sf(k, l, j + 1) + & - dqL_prim_dy_n(2)%vf(i)%sf(k, l, j) + & - dqR_prim_dy_n(2)%vf(i)%sf(k, l, j)) + dqR_prim_dy_n(3)%vf(i)%sf(k, l, j) = & + (dqL_prim_dy_n(2)%vf(i)%sf(k, l, j + 1) + & + dqR_prim_dy_n(2)%vf(i)%sf(k, l, j + 1) + & + dqL_prim_dy_n(2)%vf(i)%sf(k, l, j) + & + dqR_prim_dy_n(2)%vf(i)%sf(k, l, j)) - dqR_prim_dy_n(3)%vf(i)%sf(k, l, j) = 25.e-2_wp* & - dqR_prim_dy_n(3)%vf(i)%sf(k, l, j) + dqR_prim_dy_n(3)%vf(i)%sf(k, l, j) = 25.e-2_wp* & + dqR_prim_dy_n(3)%vf(i)%sf(k, l, j) - end do end do end do end do + end do $:END_GPU_PARALLEL_LOOP() $:GPU_PARALLEL_LOOP(collapse=3) - do j = is3_viscous%beg + 1, is3_viscous%end - do l = is2_viscous%beg, is2_viscous%end - do k = is1_viscous%beg + 1, is1_viscous%end - 1 - $:GPU_LOOP(parallelism='[seq]') - do i = iv%beg, iv%end + do j = is3_viscous%beg + 1, is3_viscous%end + do l = is2_viscous%beg, is2_viscous%end + do k = is1_viscous%beg + 1, is1_viscous%end - 1 + $:GPU_LOOP(parallelism='[seq]') + do i = iv%beg, iv%end - dqL_prim_dx_n(3)%vf(i)%sf(k, l, j) = & - (dqL_prim_dx_n(1)%vf(i)%sf(k, l, j) + & - dqR_prim_dx_n(1)%vf(i)%sf(k, l, j) + & - dqL_prim_dx_n(1)%vf(i)%sf(k, l, j - 1) + & - dqR_prim_dx_n(1)%vf(i)%sf(k, l, j - 1)) + dqL_prim_dx_n(3)%vf(i)%sf(k, l, j) = & + (dqL_prim_dx_n(1)%vf(i)%sf(k, l, j) + & + dqR_prim_dx_n(1)%vf(i)%sf(k, l, j) + & + dqL_prim_dx_n(1)%vf(i)%sf(k, l, j - 1) + & + dqR_prim_dx_n(1)%vf(i)%sf(k, l, j - 1)) - dqL_prim_dx_n(3)%vf(i)%sf(k, l, j) = 25.e-2_wp* & - dqL_prim_dx_n(3)%vf(i)%sf(k, l, j) + dqL_prim_dx_n(3)%vf(i)%sf(k, l, j) = 25.e-2_wp* & + dqL_prim_dx_n(3)%vf(i)%sf(k, l, j) - end do end do end do end do + end do $:END_GPU_PARALLEL_LOOP() $:GPU_PARALLEL_LOOP(collapse=3) - do j = is3_viscous%beg, is3_viscous%end - 1 - do l = is2_viscous%beg, is2_viscous%end - do k = is1_viscous%beg + 1, is1_viscous%end - 1 - $:GPU_LOOP(parallelism='[seq]') - do i = iv%beg, iv%end - dqR_prim_dx_n(3)%vf(i)%sf(k, l, j) = & - (dqL_prim_dx_n(1)%vf(i)%sf(k, l, j + 1) + & - dqR_prim_dx_n(1)%vf(i)%sf(k, l, j + 1) + & - dqL_prim_dx_n(1)%vf(i)%sf(k, l, j) + & - dqR_prim_dx_n(1)%vf(i)%sf(k, l, j)) + do j = is3_viscous%beg, is3_viscous%end - 1 + do l = is2_viscous%beg, is2_viscous%end + do k = is1_viscous%beg + 1, is1_viscous%end - 1 + $:GPU_LOOP(parallelism='[seq]') + do i = iv%beg, iv%end + dqR_prim_dx_n(3)%vf(i)%sf(k, l, j) = & + (dqL_prim_dx_n(1)%vf(i)%sf(k, l, j + 1) + & + dqR_prim_dx_n(1)%vf(i)%sf(k, l, j + 1) + & + dqL_prim_dx_n(1)%vf(i)%sf(k, l, j) + & + dqR_prim_dx_n(1)%vf(i)%sf(k, l, j)) - dqR_prim_dx_n(3)%vf(i)%sf(k, l, j) = 25.e-2_wp* & - dqR_prim_dx_n(3)%vf(i)%sf(k, l, j) + dqR_prim_dx_n(3)%vf(i)%sf(k, l, j) = 25.e-2_wp* & + dqR_prim_dx_n(3)%vf(i)%sf(k, l, j) - end do end do end do end do + end do $:END_GPU_PARALLEL_LOOP() do i = iv%beg, iv%end @@ -1052,42 +1052,42 @@ contains if (weno_Re_flux) then if (norm_dir == 2) then $:GPU_PARALLEL_LOOP(collapse=4) - do i = iv%beg, iv%end - do l = is3_viscous%beg, is3_viscous%end - do j = is1_viscous%beg, is1_viscous%end - do k = is2_viscous%beg, is2_viscous%end - vL_prim_vf(i)%sf(k, j, l) = vL_y(j, k, l, i) - vR_prim_vf(i)%sf(k, j, l) = vR_y(j, k, l, i) - end do + do i = iv%beg, iv%end + do l = is3_viscous%beg, is3_viscous%end + do j = is1_viscous%beg, is1_viscous%end + do k = is2_viscous%beg, is2_viscous%end + vL_prim_vf(i)%sf(k, j, l) = vL_y(j, k, l, i) + vR_prim_vf(i)%sf(k, j, l) = vR_y(j, k, l, i) end do end do end do + end do $:END_GPU_PARALLEL_LOOP() elseif (norm_dir == 3) then $:GPU_PARALLEL_LOOP(collapse=4) - do i = iv%beg, iv%end - do j = is1_viscous%beg, is1_viscous%end - do k = is2_viscous%beg, is2_viscous%end - do l = is3_viscous%beg, is3_viscous%end - vL_prim_vf(i)%sf(l, k, j) = vL_z(j, k, l, i) - vR_prim_vf(i)%sf(l, k, j) = vR_z(j, k, l, i) - end do + do i = iv%beg, iv%end + do j = is1_viscous%beg, is1_viscous%end + do k = is2_viscous%beg, is2_viscous%end + do l = is3_viscous%beg, is3_viscous%end + vL_prim_vf(i)%sf(l, k, j) = vL_z(j, k, l, i) + vR_prim_vf(i)%sf(l, k, j) = vR_z(j, k, l, i) end do end do end do + end do $:END_GPU_PARALLEL_LOOP() elseif (norm_dir == 1) then $:GPU_PARALLEL_LOOP(collapse=4) - do i = iv%beg, iv%end - do l = is3_viscous%beg, is3_viscous%end - do k = is2_viscous%beg, is2_viscous%end - do j = is1_viscous%beg, is1_viscous%end - vL_prim_vf(i)%sf(j, k, l) = vL_x(j, k, l, i) - vR_prim_vf(i)%sf(j, k, l) = vR_x(j, k, l, i) - end do + do i = iv%beg, iv%end + do l = is3_viscous%beg, is3_viscous%end + do k = is2_viscous%beg, is2_viscous%end + do j = is1_viscous%beg, is1_viscous%end + vL_prim_vf(i)%sf(j, k, l) = vL_x(j, k, l, i) + vR_prim_vf(i)%sf(j, k, l) = vR_x(j, k, l, i) end do end do end do + end do $:END_GPU_PARALLEL_LOOP() end if end if @@ -1155,42 +1155,42 @@ contains if (weno_Re_flux) then if (norm_dir == 2) then $:GPU_PARALLEL_LOOP(collapse=4) - do i = iv%beg, iv%end - do l = is3_viscous%beg, is3_viscous%end - do j = is1_viscous%beg, is1_viscous%end - do k = is2_viscous%beg, is2_viscous%end - vL_prim_vf(i)%sf(k, j, l) = vL_y(j, k, l, i) - vR_prim_vf(i)%sf(k, j, l) = vR_y(j, k, l, i) - end do + do i = iv%beg, iv%end + do l = is3_viscous%beg, is3_viscous%end + do j = is1_viscous%beg, is1_viscous%end + do k = is2_viscous%beg, is2_viscous%end + vL_prim_vf(i)%sf(k, j, l) = vL_y(j, k, l, i) + vR_prim_vf(i)%sf(k, j, l) = vR_y(j, k, l, i) end do end do end do + end do $:END_GPU_PARALLEL_LOOP() elseif (norm_dir == 3) then $:GPU_PARALLEL_LOOP(collapse=4) - do i = iv%beg, iv%end - do j = is1_viscous%beg, is1_viscous%end - do k = is2_viscous%beg, is2_viscous%end - do l = is3_viscous%beg, is3_viscous%end - vL_prim_vf(i)%sf(l, k, j) = vL_z(j, k, l, i) - vR_prim_vf(i)%sf(l, k, j) = vR_z(j, k, l, i) - end do + do i = iv%beg, iv%end + do j = is1_viscous%beg, is1_viscous%end + do k = is2_viscous%beg, is2_viscous%end + do l = is3_viscous%beg, is3_viscous%end + vL_prim_vf(i)%sf(l, k, j) = vL_z(j, k, l, i) + vR_prim_vf(i)%sf(l, k, j) = vR_z(j, k, l, i) end do end do end do + end do $:END_GPU_PARALLEL_LOOP() elseif (norm_dir == 1) then $:GPU_PARALLEL_LOOP(collapse=4) - do i = iv%beg, iv%end - do l = is3_viscous%beg, is3_viscous%end - do k = is2_viscous%beg, is2_viscous%end - do j = is1_viscous%beg, is1_viscous%end - vL_prim_vf(i)%sf(j, k, l) = vL_x(j, k, l, i) - vR_prim_vf(i)%sf(j, k, l) = vR_x(j, k, l, i) - end do + do i = iv%beg, iv%end + do l = is3_viscous%beg, is3_viscous%end + do k = is2_viscous%beg, is2_viscous%end + do j = is1_viscous%beg, is1_viscous%end + vL_prim_vf(i)%sf(j, k, l) = vL_x(j, k, l, i) + vR_prim_vf(i)%sf(j, k, l) = vR_x(j, k, l, i) end do end do end do + end do $:END_GPU_PARALLEL_LOOP() end if end if @@ -1248,21 +1248,21 @@ contains ! spatial derivatives inside the cell. $:GPU_PARALLEL_LOOP(collapse=3) - do l = is3_viscous%beg, is3_viscous%end - do k = is2_viscous%beg, is2_viscous%end - do j = is1_viscous%beg + 1, is1_viscous%end - 1 - $:GPU_LOOP(parallelism='[seq]') - do i = iv%beg, iv%end - dv_ds_vf(i)%sf(j, k, l) = & - 1._wp/((1._wp + wa_flg)*dL(j)) & - *(wa_flg*vL_vf(i)%sf(j + 1, k, l) & - + vR_vf(i)%sf(j, k, l) & - - vL_vf(i)%sf(j, k, l) & - - wa_flg*vR_vf(i)%sf(j - 1, k, l)) - end do + do l = is3_viscous%beg, is3_viscous%end + do k = is2_viscous%beg, is2_viscous%end + do j = is1_viscous%beg + 1, is1_viscous%end - 1 + $:GPU_LOOP(parallelism='[seq]') + do i = iv%beg, iv%end + dv_ds_vf(i)%sf(j, k, l) = & + 1._wp/((1._wp + wa_flg)*dL(j)) & + *(wa_flg*vL_vf(i)%sf(j + 1, k, l) & + + vR_vf(i)%sf(j, k, l) & + - vL_vf(i)%sf(j, k, l) & + - wa_flg*vR_vf(i)%sf(j - 1, k, l)) end do end do end do + end do $:END_GPU_PARALLEL_LOOP() ! END: First-Order Spatial Derivatives in x-direction @@ -1277,21 +1277,21 @@ contains ! spatial derivatives inside the cell. $:GPU_PARALLEL_LOOP(collapse=3) - do l = is3_viscous%beg, is3_viscous%end - do k = is2_viscous%beg + 1, is2_viscous%end - 1 - do j = is1_viscous%beg, is1_viscous%end - $:GPU_LOOP(parallelism='[seq]') - do i = iv%beg, iv%end - dv_ds_vf(i)%sf(j, k, l) = & - 1._wp/((1._wp + wa_flg)*dL(k)) & - *(wa_flg*vL_vf(i)%sf(j, k + 1, l) & - + vR_vf(i)%sf(j, k, l) & - - vL_vf(i)%sf(j, k, l) & - - wa_flg*vR_vf(i)%sf(j, k - 1, l)) - end do + do l = is3_viscous%beg, is3_viscous%end + do k = is2_viscous%beg + 1, is2_viscous%end - 1 + do j = is1_viscous%beg, is1_viscous%end + $:GPU_LOOP(parallelism='[seq]') + do i = iv%beg, iv%end + dv_ds_vf(i)%sf(j, k, l) = & + 1._wp/((1._wp + wa_flg)*dL(k)) & + *(wa_flg*vL_vf(i)%sf(j, k + 1, l) & + + vR_vf(i)%sf(j, k, l) & + - vL_vf(i)%sf(j, k, l) & + - wa_flg*vR_vf(i)%sf(j, k - 1, l)) end do end do end do + end do $:END_GPU_PARALLEL_LOOP() ! END: First-Order Spatial Derivatives in y-direction @@ -1306,21 +1306,21 @@ contains ! spatial derivatives inside the cell. $:GPU_PARALLEL_LOOP(collapse=3) - do l = is3_viscous%beg + 1, is3_viscous%end - 1 - do k = is2_viscous%beg, is2_viscous%end - do j = is1_viscous%beg, is1_viscous%end - $:GPU_LOOP(parallelism='[seq]') - do i = iv%beg, iv%end - dv_ds_vf(i)%sf(j, k, l) = & - 1._wp/((1._wp + wa_flg)*dL(l)) & - *(wa_flg*vL_vf(i)%sf(j, k, l + 1) & - + vR_vf(i)%sf(j, k, l) & - - vL_vf(i)%sf(j, k, l) & - - wa_flg*vR_vf(i)%sf(j, k, l - 1)) - end do + do l = is3_viscous%beg + 1, is3_viscous%end - 1 + do k = is2_viscous%beg, is2_viscous%end + do j = is1_viscous%beg, is1_viscous%end + $:GPU_LOOP(parallelism='[seq]') + do i = iv%beg, iv%end + dv_ds_vf(i)%sf(j, k, l) = & + 1._wp/((1._wp + wa_flg)*dL(l)) & + *(wa_flg*vL_vf(i)%sf(j, k, l + 1) & + + vR_vf(i)%sf(j, k, l) & + - vL_vf(i)%sf(j, k, l) & + - wa_flg*vR_vf(i)%sf(j, k, l - 1)) end do end do end do + end do $:END_GPU_PARALLEL_LOOP() end if @@ -1362,148 +1362,148 @@ contains $:GPU_UPDATE(device='[is1_viscous,is2_viscous,is3_viscous]') $:GPU_PARALLEL_LOOP(collapse=3) - do l = is3_viscous%beg, is3_viscous%end - do k = is2_viscous%beg, is2_viscous%end - do j = is1_viscous%beg, is1_viscous%end - grad_x%sf(j, k, l) = & - (var%sf(j + 1, k, l) - var%sf(j - 1, k, l))/ & - (x_cc(j + 1) - x_cc(j - 1)) - end do + do l = is3_viscous%beg, is3_viscous%end + do k = is2_viscous%beg, is2_viscous%end + do j = is1_viscous%beg, is1_viscous%end + grad_x%sf(j, k, l) = & + (var%sf(j + 1, k, l) - var%sf(j - 1, k, l))/ & + (x_cc(j + 1) - x_cc(j - 1)) end do end do + end do $:END_GPU_PARALLEL_LOOP() if (n > 0) then $:GPU_PARALLEL_LOOP(collapse=3) - do l = is3_viscous%beg, is3_viscous%end - do k = is2_viscous%beg, is2_viscous%end - do j = is1_viscous%beg, is1_viscous%end - grad_y%sf(j, k, l) = & - (var%sf(j, k + 1, l) - var%sf(j, k - 1, l))/ & - (y_cc(k + 1) - y_cc(k - 1)) - end do + do l = is3_viscous%beg, is3_viscous%end + do k = is2_viscous%beg, is2_viscous%end + do j = is1_viscous%beg, is1_viscous%end + grad_y%sf(j, k, l) = & + (var%sf(j, k + 1, l) - var%sf(j, k - 1, l))/ & + (y_cc(k + 1) - y_cc(k - 1)) end do end do + end do $:END_GPU_PARALLEL_LOOP() end if if (p > 0) then $:GPU_PARALLEL_LOOP(collapse=3) - do l = is3_viscous%beg, is3_viscous%end - do k = is2_viscous%beg, is2_viscous%end - do j = is1_viscous%beg, is1_viscous%end - grad_z%sf(j, k, l) = & - (var%sf(j, k, l + 1) - var%sf(j, k, l - 1))/ & - (z_cc(l + 1) - z_cc(l - 1)) - end do + do l = is3_viscous%beg, is3_viscous%end + do k = is2_viscous%beg, is2_viscous%end + do j = is1_viscous%beg, is1_viscous%end + grad_z%sf(j, k, l) = & + (var%sf(j, k, l + 1) - var%sf(j, k, l - 1))/ & + (z_cc(l + 1) - z_cc(l - 1)) end do end do + end do $:END_GPU_PARALLEL_LOOP() end if $:GPU_PARALLEL_LOOP(collapse=2) - do l = idwbuff(3)%beg, idwbuff(3)%end - do k = idwbuff(2)%beg, idwbuff(2)%end - grad_x%sf(idwbuff(1)%beg, k, l) = & - (-3._wp*var%sf(idwbuff(1)%beg, k, l) + 4._wp*var%sf(idwbuff(1)%beg + 1, k, l) - var%sf(idwbuff(1)%beg + 2, k, l))/ & - (x_cc(idwbuff(1)%beg + 2) - x_cc(idwbuff(1)%beg)) - grad_x%sf(idwbuff(1)%end, k, l) = & - (+3._wp*var%sf(idwbuff(1)%end, k, l) - 4._wp*var%sf(idwbuff(1)%end - 1, k, l) + var%sf(idwbuff(1)%end - 2, k, l))/ & - (x_cc(idwbuff(1)%end) - x_cc(idwbuff(1)%end - 2)) - end do + do l = idwbuff(3)%beg, idwbuff(3)%end + do k = idwbuff(2)%beg, idwbuff(2)%end + grad_x%sf(idwbuff(1)%beg, k, l) = & + (-3._wp*var%sf(idwbuff(1)%beg, k, l) + 4._wp*var%sf(idwbuff(1)%beg + 1, k, l) - var%sf(idwbuff(1)%beg + 2, k, l))/ & + (x_cc(idwbuff(1)%beg + 2) - x_cc(idwbuff(1)%beg)) + grad_x%sf(idwbuff(1)%end, k, l) = & + (+3._wp*var%sf(idwbuff(1)%end, k, l) - 4._wp*var%sf(idwbuff(1)%end - 1, k, l) + var%sf(idwbuff(1)%end - 2, k, l))/ & + (x_cc(idwbuff(1)%end) - x_cc(idwbuff(1)%end - 2)) end do + end do $:END_GPU_PARALLEL_LOOP() if (n > 0) then $:GPU_PARALLEL_LOOP(collapse=2) - do l = idwbuff(3)%beg, idwbuff(3)%end - do j = idwbuff(1)%beg, idwbuff(1)%end - grad_y%sf(j, idwbuff(2)%beg, l) = & - (-3._wp*var%sf(j, idwbuff(2)%beg, l) + 4._wp*var%sf(j, idwbuff(2)%beg + 1, l) - var%sf(j, idwbuff(2)%beg + 2, l))/ & - (y_cc(idwbuff(2)%beg + 2) - y_cc(idwbuff(2)%beg)) - grad_y%sf(j, idwbuff(2)%end, l) = & - (+3._wp*var%sf(j, idwbuff(2)%end, l) - 4._wp*var%sf(j, idwbuff(2)%end - 1, l) + var%sf(j, idwbuff(2)%end - 2, l))/ & - (y_cc(idwbuff(2)%end) - y_cc(idwbuff(2)%end - 2)) - end do + do l = idwbuff(3)%beg, idwbuff(3)%end + do j = idwbuff(1)%beg, idwbuff(1)%end + grad_y%sf(j, idwbuff(2)%beg, l) = & + (-3._wp*var%sf(j, idwbuff(2)%beg, l) + 4._wp*var%sf(j, idwbuff(2)%beg + 1, l) - var%sf(j, idwbuff(2)%beg + 2, l))/ & + (y_cc(idwbuff(2)%beg + 2) - y_cc(idwbuff(2)%beg)) + grad_y%sf(j, idwbuff(2)%end, l) = & + (+3._wp*var%sf(j, idwbuff(2)%end, l) - 4._wp*var%sf(j, idwbuff(2)%end - 1, l) + var%sf(j, idwbuff(2)%end - 2, l))/ & + (y_cc(idwbuff(2)%end) - y_cc(idwbuff(2)%end - 2)) end do + end do $:END_GPU_PARALLEL_LOOP() if (p > 0) then $:GPU_PARALLEL_LOOP(collapse=2) - do k = idwbuff(2)%beg, idwbuff(2)%end - do j = idwbuff(1)%beg, idwbuff(1)%end - grad_z%sf(j, k, idwbuff(3)%beg) = & - (-3._wp*var%sf(j, k, idwbuff(3)%beg) + 4._wp*var%sf(j, k, idwbuff(3)%beg + 1) - var%sf(j, k, idwbuff(3)%beg + 2))/ & - (z_cc(idwbuff(3)%beg + 2) - z_cc(is3_viscous%beg)) - grad_z%sf(j, k, idwbuff(3)%end) = & - (+3._wp*var%sf(j, k, idwbuff(3)%end) - 4._wp*var%sf(j, k, idwbuff(3)%end - 1) + var%sf(j, k, idwbuff(3)%end - 2))/ & - (z_cc(idwbuff(3)%end) - z_cc(idwbuff(3)%end - 2)) - end do + do k = idwbuff(2)%beg, idwbuff(2)%end + do j = idwbuff(1)%beg, idwbuff(1)%end + grad_z%sf(j, k, idwbuff(3)%beg) = & + (-3._wp*var%sf(j, k, idwbuff(3)%beg) + 4._wp*var%sf(j, k, idwbuff(3)%beg + 1) - var%sf(j, k, idwbuff(3)%beg + 2))/ & + (z_cc(idwbuff(3)%beg + 2) - z_cc(is3_viscous%beg)) + grad_z%sf(j, k, idwbuff(3)%end) = & + (+3._wp*var%sf(j, k, idwbuff(3)%end) - 4._wp*var%sf(j, k, idwbuff(3)%end - 1) + var%sf(j, k, idwbuff(3)%end - 2))/ & + (z_cc(idwbuff(3)%end) - z_cc(idwbuff(3)%end - 2)) end do + end do $:END_GPU_PARALLEL_LOOP() end if end if if (bc_x%beg <= BC_GHOST_EXTRAP) then $:GPU_PARALLEL_LOOP(collapse=2) - do l = idwbuff(3)%beg, idwbuff(3)%end - do k = idwbuff(2)%beg, idwbuff(2)%end - grad_x%sf(0, k, l) = (-3._wp*var%sf(0, k, l) + 4._wp*var%sf(1, k, l) - var%sf(2, k, l))/ & - (x_cc(2) - x_cc(0)) - end do + do l = idwbuff(3)%beg, idwbuff(3)%end + do k = idwbuff(2)%beg, idwbuff(2)%end + grad_x%sf(0, k, l) = (-3._wp*var%sf(0, k, l) + 4._wp*var%sf(1, k, l) - var%sf(2, k, l))/ & + (x_cc(2) - x_cc(0)) end do + end do $:END_GPU_PARALLEL_LOOP() end if if (bc_x%end <= BC_GHOST_EXTRAP) then $:GPU_PARALLEL_LOOP(collapse=2) - do l = idwbuff(3)%beg, idwbuff(3)%end - do k = idwbuff(2)%beg, idwbuff(2)%end - grad_x%sf(m, k, l) = (3._wp*var%sf(m, k, l) - 4._wp*var%sf(m - 1, k, l) + var%sf(m - 2, k, l))/ & - (x_cc(m) - x_cc(m - 2)) - end do + do l = idwbuff(3)%beg, idwbuff(3)%end + do k = idwbuff(2)%beg, idwbuff(2)%end + grad_x%sf(m, k, l) = (3._wp*var%sf(m, k, l) - 4._wp*var%sf(m - 1, k, l) + var%sf(m - 2, k, l))/ & + (x_cc(m) - x_cc(m - 2)) end do + end do $:END_GPU_PARALLEL_LOOP() end if if (n > 0) then if (bc_y%beg <= BC_GHOST_EXTRAP .and. bc_y%beg /= BC_NULL) then $:GPU_PARALLEL_LOOP(collapse=2) - do l = idwbuff(3)%beg, idwbuff(3)%end - do j = idwbuff(1)%beg, idwbuff(1)%end - grad_y%sf(j, 0, l) = (-3._wp*var%sf(j, 0, l) + 4._wp*var%sf(j, 1, l) - var%sf(j, 2, l))/ & - (y_cc(2) - y_cc(0)) - end do + do l = idwbuff(3)%beg, idwbuff(3)%end + do j = idwbuff(1)%beg, idwbuff(1)%end + grad_y%sf(j, 0, l) = (-3._wp*var%sf(j, 0, l) + 4._wp*var%sf(j, 1, l) - var%sf(j, 2, l))/ & + (y_cc(2) - y_cc(0)) end do + end do $:END_GPU_PARALLEL_LOOP() end if if (bc_y%end <= BC_GHOST_EXTRAP) then $:GPU_PARALLEL_LOOP(collapse=2) - do l = idwbuff(3)%beg, idwbuff(3)%end - do j = idwbuff(1)%beg, idwbuff(1)%end - grad_y%sf(j, n, l) = (3._wp*var%sf(j, n, l) - 4._wp*var%sf(j, n - 1, l) + var%sf(j, n - 2, l))/ & - (y_cc(n) - y_cc(n - 2)) - end do + do l = idwbuff(3)%beg, idwbuff(3)%end + do j = idwbuff(1)%beg, idwbuff(1)%end + grad_y%sf(j, n, l) = (3._wp*var%sf(j, n, l) - 4._wp*var%sf(j, n - 1, l) + var%sf(j, n - 2, l))/ & + (y_cc(n) - y_cc(n - 2)) end do + end do $:END_GPU_PARALLEL_LOOP() end if if (p > 0) then if (bc_z%beg <= BC_GHOST_EXTRAP) then $:GPU_PARALLEL_LOOP(collapse=2) - do k = idwbuff(2)%beg, idwbuff(2)%end - do j = idwbuff(1)%beg, idwbuff(1)%end - grad_z%sf(j, k, 0) = & - (-3._wp*var%sf(j, k, 0) + 4._wp*var%sf(j, k, 1) - var%sf(j, k, 2))/ & - (z_cc(2) - z_cc(0)) - end do + do k = idwbuff(2)%beg, idwbuff(2)%end + do j = idwbuff(1)%beg, idwbuff(1)%end + grad_z%sf(j, k, 0) = & + (-3._wp*var%sf(j, k, 0) + 4._wp*var%sf(j, k, 1) - var%sf(j, k, 2))/ & + (z_cc(2) - z_cc(0)) end do + end do $:END_GPU_PARALLEL_LOOP() end if if (bc_z%end <= BC_GHOST_EXTRAP) then $:GPU_PARALLEL_LOOP(collapse=2) - do k = idwbuff(2)%beg, idwbuff(2)%end - do j = idwbuff(1)%beg, idwbuff(1)%end - grad_z%sf(j, k, p) = & - (3._wp*var%sf(j, k, p) - 4._wp*var%sf(j, k, p - 1) + var%sf(j, k, p - 2))/ & - (z_cc(p) - z_cc(p - 2)) - end do + do k = idwbuff(2)%beg, idwbuff(2)%end + do j = idwbuff(1)%beg, idwbuff(1)%end + grad_z%sf(j, k, p) = & + (3._wp*var%sf(j, k, p) - 4._wp*var%sf(j, k, p - 1) + var%sf(j, k, p - 2))/ & + (z_cc(p) - z_cc(p - 2)) end do + end do $:END_GPU_PARALLEL_LOOP() end if end if @@ -1517,4 +1517,4 @@ contains end subroutine s_finalize_viscous_module -end module m_viscous \ No newline at end of file +end module m_viscous diff --git a/src/simulation/m_weno.fpp b/src/simulation/m_weno.fpp index 6256330dc..3422dab8e 100644 --- a/src/simulation/m_weno.fpp +++ b/src/simulation/m_weno.fpp @@ -672,67 +672,165 @@ contains if (weno_order == 1) then if (weno_dir == 1) then $:GPU_PARALLEL_LOOP(collapse=4) - do i = 1, ubound(v_vf, 1) - do l = is3_weno%beg, is3_weno%end - do k = is2_weno%beg, is2_weno%end - do j = is1_weno%beg, is1_weno%end - vL_rs_vf_x(j, k, l, i) = v_vf(i)%sf(j, k, l) - vR_rs_vf_x(j, k, l, i) = v_vf(i)%sf(j, k, l) - end do + do i = 1, ubound(v_vf, 1) + do l = is3_weno%beg, is3_weno%end + do k = is2_weno%beg, is2_weno%end + do j = is1_weno%beg, is1_weno%end + vL_rs_vf_x(j, k, l, i) = v_vf(i)%sf(j, k, l) + vR_rs_vf_x(j, k, l, i) = v_vf(i)%sf(j, k, l) end do end do end do + end do $:END_GPU_PARALLEL_LOOP() else if (weno_dir == 2) then $:GPU_PARALLEL_LOOP(collapse=4) - do i = 1, ubound(v_vf, 1) - do l = is3_weno%beg, is3_weno%end - do k = is2_weno%beg, is2_weno%end - do j = is1_weno%beg, is1_weno%end - vL_rs_vf_y(j, k, l, i) = v_vf(i)%sf(k, j, l) - vR_rs_vf_y(j, k, l, i) = v_vf(i)%sf(k, j, l) - end do + do i = 1, ubound(v_vf, 1) + do l = is3_weno%beg, is3_weno%end + do k = is2_weno%beg, is2_weno%end + do j = is1_weno%beg, is1_weno%end + vL_rs_vf_y(j, k, l, i) = v_vf(i)%sf(k, j, l) + vR_rs_vf_y(j, k, l, i) = v_vf(i)%sf(k, j, l) end do end do end do + end do $:END_GPU_PARALLEL_LOOP() else if (weno_dir == 3) then $:GPU_PARALLEL_LOOP(collapse=4) - do i = 1, ubound(v_vf, 1) - do l = is3_weno%beg, is3_weno%end - do k = is2_weno%beg, is2_weno%end - do j = is1_weno%beg, is1_weno%end - vL_rs_vf_z(j, k, l, i) = v_vf(i)%sf(l, k, j) - vR_rs_vf_z(j, k, l, i) = v_vf(i)%sf(l, k, j) - end do + do i = 1, ubound(v_vf, 1) + do l = is3_weno%beg, is3_weno%end + do k = is2_weno%beg, is2_weno%end + do j = is1_weno%beg, is1_weno%end + vL_rs_vf_z(j, k, l, i) = v_vf(i)%sf(l, k, j) + vR_rs_vf_z(j, k, l, i) = v_vf(i)%sf(l, k, j) end do end do end do + end do $:END_GPU_PARALLEL_LOOP() end if elseif (weno_order == 3) then #:for WENO_DIR, XYZ in [(1, 'x'), (2, 'y'), (3, 'z')] if (weno_dir == ${WENO_DIR}$) then $:GPU_PARALLEL_LOOP(collapse=4,private='[beta,dvd,poly,omega,alpha,tau]') + do l = is3_weno%beg, is3_weno%end + do k = is2_weno%beg, is2_weno%end + do j = is1_weno%beg, is1_weno%end + do i = 1, v_size + ! reconstruct from left side + + dvd(0) = v_rs_ws_${XYZ}$ (j + 1, k, l, i) & + - v_rs_ws_${XYZ}$ (j, k, l, i) + dvd(-1) = v_rs_ws_${XYZ}$ (j, k, l, i) & + - v_rs_ws_${XYZ}$ (j - 1, k, l, i) + + poly(0) = v_rs_ws_${XYZ}$ (j, k, l, i) & + + poly_coef_cbL_${XYZ}$ (j, 0, 0)*dvd(0) + poly(1) = v_rs_ws_${XYZ}$ (j, k, l, i) & + + poly_coef_cbL_${XYZ}$ (j, 1, 0)*dvd(-1) + + beta(0) = beta_coef_${XYZ}$ (j, 0, 0)*dvd(0)*dvd(0) & + + weno_eps + beta(1) = beta_coef_${XYZ}$ (j, 1, 0)*dvd(-1)*dvd(-1) & + + weno_eps + + if (wenojs) then + alpha = d_cbL_${XYZ}$ (:, j)/(beta*beta) + + elseif (mapped_weno) then + alpha = d_cbL_${XYZ}$ (:, j)/(beta*beta) + omega = alpha/sum(alpha) + alpha = (d_cbL_${XYZ}$ (:, j)*(1._wp + d_cbL_${XYZ}$ (:, j) - 3._wp*omega) + omega**2._wp) & + *(omega/(d_cbL_${XYZ}$ (:, j)**2._wp + omega*(1._wp - 2._wp*d_cbL_${XYZ}$ (:, j)))) + + elseif (wenoz) then + ! Borges, et al. (2008) + + tau = abs(beta(1) - beta(0)) + alpha = d_cbL_${XYZ}$ (:, j)*(1._wp + tau/beta) + + end if + + omega = alpha/sum(alpha) + + vL_rs_vf_${XYZ}$ (j, k, l, i) = omega(0)*poly(0) + omega(1)*poly(1) + + ! reconstruct from right side + + poly(0) = v_rs_ws_${XYZ}$ (j, k, l, i) & + + poly_coef_cbR_${XYZ}$ (j, 0, 0)*dvd(0) + poly(1) = v_rs_ws_${XYZ}$ (j, k, l, i) & + + poly_coef_cbR_${XYZ}$ (j, 1, 0)*dvd(-1) + + if (wenojs) then + alpha = d_cbR_${XYZ}$ (:, j)/(beta*beta) + + elseif (mapped_weno) then + alpha = d_cbR_${XYZ}$ (:, j)/(beta*beta) + omega = alpha/sum(alpha) + alpha = (d_cbR_${XYZ}$ (:, j)*(1._wp + d_cbR_${XYZ}$ (:, j) - 3._wp*omega) + omega**2._wp) & + *(omega/(d_cbR_${XYZ}$ (:, j)**2._wp + omega*(1._wp - 2._wp*d_cbR_${XYZ}$ (:, j)))) + + elseif (wenoz) then + + alpha = d_cbR_${XYZ}$ (:, j)*(1._wp + tau/beta) + + end if + + omega = alpha/sum(alpha) + + vR_rs_vf_${XYZ}$ (j, k, l, i) = omega(0)*poly(0) + omega(1)*poly(1) + + end do + end do + end do + end do + $:END_GPU_PARALLEL_LOOP() + end if + #:endfor + elseif (weno_order == 5) then + #:if not MFC_CASE_OPTIMIZATION or weno_num_stencils > 1 + #:for WENO_DIR, XYZ in [(1, 'x'), (2, 'y'), (3, 'z')] + if (weno_dir == ${WENO_DIR}$) then + $:GPU_PARALLEL_LOOP(collapse=3,private='[dvd,poly,beta,alpha,omega,tau,delta]') do l = is3_weno%beg, is3_weno%end do k = is2_weno%beg, is2_weno%end do j = is1_weno%beg, is1_weno%end + $:GPU_LOOP(parallelism='[seq]') do i = 1, v_size ! reconstruct from left side + dvd(1) = v_rs_ws_${XYZ}$ (j + 2, k, l, i) & + - v_rs_ws_${XYZ}$ (j + 1, k, l, i) dvd(0) = v_rs_ws_${XYZ}$ (j + 1, k, l, i) & - v_rs_ws_${XYZ}$ (j, k, l, i) dvd(-1) = v_rs_ws_${XYZ}$ (j, k, l, i) & - v_rs_ws_${XYZ}$ (j - 1, k, l, i) + dvd(-2) = v_rs_ws_${XYZ}$ (j - 1, k, l, i) & + - v_rs_ws_${XYZ}$ (j - 2, k, l, i) poly(0) = v_rs_ws_${XYZ}$ (j, k, l, i) & - + poly_coef_cbL_${XYZ}$ (j, 0, 0)*dvd(0) + + poly_coef_cbL_${XYZ}$ (j, 0, 0)*dvd(1) & + + poly_coef_cbL_${XYZ}$ (j, 0, 1)*dvd(0) poly(1) = v_rs_ws_${XYZ}$ (j, k, l, i) & - + poly_coef_cbL_${XYZ}$ (j, 1, 0)*dvd(-1) - - beta(0) = beta_coef_${XYZ}$ (j, 0, 0)*dvd(0)*dvd(0) & + + poly_coef_cbL_${XYZ}$ (j, 1, 0)*dvd(0) & + + poly_coef_cbL_${XYZ}$ (j, 1, 1)*dvd(-1) + poly(2) = v_rs_ws_${XYZ}$ (j, k, l, i) & + + poly_coef_cbL_${XYZ}$ (j, 2, 0)*dvd(-1) & + + poly_coef_cbL_${XYZ}$ (j, 2, 1)*dvd(-2) + + beta(0) = beta_coef_${XYZ}$ (j, 0, 0)*dvd(1)*dvd(1) & + + beta_coef_${XYZ}$ (j, 0, 1)*dvd(1)*dvd(0) & + + beta_coef_${XYZ}$ (j, 0, 2)*dvd(0)*dvd(0) & + weno_eps - beta(1) = beta_coef_${XYZ}$ (j, 1, 0)*dvd(-1)*dvd(-1) & + beta(1) = beta_coef_${XYZ}$ (j, 1, 0)*dvd(0)*dvd(0) & + + beta_coef_${XYZ}$ (j, 1, 1)*dvd(0)*dvd(-1) & + + beta_coef_${XYZ}$ (j, 1, 2)*dvd(-1)*dvd(-1) & + + weno_eps + beta(2) = beta_coef_${XYZ}$ (j, 2, 0)*dvd(-1)*dvd(-1) & + + beta_coef_${XYZ}$ (j, 2, 1)*dvd(-1)*dvd(-2) & + + beta_coef_${XYZ}$ (j, 2, 2)*dvd(-2)*dvd(-2) & + weno_eps if (wenojs) then @@ -747,21 +845,36 @@ contains elseif (wenoz) then ! Borges, et al. (2008) - tau = abs(beta(1) - beta(0)) - alpha = d_cbL_${XYZ}$ (:, j)*(1._wp + tau/beta) + tau = abs(beta(2) - beta(0)) ! Equation 25 + alpha = d_cbL_${XYZ}$ (:, j)*(1._wp + tau/beta) ! Equation 28 (note: weno_eps was already added to beta) + + elseif (teno) then + ! Fu, et al. (2016) + ! Fu''s code: https://dx.doi.org/10.13140/RG.2.2.36250.34247 + tau = abs(beta(2) - beta(0)) + alpha = 1._wp + tau/beta ! Equation 22 (reuse alpha as gamma; pick C=1 & q=6) + alpha = (alpha*alpha*alpha)**2._wp ! Equation 22 cont. (some CPU compilers cannot optimize x**6.0) + omega = alpha/sum(alpha) ! Equation 25 (reuse omega as xi) + delta = merge(0._wp, 1._wp, omega < teno_CT)! Equation 26 + alpha = delta*d_cbL_${XYZ}$ (:, j) ! Equation 27 end if omega = alpha/sum(alpha) - vL_rs_vf_${XYZ}$ (j, k, l, i) = omega(0)*poly(0) + omega(1)*poly(1) + vL_rs_vf_${XYZ}$ (j, k, l, i) = sum(omega*poly) ! reconstruct from right side poly(0) = v_rs_ws_${XYZ}$ (j, k, l, i) & - + poly_coef_cbR_${XYZ}$ (j, 0, 0)*dvd(0) + + poly_coef_cbR_${XYZ}$ (j, 0, 0)*dvd(1) & + + poly_coef_cbR_${XYZ}$ (j, 0, 1)*dvd(0) poly(1) = v_rs_ws_${XYZ}$ (j, k, l, i) & - + poly_coef_cbR_${XYZ}$ (j, 1, 0)*dvd(-1) + + poly_coef_cbR_${XYZ}$ (j, 1, 0)*dvd(0) & + + poly_coef_cbR_${XYZ}$ (j, 1, 1)*dvd(-1) + poly(2) = v_rs_ws_${XYZ}$ (j, k, l, i) & + + poly_coef_cbR_${XYZ}$ (j, 2, 0)*dvd(-1) & + + poly_coef_cbR_${XYZ}$ (j, 2, 1)*dvd(-2) if (wenojs) then alpha = d_cbR_${XYZ}$ (:, j)/(beta*beta) @@ -776,31 +889,44 @@ contains alpha = d_cbR_${XYZ}$ (:, j)*(1._wp + tau/beta) + elseif (teno) then + alpha = delta*d_cbR_${XYZ}$ (:, j) + end if omega = alpha/sum(alpha) - vR_rs_vf_${XYZ}$ (j, k, l, i) = omega(0)*poly(0) + omega(1)*poly(1) + vR_rs_vf_${XYZ}$ (j, k, l, i) = sum(omega*poly) end do end do end do end do - $:END_GPU_PARALLEL_LOOP() - end if - #:endfor - elseif (weno_order == 5) then - #:if not MFC_CASE_OPTIMIZATION or weno_num_stencils > 1 + $:END_GPU_PARALLEL_LOOP() + + if (mp_weno) then + call s_preserve_monotonicity(v_rs_ws_${XYZ}$, vL_rs_vf_${XYZ}$, & + vR_rs_vf_${XYZ}$) + end if + end if + #:endfor + #:endif + elseif (weno_order == 7) then + #:if not MFC_CASE_OPTIMIZATION or weno_num_stencils > 2 #:for WENO_DIR, XYZ in [(1, 'x'), (2, 'y'), (3, 'z')] if (weno_dir == ${WENO_DIR}$) then - $:GPU_PARALLEL_LOOP(collapse=3,private='[dvd,poly,beta,alpha,omega,tau,delta]') - do l = is3_weno%beg, is3_weno%end - do k = is2_weno%beg, is2_weno%end - do j = is1_weno%beg, is1_weno%end - $:GPU_LOOP(parallelism='[seq]') - do i = 1, v_size - ! reconstruct from left side + $:GPU_PARALLEL_LOOP(collapse=3,private='[poly,beta,alpha,omega,tau,delta,dvd,v]') + do l = is3_weno%beg, is3_weno%end + do k = is2_weno%beg, is2_weno%end + do j = is1_weno%beg, is1_weno%end + $:GPU_LOOP(parallelism='[seq]') + do i = 1, v_size + if (teno) v = v_rs_ws_${XYZ}$ (j - 3:j + 3, k, l, i) ! temporary field value array for clarity + + if (.not. teno) then + dvd(2) = v_rs_ws_${XYZ}$ (j + 3, k, l, i) & + - v_rs_ws_${XYZ}$ (j + 2, k, l, i) dvd(1) = v_rs_ws_${XYZ}$ (j + 2, k, l, i) & - v_rs_ws_${XYZ}$ (j + 1, k, l, i) dvd(0) = v_rs_ws_${XYZ}$ (j + 1, k, l, i) & @@ -809,304 +935,178 @@ contains - v_rs_ws_${XYZ}$ (j - 1, k, l, i) dvd(-2) = v_rs_ws_${XYZ}$ (j - 1, k, l, i) & - v_rs_ws_${XYZ}$ (j - 2, k, l, i) + dvd(-3) = v_rs_ws_${XYZ}$ (j - 2, k, l, i) & + - v_rs_ws_${XYZ}$ (j - 3, k, l, i) - poly(0) = v_rs_ws_${XYZ}$ (j, k, l, i) & - + poly_coef_cbL_${XYZ}$ (j, 0, 0)*dvd(1) & - + poly_coef_cbL_${XYZ}$ (j, 0, 1)*dvd(0) - poly(1) = v_rs_ws_${XYZ}$ (j, k, l, i) & - + poly_coef_cbL_${XYZ}$ (j, 1, 0)*dvd(0) & - + poly_coef_cbL_${XYZ}$ (j, 1, 1)*dvd(-1) + poly(3) = v_rs_ws_${XYZ}$ (j, k, l, i) & + + poly_coef_cbL_${XYZ}$ (j, 0, 0)*dvd(2) & + + poly_coef_cbL_${XYZ}$ (j, 0, 1)*dvd(1) & + + poly_coef_cbL_${XYZ}$ (j, 0, 2)*dvd(0) poly(2) = v_rs_ws_${XYZ}$ (j, k, l, i) & - + poly_coef_cbL_${XYZ}$ (j, 2, 0)*dvd(-1) & - + poly_coef_cbL_${XYZ}$ (j, 2, 1)*dvd(-2) + + poly_coef_cbL_${XYZ}$ (j, 1, 0)*dvd(1) & + + poly_coef_cbL_${XYZ}$ (j, 1, 1)*dvd(0) & + + poly_coef_cbL_${XYZ}$ (j, 1, 2)*dvd(-1) + poly(1) = v_rs_ws_${XYZ}$ (j, k, l, i) & + + poly_coef_cbL_${XYZ}$ (j, 2, 0)*dvd(0) & + + poly_coef_cbL_${XYZ}$ (j, 2, 1)*dvd(-1) & + + poly_coef_cbL_${XYZ}$ (j, 2, 2)*dvd(-2) + poly(0) = v_rs_ws_${XYZ}$ (j, k, l, i) & + + poly_coef_cbL_${XYZ}$ (j, 3, 0)*dvd(-1) & + + poly_coef_cbL_${XYZ}$ (j, 3, 1)*dvd(-2) & + + poly_coef_cbL_${XYZ}$ (j, 3, 2)*dvd(-3) + + else + #:if not MFC_CASE_OPTIMIZATION or weno_num_stencils > 3 + ! (Fu, et al., 2016) Table 1 + ! Note: Unlike TENO5, TENO7 stencils differ from WENO7 stencils + ! See Figure 2 (right) for right-sided flux (at i+1/2) + ! Here we need the left-sided flux, so we flip the weights with respect to the x=i point + ! But we need to keep the stencil order to reuse the beta coefficients + poly(0) = ( 2._wp*v(-1) + 5._wp*v( 0) - 1._wp*v( 1)) / 6._wp !& + poly(1) = (11._wp*v( 0) - 7._wp*v( 1) + 2._wp*v( 2)) / 6._wp !& + poly(2) = (-1._wp*v(-2) + 5._wp*v(-1) + 2._wp*v( 0)) / 6._wp !& + poly(3) = (25._wp*v( 0) - 23._wp*v( 1) + 13._wp*v( 2) - 3._wp*v( 3)) / 12._wp !& + poly(4) = ( 1._wp*v(-3) - 5._wp*v(-2) + 13._wp*v(-1) + 3._wp*v( 0)) / 12._wp !& + #:endif + end if - beta(0) = beta_coef_${XYZ}$ (j, 0, 0)*dvd(1)*dvd(1) & - + beta_coef_${XYZ}$ (j, 0, 1)*dvd(1)*dvd(0) & - + beta_coef_${XYZ}$ (j, 0, 2)*dvd(0)*dvd(0) & - + weno_eps - beta(1) = beta_coef_${XYZ}$ (j, 1, 0)*dvd(0)*dvd(0) & - + beta_coef_${XYZ}$ (j, 1, 1)*dvd(0)*dvd(-1) & - + beta_coef_${XYZ}$ (j, 1, 2)*dvd(-1)*dvd(-1) & - + weno_eps - beta(2) = beta_coef_${XYZ}$ (j, 2, 0)*dvd(-1)*dvd(-1) & - + beta_coef_${XYZ}$ (j, 2, 1)*dvd(-1)*dvd(-2) & - + beta_coef_${XYZ}$ (j, 2, 2)*dvd(-2)*dvd(-2) & - + weno_eps + if (.not. teno) then - if (wenojs) then - alpha = d_cbL_${XYZ}$ (:, j)/(beta*beta) + beta(3) = beta_coef_${XYZ}$ (j, 0, 0)*dvd(2)*dvd(2) & + + beta_coef_${XYZ}$ (j, 0, 1)*dvd(2)*dvd(1) & + + beta_coef_${XYZ}$ (j, 0, 2)*dvd(2)*dvd(0) & + + beta_coef_${XYZ}$ (j, 0, 3)*dvd(1)*dvd(1) & + + beta_coef_${XYZ}$ (j, 0, 4)*dvd(1)*dvd(0) & + + beta_coef_${XYZ}$ (j, 0, 5)*dvd(0)*dvd(0) & + + weno_eps - elseif (mapped_weno) then - alpha = d_cbL_${XYZ}$ (:, j)/(beta*beta) - omega = alpha/sum(alpha) - alpha = (d_cbL_${XYZ}$ (:, j)*(1._wp + d_cbL_${XYZ}$ (:, j) - 3._wp*omega) + omega**2._wp) & - *(omega/(d_cbL_${XYZ}$ (:, j)**2._wp + omega*(1._wp - 2._wp*d_cbL_${XYZ}$ (:, j)))) + beta(2) = beta_coef_${XYZ}$ (j, 1, 0)*dvd(1)*dvd(1) & + + beta_coef_${XYZ}$ (j, 1, 1)*dvd(1)*dvd(0) & + + beta_coef_${XYZ}$ (j, 1, 2)*dvd(1)*dvd(-1) & + + beta_coef_${XYZ}$ (j, 1, 3)*dvd(0)*dvd(0) & + + beta_coef_${XYZ}$ (j, 1, 4)*dvd(0)*dvd(-1) & + + beta_coef_${XYZ}$ (j, 1, 5)*dvd(-1)*dvd(-1) & + + weno_eps - elseif (wenoz) then - ! Borges, et al. (2008) + beta(1) = beta_coef_${XYZ}$ (j, 2, 0)*dvd(0)*dvd(0) & + + beta_coef_${XYZ}$ (j, 2, 1)*dvd(0)*dvd(-1) & + + beta_coef_${XYZ}$ (j, 2, 2)*dvd(0)*dvd(-2) & + + beta_coef_${XYZ}$ (j, 2, 3)*dvd(-1)*dvd(-1) & + + beta_coef_${XYZ}$ (j, 2, 4)*dvd(-1)*dvd(-2) & + + beta_coef_${XYZ}$ (j, 2, 5)*dvd(-2)*dvd(-2) & + + weno_eps - tau = abs(beta(2) - beta(0)) ! Equation 25 - alpha = d_cbL_${XYZ}$ (:, j)*(1._wp + tau/beta) ! Equation 28 (note: weno_eps was already added to beta) + beta(0) = beta_coef_${XYZ}$ (j, 3, 0)*dvd(-1)*dvd(-1) & + + beta_coef_${XYZ}$ (j, 3, 1)*dvd(-1)*dvd(-2) & + + beta_coef_${XYZ}$ (j, 3, 2)*dvd(-1)*dvd(-3) & + + beta_coef_${XYZ}$ (j, 3, 3)*dvd(-2)*dvd(-2) & + + beta_coef_${XYZ}$ (j, 3, 4)*dvd(-2)*dvd(-3) & + + beta_coef_${XYZ}$ (j, 3, 5)*dvd(-3)*dvd(-3) & + + weno_eps - elseif (teno) then - ! Fu, et al. (2016) - ! Fu''s code: https://dx.doi.org/10.13140/RG.2.2.36250.34247 - tau = abs(beta(2) - beta(0)) - alpha = 1._wp + tau/beta ! Equation 22 (reuse alpha as gamma; pick C=1 & q=6) - alpha = (alpha*alpha*alpha)**2._wp ! Equation 22 cont. (some CPU compilers cannot optimize x**6.0) - omega = alpha/sum(alpha) ! Equation 25 (reuse omega as xi) - delta = merge(0._wp, 1._wp, omega < teno_CT)! Equation 26 - alpha = delta*d_cbL_${XYZ}$ (:, j) ! Equation 27 + else ! TENO + #:if not MFC_CASE_OPTIMIZATION or weno_num_stencils > 3 + ! High-Order Low-Dissipation Targeted ENO Schemes for Ideal Magnetohydrodynamics (Fu & Tang, 2019) Section 3.2 + beta(0) = 13._wp/12._wp*(v(-1) - 2._wp*v( 0) + v( 1))**2._wp + (( v(-1) - v( 1))**2._wp)/4._wp + weno_eps !& + beta(1) = 13._wp/12._wp*(v( 0) - 2._wp*v( 1) + v( 2))**2._wp + ((3._wp*v( 0) - 4._wp*v( 1) + v( 2))**2._wp)/4._wp + weno_eps !& + beta(2) = 13._wp/12._wp*(v(-2) - 2._wp*v(-1) + v( 0))**2._wp + (( v(-2) - 4._wp*v(-1) + 3._wp*v( 0))**2._wp)/4._wp + weno_eps !& + + beta(3) = ( v( 0)*(2107._wp*v( 0) - 9402._wp*v( 1) + 7042._wp*v( 2) - 1854._wp*v( 3)) & !& + + v( 1)*( 11003._wp*v( 1) - 17246._wp*v( 2) + 4642._wp*v( 3)) & !& + + v( 2)*( 7043._wp*v( 2) - 3882._wp*v( 3)) & !& + + v( 3)*( 547._wp*v( 3)) ) / 240._wp & !& + + weno_eps !& + + beta(4) = ( v(-3)*(547._wp*v(-3) - 3882._wp*v(-2) + 4642._wp*v(-1) - 1854._wp*v( 0)) & !& + + v(-2)*( 7043._wp*v(-2) - 17246._wp*v(-1) + 7042._wp*v( 0)) & !& + + v(-1)*( 11003._wp*v(-1) - 9402._wp*v( 0)) & !& + + v( 0)*( 2107._wp*v( 0)) ) / 240._wp & !& + + weno_eps !& + #:endif + end if - end if + if (wenojs) then + alpha = d_cbL_${XYZ}$ (:, j)/(beta*beta) + elseif (mapped_weno) then + alpha = d_cbL_${XYZ}$ (:, j)/(beta*beta) omega = alpha/sum(alpha) + alpha = (d_cbL_${XYZ}$ (:, j)*(1._wp + d_cbL_${XYZ}$ (:, j) - 3._wp*omega) + omega**2._wp) & + *(omega/(d_cbL_${XYZ}$ (:, j)**2._wp + omega*(1._wp - 2._wp*d_cbL_${XYZ}$ (:, j)))) - vL_rs_vf_${XYZ}$ (j, k, l, i) = sum(omega*poly) - - ! reconstruct from right side - - poly(0) = v_rs_ws_${XYZ}$ (j, k, l, i) & - + poly_coef_cbR_${XYZ}$ (j, 0, 0)*dvd(1) & - + poly_coef_cbR_${XYZ}$ (j, 0, 1)*dvd(0) - poly(1) = v_rs_ws_${XYZ}$ (j, k, l, i) & - + poly_coef_cbR_${XYZ}$ (j, 1, 0)*dvd(0) & - + poly_coef_cbR_${XYZ}$ (j, 1, 1)*dvd(-1) - poly(2) = v_rs_ws_${XYZ}$ (j, k, l, i) & - + poly_coef_cbR_${XYZ}$ (j, 2, 0)*dvd(-1) & - + poly_coef_cbR_${XYZ}$ (j, 2, 1)*dvd(-2) - - if (wenojs) then - alpha = d_cbR_${XYZ}$ (:, j)/(beta*beta) - - elseif (mapped_weno) then - alpha = d_cbR_${XYZ}$ (:, j)/(beta*beta) + elseif (wenoz) then + ! Castro, et al. (2010) + ! Don & Borges (2013) also helps + tau = abs(beta(3) - beta(0)) ! Equation 50 + alpha = d_cbL_${XYZ}$ (:, j)*(1._wp + (tau/beta)**wenoz_q) ! q = 2,3,4 for stability + + elseif (teno) then + #:if not MFC_CASE_OPTIMIZATION or weno_num_stencils > 3 + tau = abs(beta(4) - beta(3)) ! Note the reordering of stencils + alpha = 1._wp + tau/beta + alpha = (alpha*alpha*alpha)**2._wp ! some CPU compilers cannot optimize x**6.0 omega = alpha/sum(alpha) - alpha = (d_cbR_${XYZ}$ (:, j)*(1._wp + d_cbR_${XYZ}$ (:, j) - 3._wp*omega) + omega**2._wp) & - *(omega/(d_cbR_${XYZ}$ (:, j)**2._wp + omega*(1._wp - 2._wp*d_cbR_${XYZ}$ (:, j)))) - - elseif (wenoz) then - - alpha = d_cbR_${XYZ}$ (:, j)*(1._wp + tau/beta) - - elseif (teno) then - alpha = delta*d_cbR_${XYZ}$ (:, j) - - end if + delta = merge(0._wp, 1._wp, omega < teno_CT) + alpha = delta*d_cbL_${XYZ}$ (:, j) + #:endif + end if - omega = alpha/sum(alpha) + omega = alpha/sum(alpha) - vR_rs_vf_${XYZ}$ (j, k, l, i) = sum(omega*poly) + vL_rs_vf_${XYZ}$ (j, k, l, i) = sum(omega*poly) - end do - end do - end do - end do - $:END_GPU_PARALLEL_LOOP() + if (.not. teno) then + poly(3) = v_rs_ws_${XYZ}$ (j, k, l, i) & + + poly_coef_cbR_${XYZ}$ (j, 0, 0)*dvd(2) & + + poly_coef_cbR_${XYZ}$ (j, 0, 1)*dvd(1) & + + poly_coef_cbR_${XYZ}$ (j, 0, 2)*dvd(0) + poly(2) = v_rs_ws_${XYZ}$ (j, k, l, i) & + + poly_coef_cbR_${XYZ}$ (j, 1, 0)*dvd(1) & + + poly_coef_cbR_${XYZ}$ (j, 1, 1)*dvd(0) & + + poly_coef_cbR_${XYZ}$ (j, 1, 2)*dvd(-1) + poly(1) = v_rs_ws_${XYZ}$ (j, k, l, i) & + + poly_coef_cbR_${XYZ}$ (j, 2, 0)*dvd(0) & + + poly_coef_cbR_${XYZ}$ (j, 2, 1)*dvd(-1) & + + poly_coef_cbR_${XYZ}$ (j, 2, 2)*dvd(-2) + poly(0) = v_rs_ws_${XYZ}$ (j, k, l, i) & + + poly_coef_cbR_${XYZ}$ (j, 3, 0)*dvd(-1) & + + poly_coef_cbR_${XYZ}$ (j, 3, 1)*dvd(-2) & + + poly_coef_cbR_${XYZ}$ (j, 3, 2)*dvd(-3) + else + #:if not MFC_CASE_OPTIMIZATION or weno_num_stencils > 3 + poly(0) = (-1._wp*v(-1) + 5._wp*v( 0) + 2._wp*v( 1)) / 6._wp !& + poly(1) = ( 2._wp*v( 0) + 5._wp*v( 1) - 1._wp*v( 2)) / 6._wp !& + poly(2) = ( 2._wp*v(-2) - 7._wp*v(-1) + 11._wp*v( 0)) / 6._wp !& + poly(3) = ( 3._wp*v( 0) + 13._wp*v( 1) - 5._wp*v( 2) + 1._wp*v( 3)) / 12._wp !& + poly(4) = (-3._wp*v(-3) + 13._wp*v(-2) - 23._wp*v(-1) + 25._wp*v( 0)) / 12._wp !& + #:endif + end if - if (mp_weno) then - call s_preserve_monotonicity(v_rs_ws_${XYZ}$, vL_rs_vf_${XYZ}$, & - vR_rs_vf_${XYZ}$) - end if - end if - #:endfor - #:endif - elseif (weno_order == 7) then - #:if not MFC_CASE_OPTIMIZATION or weno_num_stencils > 2 - #:for WENO_DIR, XYZ in [(1, 'x'), (2, 'y'), (3, 'z')] - if (weno_dir == ${WENO_DIR}$) then - $:GPU_PARALLEL_LOOP(collapse=3,private='[poly,beta,alpha,omega,tau,delta,dvd,v]') - do l = is3_weno%beg, is3_weno%end - do k = is2_weno%beg, is2_weno%end - do j = is1_weno%beg, is1_weno%end - $:GPU_LOOP(parallelism='[seq]') - do i = 1, v_size - - if (teno) v = v_rs_ws_${XYZ}$ (j - 3:j + 3, k, l, i) ! temporary field value array for clarity - - if (.not. teno) then - dvd(2) = v_rs_ws_${XYZ}$ (j + 3, k, l, i) & - - v_rs_ws_${XYZ}$ (j + 2, k, l, i) - dvd(1) = v_rs_ws_${XYZ}$ (j + 2, k, l, i) & - - v_rs_ws_${XYZ}$ (j + 1, k, l, i) - dvd(0) = v_rs_ws_${XYZ}$ (j + 1, k, l, i) & - - v_rs_ws_${XYZ}$ (j, k, l, i) - dvd(-1) = v_rs_ws_${XYZ}$ (j, k, l, i) & - - v_rs_ws_${XYZ}$ (j - 1, k, l, i) - dvd(-2) = v_rs_ws_${XYZ}$ (j - 1, k, l, i) & - - v_rs_ws_${XYZ}$ (j - 2, k, l, i) - dvd(-3) = v_rs_ws_${XYZ}$ (j - 2, k, l, i) & - - v_rs_ws_${XYZ}$ (j - 3, k, l, i) - - poly(3) = v_rs_ws_${XYZ}$ (j, k, l, i) & - + poly_coef_cbL_${XYZ}$ (j, 0, 0)*dvd(2) & - + poly_coef_cbL_${XYZ}$ (j, 0, 1)*dvd(1) & - + poly_coef_cbL_${XYZ}$ (j, 0, 2)*dvd(0) - poly(2) = v_rs_ws_${XYZ}$ (j, k, l, i) & - + poly_coef_cbL_${XYZ}$ (j, 1, 0)*dvd(1) & - + poly_coef_cbL_${XYZ}$ (j, 1, 1)*dvd(0) & - + poly_coef_cbL_${XYZ}$ (j, 1, 2)*dvd(-1) - poly(1) = v_rs_ws_${XYZ}$ (j, k, l, i) & - + poly_coef_cbL_${XYZ}$ (j, 2, 0)*dvd(0) & - + poly_coef_cbL_${XYZ}$ (j, 2, 1)*dvd(-1) & - + poly_coef_cbL_${XYZ}$ (j, 2, 2)*dvd(-2) - poly(0) = v_rs_ws_${XYZ}$ (j, k, l, i) & - + poly_coef_cbL_${XYZ}$ (j, 3, 0)*dvd(-1) & - + poly_coef_cbL_${XYZ}$ (j, 3, 1)*dvd(-2) & - + poly_coef_cbL_${XYZ}$ (j, 3, 2)*dvd(-3) - - else - #:if not MFC_CASE_OPTIMIZATION or weno_num_stencils > 3 - ! (Fu, et al., 2016) Table 1 - ! Note: Unlike TENO5, TENO7 stencils differ from WENO7 stencils - ! See Figure 2 (right) for right-sided flux (at i+1/2) - ! Here we need the left-sided flux, so we flip the weights with respect to the x=i point - ! But we need to keep the stencil order to reuse the beta coefficients - poly(0) = ( 2._wp*v(-1) + 5._wp*v( 0) - 1._wp*v( 1)) / 6._wp !& - poly(1) = (11._wp*v( 0) - 7._wp*v( 1) + 2._wp*v( 2)) / 6._wp !& - poly(2) = (-1._wp*v(-2) + 5._wp*v(-1) + 2._wp*v( 0)) / 6._wp !& - poly(3) = (25._wp*v( 0) - 23._wp*v( 1) + 13._wp*v( 2) - 3._wp*v( 3)) / 12._wp !& - poly(4) = ( 1._wp*v(-3) - 5._wp*v(-2) + 13._wp*v(-1) + 3._wp*v( 0)) / 12._wp !& - #:endif - end if - - if (.not. teno) then - - beta(3) = beta_coef_${XYZ}$ (j, 0, 0)*dvd(2)*dvd(2) & - + beta_coef_${XYZ}$ (j, 0, 1)*dvd(2)*dvd(1) & - + beta_coef_${XYZ}$ (j, 0, 2)*dvd(2)*dvd(0) & - + beta_coef_${XYZ}$ (j, 0, 3)*dvd(1)*dvd(1) & - + beta_coef_${XYZ}$ (j, 0, 4)*dvd(1)*dvd(0) & - + beta_coef_${XYZ}$ (j, 0, 5)*dvd(0)*dvd(0) & - + weno_eps - - beta(2) = beta_coef_${XYZ}$ (j, 1, 0)*dvd(1)*dvd(1) & - + beta_coef_${XYZ}$ (j, 1, 1)*dvd(1)*dvd(0) & - + beta_coef_${XYZ}$ (j, 1, 2)*dvd(1)*dvd(-1) & - + beta_coef_${XYZ}$ (j, 1, 3)*dvd(0)*dvd(0) & - + beta_coef_${XYZ}$ (j, 1, 4)*dvd(0)*dvd(-1) & - + beta_coef_${XYZ}$ (j, 1, 5)*dvd(-1)*dvd(-1) & - + weno_eps - - beta(1) = beta_coef_${XYZ}$ (j, 2, 0)*dvd(0)*dvd(0) & - + beta_coef_${XYZ}$ (j, 2, 1)*dvd(0)*dvd(-1) & - + beta_coef_${XYZ}$ (j, 2, 2)*dvd(0)*dvd(-2) & - + beta_coef_${XYZ}$ (j, 2, 3)*dvd(-1)*dvd(-1) & - + beta_coef_${XYZ}$ (j, 2, 4)*dvd(-1)*dvd(-2) & - + beta_coef_${XYZ}$ (j, 2, 5)*dvd(-2)*dvd(-2) & - + weno_eps - - beta(0) = beta_coef_${XYZ}$ (j, 3, 0)*dvd(-1)*dvd(-1) & - + beta_coef_${XYZ}$ (j, 3, 1)*dvd(-1)*dvd(-2) & - + beta_coef_${XYZ}$ (j, 3, 2)*dvd(-1)*dvd(-3) & - + beta_coef_${XYZ}$ (j, 3, 3)*dvd(-2)*dvd(-2) & - + beta_coef_${XYZ}$ (j, 3, 4)*dvd(-2)*dvd(-3) & - + beta_coef_${XYZ}$ (j, 3, 5)*dvd(-3)*dvd(-3) & - + weno_eps - - else ! TENO - #:if not MFC_CASE_OPTIMIZATION or weno_num_stencils > 3 - ! High-Order Low-Dissipation Targeted ENO Schemes for Ideal Magnetohydrodynamics (Fu & Tang, 2019) Section 3.2 - beta(0) = 13._wp/12._wp*(v(-1) - 2._wp*v( 0) + v( 1))**2._wp + (( v(-1) - v( 1))**2._wp)/4._wp + weno_eps !& - beta(1) = 13._wp/12._wp*(v( 0) - 2._wp*v( 1) + v( 2))**2._wp + ((3._wp*v( 0) - 4._wp*v( 1) + v( 2))**2._wp)/4._wp + weno_eps !& - beta(2) = 13._wp/12._wp*(v(-2) - 2._wp*v(-1) + v( 0))**2._wp + (( v(-2) - 4._wp*v(-1) + 3._wp*v( 0))**2._wp)/4._wp + weno_eps !& - - beta(3) = ( v( 0)*(2107._wp*v( 0) - 9402._wp*v( 1) + 7042._wp*v( 2) - 1854._wp*v( 3)) & !& - + v( 1)*( 11003._wp*v( 1) - 17246._wp*v( 2) + 4642._wp*v( 3)) & !& - + v( 2)*( 7043._wp*v( 2) - 3882._wp*v( 3)) & !& - + v( 3)*( 547._wp*v( 3)) ) / 240._wp & !& - + weno_eps !& - - beta(4) = ( v(-3)*(547._wp*v(-3) - 3882._wp*v(-2) + 4642._wp*v(-1) - 1854._wp*v( 0)) & !& - + v(-2)*( 7043._wp*v(-2) - 17246._wp*v(-1) + 7042._wp*v( 0)) & !& - + v(-1)*( 11003._wp*v(-1) - 9402._wp*v( 0)) & !& - + v( 0)*( 2107._wp*v( 0)) ) / 240._wp & !& - + weno_eps !& - #:endif - end if - - if (wenojs) then - alpha = d_cbL_${XYZ}$ (:, j)/(beta*beta) - - elseif (mapped_weno) then - alpha = d_cbL_${XYZ}$ (:, j)/(beta*beta) - omega = alpha/sum(alpha) - alpha = (d_cbL_${XYZ}$ (:, j)*(1._wp + d_cbL_${XYZ}$ (:, j) - 3._wp*omega) + omega**2._wp) & - *(omega/(d_cbL_${XYZ}$ (:, j)**2._wp + omega*(1._wp - 2._wp*d_cbL_${XYZ}$ (:, j)))) - - elseif (wenoz) then - ! Castro, et al. (2010) - ! Don & Borges (2013) also helps - tau = abs(beta(3) - beta(0)) ! Equation 50 - alpha = d_cbL_${XYZ}$ (:, j)*(1._wp + (tau/beta)**wenoz_q) ! q = 2,3,4 for stability - - elseif (teno) then - #:if not MFC_CASE_OPTIMIZATION or weno_num_stencils > 3 - tau = abs(beta(4) - beta(3)) ! Note the reordering of stencils - alpha = 1._wp + tau/beta - alpha = (alpha*alpha*alpha)**2._wp ! some CPU compilers cannot optimize x**6.0 - omega = alpha/sum(alpha) - delta = merge(0._wp, 1._wp, omega < teno_CT) - alpha = delta*d_cbL_${XYZ}$ (:, j) - #:endif - end if + if (wenojs) then + alpha = d_cbR_${XYZ}$ (:, j)/(beta*beta) + elseif (mapped_weno) then + alpha = d_cbR_${XYZ}$ (:, j)/(beta*beta) omega = alpha/sum(alpha) + alpha = (d_cbR_${XYZ}$ (:, j)*(1._wp + d_cbR_${XYZ}$ (:, j) - 3._wp*omega) + omega**2._wp) & + *(omega/(d_cbR_${XYZ}$ (:, j)**2._wp + omega*(1._wp - 2._wp*d_cbR_${XYZ}$ (:, j)))) - vL_rs_vf_${XYZ}$ (j, k, l, i) = sum(omega*poly) - - if (.not. teno) then - poly(3) = v_rs_ws_${XYZ}$ (j, k, l, i) & - + poly_coef_cbR_${XYZ}$ (j, 0, 0)*dvd(2) & - + poly_coef_cbR_${XYZ}$ (j, 0, 1)*dvd(1) & - + poly_coef_cbR_${XYZ}$ (j, 0, 2)*dvd(0) - poly(2) = v_rs_ws_${XYZ}$ (j, k, l, i) & - + poly_coef_cbR_${XYZ}$ (j, 1, 0)*dvd(1) & - + poly_coef_cbR_${XYZ}$ (j, 1, 1)*dvd(0) & - + poly_coef_cbR_${XYZ}$ (j, 1, 2)*dvd(-1) - poly(1) = v_rs_ws_${XYZ}$ (j, k, l, i) & - + poly_coef_cbR_${XYZ}$ (j, 2, 0)*dvd(0) & - + poly_coef_cbR_${XYZ}$ (j, 2, 1)*dvd(-1) & - + poly_coef_cbR_${XYZ}$ (j, 2, 2)*dvd(-2) - poly(0) = v_rs_ws_${XYZ}$ (j, k, l, i) & - + poly_coef_cbR_${XYZ}$ (j, 3, 0)*dvd(-1) & - + poly_coef_cbR_${XYZ}$ (j, 3, 1)*dvd(-2) & - + poly_coef_cbR_${XYZ}$ (j, 3, 2)*dvd(-3) - else - #:if not MFC_CASE_OPTIMIZATION or weno_num_stencils > 3 - poly(0) = (-1._wp*v(-1) + 5._wp*v( 0) + 2._wp*v( 1)) / 6._wp !& - poly(1) = ( 2._wp*v( 0) + 5._wp*v( 1) - 1._wp*v( 2)) / 6._wp !& - poly(2) = ( 2._wp*v(-2) - 7._wp*v(-1) + 11._wp*v( 0)) / 6._wp !& - poly(3) = ( 3._wp*v( 0) + 13._wp*v( 1) - 5._wp*v( 2) + 1._wp*v( 3)) / 12._wp !& - poly(4) = (-3._wp*v(-3) + 13._wp*v(-2) - 23._wp*v(-1) + 25._wp*v( 0)) / 12._wp !& - #:endif - end if - - if (wenojs) then - alpha = d_cbR_${XYZ}$ (:, j)/(beta*beta) - - elseif (mapped_weno) then - alpha = d_cbR_${XYZ}$ (:, j)/(beta*beta) - omega = alpha/sum(alpha) - alpha = (d_cbR_${XYZ}$ (:, j)*(1._wp + d_cbR_${XYZ}$ (:, j) - 3._wp*omega) + omega**2._wp) & - *(omega/(d_cbR_${XYZ}$ (:, j)**2._wp + omega*(1._wp - 2._wp*d_cbR_${XYZ}$ (:, j)))) - - elseif (wenoz) then - alpha = d_cbR_${XYZ}$ (:, j)*(1._wp + (tau/beta)**wenoz_q) + elseif (wenoz) then + alpha = d_cbR_${XYZ}$ (:, j)*(1._wp + (tau/beta)**wenoz_q) - elseif (teno) then - alpha = delta*d_cbR_${XYZ}$ (:, j) + elseif (teno) then + alpha = delta*d_cbR_${XYZ}$ (:, j) - end if + end if - omega = alpha/sum(alpha) + omega = alpha/sum(alpha) - vR_rs_vf_${XYZ}$ (j, k, l, i) = sum(omega*poly) + vR_rs_vf_${XYZ}$ (j, k, l, i) = sum(omega*poly) - end do end do end do end do + end do $:END_GPU_PARALLEL_LOOP() end if #:endfor @@ -1152,15 +1152,15 @@ contains if (weno_dir == 1) then $:GPU_PARALLEL_LOOP(collapse=4) - do j = 1, v_size - do q = is3_weno%beg, is3_weno%end - do l = is2_weno%beg, is2_weno%end - do k = is1_weno%beg - weno_polyn, is1_weno%end + weno_polyn - v_rs_ws_x(k, l, q, j) = v_vf(j)%sf(k, l, q) - end do + do j = 1, v_size + do q = is3_weno%beg, is3_weno%end + do l = is2_weno%beg, is2_weno%end + do k = is1_weno%beg - weno_polyn, is1_weno%end + weno_polyn + v_rs_ws_x(k, l, q, j) = v_vf(j)%sf(k, l, q) end do end do end do + end do $:END_GPU_PARALLEL_LOOP() end if @@ -1169,15 +1169,15 @@ contains if (weno_dir == 2) then $:GPU_PARALLEL_LOOP(collapse=4) - do j = 1, v_size - do q = is3_weno%beg, is3_weno%end - do l = is2_weno%beg, is2_weno%end - do k = is1_weno%beg - weno_polyn, is1_weno%end + weno_polyn - v_rs_ws_y(k, l, q, j) = v_vf(j)%sf(l, k, q) - end do + do j = 1, v_size + do q = is3_weno%beg, is3_weno%end + do l = is2_weno%beg, is2_weno%end + do k = is1_weno%beg - weno_polyn, is1_weno%end + weno_polyn + v_rs_ws_y(k, l, q, j) = v_vf(j)%sf(l, k, q) end do end do end do + end do $:END_GPU_PARALLEL_LOOP() end if @@ -1186,15 +1186,15 @@ contains if (weno_dir == 3) then $:GPU_PARALLEL_LOOP(collapse=4) - do j = 1, v_size - do q = is3_weno%beg, is3_weno%end - do l = is2_weno%beg, is2_weno%end - do k = is1_weno%beg - weno_polyn, is1_weno%end + weno_polyn - v_rs_ws_z(k, l, q, j) = v_vf(j)%sf(q, l, k) - end do + do j = 1, v_size + do q = is3_weno%beg, is3_weno%end + do l = is2_weno%beg, is2_weno%end + do k = is1_weno%beg - weno_polyn, is1_weno%end + weno_polyn + v_rs_ws_z(k, l, q, j) = v_vf(j)%sf(q, l, k) end do end do end do + end do $:END_GPU_PARALLEL_LOOP() end if @@ -1246,130 +1246,130 @@ contains real(wp), parameter :: beta_mp = 4._wp/3._wp $:GPU_PARALLEL_LOOP(collapse=4,private='[d]') - do l = is3_weno%beg, is3_weno%end - do k = is2_weno%beg, is2_weno%end - do j = is1_weno%beg, is1_weno%end - do i = 1, v_size - d(-1) = v_rs_ws(j, k, l, i) & - + v_rs_ws(j - 2, k, l, i) & - - v_rs_ws(j - 1, k, l, i) & - *2._wp - d(0) = v_rs_ws(j + 1, k, l, i) & - + v_rs_ws(j - 1, k, l, i) & - - v_rs_ws(j, k, l, i) & - *2._wp - d(1) = v_rs_ws(j + 2, k, l, i) & - + v_rs_ws(j, k, l, i) & - - v_rs_ws(j + 1, k, l, i) & - *2._wp - - d_MD = (sign(1._wp, 4._wp*d(-1) - d(0)) + sign(1._wp, 4._wp*d(0) - d(-1))) & - *abs((sign(1._wp, 4._wp*d(-1) - d(0)) + sign(1._wp, d(-1))) & - *(sign(1._wp, 4._wp*d(-1) - d(0)) + sign(1._wp, d(0)))) & - *min(abs(4._wp*d(-1) - d(0)), abs(d(-1)), & - abs(4._wp*d(0) - d(-1)), abs(d(0)))/8._wp - - d_LC = (sign(1._wp, 4._wp*d(0) - d(1)) + sign(1._wp, 4._wp*d(1) - d(0))) & - *abs((sign(1._wp, 4._wp*d(0) - d(1)) + sign(1._wp, d(0))) & - *(sign(1._wp, 4._wp*d(0) - d(1)) + sign(1._wp, d(1)))) & - *min(abs(4._wp*d(0) - d(1)), abs(d(0)), & - abs(4._wp*d(1) - d(0)), abs(d(1)))/8._wp - - vL_UL = v_rs_ws(j, k, l, i) & - - (v_rs_ws(j + 1, k, l, i) & - - v_rs_ws(j, k, l, i))*alpha_mp - - vL_MD = (v_rs_ws(j, k, l, i) & - + v_rs_ws(j - 1, k, l, i) & - - d_MD)*5.e-1_wp - - vL_LC = v_rs_ws(j, k, l, i) & - - (v_rs_ws(j + 1, k, l, i) & - - v_rs_ws(j, k, l, i))*5.e-1_wp + beta_mp*d_LC - - vL_min = max(min(v_rs_ws(j, k, l, i), & - v_rs_ws(j - 1, k, l, i), & - vL_MD), & - min(v_rs_ws(j, k, l, i), & - vL_UL, & - vL_LC)) - - vL_max = min(max(v_rs_ws(j, k, l, i), & - v_rs_ws(j - 1, k, l, i), & - vL_MD), & - max(v_rs_ws(j, k, l, i), & - vL_UL, & - vL_LC)) - - vL_rs_vf(j, k, l, i) = vL_rs_vf(j, k, l, i) & - + (sign(5.e-1_wp, vL_min - vL_rs_vf(j, k, l, i)) & - + sign(5.e-1_wp, vL_max - vL_rs_vf(j, k, l, i))) & - *min(abs(vL_min - vL_rs_vf(j, k, l, i)), & - abs(vL_max - vL_rs_vf(j, k, l, i))) - ! END: Left Monotonicity Preserving Bound - - ! Right Monotonicity Preserving Bound - d(-1) = v_rs_ws(j, k, l, i) & - + v_rs_ws(j - 2, k, l, i) & - - v_rs_ws(j - 1, k, l, i) & - *2._wp - d(0) = v_rs_ws(j + 1, k, l, i) & - + v_rs_ws(j - 1, k, l, i) & - - v_rs_ws(j, k, l, i) & - *2._wp - d(1) = v_rs_ws(j + 2, k, l, i) & - + v_rs_ws(j, k, l, i) & - - v_rs_ws(j + 1, k, l, i) & - *2._wp - - d_MD = (sign(1._wp, 4._wp*d(0) - d(1)) + sign(1._wp, 4._wp*d(1) - d(0))) & - *abs((sign(1._wp, 4._wp*d(0) - d(1)) + sign(1._wp, d(0))) & - *(sign(1._wp, 4._wp*d(0) - d(1)) + sign(1._wp, d(1)))) & - *min(abs(4._wp*d(0) - d(1)), abs(d(0)), & - abs(4._wp*d(1) - d(0)), abs(d(1)))/8._wp - - d_LC = (sign(1._wp, 4._wp*d(-1) - d(0)) + sign(1._wp, 4._wp*d(0) - d(-1))) & - *abs((sign(1._wp, 4._wp*d(-1) - d(0)) + sign(1._wp, d(-1))) & - *(sign(1._wp, 4._wp*d(-1) - d(0)) + sign(1._wp, d(0)))) & - *min(abs(4._wp*d(-1) - d(0)), abs(d(-1)), & - abs(4._wp*d(0) - d(-1)), abs(d(0)))/8._wp - - vR_UL = v_rs_ws(j, k, l, i) & - + (v_rs_ws(j, k, l, i) & - - v_rs_ws(j - 1, k, l, i))*alpha_mp - - vR_MD = (v_rs_ws(j, k, l, i) & - + v_rs_ws(j + 1, k, l, i) & - - d_MD)*5.e-1_wp - - vR_LC = v_rs_ws(j, k, l, i) & - + (v_rs_ws(j, k, l, i) & - - v_rs_ws(j - 1, k, l, i))*5.e-1_wp + beta_mp*d_LC - - vR_min = max(min(v_rs_ws(j, k, l, i), & - v_rs_ws(j + 1, k, l, i), & - vR_MD), & - min(v_rs_ws(j, k, l, i), & - vR_UL, & - vR_LC)) - - vR_max = min(max(v_rs_ws(j, k, l, i), & - v_rs_ws(j + 1, k, l, i), & - vR_MD), & - max(v_rs_ws(j, k, l, i), & - vR_UL, & - vR_LC)) - - vR_rs_vf(j, k, l, i) = vR_rs_vf(j, k, l, i) & - + (sign(5.e-1_wp, vR_min - vR_rs_vf(j, k, l, i)) & - + sign(5.e-1_wp, vR_max - vR_rs_vf(j, k, l, i))) & - *min(abs(vR_min - vR_rs_vf(j, k, l, i)), & - abs(vR_max - vR_rs_vf(j, k, l, i))) - ! END: Right Monotonicity Preserving Bound - end do + do l = is3_weno%beg, is3_weno%end + do k = is2_weno%beg, is2_weno%end + do j = is1_weno%beg, is1_weno%end + do i = 1, v_size + d(-1) = v_rs_ws(j, k, l, i) & + + v_rs_ws(j - 2, k, l, i) & + - v_rs_ws(j - 1, k, l, i) & + *2._wp + d(0) = v_rs_ws(j + 1, k, l, i) & + + v_rs_ws(j - 1, k, l, i) & + - v_rs_ws(j, k, l, i) & + *2._wp + d(1) = v_rs_ws(j + 2, k, l, i) & + + v_rs_ws(j, k, l, i) & + - v_rs_ws(j + 1, k, l, i) & + *2._wp + + d_MD = (sign(1._wp, 4._wp*d(-1) - d(0)) + sign(1._wp, 4._wp*d(0) - d(-1))) & + *abs((sign(1._wp, 4._wp*d(-1) - d(0)) + sign(1._wp, d(-1))) & + *(sign(1._wp, 4._wp*d(-1) - d(0)) + sign(1._wp, d(0)))) & + *min(abs(4._wp*d(-1) - d(0)), abs(d(-1)), & + abs(4._wp*d(0) - d(-1)), abs(d(0)))/8._wp + + d_LC = (sign(1._wp, 4._wp*d(0) - d(1)) + sign(1._wp, 4._wp*d(1) - d(0))) & + *abs((sign(1._wp, 4._wp*d(0) - d(1)) + sign(1._wp, d(0))) & + *(sign(1._wp, 4._wp*d(0) - d(1)) + sign(1._wp, d(1)))) & + *min(abs(4._wp*d(0) - d(1)), abs(d(0)), & + abs(4._wp*d(1) - d(0)), abs(d(1)))/8._wp + + vL_UL = v_rs_ws(j, k, l, i) & + - (v_rs_ws(j + 1, k, l, i) & + - v_rs_ws(j, k, l, i))*alpha_mp + + vL_MD = (v_rs_ws(j, k, l, i) & + + v_rs_ws(j - 1, k, l, i) & + - d_MD)*5.e-1_wp + + vL_LC = v_rs_ws(j, k, l, i) & + - (v_rs_ws(j + 1, k, l, i) & + - v_rs_ws(j, k, l, i))*5.e-1_wp + beta_mp*d_LC + + vL_min = max(min(v_rs_ws(j, k, l, i), & + v_rs_ws(j - 1, k, l, i), & + vL_MD), & + min(v_rs_ws(j, k, l, i), & + vL_UL, & + vL_LC)) + + vL_max = min(max(v_rs_ws(j, k, l, i), & + v_rs_ws(j - 1, k, l, i), & + vL_MD), & + max(v_rs_ws(j, k, l, i), & + vL_UL, & + vL_LC)) + + vL_rs_vf(j, k, l, i) = vL_rs_vf(j, k, l, i) & + + (sign(5.e-1_wp, vL_min - vL_rs_vf(j, k, l, i)) & + + sign(5.e-1_wp, vL_max - vL_rs_vf(j, k, l, i))) & + *min(abs(vL_min - vL_rs_vf(j, k, l, i)), & + abs(vL_max - vL_rs_vf(j, k, l, i))) + ! END: Left Monotonicity Preserving Bound + + ! Right Monotonicity Preserving Bound + d(-1) = v_rs_ws(j, k, l, i) & + + v_rs_ws(j - 2, k, l, i) & + - v_rs_ws(j - 1, k, l, i) & + *2._wp + d(0) = v_rs_ws(j + 1, k, l, i) & + + v_rs_ws(j - 1, k, l, i) & + - v_rs_ws(j, k, l, i) & + *2._wp + d(1) = v_rs_ws(j + 2, k, l, i) & + + v_rs_ws(j, k, l, i) & + - v_rs_ws(j + 1, k, l, i) & + *2._wp + + d_MD = (sign(1._wp, 4._wp*d(0) - d(1)) + sign(1._wp, 4._wp*d(1) - d(0))) & + *abs((sign(1._wp, 4._wp*d(0) - d(1)) + sign(1._wp, d(0))) & + *(sign(1._wp, 4._wp*d(0) - d(1)) + sign(1._wp, d(1)))) & + *min(abs(4._wp*d(0) - d(1)), abs(d(0)), & + abs(4._wp*d(1) - d(0)), abs(d(1)))/8._wp + + d_LC = (sign(1._wp, 4._wp*d(-1) - d(0)) + sign(1._wp, 4._wp*d(0) - d(-1))) & + *abs((sign(1._wp, 4._wp*d(-1) - d(0)) + sign(1._wp, d(-1))) & + *(sign(1._wp, 4._wp*d(-1) - d(0)) + sign(1._wp, d(0)))) & + *min(abs(4._wp*d(-1) - d(0)), abs(d(-1)), & + abs(4._wp*d(0) - d(-1)), abs(d(0)))/8._wp + + vR_UL = v_rs_ws(j, k, l, i) & + + (v_rs_ws(j, k, l, i) & + - v_rs_ws(j - 1, k, l, i))*alpha_mp + + vR_MD = (v_rs_ws(j, k, l, i) & + + v_rs_ws(j + 1, k, l, i) & + - d_MD)*5.e-1_wp + + vR_LC = v_rs_ws(j, k, l, i) & + + (v_rs_ws(j, k, l, i) & + - v_rs_ws(j - 1, k, l, i))*5.e-1_wp + beta_mp*d_LC + + vR_min = max(min(v_rs_ws(j, k, l, i), & + v_rs_ws(j + 1, k, l, i), & + vR_MD), & + min(v_rs_ws(j, k, l, i), & + vR_UL, & + vR_LC)) + + vR_max = min(max(v_rs_ws(j, k, l, i), & + v_rs_ws(j + 1, k, l, i), & + vR_MD), & + max(v_rs_ws(j, k, l, i), & + vR_UL, & + vR_LC)) + + vR_rs_vf(j, k, l, i) = vR_rs_vf(j, k, l, i) & + + (sign(5.e-1_wp, vR_min - vR_rs_vf(j, k, l, i)) & + + sign(5.e-1_wp, vR_max - vR_rs_vf(j, k, l, i))) & + *min(abs(vR_min - vR_rs_vf(j, k, l, i)), & + abs(vR_max - vR_rs_vf(j, k, l, i))) + ! END: Right Monotonicity Preserving Bound end do end do end do + end do $:END_GPU_PARALLEL_LOOP() end subroutine s_preserve_monotonicity @@ -1411,4 +1411,4 @@ contains end subroutine s_finalize_weno_module -end module m_weno \ No newline at end of file +end module m_weno From 90639b23ca20867f76af9cc779d457481fc8e578 Mon Sep 17 00:00:00 2001 From: "Daniel J. Vickers" Date: Sun, 16 Nov 2025 11:37:44 -0600 Subject: [PATCH 18/33] refresh FFT code to master based on comment --- src/simulation/m_fftw.fpp | 180 ++++++++++++++++++++++---------------- 1 file changed, 106 insertions(+), 74 deletions(-) diff --git a/src/simulation/m_fftw.fpp b/src/simulation/m_fftw.fpp index 175391899..6e51106f7 100644 --- a/src/simulation/m_fftw.fpp +++ b/src/simulation/m_fftw.fpp @@ -137,24 +137,24 @@ contains if (bc_y%beg >= 0) return #if defined(MFC_GPU) - $:GPU_PARALLEL_LOOP(private='[j,k,l]', collapse=3) - do k = 1, sys_size - do j = 0, m - do l = 1, cmplx_size - data_fltr_cmplx_gpu(l + j*cmplx_size + (k - 1)*cmplx_size*x_size) = (0_dp, 0_dp) + $:GPU_PARALLEL_LOOP(collapse=3) + do k = 1, sys_size + do j = 0, m + do l = 1, cmplx_size + data_fltr_cmplx_gpu(l + j*cmplx_size + (k - 1)*cmplx_size*x_size) = (0_dp, 0_dp) + end do end do end do - end do $:END_GPU_PARALLEL_LOOP() - $:GPU_PARALLEL_LOOP(private='[j,k,l]', collapse=3) - do k = 1, sys_size - do j = 0, m - do l = 0, p - data_real_gpu(l + j*real_size + 1 + (k - 1)*real_size*x_size) = q_cons_vf(k)%sf(j, 0, l) + $:GPU_PARALLEL_LOOP(collapse=3) + do k = 1, sys_size + do j = 0, m + do l = 0, p + data_real_gpu(l + j*real_size + 1 + (k - 1)*real_size*x_size) = q_cons_vf(k)%sf(j, 0, l) + end do end do end do - end do $:END_GPU_PARALLEL_LOOP() #:call GPU_HOST_DATA(use_device_addr='[data_real_gpu, data_cmplx_gpu, data_fltr_cmplx_gpu]') @@ -168,14 +168,14 @@ contains Nfq = 3 $:GPU_UPDATE(device='[Nfq]') - $:GPU_PARALLEL_LOOP(private='[j,k,l]', collapse=3) - do k = 1, sys_size - do j = 0, m - do l = 1, Nfq - data_fltr_cmplx_gpu(l + j*cmplx_size + (k - 1)*cmplx_size*x_size) = data_cmplx_gpu(l + j*cmplx_size + (k - 1)*cmplx_size*x_size) + $:GPU_PARALLEL_LOOP(collapse=3) + do k = 1, sys_size + do j = 0, m + do l = 1, Nfq + data_fltr_cmplx_gpu(l + j*cmplx_size + (k - 1)*cmplx_size*x_size) = data_cmplx_gpu(l + j*cmplx_size + (k - 1)*cmplx_size*x_size) + end do end do end do - end do $:END_GPU_PARALLEL_LOOP() #:call GPU_HOST_DATA(use_device_addr='[data_real_gpu, data_cmplx_gpu, data_fltr_cmplx_gpu]') @@ -187,39 +187,71 @@ contains #endif #:endcall GPU_HOST_DATA - $:GPU_PARALLEL_LOOP(private='[j,k,l]', collapse=3) - do k = 1, sys_size - do j = 0, m - do l = 0, p - data_real_gpu(l + j*real_size + 1 + (k - 1)*real_size*x_size) = data_real_gpu(l + j*real_size + 1 + (k - 1)*real_size*x_size)/real(real_size, dp) - q_cons_vf(k)%sf(j, 0, l) = data_real_gpu(l + j*real_size + 1 + (k - 1)*real_size*x_size) + $:GPU_PARALLEL_LOOP(collapse=3) + do k = 1, sys_size + do j = 0, m + do l = 0, p + data_real_gpu(l + j*real_size + 1 + (k - 1)*real_size*x_size) = data_real_gpu(l + j*real_size + 1 + (k - 1)*real_size*x_size)/real(real_size, dp) + q_cons_vf(k)%sf(j, 0, l) = data_real_gpu(l + j*real_size + 1 + (k - 1)*real_size*x_size) + end do end do end do - end do $:END_GPU_PARALLEL_LOOP() do i = 1, fourier_rings - $:GPU_PARALLEL_LOOP(private='[j,k,l]', collapse=3) - do k = 1, sys_size - do j = 0, m - do l = 1, cmplx_size - data_fltr_cmplx_gpu(l + j*cmplx_size + (k - 1)*cmplx_size*x_size) = (0_dp, 0_dp) + $:GPU_PARALLEL_LOOP(collapse=3) + do k = 1, sys_size + do j = 0, m + do l = 1, cmplx_size + data_fltr_cmplx_gpu(l + j*cmplx_size + (k - 1)*cmplx_size*x_size) = (0_dp, 0_dp) + end do end do + end do + $:END_GPU_PARALLEL_LOOP() + $:GPU_PARALLEL_LOOP(collapse=3, firstprivate='[i]') + do k = 1, sys_size + do j = 0, m + do l = 0, p + data_real_gpu(l + j*real_size + 1 + (k - 1)*real_size*x_size) = q_cons_vf(k)%sf(j, i, l) + end do + end do + end do + $:END_GPU_PARALLEL_LOOP() + + #:call GPU_HOST_DATA(use_device_addr='[data_real_gpu, data_cmplx_gpu, data_fltr_cmplx_gpu]') +#if defined(__PGI) + ierr = cufftExecD2Z(fwd_plan_gpu, data_real_gpu, data_cmplx_gpu) +#else + ierr = hipfftExecD2Z(fwd_plan_gpu, data_real_gpu, data_cmplx_gpu) + call hipCheck(hipDeviceSynchronize()) +#endif + #:endcall GPU_HOST_DATA + + Nfq = min(floor(2_dp*real(i, dp)*pi), cmplx_size) + $:GPU_UPDATE(device='[Nfq]') + + $:GPU_PARALLEL_LOOP(collapse=3) + do k = 1, sys_size + do j = 0, m + do l = 1, Nfq + data_fltr_cmplx_gpu(l + j*cmplx_size + (k - 1)*cmplx_size*x_size) = data_cmplx_gpu(l + j*cmplx_size + (k - 1)*cmplx_size*x_size) + end do + end do end do - $:END_GPU_PARALLEL_LOOP() + $:END_GPU_PARALLEL_LOOP() - #:call GPU_HOST_DATA(use_device_addr='[data_real_gpu, data_cmplx_gpu, data_fltr_cmplx_gpu]') + #:call GPU_HOST_DATA(use_device_addr='[data_real_gpu, data_cmplx_gpu, data_fltr_cmplx_gpu]') #if defined(__PGI) - ierr = cufftExecZ2D(bwd_plan_gpu, data_fltr_cmplx_gpu, data_real_gpu) + ierr = cufftExecZ2D(bwd_plan_gpu, data_fltr_cmplx_gpu, data_real_gpu) #else - ierr = hipfftExecZ2D(bwd_plan_gpu, data_fltr_cmplx_gpu, data_real_gpu) - call hipCheck(hipDeviceSynchronize()) + ierr = hipfftExecZ2D(bwd_plan_gpu, data_fltr_cmplx_gpu, data_real_gpu) + call hipCheck(hipDeviceSynchronize()) #endif - #:endcall GPU_HOST_DATA + #:endcall GPU_HOST_DATA - $:GPU_PARALLEL_LOOP(collapse=3, firstprivate='[i]') + $:GPU_PARALLEL_LOOP(collapse=3, firstprivate='[i]') do k = 1, sys_size do j = 0, m do l = 0, p @@ -228,66 +260,66 @@ contains end do end do end do - $:END_GPU_PARALLEL_LOOP() - end do + $:END_GPU_PARALLEL_LOOP() + end do #else - Nfq = 3 + Nfq = 3 + do j = 0, m + do k = 1, sys_size + data_fltr_cmplx(:) = (0_dp, 0_dp) + data_real(1:p + 1) = q_cons_vf(k)%sf(j, 0, 0:p) + call fftw_execute_dft_r2c(fwd_plan, data_real, data_cmplx) + data_fltr_cmplx(1:Nfq) = data_cmplx(1:Nfq) + call fftw_execute_dft_c2r(bwd_plan, data_fltr_cmplx, data_real) + data_real(:) = data_real(:)/real(real_size, dp) + q_cons_vf(k)%sf(j, 0, 0:p) = data_real(1:p + 1) + end do + end do + + ! Apply Fourier filter to additional rings + do i = 1, fourier_rings + Nfq = min(floor(2_dp*real(i, dp)*pi), cmplx_size) do j = 0, m do k = 1, sys_size data_fltr_cmplx(:) = (0_dp, 0_dp) - data_real(1:p + 1) = q_cons_vf(k)%sf(j, 0, 0:p) + data_real(1:p + 1) = q_cons_vf(k)%sf(j, i, 0:p) call fftw_execute_dft_r2c(fwd_plan, data_real, data_cmplx) data_fltr_cmplx(1:Nfq) = data_cmplx(1:Nfq) call fftw_execute_dft_c2r(bwd_plan, data_fltr_cmplx, data_real) data_real(:) = data_real(:)/real(real_size, dp) - q_cons_vf(k)%sf(j, 0, 0:p) = data_real(1:p + 1) - end do - end do - - ! Apply Fourier filter to additional rings - do i = 1, fourier_rings - Nfq = min(floor(2_dp*real(i, dp)*pi), cmplx_size) - do j = 0, m - do k = 1, sys_size - data_fltr_cmplx(:) = (0_dp, 0_dp) - data_real(1:p + 1) = q_cons_vf(k)%sf(j, i, 0:p) - call fftw_execute_dft_r2c(fwd_plan, data_real, data_cmplx) - data_fltr_cmplx(1:Nfq) = data_cmplx(1:Nfq) - call fftw_execute_dft_c2r(bwd_plan, data_fltr_cmplx, data_real) - data_real(:) = data_real(:)/real(real_size, dp) - q_cons_vf(k)%sf(j, i, 0:p) = data_real(1:p + 1) - end do + q_cons_vf(k)%sf(j, i, 0:p) = data_real(1:p + 1) end do end do + end do #endif - end subroutine s_apply_fourier_filter + end subroutine s_apply_fourier_filter - !> The purpose of this subroutine is to destroy the fftw plan + !> The purpose of this subroutine is to destroy the fftw plan !! that will be used in the forward and backward DFTs when !! applying the Fourier filter in the azimuthal direction. - impure subroutine s_finalize_fftw_module + impure subroutine s_finalize_fftw_module #if defined(MFC_GPU) - integer :: ierr !< Generic flag used to identify and report GPU errors - @:DEALLOCATE(data_real_gpu, data_fltr_cmplx_gpu, data_cmplx_gpu) + integer :: ierr !< Generic flag used to identify and report GPU errors + @:DEALLOCATE(data_real_gpu, data_fltr_cmplx_gpu, data_cmplx_gpu) #if defined(__PGI) - ierr = cufftDestroy(fwd_plan_gpu) - ierr = cufftDestroy(bwd_plan_gpu) + ierr = cufftDestroy(fwd_plan_gpu) + ierr = cufftDestroy(bwd_plan_gpu) #else - ierr = hipfftDestroy(fwd_plan_gpu) - ierr = hipfftDestroy(bwd_plan_gpu) + ierr = hipfftDestroy(fwd_plan_gpu) + ierr = hipfftDestroy(bwd_plan_gpu) #endif #else - call fftw_free(fftw_real_data) - call fftw_free(fftw_cmplx_data) - call fftw_free(fftw_fltr_cmplx_data) + call fftw_free(fftw_real_data) + call fftw_free(fftw_cmplx_data) + call fftw_free(fftw_fltr_cmplx_data) - call fftw_destroy_plan(fwd_plan) - call fftw_destroy_plan(bwd_plan) + call fftw_destroy_plan(fwd_plan) + call fftw_destroy_plan(bwd_plan) #endif - end subroutine s_finalize_fftw_module - end module m_fftw + end subroutine s_finalize_fftw_module +end module m_fftw \ No newline at end of file From 2376abc248b2166b6e8b7624f14ab4a76f725198 Mon Sep 17 00:00:00 2001 From: "Daniel J. Vickers" Date: Sun, 16 Nov 2025 13:58:00 -0600 Subject: [PATCH 19/33] formatting: --- src/simulation/m_fftw.fpp | 86 +++++++++++++++++++-------------------- 1 file changed, 43 insertions(+), 43 deletions(-) diff --git a/src/simulation/m_fftw.fpp b/src/simulation/m_fftw.fpp index 6e51106f7..411485af1 100644 --- a/src/simulation/m_fftw.fpp +++ b/src/simulation/m_fftw.fpp @@ -138,23 +138,23 @@ contains #if defined(MFC_GPU) $:GPU_PARALLEL_LOOP(collapse=3) - do k = 1, sys_size - do j = 0, m - do l = 1, cmplx_size - data_fltr_cmplx_gpu(l + j*cmplx_size + (k - 1)*cmplx_size*x_size) = (0_dp, 0_dp) - end do + do k = 1, sys_size + do j = 0, m + do l = 1, cmplx_size + data_fltr_cmplx_gpu(l + j*cmplx_size + (k - 1)*cmplx_size*x_size) = (0_dp, 0_dp) end do end do + end do $:END_GPU_PARALLEL_LOOP() $:GPU_PARALLEL_LOOP(collapse=3) - do k = 1, sys_size - do j = 0, m - do l = 0, p - data_real_gpu(l + j*real_size + 1 + (k - 1)*real_size*x_size) = q_cons_vf(k)%sf(j, 0, l) - end do + do k = 1, sys_size + do j = 0, m + do l = 0, p + data_real_gpu(l + j*real_size + 1 + (k - 1)*real_size*x_size) = q_cons_vf(k)%sf(j, 0, l) end do end do + end do $:END_GPU_PARALLEL_LOOP() #:call GPU_HOST_DATA(use_device_addr='[data_real_gpu, data_cmplx_gpu, data_fltr_cmplx_gpu]') @@ -169,13 +169,13 @@ contains $:GPU_UPDATE(device='[Nfq]') $:GPU_PARALLEL_LOOP(collapse=3) - do k = 1, sys_size - do j = 0, m - do l = 1, Nfq - data_fltr_cmplx_gpu(l + j*cmplx_size + (k - 1)*cmplx_size*x_size) = data_cmplx_gpu(l + j*cmplx_size + (k - 1)*cmplx_size*x_size) - end do + do k = 1, sys_size + do j = 0, m + do l = 1, Nfq + data_fltr_cmplx_gpu(l + j*cmplx_size + (k - 1)*cmplx_size*x_size) = data_cmplx_gpu(l + j*cmplx_size + (k - 1)*cmplx_size*x_size) end do end do + end do $:END_GPU_PARALLEL_LOOP() #:call GPU_HOST_DATA(use_device_addr='[data_real_gpu, data_cmplx_gpu, data_fltr_cmplx_gpu]') @@ -188,36 +188,36 @@ contains #:endcall GPU_HOST_DATA $:GPU_PARALLEL_LOOP(collapse=3) - do k = 1, sys_size - do j = 0, m - do l = 0, p - data_real_gpu(l + j*real_size + 1 + (k - 1)*real_size*x_size) = data_real_gpu(l + j*real_size + 1 + (k - 1)*real_size*x_size)/real(real_size, dp) - q_cons_vf(k)%sf(j, 0, l) = data_real_gpu(l + j*real_size + 1 + (k - 1)*real_size*x_size) - end do + do k = 1, sys_size + do j = 0, m + do l = 0, p + data_real_gpu(l + j*real_size + 1 + (k - 1)*real_size*x_size) = data_real_gpu(l + j*real_size + 1 + (k - 1)*real_size*x_size)/real(real_size, dp) + q_cons_vf(k)%sf(j, 0, l) = data_real_gpu(l + j*real_size + 1 + (k - 1)*real_size*x_size) end do end do + end do $:END_GPU_PARALLEL_LOOP() do i = 1, fourier_rings $:GPU_PARALLEL_LOOP(collapse=3) - do k = 1, sys_size - do j = 0, m - do l = 1, cmplx_size - data_fltr_cmplx_gpu(l + j*cmplx_size + (k - 1)*cmplx_size*x_size) = (0_dp, 0_dp) - end do + do k = 1, sys_size + do j = 0, m + do l = 1, cmplx_size + data_fltr_cmplx_gpu(l + j*cmplx_size + (k - 1)*cmplx_size*x_size) = (0_dp, 0_dp) end do end do + end do $:END_GPU_PARALLEL_LOOP() $:GPU_PARALLEL_LOOP(collapse=3, firstprivate='[i]') - do k = 1, sys_size - do j = 0, m - do l = 0, p - data_real_gpu(l + j*real_size + 1 + (k - 1)*real_size*x_size) = q_cons_vf(k)%sf(j, i, l) - end do + do k = 1, sys_size + do j = 0, m + do l = 0, p + data_real_gpu(l + j*real_size + 1 + (k - 1)*real_size*x_size) = q_cons_vf(k)%sf(j, i, l) end do end do + end do $:END_GPU_PARALLEL_LOOP() #:call GPU_HOST_DATA(use_device_addr='[data_real_gpu, data_cmplx_gpu, data_fltr_cmplx_gpu]') @@ -233,13 +233,13 @@ contains $:GPU_UPDATE(device='[Nfq]') $:GPU_PARALLEL_LOOP(collapse=3) - do k = 1, sys_size - do j = 0, m - do l = 1, Nfq - data_fltr_cmplx_gpu(l + j*cmplx_size + (k - 1)*cmplx_size*x_size) = data_cmplx_gpu(l + j*cmplx_size + (k - 1)*cmplx_size*x_size) - end do + do k = 1, sys_size + do j = 0, m + do l = 1, Nfq + data_fltr_cmplx_gpu(l + j*cmplx_size + (k - 1)*cmplx_size*x_size) = data_cmplx_gpu(l + j*cmplx_size + (k - 1)*cmplx_size*x_size) end do end do + end do $:END_GPU_PARALLEL_LOOP() #:call GPU_HOST_DATA(use_device_addr='[data_real_gpu, data_cmplx_gpu, data_fltr_cmplx_gpu]') @@ -252,14 +252,14 @@ contains #:endcall GPU_HOST_DATA $:GPU_PARALLEL_LOOP(collapse=3, firstprivate='[i]') - do k = 1, sys_size - do j = 0, m - do l = 0, p - data_real_gpu(l + j*real_size + 1 + (k - 1)*real_size*x_size) = data_real_gpu(l + j*real_size + 1 + (k - 1)*real_size*x_size)/real(real_size, dp) - q_cons_vf(k)%sf(j, i, l) = data_real_gpu(l + j*real_size + 1 + (k - 1)*real_size*x_size) - end do + do k = 1, sys_size + do j = 0, m + do l = 0, p + data_real_gpu(l + j*real_size + 1 + (k - 1)*real_size*x_size) = data_real_gpu(l + j*real_size + 1 + (k - 1)*real_size*x_size)/real(real_size, dp) + q_cons_vf(k)%sf(j, i, l) = data_real_gpu(l + j*real_size + 1 + (k - 1)*real_size*x_size) end do end do + end do $:END_GPU_PARALLEL_LOOP() end do @@ -322,4 +322,4 @@ contains #endif end subroutine s_finalize_fftw_module -end module m_fftw \ No newline at end of file +end module m_fftw From 0470192ca777e526e810a22306a31352de4b7361 Mon Sep 17 00:00:00 2001 From: Daniel Vickers Date: Sun, 16 Nov 2025 15:37:36 -0500 Subject: [PATCH 20/33] Hard reset of file with controled search for macro changes --- src/common/m_mpi_common.fpp | 402 ++++++++++++++++++------------------ 1 file changed, 201 insertions(+), 201 deletions(-) diff --git a/src/common/m_mpi_common.fpp b/src/common/m_mpi_common.fpp index 226ab1b4c..b856514b0 100644 --- a/src/common/m_mpi_common.fpp +++ b/src/common/m_mpi_common.fpp @@ -757,152 +757,152 @@ contains #:for mpi_dir in [1, 2, 3] if (mpi_dir == ${mpi_dir}$) then #:if mpi_dir == 1 - $:GPU_PARALLEL_LOOP(collapse=4,private='[i,j,k,l,r]') - do l = 0, p - do k = 0, n - do j = 0, buff_size - 1 - do i = 1, nVar - r = (i - 1) + v_size*(j + buff_size*(k + (n + 1)*l)) - buff_send(r) = real(q_comm(i)%sf(j + pack_offset, k, l), kind=wp) + $:GPU_PARALLEL_LOOP()(collapse=4,private='[r]') + do l = 0, p + do k = 0, n + do j = 0, buff_size - 1 + do i = 1, nVar + r = (i - 1) + v_size*(j + buff_size*(k + (n + 1)*l)) + buff_send(r) = real(q_comm(i)%sf(j + pack_offset, k, l), kind=wp) + end do end do end do end do - end do $:END_GPU_PARALLEL_LOOP() if (qbmm_comm) then - $:GPU_PARALLEL_LOOP(collapse=4,private='[i,j,k,l,r]') - do l = 0, p - do k = 0, n - do j = 0, buff_size - 1 - do i = nVar + 1, nVar + 4 - do q = 1, nb - r = (i - 1) + (q - 1)*4 + v_size* & - (j + buff_size*(k + (n + 1)*l)) - buff_send(r) = real(pb_in(j + pack_offset, k, l, i - nVar, q), kind=wp) + $:GPU_PARALLEL_LOOP()(collapse=4,private='[r]') + do l = 0, p + do k = 0, n + do j = 0, buff_size - 1 + do i = nVar + 1, nVar + 4 + do q = 1, nb + r = (i - 1) + (q - 1)*4 + v_size* & + (j + buff_size*(k + (n + 1)*l)) + buff_send(r) = real(pb_in(j + pack_offset, k, l, i - nVar, q), kind=wp) + end do end do end do end do end do - end do $:END_GPU_PARALLEL_LOOP() - $:GPU_PARALLEL_LOOP(collapse=5,private='[i,j,k,l,q,r]') - do l = 0, p - do k = 0, n - do j = 0, buff_size - 1 - do i = nVar + 1, nVar + 4 - do q = 1, nb - r = (i - 1) + (q - 1)*4 + nb*4 + v_size* & - (j + buff_size*(k + (n + 1)*l)) - buff_send(r) = real(mv_in(j + pack_offset, k, l, i - nVar, q), kind=wp) + $:GPU_PARALLEL_LOOP()(collapse=5,private='[r]') + do l = 0, p + do k = 0, n + do j = 0, buff_size - 1 + do i = nVar + 1, nVar + 4 + do q = 1, nb + r = (i - 1) + (q - 1)*4 + nb*4 + v_size* & + (j + buff_size*(k + (n + 1)*l)) + buff_send(r) = real(mv_in(j + pack_offset, k, l, i - nVar, q), kind=wp) + end do end do end do end do end do - end do $:END_GPU_PARALLEL_LOOP() end if #:elif mpi_dir == 2 - $:GPU_PARALLEL_LOOP(collapse=4,private='[i,j,k,l,r]') - do i = 1, nVar - do l = 0, p - do k = 0, buff_size - 1 - do j = -buff_size, m + buff_size - r = (i - 1) + v_size* & - ((j + buff_size) + (m + 2*buff_size + 1)* & - (k + buff_size*l)) - buff_send(r) = real(q_comm(i)%sf(j, k + pack_offset, l), kind=wp) + $:GPU_PARALLEL_LOOP()(collapse=4,private='[r]') + do i = 1, nVar + do l = 0, p + do k = 0, buff_size - 1 + do j = -buff_size, m + buff_size + r = (i - 1) + v_size* & + ((j + buff_size) + (m + 2*buff_size + 1)* & + (k + buff_size*l)) + buff_send(r) = real(q_comm(i)%sf(j, k + pack_offset, l), kind=wp) + end do end do end do end do - end do $:END_GPU_PARALLEL_LOOP() if (qbmm_comm) then - $:GPU_PARALLEL_LOOP(collapse=5,private='[i,j,k,l,q,r]') - do i = nVar + 1, nVar + 4 - do l = 0, p - do k = 0, buff_size - 1 - do j = -buff_size, m + buff_size - do q = 1, nb - r = (i - 1) + (q - 1)*4 + v_size* & - ((j + buff_size) + (m + 2*buff_size + 1)* & - (k + buff_size*l)) - buff_send(r) = real(pb_in(j, k + pack_offset, l, i - nVar, q), kind=wp) + $:GPU_PARALLEL_LOOP()(collapse=5,private='[r]') + do i = nVar + 1, nVar + 4 + do l = 0, p + do k = 0, buff_size - 1 + do j = -buff_size, m + buff_size + do q = 1, nb + r = (i - 1) + (q - 1)*4 + v_size* & + ((j + buff_size) + (m + 2*buff_size + 1)* & + (k + buff_size*l)) + buff_send(r) = real(pb_in(j, k + pack_offset, l, i - nVar, q), kind=wp) + end do end do end do end do end do - end do $:END_GPU_PARALLEL_LOOP() - $:GPU_PARALLEL_LOOP(collapse=5,private='[i,j,k,l,q,r]') - do i = nVar + 1, nVar + 4 - do l = 0, p - do k = 0, buff_size - 1 - do j = -buff_size, m + buff_size - do q = 1, nb - r = (i - 1) + (q - 1)*4 + nb*4 + v_size* & - ((j + buff_size) + (m + 2*buff_size + 1)* & - (k + buff_size*l)) - buff_send(r) = real(mv_in(j, k + pack_offset, l, i - nVar, q), kind=wp) + $:GPU_PARALLEL_LOOP()(collapse=5,private='[r]') + do i = nVar + 1, nVar + 4 + do l = 0, p + do k = 0, buff_size - 1 + do j = -buff_size, m + buff_size + do q = 1, nb + r = (i - 1) + (q - 1)*4 + nb*4 + v_size* & + ((j + buff_size) + (m + 2*buff_size + 1)* & + (k + buff_size*l)) + buff_send(r) = real(mv_in(j, k + pack_offset, l, i - nVar, q), kind=wp) + end do end do end do end do end do - end do $:END_GPU_PARALLEL_LOOP() end if #:else - $:GPU_PARALLEL_LOOP(collapse=4,private='[i,j,k,l,r]') - do i = 1, nVar - do l = 0, buff_size - 1 - do k = -buff_size, n + buff_size - do j = -buff_size, m + buff_size - r = (i - 1) + v_size* & - ((j + buff_size) + (m + 2*buff_size + 1)* & - ((k + buff_size) + (n + 2*buff_size + 1)*l)) - buff_send(r) = real(q_comm(i)%sf(j, k, l + pack_offset), kind=wp) + $:GPU_PARALLEL_LOOP()(collapse=4,private='[r]') + do i = 1, nVar + do l = 0, buff_size - 1 + do k = -buff_size, n + buff_size + do j = -buff_size, m + buff_size + r = (i - 1) + v_size* & + ((j + buff_size) + (m + 2*buff_size + 1)* & + ((k + buff_size) + (n + 2*buff_size + 1)*l)) + buff_send(r) = real(q_comm(i)%sf(j, k, l + pack_offset), kind=wp) + end do end do end do end do - end do $:END_GPU_PARALLEL_LOOP() if (qbmm_comm) then - $:GPU_PARALLEL_LOOP(collapse=5,private='[i,j,k,l,q,r]') - do i = nVar + 1, nVar + 4 - do l = 0, buff_size - 1 - do k = -buff_size, n + buff_size - do j = -buff_size, m + buff_size - do q = 1, nb - r = (i - 1) + (q - 1)*4 + v_size* & - ((j + buff_size) + (m + 2*buff_size + 1)* & - ((k + buff_size) + (n + 2*buff_size + 1)*l)) - buff_send(r) = real(pb_in(j, k, l + pack_offset, i - nVar, q), kind=wp) + $:GPU_PARALLEL_LOOP()(collapse=5,private='[r]') + do i = nVar + 1, nVar + 4 + do l = 0, buff_size - 1 + do k = -buff_size, n + buff_size + do j = -buff_size, m + buff_size + do q = 1, nb + r = (i - 1) + (q - 1)*4 + v_size* & + ((j + buff_size) + (m + 2*buff_size + 1)* & + ((k + buff_size) + (n + 2*buff_size + 1)*l)) + buff_send(r) = real(pb_in(j, k, l + pack_offset, i - nVar, q), kind=wp) + end do end do end do end do end do - end do $:END_GPU_PARALLEL_LOOP() - $:GPU_PARALLEL_LOOP(collapse=5,private='[i,j,k,l,q,r]') - do i = nVar + 1, nVar + 4 - do l = 0, buff_size - 1 - do k = -buff_size, n + buff_size - do j = -buff_size, m + buff_size - do q = 1, nb - r = (i - 1) + (q - 1)*4 + nb*4 + v_size* & - ((j + buff_size) + (m + 2*buff_size + 1)* & - ((k + buff_size) + (n + 2*buff_size + 1)*l)) - buff_send(r) = real(mv_in(j, k, l + pack_offset, i - nVar, q), kind=wp) + $:GPU_PARALLEL_LOOP()(collapse=5,private='[r]') + do i = nVar + 1, nVar + 4 + do l = 0, buff_size - 1 + do k = -buff_size, n + buff_size + do j = -buff_size, m + buff_size + do q = 1, nb + r = (i - 1) + (q - 1)*4 + nb*4 + v_size* & + ((j + buff_size) + (m + 2*buff_size + 1)* & + ((k + buff_size) + (n + 2*buff_size + 1)*l)) + buff_send(r) = real(mv_in(j, k, l + pack_offset, i - nVar, q), kind=wp) + end do end do end do end do end do - end do $:END_GPU_PARALLEL_LOOP() end if #:endif @@ -958,175 +958,175 @@ contains #:for mpi_dir in [1, 2, 3] if (mpi_dir == ${mpi_dir}$) then #:if mpi_dir == 1 - $:GPU_PARALLEL_LOOP(collapse=4,private='[i,j,k,l,r]') - do l = 0, p - do k = 0, n - do j = -buff_size, -1 - do i = 1, nVar - r = (i - 1) + v_size* & - (j + buff_size*((k + 1) + (n + 1)*l)) - q_comm(i)%sf(j + unpack_offset, k, l) = real(buff_recv(r), kind=stp) + $:GPU_PARALLEL_LOOP()(collapse=4,private='[r]') + do l = 0, p + do k = 0, n + do j = -buff_size, -1 + do i = 1, nVar + r = (i - 1) + v_size* & + (j + buff_size*((k + 1) + (n + 1)*l)) + q_comm(i)%sf(j + unpack_offset, k, l) = real(buff_recv(r), kind=stp) #if defined(__INTEL_COMPILER) - if (ieee_is_nan(q_comm(i)%sf(j, k, l))) then - print *, "Error", j, k, l, i - error stop "NaN(s) in recv" - end if + if (ieee_is_nan(q_comm(i)%sf(j, k, l))) then + print *, "Error", j, k, l, i + error stop "NaN(s) in recv" + end if #endif + end do end do end do end do - end do $:END_GPU_PARALLEL_LOOP() if (qbmm_comm) then - $:GPU_PARALLEL_LOOP(collapse=5,private='[i,j,k,l,q,r]') - do l = 0, p - do k = 0, n - do j = -buff_size, -1 - do i = nVar + 1, nVar + 4 - do q = 1, nb - r = (i - 1) + (q - 1)*4 + v_size* & - (j + buff_size*((k + 1) + (n + 1)*l)) - pb_in(j + unpack_offset, k, l, i - nVar, q) = real(buff_recv(r), kind=stp) + $:GPU_PARALLEL_LOOP()(collapse=5,private='[r]') + do l = 0, p + do k = 0, n + do j = -buff_size, -1 + do i = nVar + 1, nVar + 4 + do q = 1, nb + r = (i - 1) + (q - 1)*4 + v_size* & + (j + buff_size*((k + 1) + (n + 1)*l)) + pb_in(j + unpack_offset, k, l, i - nVar, q) = real(buff_recv(r), kind=stp) + end do end do end do end do end do - end do $:END_GPU_PARALLEL_LOOP() - $:GPU_PARALLEL_LOOP(collapse=5,private='[i,j,k,l,q,r]') - do l = 0, p - do k = 0, n - do j = -buff_size, -1 - do i = nVar + 1, nVar + 4 - do q = 1, nb - r = (i - 1) + (q - 1)*4 + nb*4 + v_size* & - (j + buff_size*((k + 1) + (n + 1)*l)) - mv_in(j + unpack_offset, k, l, i - nVar, q) = real(buff_recv(r), kind=stp) + $:GPU_PARALLEL_LOOP()(collapse=5,private='[r]') + do l = 0, p + do k = 0, n + do j = -buff_size, -1 + do i = nVar + 1, nVar + 4 + do q = 1, nb + r = (i - 1) + (q - 1)*4 + nb*4 + v_size* & + (j + buff_size*((k + 1) + (n + 1)*l)) + mv_in(j + unpack_offset, k, l, i - nVar, q) = real(buff_recv(r), kind=stp) + end do end do end do end do end do - end do $:END_GPU_PARALLEL_LOOP() end if #:elif mpi_dir == 2 - $:GPU_PARALLEL_LOOP(collapse=4,private='[i,j,k,l,r]') - do i = 1, nVar - do l = 0, p - do k = -buff_size, -1 - do j = -buff_size, m + buff_size - r = (i - 1) + v_size* & - ((j + buff_size) + (m + 2*buff_size + 1)* & - ((k + buff_size) + buff_size*l)) - q_comm(i)%sf(j, k + unpack_offset, l) = real(buff_recv(r), kind=stp) + $:GPU_PARALLEL_LOOP()(collapse=4,private='[r]') + do i = 1, nVar + do l = 0, p + do k = -buff_size, -1 + do j = -buff_size, m + buff_size + r = (i - 1) + v_size* & + ((j + buff_size) + (m + 2*buff_size + 1)* & + ((k + buff_size) + buff_size*l)) + q_comm(i)%sf(j, k + unpack_offset, l) = real(buff_recv(r), kind=stp) #if defined(__INTEL_COMPILER) - if (ieee_is_nan(q_comm(i)%sf(j, k, l))) then - print *, "Error", j, k, l, i - error stop "NaN(s) in recv" - end if + if (ieee_is_nan(q_comm(i)%sf(j, k, l))) then + print *, "Error", j, k, l, i + error stop "NaN(s) in recv" + end if #endif + end do end do end do end do - end do $:END_GPU_PARALLEL_LOOP() if (qbmm_comm) then - $:GPU_PARALLEL_LOOP(collapse=5,private='[i,j,k,l,q,r]') - do i = nVar + 1, nVar + 4 - do l = 0, p - do k = -buff_size, -1 - do j = -buff_size, m + buff_size - do q = 1, nb - r = (i - 1) + (q - 1)*4 + v_size* & - ((j + buff_size) + (m + 2*buff_size + 1)* & - ((k + buff_size) + buff_size*l)) - pb_in(j, k + unpack_offset, l, i - nVar, q) = real(buff_recv(r), kind=stp) + $:GPU_PARALLEL_LOOP()(collapse=5,private='[r]') + do i = nVar + 1, nVar + 4 + do l = 0, p + do k = -buff_size, -1 + do j = -buff_size, m + buff_size + do q = 1, nb + r = (i - 1) + (q - 1)*4 + v_size* & + ((j + buff_size) + (m + 2*buff_size + 1)* & + ((k + buff_size) + buff_size*l)) + pb_in(j, k + unpack_offset, l, i - nVar, q) = real(buff_recv(r), kind=stp) + end do end do end do end do end do - end do $:END_GPU_PARALLEL_LOOP() - $:GPU_PARALLEL_LOOP(collapse=5,private='[i,j,k,l,q,r]') - do i = nVar + 1, nVar + 4 - do l = 0, p - do k = -buff_size, -1 - do j = -buff_size, m + buff_size - do q = 1, nb - r = (i - 1) + (q - 1)*4 + nb*4 + v_size* & - ((j + buff_size) + (m + 2*buff_size + 1)* & - ((k + buff_size) + buff_size*l)) - mv_in(j, k + unpack_offset, l, i - nVar, q) = real(buff_recv(r), kind=stp) + $:GPU_PARALLEL_LOOP()(collapse=5,private='[r]') + do i = nVar + 1, nVar + 4 + do l = 0, p + do k = -buff_size, -1 + do j = -buff_size, m + buff_size + do q = 1, nb + r = (i - 1) + (q - 1)*4 + nb*4 + v_size* & + ((j + buff_size) + (m + 2*buff_size + 1)* & + ((k + buff_size) + buff_size*l)) + mv_in(j, k + unpack_offset, l, i - nVar, q) = real(buff_recv(r), kind=stp) + end do end do end do end do end do - end do $:END_GPU_PARALLEL_LOOP() end if #:else ! Unpacking buffer from bc_z%beg - $:GPU_PARALLEL_LOOP(collapse=4,private='[i,j,k,l,r]') - do i = 1, nVar - do l = -buff_size, -1 - do k = -buff_size, n + buff_size - do j = -buff_size, m + buff_size - r = (i - 1) + v_size* & - ((j + buff_size) + (m + 2*buff_size + 1)* & - ((k + buff_size) + (n + 2*buff_size + 1)* & - (l + buff_size))) - q_comm(i)%sf(j, k, l + unpack_offset) = real(buff_recv(r), kind=stp) + $:GPU_PARALLEL_LOOP()(collapse=4,private='[r]') + do i = 1, nVar + do l = -buff_size, -1 + do k = -buff_size, n + buff_size + do j = -buff_size, m + buff_size + r = (i - 1) + v_size* & + ((j + buff_size) + (m + 2*buff_size + 1)* & + ((k + buff_size) + (n + 2*buff_size + 1)* & + (l + buff_size))) + q_comm(i)%sf(j, k, l + unpack_offset) = real(buff_recv(r), kind=stp) #if defined(__INTEL_COMPILER) - if (ieee_is_nan(q_comm(i)%sf(j, k, l))) then - print *, "Error", j, k, l, i - error stop "NaN(s) in recv" - end if + if (ieee_is_nan(q_comm(i)%sf(j, k, l))) then + print *, "Error", j, k, l, i + error stop "NaN(s) in recv" + end if #endif + end do end do end do end do - end do $:END_GPU_PARALLEL_LOOP() if (qbmm_comm) then - $:GPU_PARALLEL_LOOP(collapse=5,private='[i,j,k,l,q,r]') - do i = nVar + 1, nVar + 4 - do l = -buff_size, -1 - do k = -buff_size, n + buff_size - do j = -buff_size, m + buff_size - do q = 1, nb - r = (i - 1) + (q - 1)*4 + v_size* & - ((j + buff_size) + (m + 2*buff_size + 1)* & - ((k + buff_size) + (n + 2*buff_size + 1)* & - (l + buff_size))) - pb_in(j, k, l + unpack_offset, i - nVar, q) = real(buff_recv(r), kind=stp) + $:GPU_PARALLEL_LOOP()(collapse=5,private='[r]') + do i = nVar + 1, nVar + 4 + do l = -buff_size, -1 + do k = -buff_size, n + buff_size + do j = -buff_size, m + buff_size + do q = 1, nb + r = (i - 1) + (q - 1)*4 + v_size* & + ((j + buff_size) + (m + 2*buff_size + 1)* & + ((k + buff_size) + (n + 2*buff_size + 1)* & + (l + buff_size))) + pb_in(j, k, l + unpack_offset, i - nVar, q) = real(buff_recv(r), kind=stp) + end do end do end do end do end do - end do $:END_GPU_PARALLEL_LOOP() - $:GPU_PARALLEL_LOOP(collapse=5,private='[i,j,k,l,q,r]') - do i = nVar + 1, nVar + 4 - do l = -buff_size, -1 - do k = -buff_size, n + buff_size - do j = -buff_size, m + buff_size - do q = 1, nb - r = (i - 1) + (q - 1)*4 + nb*4 + v_size* & - ((j + buff_size) + (m + 2*buff_size + 1)* & - ((k + buff_size) + (n + 2*buff_size + 1)* & - (l + buff_size))) - mv_in(j, k, l + unpack_offset, i - nVar, q) = real(buff_recv(r), kind=stp) + $:GPU_PARALLEL_LOOP()(collapse=5,private='[r]') + do i = nVar + 1, nVar + 4 + do l = -buff_size, -1 + do k = -buff_size, n + buff_size + do j = -buff_size, m + buff_size + do q = 1, nb + r = (i - 1) + (q - 1)*4 + nb*4 + v_size* & + ((j + buff_size) + (m + 2*buff_size + 1)* & + ((k + buff_size) + (n + 2*buff_size + 1)* & + (l + buff_size))) + mv_in(j, k, l + unpack_offset, i - nVar, q) = real(buff_recv(r), kind=stp) + end do end do end do end do end do - end do $:END_GPU_PARALLEL_LOOP() end if #:endif @@ -1871,4 +1871,4 @@ contains end subroutine s_finalize_mpi_common_module -end module m_mpi_common +end module m_mpi_common \ No newline at end of file From c451b2021565ad1bd002f743b5df40a2aed659a1 Mon Sep 17 00:00:00 2001 From: Daniel Vickers Date: Sun, 16 Nov 2025 15:40:58 -0500 Subject: [PATCH 21/33] Formatting --- src/common/m_mpi_common.fpp | 366 ++++++++++++++++++------------------ 1 file changed, 183 insertions(+), 183 deletions(-) diff --git a/src/common/m_mpi_common.fpp b/src/common/m_mpi_common.fpp index b856514b0..b8a8b0c5e 100644 --- a/src/common/m_mpi_common.fpp +++ b/src/common/m_mpi_common.fpp @@ -758,151 +758,151 @@ contains if (mpi_dir == ${mpi_dir}$) then #:if mpi_dir == 1 $:GPU_PARALLEL_LOOP()(collapse=4,private='[r]') - do l = 0, p - do k = 0, n - do j = 0, buff_size - 1 - do i = 1, nVar - r = (i - 1) + v_size*(j + buff_size*(k + (n + 1)*l)) - buff_send(r) = real(q_comm(i)%sf(j + pack_offset, k, l), kind=wp) - end do + do l = 0, p + do k = 0, n + do j = 0, buff_size - 1 + do i = 1, nVar + r = (i - 1) + v_size*(j + buff_size*(k + (n + 1)*l)) + buff_send(r) = real(q_comm(i)%sf(j + pack_offset, k, l), kind=wp) end do end do end do + end do $:END_GPU_PARALLEL_LOOP() if (qbmm_comm) then $:GPU_PARALLEL_LOOP()(collapse=4,private='[r]') - do l = 0, p - do k = 0, n - do j = 0, buff_size - 1 - do i = nVar + 1, nVar + 4 - do q = 1, nb - r = (i - 1) + (q - 1)*4 + v_size* & - (j + buff_size*(k + (n + 1)*l)) - buff_send(r) = real(pb_in(j + pack_offset, k, l, i - nVar, q), kind=wp) - end do + do l = 0, p + do k = 0, n + do j = 0, buff_size - 1 + do i = nVar + 1, nVar + 4 + do q = 1, nb + r = (i - 1) + (q - 1)*4 + v_size* & + (j + buff_size*(k + (n + 1)*l)) + buff_send(r) = real(pb_in(j + pack_offset, k, l, i - nVar, q), kind=wp) end do end do end do end do + end do $:END_GPU_PARALLEL_LOOP() $:GPU_PARALLEL_LOOP()(collapse=5,private='[r]') - do l = 0, p - do k = 0, n - do j = 0, buff_size - 1 - do i = nVar + 1, nVar + 4 - do q = 1, nb - r = (i - 1) + (q - 1)*4 + nb*4 + v_size* & - (j + buff_size*(k + (n + 1)*l)) - buff_send(r) = real(mv_in(j + pack_offset, k, l, i - nVar, q), kind=wp) - end do + do l = 0, p + do k = 0, n + do j = 0, buff_size - 1 + do i = nVar + 1, nVar + 4 + do q = 1, nb + r = (i - 1) + (q - 1)*4 + nb*4 + v_size* & + (j + buff_size*(k + (n + 1)*l)) + buff_send(r) = real(mv_in(j + pack_offset, k, l, i - nVar, q), kind=wp) end do end do end do end do + end do $:END_GPU_PARALLEL_LOOP() end if #:elif mpi_dir == 2 $:GPU_PARALLEL_LOOP()(collapse=4,private='[r]') - do i = 1, nVar - do l = 0, p - do k = 0, buff_size - 1 - do j = -buff_size, m + buff_size - r = (i - 1) + v_size* & - ((j + buff_size) + (m + 2*buff_size + 1)* & - (k + buff_size*l)) - buff_send(r) = real(q_comm(i)%sf(j, k + pack_offset, l), kind=wp) - end do + do i = 1, nVar + do l = 0, p + do k = 0, buff_size - 1 + do j = -buff_size, m + buff_size + r = (i - 1) + v_size* & + ((j + buff_size) + (m + 2*buff_size + 1)* & + (k + buff_size*l)) + buff_send(r) = real(q_comm(i)%sf(j, k + pack_offset, l), kind=wp) end do end do end do + end do $:END_GPU_PARALLEL_LOOP() if (qbmm_comm) then $:GPU_PARALLEL_LOOP()(collapse=5,private='[r]') - do i = nVar + 1, nVar + 4 - do l = 0, p - do k = 0, buff_size - 1 - do j = -buff_size, m + buff_size - do q = 1, nb - r = (i - 1) + (q - 1)*4 + v_size* & - ((j + buff_size) + (m + 2*buff_size + 1)* & - (k + buff_size*l)) - buff_send(r) = real(pb_in(j, k + pack_offset, l, i - nVar, q), kind=wp) - end do + do i = nVar + 1, nVar + 4 + do l = 0, p + do k = 0, buff_size - 1 + do j = -buff_size, m + buff_size + do q = 1, nb + r = (i - 1) + (q - 1)*4 + v_size* & + ((j + buff_size) + (m + 2*buff_size + 1)* & + (k + buff_size*l)) + buff_send(r) = real(pb_in(j, k + pack_offset, l, i - nVar, q), kind=wp) end do end do end do end do + end do $:END_GPU_PARALLEL_LOOP() $:GPU_PARALLEL_LOOP()(collapse=5,private='[r]') - do i = nVar + 1, nVar + 4 - do l = 0, p - do k = 0, buff_size - 1 - do j = -buff_size, m + buff_size - do q = 1, nb - r = (i - 1) + (q - 1)*4 + nb*4 + v_size* & - ((j + buff_size) + (m + 2*buff_size + 1)* & - (k + buff_size*l)) - buff_send(r) = real(mv_in(j, k + pack_offset, l, i - nVar, q), kind=wp) - end do + do i = nVar + 1, nVar + 4 + do l = 0, p + do k = 0, buff_size - 1 + do j = -buff_size, m + buff_size + do q = 1, nb + r = (i - 1) + (q - 1)*4 + nb*4 + v_size* & + ((j + buff_size) + (m + 2*buff_size + 1)* & + (k + buff_size*l)) + buff_send(r) = real(mv_in(j, k + pack_offset, l, i - nVar, q), kind=wp) end do end do end do end do + end do $:END_GPU_PARALLEL_LOOP() end if #:else $:GPU_PARALLEL_LOOP()(collapse=4,private='[r]') - do i = 1, nVar - do l = 0, buff_size - 1 - do k = -buff_size, n + buff_size - do j = -buff_size, m + buff_size - r = (i - 1) + v_size* & - ((j + buff_size) + (m + 2*buff_size + 1)* & - ((k + buff_size) + (n + 2*buff_size + 1)*l)) - buff_send(r) = real(q_comm(i)%sf(j, k, l + pack_offset), kind=wp) - end do + do i = 1, nVar + do l = 0, buff_size - 1 + do k = -buff_size, n + buff_size + do j = -buff_size, m + buff_size + r = (i - 1) + v_size* & + ((j + buff_size) + (m + 2*buff_size + 1)* & + ((k + buff_size) + (n + 2*buff_size + 1)*l)) + buff_send(r) = real(q_comm(i)%sf(j, k, l + pack_offset), kind=wp) end do end do end do + end do $:END_GPU_PARALLEL_LOOP() if (qbmm_comm) then $:GPU_PARALLEL_LOOP()(collapse=5,private='[r]') - do i = nVar + 1, nVar + 4 - do l = 0, buff_size - 1 - do k = -buff_size, n + buff_size - do j = -buff_size, m + buff_size - do q = 1, nb - r = (i - 1) + (q - 1)*4 + v_size* & - ((j + buff_size) + (m + 2*buff_size + 1)* & - ((k + buff_size) + (n + 2*buff_size + 1)*l)) - buff_send(r) = real(pb_in(j, k, l + pack_offset, i - nVar, q), kind=wp) - end do + do i = nVar + 1, nVar + 4 + do l = 0, buff_size - 1 + do k = -buff_size, n + buff_size + do j = -buff_size, m + buff_size + do q = 1, nb + r = (i - 1) + (q - 1)*4 + v_size* & + ((j + buff_size) + (m + 2*buff_size + 1)* & + ((k + buff_size) + (n + 2*buff_size + 1)*l)) + buff_send(r) = real(pb_in(j, k, l + pack_offset, i - nVar, q), kind=wp) end do end do end do end do + end do $:END_GPU_PARALLEL_LOOP() $:GPU_PARALLEL_LOOP()(collapse=5,private='[r]') - do i = nVar + 1, nVar + 4 - do l = 0, buff_size - 1 - do k = -buff_size, n + buff_size - do j = -buff_size, m + buff_size - do q = 1, nb - r = (i - 1) + (q - 1)*4 + nb*4 + v_size* & - ((j + buff_size) + (m + 2*buff_size + 1)* & - ((k + buff_size) + (n + 2*buff_size + 1)*l)) - buff_send(r) = real(mv_in(j, k, l + pack_offset, i - nVar, q), kind=wp) - end do + do i = nVar + 1, nVar + 4 + do l = 0, buff_size - 1 + do k = -buff_size, n + buff_size + do j = -buff_size, m + buff_size + do q = 1, nb + r = (i - 1) + (q - 1)*4 + nb*4 + v_size* & + ((j + buff_size) + (m + 2*buff_size + 1)* & + ((k + buff_size) + (n + 2*buff_size + 1)*l)) + buff_send(r) = real(mv_in(j, k, l + pack_offset, i - nVar, q), kind=wp) end do end do end do end do + end do $:END_GPU_PARALLEL_LOOP() end if #:endif @@ -959,174 +959,174 @@ contains if (mpi_dir == ${mpi_dir}$) then #:if mpi_dir == 1 $:GPU_PARALLEL_LOOP()(collapse=4,private='[r]') - do l = 0, p - do k = 0, n - do j = -buff_size, -1 - do i = 1, nVar - r = (i - 1) + v_size* & - (j + buff_size*((k + 1) + (n + 1)*l)) - q_comm(i)%sf(j + unpack_offset, k, l) = real(buff_recv(r), kind=stp) + do l = 0, p + do k = 0, n + do j = -buff_size, -1 + do i = 1, nVar + r = (i - 1) + v_size* & + (j + buff_size*((k + 1) + (n + 1)*l)) + q_comm(i)%sf(j + unpack_offset, k, l) = real(buff_recv(r), kind=stp) #if defined(__INTEL_COMPILER) - if (ieee_is_nan(q_comm(i)%sf(j, k, l))) then - print *, "Error", j, k, l, i - error stop "NaN(s) in recv" - end if + if (ieee_is_nan(q_comm(i)%sf(j, k, l))) then + print *, "Error", j, k, l, i + error stop "NaN(s) in recv" + end if #endif - end do end do end do end do + end do $:END_GPU_PARALLEL_LOOP() if (qbmm_comm) then $:GPU_PARALLEL_LOOP()(collapse=5,private='[r]') - do l = 0, p - do k = 0, n - do j = -buff_size, -1 - do i = nVar + 1, nVar + 4 - do q = 1, nb - r = (i - 1) + (q - 1)*4 + v_size* & - (j + buff_size*((k + 1) + (n + 1)*l)) - pb_in(j + unpack_offset, k, l, i - nVar, q) = real(buff_recv(r), kind=stp) - end do + do l = 0, p + do k = 0, n + do j = -buff_size, -1 + do i = nVar + 1, nVar + 4 + do q = 1, nb + r = (i - 1) + (q - 1)*4 + v_size* & + (j + buff_size*((k + 1) + (n + 1)*l)) + pb_in(j + unpack_offset, k, l, i - nVar, q) = real(buff_recv(r), kind=stp) end do end do end do end do + end do $:END_GPU_PARALLEL_LOOP() $:GPU_PARALLEL_LOOP()(collapse=5,private='[r]') - do l = 0, p - do k = 0, n - do j = -buff_size, -1 - do i = nVar + 1, nVar + 4 - do q = 1, nb - r = (i - 1) + (q - 1)*4 + nb*4 + v_size* & - (j + buff_size*((k + 1) + (n + 1)*l)) - mv_in(j + unpack_offset, k, l, i - nVar, q) = real(buff_recv(r), kind=stp) - end do + do l = 0, p + do k = 0, n + do j = -buff_size, -1 + do i = nVar + 1, nVar + 4 + do q = 1, nb + r = (i - 1) + (q - 1)*4 + nb*4 + v_size* & + (j + buff_size*((k + 1) + (n + 1)*l)) + mv_in(j + unpack_offset, k, l, i - nVar, q) = real(buff_recv(r), kind=stp) end do end do end do end do + end do $:END_GPU_PARALLEL_LOOP() end if #:elif mpi_dir == 2 $:GPU_PARALLEL_LOOP()(collapse=4,private='[r]') - do i = 1, nVar - do l = 0, p - do k = -buff_size, -1 - do j = -buff_size, m + buff_size - r = (i - 1) + v_size* & - ((j + buff_size) + (m + 2*buff_size + 1)* & - ((k + buff_size) + buff_size*l)) - q_comm(i)%sf(j, k + unpack_offset, l) = real(buff_recv(r), kind=stp) + do i = 1, nVar + do l = 0, p + do k = -buff_size, -1 + do j = -buff_size, m + buff_size + r = (i - 1) + v_size* & + ((j + buff_size) + (m + 2*buff_size + 1)* & + ((k + buff_size) + buff_size*l)) + q_comm(i)%sf(j, k + unpack_offset, l) = real(buff_recv(r), kind=stp) #if defined(__INTEL_COMPILER) - if (ieee_is_nan(q_comm(i)%sf(j, k, l))) then - print *, "Error", j, k, l, i - error stop "NaN(s) in recv" - end if + if (ieee_is_nan(q_comm(i)%sf(j, k, l))) then + print *, "Error", j, k, l, i + error stop "NaN(s) in recv" + end if #endif - end do end do end do end do + end do $:END_GPU_PARALLEL_LOOP() if (qbmm_comm) then $:GPU_PARALLEL_LOOP()(collapse=5,private='[r]') - do i = nVar + 1, nVar + 4 - do l = 0, p - do k = -buff_size, -1 - do j = -buff_size, m + buff_size - do q = 1, nb - r = (i - 1) + (q - 1)*4 + v_size* & - ((j + buff_size) + (m + 2*buff_size + 1)* & - ((k + buff_size) + buff_size*l)) - pb_in(j, k + unpack_offset, l, i - nVar, q) = real(buff_recv(r), kind=stp) - end do + do i = nVar + 1, nVar + 4 + do l = 0, p + do k = -buff_size, -1 + do j = -buff_size, m + buff_size + do q = 1, nb + r = (i - 1) + (q - 1)*4 + v_size* & + ((j + buff_size) + (m + 2*buff_size + 1)* & + ((k + buff_size) + buff_size*l)) + pb_in(j, k + unpack_offset, l, i - nVar, q) = real(buff_recv(r), kind=stp) end do end do end do end do + end do $:END_GPU_PARALLEL_LOOP() $:GPU_PARALLEL_LOOP()(collapse=5,private='[r]') - do i = nVar + 1, nVar + 4 - do l = 0, p - do k = -buff_size, -1 - do j = -buff_size, m + buff_size - do q = 1, nb - r = (i - 1) + (q - 1)*4 + nb*4 + v_size* & - ((j + buff_size) + (m + 2*buff_size + 1)* & - ((k + buff_size) + buff_size*l)) - mv_in(j, k + unpack_offset, l, i - nVar, q) = real(buff_recv(r), kind=stp) - end do + do i = nVar + 1, nVar + 4 + do l = 0, p + do k = -buff_size, -1 + do j = -buff_size, m + buff_size + do q = 1, nb + r = (i - 1) + (q - 1)*4 + nb*4 + v_size* & + ((j + buff_size) + (m + 2*buff_size + 1)* & + ((k + buff_size) + buff_size*l)) + mv_in(j, k + unpack_offset, l, i - nVar, q) = real(buff_recv(r), kind=stp) end do end do end do end do + end do $:END_GPU_PARALLEL_LOOP() end if #:else ! Unpacking buffer from bc_z%beg $:GPU_PARALLEL_LOOP()(collapse=4,private='[r]') - do i = 1, nVar - do l = -buff_size, -1 - do k = -buff_size, n + buff_size - do j = -buff_size, m + buff_size - r = (i - 1) + v_size* & - ((j + buff_size) + (m + 2*buff_size + 1)* & - ((k + buff_size) + (n + 2*buff_size + 1)* & - (l + buff_size))) - q_comm(i)%sf(j, k, l + unpack_offset) = real(buff_recv(r), kind=stp) + do i = 1, nVar + do l = -buff_size, -1 + do k = -buff_size, n + buff_size + do j = -buff_size, m + buff_size + r = (i - 1) + v_size* & + ((j + buff_size) + (m + 2*buff_size + 1)* & + ((k + buff_size) + (n + 2*buff_size + 1)* & + (l + buff_size))) + q_comm(i)%sf(j, k, l + unpack_offset) = real(buff_recv(r), kind=stp) #if defined(__INTEL_COMPILER) - if (ieee_is_nan(q_comm(i)%sf(j, k, l))) then - print *, "Error", j, k, l, i - error stop "NaN(s) in recv" - end if + if (ieee_is_nan(q_comm(i)%sf(j, k, l))) then + print *, "Error", j, k, l, i + error stop "NaN(s) in recv" + end if #endif - end do end do end do end do + end do $:END_GPU_PARALLEL_LOOP() if (qbmm_comm) then $:GPU_PARALLEL_LOOP()(collapse=5,private='[r]') - do i = nVar + 1, nVar + 4 - do l = -buff_size, -1 - do k = -buff_size, n + buff_size - do j = -buff_size, m + buff_size - do q = 1, nb - r = (i - 1) + (q - 1)*4 + v_size* & - ((j + buff_size) + (m + 2*buff_size + 1)* & - ((k + buff_size) + (n + 2*buff_size + 1)* & - (l + buff_size))) - pb_in(j, k, l + unpack_offset, i - nVar, q) = real(buff_recv(r), kind=stp) - end do + do i = nVar + 1, nVar + 4 + do l = -buff_size, -1 + do k = -buff_size, n + buff_size + do j = -buff_size, m + buff_size + do q = 1, nb + r = (i - 1) + (q - 1)*4 + v_size* & + ((j + buff_size) + (m + 2*buff_size + 1)* & + ((k + buff_size) + (n + 2*buff_size + 1)* & + (l + buff_size))) + pb_in(j, k, l + unpack_offset, i - nVar, q) = real(buff_recv(r), kind=stp) end do end do end do end do + end do $:END_GPU_PARALLEL_LOOP() $:GPU_PARALLEL_LOOP()(collapse=5,private='[r]') - do i = nVar + 1, nVar + 4 - do l = -buff_size, -1 - do k = -buff_size, n + buff_size - do j = -buff_size, m + buff_size - do q = 1, nb - r = (i - 1) + (q - 1)*4 + nb*4 + v_size* & - ((j + buff_size) + (m + 2*buff_size + 1)* & - ((k + buff_size) + (n + 2*buff_size + 1)* & - (l + buff_size))) - mv_in(j, k, l + unpack_offset, i - nVar, q) = real(buff_recv(r), kind=stp) - end do + do i = nVar + 1, nVar + 4 + do l = -buff_size, -1 + do k = -buff_size, n + buff_size + do j = -buff_size, m + buff_size + do q = 1, nb + r = (i - 1) + (q - 1)*4 + nb*4 + v_size* & + ((j + buff_size) + (m + 2*buff_size + 1)* & + ((k + buff_size) + (n + 2*buff_size + 1)* & + (l + buff_size))) + mv_in(j, k, l + unpack_offset, i - nVar, q) = real(buff_recv(r), kind=stp) end do end do end do end do + end do $:END_GPU_PARALLEL_LOOP() end if #:endif @@ -1871,4 +1871,4 @@ contains end subroutine s_finalize_mpi_common_module -end module m_mpi_common \ No newline at end of file +end module m_mpi_common From db852bfbaa64b48a9c71ebeb161e03eb3fbbcc43 Mon Sep 17 00:00:00 2001 From: Daniel Vickers Date: Sun, 16 Nov 2025 15:44:45 -0500 Subject: [PATCH 22/33] Parenthesis change --- src/common/m_mpi_common.fpp | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/src/common/m_mpi_common.fpp b/src/common/m_mpi_common.fpp index b8a8b0c5e..3ff753cac 100644 --- a/src/common/m_mpi_common.fpp +++ b/src/common/m_mpi_common.fpp @@ -757,7 +757,7 @@ contains #:for mpi_dir in [1, 2, 3] if (mpi_dir == ${mpi_dir}$) then #:if mpi_dir == 1 - $:GPU_PARALLEL_LOOP()(collapse=4,private='[r]') + $:GPU_PARALLEL_LOOP(collapse=4,private='[r]') do l = 0, p do k = 0, n do j = 0, buff_size - 1 @@ -771,7 +771,7 @@ contains $:END_GPU_PARALLEL_LOOP() if (qbmm_comm) then - $:GPU_PARALLEL_LOOP()(collapse=4,private='[r]') + $:GPU_PARALLEL_LOOP(collapse=4,private='[r]') do l = 0, p do k = 0, n do j = 0, buff_size - 1 @@ -787,7 +787,7 @@ contains end do $:END_GPU_PARALLEL_LOOP() - $:GPU_PARALLEL_LOOP()(collapse=5,private='[r]') + $:GPU_PARALLEL_LOOP(collapse=5,private='[r]') do l = 0, p do k = 0, n do j = 0, buff_size - 1 @@ -804,7 +804,7 @@ contains $:END_GPU_PARALLEL_LOOP() end if #:elif mpi_dir == 2 - $:GPU_PARALLEL_LOOP()(collapse=4,private='[r]') + $:GPU_PARALLEL_LOOP(collapse=4,private='[r]') do i = 1, nVar do l = 0, p do k = 0, buff_size - 1 @@ -820,7 +820,7 @@ contains $:END_GPU_PARALLEL_LOOP() if (qbmm_comm) then - $:GPU_PARALLEL_LOOP()(collapse=5,private='[r]') + $:GPU_PARALLEL_LOOP(collapse=5,private='[r]') do i = nVar + 1, nVar + 4 do l = 0, p do k = 0, buff_size - 1 @@ -837,7 +837,7 @@ contains end do $:END_GPU_PARALLEL_LOOP() - $:GPU_PARALLEL_LOOP()(collapse=5,private='[r]') + $:GPU_PARALLEL_LOOP(collapse=5,private='[r]') do i = nVar + 1, nVar + 4 do l = 0, p do k = 0, buff_size - 1 @@ -855,7 +855,7 @@ contains $:END_GPU_PARALLEL_LOOP() end if #:else - $:GPU_PARALLEL_LOOP()(collapse=4,private='[r]') + $:GPU_PARALLEL_LOOP(collapse=4,private='[r]') do i = 1, nVar do l = 0, buff_size - 1 do k = -buff_size, n + buff_size @@ -871,7 +871,7 @@ contains $:END_GPU_PARALLEL_LOOP() if (qbmm_comm) then - $:GPU_PARALLEL_LOOP()(collapse=5,private='[r]') + $:GPU_PARALLEL_LOOP(collapse=5,private='[r]') do i = nVar + 1, nVar + 4 do l = 0, buff_size - 1 do k = -buff_size, n + buff_size @@ -888,7 +888,7 @@ contains end do $:END_GPU_PARALLEL_LOOP() - $:GPU_PARALLEL_LOOP()(collapse=5,private='[r]') + $:GPU_PARALLEL_LOOP(collapse=5,private='[r]') do i = nVar + 1, nVar + 4 do l = 0, buff_size - 1 do k = -buff_size, n + buff_size @@ -958,7 +958,7 @@ contains #:for mpi_dir in [1, 2, 3] if (mpi_dir == ${mpi_dir}$) then #:if mpi_dir == 1 - $:GPU_PARALLEL_LOOP()(collapse=4,private='[r]') + $:GPU_PARALLEL_LOOP(collapse=4,private='[r]') do l = 0, p do k = 0, n do j = -buff_size, -1 @@ -979,7 +979,7 @@ contains $:END_GPU_PARALLEL_LOOP() if (qbmm_comm) then - $:GPU_PARALLEL_LOOP()(collapse=5,private='[r]') + $:GPU_PARALLEL_LOOP(collapse=5,private='[r]') do l = 0, p do k = 0, n do j = -buff_size, -1 @@ -995,7 +995,7 @@ contains end do $:END_GPU_PARALLEL_LOOP() - $:GPU_PARALLEL_LOOP()(collapse=5,private='[r]') + $:GPU_PARALLEL_LOOP(collapse=5,private='[r]') do l = 0, p do k = 0, n do j = -buff_size, -1 @@ -1012,7 +1012,7 @@ contains $:END_GPU_PARALLEL_LOOP() end if #:elif mpi_dir == 2 - $:GPU_PARALLEL_LOOP()(collapse=4,private='[r]') + $:GPU_PARALLEL_LOOP(collapse=4,private='[r]') do i = 1, nVar do l = 0, p do k = -buff_size, -1 @@ -1034,7 +1034,7 @@ contains $:END_GPU_PARALLEL_LOOP() if (qbmm_comm) then - $:GPU_PARALLEL_LOOP()(collapse=5,private='[r]') + $:GPU_PARALLEL_LOOP(collapse=5,private='[r]') do i = nVar + 1, nVar + 4 do l = 0, p do k = -buff_size, -1 @@ -1051,7 +1051,7 @@ contains end do $:END_GPU_PARALLEL_LOOP() - $:GPU_PARALLEL_LOOP()(collapse=5,private='[r]') + $:GPU_PARALLEL_LOOP(collapse=5,private='[r]') do i = nVar + 1, nVar + 4 do l = 0, p do k = -buff_size, -1 @@ -1070,7 +1070,7 @@ contains end if #:else ! Unpacking buffer from bc_z%beg - $:GPU_PARALLEL_LOOP()(collapse=4,private='[r]') + $:GPU_PARALLEL_LOOP(collapse=4,private='[r]') do i = 1, nVar do l = -buff_size, -1 do k = -buff_size, n + buff_size @@ -1093,7 +1093,7 @@ contains $:END_GPU_PARALLEL_LOOP() if (qbmm_comm) then - $:GPU_PARALLEL_LOOP()(collapse=5,private='[r]') + $:GPU_PARALLEL_LOOP(collapse=5,private='[r]') do i = nVar + 1, nVar + 4 do l = -buff_size, -1 do k = -buff_size, n + buff_size @@ -1111,7 +1111,7 @@ contains end do $:END_GPU_PARALLEL_LOOP() - $:GPU_PARALLEL_LOOP()(collapse=5,private='[r]') + $:GPU_PARALLEL_LOOP(collapse=5,private='[r]') do i = nVar + 1, nVar + 4 do l = -buff_size, -1 do k = -buff_size, n + buff_size From 85783642bf7805191e9c452d38ca01e43e63a783 Mon Sep 17 00:00:00 2001 From: Daniel Vickers Date: Sun, 16 Nov 2025 22:55:47 -0500 Subject: [PATCH 23/33] Testing if reverting macros on the MPI file resolves seg fault issues --- src/common/m_mpi_common.fpp | 446 ++++++++++++++++++------------------ 1 file changed, 223 insertions(+), 223 deletions(-) diff --git a/src/common/m_mpi_common.fpp b/src/common/m_mpi_common.fpp index 3ff753cac..174e5443f 100644 --- a/src/common/m_mpi_common.fpp +++ b/src/common/m_mpi_common.fpp @@ -757,153 +757,153 @@ contains #:for mpi_dir in [1, 2, 3] if (mpi_dir == ${mpi_dir}$) then #:if mpi_dir == 1 - $:GPU_PARALLEL_LOOP(collapse=4,private='[r]') - do l = 0, p - do k = 0, n - do j = 0, buff_size - 1 - do i = 1, nVar - r = (i - 1) + v_size*(j + buff_size*(k + (n + 1)*l)) - buff_send(r) = real(q_comm(i)%sf(j + pack_offset, k, l), kind=wp) + #:call GPU_PARALLEL(collapse=4,private='[r]') + do l = 0, p + do k = 0, n + do j = 0, buff_size - 1 + do i = 1, nVar + r = (i - 1) + v_size*(j + buff_size*(k + (n + 1)*l)) + buff_send(r) = real(q_comm(i)%sf(j + pack_offset, k, l), kind=wp) + end do end do end do end do - end do - $:END_GPU_PARALLEL_LOOP() + #:endcall GPU_PARALLEL if (qbmm_comm) then - $:GPU_PARALLEL_LOOP(collapse=4,private='[r]') - do l = 0, p - do k = 0, n - do j = 0, buff_size - 1 - do i = nVar + 1, nVar + 4 - do q = 1, nb - r = (i - 1) + (q - 1)*4 + v_size* & - (j + buff_size*(k + (n + 1)*l)) - buff_send(r) = real(pb_in(j + pack_offset, k, l, i - nVar, q), kind=wp) + #:call GPU_PARALLEL(collapse=4,private='[r]') + do l = 0, p + do k = 0, n + do j = 0, buff_size - 1 + do i = nVar + 1, nVar + 4 + do q = 1, nb + r = (i - 1) + (q - 1)*4 + v_size* & + (j + buff_size*(k + (n + 1)*l)) + buff_send(r) = real(pb_in(j + pack_offset, k, l, i - nVar, q), kind=wp) + end do end do end do end do end do - end do - $:END_GPU_PARALLEL_LOOP() + #:endcall GPU_PARALLEL - $:GPU_PARALLEL_LOOP(collapse=5,private='[r]') - do l = 0, p - do k = 0, n - do j = 0, buff_size - 1 - do i = nVar + 1, nVar + 4 - do q = 1, nb - r = (i - 1) + (q - 1)*4 + nb*4 + v_size* & - (j + buff_size*(k + (n + 1)*l)) - buff_send(r) = real(mv_in(j + pack_offset, k, l, i - nVar, q), kind=wp) + #:call GPU_PARALLEL(collapse=5,private='[r]') + do l = 0, p + do k = 0, n + do j = 0, buff_size - 1 + do i = nVar + 1, nVar + 4 + do q = 1, nb + r = (i - 1) + (q - 1)*4 + nb*4 + v_size* & + (j + buff_size*(k + (n + 1)*l)) + buff_send(r) = real(mv_in(j + pack_offset, k, l, i - nVar, q), kind=wp) + end do end do end do end do end do - end do - $:END_GPU_PARALLEL_LOOP() + #:endcall GPU_PARALLEL end if #:elif mpi_dir == 2 - $:GPU_PARALLEL_LOOP(collapse=4,private='[r]') - do i = 1, nVar - do l = 0, p - do k = 0, buff_size - 1 - do j = -buff_size, m + buff_size - r = (i - 1) + v_size* & - ((j + buff_size) + (m + 2*buff_size + 1)* & - (k + buff_size*l)) - buff_send(r) = real(q_comm(i)%sf(j, k + pack_offset, l), kind=wp) + #:call GPU_PARALLEL(collapse=4,private='[r]') + do i = 1, nVar + do l = 0, p + do k = 0, buff_size - 1 + do j = -buff_size, m + buff_size + r = (i - 1) + v_size* & + ((j + buff_size) + (m + 2*buff_size + 1)* & + (k + buff_size*l)) + buff_send(r) = real(q_comm(i)%sf(j, k + pack_offset, l), kind=wp) + end do end do end do end do - end do - $:END_GPU_PARALLEL_LOOP() + #:endcall GPU_PARALLEL if (qbmm_comm) then - $:GPU_PARALLEL_LOOP(collapse=5,private='[r]') - do i = nVar + 1, nVar + 4 - do l = 0, p - do k = 0, buff_size - 1 - do j = -buff_size, m + buff_size - do q = 1, nb - r = (i - 1) + (q - 1)*4 + v_size* & - ((j + buff_size) + (m + 2*buff_size + 1)* & - (k + buff_size*l)) - buff_send(r) = real(pb_in(j, k + pack_offset, l, i - nVar, q), kind=wp) + #:call GPU_PARALLEL(collapse=5,private='[r]') + do i = nVar + 1, nVar + 4 + do l = 0, p + do k = 0, buff_size - 1 + do j = -buff_size, m + buff_size + do q = 1, nb + r = (i - 1) + (q - 1)*4 + v_size* & + ((j + buff_size) + (m + 2*buff_size + 1)* & + (k + buff_size*l)) + buff_send(r) = real(pb_in(j, k + pack_offset, l, i - nVar, q), kind=wp) + end do end do end do end do end do - end do - $:END_GPU_PARALLEL_LOOP() - - $:GPU_PARALLEL_LOOP(collapse=5,private='[r]') - do i = nVar + 1, nVar + 4 - do l = 0, p - do k = 0, buff_size - 1 - do j = -buff_size, m + buff_size - do q = 1, nb - r = (i - 1) + (q - 1)*4 + nb*4 + v_size* & - ((j + buff_size) + (m + 2*buff_size + 1)* & - (k + buff_size*l)) - buff_send(r) = real(mv_in(j, k + pack_offset, l, i - nVar, q), kind=wp) + #:endcall GPU_PARALLEL + + #:call GPU_PARALLEL(collapse=5,private='[r]') + do i = nVar + 1, nVar + 4 + do l = 0, p + do k = 0, buff_size - 1 + do j = -buff_size, m + buff_size + do q = 1, nb + r = (i - 1) + (q - 1)*4 + nb*4 + v_size* & + ((j + buff_size) + (m + 2*buff_size + 1)* & + (k + buff_size*l)) + buff_send(r) = real(mv_in(j, k + pack_offset, l, i - nVar, q), kind=wp) + end do end do end do end do end do - end do - $:END_GPU_PARALLEL_LOOP() + #:endcall GPU_PARALLEL end if #:else - $:GPU_PARALLEL_LOOP(collapse=4,private='[r]') - do i = 1, nVar - do l = 0, buff_size - 1 - do k = -buff_size, n + buff_size - do j = -buff_size, m + buff_size - r = (i - 1) + v_size* & - ((j + buff_size) + (m + 2*buff_size + 1)* & - ((k + buff_size) + (n + 2*buff_size + 1)*l)) - buff_send(r) = real(q_comm(i)%sf(j, k, l + pack_offset), kind=wp) + #:call GPU_PARALLEL(collapse=4,private='[r]') + do i = 1, nVar + do l = 0, buff_size - 1 + do k = -buff_size, n + buff_size + do j = -buff_size, m + buff_size + r = (i - 1) + v_size* & + ((j + buff_size) + (m + 2*buff_size + 1)* & + ((k + buff_size) + (n + 2*buff_size + 1)*l)) + buff_send(r) = real(q_comm(i)%sf(j, k, l + pack_offset), kind=wp) + end do end do end do end do - end do - $:END_GPU_PARALLEL_LOOP() + #:endcall GPU_PARALLEL if (qbmm_comm) then - $:GPU_PARALLEL_LOOP(collapse=5,private='[r]') - do i = nVar + 1, nVar + 4 - do l = 0, buff_size - 1 - do k = -buff_size, n + buff_size - do j = -buff_size, m + buff_size - do q = 1, nb - r = (i - 1) + (q - 1)*4 + v_size* & - ((j + buff_size) + (m + 2*buff_size + 1)* & - ((k + buff_size) + (n + 2*buff_size + 1)*l)) - buff_send(r) = real(pb_in(j, k, l + pack_offset, i - nVar, q), kind=wp) + #:call GPU_PARALLEL(collapse=5,private='[r]') + do i = nVar + 1, nVar + 4 + do l = 0, buff_size - 1 + do k = -buff_size, n + buff_size + do j = -buff_size, m + buff_size + do q = 1, nb + r = (i - 1) + (q - 1)*4 + v_size* & + ((j + buff_size) + (m + 2*buff_size + 1)* & + ((k + buff_size) + (n + 2*buff_size + 1)*l)) + buff_send(r) = real(pb_in(j, k, l + pack_offset, i - nVar, q), kind=wp) + end do end do end do end do end do - end do - $:END_GPU_PARALLEL_LOOP() - - $:GPU_PARALLEL_LOOP(collapse=5,private='[r]') - do i = nVar + 1, nVar + 4 - do l = 0, buff_size - 1 - do k = -buff_size, n + buff_size - do j = -buff_size, m + buff_size - do q = 1, nb - r = (i - 1) + (q - 1)*4 + nb*4 + v_size* & - ((j + buff_size) + (m + 2*buff_size + 1)* & - ((k + buff_size) + (n + 2*buff_size + 1)*l)) - buff_send(r) = real(mv_in(j, k, l + pack_offset, i - nVar, q), kind=wp) + #:endcall GPU_PARALLEL + + #:call GPU_PARALLEL(collapse=5,private='[r]') + do i = nVar + 1, nVar + 4 + do l = 0, buff_size - 1 + do k = -buff_size, n + buff_size + do j = -buff_size, m + buff_size + do q = 1, nb + r = (i - 1) + (q - 1)*4 + nb*4 + v_size* & + ((j + buff_size) + (m + 2*buff_size + 1)* & + ((k + buff_size) + (n + 2*buff_size + 1)*l)) + buff_send(r) = real(mv_in(j, k, l + pack_offset, i - nVar, q), kind=wp) + end do end do end do end do end do - end do - $:END_GPU_PARALLEL_LOOP() + #:endcall GPU_PARALLEL end if #:endif end if @@ -958,176 +958,176 @@ contains #:for mpi_dir in [1, 2, 3] if (mpi_dir == ${mpi_dir}$) then #:if mpi_dir == 1 - $:GPU_PARALLEL_LOOP(collapse=4,private='[r]') - do l = 0, p - do k = 0, n - do j = -buff_size, -1 - do i = 1, nVar - r = (i - 1) + v_size* & - (j + buff_size*((k + 1) + (n + 1)*l)) - q_comm(i)%sf(j + unpack_offset, k, l) = real(buff_recv(r), kind=stp) + #:call GPU_PARALLEL(collapse=4,private='[r]') + do l = 0, p + do k = 0, n + do j = -buff_size, -1 + do i = 1, nVar + r = (i - 1) + v_size* & + (j + buff_size*((k + 1) + (n + 1)*l)) + q_comm(i)%sf(j + unpack_offset, k, l) = real(buff_recv(r), kind=stp) #if defined(__INTEL_COMPILER) - if (ieee_is_nan(q_comm(i)%sf(j, k, l))) then - print *, "Error", j, k, l, i - error stop "NaN(s) in recv" - end if + if (ieee_is_nan(q_comm(i)%sf(j, k, l))) then + print *, "Error", j, k, l, i + error stop "NaN(s) in recv" + end if #endif + end do end do end do end do - end do - $:END_GPU_PARALLEL_LOOP() + #:endcall GPU_PARALLEL if (qbmm_comm) then - $:GPU_PARALLEL_LOOP(collapse=5,private='[r]') - do l = 0, p - do k = 0, n - do j = -buff_size, -1 - do i = nVar + 1, nVar + 4 - do q = 1, nb - r = (i - 1) + (q - 1)*4 + v_size* & - (j + buff_size*((k + 1) + (n + 1)*l)) - pb_in(j + unpack_offset, k, l, i - nVar, q) = real(buff_recv(r), kind=stp) + #:call GPU_PARALLEL(collapse=5,private='[r]') + do l = 0, p + do k = 0, n + do j = -buff_size, -1 + do i = nVar + 1, nVar + 4 + do q = 1, nb + r = (i - 1) + (q - 1)*4 + v_size* & + (j + buff_size*((k + 1) + (n + 1)*l)) + pb_in(j + unpack_offset, k, l, i - nVar, q) = real(buff_recv(r), kind=stp) + end do end do end do end do end do - end do - $:END_GPU_PARALLEL_LOOP() + #:endcall GPU_PARALLEL - $:GPU_PARALLEL_LOOP(collapse=5,private='[r]') - do l = 0, p - do k = 0, n - do j = -buff_size, -1 - do i = nVar + 1, nVar + 4 - do q = 1, nb - r = (i - 1) + (q - 1)*4 + nb*4 + v_size* & - (j + buff_size*((k + 1) + (n + 1)*l)) - mv_in(j + unpack_offset, k, l, i - nVar, q) = real(buff_recv(r), kind=stp) + #:call GPU_PARALLEL(collapse=5,private='[r]') + do l = 0, p + do k = 0, n + do j = -buff_size, -1 + do i = nVar + 1, nVar + 4 + do q = 1, nb + r = (i - 1) + (q - 1)*4 + nb*4 + v_size* & + (j + buff_size*((k + 1) + (n + 1)*l)) + mv_in(j + unpack_offset, k, l, i - nVar, q) = real(buff_recv(r), kind=stp) + end do end do end do end do end do - end do - $:END_GPU_PARALLEL_LOOP() + #:endcall GPU_PARALLEL end if #:elif mpi_dir == 2 - $:GPU_PARALLEL_LOOP(collapse=4,private='[r]') - do i = 1, nVar - do l = 0, p - do k = -buff_size, -1 - do j = -buff_size, m + buff_size - r = (i - 1) + v_size* & - ((j + buff_size) + (m + 2*buff_size + 1)* & - ((k + buff_size) + buff_size*l)) - q_comm(i)%sf(j, k + unpack_offset, l) = real(buff_recv(r), kind=stp) + #:call GPU_PARALLEL(collapse=4,private='[r]') + do i = 1, nVar + do l = 0, p + do k = -buff_size, -1 + do j = -buff_size, m + buff_size + r = (i - 1) + v_size* & + ((j + buff_size) + (m + 2*buff_size + 1)* & + ((k + buff_size) + buff_size*l)) + q_comm(i)%sf(j, k + unpack_offset, l) = real(buff_recv(r), kind=stp) #if defined(__INTEL_COMPILER) - if (ieee_is_nan(q_comm(i)%sf(j, k, l))) then - print *, "Error", j, k, l, i - error stop "NaN(s) in recv" - end if + if (ieee_is_nan(q_comm(i)%sf(j, k, l))) then + print *, "Error", j, k, l, i + error stop "NaN(s) in recv" + end if #endif + end do end do end do end do - end do - $:END_GPU_PARALLEL_LOOP() + #:endcall GPU_PARALLEL if (qbmm_comm) then - $:GPU_PARALLEL_LOOP(collapse=5,private='[r]') - do i = nVar + 1, nVar + 4 - do l = 0, p - do k = -buff_size, -1 - do j = -buff_size, m + buff_size - do q = 1, nb - r = (i - 1) + (q - 1)*4 + v_size* & - ((j + buff_size) + (m + 2*buff_size + 1)* & - ((k + buff_size) + buff_size*l)) - pb_in(j, k + unpack_offset, l, i - nVar, q) = real(buff_recv(r), kind=stp) + #:call GPU_PARALLEL(collapse=5,private='[r]') + do i = nVar + 1, nVar + 4 + do l = 0, p + do k = -buff_size, -1 + do j = -buff_size, m + buff_size + do q = 1, nb + r = (i - 1) + (q - 1)*4 + v_size* & + ((j + buff_size) + (m + 2*buff_size + 1)* & + ((k + buff_size) + buff_size*l)) + pb_in(j, k + unpack_offset, l, i - nVar, q) = real(buff_recv(r), kind=stp) + end do end do end do end do end do - end do - $:END_GPU_PARALLEL_LOOP() - - $:GPU_PARALLEL_LOOP(collapse=5,private='[r]') - do i = nVar + 1, nVar + 4 - do l = 0, p - do k = -buff_size, -1 - do j = -buff_size, m + buff_size - do q = 1, nb - r = (i - 1) + (q - 1)*4 + nb*4 + v_size* & - ((j + buff_size) + (m + 2*buff_size + 1)* & - ((k + buff_size) + buff_size*l)) - mv_in(j, k + unpack_offset, l, i - nVar, q) = real(buff_recv(r), kind=stp) + #:endcall GPU_PARALLEL + + #:call GPU_PARALLEL(collapse=5,private='[r]') + do i = nVar + 1, nVar + 4 + do l = 0, p + do k = -buff_size, -1 + do j = -buff_size, m + buff_size + do q = 1, nb + r = (i - 1) + (q - 1)*4 + nb*4 + v_size* & + ((j + buff_size) + (m + 2*buff_size + 1)* & + ((k + buff_size) + buff_size*l)) + mv_in(j, k + unpack_offset, l, i - nVar, q) = real(buff_recv(r), kind=stp) + end do end do end do end do end do - end do - $:END_GPU_PARALLEL_LOOP() + #:endcall GPU_PARALLEL end if #:else ! Unpacking buffer from bc_z%beg - $:GPU_PARALLEL_LOOP(collapse=4,private='[r]') - do i = 1, nVar - do l = -buff_size, -1 - do k = -buff_size, n + buff_size - do j = -buff_size, m + buff_size - r = (i - 1) + v_size* & - ((j + buff_size) + (m + 2*buff_size + 1)* & - ((k + buff_size) + (n + 2*buff_size + 1)* & - (l + buff_size))) - q_comm(i)%sf(j, k, l + unpack_offset) = real(buff_recv(r), kind=stp) + #:call GPU_PARALLEL(collapse=4,private='[r]') + do i = 1, nVar + do l = -buff_size, -1 + do k = -buff_size, n + buff_size + do j = -buff_size, m + buff_size + r = (i - 1) + v_size* & + ((j + buff_size) + (m + 2*buff_size + 1)* & + ((k + buff_size) + (n + 2*buff_size + 1)* & + (l + buff_size))) + q_comm(i)%sf(j, k, l + unpack_offset) = real(buff_recv(r), kind=stp) #if defined(__INTEL_COMPILER) - if (ieee_is_nan(q_comm(i)%sf(j, k, l))) then - print *, "Error", j, k, l, i - error stop "NaN(s) in recv" - end if + if (ieee_is_nan(q_comm(i)%sf(j, k, l))) then + print *, "Error", j, k, l, i + error stop "NaN(s) in recv" + end if #endif + end do end do end do end do - end do - $:END_GPU_PARALLEL_LOOP() + #:endcall GPU_PARALLEL if (qbmm_comm) then - $:GPU_PARALLEL_LOOP(collapse=5,private='[r]') - do i = nVar + 1, nVar + 4 - do l = -buff_size, -1 - do k = -buff_size, n + buff_size - do j = -buff_size, m + buff_size - do q = 1, nb - r = (i - 1) + (q - 1)*4 + v_size* & - ((j + buff_size) + (m + 2*buff_size + 1)* & - ((k + buff_size) + (n + 2*buff_size + 1)* & - (l + buff_size))) - pb_in(j, k, l + unpack_offset, i - nVar, q) = real(buff_recv(r), kind=stp) + #:call GPU_PARALLEL(collapse=5,private='[r]') + do i = nVar + 1, nVar + 4 + do l = -buff_size, -1 + do k = -buff_size, n + buff_size + do j = -buff_size, m + buff_size + do q = 1, nb + r = (i - 1) + (q - 1)*4 + v_size* & + ((j + buff_size) + (m + 2*buff_size + 1)* & + ((k + buff_size) + (n + 2*buff_size + 1)* & + (l + buff_size))) + pb_in(j, k, l + unpack_offset, i - nVar, q) = real(buff_recv(r), kind=stp) + end do end do end do end do end do - end do - $:END_GPU_PARALLEL_LOOP() - - $:GPU_PARALLEL_LOOP(collapse=5,private='[r]') - do i = nVar + 1, nVar + 4 - do l = -buff_size, -1 - do k = -buff_size, n + buff_size - do j = -buff_size, m + buff_size - do q = 1, nb - r = (i - 1) + (q - 1)*4 + nb*4 + v_size* & - ((j + buff_size) + (m + 2*buff_size + 1)* & - ((k + buff_size) + (n + 2*buff_size + 1)* & - (l + buff_size))) - mv_in(j, k, l + unpack_offset, i - nVar, q) = real(buff_recv(r), kind=stp) + #:endcall GPU_PARALLEL + + #:call GPU_PARALLEL(collapse=5,private='[r]') + do i = nVar + 1, nVar + 4 + do l = -buff_size, -1 + do k = -buff_size, n + buff_size + do j = -buff_size, m + buff_size + do q = 1, nb + r = (i - 1) + (q - 1)*4 + nb*4 + v_size* & + ((j + buff_size) + (m + 2*buff_size + 1)* & + ((k + buff_size) + (n + 2*buff_size + 1)* & + (l + buff_size))) + mv_in(j, k, l + unpack_offset, i - nVar, q) = real(buff_recv(r), kind=stp) + end do end do end do end do end do - end do - $:END_GPU_PARALLEL_LOOP() + #:endcall GPU_PARALLEL end if #:endif end if @@ -1871,4 +1871,4 @@ contains end subroutine s_finalize_mpi_common_module -end module m_mpi_common +end module m_mpi_common \ No newline at end of file From b521697401f0ffcdf0537b424528988999bb3e99 Mon Sep 17 00:00:00 2001 From: Daniel Vickers Date: Sun, 16 Nov 2025 23:00:51 -0500 Subject: [PATCH 24/33] format --- src/common/m_mpi_common.fpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/common/m_mpi_common.fpp b/src/common/m_mpi_common.fpp index 174e5443f..64f0332c5 100644 --- a/src/common/m_mpi_common.fpp +++ b/src/common/m_mpi_common.fpp @@ -1871,4 +1871,4 @@ contains end subroutine s_finalize_mpi_common_module -end module m_mpi_common \ No newline at end of file +end module m_mpi_common From 973ea73d02b360097d7fc59be5dd2dbdf92e5c40 Mon Sep 17 00:00:00 2001 From: Daniel Vickers Date: Sun, 16 Nov 2025 23:08:16 -0500 Subject: [PATCH 25/33] Fixed old macros --- src/common/include/acc_macros.fpp | 32 ++++++++++++ src/common/include/omp_macros.fpp | 47 +++++++++++++++++ src/common/include/parallel_macros.fpp | 17 ++++++ src/common/m_mpi_common.fpp | 72 +++++++++++++------------- 4 files changed, 132 insertions(+), 36 deletions(-) diff --git a/src/common/include/acc_macros.fpp b/src/common/include/acc_macros.fpp index 771ee976d..3c048e851 100644 --- a/src/common/include/acc_macros.fpp +++ b/src/common/include/acc_macros.fpp @@ -129,6 +129,38 @@ $:end_acc_directive #:enddef +#:def ACC_PARALLEL_LOOP_OLD(code, collapse=None, private=None, parallelism='[gang, vector]', & + & default='present', firstprivate=None, reduction=None, reductionOp=None, & + & copy=None, copyin=None, copyinReadOnly=None, copyout=None, create=None, & + & no_create=None, present=None, deviceptr=None, attach=None, extraAccArgs=None) + #:set collapse_val = GEN_COLLAPSE_STR(collapse) + #:set parallelism_val = GEN_PARALLELISM_STR(parallelism) + #:set default_val = GEN_DEFAULT_STR(default) + #:set private_val = GEN_PRIVATE_STR(private, False).strip('\n') + GEN_PRIVATE_STR(firstprivate, True).strip('\n') + #:set reduction_val = GEN_REDUCTION_STR(reduction, reductionOp) + #:set copy_val = GEN_COPY_STR(copy) + #:set copyin_val = GEN_COPYIN_STR(copyin, False).strip('\n') + GEN_COPYIN_STR(copyinReadOnly, True).strip('\n') + #:set copyout_val = GEN_COPYOUT_STR(copyout) + #:set create_val = GEN_CREATE_STR(create) + #:set no_create_val = GEN_NOCREATE_STR(no_create) + #:set present_val = GEN_PRESENT_STR(present) + #:set deviceptr_val = GEN_DEVICEPTR_STR(deviceptr) + #:set attach_val = GEN_ATTACH_STR(attach) + #:set extraAccArgs_val = GEN_EXTRA_ARGS_STR(extraAccArgs) + #:set clause_val = collapse_val.strip('\n') + parallelism_val.strip('\n') + & + & default_val.strip('\n') + private_val.strip('\n') + reduction_val.strip('\n') + & + & copy_val.strip('\n') + copyin_val.strip('\n') + & + & copyout_val.strip('\n') + create_val.strip('\n') + & + & no_create_val.strip('\n') + present_val.strip('\n') + & + & deviceptr_val.strip('\n') + attach_val.strip('\n') + #:set acc_directive = '!$acc parallel loop ' + & + & clause_val + extraAccArgs_val.strip('\n') + #:set acc_end_directive = '!$acc end parallel loop' + $:acc_directive + $:code + $:acc_end_directive +#:enddef + #:def ACC_PARALLEL_LOOP(collapse=None, private=None, parallelism='[gang, vector]', & & default='present', firstprivate=None, reduction=None, reductionOp=None, & & copy=None, copyin=None, copyinReadOnly=None, copyout=None, create=None, & diff --git a/src/common/include/omp_macros.fpp b/src/common/include/omp_macros.fpp index 2e1df1dd8..2b7606d03 100644 --- a/src/common/include/omp_macros.fpp +++ b/src/common/include/omp_macros.fpp @@ -149,6 +149,53 @@ $:omp_end_directive #:enddef +#:def OMP_PARALLEL_LOOP_OLD(code, collapse=None, private=None, parallelism='[gang, vector]', & + & default='present', firstprivate=None, reduction=None, reductionOp=None, & + & copy=None, copyin=None, copyinReadOnly=None, copyout=None, create=None, & + & no_create=None, present=None, deviceptr=None, attach=None, extraOmpArgs=None) + + #:set collapse_val = GEN_COLLAPSE_STR(collapse) + #:set parallelism_val = OMP_PARALLELISM_STR(parallelism) + #:set default_val = OMP_DEFAULT_STR(default) + #:set private_val = GEN_PRIVATE_STR(private, False).strip('\n') + GEN_PRIVATE_STR(firstprivate, True).strip('\n') + #:set reduction_val = GEN_REDUCTION_STR(reduction, reductionOp) + #:set copy_val = OMP_COPY_STR(copy) + #:set copyin_val = OMP_COPYIN_STR(copyin).strip('\n') + OMP_COPYIN_STR(copyinReadOnly).strip('\n') + #:set copyout_val = OMP_COPYOUT_STR(copyout) + #:set create_val = OMP_CREATE_STR(create) + #:set no_create_val = OMP_NOCREATE_STR(no_create) + #:set present_val = OMP_PRESENT_STR(present) + #:set deviceptr_val = OMP_DEVICEPTR_STR(deviceptr) + #:set attach_val = OMP_MAP_STR('always,tofrom', attach) + #:set extraOmpArgs_val = GEN_EXTRA_ARGS_STR(extraOmpArgs) + #:set clause_val = collapse_val.strip('\n') + parallelism_val.strip('\n') + & + & default_val.strip('\n') + private_val.strip('\n') + reduction_val.strip('\n') + & + & copy_val.strip('\n') + copyin_val.strip('\n') + & + & copyout_val.strip('\n') + create_val.strip('\n') + & + & no_create_val.strip('\n') + present_val.strip('\n') + & + & deviceptr_val.strip('\n') + attach_val.strip('\n') + #! Hardcoding the parallelism for now + + #:if MFC_COMPILER == NVIDIA_COMPILER_ID or MFC_COMPILER == PGI_COMPILER_ID + #:set omp_start_directive = '!$omp target teams loop defaultmap(firstprivate:scalar) bind(teams,parallel) ' + #:set omp_end_directive = '!$omp end target teams loop' + #:elif MFC_COMPILER == CCE_COMPILER_ID + #:set omp_start_directive = '!$omp target teams distribute parallel do simd defaultmap(firstprivate:scalar) ' + #:set omp_end_directive = '!$omp end target teams distribute parallel do simd' + #:elif MFC_COMPILER == AMD_COMPILER_ID + #:set omp_start_directive = '!$omp target teams distribute parallel do ' + #:set omp_end_directive = '!$omp end target teams distribute parallel do' + #:else + #:set omp_start_directive = '!$omp target teams loop defaultmap(firstprivate:scalar) bind(teams,parallel) ' + #:set omp_end_directive = '!$omp end target teams loop' + #:endif + + #:set omp_directive = omp_start_directive + clause_val + extraOmpArgs_val.strip('\n') + $:omp_directive + $:code + $:omp_end_directive +#:enddef + #:def OMP_PARALLEL_LOOP(collapse=None, private=None, parallelism='[gang, vector]', & & default='present', firstprivate=None, reduction=None, reductionOp=None, & & copy=None, copyin=None, copyinReadOnly=None, copyout=None, create=None, & diff --git a/src/common/include/parallel_macros.fpp b/src/common/include/parallel_macros.fpp index bfe4b3bea..e5dc5605e 100644 --- a/src/common/include/parallel_macros.fpp +++ b/src/common/include/parallel_macros.fpp @@ -19,6 +19,23 @@ #:enddef +#:def GPU_PARALLEL_LOOP_OLD(code, collapse=None, private=None, parallelism='[gang, vector]', & + & default='present', firstprivate=None, reduction=None, reductionOp=None, & + & copy=None, copyin=None, copyinReadOnly=None, copyout=None, create=None, & + & no_create=None, present=None, deviceptr=None, attach=None, extraAccArgs=None, extraOmpArgs=None) + + #:set acc_code = ACC_PARALLEL_LOOP_OLD(code, collapse, private, parallelism, default, firstprivate, reduction, reductionOp, copy, copyin, copyinReadOnly, copyout, create, no_create, present, deviceptr, attach, extraAccArgs) + #:set omp_code = OMP_PARALLEL_LOOP_OLD(code, collapse, private, parallelism, default, firstprivate, reduction, reductionOp, copy, copyin, copyinReadOnly, copyout, create, no_create, present, deviceptr, attach, extraOmpArgs) + +#if defined(MFC_OpenACC) + $:acc_code +#elif defined(MFC_OpenMP) + $:omp_code +#else + $:code +#endif +#:enddef + #:def GPU_PARALLEL_LOOP(collapse=None, private=None, parallelism='[gang, vector]', & & default='present', firstprivate=None, reduction=None, reductionOp=None, & & copy=None, copyin=None, copyinReadOnly=None, copyout=None, create=None, & diff --git a/src/common/m_mpi_common.fpp b/src/common/m_mpi_common.fpp index 64f0332c5..a45469475 100644 --- a/src/common/m_mpi_common.fpp +++ b/src/common/m_mpi_common.fpp @@ -757,7 +757,7 @@ contains #:for mpi_dir in [1, 2, 3] if (mpi_dir == ${mpi_dir}$) then #:if mpi_dir == 1 - #:call GPU_PARALLEL(collapse=4,private='[r]') + #:call GPU_PARALLEL_LOOP_OLD(collapse=4,private='[r]') do l = 0, p do k = 0, n do j = 0, buff_size - 1 @@ -768,10 +768,10 @@ contains end do end do end do - #:endcall GPU_PARALLEL + #:endcall GPU_PARALLEL_LOOP_OLD if (qbmm_comm) then - #:call GPU_PARALLEL(collapse=4,private='[r]') + #:call GPU_PARALLEL_LOOP_OLD(collapse=4,private='[r]') do l = 0, p do k = 0, n do j = 0, buff_size - 1 @@ -785,9 +785,9 @@ contains end do end do end do - #:endcall GPU_PARALLEL + #:endcall GPU_PARALLEL_LOOP_OLD - #:call GPU_PARALLEL(collapse=5,private='[r]') + #:call GPU_PARALLEL_LOOP_OLD(collapse=5,private='[r]') do l = 0, p do k = 0, n do j = 0, buff_size - 1 @@ -801,10 +801,10 @@ contains end do end do end do - #:endcall GPU_PARALLEL + #:endcall GPU_PARALLEL_LOOP_OLD end if #:elif mpi_dir == 2 - #:call GPU_PARALLEL(collapse=4,private='[r]') + #:call GPU_PARALLEL_LOOP_OLD(collapse=4,private='[r]') do i = 1, nVar do l = 0, p do k = 0, buff_size - 1 @@ -817,10 +817,10 @@ contains end do end do end do - #:endcall GPU_PARALLEL + #:endcall GPU_PARALLEL_LOOP_OLD if (qbmm_comm) then - #:call GPU_PARALLEL(collapse=5,private='[r]') + #:call GPU_PARALLEL_LOOP_OLD(collapse=5,private='[r]') do i = nVar + 1, nVar + 4 do l = 0, p do k = 0, buff_size - 1 @@ -835,9 +835,9 @@ contains end do end do end do - #:endcall GPU_PARALLEL + #:endcall GPU_PARALLEL_LOOP_OLD - #:call GPU_PARALLEL(collapse=5,private='[r]') + #:call GPU_PARALLEL_LOOP_OLD(collapse=5,private='[r]') do i = nVar + 1, nVar + 4 do l = 0, p do k = 0, buff_size - 1 @@ -852,10 +852,10 @@ contains end do end do end do - #:endcall GPU_PARALLEL + #:endcall GPU_PARALLEL_LOOP_OLD end if #:else - #:call GPU_PARALLEL(collapse=4,private='[r]') + #:call GPU_PARALLEL_LOOP_OLD(collapse=4,private='[r]') do i = 1, nVar do l = 0, buff_size - 1 do k = -buff_size, n + buff_size @@ -868,10 +868,10 @@ contains end do end do end do - #:endcall GPU_PARALLEL + #:endcall GPU_PARALLEL_LOOP_OLD if (qbmm_comm) then - #:call GPU_PARALLEL(collapse=5,private='[r]') + #:call GPU_PARALLEL_LOOP_OLD(collapse=5,private='[r]') do i = nVar + 1, nVar + 4 do l = 0, buff_size - 1 do k = -buff_size, n + buff_size @@ -886,9 +886,9 @@ contains end do end do end do - #:endcall GPU_PARALLEL + #:endcall GPU_PARALLEL_LOOP_OLD - #:call GPU_PARALLEL(collapse=5,private='[r]') + #:call GPU_PARALLEL_LOOP_OLD(collapse=5,private='[r]') do i = nVar + 1, nVar + 4 do l = 0, buff_size - 1 do k = -buff_size, n + buff_size @@ -903,7 +903,7 @@ contains end do end do end do - #:endcall GPU_PARALLEL + #:endcall GPU_PARALLEL_LOOP_OLD end if #:endif end if @@ -958,7 +958,7 @@ contains #:for mpi_dir in [1, 2, 3] if (mpi_dir == ${mpi_dir}$) then #:if mpi_dir == 1 - #:call GPU_PARALLEL(collapse=4,private='[r]') + #:call GPU_PARALLEL_LOOP_OLD(collapse=4,private='[r]') do l = 0, p do k = 0, n do j = -buff_size, -1 @@ -976,10 +976,10 @@ contains end do end do end do - #:endcall GPU_PARALLEL + #:endcall GPU_PARALLEL_LOOP_OLD if (qbmm_comm) then - #:call GPU_PARALLEL(collapse=5,private='[r]') + #:call GPU_PARALLEL_LOOP_OLD(collapse=5,private='[r]') do l = 0, p do k = 0, n do j = -buff_size, -1 @@ -993,9 +993,9 @@ contains end do end do end do - #:endcall GPU_PARALLEL + #:endcall GPU_PARALLEL_LOOP_OLD - #:call GPU_PARALLEL(collapse=5,private='[r]') + #:call GPU_PARALLEL_LOOP_OLD(collapse=5,private='[r]') do l = 0, p do k = 0, n do j = -buff_size, -1 @@ -1009,10 +1009,10 @@ contains end do end do end do - #:endcall GPU_PARALLEL + #:endcall GPU_PARALLEL_LOOP_OLD end if #:elif mpi_dir == 2 - #:call GPU_PARALLEL(collapse=4,private='[r]') + #:call GPU_PARALLEL_LOOP_OLD(collapse=4,private='[r]') do i = 1, nVar do l = 0, p do k = -buff_size, -1 @@ -1031,10 +1031,10 @@ contains end do end do end do - #:endcall GPU_PARALLEL + #:endcall GPU_PARALLEL_LOOP_OLD if (qbmm_comm) then - #:call GPU_PARALLEL(collapse=5,private='[r]') + #:call GPU_PARALLEL_LOOP_OLD(collapse=5,private='[r]') do i = nVar + 1, nVar + 4 do l = 0, p do k = -buff_size, -1 @@ -1049,9 +1049,9 @@ contains end do end do end do - #:endcall GPU_PARALLEL + #:endcall GPU_PARALLEL_LOOP_OLD - #:call GPU_PARALLEL(collapse=5,private='[r]') + #:call GPU_PARALLEL_LOOP_OLD(collapse=5,private='[r]') do i = nVar + 1, nVar + 4 do l = 0, p do k = -buff_size, -1 @@ -1066,11 +1066,11 @@ contains end do end do end do - #:endcall GPU_PARALLEL + #:endcall GPU_PARALLEL_LOOP_OLD end if #:else ! Unpacking buffer from bc_z%beg - #:call GPU_PARALLEL(collapse=4,private='[r]') + #:call GPU_PARALLEL_LOOP_OLD(collapse=4,private='[r]') do i = 1, nVar do l = -buff_size, -1 do k = -buff_size, n + buff_size @@ -1090,10 +1090,10 @@ contains end do end do end do - #:endcall GPU_PARALLEL + #:endcall GPU_PARALLEL_LOOP_OLD if (qbmm_comm) then - #:call GPU_PARALLEL(collapse=5,private='[r]') + #:call GPU_PARALLEL_LOOP_OLD(collapse=5,private='[r]') do i = nVar + 1, nVar + 4 do l = -buff_size, -1 do k = -buff_size, n + buff_size @@ -1109,9 +1109,9 @@ contains end do end do end do - #:endcall GPU_PARALLEL + #:endcall GPU_PARALLEL_LOOP_OLD - #:call GPU_PARALLEL(collapse=5,private='[r]') + #:call GPU_PARALLEL_LOOP_OLD(collapse=5,private='[r]') do i = nVar + 1, nVar + 4 do l = -buff_size, -1 do k = -buff_size, n + buff_size @@ -1127,7 +1127,7 @@ contains end do end do end do - #:endcall GPU_PARALLEL + #:endcall GPU_PARALLEL_LOOP_OLD end if #:endif end if From 2ede9a1028d2396415f123274b94ee9e391e8986 Mon Sep 17 00:00:00 2001 From: Daniel Vickers Date: Sun, 16 Nov 2025 23:21:35 -0500 Subject: [PATCH 26/33] Undoing a deletion of a directive with and end loop --- src/common/m_boundary_common.fpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/common/m_boundary_common.fpp b/src/common/m_boundary_common.fpp index edff5edc3..833bcbb90 100644 --- a/src/common/m_boundary_common.fpp +++ b/src/common/m_boundary_common.fpp @@ -1171,7 +1171,7 @@ contains if (bc_x%beg >= 0) then call s_mpi_sendrecv_variables_buffers(c_divs, 1, -1, num_dims + 1) else - $:END_GPU_PARALLEL_LOOP() + $:GPU_PARALLEL_LOOP(private='[l,k]', collapse=2) do l = 0, p do k = 0, n select case (bc_type(1, 1)%sf(0, k, l)) @@ -1190,7 +1190,7 @@ contains if (bc_x%end >= 0) then call s_mpi_sendrecv_variables_buffers(c_divs, 1, 1, num_dims + 1) else - $:END_GPU_PARALLEL_LOOP() + $:GPU_PARALLEL_LOOP(private='[l,k]', collapse=2) do l = 0, p do k = 0, n select case (bc_type(1, 2)%sf(0, k, l)) @@ -1212,7 +1212,7 @@ contains if (bc_y%beg >= 0) then call s_mpi_sendrecv_variables_buffers(c_divs, 2, -1, num_dims + 1) else - $:END_GPU_PARALLEL_LOOP() + $:GPU_PARALLEL_LOOP(private='[l,k]', collapse=2) do l = 0, p do k = -buff_size, m + buff_size select case (bc_type(2, 1)%sf(k, 0, l)) @@ -1231,7 +1231,7 @@ contains if (bc_y%end >= 0) then call s_mpi_sendrecv_variables_buffers(c_divs, 2, 1, num_dims + 1) else - $:END_GPU_PARALLEL_LOOP() + $:GPU_PARALLEL_LOOP(private='[l,k]', collapse=2) do l = 0, p do k = -buff_size, m + buff_size select case (bc_type(2, 2)%sf(k, 0, l)) @@ -1254,7 +1254,7 @@ contains if (bc_z%beg >= 0) then call s_mpi_sendrecv_variables_buffers(c_divs, 3, -1, num_dims + 1) else - $:END_GPU_PARALLEL_LOOP() + $:GPU_PARALLEL_LOOP(private='[l,k]', collapse=2) do l = -buff_size, n + buff_size do k = -buff_size, m + buff_size select case (bc_type(3, 1)%sf(k, l, 0)) @@ -1273,7 +1273,7 @@ contains if (bc_z%end >= 0) then call s_mpi_sendrecv_variables_buffers(c_divs, 3, 1, num_dims + 1) else - $:END_GPU_PARALLEL_LOOP() + $:GPU_PARALLEL_LOOP(private='[l,k]', collapse=2) do l = -buff_size, n + buff_size do k = -buff_size, m + buff_size select case (bc_type(3, 2)%sf(k, l, 0)) From d5a3f74d4b6b28e287f588e2fad2dd5487b07ac6 Mon Sep 17 00:00:00 2001 From: Daniel Vickers Date: Mon, 17 Nov 2025 00:16:21 -0500 Subject: [PATCH 27/33] Previous test passed except for gres resource errors, which are kind of fake errors. Now testing with half the loops in the new configuration. --- src/common/m_mpi_common.fpp | 202 ++++++++++++++++++------------------ 1 file changed, 101 insertions(+), 101 deletions(-) diff --git a/src/common/m_mpi_common.fpp b/src/common/m_mpi_common.fpp index a45469475..905b21570 100644 --- a/src/common/m_mpi_common.fpp +++ b/src/common/m_mpi_common.fpp @@ -757,153 +757,153 @@ contains #:for mpi_dir in [1, 2, 3] if (mpi_dir == ${mpi_dir}$) then #:if mpi_dir == 1 - #:call GPU_PARALLEL_LOOP_OLD(collapse=4,private='[r]') - do l = 0, p - do k = 0, n - do j = 0, buff_size - 1 - do i = 1, nVar - r = (i - 1) + v_size*(j + buff_size*(k + (n + 1)*l)) - buff_send(r) = real(q_comm(i)%sf(j + pack_offset, k, l), kind=wp) - end do + $:GPU_PARALLEL_LOOP(collapse=4,private='[r]') + do l = 0, p + do k = 0, n + do j = 0, buff_size - 1 + do i = 1, nVar + r = (i - 1) + v_size*(j + buff_size*(k + (n + 1)*l)) + buff_send(r) = real(q_comm(i)%sf(j + pack_offset, k, l), kind=wp) end do end do end do - #:endcall GPU_PARALLEL_LOOP_OLD + end do + $:END_GPU_PARALLEL_LOOP() if (qbmm_comm) then - #:call GPU_PARALLEL_LOOP_OLD(collapse=4,private='[r]') - do l = 0, p - do k = 0, n - do j = 0, buff_size - 1 - do i = nVar + 1, nVar + 4 - do q = 1, nb - r = (i - 1) + (q - 1)*4 + v_size* & - (j + buff_size*(k + (n + 1)*l)) - buff_send(r) = real(pb_in(j + pack_offset, k, l, i - nVar, q), kind=wp) - end do + $:GPU_PARALLEL_LOOP(collapse=4,private='[r]') + do l = 0, p + do k = 0, n + do j = 0, buff_size - 1 + do i = nVar + 1, nVar + 4 + do q = 1, nb + r = (i - 1) + (q - 1)*4 + v_size* & + (j + buff_size*(k + (n + 1)*l)) + buff_send(r) = real(pb_in(j + pack_offset, k, l, i - nVar, q), kind=wp) end do end do end do end do - #:endcall GPU_PARALLEL_LOOP_OLD + end do + $:END_GPU_PARALLEL_LOOP() - #:call GPU_PARALLEL_LOOP_OLD(collapse=5,private='[r]') - do l = 0, p - do k = 0, n - do j = 0, buff_size - 1 - do i = nVar + 1, nVar + 4 - do q = 1, nb - r = (i - 1) + (q - 1)*4 + nb*4 + v_size* & - (j + buff_size*(k + (n + 1)*l)) - buff_send(r) = real(mv_in(j + pack_offset, k, l, i - nVar, q), kind=wp) - end do + $:GPU_PARALLEL_LOOP(collapse=5,private='[r]') + do l = 0, p + do k = 0, n + do j = 0, buff_size - 1 + do i = nVar + 1, nVar + 4 + do q = 1, nb + r = (i - 1) + (q - 1)*4 + nb*4 + v_size* & + (j + buff_size*(k + (n + 1)*l)) + buff_send(r) = real(mv_in(j + pack_offset, k, l, i - nVar, q), kind=wp) end do end do end do end do - #:endcall GPU_PARALLEL_LOOP_OLD + end do + $:END_GPU_PARALLEL_LOOP() end if #:elif mpi_dir == 2 - #:call GPU_PARALLEL_LOOP_OLD(collapse=4,private='[r]') - do i = 1, nVar - do l = 0, p - do k = 0, buff_size - 1 - do j = -buff_size, m + buff_size - r = (i - 1) + v_size* & - ((j + buff_size) + (m + 2*buff_size + 1)* & - (k + buff_size*l)) - buff_send(r) = real(q_comm(i)%sf(j, k + pack_offset, l), kind=wp) - end do + $:GPU_PARALLEL_LOOP(collapse=4,private='[r]') + do i = 1, nVar + do l = 0, p + do k = 0, buff_size - 1 + do j = -buff_size, m + buff_size + r = (i - 1) + v_size* & + ((j + buff_size) + (m + 2*buff_size + 1)* & + (k + buff_size*l)) + buff_send(r) = real(q_comm(i)%sf(j, k + pack_offset, l), kind=wp) end do end do end do - #:endcall GPU_PARALLEL_LOOP_OLD + end do + $:END_GPU_PARALLEL_LOOP() if (qbmm_comm) then - #:call GPU_PARALLEL_LOOP_OLD(collapse=5,private='[r]') - do i = nVar + 1, nVar + 4 - do l = 0, p - do k = 0, buff_size - 1 - do j = -buff_size, m + buff_size - do q = 1, nb - r = (i - 1) + (q - 1)*4 + v_size* & - ((j + buff_size) + (m + 2*buff_size + 1)* & - (k + buff_size*l)) - buff_send(r) = real(pb_in(j, k + pack_offset, l, i - nVar, q), kind=wp) - end do + $:GPU_PARALLEL_LOOP(collapse=5,private='[r]') + do i = nVar + 1, nVar + 4 + do l = 0, p + do k = 0, buff_size - 1 + do j = -buff_size, m + buff_size + do q = 1, nb + r = (i - 1) + (q - 1)*4 + v_size* & + ((j + buff_size) + (m + 2*buff_size + 1)* & + (k + buff_size*l)) + buff_send(r) = real(pb_in(j, k + pack_offset, l, i - nVar, q), kind=wp) end do end do end do end do - #:endcall GPU_PARALLEL_LOOP_OLD + end do + $:END_GPU_PARALLEL_LOOP() - #:call GPU_PARALLEL_LOOP_OLD(collapse=5,private='[r]') - do i = nVar + 1, nVar + 4 - do l = 0, p - do k = 0, buff_size - 1 - do j = -buff_size, m + buff_size - do q = 1, nb - r = (i - 1) + (q - 1)*4 + nb*4 + v_size* & - ((j + buff_size) + (m + 2*buff_size + 1)* & - (k + buff_size*l)) - buff_send(r) = real(mv_in(j, k + pack_offset, l, i - nVar, q), kind=wp) - end do + $:GPU_PARALLEL_LOOP(collapse=5,private='[r]') + do i = nVar + 1, nVar + 4 + do l = 0, p + do k = 0, buff_size - 1 + do j = -buff_size, m + buff_size + do q = 1, nb + r = (i - 1) + (q - 1)*4 + nb*4 + v_size* & + ((j + buff_size) + (m + 2*buff_size + 1)* & + (k + buff_size*l)) + buff_send(r) = real(mv_in(j, k + pack_offset, l, i - nVar, q), kind=wp) end do end do end do end do - #:endcall GPU_PARALLEL_LOOP_OLD + end do + $:END_GPU_PARALLEL_LOOP() end if #:else - #:call GPU_PARALLEL_LOOP_OLD(collapse=4,private='[r]') - do i = 1, nVar - do l = 0, buff_size - 1 - do k = -buff_size, n + buff_size - do j = -buff_size, m + buff_size - r = (i - 1) + v_size* & - ((j + buff_size) + (m + 2*buff_size + 1)* & - ((k + buff_size) + (n + 2*buff_size + 1)*l)) - buff_send(r) = real(q_comm(i)%sf(j, k, l + pack_offset), kind=wp) - end do + $:GPU_PARALLEL_LOOP(collapse=4,private='[r]') + do i = 1, nVar + do l = 0, buff_size - 1 + do k = -buff_size, n + buff_size + do j = -buff_size, m + buff_size + r = (i - 1) + v_size* & + ((j + buff_size) + (m + 2*buff_size + 1)* & + ((k + buff_size) + (n + 2*buff_size + 1)*l)) + buff_send(r) = real(q_comm(i)%sf(j, k, l + pack_offset), kind=wp) end do end do end do - #:endcall GPU_PARALLEL_LOOP_OLD + end do + $:END_GPU_PARALLEL_LOOP() if (qbmm_comm) then - #:call GPU_PARALLEL_LOOP_OLD(collapse=5,private='[r]') - do i = nVar + 1, nVar + 4 - do l = 0, buff_size - 1 - do k = -buff_size, n + buff_size - do j = -buff_size, m + buff_size - do q = 1, nb - r = (i - 1) + (q - 1)*4 + v_size* & - ((j + buff_size) + (m + 2*buff_size + 1)* & - ((k + buff_size) + (n + 2*buff_size + 1)*l)) - buff_send(r) = real(pb_in(j, k, l + pack_offset, i - nVar, q), kind=wp) - end do + $:GPU_PARALLEL_LOOP(collapse=5,private='[r]') + do i = nVar + 1, nVar + 4 + do l = 0, buff_size - 1 + do k = -buff_size, n + buff_size + do j = -buff_size, m + buff_size + do q = 1, nb + r = (i - 1) + (q - 1)*4 + v_size* & + ((j + buff_size) + (m + 2*buff_size + 1)* & + ((k + buff_size) + (n + 2*buff_size + 1)*l)) + buff_send(r) = real(pb_in(j, k, l + pack_offset, i - nVar, q), kind=wp) end do end do end do end do - #:endcall GPU_PARALLEL_LOOP_OLD + end do + $:END_GPU_PARALLEL_LOOP() - #:call GPU_PARALLEL_LOOP_OLD(collapse=5,private='[r]') - do i = nVar + 1, nVar + 4 - do l = 0, buff_size - 1 - do k = -buff_size, n + buff_size - do j = -buff_size, m + buff_size - do q = 1, nb - r = (i - 1) + (q - 1)*4 + nb*4 + v_size* & - ((j + buff_size) + (m + 2*buff_size + 1)* & - ((k + buff_size) + (n + 2*buff_size + 1)*l)) - buff_send(r) = real(mv_in(j, k, l + pack_offset, i - nVar, q), kind=wp) - end do + $:GPU_PARALLEL_LOOP(collapse=5,private='[r]') + do i = nVar + 1, nVar + 4 + do l = 0, buff_size - 1 + do k = -buff_size, n + buff_size + do j = -buff_size, m + buff_size + do q = 1, nb + r = (i - 1) + (q - 1)*4 + nb*4 + v_size* & + ((j + buff_size) + (m + 2*buff_size + 1)* & + ((k + buff_size) + (n + 2*buff_size + 1)*l)) + buff_send(r) = real(mv_in(j, k, l + pack_offset, i - nVar, q), kind=wp) end do end do end do end do - #:endcall GPU_PARALLEL_LOOP_OLD + end do + $:END_GPU_PARALLEL_LOOP() end if #:endif end if From abf2f5a1b413d31b654464bad13a8cb12ecbabb3 Mon Sep 17 00:00:00 2001 From: Daniel Vickers Date: Mon, 17 Nov 2025 00:24:13 -0500 Subject: [PATCH 28/33] Got errors with the first 9 loops, so now I removed 4 more loops. 5 total loops in this test with new macros --- src/common/m_mpi_common.fpp | 94 ++++++++++++++++++------------------- 1 file changed, 47 insertions(+), 47 deletions(-) diff --git a/src/common/m_mpi_common.fpp b/src/common/m_mpi_common.fpp index 905b21570..ec6dd1b8d 100644 --- a/src/common/m_mpi_common.fpp +++ b/src/common/m_mpi_common.fpp @@ -837,73 +837,73 @@ contains end do $:END_GPU_PARALLEL_LOOP() - $:GPU_PARALLEL_LOOP(collapse=5,private='[r]') - do i = nVar + 1, nVar + 4 - do l = 0, p - do k = 0, buff_size - 1 - do j = -buff_size, m + buff_size - do q = 1, nb - r = (i - 1) + (q - 1)*4 + nb*4 + v_size* & - ((j + buff_size) + (m + 2*buff_size + 1)* & - (k + buff_size*l)) - buff_send(r) = real(mv_in(j, k + pack_offset, l, i - nVar, q), kind=wp) + #:call GPU_PARALLEL_LOOP_OLD(collapse=5,private='[r]') + do i = nVar + 1, nVar + 4 + do l = 0, p + do k = 0, buff_size - 1 + do j = -buff_size, m + buff_size + do q = 1, nb + r = (i - 1) + (q - 1)*4 + nb*4 + v_size* & + ((j + buff_size) + (m + 2*buff_size + 1)* & + (k + buff_size*l)) + buff_send(r) = real(mv_in(j, k + pack_offset, l, i - nVar, q), kind=wp) + end do end do end do end do end do - end do - $:END_GPU_PARALLEL_LOOP() + #:endcall GPU_PARALLEL_LOOP_OLD end if #:else - $:GPU_PARALLEL_LOOP(collapse=4,private='[r]') - do i = 1, nVar - do l = 0, buff_size - 1 - do k = -buff_size, n + buff_size - do j = -buff_size, m + buff_size - r = (i - 1) + v_size* & - ((j + buff_size) + (m + 2*buff_size + 1)* & - ((k + buff_size) + (n + 2*buff_size + 1)*l)) - buff_send(r) = real(q_comm(i)%sf(j, k, l + pack_offset), kind=wp) + #:call GPU_PARALLEL_LOOP_OLD(collapse=4,private='[r]') + do i = 1, nVar + do l = 0, buff_size - 1 + do k = -buff_size, n + buff_size + do j = -buff_size, m + buff_size + r = (i - 1) + v_size* & + ((j + buff_size) + (m + 2*buff_size + 1)* & + ((k + buff_size) + (n + 2*buff_size + 1)*l)) + buff_send(r) = real(q_comm(i)%sf(j, k, l + pack_offset), kind=wp) + end do end do end do end do - end do - $:END_GPU_PARALLEL_LOOP() + #:endcall GPU_PARALLEL_LOOP_OLD if (qbmm_comm) then - $:GPU_PARALLEL_LOOP(collapse=5,private='[r]') - do i = nVar + 1, nVar + 4 - do l = 0, buff_size - 1 - do k = -buff_size, n + buff_size - do j = -buff_size, m + buff_size - do q = 1, nb - r = (i - 1) + (q - 1)*4 + v_size* & - ((j + buff_size) + (m + 2*buff_size + 1)* & - ((k + buff_size) + (n + 2*buff_size + 1)*l)) - buff_send(r) = real(pb_in(j, k, l + pack_offset, i - nVar, q), kind=wp) + #:call GPU_PARALLEL_LOOP_OLD(collapse=5,private='[r]') + do i = nVar + 1, nVar + 4 + do l = 0, buff_size - 1 + do k = -buff_size, n + buff_size + do j = -buff_size, m + buff_size + do q = 1, nb + r = (i - 1) + (q - 1)*4 + v_size* & + ((j + buff_size) + (m + 2*buff_size + 1)* & + ((k + buff_size) + (n + 2*buff_size + 1)*l)) + buff_send(r) = real(pb_in(j, k, l + pack_offset, i - nVar, q), kind=wp) + end do end do end do end do end do - end do - $:END_GPU_PARALLEL_LOOP() + #:endcall GPU_PARALLEL_LOOP_OLD - $:GPU_PARALLEL_LOOP(collapse=5,private='[r]') - do i = nVar + 1, nVar + 4 - do l = 0, buff_size - 1 - do k = -buff_size, n + buff_size - do j = -buff_size, m + buff_size - do q = 1, nb - r = (i - 1) + (q - 1)*4 + nb*4 + v_size* & - ((j + buff_size) + (m + 2*buff_size + 1)* & - ((k + buff_size) + (n + 2*buff_size + 1)*l)) - buff_send(r) = real(mv_in(j, k, l + pack_offset, i - nVar, q), kind=wp) + #:call GPU_PARALLEL_LOOP_OLD(collapse=5,private='[r]') + do i = nVar + 1, nVar + 4 + do l = 0, buff_size - 1 + do k = -buff_size, n + buff_size + do j = -buff_size, m + buff_size + do q = 1, nb + r = (i - 1) + (q - 1)*4 + nb*4 + v_size* & + ((j + buff_size) + (m + 2*buff_size + 1)* & + ((k + buff_size) + (n + 2*buff_size + 1)*l)) + buff_send(r) = real(mv_in(j, k, l + pack_offset, i - nVar, q), kind=wp) + end do end do end do end do end do - end do - $:END_GPU_PARALLEL_LOOP() + #:endcall GPU_PARALLEL_LOOP_OLD end if #:endif end if From 71f19b16d1351e3be34ad3c5966b01c436b2c299 Mon Sep 17 00:00:00 2001 From: Daniel Vickers Date: Mon, 17 Nov 2025 00:31:19 -0500 Subject: [PATCH 29/33] Last test pass, so I have narrowed the seg fault to 4 loops. Removed two more from that list for this test. --- src/common/m_mpi_common.fpp | 46 ++++++++++++++++++------------------- 1 file changed, 23 insertions(+), 23 deletions(-) diff --git a/src/common/m_mpi_common.fpp b/src/common/m_mpi_common.fpp index ec6dd1b8d..932b5dae3 100644 --- a/src/common/m_mpi_common.fpp +++ b/src/common/m_mpi_common.fpp @@ -837,38 +837,38 @@ contains end do $:END_GPU_PARALLEL_LOOP() - #:call GPU_PARALLEL_LOOP_OLD(collapse=5,private='[r]') - do i = nVar + 1, nVar + 4 - do l = 0, p - do k = 0, buff_size - 1 - do j = -buff_size, m + buff_size - do q = 1, nb - r = (i - 1) + (q - 1)*4 + nb*4 + v_size* & - ((j + buff_size) + (m + 2*buff_size + 1)* & - (k + buff_size*l)) - buff_send(r) = real(mv_in(j, k + pack_offset, l, i - nVar, q), kind=wp) - end do + $:GPU_PARALLEL_LOOP(collapse=5,private='[r]') + do i = nVar + 1, nVar + 4 + do l = 0, p + do k = 0, buff_size - 1 + do j = -buff_size, m + buff_size + do q = 1, nb + r = (i - 1) + (q - 1)*4 + nb*4 + v_size* & + ((j + buff_size) + (m + 2*buff_size + 1)* & + (k + buff_size*l)) + buff_send(r) = real(mv_in(j, k + pack_offset, l, i - nVar, q), kind=wp) end do end do end do end do - #:endcall GPU_PARALLEL_LOOP_OLD + end do + $:END_GPU_PARALLEL_LOOP() end if #:else - #:call GPU_PARALLEL_LOOP_OLD(collapse=4,private='[r]') - do i = 1, nVar - do l = 0, buff_size - 1 - do k = -buff_size, n + buff_size - do j = -buff_size, m + buff_size - r = (i - 1) + v_size* & - ((j + buff_size) + (m + 2*buff_size + 1)* & - ((k + buff_size) + (n + 2*buff_size + 1)*l)) - buff_send(r) = real(q_comm(i)%sf(j, k, l + pack_offset), kind=wp) - end do + $:GPU_PARALLEL_LOOP(collapse=4,private='[r]') + do i = 1, nVar + do l = 0, buff_size - 1 + do k = -buff_size, n + buff_size + do j = -buff_size, m + buff_size + r = (i - 1) + v_size* & + ((j + buff_size) + (m + 2*buff_size + 1)* & + ((k + buff_size) + (n + 2*buff_size + 1)*l)) + buff_send(r) = real(q_comm(i)%sf(j, k, l + pack_offset), kind=wp) end do end do end do - #:endcall GPU_PARALLEL_LOOP_OLD + end do + $:END_GPU_PARALLEL_LOOP() if (qbmm_comm) then #:call GPU_PARALLEL_LOOP_OLD(collapse=5,private='[r]') From 1875e3cc230afdcff148227ab98fc8bf33168e74 Mon Sep 17 00:00:00 2001 From: Daniel Vickers Date: Mon, 17 Nov 2025 00:44:02 -0500 Subject: [PATCH 30/33] Previous test compiled, meaning the problem is one of the two remaining loops or both. Changing one of them now. --- src/common/m_mpi_common.fpp | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/src/common/m_mpi_common.fpp b/src/common/m_mpi_common.fpp index 932b5dae3..2268034e6 100644 --- a/src/common/m_mpi_common.fpp +++ b/src/common/m_mpi_common.fpp @@ -871,22 +871,22 @@ contains $:END_GPU_PARALLEL_LOOP() if (qbmm_comm) then - #:call GPU_PARALLEL_LOOP_OLD(collapse=5,private='[r]') - do i = nVar + 1, nVar + 4 - do l = 0, buff_size - 1 - do k = -buff_size, n + buff_size - do j = -buff_size, m + buff_size - do q = 1, nb - r = (i - 1) + (q - 1)*4 + v_size* & - ((j + buff_size) + (m + 2*buff_size + 1)* & - ((k + buff_size) + (n + 2*buff_size + 1)*l)) - buff_send(r) = real(pb_in(j, k, l + pack_offset, i - nVar, q), kind=wp) - end do + $:GPU_PARALLEL_LOOP(collapse=5,private='[r]') + do i = nVar + 1, nVar + 4 + do l = 0, buff_size - 1 + do k = -buff_size, n + buff_size + do j = -buff_size, m + buff_size + do q = 1, nb + r = (i - 1) + (q - 1)*4 + v_size* & + ((j + buff_size) + (m + 2*buff_size + 1)* & + ((k + buff_size) + (n + 2*buff_size + 1)*l)) + buff_send(r) = real(pb_in(j, k, l + pack_offset, i - nVar, q), kind=wp) end do end do end do end do - #:endcall GPU_PARALLEL_LOOP_OLD + end do + $:END_GPU_PARALLEL_LOOP() #:call GPU_PARALLEL_LOOP_OLD(collapse=5,private='[r]') do i = nVar + 1, nVar + 4 From b5079f6d754b33b2605a689a3302731fa30cd751 Mon Sep 17 00:00:00 2001 From: Daniel Vickers Date: Mon, 17 Nov 2025 00:51:54 -0500 Subject: [PATCH 31/33] The previous test compiled. If this does not compile then I am certain it is line 891-906 in m_mpi_common.fpp. I will then have to figure out why that causes a seg fault. --- src/common/m_mpi_common.fpp | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/src/common/m_mpi_common.fpp b/src/common/m_mpi_common.fpp index 2268034e6..766c6d392 100644 --- a/src/common/m_mpi_common.fpp +++ b/src/common/m_mpi_common.fpp @@ -888,22 +888,22 @@ contains end do $:END_GPU_PARALLEL_LOOP() - #:call GPU_PARALLEL_LOOP_OLD(collapse=5,private='[r]') - do i = nVar + 1, nVar + 4 - do l = 0, buff_size - 1 - do k = -buff_size, n + buff_size - do j = -buff_size, m + buff_size - do q = 1, nb - r = (i - 1) + (q - 1)*4 + nb*4 + v_size* & - ((j + buff_size) + (m + 2*buff_size + 1)* & - ((k + buff_size) + (n + 2*buff_size + 1)*l)) - buff_send(r) = real(mv_in(j, k, l + pack_offset, i - nVar, q), kind=wp) - end do + $:GPU_PARALLEL_LOOP(collapse=5,private='[r]') + do i = nVar + 1, nVar + 4 + do l = 0, buff_size - 1 + do k = -buff_size, n + buff_size + do j = -buff_size, m + buff_size + do q = 1, nb + r = (i - 1) + (q - 1)*4 + nb*4 + v_size* & + ((j + buff_size) + (m + 2*buff_size + 1)* & + ((k + buff_size) + (n + 2*buff_size + 1)*l)) + buff_send(r) = real(mv_in(j, k, l + pack_offset, i - nVar, q), kind=wp) end do end do end do end do - #:endcall GPU_PARALLEL_LOOP_OLD + end do + $:END_GPU_PARALLEL_LOOP() end if #:endif end if From 31d913f4c4d1fdf44950b13496193d55bc76ae18 Mon Sep 17 00:00:00 2001 From: Daniel Vickers Date: Mon, 17 Nov 2025 01:04:49 -0500 Subject: [PATCH 32/33] For some reason, the last test compiled. It changed with format. I will be curious if the format change does anything. --- src/common/m_mpi_common.fpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/common/m_mpi_common.fpp b/src/common/m_mpi_common.fpp index 766c6d392..905b21570 100644 --- a/src/common/m_mpi_common.fpp +++ b/src/common/m_mpi_common.fpp @@ -896,7 +896,7 @@ contains do q = 1, nb r = (i - 1) + (q - 1)*4 + nb*4 + v_size* & ((j + buff_size) + (m + 2*buff_size + 1)* & - ((k + buff_size) + (n + 2*buff_size + 1)*l)) + ((k + buff_size) + (n + 2*buff_size + 1)*l)) buff_send(r) = real(mv_in(j, k, l + pack_offset, i - nVar, q), kind=wp) end do end do From 30417cbd8d21d0588c68b67ae13a0bbcf6fefae2 Mon Sep 17 00:00:00 2001 From: Daniel Vickers Date: Mon, 17 Nov 2025 02:42:42 -0500 Subject: [PATCH 33/33] Readding a fix that I implemented earlier for bubbles that fails on NVHOPC openACC, which must have been overwritten --- src/simulation/m_bubbles_EL.fpp | 2 +- src/simulation/m_bubbles_EL_kernels.fpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/simulation/m_bubbles_EL.fpp b/src/simulation/m_bubbles_EL.fpp index 02d34ccf1..897eb0b69 100644 --- a/src/simulation/m_bubbles_EL.fpp +++ b/src/simulation/m_bubbles_EL.fpp @@ -1513,7 +1513,7 @@ contains lag_void_max = 0._wp lag_void_avg = 0._wp lag_vol = 0._wp - $:GPU_PARALLEL_LOOP(private='[i,j,k]', collapse=3, reduction='[[lag_vol, lag_void_avg], [lag_void_max]]', reductionOp='[+, MAX]', copy='[lag_vol, lag_void_avg, lag_void_max]') + $:GPU_PARALLEL_LOOP(private='[volcell]', collapse=3, reduction='[[lag_vol, lag_void_avg], [lag_void_max]]', reductionOp='[+, MAX]', copy='[lag_vol, lag_void_avg, lag_void_max]') do k = 0, p do j = 0, n do i = 0, m diff --git a/src/simulation/m_bubbles_EL_kernels.fpp b/src/simulation/m_bubbles_EL_kernels.fpp index 99bb465f7..5d709d691 100644 --- a/src/simulation/m_bubbles_EL_kernels.fpp +++ b/src/simulation/m_bubbles_EL_kernels.fpp @@ -381,7 +381,7 @@ contains !> The purpose of this procedure is to calculate the characteristic cell volume !! @param cell Computational coordinates (x, y, z) !! @param Charvol Characteristic volume - elemental subroutine s_get_char_vol(cellx, celly, cellz, Charvol) + subroutine s_get_char_vol(cellx, celly, cellz, Charvol) $:GPU_ROUTINE(function_name='s_get_char_vol',parallelism='[seq]', & & cray_inline=True)