From 6d5760bc31a14996c419f7832034016581a7ba24 Mon Sep 17 00:00:00 2001 From: "Daniel J. Vickers" Date: Wed, 10 Dec 2025 16:43:51 -0500 Subject: [PATCH] Fixed compilation with post_processing and removed the last of the old macros --- src/common/include/acc_macros.fpp | 32 ---- src/common/include/omp_macros.fpp | 47 ----- src/common/include/parallel_macros.fpp | 17 -- src/common/m_mpi_common.fpp | 238 ++++++++++++------------- src/post_process/m_start_up.fpp | 24 +++ 5 files changed, 143 insertions(+), 215 deletions(-) diff --git a/src/common/include/acc_macros.fpp b/src/common/include/acc_macros.fpp index 3c048e8510..771ee976db 100644 --- a/src/common/include/acc_macros.fpp +++ b/src/common/include/acc_macros.fpp @@ -129,38 +129,6 @@ $:end_acc_directive #:enddef -#:def ACC_PARALLEL_LOOP_OLD(code, collapse=None, private=None, parallelism='[gang, vector]', & - & default='present', firstprivate=None, reduction=None, reductionOp=None, & - & copy=None, copyin=None, copyinReadOnly=None, copyout=None, create=None, & - & no_create=None, present=None, deviceptr=None, attach=None, extraAccArgs=None) - #:set collapse_val = GEN_COLLAPSE_STR(collapse) - #:set parallelism_val = GEN_PARALLELISM_STR(parallelism) - #:set default_val = GEN_DEFAULT_STR(default) - #:set private_val = GEN_PRIVATE_STR(private, False).strip('\n') + GEN_PRIVATE_STR(firstprivate, True).strip('\n') - #:set reduction_val = GEN_REDUCTION_STR(reduction, reductionOp) - #:set copy_val = GEN_COPY_STR(copy) - #:set copyin_val = GEN_COPYIN_STR(copyin, False).strip('\n') + GEN_COPYIN_STR(copyinReadOnly, True).strip('\n') - #:set copyout_val = GEN_COPYOUT_STR(copyout) - #:set create_val = GEN_CREATE_STR(create) - #:set no_create_val = GEN_NOCREATE_STR(no_create) - #:set present_val = GEN_PRESENT_STR(present) - #:set deviceptr_val = GEN_DEVICEPTR_STR(deviceptr) - #:set attach_val = GEN_ATTACH_STR(attach) - #:set extraAccArgs_val = GEN_EXTRA_ARGS_STR(extraAccArgs) - #:set clause_val = collapse_val.strip('\n') + parallelism_val.strip('\n') + & - & default_val.strip('\n') + private_val.strip('\n') + reduction_val.strip('\n') + & - & copy_val.strip('\n') + copyin_val.strip('\n') + & - & copyout_val.strip('\n') + create_val.strip('\n') + & - & no_create_val.strip('\n') + present_val.strip('\n') + & - & deviceptr_val.strip('\n') + attach_val.strip('\n') - #:set acc_directive = '!$acc parallel loop ' + & - & clause_val + extraAccArgs_val.strip('\n') - #:set acc_end_directive = '!$acc end parallel loop' - $:acc_directive - $:code - $:acc_end_directive -#:enddef - #:def ACC_PARALLEL_LOOP(collapse=None, private=None, parallelism='[gang, vector]', & & default='present', firstprivate=None, reduction=None, reductionOp=None, & & copy=None, copyin=None, copyinReadOnly=None, copyout=None, create=None, & diff --git a/src/common/include/omp_macros.fpp b/src/common/include/omp_macros.fpp index 2b7606d03f..2e1df1dd8a 100644 --- a/src/common/include/omp_macros.fpp +++ b/src/common/include/omp_macros.fpp @@ -149,53 +149,6 @@ $:omp_end_directive #:enddef -#:def OMP_PARALLEL_LOOP_OLD(code, collapse=None, private=None, parallelism='[gang, vector]', & - & default='present', firstprivate=None, reduction=None, reductionOp=None, & - & copy=None, copyin=None, copyinReadOnly=None, copyout=None, create=None, & - & no_create=None, present=None, deviceptr=None, attach=None, extraOmpArgs=None) - - #:set collapse_val = GEN_COLLAPSE_STR(collapse) - #:set parallelism_val = OMP_PARALLELISM_STR(parallelism) - #:set default_val = OMP_DEFAULT_STR(default) - #:set private_val = 
GEN_PRIVATE_STR(private, False).strip('\n') + GEN_PRIVATE_STR(firstprivate, True).strip('\n') - #:set reduction_val = GEN_REDUCTION_STR(reduction, reductionOp) - #:set copy_val = OMP_COPY_STR(copy) - #:set copyin_val = OMP_COPYIN_STR(copyin).strip('\n') + OMP_COPYIN_STR(copyinReadOnly).strip('\n') - #:set copyout_val = OMP_COPYOUT_STR(copyout) - #:set create_val = OMP_CREATE_STR(create) - #:set no_create_val = OMP_NOCREATE_STR(no_create) - #:set present_val = OMP_PRESENT_STR(present) - #:set deviceptr_val = OMP_DEVICEPTR_STR(deviceptr) - #:set attach_val = OMP_MAP_STR('always,tofrom', attach) - #:set extraOmpArgs_val = GEN_EXTRA_ARGS_STR(extraOmpArgs) - #:set clause_val = collapse_val.strip('\n') + parallelism_val.strip('\n') + & - & default_val.strip('\n') + private_val.strip('\n') + reduction_val.strip('\n') + & - & copy_val.strip('\n') + copyin_val.strip('\n') + & - & copyout_val.strip('\n') + create_val.strip('\n') + & - & no_create_val.strip('\n') + present_val.strip('\n') + & - & deviceptr_val.strip('\n') + attach_val.strip('\n') - #! Hardcoding the parallelism for now - - #:if MFC_COMPILER == NVIDIA_COMPILER_ID or MFC_COMPILER == PGI_COMPILER_ID - #:set omp_start_directive = '!$omp target teams loop defaultmap(firstprivate:scalar) bind(teams,parallel) ' - #:set omp_end_directive = '!$omp end target teams loop' - #:elif MFC_COMPILER == CCE_COMPILER_ID - #:set omp_start_directive = '!$omp target teams distribute parallel do simd defaultmap(firstprivate:scalar) ' - #:set omp_end_directive = '!$omp end target teams distribute parallel do simd' - #:elif MFC_COMPILER == AMD_COMPILER_ID - #:set omp_start_directive = '!$omp target teams distribute parallel do ' - #:set omp_end_directive = '!$omp end target teams distribute parallel do' - #:else - #:set omp_start_directive = '!$omp target teams loop defaultmap(firstprivate:scalar) bind(teams,parallel) ' - #:set omp_end_directive = '!$omp end target teams loop' - #:endif - - #:set omp_directive = omp_start_directive + clause_val + extraOmpArgs_val.strip('\n') - $:omp_directive - $:code - $:omp_end_directive -#:enddef - #:def OMP_PARALLEL_LOOP(collapse=None, private=None, parallelism='[gang, vector]', & & default='present', firstprivate=None, reduction=None, reductionOp=None, & & copy=None, copyin=None, copyinReadOnly=None, copyout=None, create=None, & diff --git a/src/common/include/parallel_macros.fpp b/src/common/include/parallel_macros.fpp index e5dc5605e9..bfe4b3beaf 100644 --- a/src/common/include/parallel_macros.fpp +++ b/src/common/include/parallel_macros.fpp @@ -19,23 +19,6 @@ #:enddef -#:def GPU_PARALLEL_LOOP_OLD(code, collapse=None, private=None, parallelism='[gang, vector]', & - & default='present', firstprivate=None, reduction=None, reductionOp=None, & - & copy=None, copyin=None, copyinReadOnly=None, copyout=None, create=None, & - & no_create=None, present=None, deviceptr=None, attach=None, extraAccArgs=None, extraOmpArgs=None) - - #:set acc_code = ACC_PARALLEL_LOOP_OLD(code, collapse, private, parallelism, default, firstprivate, reduction, reductionOp, copy, copyin, copyinReadOnly, copyout, create, no_create, present, deviceptr, attach, extraAccArgs) - #:set omp_code = OMP_PARALLEL_LOOP_OLD(code, collapse, private, parallelism, default, firstprivate, reduction, reductionOp, copy, copyin, copyinReadOnly, copyout, create, no_create, present, deviceptr, attach, extraOmpArgs) - -#if defined(MFC_OpenACC) - $:acc_code -#elif defined(MFC_OpenMP) - $:omp_code -#else - $:code -#endif -#:enddef - #:def GPU_PARALLEL_LOOP(collapse=None, 
private=None, parallelism='[gang, vector]', & & default='present', firstprivate=None, reduction=None, reductionOp=None, & & copy=None, copyin=None, copyinReadOnly=None, copyout=None, create=None, & diff --git a/src/common/m_mpi_common.fpp b/src/common/m_mpi_common.fpp index 905b21570a..3ff753caca 100644 --- a/src/common/m_mpi_common.fpp +++ b/src/common/m_mpi_common.fpp @@ -958,176 +958,176 @@ contains #:for mpi_dir in [1, 2, 3] if (mpi_dir == ${mpi_dir}$) then #:if mpi_dir == 1 - #:call GPU_PARALLEL_LOOP_OLD(collapse=4,private='[r]') - do l = 0, p - do k = 0, n - do j = -buff_size, -1 - do i = 1, nVar - r = (i - 1) + v_size* & - (j + buff_size*((k + 1) + (n + 1)*l)) - q_comm(i)%sf(j + unpack_offset, k, l) = real(buff_recv(r), kind=stp) + $:GPU_PARALLEL_LOOP(collapse=4,private='[r]') + do l = 0, p + do k = 0, n + do j = -buff_size, -1 + do i = 1, nVar + r = (i - 1) + v_size* & + (j + buff_size*((k + 1) + (n + 1)*l)) + q_comm(i)%sf(j + unpack_offset, k, l) = real(buff_recv(r), kind=stp) #if defined(__INTEL_COMPILER) - if (ieee_is_nan(q_comm(i)%sf(j, k, l))) then - print *, "Error", j, k, l, i - error stop "NaN(s) in recv" - end if + if (ieee_is_nan(q_comm(i)%sf(j, k, l))) then + print *, "Error", j, k, l, i + error stop "NaN(s) in recv" + end if #endif - end do end do end do end do - #:endcall GPU_PARALLEL_LOOP_OLD + end do + $:END_GPU_PARALLEL_LOOP() if (qbmm_comm) then - #:call GPU_PARALLEL_LOOP_OLD(collapse=5,private='[r]') - do l = 0, p - do k = 0, n - do j = -buff_size, -1 - do i = nVar + 1, nVar + 4 - do q = 1, nb - r = (i - 1) + (q - 1)*4 + v_size* & - (j + buff_size*((k + 1) + (n + 1)*l)) - pb_in(j + unpack_offset, k, l, i - nVar, q) = real(buff_recv(r), kind=stp) - end do + $:GPU_PARALLEL_LOOP(collapse=5,private='[r]') + do l = 0, p + do k = 0, n + do j = -buff_size, -1 + do i = nVar + 1, nVar + 4 + do q = 1, nb + r = (i - 1) + (q - 1)*4 + v_size* & + (j + buff_size*((k + 1) + (n + 1)*l)) + pb_in(j + unpack_offset, k, l, i - nVar, q) = real(buff_recv(r), kind=stp) end do end do end do end do - #:endcall GPU_PARALLEL_LOOP_OLD + end do + $:END_GPU_PARALLEL_LOOP() - #:call GPU_PARALLEL_LOOP_OLD(collapse=5,private='[r]') - do l = 0, p - do k = 0, n - do j = -buff_size, -1 - do i = nVar + 1, nVar + 4 - do q = 1, nb - r = (i - 1) + (q - 1)*4 + nb*4 + v_size* & - (j + buff_size*((k + 1) + (n + 1)*l)) - mv_in(j + unpack_offset, k, l, i - nVar, q) = real(buff_recv(r), kind=stp) - end do + $:GPU_PARALLEL_LOOP(collapse=5,private='[r]') + do l = 0, p + do k = 0, n + do j = -buff_size, -1 + do i = nVar + 1, nVar + 4 + do q = 1, nb + r = (i - 1) + (q - 1)*4 + nb*4 + v_size* & + (j + buff_size*((k + 1) + (n + 1)*l)) + mv_in(j + unpack_offset, k, l, i - nVar, q) = real(buff_recv(r), kind=stp) end do end do end do end do - #:endcall GPU_PARALLEL_LOOP_OLD + end do + $:END_GPU_PARALLEL_LOOP() end if #:elif mpi_dir == 2 - #:call GPU_PARALLEL_LOOP_OLD(collapse=4,private='[r]') - do i = 1, nVar - do l = 0, p - do k = -buff_size, -1 - do j = -buff_size, m + buff_size - r = (i - 1) + v_size* & - ((j + buff_size) + (m + 2*buff_size + 1)* & - ((k + buff_size) + buff_size*l)) - q_comm(i)%sf(j, k + unpack_offset, l) = real(buff_recv(r), kind=stp) + $:GPU_PARALLEL_LOOP(collapse=4,private='[r]') + do i = 1, nVar + do l = 0, p + do k = -buff_size, -1 + do j = -buff_size, m + buff_size + r = (i - 1) + v_size* & + ((j + buff_size) + (m + 2*buff_size + 1)* & + ((k + buff_size) + buff_size*l)) + q_comm(i)%sf(j, k + unpack_offset, l) = real(buff_recv(r), kind=stp) #if defined(__INTEL_COMPILER) - if 
(ieee_is_nan(q_comm(i)%sf(j, k, l))) then - print *, "Error", j, k, l, i - error stop "NaN(s) in recv" - end if + if (ieee_is_nan(q_comm(i)%sf(j, k, l))) then + print *, "Error", j, k, l, i + error stop "NaN(s) in recv" + end if #endif - end do end do end do end do - #:endcall GPU_PARALLEL_LOOP_OLD + end do + $:END_GPU_PARALLEL_LOOP() if (qbmm_comm) then - #:call GPU_PARALLEL_LOOP_OLD(collapse=5,private='[r]') - do i = nVar + 1, nVar + 4 - do l = 0, p - do k = -buff_size, -1 - do j = -buff_size, m + buff_size - do q = 1, nb - r = (i - 1) + (q - 1)*4 + v_size* & - ((j + buff_size) + (m + 2*buff_size + 1)* & - ((k + buff_size) + buff_size*l)) - pb_in(j, k + unpack_offset, l, i - nVar, q) = real(buff_recv(r), kind=stp) - end do + $:GPU_PARALLEL_LOOP(collapse=5,private='[r]') + do i = nVar + 1, nVar + 4 + do l = 0, p + do k = -buff_size, -1 + do j = -buff_size, m + buff_size + do q = 1, nb + r = (i - 1) + (q - 1)*4 + v_size* & + ((j + buff_size) + (m + 2*buff_size + 1)* & + ((k + buff_size) + buff_size*l)) + pb_in(j, k + unpack_offset, l, i - nVar, q) = real(buff_recv(r), kind=stp) end do end do end do end do - #:endcall GPU_PARALLEL_LOOP_OLD - - #:call GPU_PARALLEL_LOOP_OLD(collapse=5,private='[r]') - do i = nVar + 1, nVar + 4 - do l = 0, p - do k = -buff_size, -1 - do j = -buff_size, m + buff_size - do q = 1, nb - r = (i - 1) + (q - 1)*4 + nb*4 + v_size* & - ((j + buff_size) + (m + 2*buff_size + 1)* & - ((k + buff_size) + buff_size*l)) - mv_in(j, k + unpack_offset, l, i - nVar, q) = real(buff_recv(r), kind=stp) - end do + end do + $:END_GPU_PARALLEL_LOOP() + + $:GPU_PARALLEL_LOOP(collapse=5,private='[r]') + do i = nVar + 1, nVar + 4 + do l = 0, p + do k = -buff_size, -1 + do j = -buff_size, m + buff_size + do q = 1, nb + r = (i - 1) + (q - 1)*4 + nb*4 + v_size* & + ((j + buff_size) + (m + 2*buff_size + 1)* & + ((k + buff_size) + buff_size*l)) + mv_in(j, k + unpack_offset, l, i - nVar, q) = real(buff_recv(r), kind=stp) end do end do end do end do - #:endcall GPU_PARALLEL_LOOP_OLD + end do + $:END_GPU_PARALLEL_LOOP() end if #:else ! 
Unpacking buffer from bc_z%beg - #:call GPU_PARALLEL_LOOP_OLD(collapse=4,private='[r]') - do i = 1, nVar - do l = -buff_size, -1 - do k = -buff_size, n + buff_size - do j = -buff_size, m + buff_size - r = (i - 1) + v_size* & - ((j + buff_size) + (m + 2*buff_size + 1)* & - ((k + buff_size) + (n + 2*buff_size + 1)* & - (l + buff_size))) - q_comm(i)%sf(j, k, l + unpack_offset) = real(buff_recv(r), kind=stp) + $:GPU_PARALLEL_LOOP(collapse=4,private='[r]') + do i = 1, nVar + do l = -buff_size, -1 + do k = -buff_size, n + buff_size + do j = -buff_size, m + buff_size + r = (i - 1) + v_size* & + ((j + buff_size) + (m + 2*buff_size + 1)* & + ((k + buff_size) + (n + 2*buff_size + 1)* & + (l + buff_size))) + q_comm(i)%sf(j, k, l + unpack_offset) = real(buff_recv(r), kind=stp) #if defined(__INTEL_COMPILER) - if (ieee_is_nan(q_comm(i)%sf(j, k, l))) then - print *, "Error", j, k, l, i - error stop "NaN(s) in recv" - end if + if (ieee_is_nan(q_comm(i)%sf(j, k, l))) then + print *, "Error", j, k, l, i + error stop "NaN(s) in recv" + end if #endif - end do end do end do end do - #:endcall GPU_PARALLEL_LOOP_OLD + end do + $:END_GPU_PARALLEL_LOOP() if (qbmm_comm) then - #:call GPU_PARALLEL_LOOP_OLD(collapse=5,private='[r]') - do i = nVar + 1, nVar + 4 - do l = -buff_size, -1 - do k = -buff_size, n + buff_size - do j = -buff_size, m + buff_size - do q = 1, nb - r = (i - 1) + (q - 1)*4 + v_size* & - ((j + buff_size) + (m + 2*buff_size + 1)* & - ((k + buff_size) + (n + 2*buff_size + 1)* & - (l + buff_size))) - pb_in(j, k, l + unpack_offset, i - nVar, q) = real(buff_recv(r), kind=stp) - end do + $:GPU_PARALLEL_LOOP(collapse=5,private='[r]') + do i = nVar + 1, nVar + 4 + do l = -buff_size, -1 + do k = -buff_size, n + buff_size + do j = -buff_size, m + buff_size + do q = 1, nb + r = (i - 1) + (q - 1)*4 + v_size* & + ((j + buff_size) + (m + 2*buff_size + 1)* & + ((k + buff_size) + (n + 2*buff_size + 1)* & + (l + buff_size))) + pb_in(j, k, l + unpack_offset, i - nVar, q) = real(buff_recv(r), kind=stp) end do end do end do end do - #:endcall GPU_PARALLEL_LOOP_OLD - - #:call GPU_PARALLEL_LOOP_OLD(collapse=5,private='[r]') - do i = nVar + 1, nVar + 4 - do l = -buff_size, -1 - do k = -buff_size, n + buff_size - do j = -buff_size, m + buff_size - do q = 1, nb - r = (i - 1) + (q - 1)*4 + nb*4 + v_size* & - ((j + buff_size) + (m + 2*buff_size + 1)* & - ((k + buff_size) + (n + 2*buff_size + 1)* & - (l + buff_size))) - mv_in(j, k, l + unpack_offset, i - nVar, q) = real(buff_recv(r), kind=stp) - end do + end do + $:END_GPU_PARALLEL_LOOP() + + $:GPU_PARALLEL_LOOP(collapse=5,private='[r]') + do i = nVar + 1, nVar + 4 + do l = -buff_size, -1 + do k = -buff_size, n + buff_size + do j = -buff_size, m + buff_size + do q = 1, nb + r = (i - 1) + (q - 1)*4 + nb*4 + v_size* & + ((j + buff_size) + (m + 2*buff_size + 1)* & + ((k + buff_size) + (n + 2*buff_size + 1)* & + (l + buff_size))) + mv_in(j, k, l + unpack_offset, i - nVar, q) = real(buff_recv(r), kind=stp) end do end do end do end do - #:endcall GPU_PARALLEL_LOOP_OLD + end do + $:END_GPU_PARALLEL_LOOP() end if #:endif end if diff --git a/src/post_process/m_start_up.fpp b/src/post_process/m_start_up.fpp index 41516305e2..904c4207cc 100644 --- a/src/post_process/m_start_up.fpp +++ b/src/post_process/m_start_up.fpp @@ -510,7 +510,9 @@ contains end do end do +#ifdef MFC_MPI call MPI_ALLREDUCE(MPI_IN_PLACE, En, Nf, mpi_p, MPI_SUM, MPI_COMM_WORLD, ierr) +#endif if (proc_rank == 0) then call s_create_directory('En_FFT_DATA') @@ -888,6 +890,8 @@ contains integer :: dest_rank, src_rank 
integer :: i, j, k, l +#ifdef MFC_MPI + allocate (sendbuf(Nx*Nyloc*Nzloc)) allocate (recvbuf(Nx*Nyloc*Nzloc)) @@ -917,6 +921,8 @@ contains deallocate (sendbuf) deallocate (recvbuf) +#endif + end subroutine s_mpi_transpose_x2y subroutine s_mpi_transpose_y2z @@ -924,6 +930,8 @@ contains integer :: dest_rank, src_rank integer :: j, k, l +#ifdef MFC_MPI + allocate (sendbuf(Ny*Nxloc*Nzloc)) allocate (recvbuf(Ny*Nxloc*Nzloc)) @@ -953,6 +961,8 @@ contains deallocate (sendbuf) deallocate (recvbuf) +#endif + end subroutine s_mpi_transpose_y2z impure subroutine s_initialize_modules @@ -984,6 +994,7 @@ contains s_read_data_files => s_read_parallel_data_files end if +#ifdef MFC_MPI if (fft_wrt) then num_procs_x = (m_glb + 1)/(m + 1) @@ -1054,12 +1065,15 @@ contains call MPI_CART_COORDS(MPI_COMM_CART13, proc_rank13, 2, cart2d13_coords, ierr) end if +#endif end subroutine s_initialize_modules subroutine s_mpi_FFT_fwd integer :: j, k, l +#ifdef MFC_MPI + do l = 1, Nzloc do k = 1, Nyloc do j = 1, Nx @@ -1118,9 +1132,15 @@ contains end do end do +#endif + end subroutine s_mpi_FFT_fwd impure subroutine s_initialize_mpi_domain + + num_dims = 1 + min(1, n) + min(1, p) + +#ifdef MFC_MPI ! Initialization of the MPI environment call s_mpi_initialize() @@ -1144,6 +1164,8 @@ contains call s_mpi_decompose_computational_domain() call s_check_inputs_fft() +#endif + end subroutine s_initialize_mpi_domain impure subroutine s_finalize_modules @@ -1169,11 +1191,13 @@ contains call fftw_cleanup() end if +#ifdef MFC_MPI if (fft_wrt) then if (MPI_COMM_CART12 /= MPI_COMM_NULL) call MPI_Comm_free(MPI_COMM_CART12, ierr) if (MPI_COMM_CART13 /= MPI_COMM_NULL) call MPI_Comm_free(MPI_COMM_CART13, ierr) if (MPI_COMM_CART /= MPI_COMM_NULL) call MPI_Comm_free(MPI_COMM_CART, ierr) end if +#endif ! Deallocation procedures for the modules call s_finalize_data_output_module()
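
For readers reviewing the patch, here is a minimal sketch (not part of the change itself) of the usage pattern the hunks above convert to: the removed #:call GPU_PARALLEL_LOOP_OLD(...) / #:endcall block form, which passed the loop body in as a macro argument, is replaced by a $:GPU_PARALLEL_LOOP(...) / $:END_GPU_PARALLEL_LOOP() pair wrapped around ordinary Fortran loops, and MPI-only code is fenced with #ifdef MFC_MPI so serial builds still compile. The subroutine name, array names, and the #:include line below are illustrative assumptions, not code from this patch.

    #! Assumed include; the GPU loop macros are defined in src/common/include/parallel_macros.fpp
    #:include 'parallel_macros.fpp'

    subroutine s_example_unpack(dst, src, nx, ny)   ! hypothetical routine, for illustration only

        integer, intent(in) :: nx, ny
        real, intent(in) :: src((nx + 1)*(ny + 1))
        real, intent(inout) :: dst(0:nx, 0:ny)
        integer :: j, k, r

        ! New style: the macro pair emits the OpenACC/OpenMP directives around plain loops
        $:GPU_PARALLEL_LOOP(collapse=2, private='[r]')
        do k = 0, ny
            do j = 0, nx
                r = j + (nx + 1)*k        ! flattened receive-buffer index
                dst(j, k) = src(r + 1)
            end do
        end do
        $:END_GPU_PARALLEL_LOOP()

    #ifdef MFC_MPI
        ! MPI-only work goes inside this guard, as in the m_start_up.fpp hunks above
    #endif

    end subroutine s_example_unpack

Because the loop body is no longer a macro argument, the code between the two macro lines keeps its natural line structure, which is why the m_mpi_common.fpp hunks above re-indent every unpacking loop rather than change its logic.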