2525#define nrn_pragma_stringify (x ) #x
2626// For now we do not support OpenMP offload without OpenACC, that will come soon
2727#ifdef CORENRN_PREFER_OPENMP_OFFLOAD
28- #define nrn_omp_pragma (x ) _Pragma (nrn_pragma_stringify(omp x))
29- #define nrn_acc_pragma (x )
28+ #define nrn_pragma_omp (x ) _Pragma (nrn_pragma_stringify(omp x))
29+ #define nrn_pragma_acc (x )
3030#else
31- #define nrn_acc_pragma (x ) _Pragma (nrn_pragma_stringify(acc x))
32- #define nrn_omp_pragma (x )
31+ #define nrn_pragma_acc (x ) _Pragma (nrn_pragma_stringify(acc x))
32+ #define nrn_pragma_omp (x )
3333#endif
3434#else
3535// No OpenACC -> no GPU offload, for now.
3636#ifdef CORENRN_PREFER_OPENMP_OFFLOAD
3737#error "Pure OpenMP offload not supported yet"
3838#endif
39- #define nrn_acc_pragma (x )
40- #define nrn_omp_pragma (x )
39+ #define nrn_pragma_acc (x )
40+ #define nrn_pragma_omp (x )
4141#endif
4242
4343namespace coreneuron {
@@ -506,7 +506,7 @@ static void triang_interleaved2(NrnThread* nt, int icore, int ncycle, int* strid
506506 bool has_subtrees_to_compute = true ;
507507
508508 // clang-format off
509- nrn_acc_pragma (loop seq)
509+ nrn_pragma_acc (loop seq)
510510 for (; has_subtrees_to_compute; ) { // ncycle loop
511511#if !defined(_OPENACC)
512512 // serial test, gpu does this in parallel
@@ -517,11 +517,11 @@ static void triang_interleaved2(NrnThread* nt, int icore, int ncycle, int* strid
517517 // what is the index
518518 int ip = GPU_PARENT (i);
519519 double p = GPU_A (i) / GPU_D (i);
520- nrn_acc_pragma (atomic update)
521- nrn_omp_pragma (atomic update)
520+ nrn_pragma_acc (atomic update)
521+ nrn_pragma_omp (atomic update)
522522 GPU_D (ip) -= p * GPU_B (i);
523- nrn_acc_pragma (atomic update)
524- nrn_omp_pragma (atomic update)
523+ nrn_pragma_acc (atomic update)
524+ nrn_pragma_omp (atomic update)
525525 GPU_RHS (ip) -= p * GPU_RHS (i);
526526 }
527527#if !defined(_OPENACC)
@@ -554,7 +554,7 @@ static void bksub_interleaved2(NrnThread* nt,
554554#if !defined(_OPENACC)
555555 for (int i = root; i < lastroot; i += 1 ) {
556556#else
557- nrn_acc_pragma (loop seq)
557+ nrn_pragma_acc (loop seq)
558558 for (int i = root; i < lastroot; i += warpsize) {
559559#endif
560560 GPU_RHS (i) /= GPU_D (i); // the root
@@ -615,14 +615,14 @@ void solve_interleaved2(int ith) {
615615#if defined(_OPENACC) && !defined(CORENRN_PREFER_OPENMP_OFFLOAD)
616616 int nstride = stridedispl[nwarp];
617617#endif
618- nrn_acc_pragma (parallel loop gang vector vector_length (
618+ nrn_pragma_acc (parallel loop gang vector vector_length (
619619 warpsize) present (nt [0 :1 ],
620620 strides [0 :nstride],
621621 ncycles [0 :nwarp],
622622 stridedispl [0 :nwarp + 1 ],
623623 rootbegin [0 :nwarp + 1 ],
624624 nodebegin [0 :nwarp + 1 ]) if (nt->compute_gpu ) async (nt->stream_id ))
625- nrn_omp_pragma (target teams distribute parallel for simd depend (inout: nt) if (nt->compute_gpu ))
625+ nrn_pragma_omp (target teams distribute parallel for simd depend (inout: nt) if (nt->compute_gpu ))
626626 for (int icore = 0 ; icore < ncore; ++icore) {
627627 int iwarp = icore / warpsize; // figure out the >> value
628628 int ic = icore & (warpsize - 1 ); // figure out the & mask
@@ -641,7 +641,7 @@ void solve_interleaved2(int ith) {
641641 } // serial test mode
642642#endif
643643 }
644- nrn_acc_pragma (wait (nt->stream_id ))
644+ nrn_pragma_acc (wait (nt->stream_id ))
645645#ifdef _OPENACC
646646 }
647647#endif
@@ -676,19 +676,19 @@ void solve_interleaved1(int ith) {
676676 // OL211123: can we preserve the error checking behaviour of OpenACC's
677677 // present clause with OpenMP? It is a bug if these data are not present,
678678 // so diagnostics are helpful...
679- nrn_acc_pragma (parallel loop present (nt [0 :1 ],
679+ nrn_pragma_acc (parallel loop present (nt [0 :1 ],
680680 stride [0 :nstride],
681681 firstnode [0 :ncell],
682682 lastnode [0 :ncell],
683683 cellsize [0 :ncell]) if (nt->compute_gpu )
684684 async (nt->stream_id ))
685- nrn_omp_pragma (target teams distribute parallel for simd depend (inout: nt) if (nt->compute_gpu ))
685+ nrn_pragma_omp (target teams distribute parallel for simd depend (inout: nt) if (nt->compute_gpu ))
686686 for (int icell = 0 ; icell < ncell; ++icell) {
687687 int icellsize = cellsize[icell];
688688 triang_interleaved (nt, icell, icellsize, nstride, stride, lastnode);
689689 bksub_interleaved (nt, icell, icellsize, nstride, stride, firstnode);
690690 }
691- nrn_acc_pragma (wait (nt->stream_id ))
691+ nrn_pragma_acc (wait (nt->stream_id ))
692692}
693693
694694void solve_interleaved (int ith) {
0 commit comments