Skip to content

Commit 96a84de

Browse files
committed
Atomics and teams reduction for OpenMP GPU in Flang
This patch introduces three new features to Flang. - Allow scalar variables to be used in the map clause. - Enable LLVM-based atomics for OpenMP's atomic constructs when they occur in a target region and GPU offload is enabled. - Implement teams reduction using atomics.
1 parent a0503d9 commit 96a84de

File tree

5 files changed

+80
-29
lines changed

5 files changed

+80
-29
lines changed

tools/flang1/flang1exe/semant.c

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,6 @@
4343

4444
#include "atomic_common.h"
4545

46-
#define OPT_OMP_ATOMIC !flg.omptarget && !XBIT(69,0x1000)
4746

4847
static void gen_dinit(int, SST *);
4948
static void pop_subprogram(void);
@@ -1185,7 +1184,7 @@ semant1(int rednum, SST *top)
11851184
}
11861185
if (sem.mpaccatomic.seen &&
11871186
sem.mpaccatomic.action_type != ATOMIC_CAPTURE) {
1188-
if ((!sem.mpaccatomic.is_acc && OPT_OMP_ATOMIC)) {
1187+
if ((!sem.mpaccatomic.is_acc && use_opt_atomic(sem.doif_depth))) {
11891188
;
11901189
} else {
11911190
if (sem.mpaccatomic.is_acc)
@@ -1575,7 +1574,7 @@ semant1(int rednum, SST *top)
15751574
int ecs;
15761575
sem.mpaccatomic.apply = FALSE;
15771576
if (!sem.mpaccatomic.is_acc) {
1578-
if (OPT_OMP_ATOMIC) {
1577+
if (use_opt_atomic(sem.doif_depth)) {
15791578
ecs = mk_stmt(A_MP_ENDATOMIC, 0);
15801579
add_stmt(ecs);
15811580
} else {

tools/flang1/flang1exe/semant.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1486,6 +1486,7 @@ void CheckDecl(int);
14861486
void end_contained(void);
14871487

14881488
/* semsmp.c */
1489+
LOGICAL use_opt_atomic(int);
14891490
int emit_epar(void);
14901491
int emit_etarget(void);
14911492
void is_dovar_sptr(int);

tools/flang1/flang1exe/semant3.c

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,6 @@ static void end_association(int sptr);
7171
static int get_sst_named_whole_variable(SST *rhs);
7272
static int get_derived_type(SST *, LOGICAL);
7373

74-
#define OPT_OMP_ATOMIC !flg.omptarget && !XBIT(69,0x1000)
7574
#define IN_OPENMP_ATOMIC (sem.mpaccatomic.ast && !(sem.mpaccatomic.is_acc))
7675

7776
/** \brief semantic actions - part 3.
@@ -557,7 +556,7 @@ semant3(int rednum, SST *top)
557556
}
558557
gen_finalization_for_sym(sptr1, std, parent);
559558
}
560-
if (OPT_OMP_ATOMIC && sem.mpaccatomic.seen && !sem.mpaccatomic.is_acc) {
559+
if (use_opt_atomic(sem.doif_depth) && sem.mpaccatomic.seen && !sem.mpaccatomic.is_acc) {
561560
sem.mpaccatomic.accassignc++;
562561
ast = do_openmp_atomics(RHS(2), RHS(5));
563562
if (ast) {

tools/flang1/flang1exe/semsmp.c

Lines changed: 49 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,7 @@ static LOGICAL is_valid_atomic_update(int, int);
114114
static int mk_atomic_update_binop(int, int);
115115
static int mk_atomic_update_intr(int, int);
116116
static void do_map();
117+
static LOGICAL use_atomic_for_reduction(int);
117118

118119
#ifdef OMP_OFFLOAD_LLVM
119120
static void mp_handle_map_clause(SST *, int, char *, int, int, bool);
@@ -554,7 +555,7 @@ static LOGICAL any_pflsr_private = FALSE;
554555
static void add_pragmasyms(int pragmatype, int pragmascope, ITEM *itemp, int);
555556
static void add_pragma(int pragmatype, int pragmascope, int pragmaarg);
556557

557-
#define OPT_OMP_ATOMIC !flg.omptarget && !XBIT(69,0x1000)
558+
#define OPT_OMP_ATOMIC !XBIT(69,0x1000)
558559

559560
static int kernel_argnum;
560561

@@ -1348,7 +1349,7 @@ semsmp(int rednum, SST *top)
13481349
case MP_STMT33:
13491350
if (sem.mpaccatomic.action_type == ATOMIC_CAPTURE) {
13501351
int ecs;
1351-
if (OPT_OMP_ATOMIC) {
1352+
if (use_opt_atomic(sem.doif_depth)) {
13521353
ecs = mk_stmt(A_MP_ENDATOMIC, 0);
13531354
std = add_stmt(ecs);
13541355
} else {
@@ -3281,7 +3282,7 @@ semsmp(int rednum, SST *top)
32813282
sem.mpaccatomic.ast = 0;
32823283
sem.mpaccatomic.seen = TRUE;
32833284

3284-
if (OPT_OMP_ATOMIC) {
3285+
if (use_opt_atomic(sem.doif_depth)) {
32853286
sem.mpaccatomic.ast = mk_stmt(A_MP_ATOMIC, 0);
32863287
(void)add_stmt(sem.mpaccatomic.ast);
32873288
} else {
@@ -8002,7 +8003,7 @@ gen_reduction(REDUC *reducp, REDUC_SYM *reduc_symp, LOGICAL rmme,
80028003
return;
80038004
}
80048005
}
8005-
if (OPT_OMP_ATOMIC)
8006+
if (use_atomic_for_reduction(sem.doif_depth))
80068007
add_stmt(mk_stmt(A_MP_ATOMIC, 0));
80078008

80088009
(void)mk_storage(reduc_symp->shared, &lhs);
@@ -8023,7 +8024,8 @@ gen_reduction(REDUC *reducp, REDUC_SYM *reduc_symp, LOGICAL rmme,
80238024
* shared <-- intrin(shared, private)
80248025
*/
80258026
(void)ref_intrin(&intrin, arg1);
8026-
if (OPT_OMP_ATOMIC && sem.mpaccatomic.rmw_op != AOP_UNDEF) {
8027+
if (use_atomic_for_reduction(sem.doif_depth) &&
8028+
sem.mpaccatomic.rmw_op != AOP_UNDEF) {
80278029
MEMORY_ORDER save_mem_order = sem.mpaccatomic.mem_order;
80288030
sem.mpaccatomic.mem_order = MO_SEQ_CST;
80298031
mklvalue(&lhs, 1);
@@ -8069,7 +8071,7 @@ gen_reduction(REDUC *reducp, REDUC_SYM *reduc_symp, LOGICAL rmme,
80698071
SST_ASTP(&op1, ast);
80708072
SST_SHAPEP(&op1, A_SHAPEG(ast));
80718073

8072-
if (OPT_OMP_ATOMIC && get_atomic_rmw_op(opc) != AOP_UNDEF) {
8074+
if (use_atomic_for_reduction(sem.doif_depth)&& get_atomic_rmw_op(opc) != AOP_UNDEF) {
80738075
MEMORY_ORDER save_mem_order = sem.mpaccatomic.mem_order;
80748076

80758077
sem.mpaccatomic.rmw_op = get_atomic_rmw_op(opc);
@@ -8140,7 +8142,7 @@ end_reduction(REDUC *red, int doif)
81408142
if (reduc_symp->shared == 0)
81418143
/* error - illegal reduction variable */
81428144
continue;
8143-
if (!OPT_OMP_ATOMIC && !done) {
8145+
if (!use_atomic_for_reduction(sem.doif_depth) && !done) {
81448146
ast_crit = emit_bcs_ecs(A_MP_CRITICAL);
81458147
done = TRUE;
81468148
}
@@ -8151,21 +8153,20 @@ end_reduction(REDUC *red, int doif)
81518153

81528154
for (reducp = red; reducp; reducp = reducp->next) {
81538155
for (reduc_symp = reducp->list; reduc_symp; reduc_symp = reduc_symp->next) {
8154-
if(flg.omptarget && save_target && save_teams) {
8155-
error(1201, ERR_Severe, gbl.lineno, "reduction", "teams");
8156-
}
81578156
if (reduc_symp->shared == 0)
81588157
/* error - illegal reduction variable or set by loop above */
81598158
continue;
8160-
if (!OPT_OMP_ATOMIC && !done) {
8159+
if (!use_atomic_for_reduction(sem.doif_depth) && !done) {
81618160
#ifdef OMP_OFFLOAD_LLVM
81628161
ast_red = mk_stmt(A_MP_BREDUCTION, 0);
8163-
(void)add_stmt(ast_red);
8162+
(void) add_stmt(ast_red);
81648163
#endif
81658164
ast_crit = emit_bcs_ecs(A_MP_CRITICAL);
81668165
#ifdef OMP_OFFLOAD_LLVM
8167-
A_ISOMPREDUCTIONP(ast_crit, 1);
8168-
gen_reduction_ompaccel(reducp, reduc_symp, FALSE, in_parallel);
8166+
if (!use_atomic_for_reduction(sem.doif_depth)) {
8167+
A_ISOMPREDUCTIONP(ast_crit, 1);
8168+
gen_reduction_ompaccel(reducp, reduc_symp, FALSE, in_parallel);
8169+
}
81698170
#endif
81708171
done = TRUE;
81718172
}
@@ -8177,7 +8178,7 @@ end_reduction(REDUC *red, int doif)
81778178
sem.parallel = save_par;
81788179
sem.target = save_target;
81798180
sem.teams = save_teams;
8180-
if (!OPT_OMP_ATOMIC) {
8181+
if (!use_atomic_for_reduction(sem.doif_depth)) {
81818182
ast_endcrit = emit_bcs_ecs(A_MP_ENDCRITICAL);
81828183
A_LOPP(ast_crit, ast_endcrit);
81838184
A_LOPP(ast_endcrit, ast_crit);
@@ -10259,3 +10260,36 @@ check_map_data_sharing(int sptr)
1025910260
return TRUE;
1026010261
}
1026110262

10263+
/**
10264+
* \brief Decide whether to use optimized atomics.
10265+
*/
10266+
LOGICAL use_opt_atomic(int d)
10267+
{
10268+
#ifdef OMP_OFFLOAD_LLVM
10269+
if(flg.omptarget && (DI_IN_NEST(d, DI_TARGET) ||
10270+
DI_IN_NEST(d, DI_TARGTEAMSDISTPARDO) ||
10271+
DI_IN_NEST(d, DI_TARGPARDO) ||
10272+
DI_IN_NEST(d, DI_TARGETSIMD) ||
10273+
DI_IN_NEST(d, DI_TARGTEAMSDIST)))
10274+
return TRUE;
10275+
#endif
10276+
return OPT_OMP_ATOMIC;
10277+
}
10278+
10279+
/**
10280+
\brief Decide whether to use LLVM atomics for a reduction.
10281+
Atomics are used only for teams reduction.
10282+
*/
10283+
static LOGICAL use_atomic_for_reduction(int d)
10284+
{
10285+
#ifdef OMP_OFFLOAD_LLVM
10286+
if(flg.omptarget && DI_IN_NEST(d, DI_TARGET) ) {
10287+
if(DI_IN_NEST(d, DI_PARDO) ||
10288+
DI_IN_NEST(d, DI_TARGTEAMSDISTPARDO))
10289+
return OPT_OMP_ATOMIC;
10290+
else
10291+
return TRUE;
10292+
}
10293+
#endif
10294+
return OPT_OMP_ATOMIC;
10295+
}

tools/flang2/flang2exe/ompaccel.cpp

Lines changed: 27 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -950,6 +950,12 @@ tinfo_update_maptype(OMPACCEL_SYM *tsyms, int nargs, SPTR host_symbol,
950950
for (i = 0; i < nargs; ++i) {
951951
if (tsyms[i].host_sym == host_symbol) {
952952
tsyms[i].map_type = map_type;
953+
if (STYPEG(tsyms[i].host_sym) != ST_ARRAY) {
954+
/* if scalar variables are used in map clause, pass them by reference */
955+
if (map_type & OMP_TGT_MAPTYPE_FROM || map_type & OMP_TGT_MAPTYPE_TO)
956+
PASSBYREFP(tsyms[i].device_sym, 1);
957+
PASSBYVALP(tsyms[i].device_sym, 0);
958+
}
953959
return true;
954960
}
955961
}
@@ -962,6 +968,7 @@ ompaccel_tinfo_current_add_reductionitem(SPTR private_sym, SPTR shared_sym,
962968
{
963969
if (current_tinfo == nullptr)
964970
ompaccel_msg_interr("XXX", "Current target info is not found.\n");
971+
965972
current_tinfo->reduction_symbols[current_tinfo->n_reduction_symbols]
966973
.private_sym = private_sym;
967974
current_tinfo->reduction_symbols[current_tinfo->n_reduction_symbols]
@@ -973,9 +980,20 @@ ompaccel_tinfo_current_add_reductionitem(SPTR private_sym, SPTR shared_sym,
973980
// copied back to the host.
974981
PASSBYVALP(private_sym, 0);
975982

976-
ompaccel_tinfo_current_addupdate_mapitem(
977-
(SPTR)HASHLKG(shared_sym),
978-
OMP_TGT_MAPTYPE_TARGET_PARAM | OMP_TGT_MAPTYPE_TO | OMP_TGT_MAPTYPE_FROM);
983+
/* Mark reduction variable as tofrom */
984+
if (ompaccel_tinfo_current_target_mode() ==
985+
mode_target_teams_distribute_parallel_for ||
986+
ompaccel_tinfo_current_target_mode() ==
987+
mode_target_teams_distribute_parallel_for_simd)
988+
ompaccel_tinfo_current_addupdate_mapitem((SPTR)HASHLKG(private_sym),
989+
OMP_TGT_MAPTYPE_TARGET_PARAM |
990+
OMP_TGT_MAPTYPE_TO |
991+
OMP_TGT_MAPTYPE_FROM);
992+
else
993+
ompaccel_tinfo_current_addupdate_mapitem((SPTR)HASHLKG(shared_sym),
994+
OMP_TGT_MAPTYPE_TARGET_PARAM |
995+
OMP_TGT_MAPTYPE_TO |
996+
OMP_TGT_MAPTYPE_FROM);
979997
}
980998

981999
void
@@ -1291,7 +1309,7 @@ ompaccel_nvvm_emit_reduce(OMPACCEL_RED_SYM *ReductionItems, int NumReductions)
12911309
mk_ompaccel_addsymbol(".rhs", dtypeReduceData, SC_DUMMY, ST_VAR);
12921310

12931311
/* Generate function symbol */
1294-
sprintf(name, "%s%d", "ompaccel.reductionfunc", reductionFunctionCounter++);
1312+
sprintf(name, "%s%d", "ompaccel_reduction", reductionFunctionCounter++);
12951313
sptrFn = mk_ompaccel_function(name, 2, func_params, true);
12961314
cr_block();
12971315

@@ -1360,7 +1378,7 @@ ompaccel_nvvm_emit_shuffle_reduce(OMPACCEL_RED_SYM *ReductionItems,
13601378
PASSBYVALP(func_params[3], 1);
13611379

13621380
/* Generate function symbol */
1363-
sprintf(name, "%s%d", "ompaccel.shufflereduce", reductionFunctionCounter++);
1381+
sprintf(name, "%s%d", "ompaccel_shufflereduce", reductionFunctionCounter++);
13641382
sptrFn = mk_ompaccel_function(name, 4, func_params, true);
13651383
cr_block();
13661384

@@ -1444,7 +1462,7 @@ ompaccel_nvvm_emit_inter_warp_copy(OMPACCEL_RED_SYM *ReductionItems,
14441462
DTYPE dtypeReductionItem;
14451463
char name[30];
14461464

1447-
sprintf(name, "%s%d", "ompaccel.InterWarpCopy", reductionFunctionCounter++);
1465+
sprintf(name, "%s%d", "ompaccel_InterWarpCopy", reductionFunctionCounter++);
14481466
sptrReduceData = func_params[0] = mk_ompaccel_addsymbol(
14491467
".reduceData", mk_ompaccel_array_dtype(DT_INT8, NumReductions), SC_DUMMY,
14501468
ST_ARRAY);
@@ -1982,8 +2000,8 @@ exp_ompaccel_bteams(ILM *ilmp, int curilm, int outlinedCnt, SPTR uplevel_sptr,
19822000
}
19832001

19842002
if (flg.omptarget) {
1985-
ll_rewrite_ilms(-1, curilm, 0);
1986-
return;
2003+
ll_rewrite_ilms(-1, curilm, 0);
2004+
return;
19872005
}
19882006

19892007
if (XBIT(232, 0x1)) {
@@ -2044,7 +2062,7 @@ exp_ompaccel_emap(ILM *ilmp, int curilm)
20442062
{
20452063
int ili;
20462064
OMPACCEL_TINFO *targetinfo;
2047-
if(ompaccel_tinfo_has(gbl.currsub))
2065+
if (ompaccel_tinfo_has(gbl.currsub))
20482066
return;
20492067
ompaccel_symreplacer(true);
20502068
targetinfo = ompaccel_tinfo_current_get();

0 commit comments

Comments
 (0)