Skip to content

Commit 9c2bc63

Browse files
authored
Merge pull request #115 from ThePortlandGroup/nv_stage
Pull 2017-06-28T16-49 Recent NVIDIA Changes
2 parents 58d1a06 + 1b77666 commit 9c2bc63

File tree

15 files changed

+1370
-153
lines changed

15 files changed

+1370
-153
lines changed

tools/flang2/docs/xflag.n

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2180,7 +2180,7 @@ do loops in pgf90.
21802180
linux 64 C++ : revert to .rodata sections, instead of linkonce.r sections for
21812181
jump tables in weak(templated) functions
21822182
.XB 0x1000:
2183-
use to enable new atomic implementation
2183+
Disable new OpenMP atomic and reduction implementation.
21842184
.XB 0x2000:
21852185
Temporarily disable collapse clause for distribute parallel loop in Fortran
21862186
.XB 0x4000:
Lines changed: 94 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,94 @@
1+
/*
2+
* Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*
16+
*/
17+
18+
19+
#ifndef PGI_ATOMIC_COMMON_DEF
20+
#define PGI_ATOMIC_COMMON_DEF
21+
22+
/** Specifies memory order of an atomic operation.
23+
Values corresponding to C11/C++11 memory orders are guaranteed
24+
to match those of the target's C11/C++11 header. */
25+
typedef enum MEMORY_ORDER {
26+
MO_RELAXED,
27+
MO_CONSUME,
28+
MO_ACQUIRE,
29+
MO_RELEASE,
30+
MO_ACQ_REL,
31+
MO_SEQ_CST,
32+
MO_MAX_DEF = MO_SEQ_CST, /**< maximum value with defined meaning */
33+
MO_UNDEF = 0xFF /**< denotes "undefined" */
34+
} MEMORY_ORDER;
35+
36+
/** Specifies scope of an atomic operation. */
37+
typedef enum SYNC_SCOPE {
38+
SS_SINGLETHREAD, /**< Synchronize only within a thread (e.g. a signal fence). */
39+
SS_PROCESS /**< Synchronize with other threads. */
40+
} SYNC_SCOPE;
41+
42+
/** Specifies source of an atomic operation. */
43+
typedef enum ATOMIC_ORIGIN {
44+
AORG_CPLUS, /**< C++11 or C11 atomic operation */
45+
AORG_OPENMP, /**< OpenMP */
46+
AORG_OPENACC, /**< OpenACC */
47+
AORG_MAX_DEF = AORG_OPENACC /**< maximum value with defined meaning */
48+
} ATOMIC_ORIGIN;
49+
50+
/** Specifies a read-modify-write operation. */
51+
typedef enum ATOMIC_RMW_OP {
52+
AOP_XCHG,
53+
AOP_ADD,
54+
AOP_SUB,
55+
AOP_AND,
56+
AOP_OR,
57+
AOP_MIN,
58+
AOP_MAX,
59+
AOP_XOR,
60+
AOP_MUL,
61+
AOP_DIV,
62+
AOP_SHR,
63+
AOP_SHL,
64+
AOP_EQV,
65+
AOP_NEQV,
66+
AOP_MAX_DEF = AOP_XOR, /**< maximum value with defined meaning */
67+
AOP_UNDEF = 0xFF
68+
} ATOMIC_RMW_OP;
69+
70+
typedef struct CMPXCHG_MEMORY_ORDER {
71+
MEMORY_ORDER success;
72+
MEMORY_ORDER failure;
73+
} CMPXCHG_MEMORY_ORDER;
74+
75+
/** Information about an atomic operation.
76+
The fields are declared unsigned because pgcc/pgc++ 17.1 sign-extends
77+
them if they are declared as enum types. */
78+
typedef struct ATOMIC_INFO {
79+
/*MSZ*/ unsigned msz : 8; /**< size of memory operand */
80+
/*ATOMIC_RMW_OP*/ unsigned op : 8; /**< AOP_UNDEF except for ATOMICRMWx instructions. */
81+
/*ATOMIC_ORIGIN*/ unsigned origin : 2;
82+
/*SYNC_SCOPE*/ unsigned scope : 1;
83+
} ATOMIC_INFO;
84+
85+
/** True if MEMORY_ORDER m performs an acquire operation.
86+
MO_CONSUME is considered to perform an acquire. */
87+
#define MO_HAS_ACQUIRE(m) ((m) != MO_RELAXED && (m) != MO_RELEASE)
88+
89+
/** True if MEMORY_ORDER m performs a release operation */
90+
#define MO_HAS_RELEASE(m) ((m) >= MO_RELEASE)
91+
92+
#endif
93+
94+

tools/flang2/flang2exe/cgmain.c

Lines changed: 34 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5491,6 +5491,28 @@ find_load_cse(int ilix, OPERAND *load_op, LL_Type *llt)
54915491
return NULL;
54925492
}
54935493

5494+
static LOGICAL
5495+
openmp_atomic_ld(int ilix)
5496+
{
5497+
ATOMIC_INFO info;
5498+
switch(ILI_OPC(ilix)) {
5499+
case IL_ATOMICLDI:
5500+
case IL_ATOMICLDKR:
5501+
case IL_ATOMICLDA:
5502+
case IL_ATOMICLDSP:
5503+
case IL_ATOMICLDDP:
5504+
break;
5505+
default:
5506+
return FALSE;
5507+
}
5508+
info = atomic_info(ilix);
5509+
5510+
if (info.origin == AORG_OPENMP)
5511+
return TRUE;
5512+
else
5513+
return FALSE;
5514+
}
5515+
54945516
/**
54955517
\brief return new operand of type OT_TMP as result of loading \p load_op
54965518
@@ -5514,7 +5536,7 @@ make_load(int ilix, OPERAND *load_op, LL_Type *rslt_type, MSZ msz,
55145536
0, ERR_Fatal);
55155537

55165538
cse_op = NULL;
5517-
if (ENABLE_CSE_OPT) {
5539+
if (ENABLE_CSE_OPT && !openmp_atomic_ld(ilix)) {
55185540
operand = find_load_cse(ilix, load_op, rslt_type);
55195541
if (operand != NULL) {
55205542
const int bits = ll_type_int_bits(operand->ll_type);
@@ -6858,6 +6880,9 @@ gen_llvm_expr(int ilix, LL_Type *expected_type)
68586880
make_load(ilix, operand, operand->ll_type->sub_types[0], msz, flags);
68596881
}
68606882
break;
6883+
case IL_ATOMICLDA:
6884+
case IL_ATOMICLDSP:
6885+
case IL_ATOMICLDDP:
68616886
case IL_ATOMICLDI:
68626887
case IL_ATOMICLDKR: {
68636888
LL_InstrListFlags flags;
@@ -7425,6 +7450,12 @@ gen_llvm_expr(int ilix, LL_Type *expected_type)
74257450
case IL_DFRCS:
74267451
if (expected_type == NULL)
74277452
expected_type = make_lltype_from_dtype(DT_CMPLX);
7453+
goto _process_define_ili;
7454+
case IL_FREECD:
7455+
cse_opc = 1;
7456+
case IL_DFRCD:
7457+
if (expected_type == NULL)
7458+
expected_type = make_lltype_from_dtype(DT_DCMPLX);
74287459

74297460
_process_define_ili:
74307461
/* llvm_info.curr_ret_ili = ilix; */
@@ -8033,6 +8064,7 @@ gen_llvm_expr(int ilix, LL_Type *expected_type)
80338064
case IL_ATOMICRMWKR:
80348065
operand = gen_llvm_atomicrmw_expr(ilix);
80358066
break;
8067+
case IL_CMPXCHG_OLDA:
80368068
case IL_CMPXCHG_OLDI:
80378069
case IL_CMPXCHG_OLDKR:
80388070
operand = gen_llvm_cmpxchg_component(ilix, 0);
@@ -8042,6 +8074,7 @@ gen_llvm_expr(int ilix, LL_Type *expected_type)
80428074
/* Any widening should do zero-extend, not sign-extend. */
80438075
operand->flags |= OPF_ZEXT;
80448076
break;
8077+
case IL_CMPXCHGA:
80458078
case IL_CMPXCHGI:
80468079
case IL_CMPXCHGKR:
80478080
operand = gen_llvm_cmpxchg(ilix);

tools/flang2/flang2exe/expand.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -337,6 +337,10 @@ LOGICAL exp_end_atomic(int, int);
337337
#ifdef PD_IS_ATOMIC
338338
bool exp_atomic_intrinsic(PD_KIND pd, ILM *ilmp, int curilm);
339339
#endif
340+
void exp_mp_atomic_read(ILM *);
341+
void exp_mp_atomic_write(ILM *);
342+
void exp_mp_atomic_update(ILM *);
343+
void exp_mp_atomic_capture(ILM *);
340344
#endif /* EXPANDER_DECLARE_INTERNAL */
341345

342346
int gethost_dumlen(int arg, ISZ_T address);

0 commit comments

Comments
 (0)