Skip to content

Commit 5afd830

Browse files
matborzyszkowski authored and igcbot committed
Add Efficient 64b Addressing support
Add Efficient 64b Addressing support
1 parent a56148d commit 5afd830

File tree

64 files changed

+2388
-339
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

64 files changed

+2388
-339
lines changed

IGC/AdaptorCommon/ImplicitArgs.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -167,6 +167,11 @@ static const std::vector<ImplicitArg> IMPLICIT_ARGS = {
167167
ImplicitArg(ImplicitArg::ASSERT_BUFFER_POINTER, "assertBufferPointer", ImplicitArg::GLOBALPTR,
168168
WIAnalysis::UNIFORM_GLOBAL, 1, ImplicitArg::ALIGN_GRF, false,
169169
GenISAIntrinsic::GenISA_getAssertBufferPtr),
170+
ImplicitArg(ImplicitArg::INDIRECT_DATA_POINTER, "indirectDataPointer", ImplicitArg::GLOBALPTR,
171+
WIAnalysis::UNIFORM_GLOBAL, 1, ImplicitArg::ALIGN_GRF, true,
172+
GenISAIntrinsic::GenISA_getIndirectDataPtr),
173+
ImplicitArg(ImplicitArg::SCRATCH_POINTER, "scratchPointer", ImplicitArg::GLOBALPTR, WIAnalysis::UNIFORM_GLOBAL, 1,
174+
ImplicitArg::ALIGN_PTR, true, GenISAIntrinsic::GenISA_getScratchPtr),
170175

171176
// BufferBoundsChecking
172177
ImplicitArg(ImplicitArg::BUFFER_SIZE, "bufferSize", ImplicitArg::LONG, WIAnalysis::UNIFORM_GLOBAL, 1,

IGC/AdaptorCommon/ImplicitArgs.hpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -124,6 +124,8 @@ class ImplicitArg {
124124
IMPLICIT_ARG_BUFFER_PTR,
125125

126126
ASSERT_BUFFER_POINTER,
127+
INDIRECT_DATA_POINTER,
128+
SCRATCH_POINTER,
127129

128130
// BufferBoundsChecking
129131
BUFFER_SIZE,

IGC/AdaptorCommon/RayTracing/API/RayDispatchGlobalData.h

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -115,6 +115,7 @@ class RayDispatchGlobalDataAdaptor {
115115
uint64_t GetStatelessScratchPtr() const { return 0; };
116116
uint32_t GetBaseSSHOffset() const { return 0; };
117117
uint32_t GetUberTilesMap() const { return 0; };
118+
uint64_t GetBaseSurfaceStatePointer() const { return 0; };
118119
};
119120

120121
// Layout used to pass global data to the shaders
@@ -155,6 +156,15 @@ struct RayDispatchGlobalData {
155156
uint32_t paddingBits3; // 32-bits of padding
156157
};
157158

159+
// For Efficient64 there is no baseSSH, the full address of the
160+
// resource must be calculated in the shader. Instead of delivering
161+
// separately the offset to the first Stack SurfaceState, and
162+
// the pointer to the baseSurfaceState (in Constants), UMD
163+
// will deliver baseSurfaceStatePointer + OffsetToTheFirstStackSS
164+
// in baseSurfaceStatePointer.
165+
struct {
166+
uint64_t baseSurfaceStatePointer;
167+
};
158168
};
159169

160170
uint64_t uberTilesMap; // base address of the uber tiles map used for AtomicPull model
@@ -177,6 +187,11 @@ struct RayDispatchGlobalData {
177187
pMissShaderStride = umd.GetMissStride();
178188
pRtMemBasePtr = umd.GetRayStackBufferAddress();
179189
baseSSHOffset = umd.GetBaseSSHOffset();
190+
// Check if baseSurfaceStatePointer is delivered from the UMD, as
191+
// it shares the same memory with baseSSHOffset in the RayDispatchGlobalDataCommon.
192+
// baseSurfaceStatePointer will be written only if it is not zero.
193+
if (uint64_t Tmp = umd.GetBaseSurfaceStatePointer())
194+
baseSurfaceStatePointer = Tmp;
180195
pStackSizePerRay = umd.GetStackSizePerRay();
181196
swStackSizePerRay = umd.GetSWStackSizePerRay();
182197
pNumDSSRTStacks = umd.GetNumDSSRTStacks();

IGC/AdaptorCommon/RayTracing/RTBuilder.cpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -116,6 +116,13 @@ Value *RTBuilder::getMaxBVHLevels(void) {
116116
Value *RTBuilder::getStatelessScratchPtr(void) { return _get_statelessScratchPtr(VALUE_NAME("statelessScratchPtr")); }
117117

118118

119+
Value *RTBuilder::getBaseSurfaceStatePointer(Value *rayDispatchGlobalDataPtr) {
120+
// For non-RT shaders that use RayQuery, GlobalBufferPointer is delivered
121+
// in pushConstants. It must be read and passed to this function.
122+
return _getBaseSurfaceStatePointerFromPointerToGlobals(rayDispatchGlobalDataPtr,
123+
VALUE_NAME("BaseSurfaceStatePointerFromPointerToGlobals"));
124+
}
125+
119126
Value *RTBuilder::getIsFrontFace(RTBuilder::StackPointerVal *StackPointer, Value *ShaderTy) {
120127
auto *isCommitted = CreateICmpEQ(ShaderTy, getInt32(CallableShaderTypeMD::ClosestHit));
121128
switch (getMemoryStyle()) {

IGC/AdaptorCommon/RayTracing/RTBuilder.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -204,6 +204,8 @@ class RTBuilder : public IGCIRBuilder<> {
204204
Value *getRtMemBasePtr(Value *globalBufferPtr = nullptr);
205205
Value *getStackSizePerRay(void);
206206
Value *getNumDSSRTStacks(void);
207+
Value *getBaseSurfaceStatePointer(void);
208+
Value *getBaseSurfaceStatePointer(Value *RayDispatchGlobalData);
207209
Value *getMaxBVHLevels(void);
208210
Value *getStatelessScratchPtr(void);
209211
Value *getLeafType(StackPointerVal *StackPointer, Value *CommittedHit);

IGC/AdaptorCommon/RayTracing/RTStackReflectionIRBG/reflection.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -118,6 +118,10 @@ RTGlobals(maxBVHLevels, Xe3, xe3.rt_data_info);
118118
RTGlobalsCommon(statelessScratchPtr);
119119

120120

121+
CREATE_PRIVATE auto
122+
_getBaseSurfaceStatePointerFromPointerToGlobals(RTGAS RayDispatchGlobalData *__restrict__ GlobalsPtr) {
123+
return GlobalsPtr->rt.xe3.common.baseSurfaceStatePointer;
124+
}
121125

122126
//////////// End Common Fields ////////////
123127

IGC/AdaptorOCL/Utils/CacheControlsHelper.h

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,54 @@ const CacheControlMapTy<LoadCacheControl> supportedLoadConfigs = {
7676
{ LSC_L1IAR_L3IAR, { LoadCacheControl::InvalidateAfterRead, LoadCacheControl::InvalidateAfterRead } },
7777
// clang-format on
7878
};
79+
template <typename T> struct SeparateCacheControlsL1L2L3 {
80+
T L1;
81+
T L2;
82+
T L3;
83+
};
84+
85+
template <typename T>
86+
using L1L2L3StoreCacheControlMapTy =
87+
std::unordered_map<LSC_STCC_L1_L2_L3, SeparateCacheControlsL1L2L3<T>, std::hash<int>>;
88+
const L1L2L3StoreCacheControlMapTy<StoreCacheControl> supportedL1L2L3StoreConfigs = {
89+
// clang-format off
90+
{ LSC_STCC_L1UC_L2UC_L3UC, { StoreCacheControl::Uncached, StoreCacheControl::Uncached, StoreCacheControl::Uncached } },
91+
{ LSC_STCC_L1UC_L2UC_L3WB, { StoreCacheControl::Uncached, StoreCacheControl::Uncached, StoreCacheControl::WriteBack } },
92+
{ LSC_STCC_L1UC_L2WB_L3UC, { StoreCacheControl::Uncached, StoreCacheControl::WriteBack, StoreCacheControl::Uncached } },
93+
{ LSC_STCC_L1UC_L2WB_L3WB, { StoreCacheControl::Uncached, StoreCacheControl::WriteBack, StoreCacheControl::WriteBack } },
94+
{ LSC_STCC_L1WT_L2UC_L3UC, { StoreCacheControl::WriteThrough, StoreCacheControl::Uncached, StoreCacheControl::Uncached } },
95+
{ LSC_STCC_L1WT_L2UC_L3WB, { StoreCacheControl::WriteThrough, StoreCacheControl::Uncached, StoreCacheControl::WriteBack } },
96+
{ LSC_STCC_L1WT_L2WB_L3UC, { StoreCacheControl::WriteThrough, StoreCacheControl::WriteBack, StoreCacheControl::Uncached } },
97+
{ LSC_STCC_L1WT_L2WB_L3WB, { StoreCacheControl::WriteThrough, StoreCacheControl::WriteBack, StoreCacheControl::WriteBack } },
98+
{ LSC_STCC_L1S_L2UC_L3UC, { StoreCacheControl::Streaming, StoreCacheControl::Uncached, StoreCacheControl::Uncached } },
99+
{ LSC_STCC_L1S_L2UC_L3WB, { StoreCacheControl::Streaming, StoreCacheControl::Uncached, StoreCacheControl::WriteBack } },
100+
{ LSC_STCC_L1S_L2WB_L3UC, { StoreCacheControl::Streaming, StoreCacheControl::WriteBack, StoreCacheControl::Uncached } },
101+
{ LSC_STCC_L1WB_L2UC_L3UC, { StoreCacheControl::WriteBack, StoreCacheControl::Uncached, StoreCacheControl::Uncached } },
102+
{ LSC_STCC_L1WB_L2WB_L3UC, { StoreCacheControl::WriteBack, StoreCacheControl::WriteBack, StoreCacheControl::Uncached } },
103+
{ LSC_STCC_L1WB_L2UC_L3WB, { StoreCacheControl::WriteBack, StoreCacheControl::Uncached, StoreCacheControl::WriteBack } },
104+
// clang-format on
105+
};
106+
107+
template <typename T>
108+
using L1L2L3LoadCacheControlMapTy =
109+
std::unordered_map<LSC_LDCC_L1_L2_L3, SeparateCacheControlsL1L2L3<T>, std::hash<int>>;
110+
const L1L2L3LoadCacheControlMapTy<LoadCacheControl> supportedL1L2L3LoadConfigs = {
111+
// clang-format off
112+
{ LSC_LDCC_L1UC_L2UC_L3UC, { LoadCacheControl::Uncached, LoadCacheControl::Uncached, LoadCacheControl::Uncached } },
113+
{ LSC_LDCC_L1UC_L2UC_L3C, { LoadCacheControl::Uncached, LoadCacheControl::Uncached, LoadCacheControl::Cached } },
114+
{ LSC_LDCC_L1UC_L2C_L3UC, { LoadCacheControl::Uncached, LoadCacheControl::Cached, LoadCacheControl::Uncached } },
115+
{ LSC_LDCC_L1UC_L2C_L3C, { LoadCacheControl::Uncached, LoadCacheControl::Cached, LoadCacheControl::Cached } },
116+
{ LSC_LDCC_L1C_L2UC_L3UC, { LoadCacheControl::Cached, LoadCacheControl::Uncached, LoadCacheControl::Uncached } },
117+
{ LSC_LDCC_L1C_L2UC_L3C, { LoadCacheControl::Cached, LoadCacheControl::Uncached, LoadCacheControl::Cached } },
118+
{ LSC_LDCC_L1C_L2C_L3UC, { LoadCacheControl::Cached, LoadCacheControl::Cached, LoadCacheControl::Uncached } },
119+
{ LSC_LDCC_L1C_L2C_L3C, { LoadCacheControl::Cached, LoadCacheControl::Cached, LoadCacheControl::Cached } },
120+
{ LSC_LDCC_L1S_L2UC_L3UC, { LoadCacheControl::Streaming, LoadCacheControl::Uncached, LoadCacheControl::Uncached } },
121+
{ LSC_LDCC_L1S_L2UC_L3C, { LoadCacheControl::Streaming, LoadCacheControl::Uncached, LoadCacheControl::Cached } },
122+
{ LSC_LDCC_L1S_L2C_L3UC, { LoadCacheControl::Streaming, LoadCacheControl::Cached, LoadCacheControl::Uncached } },
123+
{ LSC_LDCC_L1S_L2C_L3C, { LoadCacheControl::Streaming, LoadCacheControl::Cached, LoadCacheControl::Cached } },
124+
{ LSC_LDCC_L1IAR_L2IAR_L3IAR, { LoadCacheControl::InvalidateAfterRead, LoadCacheControl::InvalidateAfterRead, LoadCacheControl::InvalidateAfterRead } },
125+
// clang-format on
126+
};
79127

80128
using CacheLevel = uint64_t;
81129

IGC/AdaptorOCL/dllInterfaceCompute.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1418,6 +1418,9 @@ bool TranslateBuild(const STB_TranslateInputArgs *pInputArgs, STB_TranslateOutpu
14181418
TB_DATA_FORMAT inputDataFormatTemp, const IGC::CPlatform &IGCPlatform,
14191419
float profilingTimerResolution) {
14201420
ShaderHash inputShHash;
1421+
if (IGCPlatform.hasEfficient64bEnabled()) {
1422+
IGC_SET_FLAG_VALUE(EnableEfficient64b, true);
1423+
}
14211424
if (IGC_IS_FLAG_ENABLED(EnableKernelNamesBasedHash)) {
14221425
// Create the hash based on kernel names.
14231426
// This takes the names and concatenates them into a string

IGC/AdaptorOCL/ocl_igc_interface/igc_features_and_workarounds.h

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -87,7 +87,13 @@ CIF_DEFINE_INTERFACE_VER_WITH_COMPATIBILITY(IgcFeaturesAndWorkarounds, 2, 1) {
8787
OCL_API_CALL virtual void SetMaxOCLParamSize(uint32_t s);
8888
OCL_API_CALL virtual uint32_t GetMaxOCLParamSize() const;
8989
};
90-
CIF_DEFINE_INTERFACE_VER_WITH_COMPATIBILITY(IgcFeaturesAndWorkarounds, 3, 2) { CIF_INHERIT_CONSTRUCTOR(); };
90+
91+
CIF_DEFINE_INTERFACE_VER_WITH_COMPATIBILITY(IgcFeaturesAndWorkarounds, 3, 2) {
92+
CIF_INHERIT_CONSTRUCTOR();
93+
94+
virtual void SetFtrEfficient64BitAddressing(bool v);
95+
virtual bool GetFtrEfficient64BitAddressing() const;
96+
};
9197

9298
CIF_GENERATE_VERSIONS_LIST(IgcFeaturesAndWorkarounds);
9399
CIF_MARK_LATEST_VERSION(IgcFeaturesAndWorkaroundsLatest, IgcFeaturesAndWorkarounds);

IGC/AdaptorOCL/ocl_igc_interface/impl/igc_features_and_workarounds_impl.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,12 @@ void CIF_GET_INTERFACE_CLASS(IgcFeaturesAndWorkarounds, 2)::SetMaxOCLParamSize(u
5353
IGC_ASSERT(s >= OCLCaps::MINIMAL_MAX_PARAMETER_SIZE);
5454
CIF_GET_PIMPL()->OCLCaps.MaxParameterSize = s;
5555
}
56+
bool CIF_GET_INTERFACE_CLASS(IgcFeaturesAndWorkarounds, 3)::GetFtrEfficient64BitAddressing() const {
57+
return CIF_GET_PIMPL()->FeTable.FtrEfficient64BitAddressing;
58+
}
59+
void CIF_GET_INTERFACE_CLASS(IgcFeaturesAndWorkarounds, 3)::SetFtrEfficient64BitAddressing(bool v) {
60+
CIF_GET_PIMPL()->FeTable.FtrEfficient64BitAddressing = v;
61+
}
5662

5763
} // namespace IGC
5864

0 commit comments

Comments
 (0)