IGCVectorizer supports PHI for SIMD32

esukhov · igcbot · commit 109a9e7734f6 · 2025-12-03T14:42:08.000+01:00
IGCVectorizer now supports PHI and insert/extract element instructions for SIMD32.
All other instructions are still blacklisted for SIMD32 at the moment.
diff --git a/IGC/Compiler/CISACodeGen/EmitVISAPass.cpp b/IGC/Compiler/CISACodeGen/EmitVISAPass.cpp
@@ -4298,8 +4298,8 @@ void EmitPass::Sub(const SSource sources[2], const DstModifier &modifier) {
 
   if (IGC_IS_FLAG_ENABLED(EnableVectorEmitter) && IsPossible) {
 
-    IGC_ASSERT_EXIT_MESSAGE(m_encoder->GetSimdSize() == lanesToSIMDMode(16),
-                            "As of now Vector Emission is only supported for SIMD16");
+    IGC_ASSERT_MESSAGE(m_encoder->GetSimdSize() == lanesToSIMDMode(16),
+                       "As of now Vector Emission is only supported for SIMD16");
 
     unsigned VectorSize = getVectorSize(sources[0].value);
     IGC_ASSERT_MESSAGE(VectorSize == getVectorSize(sources[1].value), "operands must have same vector types");
@@ -4389,8 +4389,8 @@ void EmitPass::Add(const SSource sources[2], const DstModifier &modifier) {
 
   if (IGC_IS_FLAG_ENABLED(EnableVectorEmitter) && IsPossible) {
 
-    IGC_ASSERT_EXIT_MESSAGE(numLanes(m_encoder->GetSimdSize()) == 16,
-                            "As of now Vector Emission is only supported for SIMD16");
+    IGC_ASSERT_MESSAGE(numLanes(m_encoder->GetSimdSize()) == 16,
+                       "As of now Vector Emission is only supported for SIMD16");
 
     bool AllUniform = src[0]->IsUniform() && src[1]->IsUniform() && m_destination->IsUniform();
     // cannot emit 16 SIMD if SIMD SIZE is set to 8, but can emit 4
@@ -4448,8 +4448,8 @@ void EmitPass::Mul(const SSource sources[2], const DstModifier &modifier) {
 
   if (IGC_IS_FLAG_ENABLED(EnableVectorEmitter) && IsPossible) {
 
-    IGC_ASSERT_EXIT_MESSAGE(numLanes(m_encoder->GetSimdSize()) == 16,
-                            "As of now Vector Emission is only supported for SIMD16");
+    IGC_ASSERT_MESSAGE(numLanes(m_encoder->GetSimdSize()) == 16,
+                       "As of now Vector Emission is only supported for SIMD16");
     unsigned VectorSize = getVectorSize(sources[0].value);
     IGC_ASSERT_MESSAGE(VectorSize == getVectorSize(sources[1].value), "operands must have same vector types");
 
@@ -4528,8 +4528,8 @@ void EmitPass::Div(const SSource sources[2], const DstModifier &modifier) {
   if (IGC_IS_FLAG_ENABLED(EnableVectorEmitter) && sources[0].value->getType()->isVectorTy() &&
       sources[1].value->getType()->isVectorTy()) {
 
-    IGC_ASSERT_EXIT_MESSAGE(numLanes(m_encoder->GetSimdSize()) == 16,
-                            "As of now Vector Emission is only supported for SIMD16");
+    IGC_ASSERT_MESSAGE(numLanes(m_encoder->GetSimdSize()) == 16,
+                       "As of now Vector Emission is only supported for SIMD16");
     unsigned VectorSize = getVectorSize(sources[0].value);
 
     for (unsigned i = 0; i < VectorSize; ++i) {
@@ -4561,8 +4561,8 @@ void EmitPass::Inv(const SSource sources[2], const DstModifier &modifier) {
   if (IGC_IS_FLAG_ENABLED(EnableVectorEmitter) && sources[0].value->getType()->isVectorTy() &&
       sources[1].value->getType()->isVectorTy()) {
 
-    IGC_ASSERT_EXIT_MESSAGE(numLanes(m_encoder->GetSimdSize()) == 16,
-                            "As of now Vector Emission is only supported for SIMD16");
+    IGC_ASSERT_MESSAGE(numLanes(m_encoder->GetSimdSize()) == 16,
+                       "As of now Vector Emission is only supported for SIMD16");
     unsigned VectorSize = getVectorSize(sources[0].value);
 
     CVariable *src[1];
@@ -4596,8 +4596,8 @@ void EmitPass::MaxNum(const SSource sources[2], const DstModifier &modifier) {
   if (IGC_IS_FLAG_ENABLED(EnableVectorEmitter) && sources[0].value->getType()->isVectorTy() &&
       sources[1].value->getType()->isVectorTy()) {
 
-    IGC_ASSERT_EXIT_MESSAGE(numLanes(m_encoder->GetSimdSize()) == 16,
-                            "As of now Vector Emission is only supported for SIMD16");
+    IGC_ASSERT_MESSAGE(numLanes(m_encoder->GetSimdSize()) == 16,
+                       "As of now Vector Emission is only supported for SIMD16");
     unsigned VectorSize = getVectorSize(sources[0].value);
 
     bool AllUniform = src[0]->IsUniform() && src[1]->IsUniform() && m_destination->IsUniform();
@@ -4640,16 +4640,16 @@ void EmitPass::MaxNum(const SSource sources[2], const DstModifier &modifier) {
     return;
   }
 
-  IGC_ASSERT_EXIT_MESSAGE(0, " if we are at this part, something went wrong "
-                             "with maxnum vectorization");
+  IGC_ASSERT_MESSAGE(0, " if we are at this part, something went wrong "
+                        "with maxnum vectorization");
 }
 
 void EmitPass::Exp2(const SSource sources[2], const DstModifier &modifier) {
 
   if (IGC_IS_FLAG_ENABLED(EnableVectorEmitter) && sources[0].value->getType()->isVectorTy()) {
 
-    IGC_ASSERT_EXIT_MESSAGE(m_encoder->GetSimdSize() == lanesToSIMDMode(16),
-                            "As of now Vector Emission is only supported for SIMD16");
+    IGC_ASSERT_MESSAGE(m_encoder->GetSimdSize() == lanesToSIMDMode(16),
+                       "As of now Vector Emission is only supported for SIMD16");
 
     unsigned VectorSize = getVectorSize(sources[0].value);
 
@@ -4747,8 +4747,8 @@ void EmitPass::FDiv(const SSource sources[2], const DstModifier &modifier) {
 
   if (IGC_IS_FLAG_ENABLED(EnableVectorEmitter) && IsPossible) {
 
-    IGC_ASSERT_EXIT_MESSAGE(numLanes(m_encoder->GetSimdSize()) == 16,
-                            "As of now Vector Emission is only supported for SIMD16");
+    IGC_ASSERT_MESSAGE(numLanes(m_encoder->GetSimdSize()) == 16,
+                       "As of now Vector Emission is only supported for SIMD16");
     if (isVectorOfOnes(sources[0].value))
       Inv(sources, modifier);
     else
@@ -4807,8 +4807,8 @@ void EmitPass::VectorCMP(llvm::CmpInst::Predicate pred, const SSource sources[2]
     dst = m_currShader->BitCast(m_destination, src[0]->GetType());
   }
 
-  IGC_ASSERT_EXIT_MESSAGE(numLanes(m_encoder->GetSimdSize()) == 16,
-                          "As of now Vector Emission is only supported for SIMD16");
+  IGC_ASSERT_MESSAGE(numLanes(m_encoder->GetSimdSize()) == 16,
+                     "As of now Vector Emission is only supported for SIMD16");
   unsigned VectorSize = getVectorSize(sources[0].value);
 
   bool AllUniform = src[0]->IsUniform() && src[1]->IsUniform() && m_destination->IsUniform();
@@ -4818,7 +4818,6 @@ void EmitPass::VectorCMP(llvm::CmpInst::Predicate pred, const SSource sources[2]
   bool CanEmitThisSize = VectorSize <= SIMDSize;
 
   if (IGC_IS_FLAG_ENABLED(VectorizerUniformValueVectorizationEnabled) && AllUniform && CanEmitThisSize) {
-
     m_encoder->SetSrcRegion(0, 1, 1, 0);
     m_encoder->SetSrcRegion(1, 1, 1, 0);
     m_encoder->SetUniformSIMDSize(lanesToSIMDMode(VectorSize));
@@ -5025,6 +5024,7 @@ void EmitPass::VectorSelect(const SSource sources[3], const DstModifier &modifie
   }
 
   bool PredicateLengthIsCorrect = flag->GetNumberElement() == 1 || flag->GetNumberElement() == SIMDSize;
+
   IGC_ASSERT_EXIT_MESSAGE(PredicateLengthIsCorrect, "we can only emit non-uniform selects with matching predicate");
 
   for (unsigned i = 0; i < VectorSize; ++i) {
diff --git a/IGC/Compiler/CISACodeGen/IGCVectorizer.cpp b/IGC/Compiler/CISACodeGen/IGCVectorizer.cpp
@@ -295,7 +295,14 @@ bool isAllowedStub(Instruction *I) {
   return Result;
 }
 
-bool isSafeToVectorize(Instruction *I) {
+bool IGCVectorizer::isSafeToVectorizeSIMD32(Instruction *I) {
+  // SIMD32 whitelist: PHIs accepted by isPHISafe, plus extract/insert element.
+  if (isPHISafe(I))
+    return true;
+  return llvm::isa<ExtractElementInst>(I) || llvm::isa<InsertElementInst>(I);
+}
+
+bool IGCVectorizer::isSafeToVectorizeSIMD16(Instruction *I) {
 
   bool IsExtract = llvm::isa<ExtractElementInst>(I);
   bool IsInsert = llvm::isa<InsertElementInst>(I);
@@ -323,6 +330,17 @@ bool isSafeToVectorize(Instruction *I) {
   return Result;
 }
 
+bool IGCVectorizer::isSafeToVectorize(Instruction *I) {
+  // Select the whitelist that matches the SIMD width cached in SIMDSize.
+  if (SIMDSize == 32)
+    return isSafeToVectorizeSIMD32(I);
+  if (SIMDSize == 16)
+    return isSafeToVectorizeSIMD16(I);
+  // NOTE(review): any other width lands on this assert - confirm callers filter such widths.
+  IGC_ASSERT_EXIT_MESSAGE(0, "not supported simd");
+  return false;
+}
+
 bool IGCVectorizer::handleStub(VecArr &Slice) {
   PRINT_LOG("stub vectorization: ");
   PRINT_INST_NL(Slice.front());
@@ -1400,31 +1418,31 @@ void IGCVectorizer::collectInstructionToProcess(VecArr &ToProcess, Function &F)
   }
 }
 
-bool IGCVectorizer::checkIfSIMD16(llvm::Function &F) {
+unsigned IGCVectorizer::checkSIMD(llvm::Function &F) {
 
   MDUtils = getAnalysis<MetaDataUtilsWrapper>().getMetaDataUtils();
-  bool Result = false;
+  unsigned SimdSize = 0; // stays 0 when F has no FunctionsInfo metadata entry
   if (MDUtils->findFunctionsInfoItem(&F) != MDUtils->end_FunctionsInfo()) {
     IGC::IGCMD::FunctionInfoMetaDataHandle funcInfoMD = MDUtils->getFunctionsInfoItem(&F);
-    unsigned SimdSize = funcInfoMD->getSubGroupSize()->getSIMDSize();
-    Result = SimdSize == 16;
+    SimdSize = funcInfoMD->getSubGroupSize()->getSIMDSize(); // sub-group SIMD size recorded for F
   }
 
-  return Result;
+  return SimdSize; // 0 if no entry was found, otherwise the value read from the metadata
 }
 
 bool IGCVectorizer::runOnFunction(llvm::Function &F) {
 
-  // DPAS only allowed in simd16 mode + helps to reduce untested cases
-  if (!checkIfSIMD16(F))
+  SIMDSize = checkSIMD(F);
+  if (SIMDSize == 0)
     return false;
 
   WI = &getAnalysis<WIAnalysis>();
 
   M = F.getParent();
   CGCtx = getAnalysis<CodeGenContextWrapper>().getCodeGenContext();
   initializeLogFile(F);
-  PRINT_LOG_NL("vectorizer: fadd, fdiv, fptrunc");
+  PRINT_LOG_NL(" SIMD Size: " << SIMDSize);
+  PRINT_LOG_NL("vectorizer: fadd, fdiv, fptrunc, select, cmp, intrinsics, genintrinsics, simd32");
 
   VecArr ToProcess;
   // we collect operands that seem promising for vectorization
diff --git a/IGC/Compiler/CISACodeGen/IGCVectorizer.h b/IGC/Compiler/CISACodeGen/IGCVectorizer.h
@@ -71,10 +71,15 @@ class IGCVectorizer : public llvm::FunctionPass {
   std::string LogStr;
   llvm::raw_string_ostream OutputLogStream = raw_string_ostream(LogStr);
   Module *M = nullptr;
-  bool checkIfSIMD16(llvm::Function &F);
+  unsigned SIMDSize = 0;
+  unsigned checkSIMD(llvm::Function &F);
   void initializeLogFile(Function &F);
   void writeLog();
 
+  bool isSafeToVectorize(llvm::Instruction *I);
+  bool isSafeToVectorizeSIMD16(llvm::Instruction *I);
+  bool isSafeToVectorizeSIMD32(llvm::Instruction *I);
+
   void findInsertElementsInDataFlow(llvm::Instruction *I, VecArr &Chain);
   bool checkSlice(VecArr &Slice, InsertStruct &InSt);
   bool processChain(InsertStruct &InSt);
diff --git a/IGC/Compiler/tests/EmitVISAPass/vectorizer-emission-select.ll b/IGC/Compiler/tests/EmitVISAPass/vectorizer-emission-select.ll
@@ -5,7 +5,7 @@
 ; SPDX-License-Identifier: MIT
 ;
 ;============================ end_copyright_notice =============================
-; REQUIRES: regkeys, llvm-16-plus, debug
+; REQUIRES: regkeys, llvm-16-plus
 ; RUN: igc_opt -S %s --opaque-pointers -dce -platformbmg -igc-emit-visa --regkey=DumpVISAASMToConsole=1 -simd-mode 16 &> %t_output.ll
 ; RUN: FileCheck --input-file %t_output.ll %s
 
diff --git a/IGC/Compiler/tests/EmitVISAPass/vectorizer-emission-simd32-phi-dpas-basic.ll b/IGC/Compiler/tests/EmitVISAPass/vectorizer-emission-simd32-phi-dpas-basic.ll
@@ -0,0 +1,54 @@
+;=========================== begin_copyright_notice ============================
+;
+; Copyright (C) 2025 Intel Corporation
+;
+; SPDX-License-Identifier: MIT
+;
+;============================ end_copyright_notice =============================
+
+; REQUIRES: regkeys, llvm-16-plus
+; RUN: igc_opt -S %s -opaque-pointers -platformbmg -igc-emit-visa --regkey=DumpVISAASMToConsole=1 -simd-mode 32 &> %t_output.ll
+; RUN: FileCheck --input-file %t_output.ll %s
+
+; CHECK: .decl vectorized_phi v_type=G type=f num_elts=128 align=wordx32
+; CHECK: .decl vectorized_phi71 v_type=G type=f num_elts=128 align=wordx32
+; CHECK: .kernel_attr SimdSize=32
+
+; CHECK: mov (M1, 32) vectorized_phi(0,0)<1> 0x0:f
+; CHECK: mov (M1, 32) vectorized_phi(2,0)<1> 0x0:f
+; CHECK: mov (M1, 32) vectorized_phi(4,0)<1> 0x0:f
+; CHECK: mov (M1, 32) vectorized_phi(6,0)<1> 0x0:f
+; CHECK: mov (M1, 32) vectorized_phi71(0,0)<1> 0x0:f
+; CHECK: mov (M1, 32) vectorized_phi71(2,0)<1> 0x0:f
+; CHECK: mov (M1, 32) vectorized_phi71(4,0)<1> 0x0:f
+; CHECK: mov (M1, 32) vectorized_phi71(6,0)<1> 0x0:f
+
+; CHECK: dpas.bf.bf.8.8 (M1, 16) vectorized_phi.0
+; CHECK: dpas.bf.bf.8.8 (M1, 16) vectorized_phi71.0
+; CHECK: dpas.bf.bf.8.8 (M1, 16) vectorized_phi.0
+; CHECK: dpas.bf.bf.8.8 (M1, 16) vectorized_phi71.0
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64-v96:128:128-v128:128:128-v192:256:256-v256:256:256-v512:512:512-v1024:1024:1024-n8:16:32"
+target triple = "spir64-unknown-unknown"
+
+define spir_kernel void @foo() {
+bb:
+  br label %bb79
+
+bb79:                                             ; preds = %bb79, %bb
+  %vectorized_phi = phi <4 x float> [ zeroinitializer, %bb ], [ %i264, %bb79 ]
+  %vectorized_phi71 = phi <4 x float> [ zeroinitializer, %bb ], [ %i265, %bb79 ]
+  %i248 = call <4 x float> @llvm.genx.GenISA.sub.group.dpas.v4f32.v4f32.v4i16.v4i32(<4 x float> %vectorized_phi, <4 x i16> zeroinitializer, <4 x i32> zeroinitializer, i32 11, i32 11, i32 8, i32 8, i1 false)
+  %i249 = call <4 x float> @llvm.genx.GenISA.sub.group.dpas.v4f32.v4f32.v4i16.v4i32(<4 x float> %vectorized_phi71, <4 x i16> zeroinitializer, <4 x i32> zeroinitializer, i32 11, i32 11, i32 8, i32 8, i1 false)
+  %i264 = call <4 x float> @llvm.genx.GenISA.sub.group.dpas.v4f32.v4f32.v4i16.v4i32(<4 x float> %i248, <4 x i16> zeroinitializer, <4 x i32> zeroinitializer, i32 11, i32 11, i32 8, i32 8, i1 false)
+  %i265 = call <4 x float> @llvm.genx.GenISA.sub.group.dpas.v4f32.v4f32.v4i16.v4i32(<4 x float> %i249, <4 x i16> zeroinitializer, <4 x i32> zeroinitializer, i32 11, i32 11, i32 8, i32 8, i1 false)
+  br label %bb79 ; unconditional back-edge: feeds %i264 / %i265 into the two phis above
+}
+
+declare <4 x float> @llvm.genx.GenISA.sub.group.dpas.v4f32.v4f32.v4i16.v4i32(<4 x float>, <4 x i16>, <4 x i32>, i32, i32, i32, i32, i1)
+
+!igc.functions = !{!0}
+
+!0 = distinct !{ptr @foo, !1}
+!1 = distinct !{!2}
+!2 = distinct !{!"function_type", i32 0}
diff --git a/IGC/Compiler/tests/EmitVISAPass/vectorizer-vector-emission-fmul-disable-simd32.ll b/IGC/Compiler/tests/EmitVISAPass/vectorizer-vector-emission-fmul-disable-simd32.ll
@@ -0,0 +1,37 @@
+;=========================== begin_copyright_notice ============================
+;
+; Copyright (C) 2025 Intel Corporation
+;
+; SPDX-License-Identifier: MIT
+;
+;============================ end_copyright_notice =============================
+
+; REQUIRES: regkeys, llvm-16-plus
+
+; RUN: not --crash igc_opt -S -opaque-pointers -dce -platformpvc -igc-emit-visa --regkey=EnableAssertEvaluation=1 --regkey=EnableAssertProgramTermination=0 --regkey=EnableLogAssertToStderr=1 --regkey=EnableStandardAssert=1 --regkey=DumpVISAASMToConsole=1 -simd-mode 32 < %s &> %t_output.ll
+; RUN: FileCheck --input-file %t_output.ll %s
+
+; CHECK: numLanes(m_encoder->GetSimdSize()) == 16, As of now Vector Emission is only supported for SIMD16
+
+define spir_kernel void @widget() {
+entry:
+  br label %._crit_edge
+
+._crit_edge:                                      ; preds = %._crit_edge.._crit_edge_crit_edge, %entry
+  %vectorized_phi = phi <8 x float> [ zeroinitializer, %entry ], [ %result, %._crit_edge.._crit_edge_crit_edge ]
+  %vector = insertelement <8 x float> zeroinitializer, float 0.000000e+00, i64 0
+  %vectorized_binary = fmul <8 x float> %vector, %vectorized_phi
+  %result = call <8 x float> @llvm.genx.GenISA.sub.group.dpas.v8f32.v8f32.v8i16.v8i32(<8 x float> %vectorized_binary, <8 x i16> zeroinitializer, <8 x i32> zeroinitializer, i32 0, i32 0, i32 0, i32 0, i1 false)
+  br label %._crit_edge.._crit_edge_crit_edge
+
+._crit_edge.._crit_edge_crit_edge:                ; preds = %._crit_edge
+  br label %._crit_edge
+}
+
+declare <8 x float> @llvm.genx.GenISA.sub.group.dpas.v8f32.v8f32.v8i16.v8i32(<8 x float>, <8 x i16>, <8 x i32>, i32, i32, i32, i32, i1)
+
+!igc.functions = !{!0}
+
+!0 = distinct !{ptr @widget, !1}
+!1 = distinct !{!2}
+!2 = distinct !{!"function_type", i32 0}
diff --git a/IGC/Compiler/tests/IGCVectorizer/vectorizer-binary-fmul-simd32-unsupported.ll b/IGC/Compiler/tests/IGCVectorizer/vectorizer-binary-fmul-simd32-unsupported.ll
diff --git a/IGC/Compiler/tests/IGCVectorizer/vectorizer-phi-dpas-basic-simd32.ll b/IGC/Compiler/tests/IGCVectorizer/vectorizer-phi-dpas-basic-simd32.ll

Original file line number	Diff line number	Diff line change
`@@ -5,7 +5,7 @@`
`5`	`5`	`; SPDX-License-Identifier: MIT`
`6`	`6`	`;`
`7`	`7`	`;============================ end_copyright_notice =============================`
`8`		`-; REQUIRES: regkeys, llvm-16-plus, debug`
	`8`	`+; REQUIRES: regkeys, llvm-16-plus`
`9`	`9`	`; RUN: igc_opt -S %s --opaque-pointers -dce -platformbmg -igc-emit-visa --regkey=DumpVISAASMToConsole=1 -simd-mode 16 &> %t_output.ll`
`10`	`10`	`; RUN: FileCheck --input-file %t_output.ll %s`
`11`	`11`