[AMDGPU] Update PromoteAlloca to handle GEPs with variable offset. #122342
Conversation
In case of a variable GEP offset that can be optimized out, promote alloca is updated to use the refreshed index to avoid an assertion. Issue found by fuzzer.
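The recovery step the description refers to is easiest to see in isolation. Below is a minimal sketch, assuming current LLVM headers; `refreshVariableIndex` is an illustrative name, not a function from the PR, and it mirrors how `GetElementPtrInst::collectOffset` is used in the patch to rederive a GEP's single variable index when the cached index value may have been folded away.

```cpp
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Instructions.h"
using namespace llvm;

// Illustrative helper: rederive a GEP's single variable index directly from
// the instruction, so a stale cached value can be replaced.
static Value *refreshVariableIndex(GetElementPtrInst *GEP,
                                   const DataLayout &DL) {
  unsigned BW = DL.getIndexTypeSizeInBits(GEP->getType());
  SmallMapVector<Value *, APInt, 4> VarOffsets;
  APInt ConstOffset(BW, 0);
  // collectOffset decomposes the GEP into scaled variable offsets plus a
  // constant byte offset. The patch accepts exactly one variable term with a
  // zero constant part and uses that variable as the vector index (the scale
  // in VarOffsets.front().second is implicitly assumed to match the element
  // size, as in the PR).
  if (GEP->collectOffset(DL, BW, VarOffsets, ConstOffset) &&
      VarOffsets.size() == 1 && ConstOffset.isZero())
    return VarOffsets.front().first;
  return nullptr; // No single-variable decomposition; use the cached index.
}
```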
@llvm/pr-subscribers-backend-amdgpu

Author: Sumanth Gundapaneni (sgundapa)

Changes

In case of a variable GEP offset that can be optimized out, promote alloca is updated to use the refreshed index to avoid an assertion. Issue found by fuzzer.

Full diff: https://github.com/llvm/llvm-project/pull/122342.diff

2 Files Affected:
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
index e27ef71c1c0883..1e32743c3dfeeb 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
@@ -385,13 +385,42 @@ static bool isSupportedMemset(MemSetInst *I, AllocaInst *AI,
match(I->getOperand(2), m_SpecificInt(Size)) && !I->isVolatile();
}
+static bool hasVariableOffset(GetElementPtrInst *GEP) {
+ // Iterate over all operands starting from the first index (index 0 is the
+ // base pointer).
+ for (unsigned i = 1, e = GEP->getNumOperands(); i != e; ++i) {
+ Value *Op = GEP->getOperand(i);
+ // Check if the operand is not a constant integer value
+ if (!isa<ConstantInt>(Op)) {
+ return true;
+ }
+ }
+ return false;
+}
+
static Value *
-calculateVectorIndex(Value *Ptr,
- const std::map<GetElementPtrInst *, Value *> &GEPIdx) {
+calculateVectorIndex(Value *Ptr, std::map<GetElementPtrInst *, Value *> &GEPIdx,
+ const DataLayout &DL) {
auto *GEP = dyn_cast<GetElementPtrInst>(Ptr->stripPointerCasts());
if (!GEP)
return ConstantInt::getNullValue(Type::getInt32Ty(Ptr->getContext()));
+ // If the index of this GEP is a variable that might be deleted,
+ // update the index with its latest value. We've already handled any GEPs
+ // with unsupported index types (in GEPToVectorIndex) at this point.
+ if (hasVariableOffset(GEP)) {
+ unsigned BW = DL.getIndexTypeSizeInBits(GEP->getType());
+ SmallMapVector<Value *, APInt, 4> VarOffsets;
+ APInt ConstOffset(BW, 0);
+ if (GEP->collectOffset(DL, BW, VarOffsets, ConstOffset)) {
+ if (VarOffsets.size() == 1 && ConstOffset.isZero()) {
+ auto *UpdatedValue = VarOffsets.front().first;
+ GEPIdx[GEP] = UpdatedValue;
+ return UpdatedValue;
+ }
+ }
+ }
+
auto I = GEPIdx.find(GEP);
assert(I != GEPIdx.end() && "Must have entry for GEP!");
return I->second;
@@ -496,7 +525,7 @@ static Value *promoteAllocaUserToVector(
}
Value *Index = calculateVectorIndex(
- cast<LoadInst>(Inst)->getPointerOperand(), GEPVectorIdx);
+ cast<LoadInst>(Inst)->getPointerOperand(), GEPVectorIdx, DL);
// We're loading the full vector.
Type *AccessTy = Inst->getType();
@@ -552,7 +581,8 @@ static Value *promoteAllocaUserToVector(
// to know the current value. If this is a store of a single element, we
// need to know the value.
StoreInst *SI = cast<StoreInst>(Inst);
- Value *Index = calculateVectorIndex(SI->getPointerOperand(), GEPVectorIdx);
+ Value *Index =
+ calculateVectorIndex(SI->getPointerOperand(), GEPVectorIdx, DL);
Value *Val = SI->getValueOperand();
// We're storing the full vector, we can handle this without knowing CurVal.
@@ -850,7 +880,8 @@ bool AMDGPUPromoteAllocaImpl::tryPromoteAllocaToVector(AllocaInst &Alloca) {
if (Ptr != &Alloca && !GEPVectorIdx.count(GEP))
return nullptr;
- return dyn_cast<ConstantInt>(calculateVectorIndex(Ptr, GEPVectorIdx));
+ return dyn_cast<ConstantInt>(
+ calculateVectorIndex(Ptr, GEPVectorIdx, *DL));
};
unsigned OpNum = U->getOperandNo();
diff --git a/llvm/test/CodeGen/AMDGPU/promote-alloca-array-aggregate.ll b/llvm/test/CodeGen/AMDGPU/promote-alloca-array-aggregate.ll
index 05c727201bbf1d..9db416041a5bc0 100644
--- a/llvm/test/CodeGen/AMDGPU/promote-alloca-array-aggregate.ll
+++ b/llvm/test/CodeGen/AMDGPU/promote-alloca-array-aggregate.ll
@@ -122,6 +122,34 @@ define amdgpu_vs void @promote_load_from_store_aggr() #0 {
ret void
}
+%Block4 = type { [2 x i32], i32 }
+@block4 = external addrspace(1) global %Block4
+%gl_PV = type { <4 x i32>, i32, [1 x i32], [1 x i32] }
+@pv1 = external addrspace(1) global %gl_PV
+
+; This should not crash on a variable offset that can be
+; optimized out (variable foo4 in the test)
+define amdgpu_vs void @promote_load_from_store_aggr_varoff() local_unnamed_addr {
+; CHECK-LABEL: @promote_load_from_store_aggr_varoff(
+; CHECK-NEXT: [[FOO3_UNPACK2:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds (i8, ptr addrspace(1) @block4, i64 8), align 4
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <3 x i32> undef, i32 [[FOO3_UNPACK2]], i32 2
+; CHECK-NEXT: [[TMP2:%.*]] = extractelement <3 x i32> [[TMP1]], i32 [[FOO3_UNPACK2]]
+; CHECK-NEXT: [[FOO12:%.*]] = insertelement <4 x i32> poison, i32 [[TMP2]], i64 3
+; CHECK-NEXT: store <4 x i32> [[FOO12]], ptr addrspace(1) @pv1, align 16
+; CHECK-NEXT: ret void
+;
+ %f1 = alloca [3 x i32], align 4, addrspace(5)
+ %G1 = getelementptr inbounds i8, ptr addrspace(5) %f1, i32 8
+ %foo3.unpack2 = load i32, ptr addrspace(1) getelementptr inbounds (i8, ptr addrspace(1) @block4, i64 8), align 4
+ store i32 %foo3.unpack2, ptr addrspace(5) %G1, align 4
+ %foo4 = load i32, ptr addrspace(5) %G1, align 4
+ %foo5 = getelementptr [3 x i32], ptr addrspace(5) %f1, i32 0, i32 %foo4
+ %foo6 = load i32, ptr addrspace(5) %foo5, align 4
+ %foo12 = insertelement <4 x i32> poison, i32 %foo6, i64 3
+ store <4 x i32> %foo12, ptr addrspace(1) @pv1, align 16
+ ret void
+}
+
define amdgpu_vs void @promote_memmove_aggr() #0 {
; CHECK-LABEL: @promote_memmove_aggr(
; CHECK-NEXT: store float 1.000000e+00, ptr addrspace(1) @pv, align 4
You can test this locally with the following command:

```
git diff -U0 --pickaxe-regex -S '([^a-zA-Z0-9#_-]undef[^a-zA-Z0-9_-]|UndefValue::get)' 553fa204ed5ab4c48bc6080451df24310c00e69c 1093642dcc9cfaf7b50bf314f18467df58ced7ab llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp llvm/test/CodeGen/AMDGPU/promote-alloca-array-aggregate.ll
```

The following files introduce new uses of undef:

Undef is now deprecated and should only be used in the rare cases where no replacement is possible. For example, a load of uninitialized memory yields undef. In tests, avoid using undef.

For example, this is considered a bad practice:

```llvm
define void @fn() {
  ...
  br i1 undef, ...
}
```

Please use the following instead:

```llvm
define void @fn(i1 %cond) {
  ...
  br i1 %cond, ...
}
```

Please refer to the Undefined Behavior Manual for more information.
@@ -385,13 +385,42 @@ static bool isSupportedMemset(MemSetInst *I, AllocaInst *AI,
         match(I->getOperand(2), m_SpecificInt(Size)) && !I->isVolatile();
 }

+static bool hasVariableOffset(GetElementPtrInst *GEP) {
This looks like you're pre-filtering for a scenario that could happen in the map lookup. You should directly detect when this delete happens rather than assuming the set of cases where it could occur.
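One way to act on that suggestion (an assumption on my part, not code from the PR) is to detect the deletion itself: cache the index as a `WeakTrackingVH`, which follows `replaceAllUsesWith` to the replacement value and nulls out when the value is destroyed, and only rederive the index when the handle has gone null.

```cpp
#include "llvm/IR/Instructions.h"
#include "llvm/IR/ValueHandle.h"
#include <cassert>
#include <map>
using namespace llvm;

// Hypothetical variant of the GEPVectorIdx cache: WeakTrackingVH becomes null
// when the cached value is deleted, so staleness is detected directly rather
// than inferred from the GEP's operand shape.
using GEPIdxMap = std::map<GetElementPtrInst *, WeakTrackingVH>;

static Value *getVectorIndex(GetElementPtrInst *GEP, GEPIdxMap &GEPVectorIdx) {
  auto It = GEPVectorIdx.find(GEP);
  assert(It != GEPVectorIdx.end() && "Must have entry for GEP!");
  if (Value *V = It->second)
    return V; // Cached index is still live (possibly RAUW-updated).
  // The handle went null: the cached index was deleted. Rederive it from the
  // GEP (e.g. via collectOffset, as in the earlier sketch) and re-cache it.
  return nullptr;
}
```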
%G1 = getelementptr inbounds i8, ptr addrspace(5) %f1, i32 8
%foo3.unpack2 = load i32, ptr addrspace(1) getelementptr inbounds (i8, ptr addrspace(1) @block4, i64 8), align 4
store i32 %foo3.unpack2, ptr addrspace(5) %G1, align 4
%foo4 = load i32, ptr addrspace(5) %G1, align 4
Give a better name to indicate this is the interesting value.
; This should not crash on a variable offset that can be
; optimized out (variable foo4 in the test)
define amdgpu_vs void @promote_load_from_store_aggr_varoff() local_unnamed_addr {
Suggested change:
- define amdgpu_vs void @promote_load_from_store_aggr_varoff() local_unnamed_addr {
+ define amdgpu_vs void @promote_load_from_store_aggr_varoff() {