[AMDGPU] Update PromoteAlloca to handle GEPs with variable offset. #122342
Conversation
In case of a variable GEP offset that can be optimized out, promote alloca is updated to use the refreshed index to avoid an assertion. Issue found by fuzzer.
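The recovery step the description refers to is easiest to see in isolation. Below is a minimal sketch, assuming current LLVM headers; `refreshVariableIndex` is an illustrative name, not a function from the PR, and it mirrors how `GetElementPtrInst::collectOffset` is used in the patch to rederive a GEP's single variable index when the cached index value may have been folded away.

```cpp
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Instructions.h"
using namespace llvm;

// Illustrative helper: rederive a GEP's single variable index directly from
// the instruction, so a stale cached value can be replaced.
static Value *refreshVariableIndex(GetElementPtrInst *GEP,
                                   const DataLayout &DL) {
  unsigned BW = DL.getIndexTypeSizeInBits(GEP->getType());
  SmallMapVector<Value *, APInt, 4> VarOffsets;
  APInt ConstOffset(BW, 0);
  // collectOffset decomposes the GEP into scaled variable offsets plus a
  // constant byte offset. The patch accepts exactly one variable term with a
  // zero constant part and uses that variable as the vector index (the scale
  // in VarOffsets.front().second is implicitly assumed to match the element
  // size, as in the PR).
  if (GEP->collectOffset(DL, BW, VarOffsets, ConstOffset) &&
      VarOffsets.size() == 1 && ConstOffset.isZero())
    return VarOffsets.front().first;
  return nullptr; // No single-variable decomposition; use the cached index.
}
```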
@llvm/pr-subscribers-backend-amdgpu

Author: Sumanth Gundapaneni (sgundapa)

Changes

In case of a variable GEP offset that can be optimized out, promote alloca is updated to use the refreshed index to avoid an assertion. Issue found by fuzzer.

Full diff: https://github.com/llvm/llvm-project/pull/122342.diff

2 Files Affected:
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
index e27ef71c1c0883..1e32743c3dfeeb 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
@@ -385,13 +385,42 @@ static bool isSupportedMemset(MemSetInst *I, AllocaInst *AI,
match(I->getOperand(2), m_SpecificInt(Size)) && !I->isVolatile();
}
+static bool hasVariableOffset(GetElementPtrInst *GEP) {
+ // Iterate over all operands starting from the first index (index 0 is the
+ // base pointer).
+ for (unsigned i = 1, e = GEP->getNumOperands(); i != e; ++i) {
+ Value *Op = GEP->getOperand(i);
+ // Check if the operand is not a constant integer value
+ if (!isa<ConstantInt>(Op)) {
+ return true;
+ }
+ }
+ return false;
+}
+
static Value *
-calculateVectorIndex(Value *Ptr,
- const std::map<GetElementPtrInst *, Value *> &GEPIdx) {
+calculateVectorIndex(Value *Ptr, std::map<GetElementPtrInst *, Value *> &GEPIdx,
+ const DataLayout &DL) {
auto *GEP = dyn_cast<GetElementPtrInst>(Ptr->stripPointerCasts());
if (!GEP)
return ConstantInt::getNullValue(Type::getInt32Ty(Ptr->getContext()));
+ // If the index of this GEP is a variable that might be deleted,
+ // update the index with its latest value. We've already handled any GEPs
+ // with unsupported index types (in GEPToVectorIndex) at this point.
+ if (hasVariableOffset(GEP)) {
+ unsigned BW = DL.getIndexTypeSizeInBits(GEP->getType());
+ SmallMapVector<Value *, APInt, 4> VarOffsets;
+ APInt ConstOffset(BW, 0);
+ if (GEP->collectOffset(DL, BW, VarOffsets, ConstOffset)) {
+ if (VarOffsets.size() == 1 && ConstOffset.isZero()) {
+ auto *UpdatedValue = VarOffsets.front().first;
+ GEPIdx[GEP] = UpdatedValue;
+ return UpdatedValue;
+ }
+ }
+ }
+
auto I = GEPIdx.find(GEP);
assert(I != GEPIdx.end() && "Must have entry for GEP!");
return I->second;
@@ -496,7 +525,7 @@ static Value *promoteAllocaUserToVector(
}
Value *Index = calculateVectorIndex(
- cast<LoadInst>(Inst)->getPointerOperand(), GEPVectorIdx);
+ cast<LoadInst>(Inst)->getPointerOperand(), GEPVectorIdx, DL);
// We're loading the full vector.
Type *AccessTy = Inst->getType();
@@ -552,7 +581,8 @@ static Value *promoteAllocaUserToVector(
// to know the current value. If this is a store of a single element, we
// need to know the value.
StoreInst *SI = cast<StoreInst>(Inst);
- Value *Index = calculateVectorIndex(SI->getPointerOperand(), GEPVectorIdx);
+ Value *Index =
+ calculateVectorIndex(SI->getPointerOperand(), GEPVectorIdx, DL);
Value *Val = SI->getValueOperand();
// We're storing the full vector, we can handle this without knowing CurVal.
@@ -850,7 +880,8 @@ bool AMDGPUPromoteAllocaImpl::tryPromoteAllocaToVector(AllocaInst &Alloca) {
if (Ptr != &Alloca && !GEPVectorIdx.count(GEP))
return nullptr;
- return dyn_cast<ConstantInt>(calculateVectorIndex(Ptr, GEPVectorIdx));
+ return dyn_cast<ConstantInt>(
+ calculateVectorIndex(Ptr, GEPVectorIdx, *DL));
};
unsigned OpNum = U->getOperandNo();
diff --git a/llvm/test/CodeGen/AMDGPU/promote-alloca-array-aggregate.ll b/llvm/test/CodeGen/AMDGPU/promote-alloca-array-aggregate.ll
index 05c727201bbf1d..9db416041a5bc0 100644
--- a/llvm/test/CodeGen/AMDGPU/promote-alloca-array-aggregate.ll
+++ b/llvm/test/CodeGen/AMDGPU/promote-alloca-array-aggregate.ll
@@ -122,6 +122,34 @@ define amdgpu_vs void @promote_load_from_store_aggr() #0 {
ret void
}
+%Block4 = type { [2 x i32], i32 }
+@block4 = external addrspace(1) global %Block4
+%gl_PV = type { <4 x i32>, i32, [1 x i32], [1 x i32] }
+@pv1 = external addrspace(1) global %gl_PV
+
+; This should not crash on a variable offset that can be
+; optimized out (variable foo4 in the test)
+define amdgpu_vs void @promote_load_from_store_aggr_varoff() local_unnamed_addr {
+; CHECK-LABEL: @promote_load_from_store_aggr_varoff(
+; CHECK-NEXT: [[FOO3_UNPACK2:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds (i8, ptr addrspace(1) @block4, i64 8), align 4
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <3 x i32> undef, i32 [[FOO3_UNPACK2]], i32 2
+; CHECK-NEXT: [[TMP2:%.*]] = extractelement <3 x i32> [[TMP1]], i32 [[FOO3_UNPACK2]]
+; CHECK-NEXT: [[FOO12:%.*]] = insertelement <4 x i32> poison, i32 [[TMP2]], i64 3
+; CHECK-NEXT: store <4 x i32> [[FOO12]], ptr addrspace(1) @pv1, align 16
+; CHECK-NEXT: ret void
+;
+ %f1 = alloca [3 x i32], align 4, addrspace(5)
+ %G1 = getelementptr inbounds i8, ptr addrspace(5) %f1, i32 8
+ %foo3.unpack2 = load i32, ptr addrspace(1) getelementptr inbounds (i8, ptr addrspace(1) @block4, i64 8), align 4
+ store i32 %foo3.unpack2, ptr addrspace(5) %G1, align 4
+ %foo4 = load i32, ptr addrspace(5) %G1, align 4
+ %foo5 = getelementptr [3 x i32], ptr addrspace(5) %f1, i32 0, i32 %foo4
+ %foo6 = load i32, ptr addrspace(5) %foo5, align 4
+ %foo12 = insertelement <4 x i32> poison, i32 %foo6, i64 3
+ store <4 x i32> %foo12, ptr addrspace(1) @pv1, align 16
+ ret void
+}
+
define amdgpu_vs void @promote_memmove_aggr() #0 {
; CHECK-LABEL: @promote_memmove_aggr(
; CHECK-NEXT: store float 1.000000e+00, ptr addrspace(1) @pv, align 4
You can test this locally with the following command:

```
git diff -U0 --pickaxe-regex -S '([^a-zA-Z0-9#_-]undef[^a-zA-Z0-9_-]|UndefValue::get)' 553fa204ed5ab4c48bc6080451df24310c00e69c 1093642dcc9cfaf7b50bf314f18467df58ced7ab llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp llvm/test/CodeGen/AMDGPU/promote-alloca-array-aggregate.ll
```

The following files introduce new uses of undef:

Undef is now deprecated and should only be used in the rare cases where no replacement is possible. For example, a load of uninitialized memory yields undef. In tests, avoid using undef.

For example, this is considered a bad practice:

```llvm
define void @fn() {
  ...
  br i1 undef, ...
}
```

Please use the following instead:

```llvm
define void @fn(i1 %cond) {
  ...
  br i1 %cond, ...
}
```

Please refer to the Undefined Behavior Manual for more information.
@@ -385,13 +385,42 @@ static bool isSupportedMemset(MemSetInst *I, AllocaInst *AI,
         match(I->getOperand(2), m_SpecificInt(Size)) && !I->isVolatile();
 }

+static bool hasVariableOffset(GetElementPtrInst *GEP) {
This looks like you're pre-filtering for a scenario that could happen in the map lookup. You should directly detect when this delete happens rather than assuming the set of cases where it could occur.
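One way to act on that suggestion (an assumption on my part, not code from the PR) is to detect the deletion itself: cache the index as a `WeakTrackingVH`, which follows `replaceAllUsesWith` to the replacement value and nulls out when the value is destroyed, and only rederive the index when the handle has gone null.

```cpp
#include "llvm/IR/Instructions.h"
#include "llvm/IR/ValueHandle.h"
#include <cassert>
#include <map>
using namespace llvm;

// Hypothetical variant of the GEPVectorIdx cache: WeakTrackingVH becomes null
// when the cached value is deleted, so staleness is detected directly rather
// than inferred from the GEP's operand shape.
using GEPIdxMap = std::map<GetElementPtrInst *, WeakTrackingVH>;

static Value *getVectorIndex(GetElementPtrInst *GEP, GEPIdxMap &GEPVectorIdx) {
  auto It = GEPVectorIdx.find(GEP);
  assert(It != GEPVectorIdx.end() && "Must have entry for GEP!");
  if (Value *V = It->second)
    return V; // Cached index is still live (possibly RAUW-updated).
  // The handle went null: the cached index was deleted. Rederive it from the
  // GEP (e.g. via collectOffset, as in the earlier sketch) and re-cache it.
  return nullptr;
}
```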
%G1 = getelementptr inbounds i8, ptr addrspace(5) %f1, i32 8
%foo3.unpack2 = load i32, ptr addrspace(1) getelementptr inbounds (i8, ptr addrspace(1) @block4, i64 8), align 4
store i32 %foo3.unpack2, ptr addrspace(5) %G1, align 4
%foo4 = load i32, ptr addrspace(5) %G1, align 4
Give a better name to indicate this is the interesting value.
; This should not crash on a variable offset that can be
; optimized out (variable foo4 in the test)
define amdgpu_vs void @promote_load_from_store_aggr_varoff() local_unnamed_addr {
Suggested change:
- define amdgpu_vs void @promote_load_from_store_aggr_varoff() local_unnamed_addr {
+ define amdgpu_vs void @promote_load_from_store_aggr_varoff() {