Skip to content

Commit b047a0e

Browse files
authored
[AArch64][SVE] Upgrade PTRUE patterns to ALL when they match vector length. (#172993)
When the number of active elements of a PTRUE pattern matches the number of elements in the scalable vector (which requires compiling for a specific, known SVE vector length), we can upgrade the pattern to ALL. For example, under `vscale_range(16, 16)`, `ptrue p0.b, vl256` becomes `ptrue p0.b`. This enables CSE with similar PTRUEs as well as other simplifications. There was similar logic in `getPredicateForFixedLengthVector`, which I've removed as it should no longer be needed with this change. This change should also make the vector-length-specific (VLS) matching in `isAllActivePredicate` redundant, which I've also removed.
1 parent 45fefff commit b047a0e

File tree

5 files changed

+18
-31
lines changed

5 files changed

+18
-31
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 11 additions & 24 deletions
Original file line number | Diff line number | Diff line change
@@ -5894,6 +5894,17 @@ static inline SDValue getPTrue(SelectionDAG &DAG, SDLoc DL, EVT VT,
58945894
int Pattern) {
58955895
if (Pattern == AArch64SVEPredPattern::all)
58965896
return DAG.getConstant(1, DL, VT);
5897+
5898+
// When the number of active elements of a pattern matches the scalable vector
5899+
// length, we can upgrade the pattern to ALL and emit a splat instead.
5900+
if (unsigned PatNumElts = getNumElementsFromSVEPredPattern(Pattern)) {
5901+
const AArch64Subtarget &Subtarget = DAG.getSubtarget<AArch64Subtarget>();
5902+
unsigned NumElts = VT.getVectorMinNumElements();
5903+
unsigned VScale = Subtarget.getSVEVectorSizeInBits() / 128;
5904+
if (PatNumElts == (NumElts * VScale))
5905+
return DAG.getConstant(1, DL, VT);
5906+
}
5907+
58975908
return DAG.getNode(AArch64ISD::PTRUE, DL, VT,
58985909
DAG.getTargetConstant(Pattern, DL, MVT::i32));
58995910
}
@@ -15336,20 +15347,6 @@ static bool isAllActivePredicate(SelectionDAG &DAG, SDValue N) {
1533615347
N.getConstantOperandVal(0) == AArch64SVEPredPattern::all)
1533715348
return N.getValueType().getVectorMinNumElements() >= NumElts;
1533815349

15339-
// If we're compiling for a specific vector-length, we can check if the
15340-
// pattern's VL equals that of the scalable vector at runtime.
15341-
if (N.getOpcode() == AArch64ISD::PTRUE) {
15342-
const auto &Subtarget = DAG.getSubtarget<AArch64Subtarget>();
15343-
unsigned MinSVESize = Subtarget.getMinSVEVectorSizeInBits();
15344-
unsigned MaxSVESize = Subtarget.getMaxSVEVectorSizeInBits();
15345-
if (MaxSVESize && MinSVESize == MaxSVESize) {
15346-
unsigned VScale = MaxSVESize / AArch64::SVEBitsPerBlock;
15347-
unsigned PatNumElts =
15348-
getNumElementsFromSVEPredPattern(N.getConstantOperandVal(0));
15349-
return PatNumElts == (NumElts * VScale);
15350-
}
15351-
}
15352-
1535315350
return false;
1535415351
}
1535515352

@@ -30337,16 +30334,6 @@ static SDValue getPredicateForFixedLengthVector(SelectionDAG &DAG, SDLoc &DL,
3033730334
getSVEPredPatternFromNumElements(VT.getVectorNumElements());
3033830335
assert(PgPattern && "Unexpected element count for SVE predicate");
3033930336

30340-
// For vectors that are exactly getMaxSVEVectorSizeInBits big, we can use
30341-
// AArch64SVEPredPattern::all, which can enable the use of unpredicated
30342-
// variants of instructions when available.
30343-
const auto &Subtarget = DAG.getSubtarget<AArch64Subtarget>();
30344-
unsigned MinSVESize = Subtarget.getMinSVEVectorSizeInBits();
30345-
unsigned MaxSVESize = Subtarget.getMaxSVEVectorSizeInBits();
30346-
if (MaxSVESize && MinSVESize == MaxSVESize &&
30347-
MaxSVESize == VT.getSizeInBits())
30348-
PgPattern = AArch64SVEPredPattern::all;
30349-
3035030337
MVT MaskVT;
3035130338
switch (VT.getVectorElementType().getSimpleVT().SimpleTy) {
3035230339
default:

llvm/test/CodeGen/AArch64/active_lane_mask.ll

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -453,7 +453,7 @@ entry:
453453
define <vscale x 16 x i1> @lane_mask_nxv16i1_imm256() vscale_range(16, 16) {
454454
; CHECK-LABEL: lane_mask_nxv16i1_imm256:
455455
; CHECK: // %bb.0: // %entry
456-
; CHECK-NEXT: ptrue p0.b, vl256
456+
; CHECK-NEXT: ptrue p0.b
457457
; CHECK-NEXT: ret
458458
entry:
459459
%active.lane.mask = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 0, i64 256)

llvm/test/CodeGen/AArch64/insert-subvector-res-legalization.ll

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -208,7 +208,7 @@ define <vscale x 2 x i32> @vec_scalable_subvec_fixed_idx_nonzero_large_i32(ptr %
208208
; CHECK: // %bb.0:
209209
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
210210
; CHECK-NEXT: addvl sp, sp, #-1
211-
; CHECK-NEXT: ptrue p0.d, vl8
211+
; CHECK-NEXT: ptrue p0.d
212212
; CHECK-NEXT: ld1w { z0.d }, p0/z, [x1]
213213
; CHECK-NEXT: addvl sp, sp, #1
214214
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload

llvm/test/CodeGen/AArch64/named-vector-shuffles-sve.ll

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -563,7 +563,7 @@ define <vscale x 16 x i8> @splice_nxv16i8_neg128(<vscale x 16 x i8> %a, <vscale
563563
define <vscale x 16 x i8> @splice_nxv16i8_neg256(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) vscale_range(16,16) #0 {
564564
; CHECK-LABEL: splice_nxv16i8_neg256:
565565
; CHECK: // %bb.0:
566-
; CHECK-NEXT: ptrue p0.b, vl256
566+
; CHECK-NEXT: ptrue p0.b
567567
; CHECK-NEXT: rev p0.b, p0.b
568568
; CHECK-NEXT: splice z0.b, p0, z0.b, z1.b
569569
; CHECK-NEXT: ret

llvm/test/CodeGen/AArch64/sve-intrinsics-while.ll

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -110,7 +110,7 @@ define <vscale x 16 x i1> @whilele_b_ii_dont_fold_to_ptrue_nonexistent_vl9() {
110110
define <vscale x 16 x i1> @whilele_b_vl_maximum() vscale_range(16, 16) {
111111
; CHECK-LABEL: whilele_b_vl_maximum:
112112
; CHECK: // %bb.0:
113-
; CHECK-NEXT: ptrue p0.b, vl256
113+
; CHECK-NEXT: ptrue p0.b
114114
; CHECK-NEXT: ret
115115
%out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilele.nxv16i1.i64(i64 0, i64 255)
116116
ret <vscale x 16 x i1> %out
@@ -245,7 +245,7 @@ define <vscale x 16 x i1> @whilelo_b_ii_dont_fold_to_ptrue_nonexistent_vl9() {
245245
define <vscale x 16 x i1> @whilelo_b_vl_maximum() vscale_range(16, 16) {
246246
; CHECK-LABEL: whilelo_b_vl_maximum:
247247
; CHECK: // %bb.0:
248-
; CHECK-NEXT: ptrue p0.b, vl256
248+
; CHECK-NEXT: ptrue p0.b
249249
; CHECK-NEXT: ret
250250
%out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilelo.nxv16i1.i64(i64 0, i64 256)
251251
ret <vscale x 16 x i1> %out
@@ -370,7 +370,7 @@ define <vscale x 16 x i1> @whilels_b_ii_dont_fold_to_ptrue_nonexistent_vl9() {
370370
define <vscale x 16 x i1> @whilels_b_ii_vl_maximum() vscale_range(16, 16) {
371371
; CHECK-LABEL: whilels_b_ii_vl_maximum:
372372
; CHECK: // %bb.0:
373-
; CHECK-NEXT: ptrue p0.b, vl256
373+
; CHECK-NEXT: ptrue p0.b
374374
; CHECK-NEXT: ret
375375
%out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilels.nxv16i1.i64(i64 0, i64 255)
376376
ret <vscale x 16 x i1> %out
@@ -504,7 +504,7 @@ define <vscale x 16 x i1> @whilelt_b_ii_dont_fold_to_ptrue_nonexistent_vl9() {
504504
define <vscale x 16 x i1> @whilelt_b_ii_vl_maximum() vscale_range(16, 16) {
505505
; CHECK-LABEL: whilelt_b_ii_vl_maximum:
506506
; CHECK: // %bb.0:
507-
; CHECK-NEXT: ptrue p0.b, vl256
507+
; CHECK-NEXT: ptrue p0.b
508508
; CHECK-NEXT: ret
509509
%out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilelt.nxv16i1.i64(i64 0, i64 256)
510510
ret <vscale x 16 x i1> %out

0 commit comments

Comments
 (0)