[AArch64][SVE] Upgrade PTRUE patterns to ALL when they match vector length. (#172993)

rj-jesus · web-flow · commit b047a0e1e56f · 2025-12-22T12:24:16.000Z
When the number of active elements of a PTRUE pattern matches the
scalable vector length, we can upgrade the pattern to ALL. This enables
CSE with similar PTRUEs as well as other simplifications.

There was similar logic in `getPredicateForFixedLengthVector`, which
I've removed as it should no longer be needed with this change.

This change should also make the VLS matching in `isAllActivePredicate`
redundant, which I've also removed.
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -5894,6 +5894,17 @@ static inline SDValue getPTrue(SelectionDAG &DAG, SDLoc DL, EVT VT,
                                int Pattern) {
   if (Pattern == AArch64SVEPredPattern::all)
     return DAG.getConstant(1, DL, VT);
+
+  // When the number of active elements of a pattern matches the scalable vector
+  // length, we can upgrade the pattern to ALL and emit a splat instead.
+  if (unsigned PatNumElts = getNumElementsFromSVEPredPattern(Pattern)) {
+    const AArch64Subtarget &Subtarget = DAG.getSubtarget<AArch64Subtarget>();
+    unsigned NumElts = VT.getVectorMinNumElements();
+    unsigned VScale = Subtarget.getSVEVectorSizeInBits() / 128;
+    if (PatNumElts == (NumElts * VScale))
+      return DAG.getConstant(1, DL, VT);
+  }
+
   return DAG.getNode(AArch64ISD::PTRUE, DL, VT,
                      DAG.getTargetConstant(Pattern, DL, MVT::i32));
 }
@@ -15336,20 +15347,6 @@ static bool isAllActivePredicate(SelectionDAG &DAG, SDValue N) {
       N.getConstantOperandVal(0) == AArch64SVEPredPattern::all)
     return N.getValueType().getVectorMinNumElements() >= NumElts;
 
-  // If we're compiling for a specific vector-length, we can check if the
-  // pattern's VL equals that of the scalable vector at runtime.
-  if (N.getOpcode() == AArch64ISD::PTRUE) {
-    const auto &Subtarget = DAG.getSubtarget<AArch64Subtarget>();
-    unsigned MinSVESize = Subtarget.getMinSVEVectorSizeInBits();
-    unsigned MaxSVESize = Subtarget.getMaxSVEVectorSizeInBits();
-    if (MaxSVESize && MinSVESize == MaxSVESize) {
-      unsigned VScale = MaxSVESize / AArch64::SVEBitsPerBlock;
-      unsigned PatNumElts =
-          getNumElementsFromSVEPredPattern(N.getConstantOperandVal(0));
-      return PatNumElts == (NumElts * VScale);
-    }
-  }
-
   return false;
 }
 
@@ -30337,16 +30334,6 @@ static SDValue getPredicateForFixedLengthVector(SelectionDAG &DAG, SDLoc &DL,
       getSVEPredPatternFromNumElements(VT.getVectorNumElements());
   assert(PgPattern && "Unexpected element count for SVE predicate");
 
-  // For vectors that are exactly getMaxSVEVectorSizeInBits big, we can use
-  // AArch64SVEPredPattern::all, which can enable the use of unpredicated
-  // variants of instructions when available.
-  const auto &Subtarget = DAG.getSubtarget<AArch64Subtarget>();
-  unsigned MinSVESize = Subtarget.getMinSVEVectorSizeInBits();
-  unsigned MaxSVESize = Subtarget.getMaxSVEVectorSizeInBits();
-  if (MaxSVESize && MinSVESize == MaxSVESize &&
-      MaxSVESize == VT.getSizeInBits())
-    PgPattern = AArch64SVEPredPattern::all;
-
   MVT MaskVT;
   switch (VT.getVectorElementType().getSimpleVT().SimpleTy) {
   default:
diff --git a/llvm/test/CodeGen/AArch64/active_lane_mask.ll b/llvm/test/CodeGen/AArch64/active_lane_mask.ll
@@ -453,7 +453,7 @@ entry:
 define <vscale x 16 x i1> @lane_mask_nxv16i1_imm256() vscale_range(16, 16) {
 ; CHECK-LABEL: lane_mask_nxv16i1_imm256:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    ptrue p0.b, vl256
+; CHECK-NEXT:    ptrue p0.b
 ; CHECK-NEXT:    ret
 entry:
   %active.lane.mask = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 0, i64 256)
diff --git a/llvm/test/CodeGen/AArch64/insert-subvector-res-legalization.ll b/llvm/test/CodeGen/AArch64/insert-subvector-res-legalization.ll
@@ -208,7 +208,7 @@ define <vscale x 2 x i32> @vec_scalable_subvec_fixed_idx_nonzero_large_i32(ptr %
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT:    addvl sp, sp, #-1
-; CHECK-NEXT:    ptrue p0.d, vl8
+; CHECK-NEXT:    ptrue p0.d
 ; CHECK-NEXT:    ld1w { z0.d }, p0/z, [x1]
 ; CHECK-NEXT:    addvl sp, sp, #1
 ; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
diff --git a/llvm/test/CodeGen/AArch64/named-vector-shuffles-sve.ll b/llvm/test/CodeGen/AArch64/named-vector-shuffles-sve.ll
@@ -563,7 +563,7 @@ define <vscale x 16 x i8> @splice_nxv16i8_neg128(<vscale x 16 x i8> %a, <vscale
 define <vscale x 16 x i8> @splice_nxv16i8_neg256(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) vscale_range(16,16) #0 {
 ; CHECK-LABEL: splice_nxv16i8_neg256:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ptrue p0.b, vl256
+; CHECK-NEXT:    ptrue p0.b
 ; CHECK-NEXT:    rev p0.b, p0.b
 ; CHECK-NEXT:    splice z0.b, p0, z0.b, z1.b
 ; CHECK-NEXT:    ret
diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-while.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-while.ll
@@ -110,7 +110,7 @@ define <vscale x 16 x i1> @whilele_b_ii_dont_fold_to_ptrue_nonexistent_vl9() {
 define <vscale x 16 x i1> @whilele_b_vl_maximum() vscale_range(16, 16) {
 ; CHECK-LABEL: whilele_b_vl_maximum:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ptrue p0.b, vl256
+; CHECK-NEXT:    ptrue p0.b
 ; CHECK-NEXT:    ret
   %out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilele.nxv16i1.i64(i64 0, i64 255)
   ret <vscale x 16 x i1> %out
@@ -245,7 +245,7 @@ define <vscale x 16 x i1> @whilelo_b_ii_dont_fold_to_ptrue_nonexistent_vl9() {
 define <vscale x 16 x i1> @whilelo_b_vl_maximum() vscale_range(16, 16) {
 ; CHECK-LABEL: whilelo_b_vl_maximum:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ptrue p0.b, vl256
+; CHECK-NEXT:    ptrue p0.b
 ; CHECK-NEXT:    ret
   %out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilelo.nxv16i1.i64(i64 0, i64 256)
   ret <vscale x 16 x i1> %out
@@ -370,7 +370,7 @@ define <vscale x 16 x i1> @whilels_b_ii_dont_fold_to_ptrue_nonexistent_vl9() {
 define <vscale x 16 x i1> @whilels_b_ii_vl_maximum() vscale_range(16, 16) {
 ; CHECK-LABEL: whilels_b_ii_vl_maximum:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ptrue p0.b, vl256
+; CHECK-NEXT:    ptrue p0.b
 ; CHECK-NEXT:    ret
   %out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilels.nxv16i1.i64(i64 0, i64 255)
   ret <vscale x 16 x i1> %out
@@ -504,7 +504,7 @@ define <vscale x 16 x i1> @whilelt_b_ii_dont_fold_to_ptrue_nonexistent_vl9() {
 define <vscale x 16 x i1> @whilelt_b_ii_vl_maximum() vscale_range(16, 16) {
 ; CHECK-LABEL: whilelt_b_ii_vl_maximum:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ptrue p0.b, vl256
+; CHECK-NEXT:    ptrue p0.b
 ; CHECK-NEXT:    ret
   %out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilelt.nxv16i1.i64(i64 0, i64 256)
   ret <vscale x 16 x i1> %out