Skip to content

Conversation

@gbossu
Copy link
Contributor

@gbossu gbossu commented Dec 19, 2025

This is in preparation for adding a DAG combiner for turning INSERT_VECTOR_ELT(undef, ...) -> VECTOR_SPLAT

This is a stacked PR:

This is in preparation for adding a DAG combiner for turning
INSERT_VECTOR_ELT(undef, ...) -> VECTOR_SPLAT
@llvmbot
Copy link
Member

llvmbot commented Dec 19, 2025

@llvm/pr-subscribers-backend-aarch64

Author: Gaëtan Bossu (gbossu)

Changes

This is in preparation for adding a DAG combiner for turning INSERT_VECTOR_ELT(undef, ...) -> VECTOR_SPLAT


Patch is 20.77 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/173003.diff

1 Files Affected:

  • (modified) llvm/test/CodeGen/AArch64/sve-insert-element.ll (+411-64)
diff --git a/llvm/test/CodeGen/AArch64/sve-insert-element.ll b/llvm/test/CodeGen/AArch64/sve-insert-element.ll
index 8ca005a88add3..5cc54929756ff 100644
--- a/llvm/test/CodeGen/AArch64/sve-insert-element.ll
+++ b/llvm/test/CodeGen/AArch64/sve-insert-element.ll
@@ -165,31 +165,378 @@ define <vscale x 16 x i8> @test_lanex_16xi8(<vscale x 16 x i8> %a, i32 %x) {
   ret <vscale x 16 x i8> %b
 }
 
+; TODO: Implement DAG combiner.
+; INSERT_VECTOR_ELT(undef, ...) -> VECTOR_SPLAT
 
-; Redundant lane insert
-define <vscale x 4 x i32> @extract_insert_4xi32(<vscale x 4 x i32> %a) {
-; CHECK-LABEL: extract_insert_4xi32:
+define <vscale x 16 x i8> @test_lanex_16xi8_poison(i8 %e, i32 %x) {
+; CHECK-LABEL: test_lanex_16xi8_poison:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    index z0.b, #0, #1
+; CHECK-NEXT:    mov w8, w1
+; CHECK-NEXT:    ptrue p0.b
+; CHECK-NEXT:    mov z1.b, w8
+; CHECK-NEXT:    cmpeq p0.b, p0/z, z0.b, z1.b
+; CHECK-NEXT:    mov z0.b, p0/m, w0
 ; CHECK-NEXT:    ret
-  %b = extractelement <vscale x 4 x i32> %a, i32 2
-  %c = insertelement <vscale x 4 x i32> %a, i32 %b, i32 2
-  ret <vscale x 4 x i32> %c
+  %b = insertelement <vscale x 16 x i8> poison, i8 %e, i32 %x
+  ret <vscale x 16 x i8> %b
 }
 
-define <vscale x 8 x i16> @test_lane6_undef_8xi16(i16 %a) {
-; CHECK-LABEL: test_lane6_undef_8xi16:
+define <vscale x 16 x i8> @test_lanex_16xi8_poison_imm(i8 %e, i32 %x) {
+; CHECK-LABEL: test_lanex_16xi8_poison_imm:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    index z0.b, #0, #1
+; CHECK-NEXT:    mov w8, w1
+; CHECK-NEXT:    ptrue p0.b
+; CHECK-NEXT:    mov z1.b, w8
+; CHECK-NEXT:    mov w8, #5 // =0x5
+; CHECK-NEXT:    cmpeq p0.b, p0/z, z0.b, z1.b
+; CHECK-NEXT:    mov z0.b, p0/m, w8
+; CHECK-NEXT:    ret
+  %b = insertelement <vscale x 16 x i8> poison, i8 5, i32 %x
+  ret <vscale x 16 x i8> %b
+}
+
+define <vscale x 8 x i16> @test_lanex_8xi16_poison(i16 %e, i32 %x) {
+; CHECK-LABEL: test_lanex_8xi16_poison:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #6 // =0x6
 ; CHECK-NEXT:    index z0.h, #0, #1
+; CHECK-NEXT:    mov w8, w1
 ; CHECK-NEXT:    ptrue p0.h
 ; CHECK-NEXT:    mov z1.h, w8
 ; CHECK-NEXT:    cmpeq p0.h, p0/z, z0.h, z1.h
 ; CHECK-NEXT:    mov z0.h, p0/m, w0
 ; CHECK-NEXT:    ret
-  %b = insertelement <vscale x 8 x i16> poison, i16 %a, i32 6
+  %b = insertelement <vscale x 8 x i16> poison, i16 %e, i32 %x
   ret <vscale x 8 x i16> %b
 }
 
+define <vscale x 8 x i16> @test_lanex_8xi16_poison_imm(i32 %x) {
+; CHECK-LABEL: test_lanex_8xi16_poison_imm:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    index z0.h, #0, #1
+; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    ptrue p0.h
+; CHECK-NEXT:    mov z1.h, w8
+; CHECK-NEXT:    mov w8, #5 // =0x5
+; CHECK-NEXT:    cmpeq p0.h, p0/z, z0.h, z1.h
+; CHECK-NEXT:    mov z0.h, p0/m, w8
+; CHECK-NEXT:    ret
+  %b = insertelement <vscale x 8 x i16> poison, i16 5, i32 %x
+  ret <vscale x 8 x i16> %b
+}
+
+define <vscale x 4 x i32> @test_lanex_4xi32_poison(i32 %e, i32 %x) {
+; CHECK-LABEL: test_lanex_4xi32_poison:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    index z0.s, #0, #1
+; CHECK-NEXT:    mov w8, w1
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    mov z1.s, w8
+; CHECK-NEXT:    cmpeq p0.s, p0/z, z0.s, z1.s
+; CHECK-NEXT:    mov z0.s, p0/m, w0
+; CHECK-NEXT:    ret
+  %b = insertelement <vscale x 4 x i32> poison, i32 %e, i32 %x
+  ret <vscale x 4 x i32> %b
+}
+
+define <vscale x 4 x i32> @test_lanex_4xi32_poison_imm(i32 %x) {
+; CHECK-LABEL: test_lanex_4xi32_poison_imm:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    index z0.s, #0, #1
+; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    mov z1.s, w8
+; CHECK-NEXT:    mov w8, #5 // =0x5
+; CHECK-NEXT:    cmpeq p0.s, p0/z, z0.s, z1.s
+; CHECK-NEXT:    mov z0.s, p0/m, w8
+; CHECK-NEXT:    ret
+  %b = insertelement <vscale x 4 x i32> poison, i32 5, i32 %x
+  ret <vscale x 4 x i32> %b
+}
+
+define <vscale x 2 x i64> @test_lanex_2xi64_poison(i64 %e, i32 %x) {
+; CHECK-LABEL: test_lanex_2xi64_poison:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    index z0.d, #0, #1
+; CHECK-NEXT:    mov w8, w1
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    mov z1.d, x8
+; CHECK-NEXT:    cmpeq p0.d, p0/z, z0.d, z1.d
+; CHECK-NEXT:    mov z0.d, p0/m, x0
+; CHECK-NEXT:    ret
+  %b = insertelement <vscale x 2 x i64> poison, i64 %e, i32 %x
+  ret <vscale x 2 x i64> %b
+}
+
+define <vscale x 2 x i64> @test_lanex_2xi64_poison_imm(i32 %x) {
+; CHECK-LABEL: test_lanex_2xi64_poison_imm:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    index z0.d, #0, #1
+; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    mov z1.d, x8
+; CHECK-NEXT:    mov w8, #5 // =0x5
+; CHECK-NEXT:    cmpeq p0.d, p0/z, z0.d, z1.d
+; CHECK-NEXT:    mov z0.d, p0/m, x8
+; CHECK-NEXT:    ret
+  %b = insertelement <vscale x 2 x i64> poison, i64 5, i32 %x
+  ret <vscale x 2 x i64> %b
+}
+
+define <vscale x 2 x half> @test_lanex_nxv2f16_poison(half %h, i64 %idx) {
+; CHECK-LABEL: test_lanex_nxv2f16_poison:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    index z1.d, #0, #1
+; CHECK-NEXT:    mov z2.d, x0
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    cmpeq p0.d, p0/z, z1.d, z2.d
+; CHECK-NEXT:    mov z0.h, p0/m, h0
+; CHECK-NEXT:    ret
+  %res = insertelement <vscale x 2 x half> poison, half %h, i64 %idx
+  ret <vscale x 2 x half> %res
+}
+
+define <vscale x 2 x half> @test_lanex_nxv2f16_poison_imm(i64 %idx) {
+; CHECK-LABEL: test_lanex_nxv2f16_poison_imm:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    index z0.d, #0, #1
+; CHECK-NEXT:    mov z1.d, x0
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    cmpeq p0.d, p0/z, z0.d, z1.d
+; CHECK-NEXT:    fmov h0, #1.50000000
+; CHECK-NEXT:    mov z0.h, p0/m, h0
+; CHECK-NEXT:    ret
+  %res = insertelement <vscale x 2 x half> poison, half 1.5, i64 %idx
+  ret <vscale x 2 x half> %res
+}
+
+define <vscale x 4 x half> @test_lanex_nxv4f16_poison(half %h, i64 %idx) {
+; CHECK-LABEL: test_lanex_nxv4f16_poison:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    index z1.s, #0, #1
+; CHECK-NEXT:    mov z2.s, w0
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    cmpeq p0.s, p0/z, z1.s, z2.s
+; CHECK-NEXT:    mov z0.h, p0/m, h0
+; CHECK-NEXT:    ret
+  %res = insertelement <vscale x 4 x half> poison, half %h, i64 %idx
+  ret <vscale x 4 x half> %res
+}
+
+define <vscale x 4 x half> @test_lanex_nxv4f16_poison_imm(i64 %idx) {
+; CHECK-LABEL: test_lanex_nxv4f16_poison_imm:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    index z0.s, #0, #1
+; CHECK-NEXT:    mov z1.s, w0
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    cmpeq p0.s, p0/z, z0.s, z1.s
+; CHECK-NEXT:    fmov h0, #1.50000000
+; CHECK-NEXT:    mov z0.h, p0/m, h0
+; CHECK-NEXT:    ret
+  %res = insertelement <vscale x 4 x half> poison, half 1.5, i64 %idx
+  ret <vscale x 4 x half> %res
+}
+
+define <vscale x 8 x half> @test_lanex_nxv8f16_poison(half %h, i64 %idx) {
+; CHECK-LABEL: test_lanex_nxv8f16_poison:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    index z1.h, #0, #1
+; CHECK-NEXT:    mov z2.h, w0
+; CHECK-NEXT:    ptrue p0.h
+; CHECK-NEXT:    cmpeq p0.h, p0/z, z1.h, z2.h
+; CHECK-NEXT:    mov z0.h, p0/m, h0
+; CHECK-NEXT:    ret
+  %res = insertelement <vscale x 8 x half> poison, half %h, i64 %idx
+  ret <vscale x 8 x half> %res
+}
+
+define <vscale x 8 x half> @test_lanex_nxv8f16_poison_imm(i64 %idx) {
+; CHECK-LABEL: test_lanex_nxv8f16_poison_imm:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    index z0.h, #0, #1
+; CHECK-NEXT:    mov z1.h, w0
+; CHECK-NEXT:    ptrue p0.h
+; CHECK-NEXT:    cmpeq p0.h, p0/z, z0.h, z1.h
+; CHECK-NEXT:    fmov h0, #1.50000000
+; CHECK-NEXT:    mov z0.h, p0/m, h0
+; CHECK-NEXT:    ret
+  %res = insertelement <vscale x 8 x half> poison, half 1.5, i64 %idx
+  ret <vscale x 8 x half> %res
+}
+
+define <vscale x 2 x bfloat> @test_lanex_nxv2bf16_undef(bfloat %h, i64 %idx) {
+; CHECK-LABEL: test_lanex_nxv2bf16_undef:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    index z1.d, #0, #1
+; CHECK-NEXT:    mov z2.d, x0
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    cmpeq p0.d, p0/z, z1.d, z2.d
+; CHECK-NEXT:    mov z0.h, p0/m, h0
+; CHECK-NEXT:    ret
+  %res = insertelement <vscale x 2 x bfloat> poison, bfloat %h, i64 %idx
+  ret <vscale x 2 x bfloat> %res
+}
+
+define <vscale x 2 x bfloat> @test_lanex_nxv2bf16_undef_imm(i64 %idx) {
+; CHECK-LABEL: test_lanex_nxv2bf16_undef_imm:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    index z0.d, #0, #1
+; CHECK-NEXT:    mov z1.d, x0
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    cmpeq p0.d, p0/z, z0.d, z1.d
+; CHECK-NEXT:    fmov h0, #1.93750000
+; CHECK-NEXT:    mov z0.h, p0/m, h0
+; CHECK-NEXT:    ret
+  %res = insertelement <vscale x 2 x bfloat> poison, bfloat 1.5, i64 %idx
+  ret <vscale x 2 x bfloat> %res
+}
+
+define <vscale x 4 x bfloat> @test_lanex_nxv4bf16_undef(bfloat %h, i64 %idx) {
+; CHECK-LABEL: test_lanex_nxv4bf16_undef:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    index z1.s, #0, #1
+; CHECK-NEXT:    mov z2.s, w0
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    cmpeq p0.s, p0/z, z1.s, z2.s
+; CHECK-NEXT:    mov z0.h, p0/m, h0
+; CHECK-NEXT:    ret
+  %res = insertelement <vscale x 4 x bfloat> poison, bfloat %h, i64 %idx
+  ret <vscale x 4 x bfloat> %res
+}
+
+define <vscale x 4 x bfloat> @test_lanex_nxv4bf16_undef_imm(i64 %idx) {
+; CHECK-LABEL: test_lanex_nxv4bf16_undef_imm:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    index z0.s, #0, #1
+; CHECK-NEXT:    mov z1.s, w0
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    cmpeq p0.s, p0/z, z0.s, z1.s
+; CHECK-NEXT:    fmov h0, #1.93750000
+; CHECK-NEXT:    mov z0.h, p0/m, h0
+; CHECK-NEXT:    ret
+  %res = insertelement <vscale x 4 x bfloat> poison, bfloat 1.5, i64 %idx
+  ret <vscale x 4 x bfloat> %res
+}
+
+define <vscale x 8 x bfloat> @test_lanex_nxv8bf16_poison(bfloat %h, i64 %idx) {
+; CHECK-LABEL: test_lanex_nxv8bf16_poison:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    index z1.h, #0, #1
+; CHECK-NEXT:    mov z2.h, w0
+; CHECK-NEXT:    ptrue p0.h
+; CHECK-NEXT:    cmpeq p0.h, p0/z, z1.h, z2.h
+; CHECK-NEXT:    mov z0.h, p0/m, h0
+; CHECK-NEXT:    ret
+  %res = insertelement <vscale x 8 x bfloat> poison, bfloat %h, i64 %idx
+  ret <vscale x 8 x bfloat> %res
+}
+
+define <vscale x 8 x bfloat> @test_lanex_nxv8bf16_poison_imm(i64 %idx) {
+; CHECK-LABEL: test_lanex_nxv8bf16_poison_imm:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    index z0.h, #0, #1
+; CHECK-NEXT:    mov z1.h, w0
+; CHECK-NEXT:    ptrue p0.h
+; CHECK-NEXT:    cmpeq p0.h, p0/z, z0.h, z1.h
+; CHECK-NEXT:    fmov h0, #1.93750000
+; CHECK-NEXT:    mov z0.h, p0/m, h0
+; CHECK-NEXT:    ret
+  %res = insertelement <vscale x 8 x bfloat> poison, bfloat 1.5, i64 %idx
+  ret <vscale x 8 x bfloat> %res
+}
+
+define <vscale x 2 x float> @test_lanex_nxv2f32_poison(float %f, i64 %idx) {
+; CHECK-LABEL: test_lanex_nxv2f32_poison:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    index z1.d, #0, #1
+; CHECK-NEXT:    mov z2.d, x0
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    cmpeq p0.d, p0/z, z1.d, z2.d
+; CHECK-NEXT:    mov z0.s, p0/m, s0
+; CHECK-NEXT:    ret
+  %res = insertelement <vscale x 2 x float> poison, float %f, i64 %idx
+  ret <vscale x 2 x float> %res
+}
+
+define <vscale x 2 x float> @test_lanex_nxv2f32_poison_imm(i64 %idx) {
+; CHECK-LABEL: test_lanex_nxv2f32_poison_imm:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    index z0.d, #0, #1
+; CHECK-NEXT:    mov z1.d, x0
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    cmpeq p0.d, p0/z, z0.d, z1.d
+; CHECK-NEXT:    fmov s0, #1.50000000
+; CHECK-NEXT:    mov z0.s, p0/m, s0
+; CHECK-NEXT:    ret
+  %res = insertelement <vscale x 2 x float> poison, float 1.5, i64 %idx
+  ret <vscale x 2 x float> %res
+}
+
+define <vscale x 4 x float> @test_lanex_nxv4f32_poison(float %f, i64 %idx) {
+; CHECK-LABEL: test_lanex_nxv4f32_poison:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    index z1.s, #0, #1
+; CHECK-NEXT:    mov z2.s, w0
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    cmpeq p0.s, p0/z, z1.s, z2.s
+; CHECK-NEXT:    mov z0.s, p0/m, s0
+; CHECK-NEXT:    ret
+  %res = insertelement <vscale x 4 x float> poison, float %f, i64 %idx
+  ret <vscale x 4 x float> %res
+}
+
+define <vscale x 4 x float> @test_lanex_nxv4f32_poison_imm(float %f, i64 %idx) {
+; CHECK-LABEL: test_lanex_nxv4f32_poison_imm:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    index z0.s, #0, #1
+; CHECK-NEXT:    mov z1.s, w0
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    cmpeq p0.s, p0/z, z0.s, z1.s
+; CHECK-NEXT:    fmov s0, #1.50000000
+; CHECK-NEXT:    mov z0.s, p0/m, s0
+; CHECK-NEXT:    ret
+  %res = insertelement <vscale x 4 x float> poison, float 1.5, i64 %idx
+  ret <vscale x 4 x float> %res
+}
+
+define <vscale x 2 x double> @test_lanex_nxv2f64_poison(double %d, i64 %idx) {
+; CHECK-LABEL: test_lanex_nxv2f64_poison:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    index z1.d, #0, #1
+; CHECK-NEXT:    mov z2.d, x0
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    cmpeq p0.d, p0/z, z1.d, z2.d
+; CHECK-NEXT:    mov z0.d, p0/m, d0
+; CHECK-NEXT:    ret
+  %res = insertelement <vscale x 2 x double> poison, double %d, i64 %idx
+  ret <vscale x 2 x double> %res
+}
+
+define <vscale x 2 x double> @test_lanex_nxv2f64_poison_imm(i64 %idx) {
+; CHECK-LABEL: test_lanex_nxv2f64_poison_imm:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    index z0.d, #0, #1
+; CHECK-NEXT:    mov z1.d, x0
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    cmpeq p0.d, p0/z, z0.d, z1.d
+; CHECK-NEXT:    fmov d0, #1.50000000
+; CHECK-NEXT:    mov z0.d, p0/m, d0
+; CHECK-NEXT:    ret
+  %res = insertelement <vscale x 2 x double> poison, double 1.5, i64 %idx
+  ret <vscale x 2 x double> %res
+}
+
+; Redundant lane insert
+define <vscale x 4 x i32> @extract_insert_4xi32(<vscale x 4 x i32> %a) {
+; CHECK-LABEL: extract_insert_4xi32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ret
+  %b = extractelement <vscale x 4 x i32> %a, i32 2
+  %c = insertelement <vscale x 4 x i32> %a, i32 %b, i32 2
+  ret <vscale x 4 x i32> %c
+}
+
 define <vscale x 16 x i8> @test_lane0_undef_16xi8(i8 %a) {
 ; CHECK-LABEL: test_lane0_undef_16xi8:
 ; CHECK:       // %bb.0:
@@ -326,120 +673,120 @@ define <vscale x 2 x double> @test_insert_into_undef_nxv2f64(double %a) {
 }
 
 ; Insert scalar at index
-define <vscale x 2 x half> @test_insert_with_index_nxv2f16(half %h, i64 %idx) {
+define <vscale x 2 x half> @test_insert_with_index_nxv2f16(<vscale x 2 x half> %a, half %h, i64 %idx) {
 ; CHECK-LABEL: test_insert_with_index_nxv2f16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    index z1.d, #0, #1
-; CHECK-NEXT:    mov z2.d, x0
+; CHECK-NEXT:    index z2.d, #0, #1
+; CHECK-NEXT:    mov z3.d, x0
 ; CHECK-NEXT:    ptrue p0.d
-; CHECK-NEXT:    cmpeq p0.d, p0/z, z1.d, z2.d
-; CHECK-NEXT:    mov z0.h, p0/m, h0
+; CHECK-NEXT:    cmpeq p0.d, p0/z, z2.d, z3.d
+; CHECK-NEXT:    mov z0.h, p0/m, h1
 ; CHECK-NEXT:    ret
-  %res = insertelement <vscale x 2 x half> poison, half %h, i64 %idx
+  %res = insertelement <vscale x 2 x half> %a, half %h, i64 %idx
   ret <vscale x 2 x half> %res
 }
 
-define <vscale x 4 x half> @test_insert_with_index_nxv4f16(half %h, i64 %idx) {
+define <vscale x 4 x half> @test_insert_with_index_nxv4f16(<vscale x 4 x half> %a, half %h, i64 %idx) {
 ; CHECK-LABEL: test_insert_with_index_nxv4f16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    index z1.s, #0, #1
-; CHECK-NEXT:    mov z2.s, w0
+; CHECK-NEXT:    index z2.s, #0, #1
+; CHECK-NEXT:    mov z3.s, w0
 ; CHECK-NEXT:    ptrue p0.s
-; CHECK-NEXT:    cmpeq p0.s, p0/z, z1.s, z2.s
-; CHECK-NEXT:    mov z0.h, p0/m, h0
+; CHECK-NEXT:    cmpeq p0.s, p0/z, z2.s, z3.s
+; CHECK-NEXT:    mov z0.h, p0/m, h1
 ; CHECK-NEXT:    ret
-  %res = insertelement <vscale x 4 x half> poison, half %h, i64 %idx
+  %res = insertelement <vscale x 4 x half> %a, half %h, i64 %idx
   ret <vscale x 4 x half> %res
 }
 
-define <vscale x 8 x half> @test_insert_with_index_nxv8f16(half %h, i64 %idx) {
+define <vscale x 8 x half> @test_insert_with_index_nxv8f16(<vscale x 8 x half> %a, half %h, i64 %idx) {
 ; CHECK-LABEL: test_insert_with_index_nxv8f16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    index z1.h, #0, #1
-; CHECK-NEXT:    mov z2.h, w0
+; CHECK-NEXT:    index z2.h, #0, #1
+; CHECK-NEXT:    mov z3.h, w0
 ; CHECK-NEXT:    ptrue p0.h
-; CHECK-NEXT:    cmpeq p0.h, p0/z, z1.h, z2.h
-; CHECK-NEXT:    mov z0.h, p0/m, h0
+; CHECK-NEXT:    cmpeq p0.h, p0/z, z2.h, z3.h
+; CHECK-NEXT:    mov z0.h, p0/m, h1
 ; CHECK-NEXT:    ret
-  %res = insertelement <vscale x 8 x half> poison, half %h, i64 %idx
+  %res = insertelement <vscale x 8 x half> %a, half %h, i64 %idx
   ret <vscale x 8 x half> %res
 }
 
-define <vscale x 2 x bfloat> @test_insert_with_index_nxv2bf16(bfloat %h, i64 %idx) {
+define <vscale x 2 x bfloat> @test_insert_with_index_nxv2bf16(<vscale x 2 x bfloat> %a, bfloat %h, i64 %idx) {
 ; CHECK-LABEL: test_insert_with_index_nxv2bf16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    index z1.d, #0, #1
-; CHECK-NEXT:    mov z2.d, x0
+; CHECK-NEXT:    index z2.d, #0, #1
+; CHECK-NEXT:    mov z3.d, x0
 ; CHECK-NEXT:    ptrue p0.d
-; CHECK-NEXT:    cmpeq p0.d, p0/z, z1.d, z2.d
-; CHECK-NEXT:    mov z0.h, p0/m, h0
+; CHECK-NEXT:    cmpeq p0.d, p0/z, z2.d, z3.d
+; CHECK-NEXT:    mov z0.h, p0/m, h1
 ; CHECK-NEXT:    ret
-  %res = insertelement <vscale x 2 x bfloat> poison, bfloat %h, i64 %idx
+  %res = insertelement <vscale x 2 x bfloat> %a, bfloat %h, i64 %idx
   ret <vscale x 2 x bfloat> %res
 }
 
-define <vscale x 4 x bfloat> @test_insert_with_index_nxv4bf16(bfloat %h, i64 %idx) {
+define <vscale x 4 x bfloat> @test_insert_with_index_nxv4bf16(<vscale x 4 x bfloat> %a, bfloat %h, i64 %idx) {
 ; CHECK-LABEL: test_insert_with_index_nxv4bf16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    index z1.s, #0, #1
-; CHECK-NEXT:    mov z2.s, w0
+; CHECK-NEXT:    index z2.s, #0, #1
+; CHECK-NEXT:    mov z3.s, w0
 ; CHECK-NEXT:    ptrue p0.s
-; CHECK-NEXT:    cmpeq p0.s, p0/z, z1.s, z2.s
-; CHECK-NEXT:    mov z0.h, p0/m, h0
+; CHECK-NEXT:    cmpeq p0.s, p0/z, z2.s, z3.s
+; CHECK-NEXT:    mov z0.h, p0/m, h1
 ; CHECK-NEXT:    ret
-  %res = insertelement <vscale x 4 x bfloat> poison, bfloat %h, i64 %idx
+  %res = insertelement <vscale x 4 x bfloat> %a, bfloat %h, i64 %idx
   ret <vscale x 4 x bfloat> %res
 }
 
-define <vscale x 8 x bfloat> @test_insert_with_index_nxv8bf16(bfloat %h, i64 %idx) {
+define <vscale x 8 x bfloat> @test_insert_with_index_nxv8bf16(<vscale x 8 x bfloat> %a, bfloat %h, i64 %idx) {
 ; CHECK-LABEL: test_insert_with_index_nxv8bf16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    index z1.h, #0, #1
-; CHECK-NEXT:    mov z2.h, w0
+; CHECK-NEXT:    index z2.h, #0, #1
+; CHECK-NEXT:    mov z3.h, w0
 ; CHECK-NEXT:    ptrue p0.h
-; CHECK-NEXT:    cmpeq p0.h, p0/z, z1.h, z2.h
-; CHECK-NEXT:    mov z0.h, p0/m, h0
+; CHECK-NEXT:    cmpeq p0.h, p0/z, z2.h, z3.h
+; CHECK-NEXT:    mov z0.h, p0/m, h1
 ; CHECK-NEXT:    ret
-  %res = insertelement <vscale x 8 x bfloat> poison, bfloat %h, i64 %idx
+  %res = insertelement <vscale x 8 x bfloat> %a, bfloat %h, i64 %idx
   ret <vscale x 8 x bfloat> %res
 }
 
-define <vscale x 2 x float> @test_insert_with_index_nxv2f32(float %f, i64 %idx) {
+define <vscale x 2 x float> @test_insert_with_index_nxv2f32(<vscale x 2 x float> %a, float %f, i64 %idx) {
 ; CHECK-LABEL: test_insert_with_index_nxv2f32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    index z1.d, #0, #1
-; CHECK-NEXT:    mov z2.d, x0
+; CHECK-NEXT:    index z2.d, #0, #1
+; CHECK-NEXT:    mov z3.d, x0
 ; CHECK-NEXT:    ptrue p0.d
-; CHECK-NEXT:    cmpeq p0.d, p0/z, z1.d, z2.d
-; CHECK-NEXT:    mov z0.s, p0/m, s0
+; CHECK-NEXT:    cmpeq p0.d, p0/z, z2.d, z3.d
+; CHECK-NEXT:    mov z0.s, p0/m, s1
 ; CHECK-NEXT:    ret
-  %res = insertelement <vscale x 2 x float> poison, float %f, i64 %idx
+  %res = insertelement <vscale x 2 x float> %a, float %f, i64 %idx
   ret <vscale x 2 x float> %res
 }
 
-define <vscale x 4 x float> @test_insert_with_index_nxv4f32(float %f, i64 %idx) {
+define <vscale x 4 x float> @test_insert_with_index_nxv4f32(<vscale x 4 x float> %a, float %f, i64 %idx) {
 ; CHECK-LABEL: test_insert_with_index_nxv4f32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    index z1.s, #0, #1
-; CHECK-NEXT:    mov z2.s, w0
+; CHECK-NEXT:    index z2.s, #0, #1
+; CHECK-NEXT:    mov z3.s, w0
 ; CHECK-NEXT:    ptrue p0.s
-; CHECK-NEXT:    cmpeq p0.s, p0/z, z1.s, z2.s
-; CHECK-NEXT:    mov z0.s, p0/m, s0
+; CHECK-NEXT:    cmpeq p0.s, p0/z, z2.s, z3.s
+; CHECK-NEXT:    mov z0.s, p0/m, s1
 ; CHECK-NEXT:    ret
-  %res = insertelement <vscale x 4 x float> poison, float %f, i64 %idx
+  %res = insertelement <vscale x 4 x float> %a, float %f, i64 %idx
   ret <vscale x 4 x float> %res
 }
 
-define <vscale ...
[truncated]

@github-actions

This comment was marked as outdated.

ret <vscale x 16 x i8> %b
}

define <vscale x 8 x i16> @test_lane6_undef_8xi16(i16 %a) {
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Deleted this, as it's superseded by all the tests below for testing INSERT_VECTOR_ELT(poison, ...)


; Insert scalar at index
define <vscale x 2 x half> @test_insert_with_index_nxv2f16(half %h, i64 %idx) {
define <vscale x 2 x half> @test_insert_with_index_nxv2f16(<vscale x 2 x half> %a, half %h, i64 %idx) {
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These tests below were meant to be generic, so I replaced poison values with real ones.

ret <vscale x 4 x bfloat> %res
}

define <vscale x 8 x bfloat> @test_lanex_nxv8bf16_poison(bfloat %h, i64 %idx) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is it necessary to test all these type combinations? At the very least bfloat and half are going to be pretty much identical.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I agree. We normally have tests for each isel pattern, but don't typically need tests for every element type involved in a DAGCombine because the combines are typically agnostic of the type.

Copy link
Contributor Author

@gbossu gbossu Dec 19, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'd love to agree, but SDAG doesn't really give the option to test the DAG combiner independently. It's just a black box with IR as the input and MIR/asm as output. So I think it's fair to check all inputs if we need to verify the whole box. If I could easily write DAG->DAG or MIR->MIR tests, my answer would be different. Note I gave a similar justification in the next PR: #173005 (comment) .

PS: I don't mind deleting some tests if you guys really disagree, I also hate them :D I just feel they are necessary due to the limitations of SDAG testing.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why do you need DAG->DAG tests? The point of unit tests is to ensure every line of new code is tested. #173005 is effectively a two line DAG combine that looks easily testable with a few llc tests (zero index, var index, undef invec, non-undef invec) some of which might already exist.

As the combine does not care about the vector or element types (other than to require a scalable vector), there is nothing in that domain to test?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I get your point, but I don't see IR->asm tests as unit tests because they test the whole backend. If you're fine approximating them as type-agnostic unit tests for the combiner, then I'm fine keeping only one or two.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes please. I'm pretty sure this is the norm, so others do pipe up if I'm misrepresenting something.

This treats the IR->asm test as a unit test.
@gbossu gbossu merged commit 39b24e8 into main Dec 19, 2025
10 checks passed
@gbossu gbossu deleted the users/gbossu/gbossu.insert.into.undef.1 branch December 19, 2025 16:30
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Projects

None yet

Development

Successfully merging this pull request may close these issues.

5 participants