diff --git a/llvm/test/CodeGen/AArch64/sve-insert-element.ll b/llvm/test/CodeGen/AArch64/sve-insert-element.ll index 8ca005a88add3..a897850c1365d 100644 --- a/llvm/test/CodeGen/AArch64/sve-insert-element.ll +++ b/llvm/test/CodeGen/AArch64/sve-insert-element.ll @@ -165,6 +165,39 @@ define @test_lanex_16xi8( %a, i32 %x) { ret %b } +; TODO: Implement DAG combiner. +; Test the INSERT_VECTOR_ELT(poison, ...) -> VECTOR_SPLAT combiner. +; nxv16i8 is used as a proxy for testing using IR, but the combiner +; is agnostic of the element type. + +define @test_lanex_16xi8_poison(i8 %e, i32 %x) { +; CHECK-LABEL: test_lanex_16xi8_poison: +; CHECK: // %bb.0: +; CHECK-NEXT: index z0.b, #0, #1 +; CHECK-NEXT: mov w8, w1 +; CHECK-NEXT: ptrue p0.b +; CHECK-NEXT: mov z1.b, w8 +; CHECK-NEXT: cmpeq p0.b, p0/z, z0.b, z1.b +; CHECK-NEXT: mov z0.b, p0/m, w0 +; CHECK-NEXT: ret + %b = insertelement poison, i8 %e, i32 %x + ret %b +} + +define @test_lanex_16xi8_poison_imm(i8 %e, i32 %x) { +; CHECK-LABEL: test_lanex_16xi8_poison_imm: +; CHECK: // %bb.0: +; CHECK-NEXT: index z0.b, #0, #1 +; CHECK-NEXT: mov w8, w1 +; CHECK-NEXT: ptrue p0.b +; CHECK-NEXT: mov z1.b, w8 +; CHECK-NEXT: mov w8, #5 // =0x5 +; CHECK-NEXT: cmpeq p0.b, p0/z, z0.b, z1.b +; CHECK-NEXT: mov z0.b, p0/m, w8 +; CHECK-NEXT: ret + %b = insertelement poison, i8 5, i32 %x + ret %b +} ; Redundant lane insert define @extract_insert_4xi32( %a) { @@ -176,20 +209,7 @@ define @extract_insert_4xi32( %a) { ret %c } -define @test_lane6_undef_8xi16(i16 %a) { -; CHECK-LABEL: test_lane6_undef_8xi16: -; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #6 // =0x6 -; CHECK-NEXT: index z0.h, #0, #1 -; CHECK-NEXT: ptrue p0.h -; CHECK-NEXT: mov z1.h, w8 -; CHECK-NEXT: cmpeq p0.h, p0/z, z0.h, z1.h -; CHECK-NEXT: mov z0.h, p0/m, w0 -; CHECK-NEXT: ret - %b = insertelement poison, i16 %a, i32 6 - ret %b -} - +; Inserting lane 0 into poison uses fmov instead of broadcasting to all lanes define @test_lane0_undef_16xi8(i8 %a) { ; CHECK-LABEL: test_lane0_undef_16xi8: ; CHECK: 
// %bb.0: @@ -326,120 +346,120 @@ define @test_insert_into_undef_nxv2f64(double %a) { } ; Insert scalar at index -define @test_insert_with_index_nxv2f16(half %h, i64 %idx) { +define @test_insert_with_index_nxv2f16( %a, half %h, i64 %idx) { ; CHECK-LABEL: test_insert_with_index_nxv2f16: ; CHECK: // %bb.0: -; CHECK-NEXT: index z1.d, #0, #1 -; CHECK-NEXT: mov z2.d, x0 +; CHECK-NEXT: index z2.d, #0, #1 +; CHECK-NEXT: mov z3.d, x0 ; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: cmpeq p0.d, p0/z, z1.d, z2.d -; CHECK-NEXT: mov z0.h, p0/m, h0 +; CHECK-NEXT: cmpeq p0.d, p0/z, z2.d, z3.d +; CHECK-NEXT: mov z0.h, p0/m, h1 ; CHECK-NEXT: ret - %res = insertelement poison, half %h, i64 %idx + %res = insertelement %a, half %h, i64 %idx ret %res } -define @test_insert_with_index_nxv4f16(half %h, i64 %idx) { +define @test_insert_with_index_nxv4f16( %a, half %h, i64 %idx) { ; CHECK-LABEL: test_insert_with_index_nxv4f16: ; CHECK: // %bb.0: -; CHECK-NEXT: index z1.s, #0, #1 -; CHECK-NEXT: mov z2.s, w0 +; CHECK-NEXT: index z2.s, #0, #1 +; CHECK-NEXT: mov z3.s, w0 ; CHECK-NEXT: ptrue p0.s -; CHECK-NEXT: cmpeq p0.s, p0/z, z1.s, z2.s -; CHECK-NEXT: mov z0.h, p0/m, h0 +; CHECK-NEXT: cmpeq p0.s, p0/z, z2.s, z3.s +; CHECK-NEXT: mov z0.h, p0/m, h1 ; CHECK-NEXT: ret - %res = insertelement poison, half %h, i64 %idx + %res = insertelement %a, half %h, i64 %idx ret %res } -define @test_insert_with_index_nxv8f16(half %h, i64 %idx) { +define @test_insert_with_index_nxv8f16( %a, half %h, i64 %idx) { ; CHECK-LABEL: test_insert_with_index_nxv8f16: ; CHECK: // %bb.0: -; CHECK-NEXT: index z1.h, #0, #1 -; CHECK-NEXT: mov z2.h, w0 +; CHECK-NEXT: index z2.h, #0, #1 +; CHECK-NEXT: mov z3.h, w0 ; CHECK-NEXT: ptrue p0.h -; CHECK-NEXT: cmpeq p0.h, p0/z, z1.h, z2.h -; CHECK-NEXT: mov z0.h, p0/m, h0 +; CHECK-NEXT: cmpeq p0.h, p0/z, z2.h, z3.h +; CHECK-NEXT: mov z0.h, p0/m, h1 ; CHECK-NEXT: ret - %res = insertelement poison, half %h, i64 %idx + %res = insertelement %a, half %h, i64 %idx ret %res } -define 
@test_insert_with_index_nxv2bf16(bfloat %h, i64 %idx) { +define @test_insert_with_index_nxv2bf16( %a, bfloat %h, i64 %idx) { ; CHECK-LABEL: test_insert_with_index_nxv2bf16: ; CHECK: // %bb.0: -; CHECK-NEXT: index z1.d, #0, #1 -; CHECK-NEXT: mov z2.d, x0 +; CHECK-NEXT: index z2.d, #0, #1 +; CHECK-NEXT: mov z3.d, x0 ; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: cmpeq p0.d, p0/z, z1.d, z2.d -; CHECK-NEXT: mov z0.h, p0/m, h0 +; CHECK-NEXT: cmpeq p0.d, p0/z, z2.d, z3.d +; CHECK-NEXT: mov z0.h, p0/m, h1 ; CHECK-NEXT: ret - %res = insertelement poison, bfloat %h, i64 %idx + %res = insertelement %a, bfloat %h, i64 %idx ret %res } -define @test_insert_with_index_nxv4bf16(bfloat %h, i64 %idx) { +define @test_insert_with_index_nxv4bf16( %a, bfloat %h, i64 %idx) { ; CHECK-LABEL: test_insert_with_index_nxv4bf16: ; CHECK: // %bb.0: -; CHECK-NEXT: index z1.s, #0, #1 -; CHECK-NEXT: mov z2.s, w0 +; CHECK-NEXT: index z2.s, #0, #1 +; CHECK-NEXT: mov z3.s, w0 ; CHECK-NEXT: ptrue p0.s -; CHECK-NEXT: cmpeq p0.s, p0/z, z1.s, z2.s -; CHECK-NEXT: mov z0.h, p0/m, h0 +; CHECK-NEXT: cmpeq p0.s, p0/z, z2.s, z3.s +; CHECK-NEXT: mov z0.h, p0/m, h1 ; CHECK-NEXT: ret - %res = insertelement poison, bfloat %h, i64 %idx + %res = insertelement %a, bfloat %h, i64 %idx ret %res } -define @test_insert_with_index_nxv8bf16(bfloat %h, i64 %idx) { +define @test_insert_with_index_nxv8bf16( %a, bfloat %h, i64 %idx) { ; CHECK-LABEL: test_insert_with_index_nxv8bf16: ; CHECK: // %bb.0: -; CHECK-NEXT: index z1.h, #0, #1 -; CHECK-NEXT: mov z2.h, w0 +; CHECK-NEXT: index z2.h, #0, #1 +; CHECK-NEXT: mov z3.h, w0 ; CHECK-NEXT: ptrue p0.h -; CHECK-NEXT: cmpeq p0.h, p0/z, z1.h, z2.h -; CHECK-NEXT: mov z0.h, p0/m, h0 +; CHECK-NEXT: cmpeq p0.h, p0/z, z2.h, z3.h +; CHECK-NEXT: mov z0.h, p0/m, h1 ; CHECK-NEXT: ret - %res = insertelement poison, bfloat %h, i64 %idx + %res = insertelement %a, bfloat %h, i64 %idx ret %res } -define @test_insert_with_index_nxv2f32(float %f, i64 %idx) { +define @test_insert_with_index_nxv2f32( %a, 
float %f, i64 %idx) { ; CHECK-LABEL: test_insert_with_index_nxv2f32: ; CHECK: // %bb.0: -; CHECK-NEXT: index z1.d, #0, #1 -; CHECK-NEXT: mov z2.d, x0 +; CHECK-NEXT: index z2.d, #0, #1 +; CHECK-NEXT: mov z3.d, x0 ; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: cmpeq p0.d, p0/z, z1.d, z2.d -; CHECK-NEXT: mov z0.s, p0/m, s0 +; CHECK-NEXT: cmpeq p0.d, p0/z, z2.d, z3.d +; CHECK-NEXT: mov z0.s, p0/m, s1 ; CHECK-NEXT: ret - %res = insertelement poison, float %f, i64 %idx + %res = insertelement %a, float %f, i64 %idx ret %res } -define @test_insert_with_index_nxv4f32(float %f, i64 %idx) { +define @test_insert_with_index_nxv4f32( %a, float %f, i64 %idx) { ; CHECK-LABEL: test_insert_with_index_nxv4f32: ; CHECK: // %bb.0: -; CHECK-NEXT: index z1.s, #0, #1 -; CHECK-NEXT: mov z2.s, w0 +; CHECK-NEXT: index z2.s, #0, #1 +; CHECK-NEXT: mov z3.s, w0 ; CHECK-NEXT: ptrue p0.s -; CHECK-NEXT: cmpeq p0.s, p0/z, z1.s, z2.s -; CHECK-NEXT: mov z0.s, p0/m, s0 +; CHECK-NEXT: cmpeq p0.s, p0/z, z2.s, z3.s +; CHECK-NEXT: mov z0.s, p0/m, s1 ; CHECK-NEXT: ret - %res = insertelement poison, float %f, i64 %idx + %res = insertelement %a, float %f, i64 %idx ret %res } -define @test_insert_with_index_nxv2f64(double %d, i64 %idx) { +define @test_insert_with_index_nxv2f64( %a, double %d, i64 %idx) { ; CHECK-LABEL: test_insert_with_index_nxv2f64: ; CHECK: // %bb.0: -; CHECK-NEXT: index z1.d, #0, #1 -; CHECK-NEXT: mov z2.d, x0 +; CHECK-NEXT: index z2.d, #0, #1 +; CHECK-NEXT: mov z3.d, x0 ; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: cmpeq p0.d, p0/z, z1.d, z2.d -; CHECK-NEXT: mov z0.d, p0/m, d0 +; CHECK-NEXT: cmpeq p0.d, p0/z, z2.d, z3.d +; CHECK-NEXT: mov z0.d, p0/m, d1 ; CHECK-NEXT: ret - %res = insertelement poison, double %d, i64 %idx + %res = insertelement %a, double %d, i64 %idx ret %res }