-
Notifications
You must be signed in to change notification settings - Fork 15.5k
[AArch64][ISel] Extend insertelement tests #173003
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
This is in preparation for adding a DAG combiner for turning INSERT_VECTOR_ELT(undef, ...) -> VECTOR_SPLAT
|
@llvm/pr-subscribers-backend-aarch64 Author: Gaëtan Bossu (gbossu) Changes: This is in preparation for adding a DAG combiner for turning INSERT_VECTOR_ELT(undef, ...) -> VECTOR_SPLAT Patch is 20.77 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/173003.diff 1 Files Affected:
diff --git a/llvm/test/CodeGen/AArch64/sve-insert-element.ll b/llvm/test/CodeGen/AArch64/sve-insert-element.ll
index 8ca005a88add3..5cc54929756ff 100644
--- a/llvm/test/CodeGen/AArch64/sve-insert-element.ll
+++ b/llvm/test/CodeGen/AArch64/sve-insert-element.ll
@@ -165,31 +165,378 @@ define <vscale x 16 x i8> @test_lanex_16xi8(<vscale x 16 x i8> %a, i32 %x) {
ret <vscale x 16 x i8> %b
}
+; TODO: Implement DAG combiner.
+; INSERT_VECTOR_ELT(undef, ...) -> VECTOR_SPLAT
-; Redundant lane insert
-define <vscale x 4 x i32> @extract_insert_4xi32(<vscale x 4 x i32> %a) {
-; CHECK-LABEL: extract_insert_4xi32:
+define <vscale x 16 x i8> @test_lanex_16xi8_poison(i8 %e, i32 %x) {
+; CHECK-LABEL: test_lanex_16xi8_poison:
; CHECK: // %bb.0:
+; CHECK-NEXT: index z0.b, #0, #1
+; CHECK-NEXT: mov w8, w1
+; CHECK-NEXT: ptrue p0.b
+; CHECK-NEXT: mov z1.b, w8
+; CHECK-NEXT: cmpeq p0.b, p0/z, z0.b, z1.b
+; CHECK-NEXT: mov z0.b, p0/m, w0
; CHECK-NEXT: ret
- %b = extractelement <vscale x 4 x i32> %a, i32 2
- %c = insertelement <vscale x 4 x i32> %a, i32 %b, i32 2
- ret <vscale x 4 x i32> %c
+ %b = insertelement <vscale x 16 x i8> poison, i8 %e, i32 %x
+ ret <vscale x 16 x i8> %b
}
-define <vscale x 8 x i16> @test_lane6_undef_8xi16(i16 %a) {
-; CHECK-LABEL: test_lane6_undef_8xi16:
+define <vscale x 16 x i8> @test_lanex_16xi8_poison_imm(i8 %e, i32 %x) {
+; CHECK-LABEL: test_lanex_16xi8_poison_imm:
+; CHECK: // %bb.0:
+; CHECK-NEXT: index z0.b, #0, #1
+; CHECK-NEXT: mov w8, w1
+; CHECK-NEXT: ptrue p0.b
+; CHECK-NEXT: mov z1.b, w8
+; CHECK-NEXT: mov w8, #5 // =0x5
+; CHECK-NEXT: cmpeq p0.b, p0/z, z0.b, z1.b
+; CHECK-NEXT: mov z0.b, p0/m, w8
+; CHECK-NEXT: ret
+ %b = insertelement <vscale x 16 x i8> poison, i8 5, i32 %x
+ ret <vscale x 16 x i8> %b
+}
+
+define <vscale x 8 x i16> @test_lanex_8xi16_poison(i16 %e, i32 %x) {
+; CHECK-LABEL: test_lanex_8xi16_poison:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #6 // =0x6
; CHECK-NEXT: index z0.h, #0, #1
+; CHECK-NEXT: mov w8, w1
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: mov z1.h, w8
; CHECK-NEXT: cmpeq p0.h, p0/z, z0.h, z1.h
; CHECK-NEXT: mov z0.h, p0/m, w0
; CHECK-NEXT: ret
- %b = insertelement <vscale x 8 x i16> poison, i16 %a, i32 6
+ %b = insertelement <vscale x 8 x i16> poison, i16 %e, i32 %x
ret <vscale x 8 x i16> %b
}
+define <vscale x 8 x i16> @test_lanex_8xi16_poison_imm(i32 %x) {
+; CHECK-LABEL: test_lanex_8xi16_poison_imm:
+; CHECK: // %bb.0:
+; CHECK-NEXT: index z0.h, #0, #1
+; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: ptrue p0.h
+; CHECK-NEXT: mov z1.h, w8
+; CHECK-NEXT: mov w8, #5 // =0x5
+; CHECK-NEXT: cmpeq p0.h, p0/z, z0.h, z1.h
+; CHECK-NEXT: mov z0.h, p0/m, w8
+; CHECK-NEXT: ret
+ %b = insertelement <vscale x 8 x i16> poison, i16 5, i32 %x
+ ret <vscale x 8 x i16> %b
+}
+
+define <vscale x 4 x i32> @test_lanex_4xi32_poison(i32 %e, i32 %x) {
+; CHECK-LABEL: test_lanex_4xi32_poison:
+; CHECK: // %bb.0:
+; CHECK-NEXT: index z0.s, #0, #1
+; CHECK-NEXT: mov w8, w1
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: mov z1.s, w8
+; CHECK-NEXT: cmpeq p0.s, p0/z, z0.s, z1.s
+; CHECK-NEXT: mov z0.s, p0/m, w0
+; CHECK-NEXT: ret
+ %b = insertelement <vscale x 4 x i32> poison, i32 %e, i32 %x
+ ret <vscale x 4 x i32> %b
+}
+
+define <vscale x 4 x i32> @test_lanex_4xi32_poison_imm(i32 %x) {
+; CHECK-LABEL: test_lanex_4xi32_poison_imm:
+; CHECK: // %bb.0:
+; CHECK-NEXT: index z0.s, #0, #1
+; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: mov z1.s, w8
+; CHECK-NEXT: mov w8, #5 // =0x5
+; CHECK-NEXT: cmpeq p0.s, p0/z, z0.s, z1.s
+; CHECK-NEXT: mov z0.s, p0/m, w8
+; CHECK-NEXT: ret
+ %b = insertelement <vscale x 4 x i32> poison, i32 5, i32 %x
+ ret <vscale x 4 x i32> %b
+}
+
+define <vscale x 2 x i64> @test_lanex_2xi64_poison(i64 %e, i32 %x) {
+; CHECK-LABEL: test_lanex_2xi64_poison:
+; CHECK: // %bb.0:
+; CHECK-NEXT: index z0.d, #0, #1
+; CHECK-NEXT: mov w8, w1
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: mov z1.d, x8
+; CHECK-NEXT: cmpeq p0.d, p0/z, z0.d, z1.d
+; CHECK-NEXT: mov z0.d, p0/m, x0
+; CHECK-NEXT: ret
+ %b = insertelement <vscale x 2 x i64> poison, i64 %e, i32 %x
+ ret <vscale x 2 x i64> %b
+}
+
+define <vscale x 2 x i64> @test_lanex_2xi64_poison_imm(i32 %x) {
+; CHECK-LABEL: test_lanex_2xi64_poison_imm:
+; CHECK: // %bb.0:
+; CHECK-NEXT: index z0.d, #0, #1
+; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: mov z1.d, x8
+; CHECK-NEXT: mov w8, #5 // =0x5
+; CHECK-NEXT: cmpeq p0.d, p0/z, z0.d, z1.d
+; CHECK-NEXT: mov z0.d, p0/m, x8
+; CHECK-NEXT: ret
+ %b = insertelement <vscale x 2 x i64> poison, i64 5, i32 %x
+ ret <vscale x 2 x i64> %b
+}
+
+define <vscale x 2 x half> @test_lanex_nxv2f16_poison(half %h, i64 %idx) {
+; CHECK-LABEL: test_lanex_nxv2f16_poison:
+; CHECK: // %bb.0:
+; CHECK-NEXT: index z1.d, #0, #1
+; CHECK-NEXT: mov z2.d, x0
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: cmpeq p0.d, p0/z, z1.d, z2.d
+; CHECK-NEXT: mov z0.h, p0/m, h0
+; CHECK-NEXT: ret
+ %res = insertelement <vscale x 2 x half> poison, half %h, i64 %idx
+ ret <vscale x 2 x half> %res
+}
+
+define <vscale x 2 x half> @test_lanex_nxv2f16_poison_imm(i64 %idx) {
+; CHECK-LABEL: test_lanex_nxv2f16_poison_imm:
+; CHECK: // %bb.0:
+; CHECK-NEXT: index z0.d, #0, #1
+; CHECK-NEXT: mov z1.d, x0
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: cmpeq p0.d, p0/z, z0.d, z1.d
+; CHECK-NEXT: fmov h0, #1.50000000
+; CHECK-NEXT: mov z0.h, p0/m, h0
+; CHECK-NEXT: ret
+ %res = insertelement <vscale x 2 x half> poison, half 1.5, i64 %idx
+ ret <vscale x 2 x half> %res
+}
+
+define <vscale x 4 x half> @test_lanex_nxv4f16_poison(half %h, i64 %idx) {
+; CHECK-LABEL: test_lanex_nxv4f16_poison:
+; CHECK: // %bb.0:
+; CHECK-NEXT: index z1.s, #0, #1
+; CHECK-NEXT: mov z2.s, w0
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: cmpeq p0.s, p0/z, z1.s, z2.s
+; CHECK-NEXT: mov z0.h, p0/m, h0
+; CHECK-NEXT: ret
+ %res = insertelement <vscale x 4 x half> poison, half %h, i64 %idx
+ ret <vscale x 4 x half> %res
+}
+
+define <vscale x 4 x half> @test_lanex_nxv4f16_poison_imm(i64 %idx) {
+; CHECK-LABEL: test_lanex_nxv4f16_poison_imm:
+; CHECK: // %bb.0:
+; CHECK-NEXT: index z0.s, #0, #1
+; CHECK-NEXT: mov z1.s, w0
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: cmpeq p0.s, p0/z, z0.s, z1.s
+; CHECK-NEXT: fmov h0, #1.50000000
+; CHECK-NEXT: mov z0.h, p0/m, h0
+; CHECK-NEXT: ret
+ %res = insertelement <vscale x 4 x half> poison, half 1.5, i64 %idx
+ ret <vscale x 4 x half> %res
+}
+
+define <vscale x 8 x half> @test_lanex_nxv8f16_poison(half %h, i64 %idx) {
+; CHECK-LABEL: test_lanex_nxv8f16_poison:
+; CHECK: // %bb.0:
+; CHECK-NEXT: index z1.h, #0, #1
+; CHECK-NEXT: mov z2.h, w0
+; CHECK-NEXT: ptrue p0.h
+; CHECK-NEXT: cmpeq p0.h, p0/z, z1.h, z2.h
+; CHECK-NEXT: mov z0.h, p0/m, h0
+; CHECK-NEXT: ret
+ %res = insertelement <vscale x 8 x half> poison, half %h, i64 %idx
+ ret <vscale x 8 x half> %res
+}
+
+define <vscale x 8 x half> @test_lanex_nxv8f16_poison_imm(i64 %idx) {
+; CHECK-LABEL: test_lanex_nxv8f16_poison_imm:
+; CHECK: // %bb.0:
+; CHECK-NEXT: index z0.h, #0, #1
+; CHECK-NEXT: mov z1.h, w0
+; CHECK-NEXT: ptrue p0.h
+; CHECK-NEXT: cmpeq p0.h, p0/z, z0.h, z1.h
+; CHECK-NEXT: fmov h0, #1.50000000
+; CHECK-NEXT: mov z0.h, p0/m, h0
+; CHECK-NEXT: ret
+ %res = insertelement <vscale x 8 x half> poison, half 1.5, i64 %idx
+ ret <vscale x 8 x half> %res
+}
+
+define <vscale x 2 x bfloat> @test_lanex_nxv2bf16_undef(bfloat %h, i64 %idx) {
+; CHECK-LABEL: test_lanex_nxv2bf16_undef:
+; CHECK: // %bb.0:
+; CHECK-NEXT: index z1.d, #0, #1
+; CHECK-NEXT: mov z2.d, x0
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: cmpeq p0.d, p0/z, z1.d, z2.d
+; CHECK-NEXT: mov z0.h, p0/m, h0
+; CHECK-NEXT: ret
+ %res = insertelement <vscale x 2 x bfloat> poison, bfloat %h, i64 %idx
+ ret <vscale x 2 x bfloat> %res
+}
+
+define <vscale x 2 x bfloat> @test_lanex_nxv2bf16_undef_imm(i64 %idx) {
+; CHECK-LABEL: test_lanex_nxv2bf16_undef_imm:
+; CHECK: // %bb.0:
+; CHECK-NEXT: index z0.d, #0, #1
+; CHECK-NEXT: mov z1.d, x0
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: cmpeq p0.d, p0/z, z0.d, z1.d
+; CHECK-NEXT: fmov h0, #1.93750000
+; CHECK-NEXT: mov z0.h, p0/m, h0
+; CHECK-NEXT: ret
+ %res = insertelement <vscale x 2 x bfloat> poison, bfloat 1.5, i64 %idx
+ ret <vscale x 2 x bfloat> %res
+}
+
+define <vscale x 4 x bfloat> @test_lanex_nxv4bf16_undef(bfloat %h, i64 %idx) {
+; CHECK-LABEL: test_lanex_nxv4bf16_undef:
+; CHECK: // %bb.0:
+; CHECK-NEXT: index z1.s, #0, #1
+; CHECK-NEXT: mov z2.s, w0
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: cmpeq p0.s, p0/z, z1.s, z2.s
+; CHECK-NEXT: mov z0.h, p0/m, h0
+; CHECK-NEXT: ret
+ %res = insertelement <vscale x 4 x bfloat> poison, bfloat %h, i64 %idx
+ ret <vscale x 4 x bfloat> %res
+}
+
+define <vscale x 4 x bfloat> @test_lanex_nxv4bf16_undef_imm(i64 %idx) {
+; CHECK-LABEL: test_lanex_nxv4bf16_undef_imm:
+; CHECK: // %bb.0:
+; CHECK-NEXT: index z0.s, #0, #1
+; CHECK-NEXT: mov z1.s, w0
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: cmpeq p0.s, p0/z, z0.s, z1.s
+; CHECK-NEXT: fmov h0, #1.93750000
+; CHECK-NEXT: mov z0.h, p0/m, h0
+; CHECK-NEXT: ret
+ %res = insertelement <vscale x 4 x bfloat> poison, bfloat 1.5, i64 %idx
+ ret <vscale x 4 x bfloat> %res
+}
+
+define <vscale x 8 x bfloat> @test_lanex_nxv8bf16_poison(bfloat %h, i64 %idx) {
+; CHECK-LABEL: test_lanex_nxv8bf16_poison:
+; CHECK: // %bb.0:
+; CHECK-NEXT: index z1.h, #0, #1
+; CHECK-NEXT: mov z2.h, w0
+; CHECK-NEXT: ptrue p0.h
+; CHECK-NEXT: cmpeq p0.h, p0/z, z1.h, z2.h
+; CHECK-NEXT: mov z0.h, p0/m, h0
+; CHECK-NEXT: ret
+ %res = insertelement <vscale x 8 x bfloat> poison, bfloat %h, i64 %idx
+ ret <vscale x 8 x bfloat> %res
+}
+
+define <vscale x 8 x bfloat> @test_lanex_nxv8bf16_poison_imm(i64 %idx) {
+; CHECK-LABEL: test_lanex_nxv8bf16_poison_imm:
+; CHECK: // %bb.0:
+; CHECK-NEXT: index z0.h, #0, #1
+; CHECK-NEXT: mov z1.h, w0
+; CHECK-NEXT: ptrue p0.h
+; CHECK-NEXT: cmpeq p0.h, p0/z, z0.h, z1.h
+; CHECK-NEXT: fmov h0, #1.93750000
+; CHECK-NEXT: mov z0.h, p0/m, h0
+; CHECK-NEXT: ret
+ %res = insertelement <vscale x 8 x bfloat> poison, bfloat 1.5, i64 %idx
+ ret <vscale x 8 x bfloat> %res
+}
+
+define <vscale x 2 x float> @test_lanex_nxv2f32_poison(float %f, i64 %idx) {
+; CHECK-LABEL: test_lanex_nxv2f32_poison:
+; CHECK: // %bb.0:
+; CHECK-NEXT: index z1.d, #0, #1
+; CHECK-NEXT: mov z2.d, x0
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: cmpeq p0.d, p0/z, z1.d, z2.d
+; CHECK-NEXT: mov z0.s, p0/m, s0
+; CHECK-NEXT: ret
+ %res = insertelement <vscale x 2 x float> poison, float %f, i64 %idx
+ ret <vscale x 2 x float> %res
+}
+
+define <vscale x 2 x float> @test_lanex_nxv2f32_poison_imm(i64 %idx) {
+; CHECK-LABEL: test_lanex_nxv2f32_poison_imm:
+; CHECK: // %bb.0:
+; CHECK-NEXT: index z0.d, #0, #1
+; CHECK-NEXT: mov z1.d, x0
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: cmpeq p0.d, p0/z, z0.d, z1.d
+; CHECK-NEXT: fmov s0, #1.50000000
+; CHECK-NEXT: mov z0.s, p0/m, s0
+; CHECK-NEXT: ret
+ %res = insertelement <vscale x 2 x float> poison, float 1.5, i64 %idx
+ ret <vscale x 2 x float> %res
+}
+
+define <vscale x 4 x float> @test_lanex_nxv4f32_poison(float %f, i64 %idx) {
+; CHECK-LABEL: test_lanex_nxv4f32_poison:
+; CHECK: // %bb.0:
+; CHECK-NEXT: index z1.s, #0, #1
+; CHECK-NEXT: mov z2.s, w0
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: cmpeq p0.s, p0/z, z1.s, z2.s
+; CHECK-NEXT: mov z0.s, p0/m, s0
+; CHECK-NEXT: ret
+ %res = insertelement <vscale x 4 x float> poison, float %f, i64 %idx
+ ret <vscale x 4 x float> %res
+}
+
+define <vscale x 4 x float> @test_lanex_nxv4f32_poison_imm(float %f, i64 %idx) {
+; CHECK-LABEL: test_lanex_nxv4f32_poison_imm:
+; CHECK: // %bb.0:
+; CHECK-NEXT: index z0.s, #0, #1
+; CHECK-NEXT: mov z1.s, w0
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: cmpeq p0.s, p0/z, z0.s, z1.s
+; CHECK-NEXT: fmov s0, #1.50000000
+; CHECK-NEXT: mov z0.s, p0/m, s0
+; CHECK-NEXT: ret
+ %res = insertelement <vscale x 4 x float> poison, float 1.5, i64 %idx
+ ret <vscale x 4 x float> %res
+}
+
+define <vscale x 2 x double> @test_lanex_nxv2f64_poison(double %d, i64 %idx) {
+; CHECK-LABEL: test_lanex_nxv2f64_poison:
+; CHECK: // %bb.0:
+; CHECK-NEXT: index z1.d, #0, #1
+; CHECK-NEXT: mov z2.d, x0
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: cmpeq p0.d, p0/z, z1.d, z2.d
+; CHECK-NEXT: mov z0.d, p0/m, d0
+; CHECK-NEXT: ret
+ %res = insertelement <vscale x 2 x double> poison, double %d, i64 %idx
+ ret <vscale x 2 x double> %res
+}
+
+define <vscale x 2 x double> @test_lanex_nxv2f64_poison_imm(i64 %idx) {
+; CHECK-LABEL: test_lanex_nxv2f64_poison_imm:
+; CHECK: // %bb.0:
+; CHECK-NEXT: index z0.d, #0, #1
+; CHECK-NEXT: mov z1.d, x0
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: cmpeq p0.d, p0/z, z0.d, z1.d
+; CHECK-NEXT: fmov d0, #1.50000000
+; CHECK-NEXT: mov z0.d, p0/m, d0
+; CHECK-NEXT: ret
+ %res = insertelement <vscale x 2 x double> poison, double 1.5, i64 %idx
+ ret <vscale x 2 x double> %res
+}
+
+; Redundant lane insert
+define <vscale x 4 x i32> @extract_insert_4xi32(<vscale x 4 x i32> %a) {
+; CHECK-LABEL: extract_insert_4xi32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ret
+ %b = extractelement <vscale x 4 x i32> %a, i32 2
+ %c = insertelement <vscale x 4 x i32> %a, i32 %b, i32 2
+ ret <vscale x 4 x i32> %c
+}
+
define <vscale x 16 x i8> @test_lane0_undef_16xi8(i8 %a) {
; CHECK-LABEL: test_lane0_undef_16xi8:
; CHECK: // %bb.0:
@@ -326,120 +673,120 @@ define <vscale x 2 x double> @test_insert_into_undef_nxv2f64(double %a) {
}
; Insert scalar at index
-define <vscale x 2 x half> @test_insert_with_index_nxv2f16(half %h, i64 %idx) {
+define <vscale x 2 x half> @test_insert_with_index_nxv2f16(<vscale x 2 x half> %a, half %h, i64 %idx) {
; CHECK-LABEL: test_insert_with_index_nxv2f16:
; CHECK: // %bb.0:
-; CHECK-NEXT: index z1.d, #0, #1
-; CHECK-NEXT: mov z2.d, x0
+; CHECK-NEXT: index z2.d, #0, #1
+; CHECK-NEXT: mov z3.d, x0
; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: cmpeq p0.d, p0/z, z1.d, z2.d
-; CHECK-NEXT: mov z0.h, p0/m, h0
+; CHECK-NEXT: cmpeq p0.d, p0/z, z2.d, z3.d
+; CHECK-NEXT: mov z0.h, p0/m, h1
; CHECK-NEXT: ret
- %res = insertelement <vscale x 2 x half> poison, half %h, i64 %idx
+ %res = insertelement <vscale x 2 x half> %a, half %h, i64 %idx
ret <vscale x 2 x half> %res
}
-define <vscale x 4 x half> @test_insert_with_index_nxv4f16(half %h, i64 %idx) {
+define <vscale x 4 x half> @test_insert_with_index_nxv4f16(<vscale x 4 x half> %a, half %h, i64 %idx) {
; CHECK-LABEL: test_insert_with_index_nxv4f16:
; CHECK: // %bb.0:
-; CHECK-NEXT: index z1.s, #0, #1
-; CHECK-NEXT: mov z2.s, w0
+; CHECK-NEXT: index z2.s, #0, #1
+; CHECK-NEXT: mov z3.s, w0
; CHECK-NEXT: ptrue p0.s
-; CHECK-NEXT: cmpeq p0.s, p0/z, z1.s, z2.s
-; CHECK-NEXT: mov z0.h, p0/m, h0
+; CHECK-NEXT: cmpeq p0.s, p0/z, z2.s, z3.s
+; CHECK-NEXT: mov z0.h, p0/m, h1
; CHECK-NEXT: ret
- %res = insertelement <vscale x 4 x half> poison, half %h, i64 %idx
+ %res = insertelement <vscale x 4 x half> %a, half %h, i64 %idx
ret <vscale x 4 x half> %res
}
-define <vscale x 8 x half> @test_insert_with_index_nxv8f16(half %h, i64 %idx) {
+define <vscale x 8 x half> @test_insert_with_index_nxv8f16(<vscale x 8 x half> %a, half %h, i64 %idx) {
; CHECK-LABEL: test_insert_with_index_nxv8f16:
; CHECK: // %bb.0:
-; CHECK-NEXT: index z1.h, #0, #1
-; CHECK-NEXT: mov z2.h, w0
+; CHECK-NEXT: index z2.h, #0, #1
+; CHECK-NEXT: mov z3.h, w0
; CHECK-NEXT: ptrue p0.h
-; CHECK-NEXT: cmpeq p0.h, p0/z, z1.h, z2.h
-; CHECK-NEXT: mov z0.h, p0/m, h0
+; CHECK-NEXT: cmpeq p0.h, p0/z, z2.h, z3.h
+; CHECK-NEXT: mov z0.h, p0/m, h1
; CHECK-NEXT: ret
- %res = insertelement <vscale x 8 x half> poison, half %h, i64 %idx
+ %res = insertelement <vscale x 8 x half> %a, half %h, i64 %idx
ret <vscale x 8 x half> %res
}
-define <vscale x 2 x bfloat> @test_insert_with_index_nxv2bf16(bfloat %h, i64 %idx) {
+define <vscale x 2 x bfloat> @test_insert_with_index_nxv2bf16(<vscale x 2 x bfloat> %a, bfloat %h, i64 %idx) {
; CHECK-LABEL: test_insert_with_index_nxv2bf16:
; CHECK: // %bb.0:
-; CHECK-NEXT: index z1.d, #0, #1
-; CHECK-NEXT: mov z2.d, x0
+; CHECK-NEXT: index z2.d, #0, #1
+; CHECK-NEXT: mov z3.d, x0
; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: cmpeq p0.d, p0/z, z1.d, z2.d
-; CHECK-NEXT: mov z0.h, p0/m, h0
+; CHECK-NEXT: cmpeq p0.d, p0/z, z2.d, z3.d
+; CHECK-NEXT: mov z0.h, p0/m, h1
; CHECK-NEXT: ret
- %res = insertelement <vscale x 2 x bfloat> poison, bfloat %h, i64 %idx
+ %res = insertelement <vscale x 2 x bfloat> %a, bfloat %h, i64 %idx
ret <vscale x 2 x bfloat> %res
}
-define <vscale x 4 x bfloat> @test_insert_with_index_nxv4bf16(bfloat %h, i64 %idx) {
+define <vscale x 4 x bfloat> @test_insert_with_index_nxv4bf16(<vscale x 4 x bfloat> %a, bfloat %h, i64 %idx) {
; CHECK-LABEL: test_insert_with_index_nxv4bf16:
; CHECK: // %bb.0:
-; CHECK-NEXT: index z1.s, #0, #1
-; CHECK-NEXT: mov z2.s, w0
+; CHECK-NEXT: index z2.s, #0, #1
+; CHECK-NEXT: mov z3.s, w0
; CHECK-NEXT: ptrue p0.s
-; CHECK-NEXT: cmpeq p0.s, p0/z, z1.s, z2.s
-; CHECK-NEXT: mov z0.h, p0/m, h0
+; CHECK-NEXT: cmpeq p0.s, p0/z, z2.s, z3.s
+; CHECK-NEXT: mov z0.h, p0/m, h1
; CHECK-NEXT: ret
- %res = insertelement <vscale x 4 x bfloat> poison, bfloat %h, i64 %idx
+ %res = insertelement <vscale x 4 x bfloat> %a, bfloat %h, i64 %idx
ret <vscale x 4 x bfloat> %res
}
-define <vscale x 8 x bfloat> @test_insert_with_index_nxv8bf16(bfloat %h, i64 %idx) {
+define <vscale x 8 x bfloat> @test_insert_with_index_nxv8bf16(<vscale x 8 x bfloat> %a, bfloat %h, i64 %idx) {
; CHECK-LABEL: test_insert_with_index_nxv8bf16:
; CHECK: // %bb.0:
-; CHECK-NEXT: index z1.h, #0, #1
-; CHECK-NEXT: mov z2.h, w0
+; CHECK-NEXT: index z2.h, #0, #1
+; CHECK-NEXT: mov z3.h, w0
; CHECK-NEXT: ptrue p0.h
-; CHECK-NEXT: cmpeq p0.h, p0/z, z1.h, z2.h
-; CHECK-NEXT: mov z0.h, p0/m, h0
+; CHECK-NEXT: cmpeq p0.h, p0/z, z2.h, z3.h
+; CHECK-NEXT: mov z0.h, p0/m, h1
; CHECK-NEXT: ret
- %res = insertelement <vscale x 8 x bfloat> poison, bfloat %h, i64 %idx
+ %res = insertelement <vscale x 8 x bfloat> %a, bfloat %h, i64 %idx
ret <vscale x 8 x bfloat> %res
}
-define <vscale x 2 x float> @test_insert_with_index_nxv2f32(float %f, i64 %idx) {
+define <vscale x 2 x float> @test_insert_with_index_nxv2f32(<vscale x 2 x float> %a, float %f, i64 %idx) {
; CHECK-LABEL: test_insert_with_index_nxv2f32:
; CHECK: // %bb.0:
-; CHECK-NEXT: index z1.d, #0, #1
-; CHECK-NEXT: mov z2.d, x0
+; CHECK-NEXT: index z2.d, #0, #1
+; CHECK-NEXT: mov z3.d, x0
; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: cmpeq p0.d, p0/z, z1.d, z2.d
-; CHECK-NEXT: mov z0.s, p0/m, s0
+; CHECK-NEXT: cmpeq p0.d, p0/z, z2.d, z3.d
+; CHECK-NEXT: mov z0.s, p0/m, s1
; CHECK-NEXT: ret
- %res = insertelement <vscale x 2 x float> poison, float %f, i64 %idx
+ %res = insertelement <vscale x 2 x float> %a, float %f, i64 %idx
ret <vscale x 2 x float> %res
}
-define <vscale x 4 x float> @test_insert_with_index_nxv4f32(float %f, i64 %idx) {
+define <vscale x 4 x float> @test_insert_with_index_nxv4f32(<vscale x 4 x float> %a, float %f, i64 %idx) {
; CHECK-LABEL: test_insert_with_index_nxv4f32:
; CHECK: // %bb.0:
-; CHECK-NEXT: index z1.s, #0, #1
-; CHECK-NEXT: mov z2.s, w0
+; CHECK-NEXT: index z2.s, #0, #1
+; CHECK-NEXT: mov z3.s, w0
; CHECK-NEXT: ptrue p0.s
-; CHECK-NEXT: cmpeq p0.s, p0/z, z1.s, z2.s
-; CHECK-NEXT: mov z0.s, p0/m, s0
+; CHECK-NEXT: cmpeq p0.s, p0/z, z2.s, z3.s
+; CHECK-NEXT: mov z0.s, p0/m, s1
; CHECK-NEXT: ret
- %res = insertelement <vscale x 4 x float> poison, float %f, i64 %idx
+ %res = insertelement <vscale x 4 x float> %a, float %f, i64 %idx
ret <vscale x 4 x float> %res
}
-define <vscale ...
[truncated]
|
This comment was marked as outdated.
This comment was marked as outdated.
| ret <vscale x 16 x i8> %b | ||
| } | ||
|
|
||
| define <vscale x 8 x i16> @test_lane6_undef_8xi16(i16 %a) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Deleted this, as it's superseded by all the tests below for testing INSERT_VECTOR_ELT(poison, ...)
|
|
||
| ; Insert scalar at index | ||
| define <vscale x 2 x half> @test_insert_with_index_nxv2f16(half %h, i64 %idx) { | ||
| define <vscale x 2 x half> @test_insert_with_index_nxv2f16(<vscale x 2 x half> %a, half %h, i64 %idx) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
These tests below were meant to be generic, so I replaced poison values with real ones.
| ret <vscale x 4 x bfloat> %res | ||
| } | ||
|
|
||
| define <vscale x 8 x bfloat> @test_lanex_nxv8bf16_poison(bfloat %h, i64 %idx) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is it necessary to test all these type combinations? At the very least bfloat and half are going to be pretty much identical.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I agree. We normally have tests for each isel pattern, but don't typically need tests for every element type involved in a DAGCombine because the combines are typically agnostic of the type.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'd love to agree, but SDAG doesn't really give the option to test the DAG combiner independently. It's just a black box with IR as the input and MIR/asm as output. So I think it's fair to check all inputs if we need to verify the whole box. If I could easily write DAG->DAG or MIR->MIR tests, my answer would be different. Note I gave a similar justification in the next PR: #173005 (comment).
PS: I don't mind deleting some tests if you guys really disagree, I also hate them :D I just feel they are necessary due to the limitations of SDAG testing.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why do you need DAG->DAG tests? The point of unit tests is to ensure every line of new code is tested. #173005 is effectively a two-line DAG combine that looks easily testable with a few llc tests (zero index, var index, undef invec, non-undef invec) some of which might already exist.
As the combine does not care about the vector or element types (other than to require a scalable vector), there is nothing in that domain to test?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I get your point, but I don't see IR->asm tests as unit tests because they test the whole backend. If you're fine approximating them as type-agnostic unit tests for the combiner, then I'm fine keeping only one or two.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes please. I'm pretty sure this is the norm, so others do pipe up if I'm misrepresenting something.
This treats the IR->asm test as a unit test.
This is in preparation for adding a DAG combiner for turning INSERT_VECTOR_ELT(undef, ...) -> VECTOR_SPLAT
This is a stacked PR: