Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
156 changes: 88 additions & 68 deletions llvm/test/CodeGen/AArch64/sve-insert-element.ll
Original file line number Diff line number Diff line change
Expand Up @@ -165,6 +165,39 @@ define <vscale x 16 x i8> @test_lanex_16xi8(<vscale x 16 x i8> %a, i32 %x) {
ret <vscale x 16 x i8> %b
}

; TODO: Implement the DAG combiner; until then the CHECK lines below presumably record the lowering without it.
; Test the INSERT_VECTOR_ELT(poison, ...) -> VECTOR_SPLAT combiner
; <vscale x 16 x i8> is used as a proxy for testing using IR, but the combiner
; is agnostic of the element type.

; Inserts the scalar argument %e into a poison <vscale x 16 x i8> vector at
; the run-time lane index %x and returns the resulting vector.
; Both the element and the lane index are function arguments.
define <vscale x 16 x i8> @test_lanex_16xi8_poison(i8 %e, i32 %x) {
; CHECK-LABEL: test_lanex_16xi8_poison:
; CHECK: // %bb.0:
; CHECK-NEXT: index z0.b, #0, #1
; CHECK-NEXT: mov w8, w1
; CHECK-NEXT: ptrue p0.b
; CHECK-NEXT: mov z1.b, w8
; CHECK-NEXT: cmpeq p0.b, p0/z, z0.b, z1.b
; CHECK-NEXT: mov z0.b, p0/m, w0
; CHECK-NEXT: ret
; The base vector operand is poison; only lane %x receives a defined value.
%b = insertelement <vscale x 16 x i8> poison, i8 %e, i32 %x
ret <vscale x 16 x i8> %b
}

; Inserts the constant i8 5 into a poison <vscale x 16 x i8> vector at the
; run-time lane index %x and returns the resulting vector.
; NOTE(review): %e is declared but never referenced in the body. Dropping it
; would presumably change which register carries %x in the expected output
; (currently w1) -- confirm and regenerate the checks before cleaning it up.
define <vscale x 16 x i8> @test_lanex_16xi8_poison_imm(i8 %e, i32 %x) {
; CHECK-LABEL: test_lanex_16xi8_poison_imm:
; CHECK: // %bb.0:
; CHECK-NEXT: index z0.b, #0, #1
; CHECK-NEXT: mov w8, w1
; CHECK-NEXT: ptrue p0.b
; CHECK-NEXT: mov z1.b, w8
; CHECK-NEXT: mov w8, #5 // =0x5
; CHECK-NEXT: cmpeq p0.b, p0/z, z0.b, z1.b
; CHECK-NEXT: mov z0.b, p0/m, w8
; CHECK-NEXT: ret
; The inserted element is the immediate 5, not the %e argument.
%b = insertelement <vscale x 16 x i8> poison, i8 5, i32 %x
ret <vscale x 16 x i8> %b
}

; Redundant lane insert
define <vscale x 4 x i32> @extract_insert_4xi32(<vscale x 4 x i32> %a) {
Expand All @@ -176,20 +209,7 @@ define <vscale x 4 x i32> @extract_insert_4xi32(<vscale x 4 x i32> %a) {
ret <vscale x 4 x i32> %c
}

define <vscale x 8 x i16> @test_lane6_undef_8xi16(i16 %a) {
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Deleted this, as it's superseded by all the tests below for testing INSERT_VECTOR_ELT(poison, ...)

; CHECK-LABEL: test_lane6_undef_8xi16:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #6 // =0x6
; CHECK-NEXT: index z0.h, #0, #1
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: mov z1.h, w8
; CHECK-NEXT: cmpeq p0.h, p0/z, z0.h, z1.h
; CHECK-NEXT: mov z0.h, p0/m, w0
; CHECK-NEXT: ret
%b = insertelement <vscale x 8 x i16> poison, i16 %a, i32 6
ret <vscale x 8 x i16> %b
}

; Inserting lane 0 into poison uses fmov instead of broadcasting to all lanes
define <vscale x 16 x i8> @test_lane0_undef_16xi8(i8 %a) {
; CHECK-LABEL: test_lane0_undef_16xi8:
; CHECK: // %bb.0:
Expand Down Expand Up @@ -326,120 +346,120 @@ define <vscale x 2 x double> @test_insert_into_undef_nxv2f64(double %a) {
}

; Insert scalar at index
define <vscale x 2 x half> @test_insert_with_index_nxv2f16(half %h, i64 %idx) {
define <vscale x 2 x half> @test_insert_with_index_nxv2f16(<vscale x 2 x half> %a, half %h, i64 %idx) {
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The tests below were meant to be generic, so I replaced the poison vector operands with real vector arguments.

; CHECK-LABEL: test_insert_with_index_nxv2f16:
; CHECK: // %bb.0:
; CHECK-NEXT: index z1.d, #0, #1
; CHECK-NEXT: mov z2.d, x0
; CHECK-NEXT: index z2.d, #0, #1
; CHECK-NEXT: mov z3.d, x0
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: cmpeq p0.d, p0/z, z1.d, z2.d
; CHECK-NEXT: mov z0.h, p0/m, h0
; CHECK-NEXT: cmpeq p0.d, p0/z, z2.d, z3.d
; CHECK-NEXT: mov z0.h, p0/m, h1
; CHECK-NEXT: ret
%res = insertelement <vscale x 2 x half> poison, half %h, i64 %idx
%res = insertelement <vscale x 2 x half> %a, half %h, i64 %idx
ret <vscale x 2 x half> %res
}

define <vscale x 4 x half> @test_insert_with_index_nxv4f16(half %h, i64 %idx) {
define <vscale x 4 x half> @test_insert_with_index_nxv4f16(<vscale x 4 x half> %a, half %h, i64 %idx) {
; CHECK-LABEL: test_insert_with_index_nxv4f16:
; CHECK: // %bb.0:
; CHECK-NEXT: index z1.s, #0, #1
; CHECK-NEXT: mov z2.s, w0
; CHECK-NEXT: index z2.s, #0, #1
; CHECK-NEXT: mov z3.s, w0
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: cmpeq p0.s, p0/z, z1.s, z2.s
; CHECK-NEXT: mov z0.h, p0/m, h0
; CHECK-NEXT: cmpeq p0.s, p0/z, z2.s, z3.s
; CHECK-NEXT: mov z0.h, p0/m, h1
; CHECK-NEXT: ret
%res = insertelement <vscale x 4 x half> poison, half %h, i64 %idx
%res = insertelement <vscale x 4 x half> %a, half %h, i64 %idx
ret <vscale x 4 x half> %res
}

define <vscale x 8 x half> @test_insert_with_index_nxv8f16(half %h, i64 %idx) {
define <vscale x 8 x half> @test_insert_with_index_nxv8f16(<vscale x 8 x half> %a, half %h, i64 %idx) {
; CHECK-LABEL: test_insert_with_index_nxv8f16:
; CHECK: // %bb.0:
; CHECK-NEXT: index z1.h, #0, #1
; CHECK-NEXT: mov z2.h, w0
; CHECK-NEXT: index z2.h, #0, #1
; CHECK-NEXT: mov z3.h, w0
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: cmpeq p0.h, p0/z, z1.h, z2.h
; CHECK-NEXT: mov z0.h, p0/m, h0
; CHECK-NEXT: cmpeq p0.h, p0/z, z2.h, z3.h
; CHECK-NEXT: mov z0.h, p0/m, h1
; CHECK-NEXT: ret
%res = insertelement <vscale x 8 x half> poison, half %h, i64 %idx
%res = insertelement <vscale x 8 x half> %a, half %h, i64 %idx
ret <vscale x 8 x half> %res
}

define <vscale x 2 x bfloat> @test_insert_with_index_nxv2bf16(bfloat %h, i64 %idx) {
define <vscale x 2 x bfloat> @test_insert_with_index_nxv2bf16(<vscale x 2 x bfloat> %a, bfloat %h, i64 %idx) {
; CHECK-LABEL: test_insert_with_index_nxv2bf16:
; CHECK: // %bb.0:
; CHECK-NEXT: index z1.d, #0, #1
; CHECK-NEXT: mov z2.d, x0
; CHECK-NEXT: index z2.d, #0, #1
; CHECK-NEXT: mov z3.d, x0
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: cmpeq p0.d, p0/z, z1.d, z2.d
; CHECK-NEXT: mov z0.h, p0/m, h0
; CHECK-NEXT: cmpeq p0.d, p0/z, z2.d, z3.d
; CHECK-NEXT: mov z0.h, p0/m, h1
; CHECK-NEXT: ret
%res = insertelement <vscale x 2 x bfloat> poison, bfloat %h, i64 %idx
%res = insertelement <vscale x 2 x bfloat> %a, bfloat %h, i64 %idx
ret <vscale x 2 x bfloat> %res
}

define <vscale x 4 x bfloat> @test_insert_with_index_nxv4bf16(bfloat %h, i64 %idx) {
define <vscale x 4 x bfloat> @test_insert_with_index_nxv4bf16(<vscale x 4 x bfloat> %a, bfloat %h, i64 %idx) {
; CHECK-LABEL: test_insert_with_index_nxv4bf16:
; CHECK: // %bb.0:
; CHECK-NEXT: index z1.s, #0, #1
; CHECK-NEXT: mov z2.s, w0
; CHECK-NEXT: index z2.s, #0, #1
; CHECK-NEXT: mov z3.s, w0
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: cmpeq p0.s, p0/z, z1.s, z2.s
; CHECK-NEXT: mov z0.h, p0/m, h0
; CHECK-NEXT: cmpeq p0.s, p0/z, z2.s, z3.s
; CHECK-NEXT: mov z0.h, p0/m, h1
; CHECK-NEXT: ret
%res = insertelement <vscale x 4 x bfloat> poison, bfloat %h, i64 %idx
%res = insertelement <vscale x 4 x bfloat> %a, bfloat %h, i64 %idx
ret <vscale x 4 x bfloat> %res
}

define <vscale x 8 x bfloat> @test_insert_with_index_nxv8bf16(bfloat %h, i64 %idx) {
define <vscale x 8 x bfloat> @test_insert_with_index_nxv8bf16(<vscale x 8 x bfloat> %a, bfloat %h, i64 %idx) {
; CHECK-LABEL: test_insert_with_index_nxv8bf16:
; CHECK: // %bb.0:
; CHECK-NEXT: index z1.h, #0, #1
; CHECK-NEXT: mov z2.h, w0
; CHECK-NEXT: index z2.h, #0, #1
; CHECK-NEXT: mov z3.h, w0
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: cmpeq p0.h, p0/z, z1.h, z2.h
; CHECK-NEXT: mov z0.h, p0/m, h0
; CHECK-NEXT: cmpeq p0.h, p0/z, z2.h, z3.h
; CHECK-NEXT: mov z0.h, p0/m, h1
; CHECK-NEXT: ret
%res = insertelement <vscale x 8 x bfloat> poison, bfloat %h, i64 %idx
%res = insertelement <vscale x 8 x bfloat> %a, bfloat %h, i64 %idx
ret <vscale x 8 x bfloat> %res
}

define <vscale x 2 x float> @test_insert_with_index_nxv2f32(float %f, i64 %idx) {
define <vscale x 2 x float> @test_insert_with_index_nxv2f32(<vscale x 2 x float> %a, float %f, i64 %idx) {
; CHECK-LABEL: test_insert_with_index_nxv2f32:
; CHECK: // %bb.0:
; CHECK-NEXT: index z1.d, #0, #1
; CHECK-NEXT: mov z2.d, x0
; CHECK-NEXT: index z2.d, #0, #1
; CHECK-NEXT: mov z3.d, x0
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: cmpeq p0.d, p0/z, z1.d, z2.d
; CHECK-NEXT: mov z0.s, p0/m, s0
; CHECK-NEXT: cmpeq p0.d, p0/z, z2.d, z3.d
; CHECK-NEXT: mov z0.s, p0/m, s1
; CHECK-NEXT: ret
%res = insertelement <vscale x 2 x float> poison, float %f, i64 %idx
%res = insertelement <vscale x 2 x float> %a, float %f, i64 %idx
ret <vscale x 2 x float> %res
}

define <vscale x 4 x float> @test_insert_with_index_nxv4f32(float %f, i64 %idx) {
define <vscale x 4 x float> @test_insert_with_index_nxv4f32(<vscale x 4 x float> %a, float %f, i64 %idx) {
; CHECK-LABEL: test_insert_with_index_nxv4f32:
; CHECK: // %bb.0:
; CHECK-NEXT: index z1.s, #0, #1
; CHECK-NEXT: mov z2.s, w0
; CHECK-NEXT: index z2.s, #0, #1
; CHECK-NEXT: mov z3.s, w0
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: cmpeq p0.s, p0/z, z1.s, z2.s
; CHECK-NEXT: mov z0.s, p0/m, s0
; CHECK-NEXT: cmpeq p0.s, p0/z, z2.s, z3.s
; CHECK-NEXT: mov z0.s, p0/m, s1
; CHECK-NEXT: ret
%res = insertelement <vscale x 4 x float> poison, float %f, i64 %idx
%res = insertelement <vscale x 4 x float> %a, float %f, i64 %idx
ret <vscale x 4 x float> %res
}

define <vscale x 2 x double> @test_insert_with_index_nxv2f64(double %d, i64 %idx) {
define <vscale x 2 x double> @test_insert_with_index_nxv2f64(<vscale x 2 x double> %a, double %d, i64 %idx) {
; CHECK-LABEL: test_insert_with_index_nxv2f64:
; CHECK: // %bb.0:
; CHECK-NEXT: index z1.d, #0, #1
; CHECK-NEXT: mov z2.d, x0
; CHECK-NEXT: index z2.d, #0, #1
; CHECK-NEXT: mov z3.d, x0
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: cmpeq p0.d, p0/z, z1.d, z2.d
; CHECK-NEXT: mov z0.d, p0/m, d0
; CHECK-NEXT: cmpeq p0.d, p0/z, z2.d, z3.d
; CHECK-NEXT: mov z0.d, p0/m, d1
; CHECK-NEXT: ret
%res = insertelement <vscale x 2 x double> poison, double %d, i64 %idx
%res = insertelement <vscale x 2 x double> %a, double %d, i64 %idx
ret <vscale x 2 x double> %res
}

Expand Down