Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 40 additions & 0 deletions cranelift/codegen/src/isa/aarch64/inst.isle
Original file line number Diff line number Diff line change
Expand Up @@ -4037,6 +4037,23 @@
(if-let true (u64_eq (ty_bytes ty) (u64_wrapping_shl 1 (shift_masked_imm shift_ty n))))
(amode_reg_scaled (amode_add x offset) y))

;; `base + (extended index)` plus a *non-zero* scaled-uimm12 offset: higher
;; priority than rules 4/5 to keep `base + index` as one CSE-able value shared
;; across offsets, folding the offset into the load. A zero offset instead falls
;; through to rules 4/5's single-instruction `RegExtended` form.
;;
;; Each rule binds the whole `iadd` as `val` and passes it as the base of an
;; `AMode.UnsignedOffset`; the `if-let` only succeeds when
;; `uimm12_scaled_nonzero_from_i64` accepts `offset` for `ty`.
;;
;; Priority 8: the extend of a 32-bit value is the second `iadd` operand.
(rule 8 (amode_no_more_iconst ty val @ (iadd _ _ (uextend _ (value_type $I32))) offset)
(if-let uimm12 (uimm12_scaled_nonzero_from_i64 offset ty))
(AMode.UnsignedOffset val uimm12))
(rule 8 (amode_no_more_iconst ty val @ (iadd _ _ (sextend _ (value_type $I32))) offset)
(if-let uimm12 (uimm12_scaled_nonzero_from_i64 offset ty))
(AMode.UnsignedOffset val uimm12))
;; Priority 9: the extend of a 32-bit value is the first `iadd` operand.
;; NOTE(review): presumably these need a priority distinct from the rules above
;; because both operand positions can match the same `iadd` (both operands being
;; extends) -- confirm against the ISLE overlap checker.
(rule 9 (amode_no_more_iconst ty val @ (iadd _ (uextend _ (value_type $I32)) _) offset)
(if-let uimm12 (uimm12_scaled_nonzero_from_i64 offset ty))
(AMode.UnsignedOffset val uimm12))
(rule 9 (amode_no_more_iconst ty val @ (iadd _ (sextend _ (value_type $I32)) _) offset)
(if-let uimm12 (uimm12_scaled_nonzero_from_i64 offset ty))
(AMode.UnsignedOffset val uimm12))

(attr amode_reg_scaled (veri chain))
(decl amode_reg_scaled (Reg Value) AMode)
(rule 0 (amode_reg_scaled base index)
Expand Down Expand Up @@ -4096,6 +4113,29 @@
(decl pure partial uimm12_scaled_from_i64 (i64 Type) UImm12Scaled)
(extern constructor uimm12_scaled_from_i64 uimm12_scaled_from_i64)

;; As `uimm12_scaled_from_i64`, but additionally requires a non-zero value.
;;
;; Verification spec: the constructor matches only when `value` is non-zero,
;; non-negative under a signed comparison, at most `4095 * scale`, and shares no
;; set bits with `scale - 1` (i.e. is a multiple of `scale` when `scale` is a
;; power of two). The result it provides is the low 12 bits of `value / scale`
;; (unsigned division).
;; NOTE(review): `scale` is presumably the access size of `ty` in bytes
;; (`bits2bytes!` applied to its bit width) -- confirm against the macro.
(spec (uimm12_scaled_nonzero_from_i64 value ty)
(match
(let
(
(scale (bits2bytes! (int2bv 64 (:bits ty))))
(limit (bvmul (int2bv 64 4095) scale))
)
(and
(not (bv_is_zero! value))
(bvsge value (bvzero! 64))
(bvsle value limit)
(bv_is_zero! (bvand value (bvsub scale (bvone! 64))))
)
)
)
(provide
(= result (extract 11 0 (bvudiv value (bits2bytes! (int2bv 64 (:bits ty))))))
)
)
;; Pure and partial: the external Rust constructor may decline to produce a
;; value, in which case the rule using it does not match.
(decl pure partial uimm12_scaled_nonzero_from_i64 (i64 Type) UImm12Scaled)
(extern constructor uimm12_scaled_nonzero_from_i64 uimm12_scaled_nonzero_from_i64)

(spec (simm9_from_i64 value)
(provide (= value (sign_ext 64 result)))
(match
Expand Down
9 changes: 9 additions & 0 deletions cranelift/codegen/src/isa/aarch64/lower/isle.rs
Original file line number Diff line number Diff line change
Expand Up @@ -827,6 +827,15 @@ impl Context for IsleContext<'_, '_, MInst, AArch64Backend> {
UImm12Scaled::maybe_from_i64(val, ty)
}

/// Non-zero variant of `uimm12_scaled_from_i64`.
///
/// A `val` of zero always yields `None`, so that `base + index + 0` keeps its
/// single-instruction `RegExtended` amode. Any other `val` is handed to
/// `UImm12Scaled::maybe_from_i64` together with `ty`, and that call's result
/// is returned unchanged.
fn uimm12_scaled_nonzero_from_i64(&mut self, val: i64, ty: Type) -> Option<UImm12Scaled> {
    match val {
        // Zero is rejected up front, before consulting the encoder.
        0 => None,
        nonzero => UImm12Scaled::maybe_from_i64(nonzero, ty),
    }
}

fn test_and_compare_bit_const(&mut self, ty: Type, n: u64) -> Option<u8> {
if n.count_ones() != 1 {
return None;
Expand Down
125 changes: 125 additions & 0 deletions cranelift/filetests/filetests/isa/aarch64/amode-shared-base.clif
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
test compile precise-output
set unwind_info=false
target aarch64

;; Multiple loads sharing `base + uextend(index)` with distinct constant
;; offsets: the `base + index` add is materialized once and reused, with each
;; offset folded into the load's immediate. Previously each load emitted its own
;; `add base, #offset` (defeating CSE of the shared `base + index`).
;;
;; All four offsets (260, 262, 264, 266) are non-zero multiples of 2, the byte
;; size of the `i16` accesses.
function %shared_base_uext(i64, i32) -> i32 {
block0(v0: i64, v1: i32):
v2 = uextend.i64 v1
v3 = iadd v0, v2
v4 = load.i16 v3+260
v5 = load.i16 v3+262
v6 = load.i16 v3+264
v7 = load.i16 v3+266
v8 = sextend.i32 v4
v9 = sextend.i32 v5
v10 = sextend.i32 v6
v11 = sextend.i32 v7
v12 = iadd v8, v9
v13 = iadd v10, v11
v14 = iadd v12, v13
return v14
}

; VCode:
; block0:
; add x11, x0, x1, UXTW
; ldrh w12, [x11, #260]
; ldrh w14, [x11, #262]
; ldrh w13, [x11, #264]
; sxth w14, w14
; ldrsh x11, [x11, #266]
; add w12, w14, w12, SXTH
; add w11, w11, w13, SXTH
; add w0, w12, w11
; ret
;
; Disassembled:
; block0: ; offset 0x0
; add x11, x0, w1, uxtw
; ldrh w12, [x11, #0x104] ; trap: heap_oob
; ldrh w14, [x11, #0x106] ; trap: heap_oob
; ldrh w13, [x11, #0x108] ; trap: heap_oob
; sxth w14, w14
; ldrsh x11, [x11, #0x10a] ; trap: heap_oob
; add w12, w14, w12, sxth
; add w11, w11, w13, sxth
; add w0, w12, w11
; ret

;; Same with a sign-extended index: two `i16` loads at offsets 260 and 262 share
;; one `base + sextend(index)` address.
function %shared_base_sext(i64, i32) -> i32 {
block0(v0: i64, v1: i32):
v2 = sextend.i64 v1
v3 = iadd v0, v2
v4 = load.i16 v3+260
v5 = load.i16 v3+262
v8 = sextend.i32 v4
v9 = sextend.i32 v5
v12 = iadd v8, v9
return v12
}

; VCode:
; block0:
; add x6, x0, x1, SXTW
; ldrh w7, [x6, #260]
; ldrsh x6, [x6, #262]
; add w0, w6, w7, SXTH
; ret
;
; Disassembled:
; block0: ; offset 0x0
; add x6, x0, w1, sxtw
; ldrh w7, [x6, #0x104] ; trap: heap_oob
; ldrsh x6, [x6, #0x106] ; trap: heap_oob
; add w0, w6, w7, sxth
; ret

;; A single load with a non-zero offset: both forms are two instructions, so
;; this is neutral (folds the offset into the load immediate).
;; The expected output is one extending `add` followed by one load carrying the
;; `#128` immediate.
function %single_use_uext(i64, i32) -> i32 {
block0(v0: i64, v1: i32):
v2 = uextend.i64 v1
v3 = iadd v0, v2
v4 = load.i16 v3+128
v5 = sextend.i32 v4
return v5
}

; VCode:
; block0:
; add x4, x0, x1, UXTW
; ldrsh x0, [x4, #128]
; ret
;
; Disassembled:
; block0: ; offset 0x0
; add x4, x0, w1, uxtw
; ldrsh x0, [x4, #0x80] ; trap: heap_oob
; ret

;; Zero offset: must stay a single `ldrh [base, index, uxtw]` (the non-zero
;; guard keeps `RegExtended` here rather than splitting into add + load).
;; The load below carries no immediate offset, which is the case under test.
function %offset_zero_uext(i64, i32) -> i32 {
block0(v0: i64, v1: i32):
v2 = uextend.i64 v1
v3 = iadd v0, v2
v4 = load.i16 v3
v5 = sextend.i32 v4
return v5
}

; VCode:
; block0:
; ldrsh x0, [x0, w1, UXTW]
; ret
;
; Disassembled:
; block0: ; offset 0x0
; ldrsh x0, [x0, w1, uxtw] ; trap: heap_oob
; ret

48 changes: 48 additions & 0 deletions cranelift/filetests/filetests/runtests/amode-shared-base.clif
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
test interpret
test run
target aarch64
target x86_64 has_sse3 has_ssse3 has_sse41
target s390x

;; Store at a fixed slot offset, then load it back through
;; `base + (extended index) + offset`, so the computed addressing mode must
;; resolve to the same byte. Catches a wrong base/offset in the amode.
;;
;; The store writes slot offset 10; the load adds offset 8 to the zero-extended
;; index, so the run directive passes index 2 (2 + 8 = 10).
function %amode_uext(i32) -> i16 {
ss0 = explicit_slot 32
block0(v0: i32):
v1 = iconst.i16 0x1234
v2 = stack_addr.i64 ss0
store.i16 v1, v2+10
v3 = uextend.i64 v0
v4 = iadd v2, v3
v5 = load.i16 v4+8
return v5
}
; run: %amode_uext(2) == 0x1234

;; Sign-extended index variant: the store writes slot offset 12 and the load
;; adds offset 8 to the sign-extended index, so the run directive passes
;; index 4 (4 + 8 = 12).
function %amode_sext(i32) -> i16 {
ss0 = explicit_slot 32
block0(v0: i32):
v1 = iconst.i16 0x5678
v2 = stack_addr.i64 ss0
store.i16 v1, v2+12
v3 = sextend.i64 v0
v4 = iadd v2, v3
v5 = load.i16 v4+8
return v5
}
; run: %amode_sext(4) == 0x5678

;; Zero offset: exercises the RegExtended guard path.
;; The store writes slot offset 6 and the load has no constant offset, so the
;; zero-extended index alone (6 in the run directive) must select that byte.
function %amode_zero(i32) -> i16 {
ss0 = explicit_slot 32
block0(v0: i32):
v1 = iconst.i16 0x4321
v2 = stack_addr.i64 ss0
store.i16 v1, v2+6
v3 = uextend.i64 v0
v4 = iadd v2, v3
v5 = load.i16 v4
return v5
}
; run: %amode_zero(6) == 0x4321
Loading
Loading