

⚙ D72573 [SelectionDAG] ComputeKnownBits - minimum leading/trailing zero bits in...
source link: https://reviews.llvm.org/D72573
Follow the source link to read the original article, which includes images, any later updates, and better typesetting. If the link is broken, click the button below to view the snapshot taken at that time.

PR44526 - as detailed in https://blog.regehr.org/archives/1709, we don't make use of the known leading/trailing zeros of shifted values in cases where we don't know the shift amount.
This patch adds support to SelectionDAG::ComputeKnownBits to use KnownBits::countMinTrailingZeros and countMinLeadingZeros to set the minimum guaranteed leading/trailing known zero bits.
Event Timeline
Same for signbit given arithmetic right-shift?
ComputeNumSignBits already does something similar for ISD::SRA - it always gets the number of sign bits of Op0 and increases it if the shift amount is constant.
llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
- This file is larger than 256 KB, so syntax highlighting is disabled by default.
Could this also have a getValidMinimumShiftAmountConstant() branch, like the one for ISD::SRL?
Done in a number of follow up commits.
Same for signbit given arithmetic right-shift?
ComputeNumSignBits already does something similar for ISD::SRA - it always gets the number of sign bits of Op0 and increases it if the shift amount is constant.
Ah indeed, but then ComputeNumSignBits() doesn't handle ISD::SRL like it does ISD::SRA.
Yup, ISD::SRL ComputeNumSignBits() calls use the default fallback to call ComputeKnownBits() as we're only interested in leading zero bits anyway.
case ISD::FSHL: case ISD::FSHR: if (ConstantSDNode *C = isConstOrConstSplat(Op.getOperand(2), DemandedElts)) { unsigned Amt = C->getAPIntValue().urem(BitWidth);
// For fshl, 0-shift returns the 1st arg. // For fshr, 0-shift returns the 2nd arg. if (Amt == 0) { ▲ Show 20 Lines • Show All 6,859 Lines • Show Last 20 Lines
llvm/test/CodeGen/AArch64/hoist-and-by-const-from-lshr-in-eqcmp-zero.ll
Show First 20 Lines • Show All 294 Lines • ▼ Show 20 Lines %t1 = and i32 %t0, 1 %res = icmp eq i32 %t1, 0 ret i1 %res } define i1 @scalar_i32_x_is_const2_eq(i32 %y) nounwind { ; CHECK-LABEL: scalar_i32_x_is_const2_eq: ; CHECK: // %bb.0: ; CHECK-NEXT: mov w8, #1 - ; CHECK-NEXT: mov w9, #43605 ; CHECK-NEXT: lsr w8, w8, w0 - ; CHECK-NEXT: movk w9, #43605, lsl #16 - ; CHECK-NEXT: tst w8, w9 + ; CHECK-NEXT: cmp w8, #0 // =0 ; CHECK-NEXT: cset w0, eq ; CHECK-NEXT: ret %t0 = lshr i32 1, %y %t1 = and i32 %t0, 2857740885 %res = icmp eq i32 %t1, 0 ret i1 %res }
;------------------------------------------------------------------------------; ; A few negative tests ;------------------------------------------------------------------------------;
define i1 @negative_scalar_i8_bitsinmiddle_slt(i8 %x, i8 %y) nounwind { ; CHECK-LABEL: negative_scalar_i8_bitsinmiddle_slt: ; CHECK: // %bb.0: ; CHECK-NEXT: mov w8, #24 ; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1 ; CHECK-NEXT: lsr w8, w8, w1 - ; CHECK-NEXT: and w8, w8, w0 - ; CHECK-NEXT: sxtb w8, w8 - ; CHECK-NEXT: cmp w8, #0 // =0 + ; CHECK-NEXT: tst w8, w0 ; CHECK-NEXT: cset w0, lt ; CHECK-NEXT: ret %t0 = lshr i8 24, %y %t1 = and i8 %t0, %x %res = icmp slt i8 %t1, 0 ret i1 %res }
define i1 @scalar_i8_signbit_eq_with_nonzero(i8 %x, i8 %y) nounwind { ; CHECK-LABEL: scalar_i8_signbit_eq_with_nonzero: ; CHECK: // %bb.0: ; CHECK-NEXT: mov w8, #128 ; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1 ; CHECK-NEXT: lsr w8, w8, w1 ; CHECK-NEXT: and w8, w8, w0 - ; CHECK-NEXT: and w8, w8, #0xff ; CHECK-NEXT: cmp w8, #1 // =1 ; CHECK-NEXT: cset w0, eq ; CHECK-NEXT: ret %t0 = lshr i8 128, %y %t1 = and i8 %t0, %x %res = icmp eq i8 %t1, 1 ; should be comparing with 0 ret i1 %res }
llvm/test/CodeGen/AArch64/hoist-and-by-const-from-shl-in-eqcmp-zero.ll
Show First 20 Lines • Show All 268 Lines • ▼ Show 20 Lines;------------------------------------------------------------------------------;
define i1 @scalar_i8_signbit_ne(i8 %x, i8 %y) nounwind { ; CHECK-LABEL: scalar_i8_signbit_ne: ; CHECK: // %bb.0: ; CHECK-NEXT: and w8, w0, #0xff ; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1 ; CHECK-NEXT: lsr w8, w8, w1 - ; CHECK-NEXT: ubfx w0, w8, #7, #1 + ; CHECK-NEXT: lsr w0, w8, #7 ; CHECK-NEXT: ret %t0 = shl i8 128, %y %t1 = and i8 %t0, %x %res = icmp ne i8 %t1, 0 ; we are perfectly happy with 'ne' predicate ret i1 %res }
;------------------------------------------------------------------------------; ▲ Show 20 Lines • Show All 53 Lines • ▼ Show 20 Lines define i1 @scalar_i8_signbit_eq_with_nonzero(i8 %x, i8 %y) nounwind { ; CHECK-LABEL: scalar_i8_signbit_eq_with_nonzero: ; CHECK: // %bb.0: ; CHECK-NEXT: mov w8, #-128 ; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1 ; CHECK-NEXT: lsl w8, w8, w1 ; CHECK-NEXT: and w8, w8, w0 - ; CHECK-NEXT: and w8, w8, #0xff + ; CHECK-NEXT: and w8, w8, #0x80 ; CHECK-NEXT: cmp w8, #1 // =1 ; CHECK-NEXT: cset w0, eq ; CHECK-NEXT: ret %t0 = shl i8 128, %y %t1 = and i8 %t0, %x %res = icmp eq i8 %t1, 1 ; should be comparing with 0 ret i1 %res }
llvm/test/CodeGen/AMDGPU/lshr.v2i16.ll
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9 %s - ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI,CIVI %s - ; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CI,CIVI %s + ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CIVI,VI %s + ; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CIVI,CI %s
define amdgpu_kernel void @s_lshr_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> %lhs, <2 x i16> %rhs) #0 { ; GFX9-LABEL: s_lshr_v2i16: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 ; GFX9-NEXT: s_load_dword s4, s[0:1], 0x2c ; GFX9-NEXT: s_load_dword s0, s[0:1], 0x30 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v0, s2 ; GFX9-NEXT: v_mov_b32_e32 v2, s4 ; GFX9-NEXT: v_mov_b32_e32 v1, s3 ; GFX9-NEXT: v_pk_lshrrev_b16 v2, s0, v2 ; GFX9-NEXT: global_store_dword v[0:1], v2, off ; GFX9-NEXT: s_endpgm ; ; VI-LABEL: s_lshr_v2i16: ; VI: ; %bb.0: ; VI-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 ; VI-NEXT: s_load_dword s5, s[0:1], 0x2c ; VI-NEXT: s_load_dword s0, s[0:1], 0x30 ; VI-NEXT: s_mov_b32 s4, 0xffff ; VI-NEXT: s_waitcnt lgkmcnt(0) - ; VI-NEXT: s_and_b32 s1, s5, s4 - ; VI-NEXT: s_and_b32 s4, s0, s4 - ; VI-NEXT: s_lshr_b32 s5, s5, 16 - ; VI-NEXT: s_lshr_b32 s0, s0, 16 - ; VI-NEXT: s_lshr_b32 s0, s5, s0 - ; VI-NEXT: v_mov_b32_e32 v0, s4 - ; VI-NEXT: v_bfe_u32 v0, s1, v0, 16 - ; VI-NEXT: s_lshl_b32 s0, s0, 16 - ; VI-NEXT: v_or_b32_e32 v2, s0, v0 ; VI-NEXT: v_mov_b32_e32 v0, s2 + ; VI-NEXT: s_lshr_b32 s1, s5, 16 + ; VI-NEXT: s_lshr_b32 s6, s0, 16 + ; VI-NEXT: s_lshr_b32 s1, s1, s6 + ; VI-NEXT: s_and_b32 s5, s5, s4 + ; VI-NEXT: s_and_b32 s0, s0, s4 + ; VI-NEXT: s_lshr_b32 s0, s5, s0 + ; VI-NEXT: s_lshl_b32 s1, s1, 16 + ; VI-NEXT: s_or_b32 s0, s0, s1 ; VI-NEXT: v_mov_b32_e32 v1, s3 + ; VI-NEXT: v_mov_b32_e32 v2, s0 ; VI-NEXT: flat_store_dword v[0:1], v2 ; VI-NEXT: s_endpgm ; ; CI-LABEL: s_lshr_v2i16: ; CI: ; %bb.0: ; CI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 ; CI-NEXT: s_load_dword s2, s[0:1], 0xb ; CI-NEXT: s_load_dword s0, s[0:1], 0xc ; CI-NEXT: s_mov_b32 s3, 0xffff ; CI-NEXT: s_mov_b32 s7, 0xf000 ; CI-NEXT: s_mov_b32 s6, -1 ; CI-NEXT: s_waitcnt lgkmcnt(0) ; CI-NEXT: s_lshr_b32 s1, s2, 16 ; CI-NEXT: s_lshr_b32 s8, s0, 16 + ; CI-NEXT: s_lshr_b32 s1, s1, s8 + ; CI-NEXT: s_and_b32 s2, s2, s3 ; CI-NEXT: s_and_b32 s0, s0, s3 + ; CI-NEXT: 
s_lshr_b32 s0, s2, s0 + ; CI-NEXT: s_lshl_b32 s1, s1, 16 + ; CI-NEXT: s_or_b32 s0, s0, s1 ; CI-NEXT: v_mov_b32_e32 v0, s0 - ; CI-NEXT: s_lshr_b32 s0, s1, s8 - ; CI-NEXT: s_and_b32 s2, s2, s3 - ; CI-NEXT: v_bfe_u32 v0, s2, v0, 16 - ; CI-NEXT: s_lshl_b32 s0, s0, 16 - ; CI-NEXT: v_or_b32_e32 v0, s0, v0 ; CI-NEXT: buffer_store_dword v0, off, s[4:7], 0 ; CI-NEXT: s_endpgm %result = lshr <2 x i16> %lhs, %rhs store <2 x i16> %result, <2 x i16> addrspace(1)* %out ret void }
define amdgpu_kernel void @v_lshr_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) #0 { ▲ Show 20 Lines • Show All 51 Lines • ▼ Show 20 Lines; CI-NEXT: s_mov_b32 s8, 0xffff ; CI-NEXT: s_mov_b64 s[2:3], s[6:7] ; CI-NEXT: s_waitcnt vmcnt(1) ; CI-NEXT: v_lshrrev_b32_e32 v4, 16, v2 ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: v_lshrrev_b32_e32 v5, 16, v3 ; CI-NEXT: v_and_b32_e32 v2, s8, v2 ; CI-NEXT: v_and_b32_e32 v3, s8, v3 - ; CI-NEXT: v_bfe_u32 v2, v2, v3, 16 + ; CI-NEXT: v_lshrrev_b32_e32 v2, v3, v2 ; CI-NEXT: v_lshrrev_b32_e32 v3, v5, v4 ; CI-NEXT: v_lshlrev_b32_e32 v3, 16, v3 ; CI-NEXT: v_or_b32_e32 v2, v2, v3 ; CI-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 ; CI-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() %tid.ext = sext i32 %tid to i64 %in.gep = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %in, i64 %tid.ext ▲ Show 20 Lines • Show All 61 Lines • ▼ Show 20 Lines; CI-NEXT: s_lshr_b32 s9, s8, 16 ; CI-NEXT: s_mov_b32 s10, 0xffff ; CI-NEXT: s_and_b32 s8, s8, s10 ; CI-NEXT: s_mov_b64 s[6:7], s[2:3] ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: v_lshrrev_b32_e32 v3, 16, v2 ; CI-NEXT: v_and_b32_e32 v2, s10, v2 ; CI-NEXT: v_lshrrev_b32_e32 v3, s9, v3 - ; CI-NEXT: v_bfe_u32 v2, v2, s8, 16 + ; CI-NEXT: v_lshrrev_b32_e32 v2, s8, v2 ; CI-NEXT: v_lshlrev_b32_e32 v3, 16, v3 ; CI-NEXT: v_or_b32_e32 v2, v2, v3 ; CI-NEXT: buffer_store_dword v2, v[0:1], s[4:7], 0 addr64 ; CI-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() %tid.ext = sext i32 %tid to i64 %in.gep = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %in, i64 %tid.ext %out.gep = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %out, i64 %tid.ext ▲ Show 20 Lines • Show All 58 Lines • ▼ Show 20 Lines; CI-NEXT: s_lshr_b32 s9, s8, 16 ; CI-NEXT: s_mov_b32 s10, 0xffff ; CI-NEXT: s_and_b32 s8, s8, s10 ; CI-NEXT: s_mov_b64 s[6:7], s[2:3] ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: v_lshrrev_b32_e32 v3, 16, v2 ; CI-NEXT: v_and_b32_e32 v2, s10, v2 ; CI-NEXT: 
v_lshr_b32_e32 v3, s9, v3 - ; CI-NEXT: v_bfe_u32 v2, s8, v2, 16 + ; CI-NEXT: v_lshr_b32_e32 v2, s8, v2 ; CI-NEXT: v_lshlrev_b32_e32 v3, 16, v3 ; CI-NEXT: v_or_b32_e32 v2, v2, v3 ; CI-NEXT: buffer_store_dword v2, v[0:1], s[4:7], 0 addr64 ; CI-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() %tid.ext = sext i32 %tid to i64 %in.gep = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %in, i64 %tid.ext %out.gep = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %out, i64 %tid.ext ▲ Show 20 Lines • Show All 51 Lines • ▼ Show 20 Lines; CI-NEXT: s_waitcnt lgkmcnt(0) ; CI-NEXT: s_mov_b64 s[4:5], s[2:3] ; CI-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64 ; CI-NEXT: s_mov_b64 s[2:3], s[6:7] ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: v_lshrrev_b32_e32 v3, 16, v2 ; CI-NEXT: v_and_b32_e32 v2, 0xffff, v2 ; CI-NEXT: v_lshr_b32_e32 v3, 8, v3 - ; CI-NEXT: v_bfe_u32 v2, 8, v2, 16 + ; CI-NEXT: v_lshr_b32_e32 v2, 8, v2 ; CI-NEXT: v_lshlrev_b32_e32 v3, 16, v3 ; CI-NEXT: v_or_b32_e32 v2, v2, v3 ; CI-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 ; CI-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() %tid.ext = sext i32 %tid to i64 %in.gep = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %in, i64 %tid.ext %out.gep = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %out, i64 %tid.ext ▲ Show 20 Lines • Show All 129 Lines • ▼ Show 20 Lines; CI-NEXT: v_lshrrev_b32_e32 v7, 16, v3 ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: v_lshrrev_b32_e32 v8, 16, v4 ; CI-NEXT: v_lshrrev_b32_e32 v9, 16, v5 ; CI-NEXT: v_and_b32_e32 v2, s8, v2 ; CI-NEXT: v_and_b32_e32 v4, s8, v4 ; CI-NEXT: v_and_b32_e32 v3, s8, v3 ; CI-NEXT: v_and_b32_e32 v5, s8, v5 - ; CI-NEXT: v_bfe_u32 v3, v3, v5, 16 + ; CI-NEXT: v_lshrrev_b32_e32 v3, v5, v3 ; CI-NEXT: v_lshrrev_b32_e32 v5, v9, v7 - ; CI-NEXT: v_bfe_u32 v2, v2, v4, 16 + ; CI-NEXT: v_lshrrev_b32_e32 v2, v4, v2 ; CI-NEXT: v_lshrrev_b32_e32 v4, v8, v6 ; CI-NEXT: v_lshlrev_b32_e32 v5, 16, v5 ; CI-NEXT: 
v_lshlrev_b32_e32 v4, 16, v4 ; CI-NEXT: v_or_b32_e32 v3, v3, v5 ; CI-NEXT: v_or_b32_e32 v2, v2, v4 ; CI-NEXT: buffer_store_dwordx2 v[2:3], v[0:1], s[0:3], 0 addr64 ; CI-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() ▲ Show 20 Lines • Show All 85 Lines • Show Last 20 Lines
llvm/test/CodeGen/AMDGPU/shl.ll
Show First 20 Lines • Show All 1,178 Lines • ▼ Show 20 Lines; GCN-NEXT: s_lshl_b64 s[0:1], 1, s0 ; GCN-NEXT: v_mov_b32_e32 v0, s0 ; GCN-NEXT: v_mov_b32_e32 v1, s1 ; GCN-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 ; GCN-NEXT: s_endpgm ; ; EG-LABEL: s_shl_inline_imm_1_i64: ; EG: ; %bb.0: - ; EG-NEXT: ALU 13, @4, KC0[CB0:0-32], KC1[] + ; EG-NEXT: ALU 8, @4, KC0[CB0:0-32], KC1[] ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1 ; EG-NEXT: CF_END ; EG-NEXT: PAD ; EG-NEXT: ALU clause starting at 4: - ; EG-NEXT: SUB_INT * T0.W, literal.x, KC0[2].W, - ; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00) - ; EG-NEXT: LSHR T0.W, 1, PV.W, - ; EG-NEXT: ADD_INT * T1.W, KC0[2].W, literal.x, - ; EG-NEXT: -32(nan), 0(0.000000e+00) - ; EG-NEXT: LSHL T0.Z, 1, PS, - ; EG-NEXT: LSHR T0.W, PV.W, 1, - ; EG-NEXT: SETGT_UINT * T1.W, KC0[2].W, literal.x, - ; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00) - ; EG-NEXT: CNDE_INT T0.Y, PS, PV.W, PV.Z, - ; EG-NEXT: LSHL * T0.W, 1, KC0[2].W, - ; EG-NEXT: CNDE_INT T0.X, T1.W, PV.W, 0.0, + ; EG-NEXT: ADD_INT T0.Z, KC0[2].W, literal.x, + ; EG-NEXT: SETGT_UINT T0.W, KC0[2].W, literal.y, + ; EG-NEXT: LSHL * T1.W, 1, KC0[2].W, + ; EG-NEXT: -32(nan), 31(4.344025e-44) + ; EG-NEXT: CNDE_INT T0.X, PV.W, PS, 0.0, + ; EG-NEXT: LSHL T1.W, 1, PV.Z, ; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) + ; EG-NEXT: CNDE_INT * T0.Y, T0.W, 0.0, PV.W, %shl = shl i64 1, %a store i64 %shl, i64 addrspace(1)* %out, align 8 ret void }
define amdgpu_kernel void @s_shl_inline_imm_1_0_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) { ; GCN-LABEL: s_shl_inline_imm_1_0_i64: ; GCN: ; %bb.0: ▲ Show 20 Lines • Show All 519 Lines • Show Last 20 Lines
llvm/test/CodeGen/AMDGPU/shl.v2i16.ll
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9 %s - ; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI,CIVI %s - ; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CI,CIVI %s + ; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CIVI,VI %s + ; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CIVI,CI %s
define amdgpu_kernel void @s_shl_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> %lhs, <2 x i16> %rhs) #0 { ; GFX9-LABEL: s_shl_v2i16: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x24 ; GFX9-NEXT: s_load_dword s2, s[0:1], 0x2c ; GFX9-NEXT: s_load_dword s0, s[0:1], 0x30 ; GFX9-NEXT: s_mov_b32 s7, 0xf000 ▲ Show 20 Lines • Show All 322 Lines • ▼ Show 20 Lines; CI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 ; CI-NEXT: s_mov_b32 s7, 0xf000 ; CI-NEXT: s_mov_b32 s6, 0 ; CI-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; CI-NEXT: v_mov_b32_e32 v1, 0 ; CI-NEXT: s_waitcnt lgkmcnt(0) ; CI-NEXT: s_mov_b64 s[4:5], s[2:3] ; CI-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64 - ; CI-NEXT: s_mov_b32 s4, 0xffff ; CI-NEXT: s_mov_b64 s[2:3], s[6:7] ; CI-NEXT: s_waitcnt vmcnt(0) - ; CI-NEXT: v_and_b32_e32 v3, s4, v2 + ; CI-NEXT: v_and_b32_e32 v3, 0xffff, v2 ; CI-NEXT: v_lshrrev_b32_e32 v2, 16, v2 ; CI-NEXT: v_lshl_b32_e32 v2, 8, v2 ; CI-NEXT: v_lshl_b32_e32 v3, 8, v3 ; CI-NEXT: v_lshlrev_b32_e32 v2, 16, v2 - ; CI-NEXT: v_and_b32_e32 v3, s4, v3 + ; CI-NEXT: v_and_b32_e32 v3, 0xfff8, v3 ; CI-NEXT: v_or_b32_e32 v2, v3, v2 ; CI-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 ; CI-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() %tid.ext = sext i32 %tid to i64 %in.gep = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %in, i64 %tid.ext %out.gep = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %out, i64 %tid.ext %vgpr = load <2 x i16>, <2 x i16> addrspace(1)* %in.gep ▲ Show 20 Lines • Show All 240 Lines • Show Last 20 Lines
llvm/test/CodeGen/ARM/hoist-and-by-const-from-lshr-in-eqcmp-zero.ll
Show First 20 Lines • Show All 961 Lines • ▼ Show 20 Lines; THUMB78-NEXT: bic.w r0, r1, r0 ; THUMB78-NEXT: bx lr %t0 = lshr i32 2857740885, %y %t1 = and i32 %t0, 1 %res = icmp eq i32 %t1, 0 ret i1 %res } define i1 @scalar_i32_x_is_const2_eq(i32 %y) nounwind { - ; ARM6-LABEL: scalar_i32_x_is_const2_eq: - ; ARM6: @ %bb.0: - ; ARM6-NEXT: ldr r2, .LCPI19_0 - ; ARM6-NEXT: mov r1, #1 - ; ARM6-NEXT: and r0, r2, r1, lsr r0 - ; ARM6-NEXT: clz r0, r0 - ; ARM6-NEXT: lsr r0, r0, #5 - ; ARM6-NEXT: bx lr - ; ARM6-NEXT: .p2align 2 - ; ARM6-NEXT: @ %bb.1: - ; ARM6-NEXT: .LCPI19_0: - ; ARM6-NEXT: .long 2857740885 @ 0xaa55aa55 - ; - ; ARM78-LABEL: scalar_i32_x_is_const2_eq: - ; ARM78: @ %bb.0: - ; ARM78-NEXT: movw r1, #43605 - ; ARM78-NEXT: mov r2, #1 - ; ARM78-NEXT: movt r1, #43605 - ; ARM78-NEXT: and r0, r1, r2, lsr r0 - ; ARM78-NEXT: clz r0, r0 - ; ARM78-NEXT: lsr r0, r0, #5 - ; ARM78-NEXT: bx lr + ; ARM-LABEL: scalar_i32_x_is_const2_eq: + ; ARM: @ %bb.0: + ; ARM-NEXT: mov r1, #1 + ; ARM-NEXT: eor r0, r1, r1, lsr r0 + ; ARM-NEXT: bx lr ; ; THUMB6-LABEL: scalar_i32_x_is_const2_eq: ; THUMB6: @ %bb.0: ; THUMB6-NEXT: movs r1, #1 ; THUMB6-NEXT: lsrs r1, r0 - ; THUMB6-NEXT: ldr r2, .LCPI19_0 - ; THUMB6-NEXT: ands r2, r1 - ; THUMB6-NEXT: rsbs r0, r2, #0 - ; THUMB6-NEXT: adcs r0, r2 + ; THUMB6-NEXT: rsbs r0, r1, #0 + ; THUMB6-NEXT: adcs r0, r1 ; THUMB6-NEXT: bx lr - ; THUMB6-NEXT: .p2align 2 - ; THUMB6-NEXT: @ %bb.1: - ; THUMB6-NEXT: .LCPI19_0: - ; THUMB6-NEXT: .long 2857740885 @ 0xaa55aa55 ; ; THUMB78-LABEL: scalar_i32_x_is_const2_eq: ; THUMB78: @ %bb.0: ; THUMB78-NEXT: movs r1, #1 ; THUMB78-NEXT: lsr.w r0, r1, r0 - ; THUMB78-NEXT: movw r1, #43605 - ; THUMB78-NEXT: movt r1, #43605 - ; THUMB78-NEXT: ands r0, r1 - ; THUMB78-NEXT: clz r0, r0 - ; THUMB78-NEXT: lsrs r0, r0, #5 + ; THUMB78-NEXT: eor r0, r0, #1 ; THUMB78-NEXT: bx lr %t0 = lshr i32 1, %y %t1 = and i32 %t0, 2857740885 %res = icmp eq i32 %t1, 0 ret i1 %res }
;------------------------------------------------------------------------------; ; A few negative tests ;------------------------------------------------------------------------------;
define i1 @negative_scalar_i8_bitsinmiddle_slt(i8 %x, i8 %y) nounwind { ; ARM6-LABEL: negative_scalar_i8_bitsinmiddle_slt: ; ARM6: @ %bb.0: ; ARM6-NEXT: uxtb r1, r1 ; ARM6-NEXT: mov r2, #24 - ; ARM6-NEXT: and r0, r0, r2, lsr r1 - ; ARM6-NEXT: sxtb r1, r0 + ; ARM6-NEXT: and r1, r0, r2, lsr r1 ; ARM6-NEXT: mov r0, #0 ; ARM6-NEXT: cmp r1, #0 ; ARM6-NEXT: movmi r0, #1 ; ARM6-NEXT: bx lr ; ; ARM78-LABEL: negative_scalar_i8_bitsinmiddle_slt: ; ARM78: @ %bb.0: ; ARM78-NEXT: uxtb r1, r1 ; ARM78-NEXT: mov r2, #24 - ; ARM78-NEXT: and r0, r0, r2, lsr r1 - ; ARM78-NEXT: sxtb r1, r0 + ; ARM78-NEXT: and r1, r0, r2, lsr r1 ; ARM78-NEXT: mov r0, #0 ; ARM78-NEXT: cmp r1, #0 ; ARM78-NEXT: movwmi r0, #1 ; ARM78-NEXT: bx lr ; ; THUMB6-LABEL: negative_scalar_i8_bitsinmiddle_slt: ; THUMB6: @ %bb.0: ; THUMB6-NEXT: uxtb r1, r1 ; THUMB6-NEXT: movs r2, #24 ; THUMB6-NEXT: lsrs r2, r1 ; THUMB6-NEXT: ands r2, r0 - ; THUMB6-NEXT: sxtb r0, r2 - ; THUMB6-NEXT: cmp r0, #0 ; THUMB6-NEXT: bmi .LBB20_2 ; THUMB6-NEXT: @ %bb.1: ; THUMB6-NEXT: movs r0, #0 ; THUMB6-NEXT: bx lr ; THUMB6-NEXT: .LBB20_2: ; THUMB6-NEXT: movs r0, #1 ; THUMB6-NEXT: bx lr ; ; THUMB78-LABEL: negative_scalar_i8_bitsinmiddle_slt: ; THUMB78: @ %bb.0: ; THUMB78-NEXT: uxtb r1, r1 ; THUMB78-NEXT: movs r2, #24 ; THUMB78-NEXT: lsr.w r1, r2, r1 ; THUMB78-NEXT: ands r0, r1 - ; THUMB78-NEXT: sxtb r1, r0 - ; THUMB78-NEXT: movs r0, #0 - ; THUMB78-NEXT: cmp r1, #0 + ; THUMB78-NEXT: mov.w r0, #0 ; THUMB78-NEXT: it mi ; THUMB78-NEXT: movmi r0, #1 ; THUMB78-NEXT: bx lr %t0 = lshr i8 24, %y %t1 = and i8 %t0, %x %res = icmp slt i8 %t1, 0 ret i1 %res }
define i1 @scalar_i8_signbit_eq_with_nonzero(i8 %x, i8 %y) nounwind { ; ARM-LABEL: scalar_i8_signbit_eq_with_nonzero: ; ARM: @ %bb.0: ; ARM-NEXT: uxtb r1, r1 ; ARM-NEXT: mov r2, #128 ; ARM-NEXT: and r0, r0, r2, lsr r1 - ; ARM-NEXT: mvn r1, #0 - ; ARM-NEXT: uxtab r0, r1, r0 + ; ARM-NEXT: sub r0, r0, #1 ; ARM-NEXT: clz r0, r0 ; ARM-NEXT: lsr r0, r0, #5 ; ARM-NEXT: bx lr ; ; THUMB6-LABEL: scalar_i8_signbit_eq_with_nonzero: ; THUMB6: @ %bb.0: ; THUMB6-NEXT: uxtb r1, r1 ; THUMB6-NEXT: movs r2, #128 ; THUMB6-NEXT: lsrs r2, r1 ; THUMB6-NEXT: ands r2, r0 - ; THUMB6-NEXT: uxtb r0, r2 - ; THUMB6-NEXT: subs r1, r0, #1 + ; THUMB6-NEXT: subs r1, r2, #1 ; THUMB6-NEXT: rsbs r0, r1, #0 ; THUMB6-NEXT: adcs r0, r1 ; THUMB6-NEXT: bx lr ; ; THUMB78-LABEL: scalar_i8_signbit_eq_with_nonzero: ; THUMB78: @ %bb.0: ; THUMB78-NEXT: uxtb r1, r1 ; THUMB78-NEXT: movs r2, #128 ; THUMB78-NEXT: lsr.w r1, r2, r1 ; THUMB78-NEXT: ands r0, r1 - ; THUMB78-NEXT: mov.w r1, #-1 - ; THUMB78-NEXT: uxtab r0, r1, r0 + ; THUMB78-NEXT: subs r0, #1 ; THUMB78-NEXT: clz r0, r0 ; THUMB78-NEXT: lsrs r0, r0, #5 ; THUMB78-NEXT: bx lr %t0 = lshr i8 128, %y %t1 = and i8 %t0, %x %res = icmp eq i8 %t1, 1 ; should be comparing with 0 ret i1 %res }
llvm/test/CodeGen/ARM/hoist-and-by-const-from-shl-in-eqcmp-zero.ll
Show All 18 Lines define i1 @scalar_i8_signbit_eq(i8 %x, i8 %y) nounwind { ; ARM-LABEL: scalar_i8_signbit_eq: ; ARM: @ %bb.0: ; ARM-NEXT: uxtb r1, r1 ; ARM-NEXT: uxtb r0, r0 ; ARM-NEXT: lsr r0, r0, r1 ; ARM-NEXT: mov r1, #1 - ; ARM-NEXT: uxtb r0, r0 ; ARM-NEXT: eor r0, r1, r0, lsr #7 ; ARM-NEXT: bx lr ; ; THUMB6-LABEL: scalar_i8_signbit_eq: ; THUMB6: @ %bb.0: ; THUMB6-NEXT: uxtb r1, r1 ; THUMB6-NEXT: uxtb r0, r0 ; THUMB6-NEXT: lsrs r0, r1 ; THUMB6-NEXT: movs r1, #128 ; THUMB6-NEXT: ands r1, r0 ; THUMB6-NEXT: rsbs r0, r1, #0 ; THUMB6-NEXT: adcs r0, r1 ; THUMB6-NEXT: bx lr ; ; THUMB7-LABEL: scalar_i8_signbit_eq: ; THUMB7: @ %bb.0: ; THUMB7-NEXT: uxtb r1, r1 ; THUMB7-NEXT: uxtb r0, r0 ; THUMB7-NEXT: lsrs r0, r1 ; THUMB7-NEXT: movs r1, #1 - ; THUMB7-NEXT: uxtb r0, r0 ; THUMB7-NEXT: eor.w r0, r1, r0, lsr #7 ; THUMB7-NEXT: bx lr ; ; THUMB8-LABEL: scalar_i8_signbit_eq: ; THUMB8: @ %bb.0: ; THUMB8-NEXT: uxtb r0, r0 ; THUMB8-NEXT: uxtb r1, r1 ; THUMB8-NEXT: lsrs r0, r1 ; THUMB8-NEXT: movs r1, #1 - ; THUMB8-NEXT: uxtb r0, r0 ; THUMB8-NEXT: eor.w r0, r1, r0, lsr #7 ; THUMB8-NEXT: bx lr %t0 = shl i8 128, %y %t1 = and i8 %t0, %x %res = icmp eq i8 %t1, 0 ret i1 %res }
▲ Show 20 Lines • Show All 91 Lines • ▼ Show 20 Lines define i1 @scalar_i16_signbit_eq(i16 %x, i16 %y) nounwind { ; ARM-LABEL: scalar_i16_signbit_eq: ; ARM: @ %bb.0: ; ARM-NEXT: uxth r1, r1 ; ARM-NEXT: uxth r0, r0 ; ARM-NEXT: lsr r0, r0, r1 ; ARM-NEXT: mov r1, #1 - ; ARM-NEXT: uxth r0, r0 ; ARM-NEXT: eor r0, r1, r0, lsr #15 ; ARM-NEXT: bx lr ; ; THUMB6-LABEL: scalar_i16_signbit_eq: ; THUMB6: @ %bb.0: ; THUMB6-NEXT: uxth r1, r1 ; THUMB6-NEXT: uxth r0, r0 ; THUMB6-NEXT: lsrs r0, r1 ; THUMB6-NEXT: movs r1, #1 ; THUMB6-NEXT: lsls r1, r1, #15 ; THUMB6-NEXT: ands r1, r0 ; THUMB6-NEXT: rsbs r0, r1, #0 ; THUMB6-NEXT: adcs r0, r1 ; THUMB6-NEXT: bx lr ; ; THUMB7-LABEL: scalar_i16_signbit_eq: ; THUMB7: @ %bb.0: ; THUMB7-NEXT: uxth r1, r1 ; THUMB7-NEXT: uxth r0, r0 ; THUMB7-NEXT: lsrs r0, r1 ; THUMB7-NEXT: movs r1, #1 - ; THUMB7-NEXT: uxth r0, r0 ; THUMB7-NEXT: eor.w r0, r1, r0, lsr #15 ; THUMB7-NEXT: bx lr ; ; THUMB8-LABEL: scalar_i16_signbit_eq: ; THUMB8: @ %bb.0: ; THUMB8-NEXT: uxth r0, r0 ; THUMB8-NEXT: uxth r1, r1 ; THUMB8-NEXT: lsrs r0, r1 ; THUMB8-NEXT: movs r1, #1 - ; THUMB8-NEXT: uxth r0, r0 ; THUMB8-NEXT: eor.w r0, r1, r0, lsr #15 ; THUMB8-NEXT: bx lr %t0 = shl i16 32768, %y %t1 = and i16 %t0, %x %res = icmp eq i16 %t1, 0 ret i1 %res }
▲ Show 20 Lines • Show All 761 Lines • ▼ Show 20 Lines;------------------------------------------------------------------------------;
define i1 @scalar_i8_signbit_ne(i8 %x, i8 %y) nounwind { ; ARM-LABEL: scalar_i8_signbit_ne: ; ARM: @ %bb.0: ; ARM-NEXT: uxtb r1, r1 ; ARM-NEXT: uxtb r0, r0 ; ARM-NEXT: lsr r0, r0, r1 - ; ARM-NEXT: uxtb r0, r0 ; ARM-NEXT: lsr r0, r0, #7 ; ARM-NEXT: bx lr ; ; THUMB6-LABEL: scalar_i8_signbit_ne: ; THUMB6: @ %bb.0: ; THUMB6-NEXT: uxtb r1, r1 ; THUMB6-NEXT: uxtb r0, r0 ; THUMB6-NEXT: lsrs r0, r1 - ; THUMB6-NEXT: uxtb r0, r0 ; THUMB6-NEXT: lsrs r0, r0, #7 ; THUMB6-NEXT: bx lr ; ; THUMB7-LABEL: scalar_i8_signbit_ne: ; THUMB7: @ %bb.0: ; THUMB7-NEXT: uxtb r1, r1 ; THUMB7-NEXT: uxtb r0, r0 ; THUMB7-NEXT: lsrs r0, r1 - ; THUMB7-NEXT: uxtb r0, r0 ; THUMB7-NEXT: lsrs r0, r0, #7 ; THUMB7-NEXT: bx lr ; ; THUMB8-LABEL: scalar_i8_signbit_ne: ; THUMB8: @ %bb.0: ; THUMB8-NEXT: uxtb r0, r0 ; THUMB8-NEXT: uxtb r1, r1 ; THUMB8-NEXT: lsrs r0, r1 - ; THUMB8-NEXT: uxtb r0, r0 ; THUMB8-NEXT: lsrs r0, r0, #7 ; THUMB8-NEXT: bx lr %t0 = shl i8 128, %y %t1 = and i8 %t0, %x %res = icmp ne i8 %t1, 0 ; we are perfectly happy with 'ne' predicate ret i1 %res }
▲ Show 20 Lines • Show All 161 Lines • ▼ Show 20 Lines %t1 = and i8 %t0, %x %res = icmp slt i8 %t1, 0 ret i1 %res }
define i1 @scalar_i8_signbit_eq_with_nonzero(i8 %x, i8 %y) nounwind { ; ARM-LABEL: scalar_i8_signbit_eq_with_nonzero: ; ARM: @ %bb.0: - ; ARM-NEXT: uxtb r1, r1 - ; ARM-NEXT: mvn r2, #127 - ; ARM-NEXT: and r0, r0, r2, lsl r1 - ; ARM-NEXT: mvn r1, #0 - ; ARM-NEXT: uxtab r0, r1, r0 - ; ARM-NEXT: clz r0, r0 - ; ARM-NEXT: lsr r0, r0, #5 + ; ARM-NEXT: mov r0, #0 ; ARM-NEXT: bx lr ; ; THUMB6-LABEL: scalar_i8_signbit_eq_with_nonzero: ; THUMB6: @ %bb.0: ; THUMB6-NEXT: uxtb r1, r1 ; THUMB6-NEXT: movs r2, #127 ; THUMB6-NEXT: mvns r2, r2 ; THUMB6-NEXT: lsls r2, r1 ; THUMB6-NEXT: ands r2, r0 ; THUMB6-NEXT: uxtb r0, r2 ; THUMB6-NEXT: subs r1, r0, #1 ; THUMB6-NEXT: rsbs r0, r1, #0 ; THUMB6-NEXT: adcs r0, r1 ; THUMB6-NEXT: bx lr ; ; THUMB78-LABEL: scalar_i8_signbit_eq_with_nonzero: ; THUMB78: @ %bb.0: - ; THUMB78-NEXT: uxtb r1, r1 - ; THUMB78-NEXT: mvn r2, #127 - ; THUMB78-NEXT: lsl.w r1, r2, r1 - ; THUMB78-NEXT: ands r0, r1 - ; THUMB78-NEXT: mov.w r1, #-1 - ; THUMB78-NEXT: uxtab r0, r1, r0 - ; THUMB78-NEXT: clz r0, r0 - ; THUMB78-NEXT: lsrs r0, r0, #5 + ; THUMB78-NEXT: movs r0, #0 ; THUMB78-NEXT: bx lr %t0 = shl i8 128, %y %t1 = and i8 %t0, %x %res = icmp eq i8 %t1, 1 ; should be comparing with 0 ret i1 %res }
llvm/test/CodeGen/BPF/shifts.ll
llvm/test/CodeGen/Mips/llvm-ir/lshr.ll
llvm/test/CodeGen/X86/avx2-shift.ll
llvm/test/CodeGen/X86/avx2-vector-shifts.ll
llvm/test/CodeGen/X86/hoist-and-by-const-from-lshr-in-eqcmp-zero.ll
llvm/test/CodeGen/X86/vector-fshl-128.ll
llvm/test/CodeGen/X86/vector-fshl-rot-128.ll
llvm/test/CodeGen/X86/vector-fshr-128.ll
llvm/test/CodeGen/X86/vector-fshr-rot-128.ll
llvm/test/CodeGen/X86/vector-rotate-128.ll
llvm/test/CodeGen/X86/vector-shift-lshr-128.ll
llvm/test/CodeGen/X86/vector-shift-lshr-sub128.ll
Recommend
About Joyk
Aggregate valuable and interesting links.
Joyk means Joy of geeK