ARM - OpenGrok cross reference for /external/swiftshader/third_party/llvm-7.0/llvm/lib/Target/ARM/

//===---------------------------------------------------------------------===//
// Random ideas for the ARM backend (Thumb specific).
//===---------------------------------------------------------------------===//

* Add support for compiling functions in both ARM and Thumb mode, then taking
  the smallest.

* Add support for compiling individual basic blocks in thumb mode, when in a
  larger ARM function.  This can be used for presumed cold code, like paths
  to abort (failure path of asserts), EH handling code, etc.

* Thumb doesn't have normal pre/post increment addressing modes, but you can
  load/store 32-bit integers with pre/postinc by using load/store multiple
  instrs with a single register.

* Make better use of high registers r8, r10, r11, r12 (ip). Some variants of add
  and cmp instructions can use high registers. Also, we can use them as
  temporaries to spill values into.

* In thumb mode, short, byte, and bool preferred alignments are currently set
  to 4 to accommodate ISA restriction (i.e. add sp, #imm, imm must be multiple
  of 4).

//===---------------------------------------------------------------------===//

Potential jumptable improvements:

* If we know function size is less than (1 << 16) * 2 bytes, we can use 16-bit
  jumptable entries (e.g. (L1 - L2) >> 1). Or even smaller entries if the
  function is even smaller. This also applies to ARM.

* Thumb jumptable codegen can improve given some help from the assembler. This
  is what we generate right now:

	.set PCRELV0, (LJTI1_0_0-(LPCRELL0+4))
LPCRELL0:
	mov r1, #PCRELV0
	add r1, pc
	ldr r0, [r0, r1]
	mov pc, r0
	.align	2
LJTI1_0_0:
	.long	 LBB1_3
        ...

Note there is another pc relative add that we can take advantage of.
     add r1, pc, #imm_8 * 4

We should be able to generate:

LPCRELL0:
	add r1, LJTI1_0_0
	ldr r0, [r0, r1]
	mov pc, r0
	.align	2
LJTI1_0_0:
	.long	 LBB1_3

if the assembler can translate the add to:
       add r1, pc, #((LJTI1_0_0-(LPCRELL0+4))&0xfffffffc)

Note the assembler also does something similar to constpool load:
LPCRELL0:
     ldr r0, LCPI1_0
=>
     ldr r0, pc, #((LCPI1_0-(LPCRELL0+4))&0xfffffffc)


//===---------------------------------------------------------------------===//

We compile the following:

define i16 @func_entry_2E_ce(i32 %i) {
        switch i32 %i, label %bb12.exitStub [
                 i32 0, label %bb4.exitStub
                 i32 1, label %bb9.exitStub
                 i32 2, label %bb4.exitStub
                 i32 3, label %bb4.exitStub
                 i32 7, label %bb9.exitStub
                 i32 8, label %bb.exitStub
                 i32 9, label %bb9.exitStub
        ]

bb12.exitStub:
        ret i16 0

bb4.exitStub:
        ret i16 1

bb9.exitStub:
        ret i16 2

bb.exitStub:
        ret i16 3
}

into:

_func_entry_2E_ce:
        mov r2, #1
        lsl r2, r0
        cmp r0, #9
        bhi LBB1_4      @bb12.exitStub
LBB1_1: @newFuncRoot
        mov r1, #13
        tst r2, r1
        bne LBB1_5      @bb4.exitStub
LBB1_2: @newFuncRoot
        ldr r1, LCPI1_0
        tst r2, r1
        bne LBB1_6      @bb9.exitStub
LBB1_3: @newFuncRoot
        mov r1, #1
        lsl r1, r1, #8
        tst r2, r1
        bne LBB1_7      @bb.exitStub
LBB1_4: @bb12.exitStub
        mov r0, #0
        bx lr
LBB1_5: @bb4.exitStub
        mov r0, #1
        bx lr
LBB1_6: @bb9.exitStub
        mov r0, #2
        bx lr
LBB1_7: @bb.exitStub
        mov r0, #3
        bx lr
LBB1_8:
        .align  2
LCPI1_0:
        .long   642


gcc compiles to:

	cmp	r0, #9
	@ lr needed for prologue
	bhi	L2
	ldr	r3, L11
	mov	r2, #1
	mov	r1, r2, asl r0
	ands	r0, r3, r2, asl r0
	movne	r0, #2
	bxne	lr
	tst	r1, #13
	beq	L9
L3:
	mov	r0, r2
	bx	lr
L9:
	tst	r1, #256
	movne	r0, #3
	bxne	lr
L2:
	mov	r0, #0
	bx	lr
L12:
	.align 2
L11:
	.long	642


GCC is doing a couple of clever things here:
  1. It is predicating one of the returns.  This isn't a clear win though: in
     cases where that return isn't taken, it is replacing one condbranch with
     two 'ne' predicated instructions.
  2. It is sinking the shift of "1 << i" into the tst, and using ands instead of
     tst.  This will probably require whole function isel.
  3. GCC emits:
  	tst	r1, #256
     we emit:
        mov r1, #1
        lsl r1, r1, #8
        tst r2, r1

//===---------------------------------------------------------------------===//

When spilling in thumb mode and the sp offset is too large to fit in the ldr /
str offset field, we load the offset from a constpool entry and add it to sp:

ldr r2, LCPI
add r2, sp
ldr r2, [r2]

These instructions preserve the condition code which is important if the spill
is between a cmp and a bcc instruction. However, we can use the (potentially)
cheaper sequnce if we know it's ok to clobber the condition register.

add r2, sp, #255 * 4
add r2, #132
ldr r2, [r2, #7 * 4]

This is especially bad when dynamic alloca is used. The all fixed size stack
objects are referenced off the frame pointer with negative offsets. See
oggenc for an example.

//===---------------------------------------------------------------------===//

Poor codegen test/CodeGen/ARM/select.ll f7:

	ldr r5, LCPI1_0
LPC0:
	add r5, pc
	ldr r6, LCPI1_1
	ldr r2, LCPI1_2
	mov r3, r6
	mov lr, pc
	bx r5

//===---------------------------------------------------------------------===//

Make register allocator / spiller smarter so we can re-materialize "mov r, imm",
etc. Almost all Thumb instructions clobber condition code.

//===---------------------------------------------------------------------===//

Thumb load / store address mode offsets are scaled. The values kept in the
instruction operands are pre-scale values. This probably ought to be changed
to avoid extra work when we convert Thumb2 instructions to Thumb1 instructions.

//===---------------------------------------------------------------------===//

We need to make (some of the) Thumb1 instructions predicable. That will allow
shrinking of predicated Thumb2 instructions. To allow this, we need to be able
to toggle the 's' bit since they do not set CPSR when they are inside IT blocks.

//===---------------------------------------------------------------------===//

Make use of hi register variants of cmp: tCMPhir / tCMPZhir.

//===---------------------------------------------------------------------===//

Thumb1 immediate field sometimes keep pre-scaled values. See
ThumbRegisterInfo::eliminateFrameIndex. This is inconsistent from ARM and
Thumb2.

//===---------------------------------------------------------------------===//

Rather than having tBR_JTr print a ".align 2" and constant island pass pad it,
add a target specific ALIGN instruction instead. That way, getInstSizeInBytes
won't have to over-estimate. It can also be used for loop alignment pass.

//===---------------------------------------------------------------------===//

We generate conditional code for icmp when we don't need to. This code:

  int foo(int s) {
    return s == 1;
  }

produces:

foo:
        cmp     r0, #1
        mov.w   r0, #0
        it      eq
        moveq   r0, #1
        bx      lr

when it could use subs + adcs. This is GCC PR46975.
Name		Date	Size	#Lines	LOC
..		-	-
AsmParser/		23-Nov-2023	-	10,689	8,418
Disassembler/		23-Nov-2023	-	5,392	4,502
InstPrinter/		23-Nov-2023	-	1,829	1,497
MCTargetDesc/		23-Nov-2023	-	8,144	5,726
TargetInfo/		23-Nov-2023	-	70	54
Utils/		23-Nov-2023	-	239	164
A15SDOptimizer.cpp	D	23-Nov-2023	24 KiB	692	457
ARM.h	D	23-Nov-2023	2.5 KiB	76	50
ARM.td	D	23-Nov-2023	59.7 KiB	1,099	900
ARMAsmPrinter.cpp	D	23-Nov-2023	74.9 KiB	2,054	1,471
ARMAsmPrinter.h	D	23-Nov-2023	5.7 KiB	160	86
ARMBaseInstrInfo.cpp	D	23-Nov-2023	173.1 KiB	5,064	3,966
ARMBaseInstrInfo.h	D	23-Nov-2023	24.8 KiB	564	321
ARMBaseRegisterInfo.cpp	D	23-Nov-2023	32.2 KiB	860	627
ARMBaseRegisterInfo.h	D	23-Nov-2023	7.7 KiB	217	139
ARMBasicBlockInfo.h	D	23-Nov-2023	3.9 KiB	110	38
ARMCallLowering.cpp	D	23-Nov-2023	20.1 KiB	594	429
ARMCallLowering.h	D	23-Nov-2023	2.1 KiB	63	34
ARMCallingConv.h	D	23-Nov-2023	10.7 KiB	293	203
ARMCallingConv.td	D	23-Nov-2023	13.5 KiB	319	242
ARMCodeGenPrepare.cpp	D	23-Nov-2023	22.7 KiB	751	545
ARMComputeBlockSize.cpp	D	23-Nov-2023	2.4 KiB	82	53
ARMConstantIslandPass.cpp	D	23-Nov-2023	89.4 KiB	2,385	1,542
ARMConstantPoolValue.cpp	D	23-Nov-2023	11.5 KiB	299	229
ARMConstantPoolValue.h	D	23-Nov-2023	10.2 KiB	285	195
ARMExpandPseudoInsts.cpp	D	23-Nov-2023	83 KiB	1,945	1,616
ARMFastISel.cpp	D	23-Nov-2023	106.3 KiB	3,089	2,346
ARMFeatures.h	D	23-Nov-2023	2.4 KiB	98	74
ARMFrameLowering.cpp	D	23-Nov-2023	94.9 KiB	2,502	1,766
ARMFrameLowering.h	D	23-Nov-2023	3.5 KiB	89	58
ARMHazardRecognizer.cpp	D	23-Nov-2023	3.4 KiB	102	74
ARMHazardRecognizer.h	D	23-Nov-2023	1.5 KiB	50	24
ARMISelDAGToDAG.cpp	D	23-Nov-2023	165.7 KiB	4,333	3,424
ARMISelLowering.cpp	D	23-Nov-2023	577.1 KiB	14,995	10,844
ARMISelLowering.h	D	23-Nov-2023	34 KiB	810	524
ARMInstrFormats.td	D	23-Nov-2023	87.5 KiB	2,621	2,322
ARMInstrInfo.cpp	D	23-Nov-2023	5.1 KiB	166	135
ARMInstrInfo.h	D	23-Nov-2023	1.8 KiB	55	24
ARMInstrInfo.td	D	23-Nov-2023	237.1 KiB	6,168	5,465
ARMInstrNEON.td	D	23-Nov-2023	410.7 KiB	8,546	7,790
ARMInstrThumb.td	D	23-Nov-2023	63.4 KiB	1,708	1,494
ARMInstrThumb2.td	D	23-Nov-2023	187.4 KiB	4,868	4,315
ARMInstrVFP.td	D	23-Nov-2023	95.8 KiB	2,483	2,139
ARMInstructionSelector.cpp	D	23-Nov-2023	32.4 KiB	966	764
ARMLegalizerInfo.cpp	D	23-Nov-2023	17.4 KiB	423	324
ARMLegalizerInfo.h	D	23-Nov-2023	2.4 KiB	66	28
ARMLoadStoreOptimizer.cpp	D	23-Nov-2023	85 KiB	2,448	1,896
ARMMCInstLower.cpp	D	23-Nov-2023	7.3 KiB	234	176
ARMMachineFunctionInfo.cpp	D	23-Nov-2023	639	20	7
ARMMachineFunctionInfo.h	D	23-Nov-2023	8.9 KiB	247	123
ARMMacroFusion.cpp	D	23-Nov-2023	2.8 KiB	85	48
ARMMacroFusion.h	D	23-Nov-2023	839	25	4
ARMOptimizeBarriersPass.cpp	D	23-Nov-2023	3.4 KiB	108	64
ARMParallelDSP.cpp	D	23-Nov-2023	23.5 KiB	673	473
ARMPerfectShuffle.h	D	23-Nov-2023	382 KiB	6,592	6,567
ARMRegisterBankInfo.cpp	D	23-Nov-2023	16.3 KiB	444	370
ARMRegisterBankInfo.h	D	23-Nov-2023	1.3 KiB	44	21
ARMRegisterBanks.td	D	23-Nov-2023	549	15	13
ARMRegisterInfo.cpp	D	23-Nov-2023	657	20	4
ARMRegisterInfo.h	D	23-Nov-2023	817	32	12
ARMRegisterInfo.td	D	23-Nov-2023	19.9 KiB	482	427
ARMSchedule.td	D	23-Nov-2023	15.1 KiB	429	406
ARMScheduleA57.td	D	23-Nov-2023	62.4 KiB	1,503	1,280
ARMScheduleA57WriteRes.td	D	23-Nov-2023	11.4 KiB	324	308
ARMScheduleA8.td	D	23-Nov-2023	49.6 KiB	1,076	1,060
ARMScheduleA9.td	D	23-Nov-2023	130.3 KiB	2,580	2,452
ARMScheduleM3.td	D	23-Nov-2023	829	22	19
ARMScheduleR52.td	D	23-Nov-2023	44.2 KiB	929	801
ARMScheduleSwift.td	D	23-Nov-2023	50.5 KiB	1,094	1,014
ARMScheduleV6.td	D	23-Nov-2023	12.3 KiB	301	290
ARMSelectionDAGInfo.cpp	D	23-Nov-2023	9.2 KiB	257	185
ARMSelectionDAGInfo.h	D	23-Nov-2023	2.8 KiB	70	42
ARMSubtarget.cpp	D	23-Nov-2023	13.4 KiB	398	270
ARMSubtarget.h	D	23-Nov-2023	27.9 KiB	804	448
ARMSystemRegister.td	D	23-Nov-2023	5.4 KiB	157	135
ARMTargetMachine.cpp	D	23-Nov-2023	18 KiB	516	371
ARMTargetMachine.h	D	23-Nov-2023	3.4 KiB	99	63
ARMTargetObjectFile.cpp	D	23-Nov-2023	3.2 KiB	89	59
ARMTargetObjectFile.h	D	23-Nov-2023	1.6 KiB	46	25
ARMTargetTransformInfo.cpp	D	23-Nov-2023	25.2 KiB	633	462
ARMTargetTransformInfo.h	D	23-Nov-2023	6.8 KiB	192	120
CMakeLists.txt	D	23-Nov-2023	1.9 KiB	67	62
LICENSE.TXT	D	23-Nov-2023	2.7 KiB	48	40
LLVMBuild.txt	D	23-Nov-2023	1 KiB	36	32
MLxExpansionPass.cpp	D	23-Nov-2023	11.7 KiB	396	303
README-Thumb.txt	D	23-Nov-2023	7 KiB	262	204
README-Thumb2.txt	D	23-Nov-2023	308	7	5
README.txt	D	23-Nov-2023	22.2 KiB	733	555
Thumb1FrameLowering.cpp	D	23-Nov-2023	36 KiB	1,001	744
Thumb1FrameLowering.h	D	23-Nov-2023	3.5 KiB	90	34
Thumb1InstrInfo.cpp	D	23-Nov-2023	5.8 KiB	157	115
Thumb1InstrInfo.h	D	23-Nov-2023	2.3 KiB	63	32
Thumb2ITBlockPass.cpp	D	23-Nov-2023	9.6 KiB	322	225
Thumb2InstrInfo.cpp	D	23-Nov-2023	22.7 KiB	685	546
Thumb2InstrInfo.h	D	23-Nov-2023	2.9 KiB	75	37
Thumb2SizeReduction.cpp	D	23-Nov-2023	38.9 KiB	1,127	835
ThumbRegisterInfo.cpp	D	23-Nov-2023	23.5 KiB	622	458
ThumbRegisterInfo.h	D	23-Nov-2023	2.6 KiB	67	38