diff options
| author | Anders O Nilsson <anders.o.nilsson@stericsson.com> | 2012-10-03 09:24:22 +0200 |
|---|---|---|
| committer | Steve Kondik <shade@chemlab.org> | 2013-07-24 12:59:42 -0700 |
| commit | 96d80eebbb1ba0c8a7b195514e82ac1118a88eb6 (patch) | |
| tree | 3acc014c85e7b32a746ce6756363724a53016fe0 /vm/compiler/codegen/arm/LocalOptimizations.cpp | |
| parent | d49ca93f47b83e1b03be8d610d6ee5ec680dee7f (diff) | |
JIT: Combine add with shift.
Optimize logical shift for ARM JIT.
Whenever logical shift is followed by an add,
try to replace it with an add capable of performing
the shift in the same instruction.
This improves performance for use cases
involving code executing in Dalvik.
Change-Id: I3cb807b6d6ef4b053a19e2703676a93a930eb963
Signed-off-by: Patrik Ryd <patrik.ryd@stericsson.com>
Diffstat (limited to 'vm/compiler/codegen/arm/LocalOptimizations.cpp')
| -rw-r--r-- | vm/compiler/codegen/arm/LocalOptimizations.cpp | 133 |
1 files changed, 133 insertions, 0 deletions
diff --git a/vm/compiler/codegen/arm/LocalOptimizations.cpp b/vm/compiler/codegen/arm/LocalOptimizations.cpp
index 8013d0059..cb35d745d 100644
--- a/vm/compiler/codegen/arm/LocalOptimizations.cpp
+++ b/vm/compiler/codegen/arm/LocalOptimizations.cpp
@@ -453,6 +453,157 @@ static void applyLoadHoisting(CompilationUnit *cUnit,
     }
 }
 
+/*
+ * Fold a logical shift into the add that consumes its result.
+ *
+ * Walks the LIR list from headLIR up to, but not including, tailLIR. When an
+ * lsl/lsr by immediate is followed (ignoring nops and pseudo opcodes) by an
+ * unshifted kThumb2AddRRR that reads the shift result as exactly one of its
+ * sources, a new add carrying the shift in operands[3] is inserted and the
+ * original shift and add are marked as nops.
+ *
+ * cUnit   - compilation unit; not referenced by this pass
+ * headLIR - first instruction to examine
+ * tailLIR - end marker; the scan stops as soon as it is reached
+ */
+static void applyShiftArithmeticOpts(CompilationUnit *cUnit,
+                                     ArmLIR *headLIR,
+                                     ArmLIR *tailLIR) {
+    ArmLIR *thisLIR = NULL;
+
+    for (thisLIR = headLIR;
+         thisLIR != tailLIR;
+         thisLIR = NEXT_LIR(thisLIR)) {
+
+        if(thisLIR->flags.isNop) {
+            continue;
+        }
+
+        if(thisLIR->opcode == kThumb2LslRRI5 || thisLIR->opcode == kThumb2LsrRRI5 ||
+           thisLIR->opcode == kThumbLslRRI5 || thisLIR->opcode == kThumbLsrRRI5) {
+
+            /* Find next that is not nop and not pseudo code */
+            ArmLIR *nextLIR = NULL;
+            for(nextLIR = NEXT_LIR(thisLIR);
+                nextLIR != tailLIR;
+                nextLIR = NEXT_LIR(nextLIR)) {
+                if (!nextLIR->flags.isNop && !isPseudoOpcode(nextLIR->opcode)) {
+                    break;
+                }
+            }
+
+            /* Only nops and pseudo opcodes remain: nothing left to combine */
+            if(nextLIR == tailLIR) {
+                return;
+            }
+
+            /*
+             * Exactly one add source may be the shift result. With
+             * "add rd, r9, r9" the rewritten add would still read r9 in
+             * rSrc1 although the shift producing it is turned into a nop.
+             */
+            bool shiftIsSrc1 = (nextLIR->operands[1] == thisLIR->operands[0]);
+            bool shiftIsSrc2 = (nextLIR->operands[2] == thisLIR->operands[0]);
+
+            /* operands[3] == 0: the add must not already carry a shift */
+            if(nextLIR->opcode == kThumb2AddRRR &&
+               nextLIR->operands[3] == 0 &&
+               shiftIsSrc1 != shiftIsSrc2) {
+
+                bool applyOpt = true;
+                if(!(thisLIR->operands[0] == nextLIR->operands[0])) {
+                    /*
+                     * The add does not overwrite the shift dest reg, so
+                     * check that it is not used after the addition: scanning
+                     * forward (branches are skipped), the first instruction
+                     * that touches it must not read it.
+                     */
+                    ArmLIR* tmpLIR = NULL;
+                    for(tmpLIR = NEXT_LIR(nextLIR);
+                        tmpLIR != tailLIR;
+                        tmpLIR = NEXT_LIR(tmpLIR)) {
+
+                        if (!tmpLIR->flags.isNop &&
+                            !(EncodingMap[tmpLIR->opcode].flags & IS_BRANCH) &&
+                            (tmpLIR->defMask | tmpLIR->useMask) & thisLIR->defMask) {
+                            if(tmpLIR->useMask & thisLIR->defMask) {
+                                /* Shift dest reg is used for src, skip opt. */
+                                applyOpt = false;
+                            }
+                            break;
+                        }
+                    }
+                }
+
+                if(applyOpt) {
+
+                    /*
+                     * Found lsl/lsr & add, use barrel shifter for add instead
+                     *
+                     * (1) Normal case
+                     *     [lsl/lsr] r9, r1, #x
+                     *     [add]     r0, r2, r9
+                     *
+                     * (2) Changing place of args for add
+                     *     [lsl/lsr] r9, r1, #x
+                     *     [add]     r0, r9, r2
+                     *
+                     * (3) Using r1 and r1 shifted as args for add
+                     *     [lsl/lsr] r9, r1, #x
+                     *     [add]     r0, r1, r9
+                     *
+                     * (4) Using r1 and r1 shifted as args for add, variant 2
+                     *     [lsl/lsr] r9, r1, #x
+                     *     [add]     r0, r9, r1
+                     *
+                     * Result:
+                     *     [add]     rDest, rSrc1, rSrc2, [lsl/lsr] x
+                     */
+
+                    int type = kArmLsl;
+                    if(thisLIR->opcode == kThumb2LsrRRI5 || thisLIR->opcode == kThumbLsrRRI5) {
+                        type = kArmLsr;
+                    }
+
+                    /*
+                     * Case 1 & 3 keep the original rSrc1. Case 2 & 4 move
+                     * the original rSrc2 to rSrc1 since the reg to be
+                     * shifted needs to be in rSrc2.
+                     */
+                    int rSrc1 = shiftIsSrc1 ? nextLIR->operands[2]
+                                            : nextLIR->operands[1];
+
+                    /* Reg to be shifted needs to be in rSrc2 */
+                    int rSrc2 = thisLIR->operands[1];
+
+                    /* Encode type of shift and amount */
+                    int shift = ((thisLIR->operands[2] & 0x1f) << 2) | type;
+
+                    /* Keep rDest, but change rSrc1, rSrc2 and use shift */
+                    ArmLIR* newLIR = (ArmLIR *)dvmCompilerNew(sizeof(ArmLIR), true);
+                    newLIR->opcode = nextLIR->opcode;
+                    newLIR->operands[0] = nextLIR->operands[0];
+                    newLIR->operands[1] = rSrc1;
+                    newLIR->operands[2] = rSrc2;
+                    newLIR->operands[3] = shift;
+                    dvmCompilerSetupResourceMasks(newLIR);
+                    dvmCompilerInsertLIRBefore((LIR *) nextLIR, (LIR *) newLIR);
+
+                    thisLIR->flags.isNop = true;
+                    nextLIR->flags.isNop = true;
+                }
+
+                /*
+                 * Avoid looping through nops already identified.
+                 * Continue directly after the updated instruction
+                 * instead.
+                 */
+                thisLIR = nextLIR;
+            }
+        }
+    }
+}
+
 void dvmCompilerApplyLocalOptimizations(CompilationUnit *cUnit, LIR *headLIR,
                                         LIR *tailLIR)
 {
@@ -463,4 +614,7 @@ void dvmCompilerApplyLocalOptimizations(CompilationUnit *cUnit, LIR *headLIR,
     if (!(gDvmJit.disableOpt & (1 << kLoadHoisting))) {
         applyLoadHoisting(cUnit, (ArmLIR *) headLIR, (ArmLIR *) tailLIR);
     }
+    if (!(gDvmJit.disableOpt & (1 << kShiftArithmetic))) {
+        applyShiftArithmeticOpts(cUnit, (ArmLIR *) headLIR, (ArmLIR* ) tailLIR);
+    }
 }
