JIT: Combine add with shift.

Optimize logical shifts for the ARM JIT. Whenever a logical shift is directly followed by an add that consumes its result, try to replace the pair with a single add that performs the shift on its second source operand in the same instruction. This improves performance for use cases involving code executing in Dalvik. Change-Id: I3cb807b6d6ef4b053a19e2703676a93a930eb963 Signed-off-by: Patrik Ryd <patrik.ryd@stericsson.com>
author: Anders O Nilsson <anders.o.nilsson@stericsson.com> 2012-10-03 09:24:22 +0200
committer: Steve Kondik <shade@chemlab.org> 2013-07-24 12:59:42 -0700
commit: 96d80eebbb1ba0c8a7b195514e82ac1118a88eb6 (patch)
tree: 3acc014c85e7b32a746ce6756363724a53016fe0 /vm/compiler/codegen/arm/LocalOptimizations.cpp
parent: d49ca93f47b83e1b03be8d610d6ee5ec680dee7f (diff)
1 files changed, 133 insertions, 0 deletions
diff --git a/vm/compiler/codegen/arm/LocalOptimizations.cpp b/vm/compiler/codegen/arm/LocalOptimizations.cpp
index 8013d0059..cb35d745d 100644
--- a/vm/compiler/codegen/arm/LocalOptimizations.cpp
+++ b/vm/compiler/codegen/arm/LocalOptimizations.cpp
@@ -453,6 +453,136 @@ static void applyLoadHoisting(CompilationUnit *cUnit,
     }
 }
 
+/*
+ * Fold an lsl/lsr-by-immediate into the add that directly follows it, using
+ * the add's shifted-operand form. Walks the LIRs in [headLIR, tailLIR).
+ */
+static void applyShiftArithmeticOpts(CompilationUnit *cUnit,
+                                     ArmLIR *headLIR,
+                                     ArmLIR *tailLIR) {
+    ArmLIR *thisLIR = NULL;
+
+    for (thisLIR = headLIR;
+         thisLIR != tailLIR;
+         thisLIR = NEXT_LIR(thisLIR)) {
+
+        if(thisLIR->flags.isNop) {
+            continue;
+        }
+
+        if(thisLIR->opcode == kThumb2LslRRI5 || thisLIR->opcode == kThumb2LsrRRI5 ||
+           thisLIR->opcode == kThumbLslRRI5 || thisLIR->opcode == kThumbLsrRRI5) {
+
+            /* Find next that is not nop and not pseudo code */
+            ArmLIR *nextLIR = NULL;
+            for(nextLIR = NEXT_LIR(thisLIR);
+                nextLIR != tailLIR;
+                nextLIR = NEXT_LIR(nextLIR)) {
+                if (!nextLIR->flags.isNop && !isPseudoOpcode(nextLIR->opcode)) {
+                    break;
+                }
+            }
+            /* Only nops/pseudo ops remain before tailLIR: nothing left to pair. */
+            if(nextLIR == tailLIR) {
+                return;
+            }
+            /* Fold only when exactly ONE add source is the shift dest; in
+             * "add rD, r9, r9" the unshifted source would read a stale r9. */
+            if(nextLIR->opcode == kThumb2AddRRR && nextLIR->operands[3] == 0 &&
+               ((nextLIR->operands[1] == thisLIR->operands[0]) !=
+                (nextLIR->operands[2] == thisLIR->operands[0]))) {
+
+                bool applyOpt = true;
+                if(!(thisLIR->operands[0] == nextLIR->operands[0])) {
+                    /* Shift dest survives the add: scan forward to the first
+                     * LIR touching it and give up if that LIR reads it. */
+                    ArmLIR* tmpLIR = NULL;
+                    for(tmpLIR = NEXT_LIR(nextLIR);
+                        tmpLIR != tailLIR;
+                        tmpLIR = NEXT_LIR(tmpLIR)) {
+
+                        if (!tmpLIR->flags.isNop &&
+                            !(EncodingMap[tmpLIR->opcode].flags & IS_BRANCH) &&
+                            (tmpLIR->defMask | tmpLIR->useMask) & thisLIR->defMask) {
+                            if(tmpLIR->useMask & thisLIR->defMask) {
+                                /* Shift dest reg is used for src, skip opt. */
+                                applyOpt = false;
+                            }
+                            break;
+                        }
+                    }
+                }
+
+                if(applyOpt) {
+
+                    /*
+                     *  Found lsl/lsr & add, use barrel shifter for add instead
+                     *
+                     *   (1) Normal case
+                     *   [lsl/lsr] r9, r1, #x
+                     *   [add]     r0, r2, r9
+                     *
+                     *   (2) Changing place of args for add
+                     *   [lsl/lsr] r9, r1, #x
+                     *   [add]     r0, r9, r2
+                     *
+                     *   (3) Using r1 and r1 shifted as args for add
+                     *   [lsl/lsr] r9, r1, #x
+                     *   [add]     r0, r1, r9
+                     *
+                     *   (4) Using r1 and r1 shifted as args for add, variant 2
+                     *   [lsl/lsr] r9, r1, #x
+                     *   [add]     r0, r9, r1
+                     *
+                     *   Result:
+                     *   [add]     rDest, rSrc1, rSrc2, [lsl/lsr] x
+                     */
+
+                    int type = kArmLsl;
+                    if(thisLIR->opcode == kThumb2LsrRRI5 || thisLIR->opcode == kThumbLsrRRI5) {
+                        type = kArmLsr;
+                    }
+
+                    /* For most cases keep original rSrc1 */
+                    int rSrc1 = nextLIR->operands[1];
+
+                    if(thisLIR->operands[0] == nextLIR->operands[1]) {
+                        /* Case 2 & 4: move original rSrc2 to rSrc1 since
+                           reg to be shifted need to be in rSrc2 */
+                        rSrc1 = nextLIR->operands[2];
+                    }
+
+                    /* Reg to be shifted need to be in rSrc2 */
+                    int rSrc2 = thisLIR->operands[1];
+
+                    /* Encode type of shift and amount */
+                    int shift = ((thisLIR->operands[2] & 0x1f) << 2) | type;
+
+                    /* Keep rDest, but change rSrc1, rSrc2 and use shift */
+                    ArmLIR* newLIR = (ArmLIR *)dvmCompilerNew(sizeof(ArmLIR), true);
+                    newLIR->opcode = nextLIR->opcode;
+                    newLIR->operands[0] = nextLIR->operands[0];
+                    newLIR->operands[1] = rSrc1;
+                    newLIR->operands[2] = rSrc2;
+                    newLIR->operands[3] = shift;
+                    dvmCompilerSetupResourceMasks(newLIR);
+                    dvmCompilerInsertLIRBefore((LIR *) nextLIR, (LIR *) newLIR);
+
+                    thisLIR->flags.isNop = true;
+                    nextLIR->flags.isNop = true;
+                }
+
+                /*
+                 * Avoid looping through nops already identified.
+                 * Continue directly after the updated instruction
+                 * instead.
+                 */
+                thisLIR = nextLIR;
+            }
+        }
+    }
+}
+
 void dvmCompilerApplyLocalOptimizations(CompilationUnit *cUnit, LIR *headLIR,
                                         LIR *tailLIR)
 {
@@ -463,4 +593,7 @@ void dvmCompilerApplyLocalOptimizations(CompilationUnit *cUnit, LIR *headLIR,
     if (!(gDvmJit.disableOpt & (1 << kLoadHoisting))) {
         applyLoadHoisting(cUnit, (ArmLIR *) headLIR, (ArmLIR *) tailLIR);
     }
+    if (!(gDvmJit.disableOpt & (1 << kShiftArithmetic))) {
+        applyShiftArithmeticOpts(cUnit, (ArmLIR *) headLIR, (ArmLIR* ) tailLIR);
+    }
 }
author	Anders O Nilsson <anders.o.nilsson@stericsson.com>	2012-10-03 09:24:22 +0200
committer	Steve Kondik <shade@chemlab.org>	2013-07-24 12:59:42 -0700
commit	96d80eebbb1ba0c8a7b195514e82ac1118a88eb6 (patch)
tree	3acc014c85e7b32a746ce6756363724a53016fe0 /vm/compiler/codegen/arm/LocalOptimizations.cpp
parent	d49ca93f47b83e1b03be8d610d6ee5ec680dee7f (diff)