diff options
| author | Elliott Hughes <enh@google.com> | 2010-02-24 16:36:18 -0800 |
|---|---|---|
| committer | Elliott Hughes <enh@google.com> | 2010-02-24 16:36:18 -0800 |
| commit | b4c05977c28c38d2f81b48d0cb15559dc3d05564 (patch) | |
| tree | b40cc2cc302a088b931eb97bdc0fd6997d96f41f /vm/compiler/codegen | |
| parent | 88a0f970e47dc0091d2c9965aa9bd06667e5f4b7 (diff) | |
Optimize more easy multiplications by constants.
Rather than make these changes in the libraries (*10 being a common case),
let's do them once and for all in the JIT.
The 2^n-1 case could be better if we generated RSB instructions, but the
current "fake" RSB is still better than a full multiply.
Thumb doesn't support reg/reg/reg/shift instructions, so we can't optimize
the "population count <= 2" cases (such as *10) there.
Tested on sholes, passion, and passion-running-sapphire (and visually
inspected to check we weren't trying to generate Thumb2 instructions there).
Also tested with the self-verifier.
Diffstat (limited to 'vm/compiler/codegen')
| -rw-r--r-- | vm/compiler/codegen/arm/ArmLIR.h | 2 | ||||
| -rw-r--r-- | vm/compiler/codegen/arm/Assemble.c | 2 | ||||
| -rw-r--r-- | vm/compiler/codegen/arm/CodegenDriver.c | 90 | ||||
| -rw-r--r-- | vm/compiler/codegen/arm/Thumb/Gen.c | 9 | ||||
| -rw-r--r-- | vm/compiler/codegen/arm/Thumb2/Gen.c | 11 |
5 files changed, 92 insertions, 22 deletions
diff --git a/vm/compiler/codegen/arm/ArmLIR.h b/vm/compiler/codegen/arm/ArmLIR.h index b329aedb7..7d262215e 100644 --- a/vm/compiler/codegen/arm/ArmLIR.h +++ b/vm/compiler/codegen/arm/ArmLIR.h @@ -706,7 +706,7 @@ extern ArmEncodingMap EncodingMap[kArmLast]; /* * Each instance of this struct holds a pseudo or real LIR instruction: - * - pesudo ones (eg labels and marks) and will be discarded by the assembler. + * - pseudo ones (eg labels and marks) and will be discarded by the assembler. * - real ones will be assembled into Thumb instructions. * * Machine resources are encoded into a 64-bit vector, where the encodings are diff --git a/vm/compiler/codegen/arm/Assemble.c b/vm/compiler/codegen/arm/Assemble.c index 09783f232..7f8ad9edf 100644 --- a/vm/compiler/codegen/arm/Assemble.c +++ b/vm/compiler/codegen/arm/Assemble.c @@ -36,7 +36,7 @@ * s2e: src2 end bit position * operands: number of operands (for sanity check purposes) * name: mnemonic name - * fmt: for pretty-prining + * fmt: for pretty-printing */ #define ENCODING_MAP(opcode, skeleton, k0, ds, de, k1, s1s, s1e, k2, s2s, s2e, \ k3, k3s, k3e, flags, name, fmt, size) \ diff --git a/vm/compiler/codegen/arm/CodegenDriver.c b/vm/compiler/codegen/arm/CodegenDriver.c index 4048c0a6d..f117de8f1 100644 --- a/vm/compiler/codegen/arm/CodegenDriver.c +++ b/vm/compiler/codegen/arm/CodegenDriver.c @@ -1844,20 +1844,75 @@ static bool handleFmt21t(CompilationUnit *cUnit, MIR *mir, BasicBlock *bb, return false; } -/* Positive power of 2? Return shiftCount if so, -1 if not */ -static int mulToShift(int val) { - int shiftCount; - if (val > 0 && ((val & (val - 1)) == 0)) { - shiftCount = 0; - val = ~val & (val - 1); - while (val) { - shiftCount++; - val >>= 1; - } +static bool isPowerOfTwo(int x) +{ + return (x & (x - 1)) == 0; +} + +// Returns true if no more than two bits are set in 'x'. +static bool isPopCountLE2(unsigned int x) +{ + x &= x - 1; + return (x & (x - 1)) == 0; +} + +// Returns the index of the lowest set bit in 'x'. +static int lowestSetBit(unsigned int x) { + int bit_posn = 0; + while ((x & 0xf) == 0) { + bit_posn += 4; + x >>= 4; + } + while ((x & 1) == 0) { + bit_posn++; + x >>= 1; + } + return bit_posn; +} + +// Returns true if it added instructions to 'cUnit' to multiply 'rlSrc' by 'lit' +// and store the result in 'rlDest'. +static bool handleEasyMultiply(CompilationUnit *cUnit, + RegLocation rlSrc, RegLocation rlDest, int lit) +{ + // Can we simplify this multiplication? + bool powerOfTwo = false; + bool popCountLE2 = false; + bool powerOfTwoMinusOne = false; + if (lit < 2) { + // Avoid special cases. + return false; + } else if (isPowerOfTwo(lit)) { + powerOfTwo = true; + } else if (isPopCountLE2(lit)) { + popCountLE2 = true; + } else if (isPowerOfTwo(lit + 1)) { + powerOfTwoMinusOne = true; + } else { + return false; + } + rlSrc = loadValue(cUnit, rlSrc, kCoreReg); + RegLocation rlResult = dvmCompilerEvalLoc(cUnit, rlDest, kCoreReg, true); + if (powerOfTwo) { + // Shift. + opRegRegImm(cUnit, kOpLsl, rlResult.lowReg, rlSrc.lowReg, + lowestSetBit(lit)); + } else if (popCountLE2) { + // Shift and add and shift. + int firstBit = lowestSetBit(lit); + int secondBit = lowestSetBit(lit ^ (1 << firstBit)); + genMultiplyByTwoBitMultiplier(cUnit, rlSrc, rlResult, lit, + firstBit, secondBit); } else { - shiftCount = -1; + // Reverse subtract: (src << (shift + 1)) - src. + assert(powerOfTwoMinusOne); + // TODO: rsb dst, src, src lsl#lowestSetBit(lit + 1) + int tReg = dvmCompilerAllocTemp(cUnit); + opRegRegImm(cUnit, kOpLsl, tReg, rlSrc.lowReg, lowestSetBit(lit + 1)); + opRegRegReg(cUnit, kOpSub, rlResult.lowReg, tReg, rlSrc.lowReg); } - return shiftCount; + storeValue(cUnit, rlDest, rlResult); + return true; } static bool handleFmt22b_Fmt22s(CompilationUnit *cUnit, MIR *mir) @@ -1896,15 +1951,10 @@ static bool handleFmt22b_Fmt22s(CompilationUnit *cUnit, MIR *mir) break; case OP_MUL_INT_LIT8: case OP_MUL_INT_LIT16: { - // TUNING: General shift & add for small constants - int shiftCount = mulToShift(lit); - if (shiftCount >= 0) { - op = kOpLsl; - lit = shiftCount; - shiftOp = true; - } else { - op = kOpMul; + if (handleEasyMultiply(cUnit, rlSrc, rlDest, lit)) { + return false; } + op = kOpMul; break; } case OP_AND_INT_LIT8: diff --git a/vm/compiler/codegen/arm/Thumb/Gen.c b/vm/compiler/codegen/arm/Thumb/Gen.c index a274e1928..35c4451b7 100644 --- a/vm/compiler/codegen/arm/Thumb/Gen.c +++ b/vm/compiler/codegen/arm/Thumb/Gen.c @@ -289,3 +289,12 @@ static bool genInlinedAbsLong(CompilationUnit *cUnit, MIR *mir) dvmCompilerClobber(cUnit, ophi); return false; } + +static void genMultiplyByTwoBitMultiplier(CompilationUnit *cUnit, + RegLocation rlSrc, RegLocation rlResult, int lit, + int firstBit, int secondBit) +{ + // We can't implement "add src, src, src, lsl#shift" on Thumb, so we have + // to do a regular multiply. + opRegRegImm(cUnit, kOpMul, rlResult.lowReg, rlSrc.lowReg, lit); +} diff --git a/vm/compiler/codegen/arm/Thumb2/Gen.c b/vm/compiler/codegen/arm/Thumb2/Gen.c index 1390d64c7..dbce45284 100644 --- a/vm/compiler/codegen/arm/Thumb2/Gen.c +++ b/vm/compiler/codegen/arm/Thumb2/Gen.c @@ -442,3 +442,14 @@ static bool genInlinedAbsLong(CompilationUnit *cUnit, MIR *mir) storeValueWide(cUnit, rlDest, rlResult); return false; } + +static void genMultiplyByTwoBitMultiplier(CompilationUnit *cUnit, + RegLocation rlSrc, RegLocation rlResult, int lit, + int firstBit, int secondBit) +{ + opRegRegRegShift(cUnit, kOpAdd, rlResult.lowReg, rlSrc.lowReg, rlSrc.lowReg, + encodeShift(kArmLsl, secondBit - firstBit)); + if (firstBit != 0) { + opRegRegImm(cUnit, kOpLsl, rlResult.lowReg, rlResult.lowReg, firstBit); + } +} |
