summaryrefslogtreecommitdiff
path: root/cpu_ref/rsCpuScript.cpp
diff options
context:
space:
mode:
author: Chris Wailes <chriswailes@google.com> 2014-07-16 15:18:30 -0700
committer: Chris Wailes <chriswailes@google.com> 2014-08-15 18:26:36 -0700
commit: f37121300217d3b39ab66dd9c8881bcbcad932df (patch)
tree: 2e06a02e490f318cc03b95b03112860e3d38e9a6 /cpu_ref/rsCpuScript.cpp
parent: cd8df40f07d88f896bea05ed06a20d1a4d5e9728 (diff)
Collapse code paths for single- and multi-input kernels.
This patch simplifies the RenderScript driver and CPU reference implementation by removing the distinction between single- and multi-input kernels in many places. The distinction is maintained in some places due to the need to maintain backwards compatibility. This permits the deletion of some functions and struct members that are no longer needed. Several related functions were also cleaned up. Change-Id: Id70a223ea5e3aa2b0b935b2b7f9af933339ae8a4
Diffstat (limited to 'cpu_ref/rsCpuScript.cpp')
-rw-r--r--cpu_ref/rsCpuScript.cpp214
1 files changed, 49 insertions, 165 deletions
diff --git a/cpu_ref/rsCpuScript.cpp b/cpu_ref/rsCpuScript.cpp
index a11fda19..05984207 100644
--- a/cpu_ref/rsCpuScript.cpp
+++ b/cpu_ref/rsCpuScript.cpp
@@ -789,144 +789,33 @@ void RsdCpuScriptImpl::populateScript(Script *script) {
typedef void (*rs_t)(const void *, void *, const void *, uint32_t, uint32_t, uint32_t, uint32_t);
-void RsdCpuScriptImpl::forEachMtlsSetup(const Allocation * ain, Allocation * aout,
+void RsdCpuScriptImpl::forEachMtlsSetup(const Allocation ** ains,
+ uint32_t inLen,
+ Allocation * aout,
const void * usr, uint32_t usrLen,
const RsScriptCall *sc,
MTLaunchStruct *mtls) {
memset(mtls, 0, sizeof(MTLaunchStruct));
- // possible for this to occur if IO_OUTPUT/IO_INPUT with no bound surface
- if (ain && (const uint8_t *)ain->mHal.drvState.lod[0].mallocPtr == NULL) {
- mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT, "rsForEach called with null in allocations");
- return;
- }
- if (aout && (const uint8_t *)aout->mHal.drvState.lod[0].mallocPtr == NULL) {
- mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT, "rsForEach called with null out allocations");
- return;
- }
-
- if (ain != NULL) {
- const Type *inType = ain->getType();
+ for (int index = inLen; --index >= 0;) {
+ const Allocation* ain = ains[index];
- mtls->fep.dimX = inType->getDimX();
- mtls->fep.dimY = inType->getDimY();
- mtls->fep.dimZ = inType->getDimZ();
-
- } else if (aout != NULL) {
- const Type *outType = aout->getType();
-
- mtls->fep.dimX = outType->getDimX();
- mtls->fep.dimY = outType->getDimY();
- mtls->fep.dimZ = outType->getDimZ();
-
- } else {
- mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT, "rsForEach called with null allocations");
- return;
- }
-
- if (ain != NULL && aout != NULL) {
- if (!ain->hasSameDims(aout)) {
+ // possible for this to occur if IO_OUTPUT/IO_INPUT with no bound surface
+ if (ain != NULL && (const uint8_t *)ain->mHal.drvState.lod[0].mallocPtr == NULL) {
mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT,
- "Failed to launch kernel; dimensions of input and output allocations do not match.");
-
+ "rsForEach called with null in allocations");
return;
}
}
- if (!sc || (sc->xEnd == 0)) {
- mtls->xEnd = mtls->fep.dimX;
- } else {
- rsAssert(sc->xStart < mtls->fep.dimX);
- rsAssert(sc->xEnd <= mtls->fep.dimX);
- rsAssert(sc->xStart < sc->xEnd);
- mtls->xStart = rsMin(mtls->fep.dimX, sc->xStart);
- mtls->xEnd = rsMin(mtls->fep.dimX, sc->xEnd);
- if (mtls->xStart >= mtls->xEnd) return;
- }
-
- if (!sc || (sc->yEnd == 0)) {
- mtls->yEnd = mtls->fep.dimY;
- } else {
- rsAssert(sc->yStart < mtls->fep.dimY);
- rsAssert(sc->yEnd <= mtls->fep.dimY);
- rsAssert(sc->yStart < sc->yEnd);
- mtls->yStart = rsMin(mtls->fep.dimY, sc->yStart);
- mtls->yEnd = rsMin(mtls->fep.dimY, sc->yEnd);
- if (mtls->yStart >= mtls->yEnd) return;
- }
-
- if (!sc || (sc->zEnd == 0)) {
- mtls->zEnd = mtls->fep.dimZ;
- } else {
- rsAssert(sc->zStart < mtls->fep.dimZ);
- rsAssert(sc->zEnd <= mtls->fep.dimZ);
- rsAssert(sc->zStart < sc->zEnd);
- mtls->zStart = rsMin(mtls->fep.dimZ, sc->zStart);
- mtls->zEnd = rsMin(mtls->fep.dimZ, sc->zEnd);
- if (mtls->zStart >= mtls->zEnd) return;
- }
-
- mtls->xEnd = rsMax((uint32_t)1, mtls->xEnd);
- mtls->yEnd = rsMax((uint32_t)1, mtls->yEnd);
- mtls->zEnd = rsMax((uint32_t)1, mtls->zEnd);
- mtls->arrayEnd = rsMax((uint32_t)1, mtls->arrayEnd);
-
- rsAssert(!ain || (ain->getType()->getDimZ() == 0));
-
- mtls->rsc = mCtx;
- mtls->ain = ain;
- mtls->aout = aout;
- mtls->fep.usr = usr;
- mtls->fep.usrLen = usrLen;
- mtls->mSliceSize = 1;
- mtls->mSliceNum = 0;
-
- mtls->fep.ptrIn = NULL;
- mtls->fep.eStrideIn = 0;
- mtls->isThreadable = mIsThreadable;
-
- if (ain) {
- mtls->fep.ptrIn = (const uint8_t *)ain->mHal.drvState.lod[0].mallocPtr;
- mtls->fep.eStrideIn = ain->getType()->getElementSizeBytes();
- mtls->fep.yStrideIn = ain->mHal.drvState.lod[0].stride;
- }
-
- mtls->fep.ptrOut = NULL;
- mtls->fep.eStrideOut = 0;
- if (aout) {
- mtls->fep.ptrOut = (uint8_t *)aout->mHal.drvState.lod[0].mallocPtr;
- mtls->fep.eStrideOut = aout->getType()->getElementSizeBytes();
- mtls->fep.yStrideOut = aout->mHal.drvState.lod[0].stride;
- }
-}
-
-void RsdCpuScriptImpl::forEachMtlsSetup(const Allocation ** ains, uint32_t inLen,
- Allocation * aout,
- const void * usr, uint32_t usrLen,
- const RsScriptCall *sc,
- MTLaunchStruct *mtls) {
-
- memset(mtls, 0, sizeof(MTLaunchStruct));
-
- // possible for this to occur if IO_OUTPUT/IO_INPUT with no bound surface
- if (ains != NULL) {
- for (int index = inLen; --index >= 0;) {
- const Allocation* ain = ains[index];
-
- if (ain != NULL && (const uint8_t *)ain->mHal.drvState.lod[0].mallocPtr == NULL) {
- mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT, "rsForEach called with null in allocations");
- return;
- }
- }
- }
-
if (aout && (const uint8_t *)aout->mHal.drvState.lod[0].mallocPtr == NULL) {
- mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT, "rsForEach called with null out allocations");
+ mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT,
+ "rsForEach called with null out allocations");
return;
}
- if (ains != NULL) {
+ if (inLen > 0) {
const Allocation *ain0 = ains[0];
const Type *inType = ain0->getType();
@@ -951,11 +840,12 @@ void RsdCpuScriptImpl::forEachMtlsSetup(const Allocation ** ains, uint32_t inLen
mtls->fep.dimZ = outType->getDimZ();
} else {
- mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT, "rsForEach called with null allocations");
+ mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT,
+ "rsForEach called with null allocations");
return;
}
- if (ains != NULL && aout != NULL) {
+ if (inLen > 0 && aout != NULL) {
if (!ains[0]->hasSameDims(aout)) {
mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT,
"Failed to launch kernel; dimensions of input and output allocations do not match.");
@@ -1002,7 +892,7 @@ void RsdCpuScriptImpl::forEachMtlsSetup(const Allocation ** ains, uint32_t inLen
mtls->zEnd = rsMax((uint32_t)1, mtls->zEnd);
mtls->arrayEnd = rsMax((uint32_t)1, mtls->arrayEnd);
- rsAssert(!ains || (ains[0]->getType()->getDimZ() == 0));
+ rsAssert(inLen == 0 || (ains[0]->getType()->getDimZ() == 0));
mtls->rsc = mCtx;
mtls->ains = ains;
@@ -1012,18 +902,28 @@ void RsdCpuScriptImpl::forEachMtlsSetup(const Allocation ** ains, uint32_t inLen
mtls->mSliceSize = 1;
mtls->mSliceNum = 0;
- mtls->fep.ptrIns = NULL;
- mtls->fep.eStrideIn = 0;
+ mtls->fep.inPtrs = NULL;
+ mtls->fep.inStrides = NULL;
mtls->isThreadable = mIsThreadable;
- if (ains) {
- mtls->fep.ptrIns = new const uint8_t*[inLen];
- mtls->fep.inStrides = new StridePair[inLen];
+ if (inLen > 0) {
+
+ if (inLen <= RS_KERNEL_INPUT_THRESHOLD) {
+ mtls->fep.inPtrs = (const uint8_t**)mtls->inPtrsBuff;
+ mtls->fep.inStrides = mtls->inStridesBuff;
+ } else {
+ mtls->fep.heapAllocatedArrays = true;
+
+ mtls->fep.inPtrs = new const uint8_t*[inLen];
+ mtls->fep.inStrides = new StridePair[inLen];
+ }
+
+ mtls->fep.inLen = inLen;
for (int index = inLen; --index >= 0;) {
const Allocation *ain = ains[index];
- mtls->fep.ptrIns[index] =
+ mtls->fep.inPtrs[index] =
(const uint8_t*)ain->mHal.drvState.lod[0].mallocPtr;
mtls->fep.inStrides[index].eStride =
@@ -1033,41 +933,27 @@ void RsdCpuScriptImpl::forEachMtlsSetup(const Allocation ** ains, uint32_t inLen
}
}
- mtls->fep.ptrOut = NULL;
- mtls->fep.eStrideOut = 0;
- if (aout) {
- mtls->fep.ptrOut = (uint8_t *)aout->mHal.drvState.lod[0].mallocPtr;
- mtls->fep.eStrideOut = aout->getType()->getElementSizeBytes();
- mtls->fep.yStrideOut = aout->mHal.drvState.lod[0].stride;
+ mtls->fep.outPtr = NULL;
+ mtls->fep.outStride.eStride = 0;
+ mtls->fep.outStride.yStride = 0;
+ if (aout != NULL) {
+ mtls->fep.outPtr = (uint8_t *)aout->mHal.drvState.lod[0].mallocPtr;
+
+ mtls->fep.outStride.eStride = aout->getType()->getElementSizeBytes();
+ mtls->fep.outStride.yStride = aout->mHal.drvState.lod[0].stride;
}
}
void RsdCpuScriptImpl::invokeForEach(uint32_t slot,
- const Allocation * ain,
+ const Allocation ** ains,
+ uint32_t inLen,
Allocation * aout,
const void * usr,
uint32_t usrLen,
const RsScriptCall *sc) {
MTLaunchStruct mtls;
- forEachMtlsSetup(ain, aout, usr, usrLen, sc, &mtls);
- forEachKernelSetup(slot, &mtls);
-
- RsdCpuScriptImpl * oldTLS = mCtx->setTLS(this);
- mCtx->launchThreads(ain, aout, sc, &mtls);
- mCtx->setTLS(oldTLS);
-}
-
-void RsdCpuScriptImpl::invokeForEachMulti(uint32_t slot,
- const Allocation ** ains,
- uint32_t inLen,
- Allocation * aout,
- const void * usr,
- uint32_t usrLen,
- const RsScriptCall *sc) {
-
- MTLaunchStruct mtls;
forEachMtlsSetup(ains, inLen, aout, usr, usrLen, sc, &mtls);
forEachKernelSetup(slot, &mtls);
@@ -1338,17 +1224,15 @@ Allocation * RsdCpuScriptImpl::getAllocationForPointer(const void *ptr) const {
return NULL;
}
-void RsdCpuScriptImpl::preLaunch(uint32_t slot, const Allocation * ain,
- Allocation * aout, const void * usr,
- uint32_t usrLen, const RsScriptCall *sc)
-{
-}
+void RsdCpuScriptImpl::preLaunch(uint32_t slot, const Allocation ** ains,
+ uint32_t inLen, Allocation * aout,
+ const void * usr, uint32_t usrLen,
+ const RsScriptCall *sc) {}
-void RsdCpuScriptImpl::postLaunch(uint32_t slot, const Allocation * ain,
- Allocation * aout, const void * usr,
- uint32_t usrLen, const RsScriptCall *sc)
-{
-}
+void RsdCpuScriptImpl::postLaunch(uint32_t slot, const Allocation ** ains,
+ uint32_t inLen, Allocation * aout,
+ const void * usr, uint32_t usrLen,
+ const RsScriptCall *sc) {}
}