diff options
| author | Stephen Hines <srhines@google.com> | 2014-08-13 17:32:43 +0000 |
|---|---|---|
| committer | Gerrit Code Review <noreply-gerritcodereview@google.com> | 2014-08-12 19:57:35 +0000 |
| commit | cd8df40f07d88f896bea05ed06a20d1a4d5e9728 (patch) | |
| tree | b028521e6474ab22bc99571ead62e1e4f0cb2dc6 /cpu_ref/rsCpuScript.cpp | |
| parent | f2dd1651ea50bf176d5e6580cc0589f7a3e89995 (diff) | |
| parent | 4b2bea3dc20865f3a198797702e19912a6a2171c (diff) | |
Merge "Revert "Collapse code paths for single- and multi-input kernels.""
Diffstat (limited to 'cpu_ref/rsCpuScript.cpp')
| -rw-r--r-- | cpu_ref/rsCpuScript.cpp | 214 |
1 files changed, 165 insertions, 49 deletions
diff --git a/cpu_ref/rsCpuScript.cpp b/cpu_ref/rsCpuScript.cpp index 05984207..a11fda19 100644 --- a/cpu_ref/rsCpuScript.cpp +++ b/cpu_ref/rsCpuScript.cpp @@ -789,33 +789,144 @@ void RsdCpuScriptImpl::populateScript(Script *script) { typedef void (*rs_t)(const void *, void *, const void *, uint32_t, uint32_t, uint32_t, uint32_t); -void RsdCpuScriptImpl::forEachMtlsSetup(const Allocation ** ains, - uint32_t inLen, - Allocation * aout, +void RsdCpuScriptImpl::forEachMtlsSetup(const Allocation * ain, Allocation * aout, const void * usr, uint32_t usrLen, const RsScriptCall *sc, MTLaunchStruct *mtls) { memset(mtls, 0, sizeof(MTLaunchStruct)); - for (int index = inLen; --index >= 0;) { - const Allocation* ain = ains[index]; + // possible for this to occur if IO_OUTPUT/IO_INPUT with no bound surface + if (ain && (const uint8_t *)ain->mHal.drvState.lod[0].mallocPtr == NULL) { + mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT, "rsForEach called with null in allocations"); + return; + } + if (aout && (const uint8_t *)aout->mHal.drvState.lod[0].mallocPtr == NULL) { + mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT, "rsForEach called with null out allocations"); + return; + } + + if (ain != NULL) { + const Type *inType = ain->getType(); - // possible for this to occur if IO_OUTPUT/IO_INPUT with no bound surface - if (ain != NULL && (const uint8_t *)ain->mHal.drvState.lod[0].mallocPtr == NULL) { + mtls->fep.dimX = inType->getDimX(); + mtls->fep.dimY = inType->getDimY(); + mtls->fep.dimZ = inType->getDimZ(); + + } else if (aout != NULL) { + const Type *outType = aout->getType(); + + mtls->fep.dimX = outType->getDimX(); + mtls->fep.dimY = outType->getDimY(); + mtls->fep.dimZ = outType->getDimZ(); + + } else { + mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT, "rsForEach called with null allocations"); + return; + } + + if (ain != NULL && aout != NULL) { + if (!ain->hasSameDims(aout)) { mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT, - "rsForEach called with null in allocations"); + "Failed to launch kernel; dimensions of input and output allocations do not match."); + return; } } + if (!sc || (sc->xEnd == 0)) { + mtls->xEnd = mtls->fep.dimX; + } else { + rsAssert(sc->xStart < mtls->fep.dimX); + rsAssert(sc->xEnd <= mtls->fep.dimX); + rsAssert(sc->xStart < sc->xEnd); + mtls->xStart = rsMin(mtls->fep.dimX, sc->xStart); + mtls->xEnd = rsMin(mtls->fep.dimX, sc->xEnd); + if (mtls->xStart >= mtls->xEnd) return; + } + + if (!sc || (sc->yEnd == 0)) { + mtls->yEnd = mtls->fep.dimY; + } else { + rsAssert(sc->yStart < mtls->fep.dimY); + rsAssert(sc->yEnd <= mtls->fep.dimY); + rsAssert(sc->yStart < sc->yEnd); + mtls->yStart = rsMin(mtls->fep.dimY, sc->yStart); + mtls->yEnd = rsMin(mtls->fep.dimY, sc->yEnd); + if (mtls->yStart >= mtls->yEnd) return; + } + + if (!sc || (sc->zEnd == 0)) { + mtls->zEnd = mtls->fep.dimZ; + } else { + rsAssert(sc->zStart < mtls->fep.dimZ); + rsAssert(sc->zEnd <= mtls->fep.dimZ); + rsAssert(sc->zStart < sc->zEnd); + mtls->zStart = rsMin(mtls->fep.dimZ, sc->zStart); + mtls->zEnd = rsMin(mtls->fep.dimZ, sc->zEnd); + if (mtls->zStart >= mtls->zEnd) return; + } + + mtls->xEnd = rsMax((uint32_t)1, mtls->xEnd); + mtls->yEnd = rsMax((uint32_t)1, mtls->yEnd); + mtls->zEnd = rsMax((uint32_t)1, mtls->zEnd); + mtls->arrayEnd = rsMax((uint32_t)1, mtls->arrayEnd); + + rsAssert(!ain || (ain->getType()->getDimZ() == 0)); + + mtls->rsc = mCtx; + mtls->ain = ain; + mtls->aout = aout; + mtls->fep.usr = usr; + mtls->fep.usrLen = usrLen; + mtls->mSliceSize = 1; + mtls->mSliceNum = 0; + + mtls->fep.ptrIn = NULL; + mtls->fep.eStrideIn = 0; + mtls->isThreadable = mIsThreadable; + + if (ain) { + mtls->fep.ptrIn = (const uint8_t *)ain->mHal.drvState.lod[0].mallocPtr; + mtls->fep.eStrideIn = ain->getType()->getElementSizeBytes(); + mtls->fep.yStrideIn = ain->mHal.drvState.lod[0].stride; + } + + mtls->fep.ptrOut = NULL; + mtls->fep.eStrideOut = 0; + if (aout) { + mtls->fep.ptrOut = (uint8_t *)aout->mHal.drvState.lod[0].mallocPtr; + mtls->fep.eStrideOut = aout->getType()->getElementSizeBytes(); + mtls->fep.yStrideOut = aout->mHal.drvState.lod[0].stride; + } +} + +void RsdCpuScriptImpl::forEachMtlsSetup(const Allocation ** ains, uint32_t inLen, + Allocation * aout, + const void * usr, uint32_t usrLen, + const RsScriptCall *sc, + MTLaunchStruct *mtls) { + + memset(mtls, 0, sizeof(MTLaunchStruct)); + + // possible for this to occur if IO_OUTPUT/IO_INPUT with no bound surface + if (ains != NULL) { + for (int index = inLen; --index >= 0;) { + const Allocation* ain = ains[index]; + + if (ain != NULL && (const uint8_t *)ain->mHal.drvState.lod[0].mallocPtr == NULL) { + mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT, "rsForEach called with null in allocations"); + return; + } + } + } + if (aout && (const uint8_t *)aout->mHal.drvState.lod[0].mallocPtr == NULL) { - mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT, - "rsForEach called with null out allocations"); + mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT, "rsForEach called with null out allocations"); return; } - if (inLen > 0) { + if (ains != NULL) { const Allocation *ain0 = ains[0]; const Type *inType = ain0->getType(); @@ -840,12 +951,11 @@ void RsdCpuScriptImpl::forEachMtlsSetup(const Allocation ** ains, mtls->fep.dimZ = outType->getDimZ(); } else { - mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT, - "rsForEach called with null allocations"); + mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT, "rsForEach called with null allocations"); return; } - if (inLen > 0 && aout != NULL) { + if (ains != NULL && aout != NULL) { if (!ains[0]->hasSameDims(aout)) { mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT, "Failed to launch kernel; dimensions of input and output allocations do not match."); @@ -892,7 +1002,7 @@ void RsdCpuScriptImpl::forEachMtlsSetup(const Allocation ** ains, mtls->zEnd = rsMax((uint32_t)1, mtls->zEnd); mtls->arrayEnd = rsMax((uint32_t)1, mtls->arrayEnd); - rsAssert(inLen == 0 || (ains[0]->getType()->getDimZ() == 0)); + rsAssert(!ains || (ains[0]->getType()->getDimZ() == 0)); mtls->rsc = mCtx; mtls->ains = ains; @@ -902,28 +1012,18 @@ void RsdCpuScriptImpl::forEachMtlsSetup(const Allocation ** ains, mtls->mSliceSize = 1; mtls->mSliceNum = 0; - mtls->fep.inPtrs = NULL; - mtls->fep.inStrides = NULL; + mtls->fep.ptrIns = NULL; + mtls->fep.eStrideIn = 0; mtls->isThreadable = mIsThreadable; - if (inLen > 0) { - - if (inLen <= RS_KERNEL_INPUT_THRESHOLD) { - mtls->fep.inPtrs = (const uint8_t**)mtls->inPtrsBuff; - mtls->fep.inStrides = mtls->inStridesBuff; - } else { - mtls->fep.heapAllocatedArrays = true; - - mtls->fep.inPtrs = new const uint8_t*[inLen]; - mtls->fep.inStrides = new StridePair[inLen]; - } - - mtls->fep.inLen = inLen; + if (ains) { + mtls->fep.ptrIns = new const uint8_t*[inLen]; + mtls->fep.inStrides = new StridePair[inLen]; for (int index = inLen; --index >= 0;) { const Allocation *ain = ains[index]; - mtls->fep.inPtrs[index] = + mtls->fep.ptrIns[index] = (const uint8_t*)ain->mHal.drvState.lod[0].mallocPtr; mtls->fep.inStrides[index].eStride = @@ -933,27 +1033,41 @@ void RsdCpuScriptImpl::forEachMtlsSetup(const Allocation ** ains, } } - mtls->fep.outPtr = NULL; - mtls->fep.outStride.eStride = 0; - mtls->fep.outStride.yStride = 0; - if (aout != NULL) { - mtls->fep.outPtr = (uint8_t *)aout->mHal.drvState.lod[0].mallocPtr; - - mtls->fep.outStride.eStride = aout->getType()->getElementSizeBytes(); - mtls->fep.outStride.yStride = aout->mHal.drvState.lod[0].stride; + mtls->fep.ptrOut = NULL; + mtls->fep.eStrideOut = 0; + if (aout) { + mtls->fep.ptrOut = (uint8_t *)aout->mHal.drvState.lod[0].mallocPtr; + mtls->fep.eStrideOut = aout->getType()->getElementSizeBytes(); + mtls->fep.yStrideOut = aout->mHal.drvState.lod[0].stride; } } void RsdCpuScriptImpl::invokeForEach(uint32_t slot, - const Allocation ** ains, - uint32_t inLen, + const Allocation * ain, Allocation * aout, const void * usr, uint32_t usrLen, const RsScriptCall *sc) { MTLaunchStruct mtls; + forEachMtlsSetup(ain, aout, usr, usrLen, sc, &mtls); + forEachKernelSetup(slot, &mtls); + + RsdCpuScriptImpl * oldTLS = mCtx->setTLS(this); + mCtx->launchThreads(ain, aout, sc, &mtls); + mCtx->setTLS(oldTLS); +} + +void RsdCpuScriptImpl::invokeForEachMulti(uint32_t slot, + const Allocation ** ains, + uint32_t inLen, + Allocation * aout, + const void * usr, + uint32_t usrLen, + const RsScriptCall *sc) { + + MTLaunchStruct mtls; forEachMtlsSetup(ains, inLen, aout, usr, usrLen, sc, &mtls); forEachKernelSetup(slot, &mtls); @@ -1224,15 +1338,17 @@ Allocation * RsdCpuScriptImpl::getAllocationForPointer(const void *ptr) const { return NULL; } -void RsdCpuScriptImpl::preLaunch(uint32_t slot, const Allocation ** ains, - uint32_t inLen, Allocation * aout, - const void * usr, uint32_t usrLen, - const RsScriptCall *sc) {} +void RsdCpuScriptImpl::preLaunch(uint32_t slot, const Allocation * ain, + Allocation * aout, const void * usr, + uint32_t usrLen, const RsScriptCall *sc) +{ +} -void RsdCpuScriptImpl::postLaunch(uint32_t slot, const Allocation ** ains, - uint32_t inLen, Allocation * aout, - const void * usr, uint32_t usrLen, - const RsScriptCall *sc) {} +void RsdCpuScriptImpl::postLaunch(uint32_t slot, const Allocation * ain, + Allocation * aout, const void * usr, + uint32_t usrLen, const RsScriptCall *sc) +{ +} } |
