diff options
Diffstat (limited to 'cpu_ref/rsCpuScript.cpp')
| -rw-r--r-- | cpu_ref/rsCpuScript.cpp | 214 |
1 files changed, 49 insertions, 165 deletions
diff --git a/cpu_ref/rsCpuScript.cpp b/cpu_ref/rsCpuScript.cpp index a11fda19..05984207 100644 --- a/cpu_ref/rsCpuScript.cpp +++ b/cpu_ref/rsCpuScript.cpp @@ -789,144 +789,33 @@ void RsdCpuScriptImpl::populateScript(Script *script) { typedef void (*rs_t)(const void *, void *, const void *, uint32_t, uint32_t, uint32_t, uint32_t); -void RsdCpuScriptImpl::forEachMtlsSetup(const Allocation * ain, Allocation * aout, +void RsdCpuScriptImpl::forEachMtlsSetup(const Allocation ** ains, + uint32_t inLen, + Allocation * aout, const void * usr, uint32_t usrLen, const RsScriptCall *sc, MTLaunchStruct *mtls) { memset(mtls, 0, sizeof(MTLaunchStruct)); - // possible for this to occur if IO_OUTPUT/IO_INPUT with no bound surface - if (ain && (const uint8_t *)ain->mHal.drvState.lod[0].mallocPtr == NULL) { - mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT, "rsForEach called with null in allocations"); - return; - } - if (aout && (const uint8_t *)aout->mHal.drvState.lod[0].mallocPtr == NULL) { - mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT, "rsForEach called with null out allocations"); - return; - } - - if (ain != NULL) { - const Type *inType = ain->getType(); + for (int index = inLen; --index >= 0;) { + const Allocation* ain = ains[index]; - mtls->fep.dimX = inType->getDimX(); - mtls->fep.dimY = inType->getDimY(); - mtls->fep.dimZ = inType->getDimZ(); - - } else if (aout != NULL) { - const Type *outType = aout->getType(); - - mtls->fep.dimX = outType->getDimX(); - mtls->fep.dimY = outType->getDimY(); - mtls->fep.dimZ = outType->getDimZ(); - - } else { - mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT, "rsForEach called with null allocations"); - return; - } - - if (ain != NULL && aout != NULL) { - if (!ain->hasSameDims(aout)) { + // possible for this to occur if IO_OUTPUT/IO_INPUT with no bound surface + if (ain != NULL && (const uint8_t *)ain->mHal.drvState.lod[0].mallocPtr == NULL) { mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT, - "Failed to launch kernel; dimensions of input and output allocations do not match."); - + "rsForEach called with null in allocations"); return; } } - if (!sc || (sc->xEnd == 0)) { - mtls->xEnd = mtls->fep.dimX; - } else { - rsAssert(sc->xStart < mtls->fep.dimX); - rsAssert(sc->xEnd <= mtls->fep.dimX); - rsAssert(sc->xStart < sc->xEnd); - mtls->xStart = rsMin(mtls->fep.dimX, sc->xStart); - mtls->xEnd = rsMin(mtls->fep.dimX, sc->xEnd); - if (mtls->xStart >= mtls->xEnd) return; - } - - if (!sc || (sc->yEnd == 0)) { - mtls->yEnd = mtls->fep.dimY; - } else { - rsAssert(sc->yStart < mtls->fep.dimY); - rsAssert(sc->yEnd <= mtls->fep.dimY); - rsAssert(sc->yStart < sc->yEnd); - mtls->yStart = rsMin(mtls->fep.dimY, sc->yStart); - mtls->yEnd = rsMin(mtls->fep.dimY, sc->yEnd); - if (mtls->yStart >= mtls->yEnd) return; - } - - if (!sc || (sc->zEnd == 0)) { - mtls->zEnd = mtls->fep.dimZ; - } else { - rsAssert(sc->zStart < mtls->fep.dimZ); - rsAssert(sc->zEnd <= mtls->fep.dimZ); - rsAssert(sc->zStart < sc->zEnd); - mtls->zStart = rsMin(mtls->fep.dimZ, sc->zStart); - mtls->zEnd = rsMin(mtls->fep.dimZ, sc->zEnd); - if (mtls->zStart >= mtls->zEnd) return; - } - - mtls->xEnd = rsMax((uint32_t)1, mtls->xEnd); - mtls->yEnd = rsMax((uint32_t)1, mtls->yEnd); - mtls->zEnd = rsMax((uint32_t)1, mtls->zEnd); - mtls->arrayEnd = rsMax((uint32_t)1, mtls->arrayEnd); - - rsAssert(!ain || (ain->getType()->getDimZ() == 0)); - - mtls->rsc = mCtx; - mtls->ain = ain; - mtls->aout = aout; - mtls->fep.usr = usr; - mtls->fep.usrLen = usrLen; - mtls->mSliceSize = 1; - mtls->mSliceNum = 0; - - mtls->fep.ptrIn = NULL; - mtls->fep.eStrideIn = 0; - mtls->isThreadable = mIsThreadable; - - if (ain) { - mtls->fep.ptrIn = (const uint8_t *)ain->mHal.drvState.lod[0].mallocPtr; - mtls->fep.eStrideIn = ain->getType()->getElementSizeBytes(); - mtls->fep.yStrideIn = ain->mHal.drvState.lod[0].stride; - } - - mtls->fep.ptrOut = NULL; - mtls->fep.eStrideOut = 0; - if (aout) { - mtls->fep.ptrOut = (uint8_t *)aout->mHal.drvState.lod[0].mallocPtr; - mtls->fep.eStrideOut = aout->getType()->getElementSizeBytes(); - mtls->fep.yStrideOut = aout->mHal.drvState.lod[0].stride; - } -} - -void RsdCpuScriptImpl::forEachMtlsSetup(const Allocation ** ains, uint32_t inLen, - Allocation * aout, - const void * usr, uint32_t usrLen, - const RsScriptCall *sc, - MTLaunchStruct *mtls) { - - memset(mtls, 0, sizeof(MTLaunchStruct)); - - // possible for this to occur if IO_OUTPUT/IO_INPUT with no bound surface - if (ains != NULL) { - for (int index = inLen; --index >= 0;) { - const Allocation* ain = ains[index]; - - if (ain != NULL && (const uint8_t *)ain->mHal.drvState.lod[0].mallocPtr == NULL) { - mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT, "rsForEach called with null in allocations"); - return; - } - } - } - if (aout && (const uint8_t *)aout->mHal.drvState.lod[0].mallocPtr == NULL) { - mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT, "rsForEach called with null out allocations"); + mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT, + "rsForEach called with null out allocations"); return; } - if (ains != NULL) { + if (inLen > 0) { const Allocation *ain0 = ains[0]; const Type *inType = ain0->getType(); @@ -951,11 +840,12 @@ void RsdCpuScriptImpl::forEachMtlsSetup(const Allocation ** ains, uint32_t inLen mtls->fep.dimZ = outType->getDimZ(); } else { - mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT, "rsForEach called with null allocations"); + mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT, + "rsForEach called with null allocations"); return; } - if (ains != NULL && aout != NULL) { + if (inLen > 0 && aout != NULL) { if (!ains[0]->hasSameDims(aout)) { mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT, "Failed to launch kernel; dimensions of input and output allocations do not match."); @@ -1002,7 +892,7 @@ void RsdCpuScriptImpl::forEachMtlsSetup(const Allocation ** ains, uint32_t inLen mtls->zEnd = rsMax((uint32_t)1, mtls->zEnd); mtls->arrayEnd = rsMax((uint32_t)1, mtls->arrayEnd); - rsAssert(!ains || (ains[0]->getType()->getDimZ() == 0)); + rsAssert(inLen == 0 || (ains[0]->getType()->getDimZ() == 0)); mtls->rsc = mCtx; mtls->ains = ains; @@ -1012,18 +902,28 @@ void RsdCpuScriptImpl::forEachMtlsSetup(const Allocation ** ains, uint32_t inLen mtls->mSliceSize = 1; mtls->mSliceNum = 0; - mtls->fep.ptrIns = NULL; - mtls->fep.eStrideIn = 0; + mtls->fep.inPtrs = NULL; + mtls->fep.inStrides = NULL; mtls->isThreadable = mIsThreadable; - if (ains) { - mtls->fep.ptrIns = new const uint8_t*[inLen]; - mtls->fep.inStrides = new StridePair[inLen]; + if (inLen > 0) { + + if (inLen <= RS_KERNEL_INPUT_THRESHOLD) { + mtls->fep.inPtrs = (const uint8_t**)mtls->inPtrsBuff; + mtls->fep.inStrides = mtls->inStridesBuff; + } else { + mtls->fep.heapAllocatedArrays = true; + + mtls->fep.inPtrs = new const uint8_t*[inLen]; + mtls->fep.inStrides = new StridePair[inLen]; + } + + mtls->fep.inLen = inLen; for (int index = inLen; --index >= 0;) { const Allocation *ain = ains[index]; - mtls->fep.ptrIns[index] = + mtls->fep.inPtrs[index] = (const uint8_t*)ain->mHal.drvState.lod[0].mallocPtr; mtls->fep.inStrides[index].eStride = @@ -1033,41 +933,27 @@ void RsdCpuScriptImpl::forEachMtlsSetup(const Allocation ** ains, uint32_t inLen } } - mtls->fep.ptrOut = NULL; - mtls->fep.eStrideOut = 0; - if (aout) { - mtls->fep.ptrOut = (uint8_t *)aout->mHal.drvState.lod[0].mallocPtr; - mtls->fep.eStrideOut = aout->getType()->getElementSizeBytes(); - mtls->fep.yStrideOut = aout->mHal.drvState.lod[0].stride; + mtls->fep.outPtr = NULL; + mtls->fep.outStride.eStride = 0; + mtls->fep.outStride.yStride = 0; + if (aout != NULL) { + mtls->fep.outPtr = (uint8_t *)aout->mHal.drvState.lod[0].mallocPtr; + + mtls->fep.outStride.eStride = aout->getType()->getElementSizeBytes(); + mtls->fep.outStride.yStride = aout->mHal.drvState.lod[0].stride; } } void RsdCpuScriptImpl::invokeForEach(uint32_t slot, - const Allocation * ain, + const Allocation ** ains, + uint32_t inLen, Allocation * aout, const void * usr, uint32_t usrLen, const RsScriptCall *sc) { MTLaunchStruct mtls; - forEachMtlsSetup(ain, aout, usr, usrLen, sc, &mtls); - forEachKernelSetup(slot, &mtls); - - RsdCpuScriptImpl * oldTLS = mCtx->setTLS(this); - mCtx->launchThreads(ain, aout, sc, &mtls); - mCtx->setTLS(oldTLS); -} - -void RsdCpuScriptImpl::invokeForEachMulti(uint32_t slot, - const Allocation ** ains, - uint32_t inLen, - Allocation * aout, - const void * usr, - uint32_t usrLen, - const RsScriptCall *sc) { - - MTLaunchStruct mtls; forEachMtlsSetup(ains, inLen, aout, usr, usrLen, sc, &mtls); forEachKernelSetup(slot, &mtls); @@ -1338,17 +1224,15 @@ Allocation * RsdCpuScriptImpl::getAllocationForPointer(const void *ptr) const { return NULL; } -void RsdCpuScriptImpl::preLaunch(uint32_t slot, const Allocation * ain, - Allocation * aout, const void * usr, - uint32_t usrLen, const RsScriptCall *sc) -{ -} +void RsdCpuScriptImpl::preLaunch(uint32_t slot, const Allocation ** ains, + uint32_t inLen, Allocation * aout, + const void * usr, uint32_t usrLen, + const RsScriptCall *sc) {} -void RsdCpuScriptImpl::postLaunch(uint32_t slot, const Allocation * ain, - Allocation * aout, const void * usr, - uint32_t usrLen, const RsScriptCall *sc) -{ -} +void RsdCpuScriptImpl::postLaunch(uint32_t slot, const Allocation ** ains, + uint32_t inLen, Allocation * aout, + const void * usr, uint32_t usrLen, + const RsScriptCall *sc) {} } |
