diff options
| author | Chris Wailes <chriswailes@google.com> | 2014-06-11 12:00:29 -0700 |
|---|---|---|
| committer | Chris Wailes <chriswailes@google.com> | 2014-07-08 11:46:34 -0700 |
| commit | 4b3c34e6833e39bc89c2128002806b654b8e623d (patch) | |
| tree | d5e1eeed6ac0e0f9b57488570680b6796ecd7735 /cpu_ref/rsCpuScript.cpp | |
| parent | cb51798a0379409c0f9927c44bbcdd772ed7ec18 (diff) | |
Adds support for multi-input kernels to Frameworks/RS.
This patch modifies Frameworks/RS in the following ways:
* Adjusts the data-layout of the C/C++ version of RsForEachStubParamStruct to
accommodate a pointer to an array of input allocations and a pointer to an
array of stride sizes for each of these allocations.
* Adds a new code path for Java code to pass multiple allocations to an RS
kernel.
* Packs base pointers and step values for multi-input kernels into the new
RsForEachStubParamStruct members.
Change-Id: I46d2834c37075b2a2407fd8b010546818a4540d1
Diffstat (limited to 'cpu_ref/rsCpuScript.cpp')
| -rw-r--r-- | cpu_ref/rsCpuScript.cpp | 192 |
1 files changed, 182 insertions, 10 deletions
diff --git a/cpu_ref/rsCpuScript.cpp b/cpu_ref/rsCpuScript.cpp index cf7b377d..b9f8aba8 100644 --- a/cpu_ref/rsCpuScript.cpp +++ b/cpu_ref/rsCpuScript.cpp @@ -806,21 +806,34 @@ void RsdCpuScriptImpl::forEachMtlsSetup(const Allocation * ain, Allocation * aou return; } - if (ain) { - mtls->fep.dimX = ain->getType()->getDimX(); - mtls->fep.dimY = ain->getType()->getDimY(); - mtls->fep.dimZ = ain->getType()->getDimZ(); - //mtls->dimArray = ain->getType()->getDimArray(); - } else if (aout) { - mtls->fep.dimX = aout->getType()->getDimX(); - mtls->fep.dimY = aout->getType()->getDimY(); - mtls->fep.dimZ = aout->getType()->getDimZ(); - //mtls->dimArray = aout->getType()->getDimArray(); + if (ain != NULL) { + const Type *inType = ain->getType(); + + mtls->fep.dimX = inType->getDimX(); + mtls->fep.dimY = inType->getDimY(); + mtls->fep.dimZ = inType->getDimZ(); + + } else if (aout != NULL) { + const Type *outType = aout->getType(); + + mtls->fep.dimX = outType->getDimX(); + mtls->fep.dimY = outType->getDimY(); + mtls->fep.dimZ = outType->getDimZ(); + } else { mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT, "rsForEach called with null allocations"); return; } + if (ain != NULL && aout != NULL) { + if (!ain->hasSameDims(aout)) { + mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT, + "Failed to launch kernel; dimensions of input and output allocations do not match."); + + return; + } + } + if (!sc || (sc->xEnd == 0)) { mtls->xEnd = mtls->fep.dimX; } else { @@ -888,6 +901,147 @@ void RsdCpuScriptImpl::forEachMtlsSetup(const Allocation * ain, Allocation * aou } } +void RsdCpuScriptImpl::forEachMtlsSetup(const Allocation ** ains, uint32_t inLen, + Allocation * aout, + const void * usr, uint32_t usrLen, + const RsScriptCall *sc, + MTLaunchStruct *mtls) { + + memset(mtls, 0, sizeof(MTLaunchStruct)); + + // possible for this to occur if IO_OUTPUT/IO_INPUT with no bound surface + if (ains != NULL) { + for (int index = inLen; --index >= 0;) { + const Allocation* ain = ains[index]; + + 
if (ain != NULL && (const uint8_t *)ain->mHal.drvState.lod[0].mallocPtr == NULL) { + mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT, "rsForEach called with null in allocations"); + return; + } + } + } + + if (aout && (const uint8_t *)aout->mHal.drvState.lod[0].mallocPtr == NULL) { + mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT, "rsForEach called with null out allocations"); + return; + } + + if (ains != NULL) { + const Allocation *ain0 = ains[0]; + const Type *inType = ain0->getType(); + + mtls->fep.dimX = inType->getDimX(); + mtls->fep.dimY = inType->getDimY(); + mtls->fep.dimZ = inType->getDimZ(); + + for (int Index = inLen; --Index >= 1;) { + if (!ain0->hasSameDims(ains[Index])) { + mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT, + "Failed to launch kernel; dimensions of input and output allocations do not match."); + + return; + } + } + + } else if (aout != NULL) { + const Type *outType = aout->getType(); + + mtls->fep.dimX = outType->getDimX(); + mtls->fep.dimY = outType->getDimY(); + mtls->fep.dimZ = outType->getDimZ(); + + } else { + mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT, "rsForEach called with null allocations"); + return; + } + + if (ains != NULL && aout != NULL) { + if (!ains[0]->hasSameDims(aout)) { + mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT, + "Failed to launch kernel; dimensions of input and output allocations do not match."); + + return; + } + } + + if (!sc || (sc->xEnd == 0)) { + mtls->xEnd = mtls->fep.dimX; + } else { + rsAssert(sc->xStart < mtls->fep.dimX); + rsAssert(sc->xEnd <= mtls->fep.dimX); + rsAssert(sc->xStart < sc->xEnd); + mtls->xStart = rsMin(mtls->fep.dimX, sc->xStart); + mtls->xEnd = rsMin(mtls->fep.dimX, sc->xEnd); + if (mtls->xStart >= mtls->xEnd) return; + } + + if (!sc || (sc->yEnd == 0)) { + mtls->yEnd = mtls->fep.dimY; + } else { + rsAssert(sc->yStart < mtls->fep.dimY); + rsAssert(sc->yEnd <= mtls->fep.dimY); + rsAssert(sc->yStart < sc->yEnd); + mtls->yStart = rsMin(mtls->fep.dimY, sc->yStart); + 
mtls->yEnd = rsMin(mtls->fep.dimY, sc->yEnd); + if (mtls->yStart >= mtls->yEnd) return; + } + + if (!sc || (sc->zEnd == 0)) { + mtls->zEnd = mtls->fep.dimZ; + } else { + rsAssert(sc->zStart < mtls->fep.dimZ); + rsAssert(sc->zEnd <= mtls->fep.dimZ); + rsAssert(sc->zStart < sc->zEnd); + mtls->zStart = rsMin(mtls->fep.dimZ, sc->zStart); + mtls->zEnd = rsMin(mtls->fep.dimZ, sc->zEnd); + if (mtls->zStart >= mtls->zEnd) return; + } + + mtls->xEnd = rsMax((uint32_t)1, mtls->xEnd); + mtls->yEnd = rsMax((uint32_t)1, mtls->yEnd); + mtls->zEnd = rsMax((uint32_t)1, mtls->zEnd); + mtls->arrayEnd = rsMax((uint32_t)1, mtls->arrayEnd); + + rsAssert(!ains || (ains[0]->getType()->getDimZ() == 0)); + + mtls->rsc = mCtx; + mtls->ains = ains; + mtls->aout = aout; + mtls->fep.usr = usr; + mtls->fep.usrLen = usrLen; + mtls->mSliceSize = 1; + mtls->mSliceNum = 0; + + mtls->fep.ptrIns = NULL; + mtls->fep.eStrideIn = 0; + mtls->isThreadable = mIsThreadable; + + if (ains) { + mtls->fep.ptrIns = new const uint8_t*[inLen]; + mtls->fep.inStrides = new StridePair[inLen]; + + for (int index = inLen; --index >= 0;) { + const Allocation *ain = ains[index]; + + mtls->fep.ptrIns[index] = + (const uint8_t*)ain->mHal.drvState.lod[0].mallocPtr; + + mtls->fep.inStrides[index].eStride = + ain->getType()->getElementSizeBytes(); + mtls->fep.inStrides[index].yStride = + ain->mHal.drvState.lod[0].stride; + } + } + + mtls->fep.ptrOut = NULL; + mtls->fep.eStrideOut = 0; + if (aout) { + mtls->fep.ptrOut = (uint8_t *)aout->mHal.drvState.lod[0].mallocPtr; + mtls->fep.eStrideOut = aout->getType()->getElementSizeBytes(); + mtls->fep.yStrideOut = aout->mHal.drvState.lod[0].stride; + } +} + void RsdCpuScriptImpl::invokeForEach(uint32_t slot, const Allocation * ain, @@ -905,6 +1059,24 @@ void RsdCpuScriptImpl::invokeForEach(uint32_t slot, mCtx->setTLS(oldTLS); } +void RsdCpuScriptImpl::invokeForEachMulti(uint32_t slot, + const Allocation ** ains, + uint32_t inLen, + Allocation * aout, + const void * usr, + uint32_t 
usrLen, + const RsScriptCall *sc) { + + MTLaunchStruct mtls; + + forEachMtlsSetup(ains, inLen, aout, usr, usrLen, sc, &mtls); + forEachKernelSetup(slot, &mtls); + + RsdCpuScriptImpl * oldTLS = mCtx->setTLS(this); + mCtx->launchThreads(ains, inLen, aout, sc, &mtls); + mCtx->setTLS(oldTLS); +} + void RsdCpuScriptImpl::forEachKernelSetup(uint32_t slot, MTLaunchStruct *mtls) { mtls->script = this; mtls->fep.slot = slot; |
