summaryrefslogtreecommitdiff
path: root/cpu_ref/rsCpuScript.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'cpu_ref/rsCpuScript.cpp')
-rw-r--r--cpu_ref/rsCpuScript.cpp214
1 files changed, 49 insertions, 165 deletions
diff --git a/cpu_ref/rsCpuScript.cpp b/cpu_ref/rsCpuScript.cpp
index a11fda19..05984207 100644
--- a/cpu_ref/rsCpuScript.cpp
+++ b/cpu_ref/rsCpuScript.cpp
@@ -789,144 +789,33 @@ void RsdCpuScriptImpl::populateScript(Script *script) {
typedef void (*rs_t)(const void *, void *, const void *, uint32_t, uint32_t, uint32_t, uint32_t);
-void RsdCpuScriptImpl::forEachMtlsSetup(const Allocation * ain, Allocation * aout,
+void RsdCpuScriptImpl::forEachMtlsSetup(const Allocation ** ains,
+ uint32_t inLen,
+ Allocation * aout,
const void * usr, uint32_t usrLen,
const RsScriptCall *sc,
MTLaunchStruct *mtls) {
memset(mtls, 0, sizeof(MTLaunchStruct));
- // possible for this to occur if IO_OUTPUT/IO_INPUT with no bound surface
- if (ain && (const uint8_t *)ain->mHal.drvState.lod[0].mallocPtr == NULL) {
- mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT, "rsForEach called with null in allocations");
- return;
- }
- if (aout && (const uint8_t *)aout->mHal.drvState.lod[0].mallocPtr == NULL) {
- mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT, "rsForEach called with null out allocations");
- return;
- }
-
- if (ain != NULL) {
- const Type *inType = ain->getType();
+ for (int index = inLen; --index >= 0;) {
+ const Allocation* ain = ains[index];
- mtls->fep.dimX = inType->getDimX();
- mtls->fep.dimY = inType->getDimY();
- mtls->fep.dimZ = inType->getDimZ();
-
- } else if (aout != NULL) {
- const Type *outType = aout->getType();
-
- mtls->fep.dimX = outType->getDimX();
- mtls->fep.dimY = outType->getDimY();
- mtls->fep.dimZ = outType->getDimZ();
-
- } else {
- mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT, "rsForEach called with null allocations");
- return;
- }
-
- if (ain != NULL && aout != NULL) {
- if (!ain->hasSameDims(aout)) {
+ // possible for this to occur if IO_OUTPUT/IO_INPUT with no bound surface
+ if (ain != NULL && (const uint8_t *)ain->mHal.drvState.lod[0].mallocPtr == NULL) {
mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT,
- "Failed to launch kernel; dimensions of input and output allocations do not match.");
-
+ "rsForEach called with null in allocations");
return;
}
}
- if (!sc || (sc->xEnd == 0)) {
- mtls->xEnd = mtls->fep.dimX;
- } else {
- rsAssert(sc->xStart < mtls->fep.dimX);
- rsAssert(sc->xEnd <= mtls->fep.dimX);
- rsAssert(sc->xStart < sc->xEnd);
- mtls->xStart = rsMin(mtls->fep.dimX, sc->xStart);
- mtls->xEnd = rsMin(mtls->fep.dimX, sc->xEnd);
- if (mtls->xStart >= mtls->xEnd) return;
- }
-
- if (!sc || (sc->yEnd == 0)) {
- mtls->yEnd = mtls->fep.dimY;
- } else {
- rsAssert(sc->yStart < mtls->fep.dimY);
- rsAssert(sc->yEnd <= mtls->fep.dimY);
- rsAssert(sc->yStart < sc->yEnd);
- mtls->yStart = rsMin(mtls->fep.dimY, sc->yStart);
- mtls->yEnd = rsMin(mtls->fep.dimY, sc->yEnd);
- if (mtls->yStart >= mtls->yEnd) return;
- }
-
- if (!sc || (sc->zEnd == 0)) {
- mtls->zEnd = mtls->fep.dimZ;
- } else {
- rsAssert(sc->zStart < mtls->fep.dimZ);
- rsAssert(sc->zEnd <= mtls->fep.dimZ);
- rsAssert(sc->zStart < sc->zEnd);
- mtls->zStart = rsMin(mtls->fep.dimZ, sc->zStart);
- mtls->zEnd = rsMin(mtls->fep.dimZ, sc->zEnd);
- if (mtls->zStart >= mtls->zEnd) return;
- }
-
- mtls->xEnd = rsMax((uint32_t)1, mtls->xEnd);
- mtls->yEnd = rsMax((uint32_t)1, mtls->yEnd);
- mtls->zEnd = rsMax((uint32_t)1, mtls->zEnd);
- mtls->arrayEnd = rsMax((uint32_t)1, mtls->arrayEnd);
-
- rsAssert(!ain || (ain->getType()->getDimZ() == 0));
-
- mtls->rsc = mCtx;
- mtls->ain = ain;
- mtls->aout = aout;
- mtls->fep.usr = usr;
- mtls->fep.usrLen = usrLen;
- mtls->mSliceSize = 1;
- mtls->mSliceNum = 0;
-
- mtls->fep.ptrIn = NULL;
- mtls->fep.eStrideIn = 0;
- mtls->isThreadable = mIsThreadable;
-
- if (ain) {
- mtls->fep.ptrIn = (const uint8_t *)ain->mHal.drvState.lod[0].mallocPtr;
- mtls->fep.eStrideIn = ain->getType()->getElementSizeBytes();
- mtls->fep.yStrideIn = ain->mHal.drvState.lod[0].stride;
- }
-
- mtls->fep.ptrOut = NULL;
- mtls->fep.eStrideOut = 0;
- if (aout) {
- mtls->fep.ptrOut = (uint8_t *)aout->mHal.drvState.lod[0].mallocPtr;
- mtls->fep.eStrideOut = aout->getType()->getElementSizeBytes();
- mtls->fep.yStrideOut = aout->mHal.drvState.lod[0].stride;
- }
-}
-
-void RsdCpuScriptImpl::forEachMtlsSetup(const Allocation ** ains, uint32_t inLen,
- Allocation * aout,
- const void * usr, uint32_t usrLen,
- const RsScriptCall *sc,
- MTLaunchStruct *mtls) {
-
- memset(mtls, 0, sizeof(MTLaunchStruct));
-
- // possible for this to occur if IO_OUTPUT/IO_INPUT with no bound surface
- if (ains != NULL) {
- for (int index = inLen; --index >= 0;) {
- const Allocation* ain = ains[index];
-
- if (ain != NULL && (const uint8_t *)ain->mHal.drvState.lod[0].mallocPtr == NULL) {
- mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT, "rsForEach called with null in allocations");
- return;
- }
- }
- }
-
if (aout && (const uint8_t *)aout->mHal.drvState.lod[0].mallocPtr == NULL) {
- mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT, "rsForEach called with null out allocations");
+ mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT,
+ "rsForEach called with null out allocations");
return;
}
- if (ains != NULL) {
+ if (inLen > 0) {
const Allocation *ain0 = ains[0];
const Type *inType = ain0->getType();
@@ -951,11 +840,12 @@ void RsdCpuScriptImpl::forEachMtlsSetup(const Allocation ** ains, uint32_t inLen
mtls->fep.dimZ = outType->getDimZ();
} else {
- mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT, "rsForEach called with null allocations");
+ mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT,
+ "rsForEach called with null allocations");
return;
}
- if (ains != NULL && aout != NULL) {
+ if (inLen > 0 && aout != NULL) {
if (!ains[0]->hasSameDims(aout)) {
mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT,
"Failed to launch kernel; dimensions of input and output allocations do not match.");
@@ -1002,7 +892,7 @@ void RsdCpuScriptImpl::forEachMtlsSetup(const Allocation ** ains, uint32_t inLen
mtls->zEnd = rsMax((uint32_t)1, mtls->zEnd);
mtls->arrayEnd = rsMax((uint32_t)1, mtls->arrayEnd);
- rsAssert(!ains || (ains[0]->getType()->getDimZ() == 0));
+ rsAssert(inLen == 0 || (ains[0]->getType()->getDimZ() == 0));
mtls->rsc = mCtx;
mtls->ains = ains;
@@ -1012,18 +902,28 @@ void RsdCpuScriptImpl::forEachMtlsSetup(const Allocation ** ains, uint32_t inLen
mtls->mSliceSize = 1;
mtls->mSliceNum = 0;
- mtls->fep.ptrIns = NULL;
- mtls->fep.eStrideIn = 0;
+ mtls->fep.inPtrs = NULL;
+ mtls->fep.inStrides = NULL;
mtls->isThreadable = mIsThreadable;
- if (ains) {
- mtls->fep.ptrIns = new const uint8_t*[inLen];
- mtls->fep.inStrides = new StridePair[inLen];
+ if (inLen > 0) {
+
+ if (inLen <= RS_KERNEL_INPUT_THRESHOLD) {
+ mtls->fep.inPtrs = (const uint8_t**)mtls->inPtrsBuff;
+ mtls->fep.inStrides = mtls->inStridesBuff;
+ } else {
+ mtls->fep.heapAllocatedArrays = true;
+
+ mtls->fep.inPtrs = new const uint8_t*[inLen];
+ mtls->fep.inStrides = new StridePair[inLen];
+ }
+
+ mtls->fep.inLen = inLen;
for (int index = inLen; --index >= 0;) {
const Allocation *ain = ains[index];
- mtls->fep.ptrIns[index] =
+ mtls->fep.inPtrs[index] =
(const uint8_t*)ain->mHal.drvState.lod[0].mallocPtr;
mtls->fep.inStrides[index].eStride =
@@ -1033,41 +933,27 @@ void RsdCpuScriptImpl::forEachMtlsSetup(const Allocation ** ains, uint32_t inLen
}
}
- mtls->fep.ptrOut = NULL;
- mtls->fep.eStrideOut = 0;
- if (aout) {
- mtls->fep.ptrOut = (uint8_t *)aout->mHal.drvState.lod[0].mallocPtr;
- mtls->fep.eStrideOut = aout->getType()->getElementSizeBytes();
- mtls->fep.yStrideOut = aout->mHal.drvState.lod[0].stride;
+ mtls->fep.outPtr = NULL;
+ mtls->fep.outStride.eStride = 0;
+ mtls->fep.outStride.yStride = 0;
+ if (aout != NULL) {
+ mtls->fep.outPtr = (uint8_t *)aout->mHal.drvState.lod[0].mallocPtr;
+
+ mtls->fep.outStride.eStride = aout->getType()->getElementSizeBytes();
+ mtls->fep.outStride.yStride = aout->mHal.drvState.lod[0].stride;
}
}
void RsdCpuScriptImpl::invokeForEach(uint32_t slot,
- const Allocation * ain,
+ const Allocation ** ains,
+ uint32_t inLen,
Allocation * aout,
const void * usr,
uint32_t usrLen,
const RsScriptCall *sc) {
MTLaunchStruct mtls;
- forEachMtlsSetup(ain, aout, usr, usrLen, sc, &mtls);
- forEachKernelSetup(slot, &mtls);
-
- RsdCpuScriptImpl * oldTLS = mCtx->setTLS(this);
- mCtx->launchThreads(ain, aout, sc, &mtls);
- mCtx->setTLS(oldTLS);
-}
-
-void RsdCpuScriptImpl::invokeForEachMulti(uint32_t slot,
- const Allocation ** ains,
- uint32_t inLen,
- Allocation * aout,
- const void * usr,
- uint32_t usrLen,
- const RsScriptCall *sc) {
-
- MTLaunchStruct mtls;
forEachMtlsSetup(ains, inLen, aout, usr, usrLen, sc, &mtls);
forEachKernelSetup(slot, &mtls);
@@ -1338,17 +1224,15 @@ Allocation * RsdCpuScriptImpl::getAllocationForPointer(const void *ptr) const {
return NULL;
}
-void RsdCpuScriptImpl::preLaunch(uint32_t slot, const Allocation * ain,
- Allocation * aout, const void * usr,
- uint32_t usrLen, const RsScriptCall *sc)
-{
-}
+void RsdCpuScriptImpl::preLaunch(uint32_t slot, const Allocation ** ains,
+ uint32_t inLen, Allocation * aout,
+ const void * usr, uint32_t usrLen,
+ const RsScriptCall *sc) {}
-void RsdCpuScriptImpl::postLaunch(uint32_t slot, const Allocation * ain,
- Allocation * aout, const void * usr,
- uint32_t usrLen, const RsScriptCall *sc)
-{
-}
+void RsdCpuScriptImpl::postLaunch(uint32_t slot, const Allocation ** ains,
+ uint32_t inLen, Allocation * aout,
+ const void * usr, uint32_t usrLen,
+ const RsScriptCall *sc) {}
}