summary | refs | log | tree | commit | diff
path: root/cpu_ref/rsCpuScriptGroup.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'cpu_ref/rsCpuScriptGroup.cpp')
-rw-r--r-- cpu_ref/rsCpuScriptGroup.cpp 103
1 files changed, 76 insertions, 27 deletions
diff --git a/cpu_ref/rsCpuScriptGroup.cpp b/cpu_ref/rsCpuScriptGroup.cpp
index 08785523..20ee09db 100644
--- a/cpu_ref/rsCpuScriptGroup.cpp
+++ b/cpu_ref/rsCpuScriptGroup.cpp
@@ -53,38 +53,45 @@ void CpuScriptGroupImpl::scriptGroupRoot(const RsExpandKernelParams *kparams,
uint32_t instep, uint32_t outstep) {
- const ScriptList *sl = (const ScriptList *)kparams->usr;
+ const ScriptList *sl = (const ScriptList *)kparams->usr;
RsExpandKernelParams *mkparams = (RsExpandKernelParams *)kparams;
+ const void **oldIns = mkparams->ins;
+ uint32_t *oldStrides = mkparams->inEStrides;
+
+ void *localIns[1];
+ uint32_t localStride[1];
+
+ mkparams->ins = (const void**)localIns;
+ mkparams->inEStrides = localStride;
+
for (size_t ct = 0; ct < sl->count; ct++) {
ScriptGroupRootFunc_t func;
func = (ScriptGroupRootFunc_t)sl->fnPtrs[ct];
mkparams->usr = sl->usrPtrs[ct];
- mkparams->in = NULL;
- mkparams->out = NULL;
-
- uint32_t istep = 0;
- uint32_t ostep = 0;
-
if (sl->ins[ct]) {
- mkparams->in =
- (const uint8_t *)sl->ins[ct]->mHal.drvState.lod[0].mallocPtr;
+ localIns[0] = sl->ins[ct]->mHal.drvState.lod[0].mallocPtr;
- istep = sl->ins[ct]->mHal.state.elementSizeBytes;
+ localStride[0] = sl->ins[ct]->mHal.state.elementSizeBytes;
if (sl->inExts[ct]) {
- mkparams->in =
- (const uint8_t *)mkparams->in +
- sl->ins[ct]->mHal.drvState.lod[0].stride * kparams->y;
+ localIns[0] = (void*)
+ ((const uint8_t *)localIns[0] +
+ sl->ins[ct]->mHal.drvState.lod[0].stride * kparams->y);
} else if (sl->ins[ct]->mHal.drvState.lod[0].dimY > kparams->lid) {
- mkparams->in =
- (const uint8_t *)mkparams->in +
- sl->ins[ct]->mHal.drvState.lod[0].stride * kparams->lid;
+ localIns[0] = (void*)
+ ((const uint8_t *)localIns[0] +
+ sl->ins[ct]->mHal.drvState.lod[0].stride * kparams->lid);
}
+
+ } else {
+ localIns[0] = NULL;
+ localStride[0] = 0;
}
+ uint32_t ostep;
if (sl->outs[ct]) {
mkparams->out =
(uint8_t *)sl->outs[ct]->mHal.drvState.lod[0].mallocPtr;
@@ -101,14 +108,23 @@ void CpuScriptGroupImpl::scriptGroupRoot(const RsExpandKernelParams *kparams,
(uint8_t *)mkparams->out +
sl->outs[ct]->mHal.drvState.lod[0].stride * kparams->lid;
}
+ } else {
+ mkparams->out = NULL;
+ ostep = 0;
}
//ALOGE("kernel %i %p,%p %p,%p", ct, mp->ptrIn, mp->in, mp->ptrOut, mp->out);
- func(kparams, xstart, xend, istep, ostep);
+ /*
+ * The fourth argument (the input step) is zero here because kernels read
+ * their input strides from the array that kparams->inEStrides points to.
+ */
+ func(kparams, xstart, xend, 0, ostep);
}
//ALOGE("script group root");
- mkparams->usr = sl;
+ mkparams->ins = oldIns;
+ mkparams->inEStrides = oldStrides;
+ mkparams->usr = sl;
}
@@ -195,17 +211,33 @@ void CpuScriptGroupImpl::execute() {
MTLaunchStruct mtls;
- if(fieldDep) {
+ if (fieldDep) {
for (size_t ct=0; ct < ins.size(); ct++) {
Script *s = kernels[ct]->mScript;
RsdCpuScriptImpl *si = (RsdCpuScriptImpl *)mCtx->lookupScript(s);
uint32_t slot = kernels[ct]->mSlot;
- si->forEachMtlsSetup(ins[ct], outs[ct], NULL, 0, NULL, &mtls);
+ uint32_t inLen;
+ const Allocation **ains;
+
+ if (ins[ct] == NULL) {
+ inLen = 0;
+ ains = NULL;
+
+ } else {
+ inLen = 1;
+ ains = const_cast<const Allocation**>(&ins[ct]);
+ }
+
+ si->forEachMtlsSetup(ains, inLen, outs[ct], NULL, 0, NULL, &mtls);
+
si->forEachKernelSetup(slot, &mtls);
- si->preLaunch(slot, ins[ct], outs[ct], mtls.fep.usr, mtls.fep.usrLen, NULL);
- mCtx->launchThreads(ins[ct], outs[ct], NULL, &mtls);
- si->postLaunch(slot, ins[ct], outs[ct], NULL, 0, NULL);
+ si->preLaunch(slot, ains, inLen, outs[ct], mtls.fep.usr,
+ mtls.fep.usrLen, NULL);
+
+ mCtx->launchThreads(ains, inLen, outs[ct], NULL, &mtls);
+
+ si->postLaunch(slot, ains, inLen, outs[ct], NULL, 0, NULL);
}
} else {
ScriptList sl;
@@ -214,6 +246,18 @@ void CpuScriptGroupImpl::execute() {
sl.kernels = kernels.array();
sl.count = kernels.size();
+ uint32_t inLen;
+ const Allocation **ains;
+
+ if (ins[0] == NULL) {
+ inLen = 0;
+ ains = NULL;
+
+ } else {
+ inLen = 1;
+ ains = const_cast<const Allocation**>(&ins[0]);
+ }
+
Vector<const void *> usrPtrs;
Vector<const void *> fnPtrs;
Vector<uint32_t> sigs;
@@ -225,7 +269,8 @@ void CpuScriptGroupImpl::execute() {
fnPtrs.add((void *)mtls.kernel);
usrPtrs.add(mtls.fep.usr);
sigs.add(mtls.fep.usrLen);
- si->preLaunch(kernels[ct]->mSlot, ins[ct], outs[ct], mtls.fep.usr, mtls.fep.usrLen, NULL);
+ si->preLaunch(kernels[ct]->mSlot, ains, inLen, outs[ct],
+ mtls.fep.usr, mtls.fep.usrLen, NULL);
}
sl.sigs = sigs.array();
sl.usrPtrs = usrPtrs.array();
@@ -235,16 +280,20 @@ void CpuScriptGroupImpl::execute() {
Script *s = kernels[0]->mScript;
RsdCpuScriptImpl *si = (RsdCpuScriptImpl *)mCtx->lookupScript(s);
- si->forEachMtlsSetup(ins[0], outs[0], NULL, 0, NULL, &mtls);
+
+ si->forEachMtlsSetup(ains, inLen, outs[0], NULL, 0, NULL, &mtls);
+
mtls.script = NULL;
mtls.kernel = (void (*)())&scriptGroupRoot;
mtls.fep.usr = &sl;
- mCtx->launchThreads(ins[0], outs[0], NULL, &mtls);
+
+ mCtx->launchThreads(ains, inLen, outs[0], NULL, &mtls);
for (size_t ct=0; ct < kernels.size(); ct++) {
Script *s = kernels[ct]->mScript;
RsdCpuScriptImpl *si = (RsdCpuScriptImpl *)mCtx->lookupScript(s);
- si->postLaunch(kernels[ct]->mSlot, ins[ct], outs[ct], NULL, 0, NULL);
+ si->postLaunch(kernels[ct]->mSlot, ains, inLen, outs[ct], NULL, 0,
+ NULL);
}
}
}