diff options
Diffstat (limited to 'cpu_ref/rsCpuScriptGroup.cpp')
| -rw-r--r-- | cpu_ref/rsCpuScriptGroup.cpp | 103 |
1 files changed, 76 insertions, 27 deletions
diff --git a/cpu_ref/rsCpuScriptGroup.cpp b/cpu_ref/rsCpuScriptGroup.cpp index 08785523..20ee09db 100644 --- a/cpu_ref/rsCpuScriptGroup.cpp +++ b/cpu_ref/rsCpuScriptGroup.cpp @@ -53,38 +53,45 @@ void CpuScriptGroupImpl::scriptGroupRoot(const RsExpandKernelParams *kparams, uint32_t instep, uint32_t outstep) { - const ScriptList *sl = (const ScriptList *)kparams->usr; + const ScriptList *sl = (const ScriptList *)kparams->usr; RsExpandKernelParams *mkparams = (RsExpandKernelParams *)kparams; + const void **oldIns = mkparams->ins; + uint32_t *oldStrides = mkparams->inEStrides; + + void *localIns[1]; + uint32_t localStride[1]; + + mkparams->ins = (const void**)localIns; + mkparams->inEStrides = localStride; + for (size_t ct = 0; ct < sl->count; ct++) { ScriptGroupRootFunc_t func; func = (ScriptGroupRootFunc_t)sl->fnPtrs[ct]; mkparams->usr = sl->usrPtrs[ct]; - mkparams->in = NULL; - mkparams->out = NULL; - - uint32_t istep = 0; - uint32_t ostep = 0; - if (sl->ins[ct]) { - mkparams->in = - (const uint8_t *)sl->ins[ct]->mHal.drvState.lod[0].mallocPtr; + localIns[0] = sl->ins[ct]->mHal.drvState.lod[0].mallocPtr; - istep = sl->ins[ct]->mHal.state.elementSizeBytes; + localStride[0] = sl->ins[ct]->mHal.state.elementSizeBytes; if (sl->inExts[ct]) { - mkparams->in = - (const uint8_t *)mkparams->in + - sl->ins[ct]->mHal.drvState.lod[0].stride * kparams->y; + localIns[0] = (void*) + ((const uint8_t *)localIns[0] + + sl->ins[ct]->mHal.drvState.lod[0].stride * kparams->y); } else if (sl->ins[ct]->mHal.drvState.lod[0].dimY > kparams->lid) { - mkparams->in = - (const uint8_t *)mkparams->in + - sl->ins[ct]->mHal.drvState.lod[0].stride * kparams->lid; + localIns[0] = (void*) + ((const uint8_t *)localIns[0] + + sl->ins[ct]->mHal.drvState.lod[0].stride * kparams->lid); } + + } else { + localIns[0] = NULL; + localStride[0] = 0; } + uint32_t ostep; if (sl->outs[ct]) { mkparams->out = (uint8_t *)sl->outs[ct]->mHal.drvState.lod[0].mallocPtr; @@ -101,14 +108,23 @@ void CpuScriptGroupImpl::scriptGroupRoot(const RsExpandKernelParams *kparams, (uint8_t *)mkparams->out + sl->outs[ct]->mHal.drvState.lod[0].stride * kparams->lid; } + } else { + mkparams->out = NULL; + ostep = 0; } //ALOGE("kernel %i %p,%p %p,%p", ct, mp->ptrIn, mp->in, mp->ptrOut, mp->out); - func(kparams, xstart, xend, istep, ostep); + /* + * The fourth argument is zero here because kernels get their stride + * information from a member of p that points to an array. + */ + func(kparams, xstart, xend, 0, ostep); } //ALOGE("script group root"); - mkparams->usr = sl; + mkparams->ins = oldIns; + mkparams->inEStrides = oldStrides; + mkparams->usr = sl; } @@ -195,17 +211,33 @@ void CpuScriptGroupImpl::execute() { MTLaunchStruct mtls; - if(fieldDep) { + if (fieldDep) { for (size_t ct=0; ct < ins.size(); ct++) { Script *s = kernels[ct]->mScript; RsdCpuScriptImpl *si = (RsdCpuScriptImpl *)mCtx->lookupScript(s); uint32_t slot = kernels[ct]->mSlot; - si->forEachMtlsSetup(ins[ct], outs[ct], NULL, 0, NULL, &mtls); + uint32_t inLen; + const Allocation **ains; + + if (ins[ct] == NULL) { + inLen = 0; + ains = NULL; + + } else { + inLen = 1; + ains = const_cast<const Allocation**>(&ins[ct]); + } + + si->forEachMtlsSetup(ains, inLen, outs[ct], NULL, 0, NULL, &mtls); + si->forEachKernelSetup(slot, &mtls); - si->preLaunch(slot, ins[ct], outs[ct], mtls.fep.usr, mtls.fep.usrLen, NULL); - mCtx->launchThreads(ins[ct], outs[ct], NULL, &mtls); - si->postLaunch(slot, ins[ct], outs[ct], NULL, 0, NULL); + si->preLaunch(slot, ains, inLen, outs[ct], mtls.fep.usr, + mtls.fep.usrLen, NULL); + + mCtx->launchThreads(ains, inLen, outs[ct], NULL, &mtls); + + si->postLaunch(slot, ains, inLen, outs[ct], NULL, 0, NULL); } } else { ScriptList sl; @@ -214,6 +246,18 @@ void CpuScriptGroupImpl::execute() { sl.kernels = kernels.array(); sl.count = kernels.size(); + uint32_t inLen; + const Allocation **ains; + + if (ins[0] == NULL) { + inLen = 0; + ains = NULL; + + } else { + inLen = 1; + ains = const_cast<const Allocation**>(&ins[0]); + } + Vector<const void *> usrPtrs; Vector<const void *> fnPtrs; Vector<uint32_t> sigs; @@ -225,7 +269,8 @@ void CpuScriptGroupImpl::execute() { fnPtrs.add((void *)mtls.kernel); usrPtrs.add(mtls.fep.usr); sigs.add(mtls.fep.usrLen); - si->preLaunch(kernels[ct]->mSlot, ins[ct], outs[ct], mtls.fep.usr, mtls.fep.usrLen, NULL); + si->preLaunch(kernels[ct]->mSlot, ains, inLen, outs[ct], + mtls.fep.usr, mtls.fep.usrLen, NULL); } sl.sigs = sigs.array(); sl.usrPtrs = usrPtrs.array(); @@ -235,16 +280,20 @@ void CpuScriptGroupImpl::execute() { Script *s = kernels[0]->mScript; RsdCpuScriptImpl *si = (RsdCpuScriptImpl *)mCtx->lookupScript(s); - si->forEachMtlsSetup(ins[0], outs[0], NULL, 0, NULL, &mtls); + + si->forEachMtlsSetup(ains, inLen, outs[0], NULL, 0, NULL, &mtls); + mtls.script = NULL; mtls.kernel = (void (*)())&scriptGroupRoot; mtls.fep.usr = &sl; - mCtx->launchThreads(ins[0], outs[0], NULL, &mtls); + + mCtx->launchThreads(ains, inLen, outs[0], NULL, &mtls); for (size_t ct=0; ct < kernels.size(); ct++) { Script *s = kernels[ct]->mScript; RsdCpuScriptImpl *si = (RsdCpuScriptImpl *)mCtx->lookupScript(s); - si->postLaunch(kernels[ct]->mSlot, ins[ct], outs[ct], NULL, 0, NULL); + si->postLaunch(kernels[ct]->mSlot, ains, inLen, outs[ct], NULL, 0, + NULL); } } } |
