diff options
| author | Chris Wailes <chriswailes@google.com> | 2014-07-16 15:18:30 -0700 |
|---|---|---|
| committer | Chris Wailes <chriswailes@google.com> | 2014-08-15 18:26:36 -0700 |
| commit | f37121300217d3b39ab66dd9c8881bcbcad932df (patch) | |
| tree | 2e06a02e490f318cc03b95b03112860e3d38e9a6 /cpu_ref/rsCpuScriptGroup.cpp | |
| parent | cd8df40f07d88f896bea05ed06a20d1a4d5e9728 (diff) | |
Collapse code paths for single- and multi-input kernels.
This patch simplifies the RenderScript driver and CPU reference implementation
by removing the distinction between single- and multi-input kernels in many
places. The distinction is maintained in some places due to the need to
maintain backwards compatibility. This permits the deletion of some functions
and struct members that are no longer needed. Several related functions were
also cleaned up.
Change-Id: Id70a223ea5e3aa2b0b935b2b7f9af933339ae8a4
Diffstat (limited to 'cpu_ref/rsCpuScriptGroup.cpp')
| -rw-r--r-- | cpu_ref/rsCpuScriptGroup.cpp | 103 |
1 files changed, 76 insertions, 27 deletions
diff --git a/cpu_ref/rsCpuScriptGroup.cpp b/cpu_ref/rsCpuScriptGroup.cpp index 08785523..20ee09db 100644 --- a/cpu_ref/rsCpuScriptGroup.cpp +++ b/cpu_ref/rsCpuScriptGroup.cpp @@ -53,38 +53,45 @@ void CpuScriptGroupImpl::scriptGroupRoot(const RsExpandKernelParams *kparams, uint32_t instep, uint32_t outstep) { - const ScriptList *sl = (const ScriptList *)kparams->usr; + const ScriptList *sl = (const ScriptList *)kparams->usr; RsExpandKernelParams *mkparams = (RsExpandKernelParams *)kparams; + const void **oldIns = mkparams->ins; + uint32_t *oldStrides = mkparams->inEStrides; + + void *localIns[1]; + uint32_t localStride[1]; + + mkparams->ins = (const void**)localIns; + mkparams->inEStrides = localStride; + for (size_t ct = 0; ct < sl->count; ct++) { ScriptGroupRootFunc_t func; func = (ScriptGroupRootFunc_t)sl->fnPtrs[ct]; mkparams->usr = sl->usrPtrs[ct]; - mkparams->in = NULL; - mkparams->out = NULL; - - uint32_t istep = 0; - uint32_t ostep = 0; - if (sl->ins[ct]) { - mkparams->in = - (const uint8_t *)sl->ins[ct]->mHal.drvState.lod[0].mallocPtr; + localIns[0] = sl->ins[ct]->mHal.drvState.lod[0].mallocPtr; - istep = sl->ins[ct]->mHal.state.elementSizeBytes; + localStride[0] = sl->ins[ct]->mHal.state.elementSizeBytes; if (sl->inExts[ct]) { - mkparams->in = - (const uint8_t *)mkparams->in + - sl->ins[ct]->mHal.drvState.lod[0].stride * kparams->y; + localIns[0] = (void*) + ((const uint8_t *)localIns[0] + + sl->ins[ct]->mHal.drvState.lod[0].stride * kparams->y); } else if (sl->ins[ct]->mHal.drvState.lod[0].dimY > kparams->lid) { - mkparams->in = - (const uint8_t *)mkparams->in + - sl->ins[ct]->mHal.drvState.lod[0].stride * kparams->lid; + localIns[0] = (void*) + ((const uint8_t *)localIns[0] + + sl->ins[ct]->mHal.drvState.lod[0].stride * kparams->lid); } + + } else { + localIns[0] = NULL; + localStride[0] = 0; } + uint32_t ostep; if (sl->outs[ct]) { mkparams->out = (uint8_t *)sl->outs[ct]->mHal.drvState.lod[0].mallocPtr; @@ -101,14 +108,23 @@ void 
CpuScriptGroupImpl::scriptGroupRoot(const RsExpandKernelParams *kparams, (uint8_t *)mkparams->out + sl->outs[ct]->mHal.drvState.lod[0].stride * kparams->lid; } + } else { + mkparams->out = NULL; + ostep = 0; } //ALOGE("kernel %i %p,%p %p,%p", ct, mp->ptrIn, mp->in, mp->ptrOut, mp->out); - func(kparams, xstart, xend, istep, ostep); + /* + * The fourth argument is zero here because kernels get their stride + * information from a member of p that points to an array. + */ + func(kparams, xstart, xend, 0, ostep); } //ALOGE("script group root"); - mkparams->usr = sl; + mkparams->ins = oldIns; + mkparams->inEStrides = oldStrides; + mkparams->usr = sl; } @@ -195,17 +211,33 @@ void CpuScriptGroupImpl::execute() { MTLaunchStruct mtls; - if(fieldDep) { + if (fieldDep) { for (size_t ct=0; ct < ins.size(); ct++) { Script *s = kernels[ct]->mScript; RsdCpuScriptImpl *si = (RsdCpuScriptImpl *)mCtx->lookupScript(s); uint32_t slot = kernels[ct]->mSlot; - si->forEachMtlsSetup(ins[ct], outs[ct], NULL, 0, NULL, &mtls); + uint32_t inLen; + const Allocation **ains; + + if (ins[ct] == NULL) { + inLen = 0; + ains = NULL; + + } else { + inLen = 1; + ains = const_cast<const Allocation**>(&ins[ct]); + } + + si->forEachMtlsSetup(ains, inLen, outs[ct], NULL, 0, NULL, &mtls); + si->forEachKernelSetup(slot, &mtls); - si->preLaunch(slot, ins[ct], outs[ct], mtls.fep.usr, mtls.fep.usrLen, NULL); - mCtx->launchThreads(ins[ct], outs[ct], NULL, &mtls); - si->postLaunch(slot, ins[ct], outs[ct], NULL, 0, NULL); + si->preLaunch(slot, ains, inLen, outs[ct], mtls.fep.usr, + mtls.fep.usrLen, NULL); + + mCtx->launchThreads(ains, inLen, outs[ct], NULL, &mtls); + + si->postLaunch(slot, ains, inLen, outs[ct], NULL, 0, NULL); } } else { ScriptList sl; @@ -214,6 +246,18 @@ void CpuScriptGroupImpl::execute() { sl.kernels = kernels.array(); sl.count = kernels.size(); + uint32_t inLen; + const Allocation **ains; + + if (ins[0] == NULL) { + inLen = 0; + ains = NULL; + + } else { + inLen = 1; + ains = const_cast<const 
Allocation**>(&ins[0]); + } + Vector<const void *> usrPtrs; Vector<const void *> fnPtrs; Vector<uint32_t> sigs; @@ -225,7 +269,8 @@ void CpuScriptGroupImpl::execute() { fnPtrs.add((void *)mtls.kernel); usrPtrs.add(mtls.fep.usr); sigs.add(mtls.fep.usrLen); - si->preLaunch(kernels[ct]->mSlot, ins[ct], outs[ct], mtls.fep.usr, mtls.fep.usrLen, NULL); + si->preLaunch(kernels[ct]->mSlot, ains, inLen, outs[ct], + mtls.fep.usr, mtls.fep.usrLen, NULL); } sl.sigs = sigs.array(); sl.usrPtrs = usrPtrs.array(); @@ -235,16 +280,20 @@ void CpuScriptGroupImpl::execute() { Script *s = kernels[0]->mScript; RsdCpuScriptImpl *si = (RsdCpuScriptImpl *)mCtx->lookupScript(s); - si->forEachMtlsSetup(ins[0], outs[0], NULL, 0, NULL, &mtls); + + si->forEachMtlsSetup(ains, inLen, outs[0], NULL, 0, NULL, &mtls); + mtls.script = NULL; mtls.kernel = (void (*)())&scriptGroupRoot; mtls.fep.usr = &sl; - mCtx->launchThreads(ins[0], outs[0], NULL, &mtls); + + mCtx->launchThreads(ains, inLen, outs[0], NULL, &mtls); for (size_t ct=0; ct < kernels.size(); ct++) { Script *s = kernels[ct]->mScript; RsdCpuScriptImpl *si = (RsdCpuScriptImpl *)mCtx->lookupScript(s); - si->postLaunch(kernels[ct]->mSlot, ins[ct], outs[ct], NULL, 0, NULL); + si->postLaunch(kernels[ct]->mSlot, ains, inLen, outs[ct], NULL, 0, + NULL); } } } |
