diff options
| author | Yong Chen <yong.a.chen@intel.com> | 2014-12-12 13:25:18 +0800 |
|---|---|---|
| committer | Yong Chen <yong.a.chen@intel.com> | 2014-12-12 16:26:55 +0800 |
| commit | eaba5a3ca215729258dcf9ac6f0bb5f88c78f998 (patch) | |
| tree | 9fce1150a3fc3dc602914312c7f44fb72d27f092 /cpu_ref/rsCpuScript.cpp | |
| parent | 5c9fcdab0f89b7cf18effdc3fd5becbac59d35e0 (diff) | |
Align ScriptInvokeV parameters to 16 bytes for x86_64
Change-Id: I0c9aea220b12f364c0a1a90232fbaa40ad90aeec
Signed-off-by: Yong Chen <yong.a.chen@intel.com>
Diffstat (limited to 'cpu_ref/rsCpuScript.cpp')
| -rw-r--r-- | cpu_ref/rsCpuScript.cpp | 24 |
1 files changed, 22 insertions, 2 deletions
diff --git a/cpu_ref/rsCpuScript.cpp b/cpu_ref/rsCpuScript.cpp
index f4abe67f..0c27e4bc 100644
--- a/cpu_ref/rsCpuScript.cpp
+++ b/cpu_ref/rsCpuScript.cpp
@@ -1033,14 +1033,34 @@ void RsdCpuScriptImpl::invokeFreeChildren() {
 void RsdCpuScriptImpl::invokeFunction(uint32_t slot, const void *params,
                                       size_t paramLength) {
     //ALOGE("invoke %p %p %i %p %i", dc, script, slot, params, paramLength);
+    void * ap = nullptr;
+
+#if defined(__x86_64__)
+    // The invoked function could have input parameter of vector type for example float4 which
+    // requires void* params to be 16 bytes aligned when using SSE instructions for x86_64 platform.
+    // So try to align void* params before passing them into RS exported function.
+
+    if ((uint8_t)(uint64_t)params & 0x0F) {
+        if ((ap = (void*)memalign(16, paramLength)) != nullptr) {
+            memcpy(ap, params, paramLength);
+        } else {
+            ALOGE("x86_64: invokeFunction memalign error, still use params which is not 16 bytes aligned.");
+        }
+    }
+#endif
 
     RsdCpuScriptImpl * oldTLS = mCtx->setTLS(this);
     reinterpret_cast<void (*)(const void *, uint32_t)>(
 #ifndef RS_COMPATIBILITY_LIB
-        mExecutable->getExportFuncAddrs()[slot])(params, paramLength);
+        mExecutable->getExportFuncAddrs()[slot])(ap ? (const void *)ap : params, paramLength);
 #else
-        mInvokeFunctions[slot])(params, paramLength);
+        mInvokeFunctions[slot])(ap ? (const void *)ap : params, paramLength);
+#endif
+
+#if defined(__x86_64__)
+    if (ap) free(ap);
 #endif
+
     mCtx->setTLS(oldTLS);
 }
 
