summaryrefslogtreecommitdiff
path: root/cpu_ref/rsCpuScript.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'cpu_ref/rsCpuScript.cpp')
-rw-r--r--cpu_ref/rsCpuScript.cpp41
1 files changed, 27 insertions, 14 deletions
diff --git a/cpu_ref/rsCpuScript.cpp b/cpu_ref/rsCpuScript.cpp
index f4abe67f..6c534cf9 100644
--- a/cpu_ref/rsCpuScript.cpp
+++ b/cpu_ref/rsCpuScript.cpp
@@ -258,6 +258,11 @@ static void setCompileArguments(std::vector<const char*>* args,
args->push_back("-mtriple");
args->push_back(DEFAULT_TARGET_TRIPLE_STRING);
+ // Enable workaround for A53 codegen by default.
+#if defined(__aarch64__) && !defined(DISABLE_A53_WORKAROUND)
+ args->push_back("-aarch64-fix-cortex-a53-835769");
+#endif
+
// Execute the bcc compiler.
if (useRSDebugContext) {
args->push_back("-rs-debug-ctx");
@@ -381,7 +386,6 @@ RsdCpuScriptImpl::RsdCpuScriptImpl(RsdCpuReferenceImpl *ctx, const Script *s) {
mFieldIsObject = nullptr;
mForEachSignatures = nullptr;
#else
- mCompilerContext = nullptr;
mCompilerDriver = nullptr;
mExecutable = nullptr;
#endif
@@ -409,17 +413,9 @@ bool RsdCpuScriptImpl::init(char const *resName, char const *cacheDir,
#ifndef RS_COMPATIBILITY_LIB
bool useRSDebugContext = false;
- mCompilerContext = nullptr;
mCompilerDriver = nullptr;
mExecutable = nullptr;
- mCompilerContext = new bcc::BCCContext();
- if (mCompilerContext == nullptr) {
- ALOGE("bcc: FAILS to create compiler context (out of memory)");
- mCtx->unlockMutex();
- return false;
- }
-
mCompilerDriver = new bcc::RSCompilerDriver();
if (mCompilerDriver == nullptr) {
ALOGE("bcc: FAILS to create compiler driver (out of memory)");
@@ -1033,14 +1029,34 @@ void RsdCpuScriptImpl::invokeFreeChildren() {
void RsdCpuScriptImpl::invokeFunction(uint32_t slot, const void *params,
size_t paramLength) {
//ALOGE("invoke %p %p %i %p %i", dc, script, slot, params, paramLength);
+ void * ap = nullptr;
+
+#if defined(__x86_64__)
+ // The invoked function may take vector-typed parameters (for example float4), which need
+ // params to be 16-byte aligned when SSE instructions are used on x86_64. If params is not
+ // 16-byte aligned, copy it into a 16-byte-aligned buffer (ap) and pass that copy instead.
+
+ if ((uint8_t)(uint64_t)params & 0x0F) {
+ if ((ap = (void*)memalign(16, paramLength)) != nullptr) {
+ memcpy(ap, params, paramLength);
+ } else {
+ ALOGE("x86_64: invokeFunction memalign error, still use params which is not 16 bytes aligned.");
+ }
+ }
+#endif
RsdCpuScriptImpl * oldTLS = mCtx->setTLS(this);
reinterpret_cast<void (*)(const void *, uint32_t)>(
#ifndef RS_COMPATIBILITY_LIB
- mExecutable->getExportFuncAddrs()[slot])(params, paramLength);
+ mExecutable->getExportFuncAddrs()[slot])(ap ? (const void *)ap : params, paramLength);
#else
- mInvokeFunctions[slot])(params, paramLength);
+ mInvokeFunctions[slot])(ap ? (const void *)ap : params, paramLength);
#endif
+
+#if defined(__x86_64__)
+ if (ap) free(ap);
+#endif
+
mCtx->setTLS(oldTLS);
}
@@ -1199,9 +1215,6 @@ RsdCpuScriptImpl::~RsdCpuScriptImpl() {
}
}
- if (mCompilerContext) {
- delete mCompilerContext;
- }
if (mCompilerDriver) {
delete mCompilerDriver;
}