diff options
Diffstat (limited to 'cpp')
| -rw-r--r-- | cpp/Android.mk | 1 | ||||
| -rw-r--r-- | cpp/ScriptIntrinsicBLAS.cpp | 1848 | ||||
| -rw-r--r-- | cpp/ScriptIntrinsics.cpp | 2 | ||||
| -rw-r--r-- | cpp/rsCppStructs.h | 2481 |
4 files changed, 4061 insertions, 271 deletions
diff --git a/cpp/Android.mk b/cpp/Android.mk index d53140f4..f1769fac 100644 --- a/cpp/Android.mk +++ b/cpp/Android.mk @@ -28,6 +28,7 @@ rs_cpp_SRC_FILES := \ Script.cpp \ ScriptC.cpp \ ScriptIntrinsics.cpp \ + ScriptIntrinsicBLAS.cpp \ Sampler.cpp LOCAL_ADDITIONAL_DEPENDENCIES := $(LOCAL_PATH)/Android.mk diff --git a/cpp/ScriptIntrinsicBLAS.cpp b/cpp/ScriptIntrinsicBLAS.cpp new file mode 100644 index 00000000..63a8f0c9 --- /dev/null +++ b/cpp/ScriptIntrinsicBLAS.cpp @@ -0,0 +1,1848 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +#include "RenderScript.h" +#include "rsCppInternal.h" + +using namespace android; +using namespace RSC; + +// ScriptIntrinsicBLAS APIS +ScriptIntrinsicBLAS::ScriptIntrinsicBLAS(sp<RS> rs, sp<const Element> e) + : ScriptIntrinsic(rs, RS_SCRIPT_INTRINSIC_ID_BLAS, e) { + +} + +sp<ScriptIntrinsicBLAS> ScriptIntrinsicBLAS::create(sp<RS> rs) { + return new ScriptIntrinsicBLAS(rs, Element::U32(rs)); +} + +enum RsBlasDataType { + SINGLE, + DOUBLE, + SINGLE_COMPLEX, + DOUBLE_COMPLEX +}; + +static RsBlasCall +setUpBLASCall(RsBlasDataType dataType, RsBlasFunction func, + int TransA, int TransB, int Side, int Uplo, int Diag, + int M, int N, int K, int incX, int incY, int KL, int KU, + float alphaF, float betaF, double alphaD, double betaD, + float alphaCX, float alphaCY, float betaCX, float betaCY, + double alphaZX, double alphaZY, double betaZX, double betaZY + ) { + RsBlasCall call; + memset(&call, 0, sizeof(call)); + call.func = func; + call.transA = (RsBlasTranspose)TransA; + call.transB = (RsBlasTranspose)TransB; + call.side = (RsBlasSide)Side; + call.uplo = (RsBlasUplo)Uplo; + call.diag = (RsBlasDiag)Diag; + call.M = M; + call.N = N; + call.K = K; + + switch (dataType) { + case SINGLE: + // For Single-precision BLAS. + call.alpha.f = alphaF; + call.beta.f = betaF; + break; + case DOUBLE: + // For Double-precision BLAS. + call.alpha.d = alphaD; + call.beta.d = betaD; + break; + case SINGLE_COMPLEX: + // For Single-precision complex BLAS. + call.alpha.c.r = alphaCX; + call.alpha.c.i = alphaCY; + call.beta.c.r = betaCX; + call.beta.c.i = betaCY; + break; + case DOUBLE_COMPLEX: + // For Double-precision complex BLAS. + call.alpha.z.r = alphaZX; + call.alpha.z.i = alphaZY; + call.beta.z.r = betaZX; + call.beta.z.i = betaZY; + break; + default: + break; + } + + call.incX = incX; + call.incY = incY; + call.KL = KL; + call.KU = KU; + + return call; +} + +static void +nScriptIntrinsicBLAS_Single(RS* mRS, RsContext con, RsScript id, RsBlasFunction func, int TransA, + int TransB, int Side, int Uplo, int Diag, int M, int N, int K, + float alpha, RsAllocation A, RsAllocation B, + float beta, RsAllocation C, int incX, int incY, int KL, int KU) { + RsBlasCall call = setUpBLASCall(SINGLE, func, TransA, TransB, Side, Uplo, Diag, + M, N, K, incX, incY, KL, KU, alpha, beta, 0.0, 0.0, + 0.0f, 0.0f, 0.0f, 0.0f, 0.0, 0.0, 0.0, 0.0); + RsAllocation in_allocs[3] = {A, B, C}; + tryDispatch(mRS, RS::dispatch->ScriptForEachMulti(con, id, 0, in_allocs, sizeof(in_allocs), nullptr, + &call, sizeof(call), nullptr, 0)); +} + + +static void +nScriptIntrinsicBLAS_Double(RS* mRS, RsContext con, RsScript id, RsBlasFunction func, int TransA, + int TransB, int Side, int Uplo, int Diag, int M, int N, int K, + double alpha, RsAllocation A, RsAllocation B, + double beta, RsAllocation C, int incX, int incY, int KL, int KU) { + RsBlasCall call = setUpBLASCall(DOUBLE, func, TransA, TransB, Side, Uplo, Diag, + M, N, K, incX, incY, KL, KU, 0.0f, 0.0f, alpha, beta, + 0.0f, 0.0f, 0.0f, 0.0f, 0.0, 0.0, 0.0, 0.0); + RsAllocation in_allocs[3] = {A, B, C}; + tryDispatch(mRS, RS::dispatch->ScriptForEachMulti(con, id, 0, in_allocs, sizeof(in_allocs), nullptr, + &call, sizeof(call), nullptr, 0)); +} + +static void +nScriptIntrinsicBLAS_Complex(RS* mRS, RsContext con, RsScript id, RsBlasFunction func, int TransA, + int TransB, int Side, int Uplo, int Diag, int M, int N, int K, + float alphaX, float alphaY, RsAllocation A, RsAllocation B, + float betaX, float betaY, RsAllocation C, int incX, int incY, int KL, int KU) { + RsBlasCall call = setUpBLASCall(SINGLE_COMPLEX, func, TransA, TransB, Side, Uplo, Diag, + M, N, K, incX, incY, KL, KU, 0.0f, 0.0f, 0.0, 0.0, + alphaX, alphaY, betaX, betaY, 0.0, 0.0, 0.0, 0.0); + RsAllocation in_allocs[3] = {A, B, C}; + tryDispatch(mRS, RS::dispatch->ScriptForEachMulti(con, id, 0, in_allocs, sizeof(in_allocs), nullptr, + &call, sizeof(call), nullptr, 0)); +} + +static void +nScriptIntrinsicBLAS_Z(RS* mRS, RsContext con, RsScript id, RsBlasFunction func, int TransA, + int TransB, int Side, int Uplo, int Diag, int M, int N, int K, + double alphaX, double alphaY, RsAllocation A, RsAllocation B, + double betaX, double betaY, RsAllocation C, int incX, int incY, int KL, int KU) { + RsBlasCall call = setUpBLASCall(DOUBLE_COMPLEX, func, TransA, TransB, Side, Uplo, Diag, + M, N, K, incX, incY, KL, KU, 0.0f, 0.0f, 0.0, 0.0, + 0.0f, 0.0f, 0.0f, 0.0f, alphaX, alphaY, betaX, betaY); + RsAllocation in_allocs[3] = {A, B, C}; + tryDispatch(mRS, RS::dispatch->ScriptForEachMulti(con, id, 0, in_allocs, sizeof(in_allocs), nullptr, + &call, sizeof(call), nullptr, 0)); +} + + +static void +nScriptIntrinsicBLAS_BNNM(RS* mRS, RsContext con, RsScript id, int M, int N, int K, + RsAllocation A, int a_offset, RsAllocation B, int b_offset, + RsAllocation C, int c_offset, int c_mult_int) { + RsBlasCall call; + memset(&call, 0, sizeof(call)); + call.func = RsBlas_bnnm; + call.M = M; + call.N = N; + call.K = K; + call.a_offset = a_offset & 0xFF; + call.b_offset = b_offset & 0xFF; + call.c_offset = c_offset; + call.c_mult_int = c_mult_int; + + RsAllocation in_allocs[3] = {A, B, C}; + tryDispatch(mRS, RS::dispatch->ScriptForEachMulti(con, id, 0, in_allocs, sizeof(in_allocs), nullptr, + &call, sizeof(call), nullptr, 0)); +} + +/** + * Level 2 BLAS + */ +static void validateGEMV(RS* mRS, sp<const Element> e, RsBlasTranspose TransA, sp<Allocation> A, + sp<Allocation> X, int incX, sp<Allocation> Y, int incY) { + int M = A->getType()->getY(); + int N = A->getType()->getX(); + if (!A->getType()->getElement()->isCompatible(e) || + !X->getType()->getElement()->isCompatible(e) || + !Y->getType()->getElement()->isCompatible(e)) { + mRS->throwError(RS_ERROR_INVALID_ELEMENT, "Called BLAS with wrong Element type"); + } + if (X->getType()->getY() > 1 || Y->getType()->getY() > 1) { + mRS->throwError(RS_ERROR_INVALID_PARAMETER, "BLAS vectors must have Y dimension of 0 or 1"); + } + + if (incX <= 0 || incY <= 0) { + mRS->throwError(RS_ERROR_INVALID_PARAMETER, "Vector increments must be greater than 0"); + } + int expectedXDim = -1, expectedYDim = -1; + if (TransA == RsBlasNoTrans) { + expectedXDim = 1 + (N - 1) * incX; + expectedYDim = 1 + (M - 1) * incY; + } else { + expectedXDim = 1 + (M - 1) * incX; + expectedYDim = 1 + (N - 1) * incY; + } + if ((int)X->getType()->getX() != expectedXDim || + (int)Y->getType()->getX() != expectedYDim) { + mRS->throwError(RS_ERROR_INVALID_PARAMETER, "Incorrect vector dimensions for GEMV"); + } +} + +void ScriptIntrinsicBLAS::SGEMV(RsBlasTranspose TransA, float alpha, sp<Allocation> A, sp<Allocation> X, + int incX, float beta, sp<Allocation> Y, int incY) { + validateGEMV(mRS, Element::F32(mRS), TransA, A, X, incX, Y, incY); + int M = A->getType()->getY(); + int N = A->getType()->getX(); + nScriptIntrinsicBLAS_Single(mRS, mRS->getContext(), getID(), RsBlas_sgemv, + TransA, 0, 0, 0, 0, M, N, 0, + alpha, A->getID(), X->getID(), + beta, Y->getID(), incX, incY, 0, 0); +} + +void ScriptIntrinsicBLAS::DGEMV(RsBlasTranspose TransA, double alpha, sp<Allocation> A, sp<Allocation> X, + int incX, double beta, sp<Allocation> Y, int incY) { + validateGEMV(mRS, Element::F64(mRS), TransA, A, X, incX, Y, incY); + int M = A->getType()->getY(); + int N = A->getType()->getX(); + nScriptIntrinsicBLAS_Double(mRS, mRS->getContext(), getID(), RsBlas_dgemv, + TransA, 0, 0, 0, 0, M, N, 0, + alpha, A->getID(), X->getID(), + beta, Y->getID(), incX, incY, 0, 0); +} + +void ScriptIntrinsicBLAS::CGEMV(RsBlasTranspose TransA, Float2 alpha, sp<Allocation> A, sp<Allocation> X, + int incX, Float2 beta, sp<Allocation> Y, int incY) { + validateGEMV(mRS, Element::F32_2(mRS), TransA, A, X, incX, Y, incY); + int M = A->getType()->getY(); + int N = A->getType()->getX(); + nScriptIntrinsicBLAS_Complex(mRS, mRS->getContext(), getID(), RsBlas_cgemv, + TransA, 0, 0, 0, 0, M, N, 0, + alpha.x, alpha.y, A->getID(), X->getID(), + beta.x, beta.y, Y->getID(), incX, incY, 0, 0); +} + +void ScriptIntrinsicBLAS::ZGEMV(RsBlasTranspose TransA, Double2 alpha, sp<Allocation> A, sp<Allocation> X, + int incX, Double2 beta, sp<Allocation> Y, int incY) { + validateGEMV(mRS, Element::F64_2(mRS), TransA, A, X, incX, Y, incY); + int M = A->getType()->getY(); + int N = A->getType()->getX(); + nScriptIntrinsicBLAS_Z(mRS, mRS->getContext(), getID(), RsBlas_zgemv, + TransA, 0, 0, 0, 0, M, N, 0, + alpha.x, alpha.y, A->getID(), X->getID(), + beta.x, beta.y, Y->getID(), incX, incY, 0, 0); +} + +void ScriptIntrinsicBLAS::SGBMV(RsBlasTranspose TransA, int KL, int KU, float alpha, sp<Allocation> A, + sp<Allocation> X, int incX, float beta, sp<Allocation> Y, int incY) { + // GBMV has the same validation requirements as GEMV + KL and KU >= 0 + validateGEMV(mRS, Element::F32(mRS), TransA, A, X, incX, Y, incY); + if (KL < 0 || KU < 0) { + mRS->throwError(RS_ERROR_INVALID_PARAMETER, "KL and KU must be greater than or equal to 0"); + } + int M = A->getType()->getY(); + int N = A->getType()->getX(); + + nScriptIntrinsicBLAS_Single(mRS, mRS->getContext(), getID(), RsBlas_sgbmv, + TransA, 0, 0, 0, 0, M, N, 0, + alpha, A->getID(), X->getID(), + beta, Y->getID(), incX, incY, KL, KU); +} + +void ScriptIntrinsicBLAS::DGBMV(RsBlasTranspose TransA, int KL, int KU, double alpha, sp<Allocation> A, + sp<Allocation> X, int incX, double beta, sp<Allocation> Y, int incY) { + // GBMV has the same validation requirements as GEMV + KL and KU >= 0 + validateGEMV(mRS, Element::F64(mRS), TransA, A, X, incX, Y, incY); + if (KL < 0 || KU < 0) { + mRS->throwError(RS_ERROR_INVALID_PARAMETER, "KL and KU must be greater than or equal to 0"); + } + int M = A->getType()->getY(); + int N = A->getType()->getX(); + + nScriptIntrinsicBLAS_Double(mRS, mRS->getContext(), getID(), RsBlas_dgbmv, + TransA, 0, 0, 0, 0, M, N, 0, + alpha, A->getID(), X->getID(), + beta, Y->getID(), incX, incY, KL, KU); +} + +void ScriptIntrinsicBLAS::CGBMV(RsBlasTranspose TransA, int KL, int KU, Float2 alpha, sp<Allocation> A, + sp<Allocation> X, int incX, Float2 beta, sp<Allocation> Y, int incY) { + // GBMV has the same validation requirements as GEMV + KL and KU >= 0 + validateGEMV(mRS, Element::F32_2(mRS), TransA, A, X, incX, Y, incY); + if (KL < 0 || KU < 0) { + mRS->throwError(RS_ERROR_INVALID_PARAMETER, "KL and KU must be greater than or equal to 0"); + } + int M = A->getType()->getY(); + int N = A->getType()->getX(); + + nScriptIntrinsicBLAS_Complex(mRS, mRS->getContext(), getID(), RsBlas_cgbmv, + TransA, 0, 0, 0, 0, M, N, 0, + alpha.x, alpha.y, A->getID(), X->getID(), + beta.x, beta.y, Y->getID(), incX, incY, KL, KU); +} + +void ScriptIntrinsicBLAS::ZGBMV(RsBlasTranspose TransA, int KL, int KU, Double2 alpha, sp<Allocation> A, + sp<Allocation> X, int incX, Double2 beta, sp<Allocation> Y, int incY) { + // GBMV has the same validation requirements as GEMV + KL and KU >= 0 + validateGEMV(mRS, Element::F64_2(mRS), TransA, A, X, incX, Y, incY); + if (KL < 0 || KU < 0) { + mRS->throwError(RS_ERROR_INVALID_PARAMETER, "KL and KU must be greater than or equal to 0"); + } + int M = A->getType()->getY(); + int N = A->getType()->getX(); + + nScriptIntrinsicBLAS_Z(mRS, mRS->getContext(), getID(), RsBlas_zgbmv, + TransA, 0, 0, 0, 0, M, N, 0, + alpha.x, alpha.y, A->getID(), X->getID(), + beta.x, beta.y, Y->getID(), incX, incY, KL, KU); +} + +static void validateTRMV(RS* mRS, sp<const Element> e, RsBlasUplo Uplo, RsBlasTranspose TransA, + RsBlasDiag Diag, sp<Allocation> A, sp<Allocation> X, int incX) { + int N = A->getType()->getY(); + if ((int)A->getType()->getX() != N) { + mRS->throwError(RS_ERROR_INVALID_PARAMETER, "A must be a square matrix for TRMV"); + } + if (!A->getType()->getElement()->isCompatible(e) || + !X->getType()->getElement()->isCompatible(e)) { + mRS->throwError(RS_ERROR_INVALID_ELEMENT, "Called BLAS with wrong Element type"); + } + if (X->getType()->getY() > 1) { + mRS->throwError(RS_ERROR_INVALID_PARAMETER, "BLAS vectors must have Y dimension of 0 or 1"); + } + + if (incX <= 0) { + mRS->throwError(RS_ERROR_INVALID_PARAMETER, "Vector increments must be greater than 0"); + } + int expectedXDim = 1 + (N - 1) * incX; + if ((int)X->getType()->getX() != expectedXDim) { + mRS->throwError(RS_ERROR_INVALID_PARAMETER, "Incorrect vector dimensions for TRMV"); + } +} + +static int validateTPMV(RS* mRS, sp<const Element> e, RsBlasUplo Uplo, RsBlasTranspose TransA, + RsBlasDiag Diag, sp<Allocation> Ap, sp<Allocation> X, int incX) { + if (!Ap->getType()->getElement()->isCompatible(e) || + !X->getType()->getElement()->isCompatible(e)) { + mRS->throwError(RS_ERROR_INVALID_ELEMENT, "Called BLAS with wrong Element type"); + } + if (X->getType()->getY() > 1) { + mRS->throwError(RS_ERROR_INVALID_PARAMETER, "BLAS vectors must have Y dimension of 0 or 1"); + } + + if (Ap->getType()->getY() > 1) { + mRS->throwError(RS_ERROR_INVALID_PARAMETER, "Ap must have a Y dimension of 0 or 1"); + } + + int N = sqrt((double)Ap->getType()->getX() * 2); + if ((int)Ap->getType()->getX() != ((N * (N+1)) / 2)) { + mRS->throwError(RS_ERROR_INVALID_PARAMETER, "Invalid dimension for Ap"); + } + if (incX <= 0) { + mRS->throwError(RS_ERROR_INVALID_PARAMETER, "Vector increments must be greater than 0"); + } + int expectedXDim = 1 + (N - 1) * incX; + if ((int)X->getType()->getX() != expectedXDim) { + mRS->throwError(RS_ERROR_INVALID_PARAMETER, "Incorrect vector dimensions for TPMV"); + } + + return N; +} + + +void ScriptIntrinsicBLAS::STRMV(RsBlasUplo Uplo, RsBlasTranspose TransA, RsBlasDiag Diag, + sp<Allocation> A, sp<Allocation> X, int incX) { + validateTRMV(mRS, Element::F32(mRS), Uplo, TransA, Diag, A, X, incX); + int N = A->getType()->getY(); + nScriptIntrinsicBLAS_Single(mRS, mRS->getContext(), getID(), RsBlas_strmv, + TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, + A->getID(), X->getID(), 0, 0, incX, 0, 0, 0); +} + +void ScriptIntrinsicBLAS::DTRMV(RsBlasUplo Uplo, RsBlasTranspose TransA, RsBlasDiag Diag, + sp<Allocation> A, sp<Allocation> X, int incX) { + validateTRMV(mRS, Element::F64(mRS), Uplo, TransA, Diag, A, X, incX); + int N = A->getType()->getY(); + nScriptIntrinsicBLAS_Double(mRS, mRS->getContext(), getID(), RsBlas_dtrmv, + TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, + A->getID(), X->getID(), 0, 0, incX, 0, 0, 0); +} + +void ScriptIntrinsicBLAS::CTRMV(RsBlasUplo Uplo, RsBlasTranspose TransA, RsBlasDiag Diag, + sp<Allocation> A, sp<Allocation> X, int incX) { + validateTRMV(mRS, Element::F32_2(mRS), Uplo, TransA, Diag, A, X, incX); + int N = A->getType()->getY(); + nScriptIntrinsicBLAS_Complex(mRS, mRS->getContext(), getID(), RsBlas_ctrmv, + TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, 0, + A->getID(), X->getID(), 0, 0, 0, incX, 0, 0, 0); +} + +void ScriptIntrinsicBLAS::ZTRMV(RsBlasUplo Uplo, RsBlasTranspose TransA, RsBlasDiag Diag, + sp<Allocation> A, sp<Allocation> X, int incX) { + validateTRMV(mRS, Element::F64_2(mRS), Uplo, TransA, Diag, A, X, incX); + int N = A->getType()->getY(); + nScriptIntrinsicBLAS_Z(mRS, mRS->getContext(), getID(), RsBlas_ztrmv, + TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, 0, + A->getID(), X->getID(), 0, 0, 0, incX, 0, 0, 0); +} + +void ScriptIntrinsicBLAS::STBMV(RsBlasUplo Uplo, RsBlasTranspose TransA, RsBlasDiag Diag, + int K, sp<Allocation> A, sp<Allocation> X, int incX) { + // TBMV has the same requirements as TRMV + K >= 0 + if (K < 0) { + mRS->throwError(RS_ERROR_INVALID_PARAMETER, "K must be greater than or equal to 0"); + } + validateTRMV(mRS, Element::F32(mRS), Uplo, TransA, Diag, A, X, incX); + int N = A->getType()->getY(); + nScriptIntrinsicBLAS_Single(mRS, mRS->getContext(), getID(), RsBlas_stbmv, + TransA, 0, 0, Uplo, Diag, 0, N, K, 0, + A->getID(), X->getID(), 0, 0, incX, 0, 0, 0); +} + +void ScriptIntrinsicBLAS::DTBMV(RsBlasUplo Uplo, RsBlasTranspose TransA, RsBlasDiag Diag, + int K, sp<Allocation> A, sp<Allocation> X, int incX) { + // TBMV has the same requirements as TRMV + K >= 0 + if (K < 0) { + mRS->throwError(RS_ERROR_INVALID_PARAMETER, "K must be greater than or equal to 0"); + } + validateTRMV(mRS, Element::F64(mRS), Uplo, TransA, Diag, A, X, incX); + int N = A->getType()->getY(); + nScriptIntrinsicBLAS_Double(mRS, mRS->getContext(), getID(), RsBlas_dtbmv, + TransA, 0, 0, Uplo, Diag, 0, N, K, 0, + A->getID(), X->getID(), 0, 0, incX, 0, 0, 0); +} + +void ScriptIntrinsicBLAS::CTBMV(RsBlasUplo Uplo, RsBlasTranspose TransA, RsBlasDiag Diag, + int K, sp<Allocation> A, sp<Allocation> X, int incX) { + // TBMV has the same requirements as TRMV + K >= 0 + if (K < 0) { + mRS->throwError(RS_ERROR_INVALID_PARAMETER, "K must be greater than or equal to 0"); + } + validateTRMV(mRS, Element::F32_2(mRS), Uplo, TransA, Diag, A, X, incX); + int N = A->getType()->getY(); + nScriptIntrinsicBLAS_Complex(mRS, mRS->getContext(), getID(), RsBlas_ctbmv, + TransA, 0, 0, Uplo, Diag, 0, N, K, 0, 0, + A->getID(), X->getID(), 0, 0, 0, incX, 0, 0, 0); +} + +void ScriptIntrinsicBLAS::ZTBMV(RsBlasUplo Uplo, RsBlasTranspose TransA, RsBlasDiag Diag, + int K, sp<Allocation> A, sp<Allocation> X, int incX) { + // TBMV has the same requirements as TRMV + K >= 0 + if (K < 0) { + mRS->throwError(RS_ERROR_INVALID_PARAMETER, "K must be greater than or equal to 0"); + } + validateTRMV(mRS, Element::F64_2(mRS), Uplo, TransA, Diag, A, X, incX); + int N = A->getType()->getY(); + nScriptIntrinsicBLAS_Z(mRS, mRS->getContext(), getID(), RsBlas_ztbmv, + TransA, 0, 0, Uplo, Diag, 0, N, K, 0, 0, + A->getID(), X->getID(), 0, 0, 0, incX, 0, 0, 0); +} + +void ScriptIntrinsicBLAS::STPMV(RsBlasUplo Uplo, RsBlasTranspose TransA, RsBlasDiag Diag, + sp<Allocation> Ap, sp<Allocation> X, int incX) { + int N = validateTPMV(mRS, Element::F32(mRS), Uplo, TransA, Diag, Ap, X, incX); + nScriptIntrinsicBLAS_Single(mRS, mRS->getContext(), getID(), RsBlas_stpmv, + TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, + Ap->getID(), X->getID(), 0, 0, incX, 0, 0, 0); +} + +void ScriptIntrinsicBLAS::DTPMV(RsBlasUplo Uplo, RsBlasTranspose TransA, RsBlasDiag Diag, + sp<Allocation> Ap, sp<Allocation> X, int incX) { + int N = validateTPMV(mRS, Element::F64(mRS), Uplo, TransA, Diag, Ap, X, incX); + nScriptIntrinsicBLAS_Double(mRS, mRS->getContext(), getID(), RsBlas_dtpmv, + TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, + Ap->getID(), X->getID(), 0, 0, incX, 0, 0, 0); +} + +void ScriptIntrinsicBLAS::CTPMV(RsBlasUplo Uplo, RsBlasTranspose TransA, RsBlasDiag Diag, + sp<Allocation> Ap, sp<Allocation> X, int incX) { + int N = validateTPMV(mRS, Element::F32_2(mRS), Uplo, TransA, Diag, Ap, X, incX); + nScriptIntrinsicBLAS_Complex(mRS, mRS->getContext(), getID(), RsBlas_ctpmv, + TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, 0, + Ap->getID(), X->getID(), 0, 0, 0, incX, 0, 0, 0); +} + +void ScriptIntrinsicBLAS::ZTPMV(RsBlasUplo Uplo, RsBlasTranspose TransA, RsBlasDiag Diag, + sp<Allocation> Ap, sp<Allocation> X, int incX) { + int N = validateTPMV(mRS, Element::F64_2(mRS), Uplo, TransA, Diag, Ap, X, incX); + nScriptIntrinsicBLAS_Z(mRS, mRS->getContext(), getID(), RsBlas_ztpmv, + TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, 0, + Ap->getID(), X->getID(), 0, 0, 0, incX, 0, 0, 0); +} + +void ScriptIntrinsicBLAS::STRSV(RsBlasUplo Uplo, RsBlasTranspose TransA, RsBlasDiag Diag, + sp<Allocation> A, sp<Allocation> X, int incX) { + // TRSV is the same as TRMV + validateTRMV(mRS, Element::F32(mRS), Uplo, TransA, Diag, A, X, incX); + int N = A->getType()->getY(); + nScriptIntrinsicBLAS_Single(mRS, mRS->getContext(), getID(), RsBlas_strsv, + TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, + A->getID(), X->getID(), 0, 0, incX, 0, 0, 0); +} + +void ScriptIntrinsicBLAS::DTRSV(RsBlasUplo Uplo, RsBlasTranspose TransA, RsBlasDiag Diag, + sp<Allocation> A, sp<Allocation> X, int incX) { + // TRSV is the same as TRMV + validateTRMV(mRS, Element::F64(mRS), Uplo, TransA, Diag, A, X, incX); + int N = A->getType()->getY(); + nScriptIntrinsicBLAS_Double(mRS, mRS->getContext(), getID(), RsBlas_dtrsv, + TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, + A->getID(), X->getID(), 0, 0, incX, 0, 0, 0); + +} + +void ScriptIntrinsicBLAS::CTRSV(RsBlasUplo Uplo, RsBlasTranspose TransA, RsBlasDiag Diag, + sp<Allocation> A, sp<Allocation> X, int incX) { + // TRSV is the same as TRMV + validateTRMV(mRS, Element::F32_2(mRS), Uplo, TransA, Diag, A, X, incX); + int N = A->getType()->getY(); + nScriptIntrinsicBLAS_Complex(mRS, mRS->getContext(), getID(), RsBlas_ctrsv, + TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, 0, + A->getID(), X->getID(), 0, 0, 0, incX, 0, 0, 0); + +} + +void ScriptIntrinsicBLAS::ZTRSV(RsBlasUplo Uplo, RsBlasTranspose TransA, RsBlasDiag Diag, + sp<Allocation> A, sp<Allocation> X, int incX) { + // TRSV is the same as TRMV + validateTRMV(mRS, Element::F64_2(mRS), Uplo, TransA, Diag, A, X, incX); + int N = A->getType()->getY(); + nScriptIntrinsicBLAS_Z(mRS, mRS->getContext(), getID(), RsBlas_ztrsv, + TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, 0, + A->getID(), X->getID(), 0, 0, 0, incX, 0, 0, 0); + +} + +void ScriptIntrinsicBLAS::STBSV(RsBlasUplo Uplo, RsBlasTranspose TransA, RsBlasDiag Diag, + int K, sp<Allocation> A, sp<Allocation> X, int incX) { + // TBSV is the same as TRMV + K >= 0 + validateTRMV(mRS, Element::F32(mRS), Uplo, TransA, Diag, A, X, incX); + int N = A->getType()->getY(); + if (K < 0) { + mRS->throwError(RS_ERROR_INVALID_PARAMETER, "Number of diagonals must be positive"); + } + nScriptIntrinsicBLAS_Single(mRS, mRS->getContext(), getID(), RsBlas_stbsv, + TransA, 0, 0, Uplo, Diag, 0, N, K, 0, + A->getID(), X->getID(), 0, 0, incX, 0, 0, 0); +} + +void ScriptIntrinsicBLAS::DTBSV(RsBlasUplo Uplo, RsBlasTranspose TransA, RsBlasDiag Diag, + int K, sp<Allocation> A, sp<Allocation> X, int incX) { + // TBSV is the same as TRMV + K >= 0 + validateTRMV(mRS, Element::F64(mRS), Uplo, TransA, Diag, A, X, incX); + int N = A->getType()->getY(); + if (K < 0) { + mRS->throwError(RS_ERROR_INVALID_PARAMETER, "Number of diagonals must be positive"); + } + nScriptIntrinsicBLAS_Double(mRS, mRS->getContext(), getID(), RsBlas_dtbsv, + TransA, 0, 0, Uplo, Diag, 0, N, K, 0, + A->getID(), X->getID(), 0, 0, incX, 0, 0, 0); +} + +void ScriptIntrinsicBLAS::CTBSV(RsBlasUplo Uplo, RsBlasTranspose TransA, RsBlasDiag Diag, + int K, sp<Allocation> A, sp<Allocation> X, int incX) { + // TBSV is the same as TRMV + K >= 0 + validateTRMV(mRS, Element::F32_2(mRS), Uplo, TransA, Diag, A, X, incX); + int N = A->getType()->getY(); + if (K < 0) { + mRS->throwError(RS_ERROR_INVALID_PARAMETER, "Number of diagonals must be positive"); + } + nScriptIntrinsicBLAS_Complex(mRS, mRS->getContext(), getID(), RsBlas_ctbsv, + TransA, 0, 0, Uplo, Diag, 0, N, K, + 0, 0, A->getID(), X->getID(), 0, 0, 0, incX, 0, 0, 0); +} + +void ScriptIntrinsicBLAS::ZTBSV(RsBlasUplo Uplo, RsBlasTranspose TransA, RsBlasDiag Diag, + int K, sp<Allocation> A, sp<Allocation> X, int incX) { + // TBSV is the same as TRMV + K >= 0 + validateTRMV(mRS, Element::F64_2(mRS), Uplo, TransA, Diag, A, X, incX); + int N = A->getType()->getY(); + if (K < 0) { + mRS->throwError(RS_ERROR_INVALID_PARAMETER, "Number of diagonals must be positive"); + } + nScriptIntrinsicBLAS_Z(mRS, mRS->getContext(), getID(), RsBlas_ztbsv, + TransA, 0, 0, Uplo, Diag, 0, N, K, 0, 0, + A->getID(), X->getID(), 0, 0, 0, incX, 0, 0, 0); +} + +void ScriptIntrinsicBLAS::STPSV(RsBlasUplo Uplo, RsBlasTranspose TransA, RsBlasDiag Diag, + sp<Allocation> Ap, sp<Allocation> X, int incX) { + // TPSV is same as TPMV + int N = validateTPMV(mRS, Element::F32(mRS), Uplo, TransA, Diag, Ap, X, incX); + nScriptIntrinsicBLAS_Single(mRS, mRS->getContext(), getID(), RsBlas_stpsv, + TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, + Ap->getID(), X->getID(), 0, 0, incX, 0, 0, 0); +} + +void ScriptIntrinsicBLAS::DTPSV(RsBlasUplo Uplo, RsBlasTranspose TransA, RsBlasDiag Diag, + sp<Allocation> Ap, sp<Allocation> X, int incX) { + // TPSV is same as TPMV + int N = validateTPMV(mRS, Element::F64(mRS), Uplo, TransA, Diag, Ap, X, incX); + nScriptIntrinsicBLAS_Double(mRS, mRS->getContext(), getID(), RsBlas_dtpsv, + TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, + Ap->getID(), X->getID(), 0, 0, incX, 0, 0, 0); +} + +void ScriptIntrinsicBLAS::CTPSV(RsBlasUplo Uplo, RsBlasTranspose TransA, RsBlasDiag Diag, + sp<Allocation> Ap, sp<Allocation> X, int incX) { + // TPSV is same as TPMV + int N = validateTPMV(mRS, Element::F32_2(mRS), Uplo, TransA, Diag, Ap, X, incX); + nScriptIntrinsicBLAS_Complex(mRS, mRS->getContext(), getID(), RsBlas_ctpsv, + TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, 0, + Ap->getID(), X->getID(), 0, 0, 0, incX, 0, 0, 0); +} + +void ScriptIntrinsicBLAS::ZTPSV(RsBlasUplo Uplo, RsBlasTranspose TransA, RsBlasDiag Diag, + sp<Allocation> Ap, sp<Allocation> X, int incX) { + // TPSV is same as TPMV + int N = validateTPMV(mRS, Element::F64_2(mRS), Uplo, TransA, Diag, Ap, X, incX); + nScriptIntrinsicBLAS_Z(mRS, mRS->getContext(), getID(), RsBlas_ztpsv, + TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, 0, + Ap->getID(), X->getID(), 0, 0, 0, incX, 0, 0, 0); +} + +/** + * Level 2, S and D only + */ +static int validateSYMV(RS* mRS, sp<const Element> e, RsBlasUplo Uplo, sp<Allocation> A, + sp<Allocation> X, sp<Allocation> Y, int incX, int incY) { + int N = A->getType()->getY(); + if ((int)A->getType()->getX() != N) { + mRS->throwError(RS_ERROR_INVALID_PARAMETER, "A must be a square matrix for SYMV"); + } + if (!A->getType()->getElement()->isCompatible(e) || + !X->getType()->getElement()->isCompatible(e) || + !Y->getType()->getElement()->isCompatible(e) ) { + mRS->throwError(RS_ERROR_INVALID_ELEMENT, "Called BLAS with wrong Element type"); + } + if (X->getType()->getY() > 1 || Y->getType()->getY() > 1) { + mRS->throwError(RS_ERROR_INVALID_PARAMETER, "BLAS vectors must have Y dimension of 0 or 1"); + } + + if (incX <= 0 || incY <= 0) { + mRS->throwError(RS_ERROR_INVALID_PARAMETER, "Vector increments must be greater than 0"); + } + int expectedXDim = 1 + (N - 1) * incX; + if ((int)X->getType()->getX() != expectedXDim) { + mRS->throwError(RS_ERROR_INVALID_PARAMETER, "Incorrect vector dimensions for SYMV"); + } + int expectedYDim = 1 + (N - 1) * incY; + if ((int)Y->getType()->getX() != expectedYDim) { + mRS->throwError(RS_ERROR_INVALID_PARAMETER, "Incorrect vector dimensions for SYMV"); + } + return N; +} +static int validateSPMV(RS* mRS, sp<const Element> e, RsBlasUplo Uplo, sp<Allocation> Ap, + sp<Allocation> X, int incX, sp<Allocation> Y, int incY) { + if (!Ap->getType()->getElement()->isCompatible(e) || + !X->getType()->getElement()->isCompatible(e) || + !Y->getType()->getElement()->isCompatible(e)) { + mRS->throwError(RS_ERROR_INVALID_ELEMENT, "Called BLAS with wrong Element type"); + } + if (X->getType()->getY() > 1 || Y->getType()->getY() > 1) { + mRS->throwError(RS_ERROR_INVALID_PARAMETER, "BLAS vectors must have Y dimension of 0 or 1"); + } + + if (Ap->getType()->getY() > 1) { + mRS->throwError(RS_ERROR_INVALID_PARAMETER, "Ap must have a Y dimension of 0 or 1"); + } + + int N = sqrt((double)Ap->getType()->getX() * 2); + if ((int)Ap->getType()->getX() != ((N * (N+1)) / 2)) { + mRS->throwError(RS_ERROR_INVALID_PARAMETER, "Invalid dimension for Ap"); + } + if (incX <= 0 || incY <= 0) { + mRS->throwError(RS_ERROR_INVALID_PARAMETER, "Vector increments must be greater than 0"); + } + int expectedXDim = 1 + (N - 1) * incX; + if ((int)X->getType()->getX() != expectedXDim) { + mRS->throwError(RS_ERROR_INVALID_PARAMETER, "Incorrect vector dimensions for SPMV"); + } + int expectedYDim = 1 + (N - 1) * incY; + if ((int)Y->getType()->getX() != expectedYDim) { + mRS->throwError(RS_ERROR_INVALID_PARAMETER, "Incorrect vector dimensions for SPMV"); + } + + return N; +} +static void validateGER(RS* mRS, sp<const Element> e, sp<Allocation> X, int incX, + sp<Allocation> Y, int incY, sp<Allocation> A) { + if (!A->getType()->getElement()->isCompatible(e) || + !X->getType()->getElement()->isCompatible(e) || + !Y->getType()->getElement()->isCompatible(e) ) { + mRS->throwError(RS_ERROR_INVALID_ELEMENT, "Called BLAS with wrong Element type"); + } + + if (X->getType()->getY() > 1 || Y->getType()->getY() > 1) { + mRS->throwError(RS_ERROR_INVALID_PARAMETER, "BLAS vectors must have Y dimension of 0 or 1"); + } + + int M = A->getType()->getY(); + int N = A->getType()->getX(); + + if (N < 1 || M < 1) { + mRS->throwError(RS_ERROR_INVALID_PARAMETER, "M and N must be 1 or greater for GER"); + } + if (incX <= 0 || incY <= 0) { + mRS->throwError(RS_ERROR_INVALID_PARAMETER, "Vector increments must be greater than 0"); + } + int expectedXDim = 1 + (M - 1) * incX; + if ((int)X->getType()->getX() != expectedXDim) { + mRS->throwError(RS_ERROR_INVALID_PARAMETER, "Incorrect vector dimensions for GER"); + } + int expectedYDim = 1 + (N - 1) * incY; + if ((int)Y->getType()->getX() != expectedYDim) { + mRS->throwError(RS_ERROR_INVALID_PARAMETER, "Incorrect vector dimensions for GER"); + } + + +} +static int validateSYR(RS* mRS, sp<const Element> e, RsBlasUplo Uplo, + sp<Allocation> X, int incX, sp<Allocation> A) { + if (!A->getType()->getElement()->isCompatible(e) || + !X->getType()->getElement()->isCompatible(e)) { + mRS->throwError(RS_ERROR_INVALID_ELEMENT, "Called BLAS with wrong Element type"); + } + + int N = A->getType()->getX(); + + if (X->getType()->getY() > 1) { + mRS->throwError(RS_ERROR_INVALID_PARAMETER, "BLAS vectors must have Y dimension of 0 or 1"); + } + if (N != (int)A->getType()->getY()) { + mRS->throwError(RS_ERROR_INVALID_PARAMETER, "A must be a symmetric matrix"); + } + if (incX <= 0) { + mRS->throwError(RS_ERROR_INVALID_PARAMETER, "Vector increments must be greater than 0"); + } + int expectedXDim = 1 + (N - 1) * incX; + if ((int)X->getType()->getX() != expectedXDim) { + mRS->throwError(RS_ERROR_INVALID_PARAMETER, "Incorrect vector dimensions for SYR"); + } + return N; +} +static int validateSPR(RS* mRS, sp<const Element> e, RsBlasUplo Uplo, + sp<Allocation> X, int incX, sp<Allocation> Ap) { + if (!Ap->getType()->getElement()->isCompatible(e) || + !X->getType()->getElement()->isCompatible(e)) { + mRS->throwError(RS_ERROR_INVALID_ELEMENT, "Called BLAS with wrong Element type"); + } + if (X->getType()->getY() > 1) { + mRS->throwError(RS_ERROR_INVALID_PARAMETER, "BLAS vectors must have Y dimension of 0 or 1"); + } + + if (Ap->getType()->getY() > 1) { + mRS->throwError(RS_ERROR_INVALID_PARAMETER, "Ap must have a Y dimension of 0 or 1"); + } + + int N = sqrt((double)Ap->getType()->getX() * 2); + if ((int)Ap->getType()->getX() != ((N * (N+1)) / 2)) { + mRS->throwError(RS_ERROR_INVALID_PARAMETER, "Invalid dimension for Ap"); + } + if (incX <= 0) { + mRS->throwError(RS_ERROR_INVALID_PARAMETER, "Vector increments must be greater than 0"); + } + int expectedXDim = 1 + (N - 1) * incX; + if ((int)X->getType()->getX() != expectedXDim) { + mRS->throwError(RS_ERROR_INVALID_PARAMETER, "Incorrect vector dimensions for SPR"); + } + + return N; +} + +static int validateSYR2(RS* mRS, sp<const Element> e, RsBlasUplo Uplo, sp<Allocation> X, + int incX, sp<Allocation> Y, int incY, sp<Allocation> A) { + if (!A->getType()->getElement()->isCompatible(e) || + !X->getType()->getElement()->isCompatible(e) || + !Y->getType()->getElement()->isCompatible(e)) { + mRS->throwError(RS_ERROR_INVALID_ELEMENT, "Called BLAS with wrong Element type"); + } + + if (X->getType()->getY() > 1 || Y->getType()->getY() > 1) { + mRS->throwError(RS_ERROR_INVALID_PARAMETER, "BLAS vectors must have Y dimension of 0 or 1"); + } + + int N = A->getType()->getX(); + + if (N != (int)A->getType()->getY()) { + mRS->throwError(RS_ERROR_INVALID_PARAMETER, "A must be a symmetric matrix"); + } + if (incX <= 0 || incY <= 0) { + mRS->throwError(RS_ERROR_INVALID_PARAMETER, "Vector increments must be greater than 0"); + } + int expectedXDim = 1 + (N - 1) * incX; + int expectedYDim = 1 + (N - 1) * incY; + if ((int)X->getType()->getX() != expectedXDim || (int)Y->getType()->getX() != expectedYDim) { + mRS->throwError(RS_ERROR_INVALID_PARAMETER, "Incorrect vector dimensions for SYR"); + } + return N; + +} +static int validateSPR2(RS* mRS, sp<const Element> e, RsBlasUplo Uplo, sp<Allocation> X, + int incX, sp<Allocation> Y, int incY, sp<Allocation> Ap) { + if (!Ap->getType()->getElement()->isCompatible(e) || + !X->getType()->getElement()->isCompatible(e) || + !Y->getType()->getElement()->isCompatible(e)) { + mRS->throwError(RS_ERROR_INVALID_ELEMENT, "Called BLAS with wrong Element type"); + } + if (X->getType()->getY() > 1 || Y->getType()->getY() > 1) { + mRS->throwError(RS_ERROR_INVALID_PARAMETER, "BLAS vectors must have Y dimension of 0 or 1"); + } + + if (Ap->getType()->getY() > 1) { + mRS->throwError(RS_ERROR_INVALID_PARAMETER, "Ap must have a Y dimension of 0 or 1"); + } + + int N = sqrt((double)Ap->getType()->getX() * 2); + if ((int)Ap->getType()->getX() != ((N * (N+1)) / 2)) { + mRS->throwError(RS_ERROR_INVALID_PARAMETER, "Invalid dimension for Ap"); + } + if (incX <= 0 || incY <= 0) { + mRS->throwError(RS_ERROR_INVALID_PARAMETER, "Vector increments must be greater than 0"); + } + int expectedXDim = 1 + (N - 1) * incX; + int expectedYDim = 1 + (N - 1) * incY; + if ((int)X->getType()->getX() != expectedXDim || (int)Y->getType()->getX() != expectedYDim) { + mRS->throwError(RS_ERROR_INVALID_PARAMETER, "Incorrect vector dimensions for SPR2"); + } + + return N; +} + +void ScriptIntrinsicBLAS::SSYMV(RsBlasUplo Uplo, float alpha, sp<Allocation> A, sp<Allocation> X, + int incX, float beta, sp<Allocation> Y, int incY) { + int N = validateSYMV(mRS, Element::F32(mRS), Uplo, A, X, Y, incX, incY); + nScriptIntrinsicBLAS_Single(mRS, mRS->getContext(), getID(), RsBlas_ssymv, + 0, 0, 0, Uplo, 0, 0, N, 0, alpha, + A->getID(), X->getID(), beta, Y->getID(), incX, incY, 0, 0); +} + +void ScriptIntrinsicBLAS::SSBMV(RsBlasUplo Uplo, int K, float alpha, sp<Allocation> A, sp<Allocation> X, + int incX, float beta, sp<Allocation> Y, int incY) { + // SBMV is the same as SYMV + K >= 0 + if (K < 0) { + mRS->throwError(RS_ERROR_INVALID_PARAMETER, "K must be greater than or equal to 0"); + } + int N = validateSYMV(mRS, Element::F32(mRS), Uplo, A, X, Y, incX, incY); + nScriptIntrinsicBLAS_Single(mRS, mRS->getContext(), getID(), RsBlas_ssbmv, + 0, 0, 0, Uplo, 0, 0, N, K, alpha, + A->getID(), X->getID(), beta, Y->getID(), incX, incY, 0, 0); +} + +void ScriptIntrinsicBLAS::SSPMV(RsBlasUplo Uplo, float alpha, sp<Allocation> Ap, sp<Allocation> X, + int incX, float beta, sp<Allocation> Y, int incY) { + int N = validateSPMV(mRS, Element::F32(mRS), Uplo, Ap, X, incX, Y, incY); + nScriptIntrinsicBLAS_Single(mRS, mRS->getContext(), getID(), RsBlas_sspmv, + 0, 0, 0, Uplo, 0, 0, N, 0, alpha, + Ap->getID(), X->getID(), beta, Y->getID(), incX, incY, 0, 0); +} + +void ScriptIntrinsicBLAS::SGER(float alpha, sp<Allocation> X, int incX, + sp<Allocation> Y, int incY, sp<Allocation> A) { + int M = A->getType()->getY(); + int N = A->getType()->getX(); + validateGER(mRS, Element::F32(mRS), X, incX, Y, incY, A); + nScriptIntrinsicBLAS_Single(mRS, mRS->getContext(), getID(), RsBlas_sger, + 0, 0, 0, 0, 0, M, N, 0, alpha, + X->getID(), Y->getID(), 0.f, A->getID(), incX, incY, 0, 0); +} + +void ScriptIntrinsicBLAS::SSYR(RsBlasUplo Uplo, float alpha, sp<Allocation> X, + int incX, sp<Allocation> A) { + int N = validateSYR(mRS, Element::F32(mRS), Uplo, X, incX, A); + nScriptIntrinsicBLAS_Single(mRS, mRS->getContext(), getID(), RsBlas_ssyr, + 0, 0, 0, Uplo, 0, 0, N, 0, alpha, + X->getID(), A->getID(), 0.f, 0, incX, 0, 0, 0); +} + +void ScriptIntrinsicBLAS::SSPR(RsBlasUplo Uplo, float alpha, sp<Allocation> X, + int incX, sp<Allocation> Ap) { + int N = validateSPR(mRS, Element::F32(mRS), Uplo, X, incX, Ap); + nScriptIntrinsicBLAS_Single(mRS, mRS->getContext(), getID(), RsBlas_sspr, + 0, 0, 0, Uplo, 0, 0, N, 0, + alpha, X->getID(), Ap->getID(), 0.f, 0, incX, 0, 0, 0); +} + +void ScriptIntrinsicBLAS::SSYR2(RsBlasUplo Uplo, float alpha, sp<Allocation> X, int incX, + sp<Allocation> Y, int incY, sp<Allocation> A) { + int N = validateSYR2(mRS, Element::F32(mRS), Uplo, X, incX, Y, incY, A); + nScriptIntrinsicBLAS_Single(mRS, mRS->getContext(), getID(), RsBlas_ssyr2, + 0, 0, 0, Uplo, 0, 0, N, 0, alpha, + X->getID(), Y->getID(), 0, A->getID(), incX, incY, 0, 0); +} + +void ScriptIntrinsicBLAS::SSPR2(RsBlasUplo Uplo, float alpha, sp<Allocation> X, int incX, + sp<Allocation> Y, int incY, sp<Allocation> Ap) { + int N = validateSPR2(mRS, Element::F32(mRS), Uplo, X, incX, Y, incY, Ap); + nScriptIntrinsicBLAS_Single(mRS, mRS->getContext(), getID(), RsBlas_sspr2, + 0, 0, 0, Uplo, 0, 0, N, 0, alpha, + X->getID(), Y->getID(), 0, Ap->getID(), incX, incY, 0, 0); +} + +void ScriptIntrinsicBLAS::DSYMV(RsBlasUplo Uplo, double alpha, sp<Allocation> A, sp<Allocation> X, + int incX, double beta, sp<Allocation> Y, int incY) { + int N = validateSYMV(mRS, Element::F64(mRS), Uplo, A, X, Y, incX, incY); + nScriptIntrinsicBLAS_Double(mRS, mRS->getContext(), getID(), RsBlas_dsymv, + 0, 0, 0, Uplo, 0, 0, N, 0, alpha, + A->getID(), X->getID(), beta, Y->getID(), incX, incY, 0, 0); +} + +void ScriptIntrinsicBLAS::DSBMV(RsBlasUplo Uplo, int K, double alpha, sp<Allocation> A, sp<Allocation> X, + int incX, double beta, sp<Allocation> Y, int incY) { + // SBMV is the same as SYMV + K >= 0 + if (K < 0) { + mRS->throwError(RS_ERROR_INVALID_PARAMETER, "K must be greater than or equal to 0"); + } + int N = validateSYMV(mRS, Element::F64(mRS), Uplo, A, X, Y, incX, incY); + nScriptIntrinsicBLAS_Double(mRS, mRS->getContext(), getID(), RsBlas_dsbmv, + 0, 0, 0, Uplo, 0, 0, N, K, alpha, + A->getID(), X->getID(), beta, Y->getID(), incX, incY, 0, 0); +} + +void ScriptIntrinsicBLAS::DSPMV(RsBlasUplo Uplo, double alpha, sp<Allocation> Ap, sp<Allocation> X, + int incX, double beta, sp<Allocation> Y, int incY) { + int N = validateSPMV(mRS, Element::F64(mRS), Uplo, Ap, X, incX, Y, incY); + nScriptIntrinsicBLAS_Double(mRS, mRS->getContext(), getID(), RsBlas_dspmv, + 0, 0, 0, Uplo, 0, 0, N, 0, alpha, + Ap->getID(), X->getID(), beta, Y->getID(), incX, incY, 0, 0); +} + +void ScriptIntrinsicBLAS::DGER(double alpha, sp<Allocation> X, int incX, sp<Allocation> Y, + int incY, sp<Allocation> A) { + int M = A->getType()->getY(); + int N = A->getType()->getX(); + validateGER(mRS, Element::F64(mRS), X, incX, Y, incY, A); + nScriptIntrinsicBLAS_Double(mRS, mRS->getContext(), getID(), RsBlas_dger, + 0, 0, 0, 0, 0, M, N, 0, alpha, + X->getID(), Y->getID(), 0.f, A->getID(), incX, incY, 0, 0); +} + +void ScriptIntrinsicBLAS::DSYR(RsBlasUplo Uplo, double alpha, sp<Allocation> X, + int incX, sp<Allocation> A) { + int N = validateSYR(mRS, Element::F64(mRS), Uplo, X, incX, A); + nScriptIntrinsicBLAS_Double(mRS, mRS->getContext(), getID(), RsBlas_dsyr, + 0, 0, 0, Uplo, 0, 0, N, 0, alpha, + X->getID(), A->getID(), 0.f, 0, incX, 0, 0, 0); +} + +void ScriptIntrinsicBLAS::DSPR(RsBlasUplo Uplo, double alpha, sp<Allocation> X, + int incX, sp<Allocation> Ap) { + int N = validateSPR(mRS, Element::F64(mRS), Uplo, X, incX, Ap); + nScriptIntrinsicBLAS_Double(mRS, mRS->getContext(), getID(), RsBlas_dspr, + 0, 0, 0, Uplo, 0, 0, N, 0, alpha, + X->getID(), Ap->getID(), 0.f, 0, incX, 0, 0, 0); +} + +void ScriptIntrinsicBLAS::DSYR2(RsBlasUplo Uplo, double alpha, sp<Allocation> X, int incX, + sp<Allocation> Y, int incY, sp<Allocation> A) { + int N = validateSYR2(mRS, Element::F64(mRS), Uplo, X, incX, Y, incY, A); + nScriptIntrinsicBLAS_Double(mRS, mRS->getContext(), getID(), RsBlas_dsyr2, + 0, 0, 0, Uplo, 0, 0, N, 0, alpha, + X->getID(), Y->getID(), 0, A->getID(), incX, incY, 0, 0); +} + +void ScriptIntrinsicBLAS::DSPR2(RsBlasUplo Uplo, double alpha, sp<Allocation> X, int incX, + sp<Allocation> Y, int incY, sp<Allocation> Ap) { + int N = validateSPR2(mRS, Element::F64(mRS), Uplo, X, incX, Y, incY, Ap); + nScriptIntrinsicBLAS_Double(mRS, mRS->getContext(), getID(), RsBlas_dspr2, + 0, 0, 0, Uplo, 0, 0, N, 0, alpha, + X->getID(), Y->getID(), 0, Ap->getID(), incX, incY, 0, 0); +} + + +/** + * Level 2, C and Z only + */ + +static void validateGERU(RS* mRS, sp<const Element> e, sp<Allocation> X, int incX, + sp<Allocation> Y, int incY, sp<Allocation> A) { + if (!A->getType()->getElement()->isCompatible(e) || + !X->getType()->getElement()->isCompatible(e) || + !Y->getType()->getElement()->isCompatible(e)) { + mRS->throwError(RS_ERROR_INVALID_ELEMENT, "Called BLAS with wrong Element type"); + } + if (X->getType()->getY() > 1 || Y->getType()->getY() > 1) { + mRS->throwError(RS_ERROR_INVALID_PARAMETER, "BLAS vectors must have Y dimension of 0 or 1"); + } + + int M = A->getType()->getY(); + int N = A->getType()->getX(); + if (incX <= 0 || incY <= 0) { + mRS->throwError(RS_ERROR_INVALID_PARAMETER, "Vector increments must be greater than 0"); + } + int expectedXDim = 1 + (M - 1) * incX; + if ((int)X->getType()->getX() != expectedXDim) { + mRS->throwError(RS_ERROR_INVALID_PARAMETER, "Incorrect vector dimensions for GERU"); + } + int expectedYDim = 1 + (N - 1) * incY; + if ((int)Y->getType()->getX() != expectedYDim) { + mRS->throwError(RS_ERROR_INVALID_PARAMETER, "Incorrect vector dimensions for GERU"); + } + +} + +void ScriptIntrinsicBLAS::CHEMV(RsBlasUplo Uplo, Float2 alpha, sp<Allocation> A, + sp<Allocation> X, int incX, Float2 beta, sp<Allocation> Y, int incY) { + // HEMV is the same as SYR2 validation-wise + int N = validateSYR2(mRS, Element::F32_2(mRS), Uplo, X, incX, Y, incY, A); + nScriptIntrinsicBLAS_Complex(mRS, mRS->getContext(), getID(), RsBlas_chemv, + 0, 0, 0, Uplo, 0, 0, N, 0, + alpha.x, alpha.y, A->getID(), X->getID(), + beta.x, beta.y, Y->getID(), incX, incY, 0, 0); +} + +void ScriptIntrinsicBLAS::CHBMV(RsBlasUplo Uplo, int K, Float2 alpha, sp<Allocation> A, + sp<Allocation> X, int incX, Float2 beta, sp<Allocation> Y, int incY) { + // HBMV is the same as SYR2 validation-wise + int N = validateSYR2(mRS, Element::F32_2(mRS), Uplo, X, incX, Y, incY, A); + if (K < 0) { + mRS->throwError(RS_ERROR_INVALID_PARAMETER, "K must be 0 or greater for HBMV"); + } + nScriptIntrinsicBLAS_Complex(mRS, mRS->getContext(), getID(), RsBlas_chbmv, + 0, 0, 0, Uplo, 0, 0, N, K, + alpha.x, alpha.y, A->getID(), X->getID(), + beta.x, beta.y, Y->getID(), incX, incY, 0, 0); +} + +void ScriptIntrinsicBLAS::CHPMV(RsBlasUplo Uplo, Float2 alpha, sp<Allocation> Ap, + sp<Allocation> X, int incX, Float2 beta, sp<Allocation> Y, int incY) { + // HPMV is the same as SPR2 + int N = validateSPR2(mRS, Element::F32_2(mRS), Uplo, X, incX, Y, incY, Ap); + nScriptIntrinsicBLAS_Complex(mRS, mRS->getContext(), getID(), RsBlas_chpmv, + 0, 0, 0, Uplo, 0, 0, N, 0, + alpha.x, alpha.y, Ap->getID(), X->getID(), + beta.x, beta.y, Y->getID(), incX, incY, 0, 0); +} + +void ScriptIntrinsicBLAS::CGERU(Float2 alpha, sp<Allocation> X, int incX, + sp<Allocation> Y, int incY, sp<Allocation> A) { + validateGERU(mRS, Element::F32_2(mRS), X, incX, Y, incY, A); + int M = A->getType()->getY(); + int N = A->getType()->getX(); + nScriptIntrinsicBLAS_Complex(mRS, mRS->getContext(), getID(), RsBlas_cgeru, + 0, 0, 0, 0, 0, M, N, 0, + alpha.x, alpha.y, X->getID(), Y->getID(), + 0, 0, A->getID(), incX, incY, 0, 0); +} + +void ScriptIntrinsicBLAS::CGERC(Float2 alpha, sp<Allocation> X, int incX, + sp<Allocation> Y, int incY, sp<Allocation> A) { + // Same as GERU + validateGERU(mRS, Element::F32_2(mRS), X, incX, Y, incY, A); + int M = A->getType()->getY(); + int N = A->getType()->getX(); + nScriptIntrinsicBLAS_Complex(mRS, mRS->getContext(), getID(), RsBlas_cgerc, + 0, 0, 0, 0, 0, M, N, 0, + alpha.x, alpha.y, X->getID(), Y->getID(), + 0, 0, A->getID(), incX, incY, 0, 0); +} + +void ScriptIntrinsicBLAS::CHER(RsBlasUplo Uplo, float alpha, sp<Allocation> X, + int incX, sp<Allocation> A) { + // Same as SYR + int N = validateSYR(mRS, Element::F32_2(mRS), Uplo, X, incX, A); + nScriptIntrinsicBLAS_Complex(mRS, mRS->getContext(), getID(), RsBlas_cher, + 0, 0, 0, Uplo, 0, 0, N, 0, + alpha, 0, X->getID(), 0, + 0, 0, A->getID(), incX, 0, 0, 0); +} + +void ScriptIntrinsicBLAS::CHPR(RsBlasUplo Uplo, float alpha, sp<Allocation> X, + int incX, sp<Allocation> Ap) { + // Equivalent to SPR for validation + int N = validateSPR(mRS, Element::F32_2(mRS), Uplo, X, incX, Ap); + nScriptIntrinsicBLAS_Complex(mRS, mRS->getContext(), getID(), RsBlas_chpr, + 0, 0, 0, Uplo, 0, 0, N, 0, + alpha, 0, X->getID(), 0, + 0, 0, Ap->getID(), incX, 0, 0, 0); +} + +void ScriptIntrinsicBLAS::CHER2(RsBlasUplo Uplo, Float2 alpha, sp<Allocation> X, int incX, + sp<Allocation> Y, int incY, sp<Allocation> A) { + // Same as SYR2 + int N = validateSYR2(mRS, Element::F32_2(mRS), Uplo, X, incX, Y, incY, A); + nScriptIntrinsicBLAS_Complex(mRS, mRS->getContext(), getID(), RsBlas_cher2, + 0, 0, 0, Uplo, 0, 0, N, 0, + alpha.x, alpha.y, X->getID(), Y->getID(), + 0, 0, A->getID(), incX, incY, 0, 0); +} + +void ScriptIntrinsicBLAS::CHPR2(RsBlasUplo Uplo, Float2 alpha, sp<Allocation> X, int incX, + sp<Allocation> Y, int incY, sp<Allocation> Ap) { + // Same as SPR2 + int N = validateSPR2(mRS, Element::F32_2(mRS), Uplo, X, incX, Y, incY, Ap); + nScriptIntrinsicBLAS_Complex(mRS, mRS->getContext(), getID(), RsBlas_chpr2, + 0, 0, 0, Uplo, 0, 0, N, 0, + alpha.x, alpha.y, X->getID(), Y->getID(), + 0, 0, Ap->getID(), incX, incY, 0, 0); +} + +void ScriptIntrinsicBLAS::ZHEMV(RsBlasUplo Uplo, Double2 alpha, sp<Allocation> A, + sp<Allocation> X, int incX, Double2 beta, sp<Allocation> Y, int incY) { + // HEMV is the same as SYR2 validation-wise + int N = validateSYR2(mRS, Element::F64_2(mRS), Uplo, X, incX, Y, incY, A); + nScriptIntrinsicBLAS_Z(mRS, mRS->getContext(), getID(), RsBlas_zhemv, + 0, 0, 0, Uplo, 0, 0, N, 0, + alpha.x, alpha.y, A->getID(), X->getID(), + beta.x, beta.y, Y->getID(), incX, incY, 0, 0); +} + +void ScriptIntrinsicBLAS::ZHBMV(RsBlasUplo Uplo, int K, Double2 alpha, sp<Allocation> A, sp<Allocation> X, + int incX, Double2 beta, sp<Allocation> Y, int incY) { + // HBMV is the same as SYR2 validation-wise + int N = validateSYR2(mRS, Element::F64_2(mRS), Uplo, X, incX, Y, incY, A); + if (K < 0) { + mRS->throwError(RS_ERROR_INVALID_PARAMETER, "K must be 0 or greater for HBMV"); + } + nScriptIntrinsicBLAS_Z(mRS, mRS->getContext(), getID(), RsBlas_zhbmv, + 0, 0, 0, Uplo, 0, 0, N, K, + alpha.x, alpha.y, A->getID(), X->getID(), + beta.x, beta.y, Y->getID(), incX, incY, 0, 0); +} + +void ScriptIntrinsicBLAS::ZHPMV(RsBlasUplo Uplo, Double2 alpha, sp<Allocation> Ap, sp<Allocation> X, + int incX, Double2 beta, sp<Allocation> Y, int incY) { + // HPMV is the same as SPR2 + int N = validateSPR2(mRS, Element::F64_2(mRS), Uplo, X, incX, Y, incY, Ap); + nScriptIntrinsicBLAS_Z(mRS, mRS->getContext(), getID(), RsBlas_zhpmv, + 0, 0, 0, Uplo, 0, 0, N, 0, + alpha.x, alpha.y, Ap->getID(), X->getID(), + beta.x, beta.y, Y->getID(), incX, incY, 0, 0); +} + +void ScriptIntrinsicBLAS::ZGERU(Double2 alpha, sp<Allocation> X, int incX, + sp<Allocation> Y, int incY, sp<Allocation> A) { + validateGERU(mRS, Element::F64_2(mRS), X, incX, Y, incY, A); + int M = A->getType()->getY(); + int N = A->getType()->getX(); + nScriptIntrinsicBLAS_Z(mRS, mRS->getContext(), getID(), RsBlas_zgeru, + 0, 0, 0, 0, 0, M, N, 0, + alpha.x, alpha.y, X->getID(), Y->getID(), + 0, 0, A->getID(), incX, incY, 0, 0); +} + +void ScriptIntrinsicBLAS::ZGERC(Double2 alpha, sp<Allocation> X, int incX, + sp<Allocation> Y, int incY, sp<Allocation> A) { + // Same as GERU + validateGERU(mRS, Element::F64_2(mRS), X, incX, Y, incY, A); + int M = A->getType()->getY(); + int N = A->getType()->getX(); + nScriptIntrinsicBLAS_Z(mRS, mRS->getContext(), getID(), RsBlas_zgerc, + 0, 0, 0, 0, 0, M, N, 0, + alpha.x, alpha.y, X->getID(), Y->getID(), + 0, 0, A->getID(), incX, incY, 0, 0); +} + +void ScriptIntrinsicBLAS::ZHER(RsBlasUplo Uplo, double alpha, sp<Allocation> X, + int incX, sp<Allocation> A) { + // Same as SYR + int N = validateSYR(mRS, Element::F64_2(mRS), Uplo, X, incX, A); + nScriptIntrinsicBLAS_Z(mRS, mRS->getContext(), getID(), RsBlas_zher, + 0, 0, 0, Uplo, 0, 0, N, 0, + alpha, 0, X->getID(), 0, + 0, 0, A->getID(), incX, 0, 0, 0); +} + +void ScriptIntrinsicBLAS::ZHPR(RsBlasUplo Uplo, double alpha, sp<Allocation> X, + int incX, sp<Allocation> Ap) { + // Equivalent to SPR for validation + int N = validateSPR(mRS, Element::F64_2(mRS), Uplo, X, incX, Ap); + nScriptIntrinsicBLAS_Z(mRS, mRS->getContext(), getID(), RsBlas_zhpr, + 0, 0, 0, Uplo, 0, 0, N, 0, + alpha, 0, X->getID(), 0, + 0, 0, Ap->getID(), incX, 0, 0, 0); +} + +void ScriptIntrinsicBLAS::ZHER2(RsBlasUplo Uplo, Double2 alpha, sp<Allocation> X, int incX, + sp<Allocation> Y, int incY, sp<Allocation> A) { + // Same as SYR2 + int N = validateSYR2(mRS, Element::F64_2(mRS), Uplo, X, incX, Y, incY, A); + nScriptIntrinsicBLAS_Z(mRS, mRS->getContext(), getID(), RsBlas_zher2, + 0, 0, 0, Uplo, 0, 0, N, 0, + alpha.x, alpha.y, X->getID(), Y->getID(), + 0, 0, A->getID(), incX, incY, 0, 0); +} + +void ScriptIntrinsicBLAS::ZHPR2(RsBlasUplo Uplo, Double2 alpha, sp<Allocation> X, int incX, + sp<Allocation> Y, int incY, sp<Allocation> Ap) { + // Same as SPR2 + int N = validateSPR2(mRS, Element::F64_2(mRS), Uplo, X, incX, Y, incY, Ap); + nScriptIntrinsicBLAS_Z(mRS, mRS->getContext(), getID(), RsBlas_zhpr2, + 0, 0, 0, Uplo, 0, 0, N, 0, + alpha.x, alpha.y, X->getID(), Y->getID(), + 0, 0, Ap->getID(), incX, incY, 0, 0); +} + + +/** + * Level 3 BLAS + */ + +static void validateL3(RS* mRS, sp<const Element> e, int TransA, int TransB, int Side, + sp<Allocation> A, sp<Allocation> B, sp<Allocation> C) { + int aM = -1, aN = -1, bM = -1, bN = -1, cM = -1, cN = -1; + if ((A != nullptr && !A->getType()->getElement()->isCompatible(e)) || + (B != nullptr && !B->getType()->getElement()->isCompatible(e)) || + (C != nullptr && !C->getType()->getElement()->isCompatible(e))) { + mRS->throwError(RS_ERROR_INVALID_ELEMENT, "Called BLAS with wrong Element type"); + } + if (C == nullptr) { + // Since matrix C is used to store the result, it cannot be null. + mRS->throwError(RS_ERROR_INVALID_PARAMETER, "Allocation C cannot be null"); + } + cM = C->getType()->getY(); + cN = C->getType()->getX(); + + if (Side == RsBlasRight) { + if ((A == nullptr && B != nullptr) || (A != nullptr && B == nullptr)) { + mRS->throwError(RS_ERROR_INVALID_PARAMETER, "Provided Matrix A without Matrix B, or vice versa"); + } + if (B != nullptr) { + bM = A->getType()->getY(); + bN = A->getType()->getX(); + } + if (A != nullptr) { + aM = B->getType()->getY(); + aN = B->getType()->getX(); + } + } else { + if (A != nullptr) { + if (TransA == RsBlasTrans || TransA == RsBlasConjTrans) { + aN = A->getType()->getY(); + aM = A->getType()->getX(); + } else { + aM = A->getType()->getY(); + aN = A->getType()->getX(); + } + } + if (B != nullptr) { + if (TransB == RsBlasTrans || TransB == RsBlasConjTrans) { + bN = B->getType()->getY(); + bM = B->getType()->getX(); + } else { + bM = B->getType()->getY(); + bN = B->getType()->getX(); + } + } + } + if (A != nullptr && B != nullptr && C != nullptr) { + if (aN != bM || aM != cM || bN != cN) { + mRS->throwError(RS_ERROR_INVALID_PARAMETER, "Called BLAS with invalid dimensions"); + } + } else if (A != nullptr && C != nullptr) { + // A and C only, for SYRK + if (cM != cN) { + mRS->throwError(RS_ERROR_INVALID_PARAMETER, "Matrix C is not symmetric"); + } + if (aM != cM) { + mRS->throwError(RS_ERROR_INVALID_PARAMETER, "Called BLAS with invalid dimensions"); + } + } else if (A != nullptr && B != nullptr) { + // A and B only + if (aN != bM) { + mRS->throwError(RS_ERROR_INVALID_PARAMETER, "Called BLAS with invalid dimensions"); + } + } + +} + +void ScriptIntrinsicBLAS::SGEMM(RsBlasTranspose TransA, RsBlasTranspose TransB, float alpha, + sp<Allocation> A, sp<Allocation> B, float beta, sp<Allocation> C) { + validateL3(mRS, Element::F32(mRS), TransA, TransB, 0, A, B, C); + + int M = -1, N = -1, K = -1; + if (TransA != RsBlasNoTrans) { + M = A->getType()->getX(); + K = A->getType()->getY(); + } else { + M = A->getType()->getY(); + K = A->getType()->getX(); + } + if (TransB != RsBlasNoTrans) { + N = B->getType()->getY(); + } else { + N = B->getType()->getX(); + } + nScriptIntrinsicBLAS_Single(mRS, mRS->getContext(), getID(), RsBlas_sgemm, + TransA, TransB, 0, 0, 0, M, N, K, + alpha, A->getID(), B->getID(), + beta, C->getID(), 0, 0, 0, 0); +} + +void ScriptIntrinsicBLAS::DGEMM(RsBlasTranspose TransA, RsBlasTranspose TransB, double alpha, + sp<Allocation> A, sp<Allocation> B, double beta, sp<Allocation> C) { + validateL3(mRS, Element::F64(mRS), TransA, TransB, 0, A, B, C); + int M = -1, N = -1, K = -1; + if (TransA != RsBlasNoTrans) { + M = A->getType()->getX(); + K = A->getType()->getY(); + } else { + M = A->getType()->getY(); + K = A->getType()->getX(); + } + if (TransB != RsBlasNoTrans) { + N = B->getType()->getY(); + } else { + N = B->getType()->getX(); + } + nScriptIntrinsicBLAS_Double(mRS, mRS->getContext(), getID(), RsBlas_dgemm, + TransA, TransB, 0, 0, 0, M, N, K, + alpha, A->getID(), B->getID(), + beta, C->getID(), 0, 0, 0, 0); +} + +void ScriptIntrinsicBLAS::CGEMM(RsBlasTranspose TransA, RsBlasTranspose TransB, Float2 alpha, + sp<Allocation> A, sp<Allocation> B, Float2 beta, sp<Allocation> C) { + validateL3(mRS, Element::F32_2(mRS), TransA, TransB, 0, A, B, C); + int M = -1, N = -1, K = -1; + if (TransA != RsBlasNoTrans) { + M = A->getType()->getX(); + K = A->getType()->getY(); + } else { + M = A->getType()->getY(); + K = A->getType()->getX(); + } + if (TransB != RsBlasNoTrans) { + N = B->getType()->getY(); + } else { + N = B->getType()->getX(); + } + nScriptIntrinsicBLAS_Complex(mRS, mRS->getContext(), getID(), RsBlas_cgemm, + TransA, TransB, 0, 0, 0, M, N, K, + alpha.x, alpha.y, A->getID(), B->getID(), + beta.x, beta.y, C->getID(), 0, 0, 0, 0); +} + +void ScriptIntrinsicBLAS::ZGEMM(RsBlasTranspose TransA, RsBlasTranspose TransB, Double2 alpha, + sp<Allocation> A, sp<Allocation> B, Double2 beta, sp<Allocation> C) { + validateL3(mRS, Element::F64_2(mRS), TransA, TransB, 0, A, B, C); + int M = -1, N = -1, K = -1; + if (TransA != RsBlasNoTrans) { + M = A->getType()->getX(); + K = A->getType()->getY(); + } else { + M = A->getType()->getY(); + K = A->getType()->getX(); + } + if (TransB != RsBlasNoTrans) { + N = B->getType()->getY(); + } else { + N = B->getType()->getX(); + } + nScriptIntrinsicBLAS_Z(mRS, mRS->getContext(), getID(), RsBlas_zgemm, + TransA, TransB, 0, 0, 0, M, N, K, + alpha.x, alpha.y, A->getID(), B->getID(), + beta.x, beta.y, C->getID(), 0, 0, 0, 0); +} + +void ScriptIntrinsicBLAS::SSYMM(RsBlasSide Side, RsBlasUplo Uplo, float alpha, + sp<Allocation> A, sp<Allocation> B, float beta, sp<Allocation> C) { + //For SYMM, Matrix A should be symmetric + if (A->getType()->getX() != A->getType()->getY()) { + mRS->throwError(RS_ERROR_INVALID_PARAMETER, "Matrix A is not symmetric"); + } + validateL3(mRS, Element::F32(mRS), 0, 0, Side, A, B, C); + nScriptIntrinsicBLAS_Single(mRS, mRS->getContext(), getID(), RsBlas_ssymm, + 0, 0, Side, Uplo, 0, C->getType()->getY(), C->getType()->getX(), 0, + alpha, A->getID(), B->getID(), + beta, C->getID(), 0, 0, 0, 0); +} + +void ScriptIntrinsicBLAS::DSYMM(RsBlasSide Side, RsBlasUplo Uplo, double alpha, + sp<Allocation> A, sp<Allocation> B, double beta, sp<Allocation> C) { + if (A->getType()->getX() != A->getType()->getY()) { + mRS->throwError(RS_ERROR_INVALID_PARAMETER, "Matrix A is not symmetric"); + } + validateL3(mRS, Element::F64(mRS), 0, 0, Side, A, B, C); + nScriptIntrinsicBLAS_Double(mRS, mRS->getContext(), getID(), RsBlas_dsymm, + 0, 0, Side, Uplo, 0, C->getType()->getY(), C->getType()->getX(), 0, + alpha, A->getID(), B->getID(), + beta, C->getID(), 0, 0, 0, 0); +} + +void ScriptIntrinsicBLAS::CSYMM(RsBlasSide Side, RsBlasUplo Uplo, Float2 alpha, + sp<Allocation> A, sp<Allocation> B, Float2 beta, sp<Allocation> C) { + if (A->getType()->getX() != A->getType()->getY()) { + mRS->throwError(RS_ERROR_INVALID_PARAMETER, "Matrix A is not symmetric"); + } + validateL3(mRS, Element::F32_2(mRS), 0, 0, Side, A, B, C); + nScriptIntrinsicBLAS_Complex(mRS, mRS->getContext(), getID(), RsBlas_csymm, + 0, 0, Side, Uplo, 0, C->getType()->getY(), C->getType()->getX(), 0, + alpha.x, alpha.y, A->getID(), B->getID(), + beta.x, beta.y, C->getID(), 0, 0, 0, 0); +} + +void ScriptIntrinsicBLAS::ZSYMM(RsBlasSide Side, RsBlasUplo Uplo, Double2 alpha, + sp<Allocation> A, sp<Allocation> B, Double2 beta, sp<Allocation> C) { + if (A->getType()->getX() != A->getType()->getY()) { + mRS->throwError(RS_ERROR_INVALID_PARAMETER, "Matrix A is not symmetric"); + } + validateL3(mRS, Element::F64_2(mRS), 0, 0, Side, A, B, C); + nScriptIntrinsicBLAS_Z(mRS, mRS->getContext(), getID(), RsBlas_zsymm, + 0, 0, Side, Uplo, 0, C->getType()->getY(), C->getType()->getX(), 0, + alpha.x, alpha.y, A->getID(), B->getID(), + beta.x, beta.y, C->getID(), 0, 0, 0, 0); +} + +void ScriptIntrinsicBLAS::SSYRK(RsBlasUplo Uplo, RsBlasTranspose Trans, float alpha, + sp<Allocation> A, float beta, sp<Allocation> C) { + validateL3(mRS, Element::F32(mRS), Trans, 0, 0, A, nullptr, C); + int K = -1; + if (Trans != RsBlasNoTrans) { + K = A->getType()->getY(); + } else { + K = A->getType()->getX(); + } + nScriptIntrinsicBLAS_Single(mRS, mRS->getContext(), getID(), RsBlas_ssyrk, + Trans, 0, 0, Uplo, 0, 0, C->getType()->getX(), K, + alpha, A->getID(), 0, + beta, C->getID(), 0, 0, 0, 0); +} + +void ScriptIntrinsicBLAS::DSYRK(RsBlasUplo Uplo, RsBlasTranspose Trans, double alpha, + sp<Allocation> A, double beta, sp<Allocation> C) { + validateL3(mRS, Element::F64(mRS), Trans, 0, 0, A, nullptr, C); + int K = -1; + if (Trans != RsBlasNoTrans) { + K = A->getType()->getY(); + } else { + K = A->getType()->getX(); + } + nScriptIntrinsicBLAS_Double(mRS, mRS->getContext(), getID(), RsBlas_dsyrk, + Trans, 0, 0, Uplo, 0, 0, C->getType()->getX(), K, + alpha, A->getID(), 0, + beta, C->getID(), 0, 0, 0, 0); +} + +void ScriptIntrinsicBLAS::CSYRK(RsBlasUplo Uplo, RsBlasTranspose Trans, Float2 alpha, + sp<Allocation> A, Float2 beta, sp<Allocation> C) { + validateL3(mRS, Element::F32_2(mRS), Trans, 0, 0, A, nullptr, C); + int K = -1; + if (Trans != RsBlasNoTrans) { + K = A->getType()->getY(); + } else { + K = A->getType()->getX(); + } + nScriptIntrinsicBLAS_Complex(mRS, mRS->getContext(), getID(), RsBlas_csyrk, + Trans, 0, 0, Uplo, 0, 0, C->getType()->getX(), K, + alpha.x, alpha.y, A->getID(), 0, + beta.x, beta.y, C->getID(), 0, 0, 0, 0); +} + +void ScriptIntrinsicBLAS::ZSYRK(RsBlasUplo Uplo, RsBlasTranspose Trans, Double2 alpha, + sp<Allocation> A, Double2 beta, sp<Allocation> C) { + validateL3(mRS, Element::F64_2(mRS), Trans, 0, 0, A, nullptr, C); + int K = -1; + if (Trans != RsBlasNoTrans) { + K = A->getType()->getY(); + } else { + K = A->getType()->getX(); + } + nScriptIntrinsicBLAS_Z(mRS, mRS->getContext(), getID(), RsBlas_zsyrk, + Trans, 0, 0, Uplo, 0, 0, C->getType()->getX(), K, + alpha.x, alpha.y, A->getID(), 0, + beta.x, beta.y, C->getID(), 0, 0, 0, 0); +} + +static void validateSYR2K(RS* mRS, sp<const Element> e, RsBlasTranspose Trans, + sp<Allocation> A, sp<Allocation> B, sp<Allocation> C) { + if (!A->getType()->getElement()->isCompatible(e) || + !B->getType()->getElement()->isCompatible(e) || + !C->getType()->getElement()->isCompatible(e)) { + mRS->throwError(RS_ERROR_INVALID_ELEMENT, "Called BLAS with wrong Element type"); + } + int Cdim = -1; + // A is n x k if no transpose, k x n if transpose + // C is n x n + if (Trans == RsBlasTrans) { + // check columns versus C + Cdim = A->getType()->getX(); + } else { + // check rows versus C + Cdim = A->getType()->getY(); + } + if ((int)C->getType()->getX() != Cdim || (int)C->getType()->getY() != Cdim) { + mRS->throwError(RS_ERROR_INVALID_PARAMETER, "Invalid symmetric matrix in SYR2K"); + } + // A dims == B dims + if (A->getType()->getX() != B->getType()->getX() || A->getType()->getY() != B->getType()->getY()) { + mRS->throwError(RS_ERROR_INVALID_PARAMETER, "Invalid A and B in SYR2K"); + } +} + +void ScriptIntrinsicBLAS::SSYR2K(RsBlasUplo Uplo, RsBlasTranspose Trans, float alpha, + sp<Allocation> A, sp<Allocation> B, float beta, sp<Allocation> C) { + validateSYR2K(mRS, Element::F32(mRS), Trans, A, B, C); + int K = -1; + if (Trans != RsBlasNoTrans) { + K = A->getType()->getY(); + } else { + K = A->getType()->getX(); + } + nScriptIntrinsicBLAS_Single(mRS, mRS->getContext(), getID(), RsBlas_ssyr2k, + Trans, 0, 0, Uplo, 0, 0, C->getType()->getX(), K, + alpha, A->getID(), B->getID(), + beta, C->getID(), 0, 0, 0, 0); +} + +void ScriptIntrinsicBLAS::DSYR2K(RsBlasUplo Uplo, RsBlasTranspose Trans, double alpha, + sp<Allocation> A, sp<Allocation> B, double beta, sp<Allocation> C) { + validateSYR2K(mRS, Element::F64(mRS), Trans, A, B, C); + int K = -1; + if (Trans != RsBlasNoTrans) { + K = A->getType()->getY(); + } else { + K = A->getType()->getX(); + } + nScriptIntrinsicBLAS_Double(mRS, mRS->getContext(), getID(), RsBlas_dsyr2k, + Trans, 0, 0, Uplo, 0, 0, C->getType()->getX(), K, + alpha, A->getID(), B->getID(), + beta, C->getID(), 0, 0, 0, 0); +} + +void ScriptIntrinsicBLAS::CSYR2K(RsBlasUplo Uplo, RsBlasTranspose Trans, Float2 alpha, + sp<Allocation> A, sp<Allocation> B, Float2 beta, sp<Allocation> C) { + validateSYR2K(mRS, Element::F32_2(mRS), Trans, A, B, C); + int K = -1; + if (Trans != RsBlasNoTrans) { + K = A->getType()->getY(); + } else { + K = A->getType()->getX(); + } + nScriptIntrinsicBLAS_Complex(mRS, mRS->getContext(), getID(), RsBlas_csyr2k, + Trans, 0, 0, Uplo, 0, 0, C->getType()->getX(), K, + alpha.x, alpha.y, A->getID(), B->getID(), + beta.x, beta.y, C->getID(), 0, 0, 0, 0); +} + +void ScriptIntrinsicBLAS::ZSYR2K(RsBlasUplo Uplo, RsBlasTranspose Trans, Double2 alpha, + sp<Allocation> A, sp<Allocation> B, Double2 beta, sp<Allocation> C) { + validateSYR2K(mRS, Element::F64_2(mRS), Trans, A, B, C); + int K = -1; + if (Trans != RsBlasNoTrans) { + K = A->getType()->getY(); + } else { + K = A->getType()->getX(); + } + nScriptIntrinsicBLAS_Z(mRS, mRS->getContext(), getID(), RsBlas_zsyr2k, + Trans, 0, 0, Uplo, 0, 0, C->getType()->getX(), K, + alpha.x, alpha.y, A->getID(), B->getID(), + beta.x, beta.y, C->getID(), 0, 0, 0, 0); +} + +static void validateTRMM(RS* mRS, sp<const Element> e, RsBlasSide Side, RsBlasTranspose TransA, + sp<Allocation> A, sp<Allocation> B) { + int aM = -1, aN = -1, bM = -1, bN = -1; + if (!A->getType()->getElement()->isCompatible(e) || + !B->getType()->getElement()->isCompatible(e)) { + mRS->throwError(RS_ERROR_INVALID_ELEMENT, "Called BLAS with wrong Element type"); + } + + aM = A->getType()->getY(); + aN = A->getType()->getX(); + if (aM != aN) { + mRS->throwError(RS_ERROR_INVALID_PARAMETER, "Called TRMM with a non-symmetric matrix A"); + } + + bM = B->getType()->getY(); + bN = B->getType()->getX(); + if (Side == RsBlasLeft) { + if (aN != bM) { + mRS->throwError(RS_ERROR_INVALID_PARAMETER, "Called TRMM with invalid matrices"); + } + } else { + if (bN != aM) { + mRS->throwError(RS_ERROR_INVALID_PARAMETER, "Called TRMM with invalid matrices"); + } + } +} + +void ScriptIntrinsicBLAS::STRMM(RsBlasSide Side, RsBlasUplo Uplo, RsBlasTranspose TransA, RsBlasDiag Diag, + float alpha, sp<Allocation> A, sp<Allocation> B) { + validateTRMM(mRS, Element::F32(mRS), Side, TransA, A, B); + nScriptIntrinsicBLAS_Single(mRS, mRS->getContext(), getID(), RsBlas_strmm, + TransA, 0, Side, Uplo, Diag,\ + B->getType()->getY(), B->getType()->getX(), 0, + alpha, A->getID(), B->getID(), 0.f, 0, 0, 0, 0, 0); +} + +void ScriptIntrinsicBLAS::DTRMM(RsBlasSide Side, RsBlasUplo Uplo, RsBlasTranspose TransA, RsBlasDiag Diag, + double alpha, sp<Allocation> A, sp<Allocation> B) { + validateTRMM(mRS, Element::F64(mRS), Side, TransA, A, B); + nScriptIntrinsicBLAS_Double(mRS, mRS->getContext(), getID(), RsBlas_dtrmm, + TransA, 0, Side, Uplo, Diag, + B->getType()->getY(), B->getType()->getX(), 0, + alpha, A->getID(), B->getID(), 0, 0, 0, 0, 0, 0); +} + +void ScriptIntrinsicBLAS::CTRMM(RsBlasSide Side, RsBlasUplo Uplo, RsBlasTranspose TransA, RsBlasDiag Diag, + Float2 alpha, sp<Allocation> A, sp<Allocation> B) { + validateTRMM(mRS, Element::F32_2(mRS), Side, TransA, A, B); + nScriptIntrinsicBLAS_Complex(mRS, mRS->getContext(), getID(), RsBlas_ctrmm, + TransA, 0, Side, Uplo, Diag, + B->getType()->getY(), B->getType()->getX(), 0, + alpha.x, alpha.y, A->getID(), B->getID(), 0, 0, 0, 0, 0, 0, 0); +} + +void ScriptIntrinsicBLAS::ZTRMM(RsBlasSide Side, RsBlasUplo Uplo, RsBlasTranspose TransA, RsBlasDiag Diag, + Double2 alpha, sp<Allocation> A, sp<Allocation> B) { + validateTRMM(mRS, Element::F64_2(mRS), Side, TransA, A, B); + nScriptIntrinsicBLAS_Z(mRS, mRS->getContext(), getID(), RsBlas_ztrmm, + TransA, 0, Side, Uplo, Diag, + B->getType()->getY(), B->getType()->getX(), 0, + alpha.x, alpha.y, A->getID(), B->getID(), 0, 0, 0, 0, 0, 0, 0); +} + +static void validateTRSM(RS* mRS, sp<const Element> e, RsBlasSide Side, RsBlasTranspose TransA, + sp<Allocation> A, sp<Allocation> B) { + int adim = -1, bM = -1, bN = -1; + if (!A->getType()->getElement()->isCompatible(e) || + !B->getType()->getElement()->isCompatible(e)) { + mRS->throwError(RS_ERROR_INVALID_ELEMENT, "Called BLAS with wrong Element type"); + } + adim = A->getType()->getX(); + if (adim != (int)A->getType()->getY()) { + // This may be unnecessary, the restriction could potentially be relaxed. + // Allocation A needs to contain at least that symmetric matrix but could theoretically + // be larger for now we assume adapters are sufficient, will reevaluate in the future. + mRS->throwError(RS_ERROR_INVALID_PARAMETER, "Called TRSM with a non-symmetric matrix A"); + } + bM = B->getType()->getY(); + bN = B->getType()->getX(); + if (Side == RsBlasLeft) { + // A is M*M + if (adim != bM) { + mRS->throwError(RS_ERROR_INVALID_PARAMETER, "Called TRSM with invalid matrix dimensions"); + } + } else { + // A is N*N + if (adim != bN) { + mRS->throwError(RS_ERROR_INVALID_PARAMETER, "Called TRSM with invalid matrix dimensions"); + } + } +} + +void ScriptIntrinsicBLAS::STRSM(RsBlasSide Side, RsBlasUplo Uplo, RsBlasTranspose TransA, RsBlasDiag Diag, + float alpha, sp<Allocation> A, sp<Allocation> B) { + validateTRSM(mRS, Element::F32(mRS), Side, TransA, A, B); + nScriptIntrinsicBLAS_Single(mRS, mRS->getContext(), getID(), RsBlas_strsm, + TransA, 0, Side, Uplo, Diag, + B->getType()->getY(), B->getType()->getX(), 0, + alpha, A->getID(), B->getID(), 0, 0, 0, 0, 0, 0); +} + +void ScriptIntrinsicBLAS::DTRSM(RsBlasSide Side, RsBlasUplo Uplo, RsBlasTranspose TransA, RsBlasDiag Diag, + double alpha, sp<Allocation> A, sp<Allocation> B) { + validateTRSM(mRS, Element::F64(mRS), Side, TransA, A, B); + nScriptIntrinsicBLAS_Double(mRS, mRS->getContext(), getID(), RsBlas_dtrsm, + TransA, 0, Side, Uplo, Diag, + B->getType()->getY(), B->getType()->getX(), 0, + alpha, A->getID(), B->getID(), 0, 0, 0, 0, 0, 0); +} + +void ScriptIntrinsicBLAS::CTRSM(RsBlasSide Side, RsBlasUplo Uplo, RsBlasTranspose TransA, RsBlasDiag Diag, + Float2 alpha, sp<Allocation> A, sp<Allocation> B) { + validateTRSM(mRS, Element::F32_2(mRS), Side, TransA, A, B); + nScriptIntrinsicBLAS_Complex(mRS, mRS->getContext(), getID(), RsBlas_ctrsm, + TransA, 0, Side, Uplo, Diag, + B->getType()->getY(), B->getType()->getX(), 0, + alpha.x, alpha.y, A->getID(), B->getID(), 0, 0, 0, 0, 0, 0, 0); +} + +void ScriptIntrinsicBLAS::ZTRSM(RsBlasSide Side, RsBlasUplo Uplo, RsBlasTranspose TransA, RsBlasDiag Diag, + Double2 alpha, sp<Allocation> A, sp<Allocation> B) { + validateTRSM(mRS, Element::F64_2(mRS), Side, TransA, A, B); + nScriptIntrinsicBLAS_Z(mRS, mRS->getContext(), getID(), RsBlas_ztrsm, + TransA, 0, Side, Uplo, Diag, + B->getType()->getY(), B->getType()->getX(), 0, + alpha.x, alpha.y, A->getID(), B->getID(), 0, 0, 0, 0, 0, 0, 0); +} + +static void validateHEMM(RS* mRS, sp<const Element> e, RsBlasSide Side, + sp<Allocation> A, sp<Allocation> B, sp<Allocation> C) { + if (!A->getType()->getElement()->isCompatible(e) || + !B->getType()->getElement()->isCompatible(e) || + !C->getType()->getElement()->isCompatible(e)) { + mRS->throwError(RS_ERROR_INVALID_ELEMENT, "Called BLAS with wrong Element type"); + } + + // A must be square; can potentially be relaxed similar to TRSM + int adim = A->getType()->getX(); + if (adim != (int)A->getType()->getY()) { + mRS->throwError(RS_ERROR_INVALID_PARAMETER, "Called HEMM with non-square A"); + } + if ((Side == RsBlasLeft && adim != (int)B->getType()->getY()) || + (Side == RsBlasRight && adim != (int)B->getType()->getX())) { + mRS->throwError(RS_ERROR_INVALID_PARAMETER, "Called HEMM with invalid B"); + } + if (B->getType()->getX() != C->getType()->getX() || + B->getType()->getY() != C->getType()->getY()) { + mRS->throwError(RS_ERROR_INVALID_PARAMETER, "Called HEMM with mismatched B and C"); + } +} + +void ScriptIntrinsicBLAS::CHEMM(RsBlasSide Side, RsBlasUplo Uplo, Float2 alpha, + sp<Allocation> A, sp<Allocation> B, Float2 beta, sp<Allocation> C) { + validateHEMM(mRS, Element::F32_2(mRS), Side, A, B, C); + nScriptIntrinsicBLAS_Complex(mRS, mRS->getContext(), getID(), RsBlas_chemm, + 0, 0, Side, Uplo, 0, + C->getType()->getY(), C->getType()->getX(), 0, + alpha.x, alpha.y, A->getID(), B->getID(), + beta.x, beta.y, C->getID(), 0, 0, 0, 0); +} + +void ScriptIntrinsicBLAS::ZHEMM(RsBlasSide Side, RsBlasUplo Uplo, Double2 alpha, + sp<Allocation> A, sp<Allocation> B, Double2 beta, sp<Allocation> C) { + validateHEMM(mRS, Element::F64_2(mRS), Side, A, B, C); + nScriptIntrinsicBLAS_Z(mRS, mRS->getContext(), getID(), RsBlas_zhemm, + 0, 0, Side, Uplo, 0, + C->getType()->getY(), C->getType()->getX(), 0, + alpha.x, alpha.y, A->getID(), B->getID(), + beta.x, beta.y, C->getID(), 0, 0, 0, 0); +} + +static void validateHERK(RS* mRS, sp<const Element> e, RsBlasTranspose Trans, + sp<Allocation> A, sp<Allocation> C) { + if (!A->getType()->getElement()->isCompatible(e) || + !C->getType()->getElement()->isCompatible(e)) { + mRS->throwError(RS_ERROR_INVALID_ELEMENT, "Called BLAS with wrong Element type"); + } + if (Trans != RsBlasNoTrans && Trans != RsBlasConjTrans) { + mRS->throwError(RS_ERROR_INVALID_PARAMETER, "Call HERK with invalid Transpose"); + } + int cdim = C->getType()->getX(); + if (cdim != (int)C->getType()->getY()) { + mRS->throwError(RS_ERROR_INVALID_PARAMETER, "Called HERK with non-square C"); + } + if (Trans == RsBlasNoTrans) { + if (cdim != (int)A->getType()->getY()) { + mRS->throwError(RS_ERROR_INVALID_PARAMETER, "Called HERK with invalid A"); + } + } else { + if (cdim != (int)A->getType()->getX()) { + mRS->throwError(RS_ERROR_INVALID_PARAMETER, "Called HERK with invalid A"); + } + } +} + +void ScriptIntrinsicBLAS::CHERK(RsBlasUplo Uplo, RsBlasTranspose Trans, float alpha, + sp<Allocation> A, float beta, sp<Allocation> C) { + validateHERK(mRS, Element::F32_2(mRS), Trans, A, C); + int k = 0; + if (Trans == RsBlasConjTrans) { + k = A->getType()->getY(); + } else { + k = A->getType()->getX(); + } + nScriptIntrinsicBLAS_Complex(mRS, mRS->getContext(), getID(), RsBlas_cherk, + Trans, 0, 0, Uplo, 0, 0, C->getType()->getX(), k, + alpha, 0, A->getID(), 0, + beta, 0, C->getID(), 0, 0, 0, 0); +} + +void ScriptIntrinsicBLAS::ZHERK(RsBlasUplo Uplo, RsBlasTranspose Trans, double alpha, + sp<Allocation> A, double beta, sp<Allocation> C) { + validateHERK(mRS, Element::F64_2(mRS), Trans, A, C); + int k = 0; + if (Trans == RsBlasConjTrans) { + k = A->getType()->getY(); + } else { + k = A->getType()->getX(); + } + nScriptIntrinsicBLAS_Z(mRS, mRS->getContext(), getID(), RsBlas_zherk, + Trans, 0, 0, Uplo, 0, 0, C->getType()->getX(), k, + alpha, 0, A->getID(), 0, + beta, 0, C->getID(), 0, 0, 0, 0); +} + +static void validateHER2K(RS* mRS, sp<const Element> e, RsBlasTranspose Trans, + sp<Allocation> A, sp<Allocation> B, sp<Allocation> C) { + if (!A->getType()->getElement()->isCompatible(e) || + !B->getType()->getElement()->isCompatible(e) || + !C->getType()->getElement()->isCompatible(e)) { + mRS->throwError(RS_ERROR_INVALID_ELEMENT, "Called BLAS with wrong Element type"); + } + if (Trans != RsBlasNoTrans && Trans != RsBlasConjTrans) { + mRS->throwError(RS_ERROR_INVALID_PARAMETER, "Call HERK with invalid Transpose"); + } + int cdim = C->getType()->getX(); + if (cdim != (int)C->getType()->getY()) { + mRS->throwError(RS_ERROR_INVALID_PARAMETER, "Called HER2K with non-square C"); + } + if (Trans == RsBlasNoTrans) { + if ((int)A->getType()->getY() != cdim) { + mRS->throwError(RS_ERROR_INVALID_PARAMETER, "Called HER2K with invalid matrices"); + } + } else { + if ((int)A->getType()->getX() != cdim) { + mRS->throwError(RS_ERROR_INVALID_PARAMETER, "Called HER2K with invalid matrices"); + } + } + if (A->getType()->getX() != B->getType()->getX() || A->getType()->getY() != B->getType()->getY()) { + mRS->throwError(RS_ERROR_INVALID_PARAMETER, "Called HER2K with invalid A and B matrices"); + } +} + +void ScriptIntrinsicBLAS::CHER2K(RsBlasUplo Uplo, RsBlasTranspose Trans, Float2 alpha, + sp<Allocation> A, sp<Allocation> B, float beta, sp<Allocation> C) { + validateHER2K(mRS, Element::F32_2(mRS), Trans, A, B, C); + int k = 0; + if (Trans == RsBlasNoTrans) { + k = A->getType()->getX(); + } else { + k = A->getType()->getY(); + } + nScriptIntrinsicBLAS_Complex(mRS, mRS->getContext(), getID(), RsBlas_cher2k, + Trans, 0, 0, Uplo, 0, 0, C->getType()->getX(), k, + alpha.x, alpha.y, A->getID(), B->getID(), + beta, 0, C->getID(), 0, 0, 0, 0); +} + +void ScriptIntrinsicBLAS::ZHER2K(RsBlasUplo Uplo, RsBlasTranspose Trans, Double2 alpha, + sp<Allocation> A, sp<Allocation> B, double beta, sp<Allocation> C) { + validateHER2K(mRS, Element::F64_2(mRS), Trans, A, B, C); + int k = 0; + if (Trans == RsBlasNoTrans) { + k = A->getType()->getX(); + } else { + k = A->getType()->getY(); + } + nScriptIntrinsicBLAS_Z(mRS, mRS->getContext(), getID(), RsBlas_zher2k, + Trans, 0, 0, Uplo, 0, 0, C->getType()->getX(), k, + alpha.x, alpha.y, A->getID(), B->getID(), + beta, 0, C->getID(), 0, 0, 0, 0); +} + + + +void ScriptIntrinsicBLAS::BNNM(sp<Allocation> A, int a_offset, sp<Allocation> B, int b_offset, + sp<Allocation> C, int c_offset, int c_mult) { + validateL3(mRS, Element::U8(mRS), RsBlasNoTrans, RsBlasTrans, 0, A, B, C); + + if (a_offset < 0 || a_offset > 255) { + mRS->throwError(RS_ERROR_INVALID_PARAMETER, "Invalid a_offset passed to BNNM"); + } + if (b_offset < 0 || b_offset > 255) { + mRS->throwError(RS_ERROR_INVALID_PARAMETER, "Invalid b_offset passed to BNNM"); + } + int M = -1, N = -1, K = -1; + M = A->getType()->getY(); + N = B->getType()->getY(); + K = A->getType()->getX(); + + nScriptIntrinsicBLAS_BNNM(mRS, mRS->getContext(), getID(), M, N, K, A->getID(), a_offset, + B->getID(), b_offset, C->getID(), c_offset, c_mult); +} diff --git a/cpp/ScriptIntrinsics.cpp b/cpp/ScriptIntrinsics.cpp index 0a2df33a..38224e69 100644 --- a/cpp/ScriptIntrinsics.cpp +++ b/cpp/ScriptIntrinsics.cpp @@ -633,4 +633,4 @@ void ScriptIntrinsicYuvToRGB::forEach(sp<Allocation> out) { } Script::forEach(0, nullptr, out, nullptr, 0); -} +}
\ No newline at end of file diff --git a/cpp/rsCppStructs.h b/cpp/rsCppStructs.h index 34988979..3f83ba15 100644 --- a/cpp/rsCppStructs.h +++ b/cpp/rsCppStructs.h @@ -86,6 +86,277 @@ class Sampler; RS_INIT_MAX = 32 }; + +class Byte2 { + public: + int8_t x, y; + + Byte2(int8_t initX, int8_t initY) + : x(initX), y(initY) {} + Byte2() : x(0), y(0) {} +}; + +class Byte3 { + public: + int8_t x, y, z; + + Byte3(int8_t initX, int8_t initY, int8_t initZ) + : x(initX), y(initY), z(initZ) {} + Byte3() : x(0), y(0), z(0) {} +}; + +class Byte4 { + public: + int8_t x, y, z, w; + + Byte4(int8_t initX, int8_t initY, int8_t initZ, int8_t initW) + : x(initX), y(initY), z(initZ), w(initW) {} + Byte4() : x(0), y(0), z(0), w(0) {} +}; + +class UByte2 { + public: + uint8_t x, y; + + UByte2(uint8_t initX, uint8_t initY) + : x(initX), y(initY) {} + UByte2() : x(0), y(0) {} +}; + +class UByte3 { + public: + uint8_t x, y, z; + + UByte3(uint8_t initX, uint8_t initY, uint8_t initZ) + : x(initX), y(initY), z(initZ) {} + UByte3() : x(0), y(0), z(0) {} +}; + +class UByte4 { + public: + uint8_t x, y, z, w; + + UByte4(uint8_t initX, uint8_t initY, uint8_t initZ, uint8_t initW) + : x(initX), y(initY), z(initZ), w(initW) {} + UByte4() : x(0), y(0), z(0), w(0) {} +}; + +class Short2 { + public: + short x, y; + + Short2(short initX, short initY) + : x(initX), y(initY) {} + Short2() : x(0), y(0) {} +}; + +class Short3 { + public: + short x, y, z; + + Short3(short initX, short initY, short initZ) + : x(initX), y(initY), z(initZ) {} + Short3() : x(0), y(0), z(0) {} +}; + +class Short4 { + public: + short x, y, z, w; + + Short4(short initX, short initY, short initZ, short initW) + : x(initX), y(initY), z(initZ), w(initW) {} + Short4() : x(0), y(0), z(0), w(0) {} +}; + +class UShort2 { + public: + uint16_t x, y; + + UShort2(uint16_t initX, uint16_t initY) + : x(initX), y(initY) {} + UShort2() : x(0), y(0) {} +}; + +class UShort3 { + public: + uint16_t x, y, z; + + UShort3(uint16_t initX, uint16_t initY, uint16_t initZ) + : x(initX), y(initY), z(initZ) {} + UShort3() : x(0), y(0), z(0) {} +}; + +class UShort4 { + public: + uint16_t x, y, z, w; + + UShort4(uint16_t initX, uint16_t initY, uint16_t initZ, uint16_t initW) + : x(initX), y(initY), z(initZ), w(initW) {} + UShort4() : x(0), y(0), z(0), w(0) {} +}; + +class Int2 { + public: + int x, y; + + Int2(int initX, int initY) + : x(initX), y(initY) {} + Int2() : x(0), y(0) {} +}; + +class Int3 { + public: + int x, y, z; + + Int3(int initX, int initY, int initZ) + : x(initX), y(initY), z(initZ) {} + Int3() : x(0), y(0), z(0) {} +}; + +class Int4 { + public: + int x, y, z, w; + + Int4(int initX, int initY, int initZ, int initW) + : x(initX), y(initY), z(initZ), w(initW) {} + Int4() : x(0), y(0), z(0), w(0) {} +}; + +class UInt2 { + public: + uint32_t x, y; + + UInt2(uint32_t initX, uint32_t initY) + : x(initX), y(initY) {} + UInt2() : x(0), y(0) {} +}; + +class UInt3 { + public: + uint32_t x, y, z; + + UInt3(uint32_t initX, uint32_t initY, uint32_t initZ) + : x(initX), y(initY), z(initZ) {} + UInt3() : x(0), y(0), z(0) {} +}; + +class UInt4 { + public: + uint32_t x, y, z, w; + + UInt4(uint32_t initX, uint32_t initY, uint32_t initZ, uint32_t initW) + : x(initX), y(initY), z(initZ), w(initW) {} + UInt4() : x(0), y(0), z(0), w(0) {} +}; + +class Long2 { + public: + int64_t x, y; + + Long2(int64_t initX, int64_t initY) + : x(initX), y(initY) {} + Long2() : x(0), y(0) {} +}; + +class Long3 { + public: + int64_t x, y, z; + + Long3(int64_t initX, int64_t initY, int64_t initZ) + : x(initX), y(initY), z(initZ) {} + Long3() : x(0), y(0), z(0) {} +}; + +class Long4 { + public: + int64_t x, y, z, w; + + Long4(int64_t initX, int64_t initY, int64_t initZ, int64_t initW) + : x(initX), y(initY), z(initZ), w(initW) {} + Long4() : x(0), y(0), z(0), w(0) {} +}; + +class ULong2 { + public: + uint64_t x, y; + + ULong2(uint64_t initX, uint64_t initY) + : x(initX), y(initY) {} + ULong2() : x(0), y(0) {} +}; + +class ULong3 { + public: + uint64_t x, y, z; + + ULong3(uint64_t initX, uint64_t initY, uint64_t initZ) + : x(initX), y(initY), z(initZ) {} + ULong3() : x(0), y(0), z(0) {} +}; + +class ULong4 { + public: + uint64_t x, y, z, w; + + ULong4(uint64_t initX, uint64_t initY, uint64_t initZ, uint64_t initW) + : x(initX), y(initY), z(initZ), w(initW) {} + ULong4() : x(0), y(0), z(0), w(0) {} +}; + +class Float2 { + public: + float x, y; + + Float2(float initX, float initY) + : x(initX), y(initY) {} + Float2() : x(0), y(0) {} +}; + +class Float3 { + public: + float x, y, z; + + Float3(float initX, float initY, float initZ) + : x(initX), y(initY), z(initZ) {} + Float3() : x(0.f), y(0.f), z(0.f) {} +}; + +class Float4 { + public: + float x, y, z, w; + + Float4(float initX, float initY, float initZ, float initW) + : x(initX), y(initY), z(initZ), w(initW) {} + Float4() : x(0.f), y(0.f), z(0.f), w(0.f) {} +}; + +class Double2 { + public: + double x, y; + + Double2(double initX, double initY) + : x(initX), y(initY) {} + Double2() : x(0), y(0) {} +}; + +class Double3 { + public: + double x, y, z; + + Double3(double initX, double initY, double initZ) + : x(initX), y(initY), z(initZ) {} + Double3() : x(0), y(0), z(0) {} +}; + +class Double4 { + public: + double x, y, z, w; + + Double4(double initX, double initY, double initZ, double initW) + : x(initX), y(initY), z(initZ), w(initW) {} + Double4() : x(0), y(0), z(0), w(0) {} +}; + /** * The RenderScript context. This class controls initialization, resource management, and teardown. */ @@ -1514,6 +1785,1946 @@ class ScriptIntrinsic3DLUT : public ScriptIntrinsic { void setLUT(sp<Allocation> lut); }; + +/** + * Intrinsic kernel provides high performance RenderScript APIs to BLAS. + * + * The BLAS (Basic Linear Algebra Subprograms) are routines that provide standard + * building blocks for performing basic vector and matrix operations. + * + * For detailed description of BLAS, please refer to http://www.netlib.org/blas/ + * + **/ +class ScriptIntrinsicBLAS : public ScriptIntrinsic { + private: + ScriptIntrinsicBLAS(sp<RS> rs, sp<const Element> e); + public: + /** + * Create an intrinsic to access BLAS subroutines. + * + * @param rs The RenderScript context + * @return ScriptIntrinsicBLAS + */ + static sp<ScriptIntrinsicBLAS> create(sp<RS> rs); + + /** + * SGEMV performs one of the matrix-vector operations + * y := alpha*A*x + beta*y or y := alpha*A**T*x + beta*y + * + * Details: http://www.netlib.org/lapack/explore-html/db/d58/sgemv_8f.html + * + * @param TransA The type of transpose applied to matrix A. + * @param alpha The scalar alpha. + * @param A The input allocation contains matrix A, supported elements type: {Element#F32}. + * @param X The input allocation contains vector x, supported elements type: {Element#F32}. + * @param incX The increment for the elements of vector x, must be larger than zero. + * @param beta The scalar beta. + * @param Y The input allocation contains vector y, supported elements type: {Element#F32}. + * @param incY The increment for the elements of vector y, must be larger than zero. + */ + void SGEMV(RsBlasTranspose TransA, + float alpha, sp<Allocation> A, sp<Allocation> X, int incX, + float beta, sp<Allocation> Y, int incY); + + /** + * DGEMV performs one of the matrix-vector operations + * y := alpha*A*x + beta*y or y := alpha*A**T*x + beta*y + * + * Details: http://www.netlib.org/lapack/explore-html/dc/da8/dgemv_8f.html + * + * @param TransA The type of transpose applied to matrix A. + * @param alpha The scalar alpha. + * @param A The input allocation contains matrix A, supported elements type: {Element#F64}. + * @param X The input allocation contains vector x, supported elements type: {Element#F64}. + * @param incX The increment for the elements of vector x, must be larger than zero. + * @param beta The scalar beta. + * @param Y The input allocation contains vector y, supported elements type: {Element#F64}. + * @param incY The increment for the elements of vector y, must be larger than zero. + */ + void DGEMV(RsBlasTranspose TransA, + double alpha, sp<Allocation> A, sp<Allocation> X, int incX, + double beta, sp<Allocation> Y, int incY); + + /** + * CGEMV performs one of the matrix-vector operations + * y := alpha*A*x + beta*y or y := alpha*A**T*x + beta*y or y := alpha*A**H*x + beta*y + * + * Details: http://www.netlib.org/lapack/explore-html/d4/d8a/cgemv_8f.html + * + * @param TransA The type of transpose applied to matrix A. + * @param alpha The scalar alpha. + * @param A The input allocation contains matrix A, supported elements type: {Element#F32_2}. + * @param X The input allocation contains vector x, supported elements type: {Element#F32_2}. + * @param incX The increment for the elements of vector x, must be larger than zero. + * @param beta The scalar beta. + * @param Y The input allocation contains vector y, supported elements type: {Element#F32_2}. + * @param incY The increment for the elements of vector y, must be larger than zero. + */ + void CGEMV(RsBlasTranspose TransA, + Float2 alpha, sp<Allocation> A, sp<Allocation> X, int incX, + Float2 beta, sp<Allocation> Y, int incY); + + /** + * ZGEMV performs one of the matrix-vector operations + * y := alpha*A*x + beta*y or y := alpha*A**T*x + beta*y or y := alpha*A**H*x + beta*y + * + * Details: http://www.netlib.org/lapack/explore-html/db/d40/zgemv_8f.html + * + * @param TransA The type of transpose applied to matrix A. + * @param alpha The scalar alpha. + * @param A The input allocation contains matrix A, supported elements type: {Element#F64_2}. + * @param X The input allocation contains vector x, supported elements type: {Element#F64_2}. + * @param incX The increment for the elements of vector x, must be larger than zero. + * @param beta The scalar beta. + * @param Y The input allocation contains vector y, supported elements type: {Element#F64_2}. + * @param incY The increment for the elements of vector y, must be larger than zero. + */ + void ZGEMV(RsBlasTranspose TransA, + Double2 alpha, sp<Allocation> A, sp<Allocation> X, int incX, + Double2 beta, sp<Allocation> Y, int incY); + + /** + * SGBMV performs one of the matrix-vector operations + * y := alpha*A*x + beta*y or y := alpha*A**T*x + beta*y + * + * Details: http://www.netlib.org/lapack/explore-html/d6/d46/sgbmv_8f.html + * + * Note: For a M*N matrix, the input Allocation should also be of size M*N (dimY = M, dimX = N), + * but only the region M*(KL+KU+1) will be referenced. The following subroutine can is an + * example showing how to convert the original matrix 'a' to row-based band matrix 'b'. + * for i in range(0, m): + * for j in range(max(0, i-kl), min(i+ku+1, n)): + * b[i, j-i+kl] = a[i, j] + * + * @param TransA The type of transpose applied to matrix A. + * @param KL The number of sub-diagonals of the matrix A. + * @param KU The number of super-diagonals of the matrix A. + * @param alpha The scalar alpha. + * @param A The input allocation contains the band matrix A, supported elements type: {Element#F32}. + * @param X The input allocation contains vector x, supported elements type: {Element#F32}. + * @param incX The increment for the elements of vector x, must be larger than zero. + * @param beta The scalar beta. + * @param Y The input allocation contains vector y, supported elements type: {Element#F32}. + * @param incY The increment for the elements of vector y, must be larger than zero. + */ + void SGBMV(RsBlasTranspose TransA, + int KL, int KU, float alpha, sp<Allocation> A, sp<Allocation> X, int incX, + float beta, sp<Allocation> Y, int incY); + + /** + * DGBMV performs one of the matrix-vector operations + * y := alpha*A*x + beta*y or y := alpha*A**T*x + beta*y + * + * Details: http://www.netlib.org/lapack/explore-html/d2/d3f/dgbmv_8f.html + * + * Note: For a M*N matrix, the input Allocation should also be of size M*N (dimY = M, dimX = N), + * but only the region M*(KL+KU+1) will be referenced. The following subroutine can is an + * example showing how to convert the original matrix 'a' to row-based band matrix 'b'. + * for i in range(0, m): + * for j in range(max(0, i-kl), min(i+ku+1, n)): + * b[i, j-i+kl] = a[i, j] + * + * @param TransA The type of transpose applied to matrix A. + * @param KL The number of sub-diagonals of the matrix A. + * @param KU The number of super-diagonals of the matrix A. + * @param alpha The scalar alpha. + * @param A The input allocation contains the band matrix A, supported elements type: {Element#F64}. + * @param X The input allocation contains vector x, supported elements type: {Element#F64}. + * @param incX The increment for the elements of vector x, must be larger than zero. + * @param beta The scalar beta. + * @param Y The input allocation contains vector y, supported elements type: {Element#F64}. + * @param incY The increment for the elements of vector y, must be larger than zero. + */ + void DGBMV(RsBlasTranspose TransA, + int KL, int KU, double alpha, sp<Allocation> A, sp<Allocation> X, + int incX, double beta, sp<Allocation> Y, int incY); + + /** + * CGBMV performs one of the matrix-vector operations + * y := alpha*A*x + beta*y or y := alpha*A**T*x + beta*y or y := alpha*A**H*x + beta*y + * + * Details: http://www.netlib.org/lapack/explore-html/d0/d75/cgbmv_8f.html + * + * Note: For a M*N matrix, the input Allocation should also be of size M*N (dimY = M, dimX = N), + * but only the region M*(KL+KU+1) will be referenced. The following subroutine can is an + * example showing how to convert the original matrix 'a' to row-based band matrix 'b'. + * for i in range(0, m): + * for j in range(max(0, i-kl), min(i+ku+1, n)): + * b[i, j-i+kl] = a[i, j] + * + * @param TransA The type of transpose applied to matrix A. + * @param KL The number of sub-diagonals of the matrix A. + * @param KU The number of super-diagonals of the matrix A. + * @param alpha The scalar alpha. + * @param A The input allocation contains the band matrix A, supported elements type: {Element#F32_2}. + * @param X The input allocation contains vector x, supported elements type: {Element#F32_2}. + * @param incX The increment for the elements of vector x, must be larger than zero. + * @param beta The scalar beta. + * @param Y The input allocation contains vector y, supported elements type: {Element#F32_2}. + * @param incY The increment for the elements of vector y, must be larger than zero. + */ + void CGBMV(RsBlasTranspose TransA, + int KL, int KU, Float2 alpha, sp<Allocation> A, sp<Allocation> X, + int incX, Float2 beta, sp<Allocation> Y, int incY); + + /** + * ZGBMV performs one of the matrix-vector operations + * y := alpha*A*x + beta*y or y := alpha*A**T*x + beta*y or y := alpha*A**H*x + beta*y + * + * Details: http://www.netlib.org/lapack/explore-html/d9/d46/zgbmv_8f.html + * + * Note: For a M*N matrix, the input Allocation should also be of size M*N (dimY = M, dimX = N), + * but only the region M*(KL+KU+1) will be referenced. The following subroutine can is an + * example showing how to convert the original matrix 'a' to row-based band matrix 'b'. + * for i in range(0, m): + * for j in range(max(0, i-kl), min(i+ku+1, n)): + * b[i, j-i+kl] = a[i, j] + * + * @param TransA The type of transpose applied to matrix A. + * @param KL The number of sub-diagonals of the matrix A. + * @param KU The number of super-diagonals of the matrix A. + * @param alpha The scalar alpha. + * @param A The input allocation contains the band matrix A, supported elements type: {Element#F64_2}. + * @param X The input allocation contains vector x, supported elements type: {Element#F64_2}. + * @param incX The increment for the elements of vector x, must be larger than zero. + * @param beta The scalar beta. + * @param Y The input allocation contains vector y, supported elements type: {Element#F64_2}. + * @param incY The increment for the elements of vector y, must be larger than zero. + */ + void ZGBMV(RsBlasTranspose TransA, + int KL, int KU, Double2 alpha, sp<Allocation> A, sp<Allocation> X, int incX, + Double2 beta, sp<Allocation> Y, int incY); + + /** + * STRMV performs one of the matrix-vector operations + * x := A*x or x := A**T*x + * + * Details: http://www.netlib.org/lapack/explore-html/de/d45/strmv_8f.html + * + * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. + * @param TransA The type of transpose applied to matrix A. + * @param Diag Specifies whether or not A is unit triangular. + * @param A The input allocation contains matrix A, supported elements type: {Element#F32}. + * @param X The input allocation contains vector x, supported elements type: {Element#F32}. + * @param incX The increment for the elements of vector x, must be larger than zero. + */ + void STRMV(RsBlasUplo Uplo, RsBlasTranspose TransA, RsBlasDiag Diag, + sp<Allocation> A, sp<Allocation> X, int incX); + + /** + * DTRMV performs one of the matrix-vector operations + * x := A*x or x := A**T*x + * + * Details: http://www.netlib.org/lapack/explore-html/dc/d7e/dtrmv_8f.html + * + * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. + * @param TransA The type of transpose applied to matrix A. + * @param Diag Specifies whether or not A is unit triangular. + * @param A The input allocation contains matrix A, supported elements type: {Element#F64}. + * @param X The input allocation contains vector x, supported elements type: {Element#F64}. + * @param incX The increment for the elements of vector x, must be larger than zero. + */ + void DTRMV(RsBlasUplo Uplo, RsBlasTranspose TransA, RsBlasDiag Diag, + sp<Allocation> A, sp<Allocation> X, int incX); + + /** + * CTRMV performs one of the matrix-vector operations + * x := A*x or x := A**T*x or x := A**H*x + * + * Details: http://www.netlib.org/lapack/explore-html/df/d78/ctrmv_8f.html + * + * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. + * @param TransA The type of transpose applied to matrix A. + * @param Diag Specifies whether or not A is unit triangular. + * @param A The input allocation contains matrix A, supported elements type: {Element#F32_2}. + * @param X The input allocation contains vector x, supported elements type: {Element#F32_2}. + * @param incX The increment for the elements of vector x, must be larger than zero. + */ + void CTRMV(RsBlasUplo Uplo, RsBlasTranspose TransA, RsBlasDiag Diag, + sp<Allocation> A, sp<Allocation> X, int incX); + + /** + * ZTRMV performs one of the matrix-vector operations + * x := A*x or x := A**T*x or x := A**H*x + * + * Details: http://www.netlib.org/lapack/explore-html/d0/dd1/ztrmv_8f.html + * + * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. + * @param TransA The type of transpose applied to matrix A. + * @param Diag Specifies whether or not A is unit triangular. + * @param A The input allocation contains matrix A, supported elements type: {Element#F64_2}. + * @param X The input allocation contains vector x, supported elements type: {Element#F64_2}. + * @param incX The increment for the elements of vector x, must be larger than zero. + */ + void ZTRMV(RsBlasUplo Uplo, RsBlasTranspose TransA, RsBlasDiag Diag, + sp<Allocation> A, sp<Allocation> X, int incX); + + /** + * STBMV performs one of the matrix-vector operations + * x := A*x or x := A**T*x + * + * Details: http://www.netlib.org/lapack/explore-html/d6/d7d/stbmv_8f.html + * + * Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N), + * but only the region N*(K+1) will be referenced. The following subroutine can is an + * example showing how to convert a UPPER trianglar matrix 'a' to row-based band matrix 'b'. + * for i in range(0, n): + * for j in range(i, min(i+k+1, n)): + * b[i, j-i] = a[i, j] + * + * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. + * @param TransA The type of transpose applied to matrix A. + * @param Diag Specifies whether or not A is unit triangular. + * @param K The number of off-diagonals of the matrix A + * @param A The input allocation contains matrix A, supported elements type: {Element#F32}. + * @param X The input allocation contains vector x, supported elements type: {Element#F32}. + * @param incX The increment for the elements of vector x, must be larger than zero. + */ + void STBMV(RsBlasUplo Uplo, RsBlasTranspose TransA, RsBlasDiag Diag, + int K, sp<Allocation> A, sp<Allocation> X, int incX); + + /** + * DTBMV performs one of the matrix-vector operations + * x := A*x or x := A**T*x + * + * Details: http://www.netlib.org/lapack/explore-html/df/d29/dtbmv_8f.html + * + * Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N), + * but only the region N*(K+1) will be referenced. The following subroutine can is an + * example showing how to convert a UPPER trianglar matrix 'a' to row-based band matrix 'b'. + * for i in range(0, n): + * for j in range(i, min(i+k+1, n)): + * b[i, j-i] = a[i, j] + * + * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. + * @param TransA The type of transpose applied to matrix A. + * @param Diag Specifies whether or not A is unit triangular. + * @param K The number of off-diagonals of the matrix A + * @param A The input allocation contains matrix A, supported elements type: {Element#F64}. + * @param X The input allocation contains vector x, supported elements type: {Element#F64}. + * @param incX The increment for the elements of vector x, must be larger than zero. + */ + void DTBMV(RsBlasUplo Uplo, RsBlasTranspose TransA, RsBlasDiag Diag, + int K, sp<Allocation> A, sp<Allocation> X, int incX); + + /** + * CTBMV performs one of the matrix-vector operations + * x := A*x or x := A**T*x or x := A**H*x + * + * Details: http://www.netlib.org/lapack/explore-html/d3/dcd/ctbmv_8f.html + * + * Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N), + * but only the region N*(K+1) will be referenced. The following subroutine can is an + * example showing how to convert a UPPER trianglar matrix 'a' to row-based band matrix 'b'. + * for i in range(0, n): + * for j in range(i, min(i+k+1, n)): + * b[i, j-i] = a[i, j] + * + * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. + * @param TransA The type of transpose applied to matrix A. + * @param Diag Specifies whether or not A is unit triangular. + * @param K The number of off-diagonals of the matrix A + * @param A The input allocation contains matrix A, supported elements type: {Element#F32_2}. + * @param X The input allocation contains vector x, supported elements type: {Element#F32_2}. + * @param incX The increment for the elements of vector x, must be larger than zero. + */ + void CTBMV(RsBlasUplo Uplo, RsBlasTranspose TransA, RsBlasDiag Diag, + int K, sp<Allocation> A, sp<Allocation> X, int incX); + + /** + * ZTBMV performs one of the matrix-vector operations + * x := A*x or x := A**T*x or x := A**H*x + * + * Details: http://www.netlib.org/lapack/explore-html/d3/d39/ztbmv_8f.html + * + * Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N), + * but only the region N*(K+1) will be referenced. The following subroutine can is an + * example showing how to convert a UPPER trianglar matrix 'a' to row-based band matrix 'b'. + * for i in range(0, n): + * for j in range(i, min(i+k+1, n)): + * b[i, j-i] = a[i, j] + * + * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. + * @param TransA The type of transpose applied to matrix A. + * @param Diag Specifies whether or not A is unit triangular. + * @param K The number of off-diagonals of the matrix A + * @param A The input allocation contains matrix A, supported elements type: {Element#F64_2}. + * @param X The input allocation contains vector x, supported elements type: {Element#F64_2}. + * @param incX The increment for the elements of vector x, must be larger than zero. + */ + void ZTBMV(RsBlasUplo Uplo, RsBlasTranspose TransA, RsBlasDiag Diag, + int K, sp<Allocation> A, sp<Allocation> X, int incX); + + /** + * STPMV performs one of the matrix-vector operations + * x := A*x or x := A**T*x + * + * Details: http://www.netlib.org/lapack/explore-html/db/db1/stpmv_8f.html + * + * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2, + * The following subroutine can is an example showing how to convert a UPPER trianglar matrix + * 'a' to packed matrix 'b'. + * k = 0 + * for i in range(0, n): + * for j in range(i, n): + * b[k++] = a[i, j] + * + * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. + * @param TransA The type of transpose applied to matrix A. + * @param Diag Specifies whether or not A is unit triangular. + * @param Ap The input allocation contains packed matrix A, supported elements type: {Element#F32}. + * @param X The input allocation contains vector x, supported elements type: {Element#F32}. + * @param incX The increment for the elements of vector x, must be larger than zero. + */ + void STPMV(RsBlasUplo Uplo, RsBlasTranspose TransA, RsBlasDiag Diag, + sp<Allocation> Ap, sp<Allocation> X, int incX); + + /** + * DTPMV performs one of the matrix-vector operations + * x := A*x or x := A**T*x + * + * Details: http://www.netlib.org/lapack/explore-html/dc/dcd/dtpmv_8f.html + * + * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2, + * The following subroutine can is an example showing how to convert a UPPER trianglar matrix + * 'a' to packed matrix 'b'. + * k = 0 + * for i in range(0, n): + * for j in range(i, n): + * b[k++] = a[i, j] + * + * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. + * @param TransA The type of transpose applied to matrix A. + * @param Diag Specifies whether or not A is unit triangular. + * @param Ap The input allocation contains packed matrix A, supported elements type: {Element#F64}. + * @param X The input allocation contains vector x, supported elements type: {Element#F64}. + * @param incX The increment for the elements of vector x, must be larger than zero. + */ + void DTPMV(RsBlasUplo Uplo, RsBlasTranspose TransA, RsBlasDiag Diag, + sp<Allocation> Ap, sp<Allocation> X, int incX); + + /** + * CTPMV performs one of the matrix-vector operations + * x := A*x or x := A**T*x or x := A**H*x + * + * Details: http://www.netlib.org/lapack/explore-html/d4/dbb/ctpmv_8f.html + * + * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2, + * The following subroutine can is an example showing how to convert a UPPER trianglar matrix + * 'a' to packed matrix 'b'. + * k = 0 + * for i in range(0, n): + * for j in range(i, n): + * b[k++] = a[i, j] + * + * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. + * @param TransA The type of transpose applied to matrix A. + * @param Diag Specifies whether or not A is unit triangular. + * @param Ap The input allocation contains packed matrix A, supported elements type: {Element#F32_2}. + * @param X The input allocation contains vector x, supported elements type: {Element#F32_2}. + * @param incX The increment for the elements of vector x, must be larger than zero. + */ + void CTPMV(RsBlasUplo Uplo, RsBlasTranspose TransA, RsBlasDiag Diag, + sp<Allocation> Ap, sp<Allocation> X, int incX); + + /** + * ZTPMV performs one of the matrix-vector operations + * x := A*x or x := A**T*x or x := A**H*x + * + * Details: http://www.netlib.org/lapack/explore-html/d2/d9e/ztpmv_8f.html + * + * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2, + * The following subroutine can is an example showing how to convert a UPPER trianglar matrix + * 'a' to packed matrix 'b'. + * k = 0 + * for i in range(0, n): + * for j in range(i, n): + * b[k++] = a[i, j] + * + * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. + * @param TransA The type of transpose applied to matrix A. + * @param Diag Specifies whether or not A is unit triangular. + * @param Ap The input allocation contains packed matrix A, supported elements type: {Element#F64_2}. + * @param X The input allocation contains vector x, supported elements type: {Element#F64_2}. + * @param incX The increment for the elements of vector x, must be larger than zero. + */ + void ZTPMV(RsBlasUplo Uplo, RsBlasTranspose TransA, RsBlasDiag Diag, + sp<Allocation> Ap, sp<Allocation> X, int incX); + + /** + * STRSV solves one of the systems of equations + * A*x = b or A**T*x = b + * + * Details: http://www.netlib.org/lapack/explore-html/d0/d2a/strsv_8f.html + * + * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. + * @param TransA The type of transpose applied to matrix A. + * @param Diag Specifies whether or not A is unit triangular. + * @param A The input allocation contains matrix A, supported elements type: {Element#F32}. + * @param X The input allocation contains vector x, supported elements type: {Element#F32}. + * @param incX The increment for the elements of vector x, must be larger than zero. + */ + void STRSV(RsBlasUplo Uplo, RsBlasTranspose TransA, RsBlasDiag Diag, + sp<Allocation> A, sp<Allocation> X, int incX); + + /** + * DTRSV solves one of the systems of equations + * A*x = b or A**T*x = b + * + * Details: http://www.netlib.org/lapack/explore-html/d6/d96/dtrsv_8f.html + * + * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. + * @param TransA The type of transpose applied to matrix A. + * @param Diag Specifies whether or not A is unit triangular. + * @param A The input allocation contains matrix A, supported elements type: {Element#F64}. + * @param X The input allocation contains vector x, supported elements type: {Element#F64}. + * @param incX The increment for the elements of vector x, must be larger than zero. + */ + void DTRSV(RsBlasUplo Uplo, RsBlasTranspose TransA, RsBlasDiag Diag, + sp<Allocation> A, sp<Allocation> X, int incX); + + /** + * CTRSV solves one of the systems of equations + * A*x = b or A**T*x = b or A**H*x = b + * + * Details: http://www.netlib.org/lapack/explore-html/d4/dc8/ctrsv_8f.html + * + * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. + * @param TransA The type of transpose applied to matrix A. + * @param Diag Specifies whether or not A is unit triangular. + * @param A The input allocation contains matrix A, supported elements type: {Element#F32_2}. + * @param X The input allocation contains vector x, supported elements type: {Element#F32_2}. + * @param incX The increment for the elements of vector x, must be larger than zero. + */ + void CTRSV(RsBlasUplo Uplo, RsBlasTranspose TransA, RsBlasDiag Diag, + sp<Allocation> A, sp<Allocation> X, int incX); + + /** + * ZTRSV solves one of the systems of equations + * A*x = b or A**T*x = b or A**H*x = b + * + * Details: http://www.netlib.org/lapack/explore-html/d1/d2f/ztrsv_8f.html + * + * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. + * @param TransA The type of transpose applied to matrix A. + * @param Diag Specifies whether or not A is unit triangular. + * @param A The input allocation contains matrix A, supported elements type: {Element#F64_2}. + * @param X The input allocation contains vector x, supported elements type: {Element#F64_2}. + * @param incX The increment for the elements of vector x, must be larger than zero. + */ + void ZTRSV(RsBlasUplo Uplo, RsBlasTranspose TransA, RsBlasDiag Diag, + sp<Allocation> A, sp<Allocation> X, int incX); + + /** + * STBSV solves one of the systems of equations + * A*x = b or A**T*x = b + * + * Details: http://www.netlib.org/lapack/explore-html/d0/d1f/stbsv_8f.html + * + * Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N), + * but only the region N*(K+1) will be referenced. The following subroutine can is an + * example showing how to convert a UPPER trianglar matrix 'a' to row-based band matrix 'b'. + * for i in range(0, n): + * for j in range(i, min(i+k+1, n)): + * b[i, j-i] = a[i, j] + * + * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. + * @param TransA The type of transpose applied to matrix A. + * @param Diag Specifies whether or not A is unit triangular. + * @param K The number of off-diagonals of the matrix A + * @param A The input allocation contains matrix A, supported elements type: {Element#F32}. + * @param X The input allocation contains vector x, supported elements type: {Element#F32}. + * @param incX The increment for the elements of vector x, must be larger than zero. + */ + void STBSV(RsBlasUplo Uplo, RsBlasTranspose TransA, RsBlasDiag Diag, + int K, sp<Allocation> A, sp<Allocation> X, int incX); + + /** + * DTBSV solves one of the systems of equations + * A*x = b or A**T*x = b + * + * Details: http://www.netlib.org/lapack/explore-html/d4/dcf/dtbsv_8f.html + * + * Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N), + * but only the region N*(K+1) will be referenced. The following subroutine can is an + * example showing how to convert a UPPER trianglar matrix 'a' to row-based band matrix 'b'. + * for i in range(0, n): + * for j in range(i, min(i+k+1, n)): + * b[i, j-i] = a[i, j] + * + * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. + * @param TransA The type of transpose applied to matrix A. + * @param Diag Specifies whether or not A is unit triangular. + * @param K The number of off-diagonals of the matrix A + * @param A The input allocation contains matrix A, supported elements type: {Element#F64}. + * @param X The input allocation contains vector x, supported elements type: {Element#F64}. + * @param incX The increment for the elements of vector x, must be larger than zero. + */ + void DTBSV(RsBlasUplo Uplo, RsBlasTranspose TransA, RsBlasDiag Diag, + int K, sp<Allocation> A, sp<Allocation> X, int incX); + + /** + * CTBSV solves one of the systems of equations + * A*x = b or A**T*x = b or A**H*x = b + * + * Details: http://www.netlib.org/lapack/explore-html/d9/d5f/ctbsv_8f.html + * + * Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N), + * but only the region N*(K+1) will be referenced. The following subroutine can is an + * example showing how to convert a UPPER trianglar matrix 'a' to row-based band matrix 'b'. + * for i in range(0, n): + * for j in range(i, min(i+k+1, n)): + * b[i, j-i] = a[i, j] + * + * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. + * @param TransA The type of transpose applied to matrix A. + * @param Diag Specifies whether or not A is unit triangular. + * @param K The number of off-diagonals of the matrix A + * @param A The input allocation contains matrix A, supported elements type: {Element#F32_2}. + * @param X The input allocation contains vector x, supported elements type: {Element#F32_2}. + * @param incX The increment for the elements of vector x, must be larger than zero. + */ + void CTBSV(RsBlasUplo Uplo, RsBlasTranspose TransA, RsBlasDiag Diag, + int K, sp<Allocation> A, sp<Allocation> X, int incX); + + /** + * ZTBSV solves one of the systems of equations + * A*x = b or A**T*x = b or A**H*x = b + * + * Details: http://www.netlib.org/lapack/explore-html/d4/d5a/ztbsv_8f.html + * + * Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N), + * but only the region N*(K+1) will be referenced. The following subroutine can is an + * example showing how to convert a UPPER trianglar matrix 'a' to row-based band matrix 'b'. + * for i in range(0, n): + * for j in range(i, min(i+k+1, n)): + * b[i, j-i] = a[i, j] + * + * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. + * @param TransA The type of transpose applied to matrix A. + * @param Diag Specifies whether or not A is unit triangular. + * @param K The number of off-diagonals of the matrix A + * @param A The input allocation contains matrix A, supported elements type: {Element#F64_2}. + * @param X The input allocation contains vector x, supported elements type: {Element#F64_2}. + * @param incX The increment for the elements of vector x, must be larger than zero. + */ + void ZTBSV(RsBlasUplo Uplo, RsBlasTranspose TransA, RsBlasDiag Diag, + int K, sp<Allocation> A, sp<Allocation> X, int incX); + + /** + * STPSV solves one of the systems of equations + * A*x = b or A**T*x = b + * + * Details: http://www.netlib.org/lapack/explore-html/d0/d7c/stpsv_8f.html + * + * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2, + * The following subroutine can is an example showing how to convert a UPPER trianglar matrix + * 'a' to packed matrix 'b'. + * k = 0 + * for i in range(0, n): + * for j in range(i, n): + * b[k++] = a[i, j] + * + * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. + * @param TransA The type of transpose applied to matrix A. + * @param Diag Specifies whether or not A is unit triangular. + * @param Ap The input allocation contains packed matrix A, supported elements type: {Element#F32}. + * @param X The input allocation contains vector x, supported elements type: {Element#F32}. + * @param incX The increment for the elements of vector x, must be larger than zero. + */ + void STPSV(RsBlasUplo Uplo, RsBlasTranspose TransA, RsBlasDiag Diag, + sp<Allocation> Ap, sp<Allocation> X, int incX); + + /** + * DTPSV solves one of the systems of equations + * A*x = b or A**T*x = b + * + * Details: http://www.netlib.org/lapack/explore-html/d9/d84/dtpsv_8f.html + * + * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2, + * The following subroutine can is an example showing how to convert a UPPER trianglar matrix + * 'a' to packed matrix 'b'. + * k = 0 + * for i in range(0, n): + * for j in range(i, n): + * b[k++] = a[i, j] + * + * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. + * @param TransA The type of transpose applied to matrix A. + * @param Diag Specifies whether or not A is unit triangular. + * @param Ap The input allocation contains packed matrix A, supported elements type: {Element#F64}. + * @param X The input allocation contains vector x, supported elements type: {Element#F64}. + * @param incX The increment for the elements of vector x, must be larger than zero. + */ + void DTPSV(RsBlasUplo Uplo, RsBlasTranspose TransA, RsBlasDiag Diag, + sp<Allocation> Ap, sp<Allocation> X, int incX); + + /** + * CTPSV solves one of the systems of equations + * A*x = b or A**T*x = b or A**H*x = b + * + * Details: http://www.netlib.org/lapack/explore-html/d8/d56/ctpsv_8f.html + * + * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2, + * The following subroutine can is an example showing how to convert a UPPER trianglar matrix + * 'a' to packed matrix 'b'. + * k = 0 + * for i in range(0, n): + * for j in range(i, n): + * b[k++] = a[i, j] + * + * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. + * @param TransA The type of transpose applied to matrix A. + * @param Diag Specifies whether or not A is unit triangular. + * @param Ap The input allocation contains packed matrix A, supported elements type: {Element#F32_2}. + * @param X The input allocation contains vector x, supported elements type: {Element#F32_2}. + * @param incX The increment for the elements of vector x, must be larger than zero. + */ + void CTPSV(RsBlasUplo Uplo, RsBlasTranspose TransA, RsBlasDiag Diag, + sp<Allocation> Ap, sp<Allocation> X, int incX); + + /** + * ZTPSV solves one of the systems of equations + * A*x = b or A**T*x = b or A**H*x = b + * + * Details: http://www.netlib.org/lapack/explore-html/da/d57/ztpsv_8f.html + * + * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2, + * The following subroutine can is an example showing how to convert a UPPER trianglar matrix + * 'a' to packed matrix 'b'. + * k = 0 + * for i in range(0, n): + * for j in range(i, n): + * b[k++] = a[i, j] + * + * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. + * @param TransA The type of transpose applied to matrix A. + * @param Diag Specifies whether or not A is unit triangular. + * @param Ap The input allocation contains packed matrix A, supported elements type: {Element#F64_2}. + * @param X The input allocation contains vector x, supported elements type: {Element#F64_2}. + * @param incX The increment for the elements of vector x, must be larger than zero. + */ + void ZTPSV(RsBlasUplo Uplo, RsBlasTranspose TransA, RsBlasDiag Diag, + sp<Allocation> Ap, sp<Allocation> X, int incX); + + /** + * SSYMV performs the matrix-vector operation + * y := alpha*A*x + beta*y + * + * Details: http://www.netlib.org/lapack/explore-html/d2/d94/ssymv_8f.html + * + * @param Uplo Specifies whether the upper or lower triangular part is to be referenced. + * @param alpha The scalar alpha. + * @param A The input allocation contains matrix A, supported elements type: {Element#F32}. + * @param X The input allocation contains vector x, supported elements type: {Element#F32}. + * @param incX The increment for the elements of vector x, must be larger than zero. + * @param beta The scalar beta. + * @param Y The input allocation contains vector y, supported elements type: {Element#F32}. + * @param incY The increment for the elements of vector y, must be larger than zero. + */ + void SSYMV(RsBlasUplo Uplo, float alpha, sp<Allocation> A, sp<Allocation> X, + int incX, float beta, sp<Allocation> Y, int incY); + + /** + * SSBMV performs the matrix-vector operation + * y := alpha*A*x + beta*y + * + * Details: http://www.netlib.org/lapack/explore-html/d3/da1/ssbmv_8f.html + * + * Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N), + * but only the region N*(K+1) will be referenced. The following subroutine can is an + * example showing how to convert a UPPER trianglar matrix 'a' to row-based band matrix 'b'. + * for i in range(0, n): + * for j in range(i, min(i+k+1, n)): + * b[i, j-i] = a[i, j] + * + * @param Uplo Specifies whether the upper or lower triangular part of the band matrix A is being supplied. + * @param K The number of off-diagonals of the matrix A + * @param alpha The scalar alpha. + * @param A The input allocation contains matrix A, supported elements type: {Element#F32}. + * @param X The input allocation contains vector x, supported elements type: {Element#F32}. + * @param incX The increment for the elements of vector x, must be larger than zero. + * @param beta The scalar beta. + * @param Y The input allocation contains vector y, supported elements type: {Element#F32}. + * @param incY The increment for the elements of vector y, must be larger than zero. + */ + void SSBMV(RsBlasUplo Uplo, int K, float alpha, sp<Allocation> A, sp<Allocation> X, + int incX, float beta, sp<Allocation> Y, int incY); + + /** + * SSPMV performs the matrix-vector operation + * y := alpha*A*x + beta*y + * + * Details: http://www.netlib.org/lapack/explore-html/d8/d68/sspmv_8f.html + * + * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2, + * The following subroutine can is an example showing how to convert a UPPER trianglar matrix + * 'a' to packed matrix 'b'. + * k = 0 + * for i in range(0, n): + * for j in range(i, n): + * b[k++] = a[i, j] + * + * @param Uplo Specifies whether the upper or lower triangular part of the matrix A is supplied in packed form. + * @param alpha The scalar alpha. + * @param Ap The input allocation contains matrix A, supported elements type: {Element#F32}. + * @param X The input allocation contains vector x, supported elements type: {Element#F32}. + * @param incX The increment for the elements of vector x, must be larger than zero. + * @param beta The scalar beta. + * @param Y The input allocation contains vector y, supported elements type: {Element#F32}. + * @param incY The increment for the elements of vector y, must be larger than zero. + */ + void SSPMV(RsBlasUplo Uplo, float alpha, sp<Allocation> Ap, sp<Allocation> X, + int incX, float beta, sp<Allocation> Y, int incY); + + /** + * SGER performs the rank 1 operation + * A := alpha*x*y**T + A + * + * Details: http://www.netlib.org/lapack/explore-html/db/d5c/sger_8f.html + * + * @param alpha The scalar alpha. + * @param X The input allocation contains vector x, supported elements type: {Element#F32}. + * @param incX The increment for the elements of vector x, must be larger than zero. + * @param Y The input allocation contains vector y, supported elements type: {Element#F32}. + * @param incY The increment for the elements of vector y, must be larger than zero. + * @param A The input allocation contains matrix A, supported elements type: {Element#F32}. + */ + void SGER(float alpha, sp<Allocation> X, int incX, sp<Allocation> Y, int incY, sp<Allocation> A); + + /** + * SSYR performs the rank 1 operation + * A := alpha*x*x**T + A + * + * Details: http://www.netlib.org/lapack/explore-html/d6/dac/ssyr_8f.html + * + * @param Uplo Specifies whether the upper or lower triangular part is to be referenced. + * @param alpha The scalar alpha. + * @param X The input allocation contains vector x, supported elements type: {Element#F32}. + * @param incX The increment for the elements of vector x, must be larger than zero. + * @param A The input allocation contains matrix A, supported elements type: {Element#F32}. + */ + void SSYR(RsBlasUplo Uplo, float alpha, sp<Allocation> X, int incX, sp<Allocation> A); + + /** + * SSPR performs the rank 1 operation + * A := alpha*x*x**T + A + * + * Details: http://www.netlib.org/lapack/explore-html/d2/d9b/sspr_8f.html + * + * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2, + * The following subroutine can is an example showing how to convert a UPPER trianglar matrix + * 'a' to packed matrix 'b'. + * k = 0 + * for i in range(0, n): + * for j in range(i, n): + * b[k++] = a[i, j] + * + * @param Uplo Specifies whether the upper or lower triangular part is to be supplied in the packed form. + * @param alpha The scalar alpha. + * @param X The input allocation contains vector x, supported elements type: {Element#F32}. + * @param incX The increment for the elements of vector x, must be larger than zero. + * @param Ap The input allocation contains matrix A, supported elements type: {Element#F32}. + */ + void SSPR(RsBlasUplo Uplo, float alpha, sp<Allocation> X, int incX, sp<Allocation> Ap); + + /** + * SSYR2 performs the symmetric rank 2 operation + * A := alpha*x*y**T + alpha*y*x**T + A + * + * Details: http://www.netlib.org/lapack/explore-html/db/d99/ssyr2_8f.html + * + * @param Uplo Specifies whether the upper or lower triangular part is to be referenced. + * @param alpha The scalar alpha. + * @param X The input allocation contains vector x, supported elements type: {Element#F32}. + * @param incX The increment for the elements of vector x, must be larger than zero. + * @param Y The input allocation contains vector y, supported elements type: {Element#F32}. + * @param incY The increment for the elements of vector y, must be larger than zero. + * @param A The input allocation contains matrix A, supported elements type: {Element#F32}. + */ + void SSYR2(RsBlasUplo Uplo, float alpha, sp<Allocation> X, int incX, + sp<Allocation> Y, int incY, sp<Allocation> A); + + /** + * SSPR2 performs the symmetric rank 2 operation + * A := alpha*x*y**T + alpha*y*x**T + A + * + * Details: http://www.netlib.org/lapack/explore-html/db/d3e/sspr2_8f.html + * + * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2, + * The following subroutine can is an example showing how to convert a UPPER trianglar matrix + * 'a' to packed matrix 'b'. + * k = 0 + * for i in range(0, n): + * for j in range(i, n): + * b[k++] = a[i, j] + * + * @param Uplo Specifies whether the upper or lower triangular part is to be supplied in the packed form. + * @param alpha The scalar alpha. + * @param X The input allocation contains vector x, supported elements type: {Element#F32}. + * @param incX The increment for the elements of vector x, must be larger than zero. + * @param Y The input allocation contains vector y, supported elements type: {Element#F32}. + * @param incY The increment for the elements of vector y, must be larger than zero. + * @param Ap The input allocation contains matrix A, supported elements type: {Element#F32}. + */ + void SSPR2(RsBlasUplo Uplo, float alpha, sp<Allocation> X, int incX, + sp<Allocation> Y, int incY, sp<Allocation> Ap); + + /** + * DSYMV performs the matrix-vector operation + * y := alpha*A*x + beta*y + * + * Details: http://www.netlib.org/lapack/explore-html/d8/dbe/dsymv_8f.html + * + * @param Uplo Specifies whether the upper or lower triangular part is to be referenced. + * @param alpha The scalar alpha. + * @param A The input allocation contains matrix A, supported elements type: {Element#F64}. + * @param X The input allocation contains vector x, supported elements type: {Element#F64}. + * @param incX The increment for the elements of vector x, must be larger than zero. + * @param beta The scalar beta. + * @param Y The input allocation contains vector y, supported elements type: {Element#F64}. + * @param incY The increment for the elements of vector y, must be larger than zero. + */ + void DSYMV(RsBlasUplo Uplo, double alpha, sp<Allocation> A, sp<Allocation> X, int incX, + double beta, sp<Allocation> Y, int incY); + + /** + * DSBMV performs the matrix-vector operation + * y := alpha*A*x + beta*y + * + * Details: http://www.netlib.org/lapack/explore-html/d8/d1e/dsbmv_8f.html + * + * Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N), + * but only the region N*(K+1) will be referenced. The following subroutine can is an + * example showing how to convert a UPPER trianglar matrix 'a' to row-based band matrix 'b'. + * for i in range(0, n): + * for j in range(i, min(i+k+1, n)): + * b[i, j-i] = a[i, j] + * + * @param Uplo Specifies whether the upper or lower triangular part of the band matrix A is being supplied. + * @param K The number of off-diagonals of the matrix A + * @param alpha The scalar alpha. + * @param A The input allocation contains matrix A, supported elements type: {Element#F64}. + * @param X The input allocation contains vector x, supported elements type: {Element#F64}. + * @param incX The increment for the elements of vector x, must be larger than zero. + * @param beta The scalar beta. + * @param Y The input allocation contains vector y, supported elements type: {Element#F64}. + * @param incY The increment for the elements of vector y, must be larger than zero. + */ + void DSBMV(RsBlasUplo Uplo, int K, double alpha, sp<Allocation> A, sp<Allocation> X, int incX, + double beta, sp<Allocation> Y, int incY); + + /** + * DSPMV performs the matrix-vector operation + * y := alpha*A*x + beta*y + * + * Details: http://www.netlib.org/lapack/explore-html/d4/d85/dspmv_8f.html + * + * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2, + * The following subroutine can is an example showing how to convert a UPPER trianglar matrix + * 'a' to packed matrix 'b'. + * k = 0 + * for i in range(0, n): + * for j in range(i, n): + * b[k++] = a[i, j] + * + * @param Uplo Specifies whether the upper or lower triangular part of the matrix A is supplied in packed form. + * @param alpha The scalar alpha. + * @param Ap The input allocation contains matrix A, supported elements type: {Element#F64}. + * @param X The input allocation contains vector x, supported elements type: {Element#F64}. + * @param incX The increment for the elements of vector x, must be larger than zero. + * @param beta The scalar beta. + * @param Y The input allocation contains vector y, supported elements type: {Element#F64}. + * @param incY The increment for the elements of vector y, must be larger than zero. + */ + void DSPMV(RsBlasUplo Uplo, double alpha, sp<Allocation> Ap, sp<Allocation> X, int incX, + double beta, sp<Allocation> Y, int incY); + + /** + * DGER performs the rank 1 operation + * A := alpha*x*y**T + A + * + * Details: http://www.netlib.org/lapack/explore-html/dc/da8/dger_8f.html + * + * @param alpha The scalar alpha. + * @param X The input allocation contains vector x, supported elements type: {Element#F64}. + * @param incX The increment for the elements of vector x, must be larger than zero. + * @param Y The input allocation contains vector y, supported elements type: {Element#F64}. + * @param incY The increment for the elements of vector y, must be larger than zero. + * @param A The input allocation contains matrix A, supported elements type: {Element#F64}. + */ + void DGER(double alpha, sp<Allocation> X, int incX, sp<Allocation> Y, int incY, sp<Allocation> A); + + /** + * DSYR performs the rank 1 operation + * A := alpha*x*x**T + A + * + * Details: http://www.netlib.org/lapack/explore-html/d3/d60/dsyr_8f.html + * + * @param Uplo Specifies whether the upper or lower triangular part is to be referenced. + * @param alpha The scalar alpha. + * @param X The input allocation contains vector x, supported elements type: {Element#F64}. + * @param incX The increment for the elements of vector x, must be larger than zero. + * @param A The input allocation contains matrix A, supported elements type: {Element#F64}. + */ + void DSYR(RsBlasUplo Uplo, double alpha, sp<Allocation> X, int incX, sp<Allocation> A); + + /** + * DSPR performs the rank 1 operation + * A := alpha*x*x**T + A + * + * Details: http://www.netlib.org/lapack/explore-html/dd/dba/dspr_8f.html + * + * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2, + * The following subroutine can is an example showing how to convert a UPPER trianglar matrix + * 'a' to packed matrix 'b'. + * k = 0 + * for i in range(0, n): + * for j in range(i, n): + * b[k++] = a[i, j] + * + * @param Uplo Specifies whether the upper or lower triangular part is to be supplied in the packed form. + * @param alpha The scalar alpha. + * @param X The input allocation contains vector x, supported elements type: {Element#F64}. + * @param incX The increment for the elements of vector x, must be larger than zero. + * @param Ap The input allocation contains matrix A, supported elements type: {Element#F64}. + */ + void DSPR(RsBlasUplo Uplo, double alpha, sp<Allocation> X, int incX, sp<Allocation> Ap); + + /** + * DSYR2 performs the symmetric rank 2 operation + * A := alpha*x*y**T + alpha*y*x**T + A + * + * Details: http://www.netlib.org/lapack/explore-html/de/d41/dsyr2_8f.html + * + * @param Uplo Specifies whether the upper or lower triangular part is to be referenced. + * @param alpha The scalar alpha. + * @param X The input allocation contains vector x, supported elements type: {Element#F64}. + * @param incX The increment for the elements of vector x, must be larger than zero. + * @param Y The input allocation contains vector y, supported elements type: {Element#F64}. + * @param incY The increment for the elements of vector y, must be larger than zero. + * @param A The input allocation contains matrix A, supported elements type: {Element#F64}. + */ + void DSYR2(RsBlasUplo Uplo, double alpha, sp<Allocation> X, int incX, + sp<Allocation> Y, int incY, sp<Allocation> A); + + /** + * DSPR2 performs the symmetric rank 2 operation + * A := alpha*x*y**T + alpha*y*x**T + A + * + * Details: http://www.netlib.org/lapack/explore-html/dd/d9e/dspr2_8f.html + * + * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2, + * The following subroutine can is an example showing how to convert a UPPER trianglar matrix + * 'a' to packed matrix 'b'. + * k = 0 + * for i in range(0, n): + * for j in range(i, n): + * b[k++] = a[i, j] + * + * @param Uplo Specifies whether the upper or lower triangular part is to be supplied in the packed form. + * @param alpha The scalar alpha. + * @param X The input allocation contains vector x, supported elements type: {Element#F64}. + * @param incX The increment for the elements of vector x, must be larger than zero. + * @param Y The input allocation contains vector y, supported elements type: {Element#F64}. + * @param incY The increment for the elements of vector y, must be larger than zero. + * @param Ap The input allocation contains matrix A, supported elements type: {Element#F64}. + */ + void DSPR2(RsBlasUplo Uplo, double alpha, sp<Allocation> X, int incX, + sp<Allocation> Y, int incY, sp<Allocation> Ap); + + /** + * CHEMV performs the matrix-vector operation + * y := alpha*A*x + beta*y + * + * Details: http://www.netlib.org/lapack/explore-html/d7/d51/chemv_8f.html + * + * @param Uplo Specifies whether the upper or lower triangular part is to be referenced. + * @param alpha The scalar alpha. + * @param A The input allocation contains matrix A, supported elements type: {Element#F32_2}. + * @param X The input allocation contains vector x, supported elements type: {Element#F32_2}. + * @param incX The increment for the elements of vector x, must be larger than zero. + * @param beta The scalar beta. + * @param Y The input allocation contains vector y, supported elements type: {Element#F32_2}. + * @param incY The increment for the elements of vector y, must be larger than zero. + */ + void CHEMV(RsBlasUplo Uplo, Float2 alpha, sp<Allocation> A, sp<Allocation> X, + int incX, Float2 beta, sp<Allocation> Y, int incY); + + /** + * CHBMV performs the matrix-vector operation + * y := alpha*A*x + beta*y + * + * Details: http://www.netlib.org/lapack/explore-html/db/dc2/chbmv_8f.html + * + * Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N), + * but only the region N*(K+1) will be referenced. The following subroutine can is an + * example showing how to convert a UPPER trianglar matrix 'a' to row-based band matrix 'b'. + * for i in range(0, n): + * for j in range(i, min(i+k+1, n)): + * b[i, j-i] = a[i, j] + * + * @param Uplo Specifies whether the upper or lower triangular part of the band matrix A is being supplied. + * @param K The number of off-diagonals of the matrix A + * @param alpha The scalar alpha. + * @param A The input allocation contains matrix A, supported elements type: {Element#F32_2}. + * @param X The input allocation contains vector x, supported elements type: {Element#F32_2}. + * @param incX The increment for the elements of vector x, must be larger than zero. + * @param beta The scalar beta. + * @param Y The input allocation contains vector y, supported elements type: {Element#F32_2}. + * @param incY The increment for the elements of vector y, must be larger than zero. + */ + void CHBMV(RsBlasUplo Uplo, int K, Float2 alpha, sp<Allocation> A, sp<Allocation> X, + int incX, Float2 beta, sp<Allocation> Y, int incY); + + /** + * CHPMV performs the matrix-vector operation + * y := alpha*A*x + beta*y + * + * Details: http://www.netlib.org/lapack/explore-html/d2/d06/chpmv_8f.html + * + * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2, + * The following subroutine can is an example showing how to convert a UPPER trianglar matrix + * 'a' to packed matrix 'b'. + * k = 0 + * for i in range(0, n): + * for j in range(i, n): + * b[k++] = a[i, j] + * + * @param Uplo Specifies whether the upper or lower triangular part of the matrix A is supplied in packed form. + * @param alpha The scalar alpha. + * @param Ap The input allocation contains matrix A, supported elements type: {Element#F32_2}. + * @param X The input allocation contains vector x, supported elements type: {Element#F32_2}. + * @param incX The increment for the elements of vector x, must be larger than zero. + * @param beta The scalar beta. + * @param Y The input allocation contains vector y, supported elements type: {Element#F32_2}. + * @param incY The increment for the elements of vector y, must be larger than zero. + */ + void CHPMV(RsBlasUplo Uplo, Float2 alpha, sp<Allocation> Ap, sp<Allocation> X, + int incX, Float2 beta, sp<Allocation> Y, int incY); + + /** + * CGERU performs the rank 1 operation + * A := alpha*x*y**T + A + * + * Details: http://www.netlib.org/lapack/explore-html/db/d5f/cgeru_8f.html + * + * @param alpha The scalar alpha. + * @param X The input allocation contains vector x, supported elements type: {Element#F32_2}. + * @param incX The increment for the elements of vector x, must be larger than zero. + * @param Y The input allocation contains vector y, supported elements type: {Element#F32_2}. + * @param incY The increment for the elements of vector y, must be larger than zero. + * @param A The input allocation contains matrix A, supported elements type: {Element#F32_2}. + */ + void CGERU(Float2 alpha, sp<Allocation> X, int incX, + sp<Allocation> Y, int incY, sp<Allocation> A); + + /** + * CGERC performs the rank 1 operation + * A := alpha*x*y**H + A + * + * Details: http://www.netlib.org/lapack/explore-html/dd/d84/cgerc_8f.html + * + * @param alpha The scalar alpha. + * @param X The input allocation contains vector x, supported elements type: {Element#F32_2}. + * @param incX The increment for the elements of vector x, must be larger than zero. + * @param Y The input allocation contains vector y, supported elements type: {Element#F32_2}. + * @param incY The increment for the elements of vector y, must be larger than zero. + * @param A The input allocation contains matrix A, supported elements type: {Element#F32_2}. + */ + void CGERC(Float2 alpha, sp<Allocation> X, int incX, + sp<Allocation> Y, int incY, sp<Allocation> A); + + /** + * CHER performs the rank 1 operation + * A := alpha*x*x**H + A + * + * Details: http://www.netlib.org/lapack/explore-html/d3/d6d/cher_8f.html + * + * @param Uplo Specifies whether the upper or lower triangular part is to be referenced. + * @param alpha The scalar alpha. + * @param X The input allocation contains vector x, supported elements type: {Element#F32_2}. + * @param incX The increment for the elements of vector x, must be larger than zero. + * @param A The input allocation contains matrix A, supported elements type: {Element#F32_2}. + */ + void CHER(RsBlasUplo Uplo, float alpha, sp<Allocation> X, int incX, sp<Allocation> A); + + /** + * CHPR performs the rank 1 operation + * A := alpha*x*x**H + A + * + * Details: http://www.netlib.org/lapack/explore-html/db/dcd/chpr_8f.html + * + * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2, + * The following subroutine can is an example showing how to convert a UPPER trianglar matrix + * 'a' to packed matrix 'b'. + * k = 0 + * for i in range(0, n): + * for j in range(i, n): + * b[k++] = a[i, j] + * + * @param Uplo Specifies whether the upper or lower triangular part is to be supplied in the packed form. + * @param alpha The scalar alpha. + * @param X The input allocation contains vector x, supported elements type: {Element#F32_2}. + * @param incX The increment for the elements of vector x, must be larger than zero. + * @param Ap The input allocation contains matrix A, supported elements type: {Element#F32_2}. + */ + void CHPR(RsBlasUplo Uplo, float alpha, sp<Allocation> X, int incX, sp<Allocation> Ap); + + /** + * CHER2 performs the symmetric rank 2 operation + * A := alpha*x*y**H + alpha*y*x**H + A + * + * Details: http://www.netlib.org/lapack/explore-html/db/d87/cher2_8f.html + * + * @param Uplo Specifies whether the upper or lower triangular part is to be referenced. + * @param alpha The scalar alpha. + * @param X The input allocation contains vector x, supported elements type: {Element#F32_2}. + * @param incX The increment for the elements of vector x, must be larger than zero. + * @param Y The input allocation contains vector y, supported elements type: {Element#F32_2}. + * @param incY The increment for the elements of vector y, must be larger than zero. + * @param A The input allocation contains matrix A, supported elements type: {Element#F32_2}. + */ + void CHER2(RsBlasUplo Uplo, Float2 alpha, sp<Allocation> X, int incX, + sp<Allocation> Y, int incY, sp<Allocation> A); + + /** + * CHPR2 performs the symmetric rank 2 operation + * A := alpha*x*y**H + alpha*y*x**H + A + * + * Details: http://www.netlib.org/lapack/explore-html/d6/d44/chpr2_8f.html + * + * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2, + * The following subroutine can is an example showing how to convert a UPPER trianglar matrix + * 'a' to packed matrix 'b'. + * k = 0 + * for i in range(0, n): + * for j in range(i, n): + * b[k++] = a[i, j] + * + * @param Uplo Specifies whether the upper or lower triangular part is to be supplied in the packed form. + * @param alpha The scalar alpha. + * @param X The input allocation contains vector x, supported elements type: {Element#F32_2}. + * @param incX The increment for the elements of vector x, must be larger than zero. + * @param Y The input allocation contains vector y, supported elements type: {Element#F32_2}. + * @param incY The increment for the elements of vector y, must be larger than zero. + * @param Ap The input allocation contains matrix A, supported elements type: {Element#F32_2}. + */ + void CHPR2(RsBlasUplo Uplo, Float2 alpha, sp<Allocation> X, int incX, + sp<Allocation> Y, int incY, sp<Allocation> Ap); + + /** + * ZHEMV performs the matrix-vector operation + * y := alpha*A*x + beta*y + * + * Details: http://www.netlib.org/lapack/explore-html/d0/ddd/zhemv_8f.html + * + * @param Uplo Specifies whether the upper or lower triangular part is to be referenced. + * @param alpha The scalar alpha. + * @param A The input allocation contains matrix A, supported elements type: {Element#F64_2}. + * @param X The input allocation contains vector x, supported elements type: {Element#F64_2}. + * @param incX The increment for the elements of vector x, must be larger than zero. + * @param beta The scalar beta. + * @param Y The input allocation contains vector y, supported elements type: {Element#F64_2}. + * @param incY The increment for the elements of vector y, must be larger than zero. + */ + void ZHEMV(RsBlasUplo Uplo, Double2 alpha, sp<Allocation> A, sp<Allocation> X, + int incX, Double2 beta, sp<Allocation> Y, int incY); + + /** + * ZHBMV performs the matrix-vector operation + * y := alpha*A*x + beta*y + * + * Details: http://www.netlib.org/lapack/explore-html/d3/d1a/zhbmv_8f.html + * + * Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N), + * but only the region N*(K+1) will be referenced. The following subroutine can is an + * example showing how to convert a UPPER trianglar matrix 'a' to row-based band matrix 'b'. + * for i in range(0, n): + * for j in range(i, min(i+k+1, n)): + * b[i, j-i] = a[i, j] + * + * @param Uplo Specifies whether the upper or lower triangular part of the band matrix A is being supplied. + * @param K The number of off-diagonals of the matrix A + * @param alpha The scalar alpha. + * @param A The input allocation contains matrix A, supported elements type: {Element#F64_2}. + * @param X The input allocation contains vector x, supported elements type: {Element#F64_2}. + * @param incX The increment for the elements of vector x, must be larger than zero. + * @param beta The scalar beta. + * @param Y The input allocation contains vector y, supported elements type: {Element#F64_2}. + * @param incY The increment for the elements of vector y, must be larger than zero. + */ + void ZHBMV(RsBlasUplo Uplo, int K, Double2 alpha, sp<Allocation> A, sp<Allocation> X, + int incX, Double2 beta, sp<Allocation> Y, int incY); + + /** + * ZHPMV performs the matrix-vector operation + * y := alpha*A*x + beta*y + * + * Details: http://www.netlib.org/lapack/explore-html/d0/d60/zhpmv_8f.html + * + * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2, + * The following subroutine can is an example showing how to convert a UPPER trianglar matrix + * 'a' to packed matrix 'b'. + * k = 0 + * for i in range(0, n): + * for j in range(i, n): + * b[k++] = a[i, j] + * + * @param Uplo Specifies whether the upper or lower triangular part of the matrix A is supplied in packed form. + * @param alpha The scalar alpha. + * @param Ap The input allocation contains matrix A, supported elements type: {Element#F64_2}. + * @param X The input allocation contains vector x, supported elements type: {Element#F64_2}. + * @param incX The increment for the elements of vector x, must be larger than zero. + * @param beta The scalar beta. + * @param Y The input allocation contains vector y, supported elements type: {Element#F64_2}. + * @param incY The increment for the elements of vector y, must be larger than zero. + */ + void ZHPMV(RsBlasUplo Uplo, Double2 alpha, sp<Allocation> Ap, sp<Allocation> X, + int incX, Double2 beta, sp<Allocation> Y, int incY); + + /** + * ZGERU performs the rank 1 operation + * A := alpha*x*y**T + A + * + * Details: http://www.netlib.org/lapack/explore-html/d7/d12/zgeru_8f.html + * + * @param alpha The scalar alpha. + * @param X The input allocation contains vector x, supported elements type: {Element#F64_2}. + * @param incX The increment for the elements of vector x, must be larger than zero. + * @param Y The input allocation contains vector y, supported elements type: {Element#F64_2}. + * @param incY The increment for the elements of vector y, must be larger than zero. + * @param A The input allocation contains matrix A, supported elements type: {Element#F64_2}. + */ + void ZGERU(Double2 alpha, sp<Allocation> X, int incX, + sp<Allocation> Y, int incY, sp<Allocation> A); + + /** + * ZGERC performs the rank 1 operation + * A := alpha*x*y**H + A + * + * Details: http://www.netlib.org/lapack/explore-html/d3/dad/zgerc_8f.html + * + * @param alpha The scalar alpha. + * @param X The input allocation contains vector x, supported elements type: {Element#F64_2}. + * @param incX The increment for the elements of vector x, must be larger than zero. + * @param Y The input allocation contains vector y, supported elements type: {Element#F64_2}. + * @param incY The increment for the elements of vector y, must be larger than zero. + * @param A The input allocation contains matrix A, supported elements type: {Element#F64_2}. + */ + void ZGERC(Double2 alpha, sp<Allocation> X, int incX, + sp<Allocation> Y, int incY, sp<Allocation> A); + + /** + * ZHER performs the rank 1 operation + * A := alpha*x*x**H + A + * + * Details: http://www.netlib.org/lapack/explore-html/de/d0e/zher_8f.html + * + * @param Uplo Specifies whether the upper or lower triangular part is to be referenced. + * @param alpha The scalar alpha. + * @param X The input allocation contains vector x, supported elements type: {Element#F64_2}. + * @param incX The increment for the elements of vector x, must be larger than zero. + * @param A The input allocation contains matrix A, supported elements type: {Element#F64_2}. + */ + void ZHER(RsBlasUplo Uplo, double alpha, sp<Allocation> X, int incX, sp<Allocation> A); + + /** + * ZHPR performs the rank 1 operation + * A := alpha*x*x**H + A + * + * Details: http://www.netlib.org/lapack/explore-html/de/de1/zhpr_8f.html + * + * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2, + * The following subroutine can is an example showing how to convert a UPPER trianglar matrix + * 'a' to packed matrix 'b'. + * k = 0 + * for i in range(0, n): + * for j in range(i, n): + * b[k++] = a[i, j] + * + * @param Uplo Specifies whether the upper or lower triangular part is to be supplied in the packed form. + * @param alpha The scalar alpha. + * @param X The input allocation contains vector x, supported elements type: {Element#F64_2}. + * @param incX The increment for the elements of vector x, must be larger than zero. + * @param Ap The input allocation contains matrix A, supported elements type: {Element#F64_2}. + */ + void ZHPR(RsBlasUplo Uplo, double alpha, sp<Allocation> X, int incX, sp<Allocation> Ap); + + /** + * ZHER2 performs the symmetric rank 2 operation + * A := alpha*x*y**H + alpha*y*x**H + A + * + * Details: http://www.netlib.org/lapack/explore-html/da/d8a/zher2_8f.html + * + * @param Uplo Specifies whether the upper or lower triangular part is to be referenced. + * @param alpha The scalar alpha. + * @param X The input allocation contains vector x, supported elements type: {Element#F64_2}. + * @param incX The increment for the elements of vector x, must be larger than zero. + * @param Y The input allocation contains vector y, supported elements type: {Element#F64_2}. + * @param incY The increment for the elements of vector y, must be larger than zero. + * @param A The input allocation contains matrix A, supported elements type: {Element#F64_2}. + */ + void ZHER2(RsBlasUplo Uplo, Double2 alpha, sp<Allocation> X, int incX, + sp<Allocation> Y, int incY, sp<Allocation> A); + + /** + * ZHPR2 performs the symmetric rank 2 operation + * A := alpha*x*y**H + alpha*y*x**H + A + * + * Details: http://www.netlib.org/lapack/explore-html/d5/d52/zhpr2_8f.html + * + * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2, + * The following subroutine can is an example showing how to convert a UPPER trianglar matrix + * 'a' to packed matrix 'b'. + * k = 0 + * for i in range(0, n): + * for j in range(i, n): + * b[k++] = a[i, j] + * + * @param Uplo Specifies whether the upper or lower triangular part is to be supplied in the packed form. + * @param alpha The scalar alpha. + * @param X The input allocation contains vector x, supported elements type: {Element#F64_2}. + * @param incX The increment for the elements of vector x, must be larger than zero. + * @param Y The input allocation contains vector y, supported elements type: {Element#F64_2}. + * @param incY The increment for the elements of vector y, must be larger than zero. + * @param Ap The input allocation contains matrix A, supported elements type: {Element#F64_2}. + */ + void ZHPR2(RsBlasUplo Uplo, Double2 alpha, sp<Allocation> X, int incX, + sp<Allocation> Y, int incY, sp<Allocation> Ap); + + /** + * SGEMM performs one of the matrix-matrix operations + * C := alpha*op(A)*op(B) + beta*C where op(X) is one of op(X) = X or op(X) = X**T + * + * Details: http://www.netlib.org/lapack/explore-html/d4/de2/sgemm_8f.html + * + * @param TransA The type of transpose applied to matrix A. + * @param TransB The type of transpose applied to matrix B. + * @param alpha The scalar alpha. + * @param A The input allocation contains matrix A, supported elements type: {Element#F32}. + * @param B The input allocation contains matrix B, supported elements type: {Element#F32}. + * @param beta The scalar beta. + * @param C The input allocation contains matrix C, supported elements type: {Element#F32}. + */ + void SGEMM(RsBlasTranspose TransA, RsBlasTranspose TransB, float alpha, sp<Allocation> A, + sp<Allocation> B, float beta, sp<Allocation> C); + + + /** + * DGEMM performs one of the matrix-matrix operations + * C := alpha*op(A)*op(B) + beta*C where op(X) is one of op(X) = X or op(X) = X**T + * + * Details: http://www.netlib.org/lapack/explore-html/d7/d2b/dgemm_8f.html + * + * @param TransA The type of transpose applied to matrix A. + * @param TransB The type of transpose applied to matrix B. + * @param alpha The scalar alpha. + * @param A The input allocation contains matrix A, supported elements type: {Element#F64}. + * @param B The input allocation contains matrix B, supported elements type: {Element#F64}. + * @param beta The scalar beta. + * @param C The input allocation contains matrix C, supported elements type: {Element#F64}. + */ + void DGEMM(RsBlasTranspose TransA, RsBlasTranspose TransB, double alpha, sp<Allocation> A, + sp<Allocation> B, double beta, sp<Allocation> C); + + /** + * CGEMM performs one of the matrix-matrix operations + * C := alpha*op(A)*op(B) + beta*C where op(X) is one of op(X) = X or op(X) = X**T or op(X) = X**H + * + * Details: http://www.netlib.org/lapack/explore-html/d6/d5b/cgemm_8f.html + * + * @param TransA The type of transpose applied to matrix A. + * @param TransB The type of transpose applied to matrix B. + * @param alpha The scalar alpha. + * @param A The input allocation contains matrix A, supported elements type: {Element#F32_2}. + * @param B The input allocation contains matrix B, supported elements type: {Element#F32_2}. + * @param beta The scalar beta. + * @param C The input allocation contains matrix C, supported elements type: {Element#F32_2}. + */ + void CGEMM(RsBlasTranspose TransA, RsBlasTranspose TransB, Float2 alpha, sp<Allocation> A, + sp<Allocation> B, Float2 beta, sp<Allocation> C); + + /** + * ZGEMM performs one of the matrix-matrix operations + * C := alpha*op(A)*op(B) + beta*C where op(X) is one of op(X) = X or op(X) = X**T or op(X) = X**H + * + * Details: http://www.netlib.org/lapack/explore-html/d7/d76/zgemm_8f.html + * + * @param TransA The type of transpose applied to matrix A. + * @param TransB The type of transpose applied to matrix B. + * @param alpha The scalar alpha. + * @param A The input allocation contains matrix A, supported elements type: {Element#F64_2 + * @param B The input allocation contains matrix B, supported elements type: {Element#F64_2 + * @param beta The scalar beta. + * @param C The input allocation contains matrix C, supported elements type: {Element#F64_2 + */ + void ZGEMM(RsBlasTranspose TransA, RsBlasTranspose TransB, Double2 alpha, sp<Allocation> A, + sp<Allocation> B, Double2 beta, sp<Allocation> C); + + /** + * SSYMM performs one of the matrix-matrix operations + * C := alpha*A*B + beta*C or C := alpha*B*A + beta*C + * + * Details: http://www.netlib.org/lapack/explore-html/d7/d42/ssymm_8f.html + * + * @param Side Specifies whether the symmetric matrix A appears on the left or right. + * @param Uplo Specifies whether the upper or lower triangular part is to be referenced. + * @param alpha The scalar alpha. + * @param A The input allocation contains matrix A, supported elements type: {Element#F32}. + * @param B The input allocation contains matrix B, supported elements type: {Element#F32}. + * @param beta The scalar beta. + * @param C The input allocation contains matrix C, supported elements type: {Element#F32}. + */ + void SSYMM(RsBlasSide Side, RsBlasUplo Uplo, float alpha, sp<Allocation> A, + sp<Allocation> B, float beta, sp<Allocation> C); + + /** + * DSYMM performs one of the matrix-matrix operations + * C := alpha*A*B + beta*C or C := alpha*B*A + beta*C + * + * Details: http://www.netlib.org/lapack/explore-html/d8/db0/dsymm_8f.html + * + * @param Side Specifies whether the symmetric matrix A appears on the left or right. + * @param Uplo Specifies whether the upper or lower triangular part is to be referenced. + * @param alpha The scalar alpha. + * @param A The input allocation contains matrix A, supported elements type: {Element#F64}. + * @param B The input allocation contains matrix B, supported elements type: {Element#F64}. + * @param beta The scalar beta. + * @param C The input allocation contains matrix C, supported elements type: {Element#F64}. + */ + void DSYMM(RsBlasSide Side, RsBlasUplo Uplo, double alpha, sp<Allocation> A, + sp<Allocation> B, double beta, sp<Allocation> C); + + /** + * CSYMM performs one of the matrix-matrix operations + * C := alpha*A*B + beta*C or C := alpha*B*A + beta*C + * + * Details: http://www.netlib.org/lapack/explore-html/db/d59/csymm_8f.html + * + * @param Side Specifies whether the symmetric matrix A appears on the left or right. + * @param Uplo Specifies whether the upper or lower triangular part is to be referenced. + * @param alpha The scalar alpha. + * @param A The input allocation contains matrix A, supported elements type: {Element#F32_2}. + * @param B The input allocation contains matrix B, supported elements type: {Element#F32_2}. + * @param beta The scalar beta. + * @param C The input allocation contains matrix C, supported elements type: {Element#F32_2}. + */ + void CSYMM(RsBlasSide Side, RsBlasUplo Uplo, Float2 alpha, sp<Allocation> A, + sp<Allocation> B, Float2 beta, sp<Allocation> C); + + /** + * ZSYMM performs one of the matrix-matrix operations + * C := alpha*A*B + beta*C or C := alpha*B*A + beta*C + * + * Details: http://www.netlib.org/lapack/explore-html/df/d51/zsymm_8f.html + * + * @param Side Specifies whether the symmetric matrix A appears on the left or right. + * @param Uplo Specifies whether the upper or lower triangular part is to be referenced. + * @param alpha The scalar alpha. + * @param A The input allocation contains matrix A, supported elements type: {Element#F64_2}. + * @param B The input allocation contains matrix B, supported elements type: {Element#F64_2}. + * @param beta The scalar beta. + * @param C The input allocation contains matrix C, supported elements type: {Element#F64_2}. + */ + void ZSYMM(RsBlasSide Side, RsBlasUplo Uplo, Double2 alpha, sp<Allocation> A, + sp<Allocation> B, Double2 beta, sp<Allocation> C); + + /** + * SSYRK performs one of the symmetric rank k operations + * C := alpha*A*A**T + beta*C or C := alpha*A**T*A + beta*C + * + * Details: http://www.netlib.org/lapack/explore-html/d0/d40/ssyrk_8f.html + * + * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced. + * @param Trans The type of transpose applied to the operation. + * @param alpha The scalar alpha. + * @param A The input allocation contains matrix A, supported elements type: {Element#F32}. + * @param beta The scalar beta. + * @param C The input allocation contains matrix C, supported elements type: {Element#F32}. + */ + void SSYRK(RsBlasUplo Uplo, RsBlasTranspose Trans, float alpha, + sp<Allocation> A, float beta, sp<Allocation> C); + + /** + * DSYRK performs one of the symmetric rank k operations + * C := alpha*A*A**T + beta*C or C := alpha*A**T*A + beta*C + * + * Details: http://www.netlib.org/lapack/explore-html/dc/d05/dsyrk_8f.html + * + * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced. + * @param Trans The type of transpose applied to the operation. + * @param alpha The scalar alpha. + * @param A The input allocation contains matrix A, supported elements type: {Element#F64}. + * @param beta The scalar beta. + * @param C The input allocation contains matrix C, supported elements type: {Element#F64}. + */ + void DSYRK(RsBlasUplo Uplo, RsBlasTranspose Trans, double alpha, + sp<Allocation> A, double beta, sp<Allocation> C); + + /** + * CSYRK performs one of the symmetric rank k operations + * C := alpha*A*A**T + beta*C or C := alpha*A**T*A + beta*C + * + * Details: http://www.netlib.org/lapack/explore-html/d3/d6a/csyrk_8f.html + * + * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced. + * @param Trans The type of transpose applied to the operation. + * @param alpha The scalar alpha. + * @param A The input allocation contains matrix A, supported elements type: {Element#F32_2}. + * @param beta The scalar beta. + * @param C The input allocation contains matrix C, supported elements type: {Element#F32_2}. + */ + void CSYRK(RsBlasUplo Uplo, RsBlasTranspose Trans, Float2 alpha, + sp<Allocation> A, Float2 beta, sp<Allocation> C); + + /** + * ZSYRK performs one of the symmetric rank k operations + * C := alpha*A*A**T + beta*C or C := alpha*A**T*A + beta*C + * + * Details: http://www.netlib.org/lapack/explore-html/de/d54/zsyrk_8f.html + * + * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced. + * @param Trans The type of transpose applied to the operation. + * @param alpha The scalar alpha. + * @param A The input allocation contains matrix A, supported elements type: {Element#F64_2}. + * @param beta The scalar beta. + * @param C The input allocation contains matrix C, supported elements type: {Element#F64_2}. + */ + void ZSYRK(RsBlasUplo Uplo, RsBlasTranspose Trans, Double2 alpha, + sp<Allocation> A, Double2 beta, sp<Allocation> C); + + /** + * SSYR2K performs one of the symmetric rank 2k operations + * C := alpha*A*B**T + alpha*B*A**T + beta*C or C := alpha*A**T*B + alpha*B**T*A + beta*C + * + * Details: http://www.netlib.org/lapack/explore-html/df/d3d/ssyr2k_8f.html + * + * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced. + * @param Trans The type of transpose applied to the operation. + * @param alpha The scalar alpha. + * @param A The input allocation contains matrix A, supported elements type: {Element#F32}. + * @param B The input allocation contains matrix B, supported elements type: {Element#F32}. + * @param beta The scalar beta. + * @param C The input allocation contains matrix C, supported elements type: {Element#F32}. + */ + void SSYR2K(RsBlasUplo Uplo, RsBlasTranspose Trans, float alpha, + sp<Allocation> A, sp<Allocation> B, float beta, sp<Allocation> C); + + /** + * DSYR2K performs one of the symmetric rank 2k operations + * C := alpha*A*B**T + alpha*B*A**T + beta*C or C := alpha*A**T*B + alpha*B**T*A + beta*C + * + * Details: http://www.netlib.org/lapack/explore-html/d1/dec/dsyr2k_8f.html + * + * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced. + * @param Trans The type of transpose applied to the operation. + * @param alpha The scalar alpha. + * @param A The input allocation contains matrix A, supported elements type: {Element#F64}. + * @param B The input allocation contains matrix B, supported elements type: {Element#F64}. + * @param beta The scalar beta. + * @param C The input allocation contains matrix C, supported elements type: {Element#F64}. + */ + void DSYR2K(RsBlasUplo Uplo, RsBlasTranspose Trans, double alpha, + sp<Allocation> A, sp<Allocation> B, double beta, sp<Allocation> C); + + /** + * CSYR2K performs one of the symmetric rank 2k operations + * C := alpha*A*B**T + alpha*B*A**T + beta*C or C := alpha*A**T*B + alpha*B**T*A + beta*C + * + * Details: http://www.netlib.org/lapack/explore-html/de/d7e/csyr2k_8f.html + * + * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced. + * @param Trans The type of transpose applied to the operation. + * @param alpha The scalar alpha. + * @param A The input allocation contains matrix A, supported elements type: {Element#F32_2}. + * @param B The input allocation contains matrix B, supported elements type: {Element#F32_2}. + * @param beta The scalar beta. + * @param C The input allocation contains matrix C, supported elements type: {Element#F32_2}. + */ + void CSYR2K(RsBlasUplo Uplo, RsBlasTranspose Trans, Float2 alpha, + sp<Allocation> A, sp<Allocation> B, Float2 beta, sp<Allocation> C); + + /** + * ZSYR2K performs one of the symmetric rank 2k operations + * C := alpha*A*B**T + alpha*B*A**T + beta*C or C := alpha*A**T*B + alpha*B**T*A + beta*C + * + * Details: http://www.netlib.org/lapack/explore-html/df/d20/zsyr2k_8f.html + * + * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced. + * @param Trans The type of transpose applied to the operation. + * @param alpha The scalar alpha. + * @param A The input allocation contains matrix A, supported elements type: {Element#F64_2}. + * @param B The input allocation contains matrix B, supported elements type: {Element#F64_2}. + * @param beta The scalar beta. + * @param C The input allocation contains matrix C, supported elements type: {Element#F64_2}. + */ + void ZSYR2K(RsBlasUplo Uplo, RsBlasTranspose Trans, Double2 alpha, + sp<Allocation> A, sp<Allocation> B, Double2 beta, sp<Allocation> C); + + /** + * STRMM performs one of the matrix-matrix operations + * B := alpha*op(A)*B or B := alpha*B*op(A) + * op(A) is one of op(A) = A or op(A) = A**T + * + * Details: http://www.netlib.org/lapack/explore-html/df/d01/strmm_8f.html + * + * @param Side Specifies whether the symmetric matrix A appears on the left or right. + * @param Uplo Specifies whether matrix A is upper or lower triangular. + * @param TransA The type of transpose applied to matrix A. + * @param Diag Specifies whether or not A is unit triangular. + * @param alpha The scalar alpha. + * @param A The input allocation contains matrix A, supported elements type: {Element#F32}. + * @param B The input allocation contains matrix B, supported elements type: {Element#F32}. + */ + void STRMM(RsBlasSide Side, RsBlasUplo Uplo, RsBlasTranspose TransA, + RsBlasDiag Diag, float alpha, sp<Allocation> A, sp<Allocation> B); + + /** + * DTRMM performs one of the matrix-matrix operations + * B := alpha*op(A)*B or B := alpha*B*op(A) + * op(A) is one of op(A) = A or op(A) = A**T + * + * Details: http://www.netlib.org/lapack/explore-html/dd/d19/dtrmm_8f.html + * + * @param Side Specifies whether the symmetric matrix A appears on the left or right. + * @param Uplo Specifies whether matrix A is upper or lower triangular. + * @param TransA The type of transpose applied to matrix A. + * @param Diag Specifies whether or not A is unit triangular. + * @param alpha The scalar alpha. + * @param A The input allocation contains matrix A, supported elements type: {Element#F64}. + * @param B The input allocation contains matrix B, supported elements type: {Element#F64}. + */ + void DTRMM(RsBlasSide Side, RsBlasUplo Uplo, RsBlasTranspose TransA, RsBlasDiag Diag, + double alpha, sp<Allocation> A, sp<Allocation> B); + + /** + * CTRMM performs one of the matrix-matrix operations + * B := alpha*op(A)*B or B := alpha*B*op(A) + * op(A) is one of op(A) = A or op(A) = A**T or op(A) = A**H + * + * Details: http://www.netlib.org/lapack/explore-html/d4/d9b/ctrmm_8f.html + * + * @param Side Specifies whether the symmetric matrix A appears on the left or right. + * @param Uplo Specifies whether matrix A is upper or lower triangular. + * @param TransA The type of transpose applied to matrix A. + * @param Diag Specifies whether or not A is unit triangular. + * @param alpha The scalar alpha. + * @param A The input allocation contains matrix A, supported elements type: {Element#F32_2}. + * @param B The input allocation contains matrix B, supported elements type: {Element#F32_2}. + */ + void CTRMM(RsBlasSide Side, RsBlasUplo Uplo, RsBlasTranspose TransA, RsBlasDiag Diag, + Float2 alpha, sp<Allocation> A, sp<Allocation> B); + + /** + * ZTRMM performs one of the matrix-matrix operations + * B := alpha*op(A)*B or B := alpha*B*op(A) + * op(A) is one of op(A) = A or op(A) = A**T or op(A) = A**H + * + * Details: http://www.netlib.org/lapack/explore-html/d8/de1/ztrmm_8f.html + * + * @param Side Specifies whether the symmetric matrix A appears on the left or right. + * @param Uplo Specifies whether matrix A is upper or lower triangular. + * @param TransA The type of transpose applied to matrix A. + * @param Diag Specifies whether or not A is unit triangular. + * @param alpha The scalar alpha. + * @param A The input allocation contains matrix A, supported elements type: {Element#F64_2}. + * @param B The input allocation contains matrix B, supported elements type: {Element#F64_2}. + */ + void ZTRMM(RsBlasSide Side, RsBlasUplo Uplo, RsBlasTranspose TransA, RsBlasDiag Diag, + Double2 alpha, sp<Allocation> A, sp<Allocation> B); + + /** + * STRSM solves one of the matrix equations + * op(A)*X := alpha*B or X*op(A) := alpha*B + * op(A) is one of op(A) = A or op(A) = A**T + * + * Details: http://www.netlib.org/lapack/explore-html/d2/d8b/strsm_8f.html + * + * @param Side Specifies whether the symmetric matrix A appears on the left or right. + * @param Uplo Specifies whether matrix A is upper or lower triangular. + * @param TransA The type of transpose applied to matrix A. + * @param Diag Specifies whether or not A is unit triangular. + * @param alpha The scalar alpha. + * @param A The input allocation contains matrix A, supported elements type: {Element#F32}. + * @param B The input allocation contains matrix B, supported elements type: {Element#F32}. + */ + void STRSM(RsBlasSide Side, RsBlasUplo Uplo, RsBlasTranspose TransA, RsBlasDiag Diag, + float alpha, sp<Allocation> A, sp<Allocation> B); + + /** + * DTRSM solves one of the matrix equations + * op(A)*X := alpha*B or X*op(A) := alpha*B + * op(A) is one of op(A) = A or op(A) = A**T + * + * Details: http://www.netlib.org/lapack/explore-html/de/da7/dtrsm_8f.html + * + * @param Side Specifies whether the symmetric matrix A appears on the left or right. + * @param Uplo Specifies whether matrix A is upper or lower triangular. + * @param TransA The type of transpose applied to matrix A. + * @param Diag Specifies whether or not A is unit triangular. + * @param alpha The scalar alpha. + * @param A The input allocation contains matrix A, supported elements type: {Element#F64}. + * @param B The input allocation contains matrix B, supported elements type: {Element#F64}. + */ + void DTRSM(RsBlasSide Side, RsBlasUplo Uplo, RsBlasTranspose TransA, RsBlasDiag Diag, + double alpha, sp<Allocation> A, sp<Allocation> B); + + /** + * CTRSM solves one of the matrix equations + * op(A)*X := alpha*B or X*op(A) := alpha*B + * op(A) is one of op(A) = A or op(A) = A**T or op(A) = A**H + * + * Details: http://www.netlib.org/lapack/explore-html/de/d30/ctrsm_8f.html + * + * @param Side Specifies whether the symmetric matrix A appears on the left or right. + * @param Uplo Specifies whether matrix A is upper or lower triangular. + * @param TransA The type of transpose applied to matrix A. + * @param Diag Specifies whether or not A is unit triangular. + * @param alpha The scalar alpha. + * @param A The input allocation contains matrix A, supported elements type: {Element#F32_2}. + * @param B The input allocation contains matrix B, supported elements type: {Element#F32_2}. + */ + void CTRSM(RsBlasSide Side, RsBlasUplo Uplo, RsBlasTranspose TransA, RsBlasDiag Diag, + Float2 alpha, sp<Allocation> A, sp<Allocation> B); + + /** + * ZTRSM solves one of the matrix equations + * op(A)*X := alpha*B or X*op(A) := alpha*B + * op(A) is one of op(A) = A or op(A) = A**T or op(A) = A**H + * + * Details: http://www.netlib.org/lapack/explore-html/d1/d39/ztrsm_8f.html + * + * @param Side Specifies whether the symmetric matrix A appears on the left or right. + * @param Uplo Specifies whether matrix A is upper or lower triangular. + * @param TransA The type of transpose applied to matrix A. + * @param Diag Specifies whether or not A is unit triangular. + * @param alpha The scalar alpha. + * @param A The input allocation contains matrix A, supported elements type: {Element#F64_2}. + * @param B The input allocation contains matrix B, supported elements type: {Element#F64_2}. + */ + void ZTRSM(RsBlasSide Side, RsBlasUplo Uplo, RsBlasTranspose TransA, RsBlasDiag Diag, + Double2 alpha, sp<Allocation> A, sp<Allocation> B); + + /** + * CHEMM performs one of the matrix-matrix operations + * C := alpha*A*B + beta*C or C := alpha*B*A + beta*C + * + * Details: http://www.netlib.org/lapack/explore-html/d3/d66/chemm_8f.html + * + * @param Side Specifies whether the symmetric matrix A appears on the left or right. + * @param Uplo Specifies whether the upper or lower triangular part is to be referenced. + * @param alpha The scalar alpha. + * @param A The input allocation contains matrix A, supported elements type: {Element#F32_2}. + * @param B The input allocation contains matrix B, supported elements type: {Element#F32_2}. + * @param beta The scalar beta. + * @param C The input allocation contains matrix C, supported elements type: {Element#F32_2}. + */ + void CHEMM(RsBlasSide Side, RsBlasUplo Uplo, Float2 alpha, sp<Allocation> A, + sp<Allocation> B, Float2 beta, sp<Allocation> C); + + /** + * ZHEMM performs one of the matrix-matrix operations + * C := alpha*A*B + beta*C or C := alpha*B*A + beta*C + * + * Details: http://www.netlib.org/lapack/explore-html/d6/d3e/zhemm_8f.html + * + * @param Side Specifies whether the symmetric matrix A appears on the left or right. + * @param Uplo Specifies whether the upper or lower triangular part is to be referenced. + * @param alpha The scalar alpha. + * @param A The input allocation contains matrix A, supported elements type: {Element#F64_2}. + * @param B The input allocation contains matrix B, supported elements type: {Element#F64_2}. + * @param beta The scalar beta. + * @param C The input allocation contains matrix C, supported elements type: {Element#F64_2}. + */ + void ZHEMM(RsBlasSide Side, RsBlasUplo Uplo, Double2 alpha, sp<Allocation> A, + sp<Allocation> B, Double2 beta, sp<Allocation> C); + + /** + * CHERK performs one of the hermitian rank k operations + * C := alpha*A*A**H + beta*C or C := alpha*A**H*A + beta*C + * + * Details: http://www.netlib.org/lapack/explore-html/d8/d52/cherk_8f.html + * + * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced. + * @param Trans The type of transpose applied to the operation. + * @param alpha The scalar alpha. + * @param A The input allocation contains matrix A, supported elements type: {Element#F32_2}. + * @param beta The scalar beta. + * @param C The input allocation contains matrix C, supported elements type: {Element#F32_2}. + */ + void CHERK(RsBlasUplo Uplo, RsBlasTranspose Trans, float alpha, sp<Allocation> A, + float beta, sp<Allocation> C); + + /** + * ZHERK performs one of the hermitian rank k operations + * C := alpha*A*A**H + beta*C or C := alpha*A**H*A + beta*C + * + * Details: http://www.netlib.org/lapack/explore-html/d1/db1/zherk_8f.html + * + * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced. + * @param Trans The type of transpose applied to the operation. + * @param alpha The scalar alpha. + * @param A The input allocation contains matrix A, supported elements type: {Element#F64_2}. + * @param beta The scalar beta. + * @param C The input allocation contains matrix C, supported elements type: {Element#F64_2}. + */ + void ZHERK(RsBlasUplo Uplo, RsBlasTranspose Trans, double alpha, sp<Allocation> A, + double beta, sp<Allocation> C); + + /** + * CHER2K performs one of the hermitian rank 2k operations + * C := alpha*A*B**H + conjg( alpha )*B*A**H + beta*C or C := alpha*A**H*B + conjg( alpha )*B**H*A + beta*C + * + * Details: http://www.netlib.org/lapack/explore-html/d1/d82/cher2k_8f.html + * + * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced. + * @param Trans The type of transpose applied to the operation. + * @param alpha The scalar alpha. + * @param A The input allocation contains matrix A, supported elements type: {Element#F32_2}. + * @param B The input allocation contains matrix B, supported elements type: {Element#F32_2}. + * @param beta The scalar beta. + * @param C The input allocation contains matrix C, supported elements type: {Element#F32_2}. + */ + void CHER2K(RsBlasUplo Uplo, RsBlasTranspose Trans, Float2 alpha, sp<Allocation> A, + sp<Allocation> B, float beta, sp<Allocation> C); + + /** + * ZHER2K performs one of the hermitian rank 2k operations + * C := alpha*A*B**H + conjg( alpha )*B*A**H + beta*C or C := alpha*A**H*B + conjg( alpha )*B**H*A + beta*C + * + * Details: http://www.netlib.org/lapack/explore-html/d7/dfa/zher2k_8f.html + * + * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced. + * @param Trans The type of transpose applied to the operation. + * @param alpha The scalar alpha. + * @param A The input allocation contains matrix A, supported elements type: {Element#F64_2}. + * @param B The input allocation contains matrix B, supported elements type: {Element#F64_2}. + * @param beta The scalar beta. + * @param C The input allocation contains matrix C, supported elements type: {Element#F64_2}. + */ + void ZHER2K(RsBlasUplo Uplo, RsBlasTranspose Trans, Double2 alpha, sp<Allocation> A, + sp<Allocation> B, double beta, sp<Allocation> C); + + /** + * 8-bit GEMM-like operation for neural networks: C = A * Transpose(B) + * Calculations are done in 1.10.21 fixed-point format for the final output, + * just before there's a shift down to drop the fractional parts. The output + * values are gated to 0 to 255 to fit in a byte, but the 10-bit format + * gives some headroom to avoid wrapping around on small overflows. + * + * @param A The input allocation contains matrix A, supported elements type: {Element#U8}. + * @param a_offset The offset for all values in matrix A, e.g A[i,j] = A[i,j] - a_offset. Value should be from 0 to 255. + * @param B The input allocation contains matrix B, supported elements type: {Element#U8}. + * @param b_offset The offset for all values in matrix B, e.g B[i,j] = B[i,j] - b_offset. Value should be from 0 to 255. + * @param C The input allocation contains matrix C, supported elements type: {Element#U8}. + * @param c_offset The offset for all values in matrix C. + * @param c_mult The multiplier for all values in matrix C, e.g C[i,j] = (C[i,j] + c_offset) * c_mult. + **/ + void BNNM(sp<Allocation> A, int a_offset, sp<Allocation> B, int b_offset, sp<Allocation> C, + int c_offset, int c_mult); +}; + /** * Intrinsic kernel for blending two Allocations. */ @@ -2116,276 +4327,6 @@ class ScriptIntrinsicYuvToRGB : public ScriptIntrinsic { }; -class Byte2 { - public: - int8_t x, y; - - Byte2(int8_t initX, int8_t initY) - : x(initX), y(initY) {} - Byte2() : x(0), y(0) {} -}; - -class Byte3 { - public: - int8_t x, y, z; - - Byte3(int8_t initX, int8_t initY, int8_t initZ) - : x(initX), y(initY), z(initZ) {} - Byte3() : x(0), y(0), z(0) {} -}; - -class Byte4 { - public: - int8_t x, y, z, w; - - Byte4(int8_t initX, int8_t initY, int8_t initZ, int8_t initW) - : x(initX), y(initY), z(initZ), w(initW) {} - Byte4() : x(0), y(0), z(0), w(0) {} -}; - -class UByte2 { - public: - uint8_t x, y; - - UByte2(uint8_t initX, uint8_t initY) - : x(initX), y(initY) {} - UByte2() : x(0), y(0) {} -}; - -class UByte3 { - public: - uint8_t x, y, z; - - UByte3(uint8_t initX, uint8_t initY, uint8_t initZ) - : x(initX), y(initY), z(initZ) {} - UByte3() : x(0), y(0), z(0) {} -}; - -class UByte4 { - public: - uint8_t x, y, z, w; - - UByte4(uint8_t initX, uint8_t initY, uint8_t initZ, uint8_t initW) - : x(initX), y(initY), z(initZ), w(initW) {} - UByte4() : x(0), y(0), z(0), w(0) {} -}; - -class Short2 { - public: - short x, y; - - Short2(short initX, short initY) - : x(initX), y(initY) {} - Short2() : x(0), y(0) {} -}; - -class Short3 { - public: - short x, y, z; - - Short3(short initX, short initY, short initZ) - : x(initX), y(initY), z(initZ) {} - Short3() : x(0), y(0), z(0) {} -}; - -class Short4 { - public: - short x, y, z, w; - - Short4(short initX, short initY, short initZ, short initW) - : x(initX), y(initY), z(initZ), w(initW) {} - Short4() : x(0), y(0), z(0), w(0) {} -}; - -class UShort2 { - public: - uint16_t x, y; - - UShort2(uint16_t initX, uint16_t initY) - : x(initX), y(initY) {} - UShort2() : x(0), y(0) {} -}; - -class UShort3 { - public: - uint16_t x, y, z; - - UShort3(uint16_t initX, uint16_t initY, uint16_t initZ) - : x(initX), y(initY), z(initZ) {} - UShort3() : x(0), y(0), z(0) {} -}; - -class UShort4 { - public: - uint16_t x, y, z, w; - - UShort4(uint16_t initX, uint16_t initY, uint16_t initZ, uint16_t initW) - : x(initX), y(initY), z(initZ), w(initW) {} - UShort4() : x(0), y(0), z(0), w(0) {} -}; - -class Int2 { - public: - int x, y; - - Int2(int initX, int initY) - : x(initX), y(initY) {} - Int2() : x(0), y(0) {} -}; - -class Int3 { - public: - int x, y, z; - - Int3(int initX, int initY, int initZ) - : x(initX), y(initY), z(initZ) {} - Int3() : x(0), y(0), z(0) {} -}; - -class Int4 { - public: - int x, y, z, w; - - Int4(int initX, int initY, int initZ, int initW) - : x(initX), y(initY), z(initZ), w(initW) {} - Int4() : x(0), y(0), z(0), w(0) {} -}; - -class UInt2 { - public: - uint32_t x, y; - - UInt2(uint32_t initX, uint32_t initY) - : x(initX), y(initY) {} - UInt2() : x(0), y(0) {} -}; - -class UInt3 { - public: - uint32_t x, y, z; - - UInt3(uint32_t initX, uint32_t initY, uint32_t initZ) - : x(initX), y(initY), z(initZ) {} - UInt3() : x(0), y(0), z(0) {} -}; - -class UInt4 { - public: - uint32_t x, y, z, w; - - UInt4(uint32_t initX, uint32_t initY, uint32_t initZ, uint32_t initW) - : x(initX), y(initY), z(initZ), w(initW) {} - UInt4() : x(0), y(0), z(0), w(0) {} -}; - -class Long2 { - public: - int64_t x, y; - - Long2(int64_t initX, int64_t initY) - : x(initX), y(initY) {} - Long2() : x(0), y(0) {} -}; - -class Long3 { - public: - int64_t x, y, z; - - Long3(int64_t initX, int64_t initY, int64_t initZ) - : x(initX), y(initY), z(initZ) {} - Long3() : x(0), y(0), z(0) {} -}; - -class Long4 { - public: - int64_t x, y, z, w; - - Long4(int64_t initX, int64_t initY, int64_t initZ, int64_t initW) - : x(initX), y(initY), z(initZ), w(initW) {} - Long4() : x(0), y(0), z(0), w(0) {} -}; - -class ULong2 { - public: - uint64_t x, y; - - ULong2(uint64_t initX, uint64_t initY) - : x(initX), y(initY) {} - ULong2() : x(0), y(0) {} -}; - -class ULong3 { - public: - uint64_t x, y, z; - - ULong3(uint64_t initX, uint64_t initY, uint64_t initZ) - : x(initX), y(initY), z(initZ) {} - ULong3() : x(0), y(0), z(0) {} -}; - -class ULong4 { - public: - uint64_t x, y, z, w; - - ULong4(uint64_t initX, uint64_t initY, uint64_t initZ, uint64_t initW) - : x(initX), y(initY), z(initZ), w(initW) {} - ULong4() : x(0), y(0), z(0), w(0) {} -}; - -class Float2 { - public: - float x, y; - - Float2(float initX, float initY) - : x(initX), y(initY) {} - Float2() : x(0), y(0) {} -}; - -class Float3 { - public: - float x, y, z; - - Float3(float initX, float initY, float initZ) - : x(initX), y(initY), z(initZ) {} - Float3() : x(0.f), y(0.f), z(0.f) {} -}; - -class Float4 { - public: - float x, y, z, w; - - Float4(float initX, float initY, float initZ, float initW) - : x(initX), y(initY), z(initZ), w(initW) {} - Float4() : x(0.f), y(0.f), z(0.f), w(0.f) {} -}; - -class Double2 { - public: - double x, y; - - Double2(double initX, double initY) - : x(initX), y(initY) {} - Double2() : x(0), y(0) {} -}; - -class Double3 { - public: - double x, y, z; - - Double3(double initX, double initY, double initZ) - : x(initX), y(initY), z(initZ) {} - Double3() : x(0), y(0), z(0) {} -}; - -class Double4 { - public: - double x, y, z, w; - - Double4(double initX, double initY, double initZ, double initW) - : x(initX), y(initY), z(initZ), w(initW) {} - Double4() : x(0), y(0), z(0), w(0) {} -}; - } } |
