Use of org.apache.sysml.runtime.matrix.operators.LeftScalarOperator in project incubator-systemml by apache.
The class LibMatrixCUDA, method matrixMatrixOp.
/**
 * Launches the CUDA kernel for a binary cellwise matrix-matrix operation, taking a
 * shortcut whenever an input is reported as sparse and empty by {@code isSparseAndEmpty}.
 * <ul>
 * <li>Both inputs empty: the output is allocated sparse and empty, or dense and filled
 * with {@code NaN} when the operator function is {@link Divide}.</li>
 * <li>Only the left input empty and the right input has neither one row nor one column:
 * delegates to {@code matrixScalarArithmetic} as {@code 0.0 op in2}.</li>
 * <li>Only the right input empty and the left input has neither one row nor one column:
 * delegates to {@code matrixScalarArithmetic} as {@code in1 op 0.0}.</li>
 * <li>Otherwise: both inputs are obtained as dense pointers and the dense kernel
 * overload of {@code matrixMatrixOp} is invoked.</li>
 * </ul>
 *
 * @param ec execution context
 * @param gCtx a valid {@link GPUContext}
 * @param instName the invoking instruction's name for record {@link Statistics}.
 * @param in1 left input matrix
 * @param in2 right input matrix
 * @param outputName output variable name
 * @param isLeftTransposed true if left matrix is transposed (only forwarded on the scalar shortcut path)
 * @param isRightTransposed true if right matrix is transposed (only forwarded on the scalar shortcut path)
 * @param op operator
 * @throws DMLRuntimeException if the {@link GPUContext} of {@code ec} is not {@code gCtx}, or if a delegated call fails
 */
private static void matrixMatrixOp(ExecutionContext ec, GPUContext gCtx, String instName, MatrixObject in1, MatrixObject in2, String outputName, boolean isLeftTransposed, boolean isRightTransposed, BinaryOperator op) throws DMLRuntimeException {
    if (ec.getGPUContext() != gCtx) {
        throw new DMLRuntimeException("GPU : Invalid internal state, the GPUContext set with the ExecutionContext is not the same used to run this LibMatrixCUDA function");
    }

    final boolean leftEmpty = isSparseAndEmpty(gCtx, in1);
    final boolean rightEmpty = isSparseAndEmpty(gCtx, in2);

    // NOTE(review): dimensions are narrowed to int without a range check - confirm that
    // callers never pass matrices with more than Integer.MAX_VALUE rows or columns.
    final int leftRows = (int) in1.getNumRows();
    final int rightRows = (int) in2.getNumRows();
    final int leftCols = (int) in1.getNumColumns();
    final int rightCols = (int) in2.getNumColumns();
    final int leftVecStatus = getVectorStatus(leftRows, leftCols).code();
    final int rightVecStatus = getVectorStatus(rightRows, rightCols).code();

    // Case 1: both operands are empty, so the result is empty as well (division excepted).
    if (leftEmpty && rightEmpty) {
        MatrixObject result = ec.getMatrixObject(outputName);
        ec.allocateGPUMatrixObject(outputName);
        if (!(op.fn instanceof Divide)) {
            result.getGPUObject(gCtx).allocateSparseAndEmpty();
        } else {
            result.getGPUObject(gCtx).allocateAndFillDense(Double.NaN);
        }
        return;
    }

    // An operand with a single row or a single column is left to the general dense kernel below.
    final boolean leftIsVector = leftRows == 1 || leftCols == 1;
    final boolean rightIsVector = rightRows == 1 || rightCols == 1;

    // Case 2: C = empty_in1 op in2  ==>  C = 0.0 op in2
    if (leftEmpty && !rightIsVector) {
        matrixScalarArithmetic(ec, gCtx, instName, in2, outputName, isRightTransposed, new LeftScalarOperator(op.fn, 0.0));
        return;
    }

    // Case 3: C = in1 op empty_in2  ==>  C = in1 op 0.0
    if (rightEmpty && !leftIsVector) {
        matrixScalarArithmetic(ec, gCtx, instName, in1, outputName, isLeftTransposed, new RightScalarOperator(op.fn, 0.0));
        return;
    }

    // Case 4: general dense path.
    // TODO: FIXME: Implement sparse binCellSparseOp kernel (applies to both inputs)
    Pointer leftPtr = getDensePointer(gCtx, in1, instName);
    Pointer rightPtr = getDensePointer(gCtx, in2, instName);
    MatrixObject result = ec.getMatrixObject(outputName);
    // Allocate the dense output matrix before fetching its pointer.
    getDenseMatrixOutputForGPUInstruction(ec, instName, outputName);
    Pointer resultPtr = getDensePointer(gCtx, result, instName);
    // The kernel is given the larger of the two row counts and of the two column counts.
    matrixMatrixOp(gCtx, instName, leftPtr, rightPtr, Math.max(leftRows, rightRows), Math.max(leftCols, rightCols), leftVecStatus, rightVecStatus, resultPtr, op);
}
Aggregations