Examples with Minus1Multiply - org.apache.sysml.runtime.functionobjects.Minus1Multiply

Example 1 with Minus1Multiply

Use of org.apache.sysml.runtime.functionobjects.Minus1Multiply in the project incubator-systemml by Apache.

From the class LibMatrixCUDA, method matrixMatrixOp:

/**
 * Runs a cell-wise binary operation between two matrices on the GPU.
 * <p>
 * Dispatch order:
 * <ol>
 *   <li>both inputs sparse-and-empty: the output is allocated directly (NaN-filled for
 *       division-like operators, 1.0-filled for {@code Minus1Multiply}, sparse-empty otherwise);</li>
 *   <li>only the left input is empty and the right input is not a vector: the call is rewritten
 *       as the scalar operation {@code 0.0 op in2};</li>
 *   <li>only the right input is empty and the left input is not a vector: the call is rewritten
 *       as the scalar operation {@code in1 op 0.0};</li>
 *   <li>otherwise: both inputs are obtained as dense pointers and the dense kernel overload
 *       of {@code matrixMatrixOp} is invoked.</li>
 * </ol>
 *
 * @param ec                execution context
 * @param gCtx              a valid {@link GPUContext}; must be the context registered at index 0 of {@code ec}
 * @param instName          the invoking instruction's name for record {@link Statistics}.
 * @param in1               left input matrix
 * @param in2               right input matrix
 * @param outputName        output variable name
 * @param isLeftTransposed  true if left matrix is transposed (not supported, causes an exception)
 * @param isRightTransposed true if right matrix is transposed (not supported, causes an exception)
 * @param op                operator
 * @throws DMLRuntimeException if {@code gCtx} differs from the context held by {@code ec}, if either
 *                             transpose flag is set, or if the dense output cannot be obtained
 */
private static void matrixMatrixOp(ExecutionContext ec, GPUContext gCtx, String instName, MatrixObject in1, MatrixObject in2, String outputName, boolean isLeftTransposed, boolean isRightTransposed, BinaryOperator op) {
    if (ec.getGPUContext(0) != gCtx) {
        throw new DMLRuntimeException("GPU : Invalid internal state, the GPUContext set with the ExecutionContext is not the same used to run this LibMatrixCUDA function");
    }

    final boolean leftEmpty = isSparseAndEmpty(gCtx, in1);
    final boolean rightEmpty = isSparseAndEmpty(gCtx, in2);

    final int rowsLeft = toInt(in1.getNumRows());
    final int rowsRight = toInt(in2.getNumRows());
    final int colsLeft = toInt(in1.getNumColumns());
    final int colsRight = toInt(in2.getNumColumns());
    final int vecCodeLeft = getVectorStatus(rowsLeft, colsLeft).code();
    final int vecCodeRight = getVectorStatus(rowsRight, colsRight).code();

    if (isLeftTransposed || isRightTransposed) {
        throw new DMLRuntimeException("Unsupported operator: GPU transposed binary op " + isLeftTransposed + " " + isRightTransposed);
    }

    // The output takes the larger extent along each dimension; the int values feed the
    // kernel launch, the long copies feed the output allocation helpers.
    final int maxRows = Math.max(rowsLeft, rowsRight);
    final int maxCols = Math.max(colsLeft, colsRight);
    final long outRows = maxRows;
    final long outCols = maxCols;

    // Case 1: both operands are empty, so the result is known without launching a kernel.
    if (leftEmpty && rightEmpty) {
        MatrixObject result = ec.allocateGPUMatrixObject(outputName, outRows, outCols);
        boolean divisionLike = op.fn instanceof Divide
                || op.fn instanceof IntegerDivide
                || op.fn instanceof Modulus;
        if (divisionLike) {
            // Division-like operators on two empty operands fill the output with NaN.
            result.getGPUObject(gCtx).allocateAndFillDense(Double.NaN);
        } else if (op.fn instanceof Minus1Multiply) {
            // 1 - 0*0 == 1 for every cell.
            result.getGPUObject(gCtx).allocateAndFillDense(1.0);
        } else {
            // NOTE(review): any other operator with f(0,0) != 0 would also need a dense fill
            // here -- confirm whether such operators can reach this path.
            result.getGPUObject(gCtx).allocateSparseAndEmpty();
        }
        return;
    }

    // Case 2: empty left operand and a non-vector right operand.
    // C = empty_in1 op in2  ==>  C = 0.0 op in2
    final boolean rightIsVector = (colsRight == 1) || (rowsRight == 1);
    if (leftEmpty && !rightIsVector) {
        matrixScalarArithmetic(ec, gCtx, instName, in2, outputName, isRightTransposed, new LeftScalarOperator(op.fn, 0.0));
        return;
    }

    // Case 3: empty right operand and a non-vector left operand.
    // C = in1 op empty_in2  ==>  C = in1 op 0.0
    final boolean leftIsVector = (colsLeft == 1) || (rowsLeft == 1);
    if (rightEmpty && !leftIsVector) {
        matrixScalarArithmetic(ec, gCtx, instName, in1, outputName, isLeftTransposed, new RightScalarOperator(op.fn, 0.0));
        return;
    }

    // Case 4: general path on dense data.
    // TODO: FIXME: Implement sparse binCellSparseOp kernel
    Pointer leftPtr = getDensePointer(gCtx, in1, instName);
    Pointer rightPtr = getDensePointer(gCtx, in2, instName);

    MatrixObject result;
    try {
        result = getDenseMatrixOutputForGPUInstruction(ec, instName, outputName, outRows, outCols);
    } catch (DMLRuntimeException e) {
        // Re-throw with the operand and output shapes attached, keeping the original cause.
        throw new DMLRuntimeException("Incorrect dimensions: dimA:[" + rowsLeft + "," + colsLeft + "]" + " dimB:[" + rowsRight + "," + colsRight + "] out:[" + outRows + "," + outCols + "]", e);
    }
    Pointer resultPtr = getDensePointer(gCtx, result, instName);

    matrixMatrixOp(gCtx, instName, leftPtr, rightPtr, maxRows, maxCols, vecCodeLeft, vecCodeRight, resultPtr, op);
}

Also used : IntegerDivide(org.apache.sysml.runtime.functionobjects.IntegerDivide) Divide(org.apache.sysml.runtime.functionobjects.Divide) MatrixObject(org.apache.sysml.runtime.controlprogram.caching.MatrixObject) Modulus(org.apache.sysml.runtime.functionobjects.Modulus) LeftScalarOperator(org.apache.sysml.runtime.matrix.operators.LeftScalarOperator) CSRPointer(org.apache.sysml.runtime.instructions.gpu.context.CSRPointer) Pointer(jcuda.Pointer) RightScalarOperator(org.apache.sysml.runtime.matrix.operators.RightScalarOperator) IntegerDivide(org.apache.sysml.runtime.functionobjects.IntegerDivide) Minus1Multiply(org.apache.sysml.runtime.functionobjects.Minus1Multiply) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException)

Example 2 with Minus1Multiply

Use of org.apache.sysml.runtime.functionobjects.Minus1Multiply in the project systemml by Apache.

From the class LibMatrixCUDA, method matrixMatrixOp:

/**
 * Runs a cell-wise binary operation between two matrices on the GPU.
 * <p>
 * Dispatch order:
 * <ol>
 *   <li>both inputs sparse-and-empty: the output is allocated directly (NaN-filled for
 *       division-like operators, 1.0-filled for {@code Minus1Multiply}, sparse-empty otherwise);</li>
 *   <li>only the left input is empty and the right input is not a vector: the call is rewritten
 *       as the scalar operation {@code 0.0 op in2};</li>
 *   <li>only the right input is empty and the left input is not a vector: the call is rewritten
 *       as the scalar operation {@code in1 op 0.0};</li>
 *   <li>otherwise: both inputs are obtained as dense pointers and the dense kernel overload
 *       of {@code matrixMatrixOp} is invoked.</li>
 * </ol>
 *
 * @param ec                execution context
 * @param gCtx              a valid {@link GPUContext}; must be the context registered at index 0 of {@code ec}
 * @param instName          the invoking instruction's name for record {@link Statistics}.
 * @param in1               left input matrix
 * @param in2               right input matrix
 * @param outputName        output variable name
 * @param isLeftTransposed  true if left matrix is transposed (not supported, causes an exception)
 * @param isRightTransposed true if right matrix is transposed (not supported, causes an exception)
 * @param op                operator
 * @throws DMLRuntimeException if {@code gCtx} differs from the context held by {@code ec}, if either
 *                             transpose flag is set, or if the dense output cannot be obtained
 */
private static void matrixMatrixOp(ExecutionContext ec, GPUContext gCtx, String instName, MatrixObject in1, MatrixObject in2, String outputName, boolean isLeftTransposed, boolean isRightTransposed, BinaryOperator op) {
    if (ec.getGPUContext(0) != gCtx) {
        throw new DMLRuntimeException("GPU : Invalid internal state, the GPUContext set with the ExecutionContext is not the same used to run this LibMatrixCUDA function");
    }

    final boolean leftEmpty = isSparseAndEmpty(gCtx, in1);
    final boolean rightEmpty = isSparseAndEmpty(gCtx, in2);

    final int rowsLeft = toInt(in1.getNumRows());
    final int rowsRight = toInt(in2.getNumRows());
    final int colsLeft = toInt(in1.getNumColumns());
    final int colsRight = toInt(in2.getNumColumns());
    final int vecCodeLeft = getVectorStatus(rowsLeft, colsLeft).code();
    final int vecCodeRight = getVectorStatus(rowsRight, colsRight).code();

    if (isLeftTransposed || isRightTransposed) {
        throw new DMLRuntimeException("Unsupported operator: GPU transposed binary op " + isLeftTransposed + " " + isRightTransposed);
    }

    // The output takes the larger extent along each dimension; the int values feed the
    // kernel launch, the long copies feed the output allocation helpers.
    final int maxRows = Math.max(rowsLeft, rowsRight);
    final int maxCols = Math.max(colsLeft, colsRight);
    final long outRows = maxRows;
    final long outCols = maxCols;

    // Case 1: both operands are empty, so the result is known without launching a kernel.
    if (leftEmpty && rightEmpty) {
        MatrixObject result = ec.allocateGPUMatrixObject(outputName, outRows, outCols);
        boolean divisionLike = op.fn instanceof Divide
                || op.fn instanceof IntegerDivide
                || op.fn instanceof Modulus;
        if (divisionLike) {
            // Division-like operators on two empty operands fill the output with NaN.
            result.getGPUObject(gCtx).allocateAndFillDense(Double.NaN);
        } else if (op.fn instanceof Minus1Multiply) {
            // 1 - 0*0 == 1 for every cell.
            result.getGPUObject(gCtx).allocateAndFillDense(1.0);
        } else {
            // NOTE(review): any other operator with f(0,0) != 0 would also need a dense fill
            // here -- confirm whether such operators can reach this path.
            result.getGPUObject(gCtx).allocateSparseAndEmpty();
        }
        return;
    }

    // Case 2: empty left operand and a non-vector right operand.
    // C = empty_in1 op in2  ==>  C = 0.0 op in2
    final boolean rightIsVector = (colsRight == 1) || (rowsRight == 1);
    if (leftEmpty && !rightIsVector) {
        matrixScalarArithmetic(ec, gCtx, instName, in2, outputName, isRightTransposed, new LeftScalarOperator(op.fn, 0.0));
        return;
    }

    // Case 3: empty right operand and a non-vector left operand.
    // C = in1 op empty_in2  ==>  C = in1 op 0.0
    final boolean leftIsVector = (colsLeft == 1) || (rowsLeft == 1);
    if (rightEmpty && !leftIsVector) {
        matrixScalarArithmetic(ec, gCtx, instName, in1, outputName, isLeftTransposed, new RightScalarOperator(op.fn, 0.0));
        return;
    }

    // Case 4: general path on dense data.
    // TODO: FIXME: Implement sparse binCellSparseOp kernel
    Pointer leftPtr = getDensePointer(gCtx, in1, instName);
    Pointer rightPtr = getDensePointer(gCtx, in2, instName);

    MatrixObject result;
    try {
        result = getDenseMatrixOutputForGPUInstruction(ec, instName, outputName, outRows, outCols);
    } catch (DMLRuntimeException e) {
        // Re-throw with the operand and output shapes attached, keeping the original cause.
        throw new DMLRuntimeException("Incorrect dimensions: dimA:[" + rowsLeft + "," + colsLeft + "]" + " dimB:[" + rowsRight + "," + colsRight + "] out:[" + outRows + "," + outCols + "]", e);
    }
    Pointer resultPtr = getDensePointer(gCtx, result, instName);

    matrixMatrixOp(gCtx, instName, leftPtr, rightPtr, maxRows, maxCols, vecCodeLeft, vecCodeRight, resultPtr, op);
}

Aggregations

Pointer (jcuda.Pointer): 2, DMLRuntimeException (org.apache.sysml.runtime.DMLRuntimeException): 2, MatrixObject (org.apache.sysml.runtime.controlprogram.caching.MatrixObject): 2, Divide (org.apache.sysml.runtime.functionobjects.Divide): 2, IntegerDivide (org.apache.sysml.runtime.functionobjects.IntegerDivide): 2, Minus1Multiply (org.apache.sysml.runtime.functionobjects.Minus1Multiply): 2, Modulus (org.apache.sysml.runtime.functionobjects.Modulus): 2, CSRPointer (org.apache.sysml.runtime.instructions.gpu.context.CSRPointer): 2, LeftScalarOperator (org.apache.sysml.runtime.matrix.operators.LeftScalarOperator): 2, RightScalarOperator (org.apache.sysml.runtime.matrix.operators.RightScalarOperator): 2