Search in sources :

Example 1 with KahanPlusSq

use of org.apache.sysml.runtime.functionobjects.KahanPlusSq in project incubator-systemml by apache.

the class LibMatrixAgg method aggregateUnaryMatrixDense.

private static void aggregateUnaryMatrixDense(MatrixBlock in, MatrixBlock out, AggType optype, ValueFunction vFn, IndexFunction ixFn, int rl, int ru) throws DMLRuntimeException {
    final int m = in.rlen;
    final int n = in.clen;
    double[] a = in.getDenseBlock();
    double[] c = out.getDenseBlock();
    switch(optype) {
        case //SUM/TRACE via k+, 
        KAHAN_SUM:
            {
                KahanObject kbuff = new KahanObject(0, 0);
                if (// SUM
                ixFn instanceof ReduceAll)
                    d_uakp(a, c, m, n, kbuff, (KahanPlus) vFn, rl, ru);
                else if (//ROWSUM
                ixFn instanceof ReduceCol)
                    d_uarkp(a, c, m, n, kbuff, (KahanPlus) vFn, rl, ru);
                else if (//COLSUM
                ixFn instanceof ReduceRow)
                    d_uackp(a, c, m, n, kbuff, (KahanPlus) vFn, rl, ru);
                else if (//TRACE
                ixFn instanceof ReduceDiag)
                    d_uakptrace(a, c, m, n, kbuff, (KahanPlus) vFn, rl, ru);
                break;
            }
        case //SUM_SQ via k+,
        KAHAN_SUM_SQ:
            {
                KahanObject kbuff = new KahanObject(0, 0);
                if (//SUM_SQ
                ixFn instanceof ReduceAll)
                    d_uasqkp(a, c, m, n, kbuff, (KahanPlusSq) vFn, rl, ru);
                else if (//ROWSUM_SQ
                ixFn instanceof ReduceCol)
                    d_uarsqkp(a, c, m, n, kbuff, (KahanPlusSq) vFn, rl, ru);
                else if (//COLSUM_SQ
                ixFn instanceof ReduceRow)
                    d_uacsqkp(a, c, m, n, kbuff, (KahanPlusSq) vFn, rl, ru);
                break;
            }
        case //CUMSUM
        CUM_KAHAN_SUM:
            {
                KahanObject kbuff = new KahanObject(0, 0);
                KahanPlus kplus = KahanPlus.getKahanPlusFnObject();
                d_ucumkp(a, null, c, m, n, kbuff, kplus, rl, ru);
                break;
            }
        case //CUMPROD
        CUM_PROD:
            {
                d_ucumm(a, null, c, m, n, rl, ru);
                break;
            }
        case CUM_MIN:
        case CUM_MAX:
            {
                double init = Double.MAX_VALUE * ((optype == AggType.CUM_MAX) ? -1 : 1);
                d_ucummxx(a, null, c, m, n, init, (Builtin) vFn, rl, ru);
                break;
            }
        case MIN:
        case //MAX/MIN
        MAX:
            {
                double init = Double.MAX_VALUE * ((optype == AggType.MAX) ? -1 : 1);
                if (// MIN/MAX
                ixFn instanceof ReduceAll)
                    d_uamxx(a, c, m, n, init, (Builtin) vFn, rl, ru);
                else if (//ROWMIN/ROWMAX
                ixFn instanceof ReduceCol)
                    d_uarmxx(a, c, m, n, init, (Builtin) vFn, rl, ru);
                else if (//COLMIN/COLMAX
                ixFn instanceof ReduceRow)
                    d_uacmxx(a, c, m, n, init, (Builtin) vFn, rl, ru);
                break;
            }
        case MAX_INDEX:
            {
                double init = -Double.MAX_VALUE;
                if (//ROWINDEXMAX
                ixFn instanceof ReduceCol)
                    d_uarimxx(a, c, m, n, init, (Builtin) vFn, rl, ru);
                break;
            }
        case MIN_INDEX:
            {
                double init = Double.MAX_VALUE;
                if (//ROWINDEXMIN
                ixFn instanceof ReduceCol)
                    d_uarimin(a, c, m, n, init, (Builtin) vFn, rl, ru);
                break;
            }
        case //MEAN
        MEAN:
            {
                KahanObject kbuff = new KahanObject(0, 0);
                if (// MEAN
                ixFn instanceof ReduceAll)
                    d_uamean(a, c, m, n, kbuff, (Mean) vFn, rl, ru);
                else if (//ROWMEAN
                ixFn instanceof ReduceCol)
                    d_uarmean(a, c, m, n, kbuff, (Mean) vFn, rl, ru);
                else if (//COLMEAN
                ixFn instanceof ReduceRow)
                    d_uacmean(a, c, m, n, kbuff, (Mean) vFn, rl, ru);
                break;
            }
        case //VAR
        VAR:
            {
                CM_COV_Object cbuff = new CM_COV_Object();
                if (//VAR
                ixFn instanceof ReduceAll)
                    d_uavar(a, c, m, n, cbuff, (CM) vFn, rl, ru);
                else if (//ROWVAR
                ixFn instanceof ReduceCol)
                    d_uarvar(a, c, m, n, cbuff, (CM) vFn, rl, ru);
                else if (//COLVAR
                ixFn instanceof ReduceRow)
                    d_uacvar(a, c, m, n, cbuff, (CM) vFn, rl, ru);
                break;
            }
        case //PROD
        PROD:
            {
                if (// PROD
                ixFn instanceof ReduceAll)
                    d_uam(a, c, m, n, rl, ru);
                break;
            }
        default:
            throw new DMLRuntimeException("Unsupported aggregation type: " + optype);
    }
}
Also used : ReduceCol(org.apache.sysml.runtime.functionobjects.ReduceCol) CM_COV_Object(org.apache.sysml.runtime.instructions.cp.CM_COV_Object) ReduceAll(org.apache.sysml.runtime.functionobjects.ReduceAll) Mean(org.apache.sysml.runtime.functionobjects.Mean) ReduceDiag(org.apache.sysml.runtime.functionobjects.ReduceDiag) CM(org.apache.sysml.runtime.functionobjects.CM) ReduceRow(org.apache.sysml.runtime.functionobjects.ReduceRow) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException) KahanObject(org.apache.sysml.runtime.instructions.cp.KahanObject) KahanPlus(org.apache.sysml.runtime.functionobjects.KahanPlus) KahanPlusSq(org.apache.sysml.runtime.functionobjects.KahanPlusSq) Builtin(org.apache.sysml.runtime.functionobjects.Builtin)

Example 2 with KahanPlusSq

use of org.apache.sysml.runtime.functionobjects.KahanPlusSq in project incubator-systemml by apache.

the class LibMatrixAgg method aggregateUnaryMatrixSparse.

private static void aggregateUnaryMatrixSparse(MatrixBlock in, MatrixBlock out, AggType optype, ValueFunction vFn, IndexFunction ixFn, int rl, int ru) throws DMLRuntimeException {
    final int m = in.rlen;
    final int n = in.clen;
    SparseBlock a = in.getSparseBlock();
    double[] c = out.getDenseBlock();
    switch(optype) {
        case //SUM via k+
        KAHAN_SUM:
            {
                KahanObject kbuff = new KahanObject(0, 0);
                if (// SUM
                ixFn instanceof ReduceAll)
                    s_uakp(a, c, m, n, kbuff, (KahanPlus) vFn, rl, ru);
                else if (//ROWSUM
                ixFn instanceof ReduceCol)
                    s_uarkp(a, c, m, n, kbuff, (KahanPlus) vFn, rl, ru);
                else if (//COLSUM
                ixFn instanceof ReduceRow)
                    s_uackp(a, c, m, n, kbuff, (KahanPlus) vFn, rl, ru);
                else if (//TRACE
                ixFn instanceof ReduceDiag)
                    s_uakptrace(a, c, m, n, kbuff, (KahanPlus) vFn, rl, ru);
                break;
            }
        case //SUM_SQ via k+
        KAHAN_SUM_SQ:
            {
                KahanObject kbuff = new KahanObject(0, 0);
                if (//SUM_SQ
                ixFn instanceof ReduceAll)
                    s_uasqkp(a, c, m, n, kbuff, (KahanPlusSq) vFn, rl, ru);
                else if (//ROWSUM_SQ
                ixFn instanceof ReduceCol)
                    s_uarsqkp(a, c, m, n, kbuff, (KahanPlusSq) vFn, rl, ru);
                else if (//COLSUM_SQ
                ixFn instanceof ReduceRow)
                    s_uacsqkp(a, c, m, n, kbuff, (KahanPlusSq) vFn, rl, ru);
                break;
            }
        case //CUMSUM
        CUM_KAHAN_SUM:
            {
                KahanObject kbuff = new KahanObject(0, 0);
                KahanPlus kplus = KahanPlus.getKahanPlusFnObject();
                s_ucumkp(a, null, c, m, n, kbuff, kplus, rl, ru);
                break;
            }
        case //CUMPROD
        CUM_PROD:
            {
                s_ucumm(a, null, c, m, n, rl, ru);
                break;
            }
        case CUM_MIN:
        case CUM_MAX:
            {
                double init = Double.MAX_VALUE * ((optype == AggType.CUM_MAX) ? -1 : 1);
                s_ucummxx(a, null, c, m, n, init, (Builtin) vFn, rl, ru);
                break;
            }
        case MIN:
        case //MAX/MIN
        MAX:
            {
                double init = Double.MAX_VALUE * ((optype == AggType.MAX) ? -1 : 1);
                if (// MIN/MAX
                ixFn instanceof ReduceAll)
                    s_uamxx(a, c, m, n, init, (Builtin) vFn, rl, ru);
                else if (//ROWMIN/ROWMAX
                ixFn instanceof ReduceCol)
                    s_uarmxx(a, c, m, n, init, (Builtin) vFn, rl, ru);
                else if (//COLMIN/COLMAX
                ixFn instanceof ReduceRow)
                    s_uacmxx(a, c, m, n, init, (Builtin) vFn, rl, ru);
                break;
            }
        case MAX_INDEX:
            {
                double init = -Double.MAX_VALUE;
                if (//ROWINDEXMAX
                ixFn instanceof ReduceCol)
                    s_uarimxx(a, c, m, n, init, (Builtin) vFn, rl, ru);
                break;
            }
        case MIN_INDEX:
            {
                double init = Double.MAX_VALUE;
                if (//ROWINDEXMAX
                ixFn instanceof ReduceCol)
                    s_uarimin(a, c, m, n, init, (Builtin) vFn, rl, ru);
                break;
            }
        case MEAN:
            {
                KahanObject kbuff = new KahanObject(0, 0);
                if (// MEAN
                ixFn instanceof ReduceAll)
                    s_uamean(a, c, m, n, kbuff, (Mean) vFn, rl, ru);
                else if (//ROWMEAN
                ixFn instanceof ReduceCol)
                    s_uarmean(a, c, m, n, kbuff, (Mean) vFn, rl, ru);
                else if (//COLMEAN
                ixFn instanceof ReduceRow)
                    s_uacmean(a, c, m, n, kbuff, (Mean) vFn, rl, ru);
                break;
            }
        case //VAR
        VAR:
            {
                CM_COV_Object cbuff = new CM_COV_Object();
                if (//VAR
                ixFn instanceof ReduceAll)
                    s_uavar(a, c, m, n, cbuff, (CM) vFn, rl, ru);
                else if (//ROWVAR
                ixFn instanceof ReduceCol)
                    s_uarvar(a, c, m, n, cbuff, (CM) vFn, rl, ru);
                else if (//COLVAR
                ixFn instanceof ReduceRow)
                    s_uacvar(a, c, m, n, cbuff, (CM) vFn, rl, ru);
                break;
            }
        case //PROD
        PROD:
            {
                if (// PROD
                ixFn instanceof ReduceAll)
                    s_uam(a, c, m, n, rl, ru);
                break;
            }
        default:
            throw new DMLRuntimeException("Unsupported aggregation type: " + optype);
    }
}
Also used : ReduceCol(org.apache.sysml.runtime.functionobjects.ReduceCol) CM_COV_Object(org.apache.sysml.runtime.instructions.cp.CM_COV_Object) ReduceAll(org.apache.sysml.runtime.functionobjects.ReduceAll) Mean(org.apache.sysml.runtime.functionobjects.Mean) ReduceDiag(org.apache.sysml.runtime.functionobjects.ReduceDiag) CM(org.apache.sysml.runtime.functionobjects.CM) ReduceRow(org.apache.sysml.runtime.functionobjects.ReduceRow) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException) KahanObject(org.apache.sysml.runtime.instructions.cp.KahanObject) KahanPlus(org.apache.sysml.runtime.functionobjects.KahanPlus) KahanPlusSq(org.apache.sysml.runtime.functionobjects.KahanPlusSq) Builtin(org.apache.sysml.runtime.functionobjects.Builtin)

Example 3 with KahanPlusSq

use of org.apache.sysml.runtime.functionobjects.KahanPlusSq in project incubator-systemml by apache.

the class LibMatrixCUDA method unaryAggregate.

//********************************************************************/
//***************** END OF MATRIX MULTIPLY Functions *****************/
//********************************************************************/
//********************************************************************/
//****************  UNARY AGGREGATE Functions ************************/
//********************************************************************/
/**
	 * Entry point to perform Unary aggregate operations on the GPU.
	 * The execution context object is used to allocate memory for the GPU.
	 * @param ec			Instance of {@link ExecutionContext}, from which the output variable will be allocated
	 * @param gCtx    a valid {@link GPUContext}
	 * @param instName name of the invoking instruction to record{@link Statistics}.
	 * @param in1			input matrix
	 * @param output	output matrix/scalar name
	 * @param op			Instance of {@link AggregateUnaryOperator} which encapsulates the direction of reduction/aggregation and the reduction operation.
	 * @throws DMLRuntimeException if {@link DMLRuntimeException} occurs
	 */
public static void unaryAggregate(ExecutionContext ec, GPUContext gCtx, String instName, MatrixObject in1, String output, AggregateUnaryOperator op) throws DMLRuntimeException {
    if (ec.getGPUContext() != gCtx)
        throw new DMLRuntimeException("GPU : Invalid internal state, the GPUContext set with the ExecutionContext is not the same used to run this LibMatrixCUDA function");
    LOG.trace("GPU : unaryAggregate" + ", GPUContext=" + gCtx);
    final int REDUCTION_ALL = 1;
    final int REDUCTION_ROW = 2;
    final int REDUCTION_COL = 3;
    final int REDUCTION_DIAG = 4;
    // A kahan sum implemention is not provided. is a "uak+" or other kahan operator is encountered,
    // it just does regular summation reduction.
    final int OP_PLUS = 1;
    final int OP_PLUS_SQ = 2;
    final int OP_MEAN = 3;
    final int OP_VARIANCE = 4;
    final int OP_MULTIPLY = 5;
    final int OP_MAX = 6;
    final int OP_MIN = 7;
    final int OP_MAXINDEX = 8;
    final int OP_MININDEX = 9;
    // Sanity Checks
    if (!in1.getGPUObject(gCtx).isAllocated())
        throw new DMLRuntimeException("Internal Error - The input is not allocated for a GPU Aggregate Unary:" + in1.getGPUObject(gCtx).isAllocated());
    boolean isSparse = in1.getGPUObject(gCtx).isSparse();
    IndexFunction indexFn = op.indexFn;
    AggregateOperator aggOp = op.aggOp;
    // Convert Reduction direction to a number to pass to CUDA kernel
    int reductionDirection = -1;
    if (indexFn instanceof ReduceAll) {
        reductionDirection = REDUCTION_ALL;
    } else if (indexFn instanceof ReduceRow) {
        reductionDirection = REDUCTION_ROW;
    } else if (indexFn instanceof ReduceCol) {
        reductionDirection = REDUCTION_COL;
    } else if (indexFn instanceof ReduceDiag) {
        reductionDirection = REDUCTION_DIAG;
    } else {
        throw new DMLRuntimeException("Internal Error - Invalid index function type, only reducing along rows, columns, diagonals or all elements is supported in Aggregate Unary operations");
    }
    assert reductionDirection != -1 : "Internal Error - Incorrect type of reduction direction set for aggregate unary GPU instruction";
    // Convert function type to a number to pass to the CUDA Kernel
    int opIndex = -1;
    if (aggOp.increOp.fn instanceof KahanPlus) {
        opIndex = OP_PLUS;
    } else if (aggOp.increOp.fn instanceof KahanPlusSq) {
        opIndex = OP_PLUS_SQ;
    } else if (aggOp.increOp.fn instanceof Mean) {
        opIndex = OP_MEAN;
    } else if (aggOp.increOp.fn instanceof CM) {
        assert ((CM) aggOp.increOp.fn).getAggOpType() == CMOperator.AggregateOperationTypes.VARIANCE : "Internal Error - Invalid Type of CM operator for Aggregate Unary operation on GPU";
        opIndex = OP_VARIANCE;
    } else if (aggOp.increOp.fn instanceof Plus) {
        opIndex = OP_PLUS;
    } else if (aggOp.increOp.fn instanceof Multiply) {
        opIndex = OP_MULTIPLY;
    } else if (aggOp.increOp.fn instanceof Builtin) {
        Builtin b = (Builtin) aggOp.increOp.fn;
        switch(b.bFunc) {
            case MAX:
                opIndex = OP_MAX;
                break;
            case MIN:
                opIndex = OP_MIN;
                break;
            case MAXINDEX:
                opIndex = OP_MAXINDEX;
                break;
            case MININDEX:
                opIndex = OP_MININDEX;
                break;
            default:
                new DMLRuntimeException("Internal Error - Unsupported Builtin Function for Aggregate unary being done on GPU");
        }
    } else {
        throw new DMLRuntimeException("Internal Error - Aggregate operator has invalid Value function");
    }
    assert opIndex != -1 : "Internal Error - Incorrect type of operation set for aggregate unary GPU instruction";
    int rlen = (int) in1.getNumRows();
    int clen = (int) in1.getNumColumns();
    if (isSparse) {
        // The strategy for the time being is to convert sparse to dense
        // until a sparse specific kernel is written.
        in1.getGPUObject(gCtx).sparseToDense(instName);
    // long nnz = in1.getNnz();
    // assert nnz > 0 : "Internal Error - number of non zeroes set to " + nnz + " in Aggregate Binary for GPU";
    // MatrixObject out = ec.getSparseMatrixOutputForGPUInstruction(output, nnz);
    // throw new DMLRuntimeException("Internal Error - Not implemented");
    }
    Pointer out = null;
    if (reductionDirection == REDUCTION_COL || reductionDirection == REDUCTION_ROW) {
        // Matrix output
        MatrixObject out1 = getDenseMatrixOutputForGPUInstruction(ec, instName, output);
        out = getDensePointer(gCtx, out1, instName);
    }
    Pointer in = getDensePointer(gCtx, in1, instName);
    int size = rlen * clen;
    // For scalars, set the scalar output in the Execution Context object
    switch(opIndex) {
        case OP_PLUS:
            {
                switch(reductionDirection) {
                    case REDUCTION_ALL:
                        {
                            double result = reduceAll(gCtx, instName, "reduce_sum", in, size);
                            ec.setScalarOutput(output, new DoubleObject(result));
                            break;
                        }
                    case REDUCTION_COL:
                        {
                            // The names are a bit misleading, REDUCTION_COL refers to the direction (reduce all elements in a column)
                            reduceRow(gCtx, instName, "reduce_row_sum", in, out, rlen, clen);
                            break;
                        }
                    case REDUCTION_ROW:
                        {
                            reduceCol(gCtx, instName, "reduce_col_sum", in, out, rlen, clen);
                            break;
                        }
                    case REDUCTION_DIAG:
                        throw new DMLRuntimeException("Internal Error - Row, Column and Diag summation not implemented yet");
                }
                break;
            }
        case OP_PLUS_SQ:
            {
                // Calculate the squares in a temporary object tmp
                Pointer tmp = gCtx.allocate(instName, size * Sizeof.DOUBLE);
                squareMatrix(gCtx, instName, in, tmp, rlen, clen);
                // Then do the sum on the temporary object and free it
                switch(reductionDirection) {
                    case REDUCTION_ALL:
                        {
                            double result = reduceAll(gCtx, instName, "reduce_sum", tmp, size);
                            ec.setScalarOutput(output, new DoubleObject(result));
                            break;
                        }
                    case REDUCTION_COL:
                        {
                            // The names are a bit misleading, REDUCTION_COL refers to the direction (reduce all elements in a column)
                            reduceRow(gCtx, instName, "reduce_row_sum", tmp, out, rlen, clen);
                            break;
                        }
                    case REDUCTION_ROW:
                        {
                            reduceCol(gCtx, instName, "reduce_col_sum", tmp, out, rlen, clen);
                            break;
                        }
                    default:
                        throw new DMLRuntimeException("Internal Error - Unsupported reduction direction for summation squared");
                }
                gCtx.cudaFreeHelper(instName, tmp);
                break;
            }
        case OP_MEAN:
            {
                switch(reductionDirection) {
                    case REDUCTION_ALL:
                        {
                            double result = reduceAll(gCtx, instName, "reduce_sum", in, size);
                            double mean = result / size;
                            ec.setScalarOutput(output, new DoubleObject(mean));
                            break;
                        }
                    case REDUCTION_COL:
                        {
                            reduceRow(gCtx, instName, "reduce_row_mean", in, out, rlen, clen);
                            break;
                        }
                    case REDUCTION_ROW:
                        {
                            reduceCol(gCtx, instName, "reduce_col_mean", in, out, rlen, clen);
                            break;
                        }
                    default:
                        throw new DMLRuntimeException("Internal Error - Unsupported reduction direction for mean");
                }
                break;
            }
        case OP_MULTIPLY:
            {
                switch(reductionDirection) {
                    case REDUCTION_ALL:
                        {
                            double result = reduceAll(gCtx, instName, "reduce_prod", in, size);
                            ec.setScalarOutput(output, new DoubleObject(result));
                            break;
                        }
                    default:
                        throw new DMLRuntimeException("Internal Error - Unsupported reduction direction for multiplication");
                }
                break;
            }
        case OP_MAX:
            {
                switch(reductionDirection) {
                    case REDUCTION_ALL:
                        {
                            double result = reduceAll(gCtx, instName, "reduce_max", in, size);
                            ec.setScalarOutput(output, new DoubleObject(result));
                            break;
                        }
                    case REDUCTION_COL:
                        {
                            reduceRow(gCtx, instName, "reduce_row_max", in, out, rlen, clen);
                            break;
                        }
                    case REDUCTION_ROW:
                        {
                            reduceCol(gCtx, instName, "reduce_col_max", in, out, rlen, clen);
                            break;
                        }
                    default:
                        throw new DMLRuntimeException("Internal Error - Unsupported reduction direction for max");
                }
                break;
            }
        case OP_MIN:
            {
                switch(reductionDirection) {
                    case REDUCTION_ALL:
                        {
                            double result = reduceAll(gCtx, instName, "reduce_min", in, size);
                            ec.setScalarOutput(output, new DoubleObject(result));
                            break;
                        }
                    case REDUCTION_COL:
                        {
                            reduceRow(gCtx, instName, "reduce_row_min", in, out, rlen, clen);
                            break;
                        }
                    case REDUCTION_ROW:
                        {
                            reduceCol(gCtx, instName, "reduce_col_min", in, out, rlen, clen);
                            break;
                        }
                    default:
                        throw new DMLRuntimeException("Internal Error - Unsupported reduction direction for min");
                }
                break;
            }
        case OP_VARIANCE:
            {
                // Temporary GPU array for
                Pointer tmp = gCtx.allocate(instName, size * Sizeof.DOUBLE);
                Pointer tmp2 = gCtx.allocate(instName, size * Sizeof.DOUBLE);
                switch(reductionDirection) {
                    case REDUCTION_ALL:
                        {
                            double result = reduceAll(gCtx, instName, "reduce_sum", in, size);
                            double mean = result / size;
                            // Subtract mean from every element in the matrix
                            ScalarOperator minusOp = new RightScalarOperator(Minus.getMinusFnObject(), mean);
                            matrixScalarOp(gCtx, instName, in, mean, rlen, clen, tmp, minusOp);
                            squareMatrix(gCtx, instName, tmp, tmp2, rlen, clen);
                            double result2 = reduceAll(gCtx, instName, "reduce_sum", tmp2, size);
                            double variance = result2 / (size - 1);
                            ec.setScalarOutput(output, new DoubleObject(variance));
                            break;
                        }
                    case REDUCTION_COL:
                        {
                            reduceRow(gCtx, instName, "reduce_row_mean", in, out, rlen, clen);
                            // Subtract the row-wise mean from every element in the matrix
                            BinaryOperator minusOp = new BinaryOperator(Minus.getMinusFnObject());
                            matrixMatrixOp(gCtx, instName, in, out, rlen, clen, VectorShape.NONE.code(), VectorShape.COLUMN.code(), tmp, minusOp);
                            squareMatrix(gCtx, instName, tmp, tmp2, rlen, clen);
                            Pointer tmpRow = gCtx.allocate(instName, rlen * Sizeof.DOUBLE);
                            reduceRow(gCtx, instName, "reduce_row_sum", tmp2, tmpRow, rlen, clen);
                            ScalarOperator divideOp = new RightScalarOperator(Divide.getDivideFnObject(), clen - 1);
                            matrixScalarOp(gCtx, instName, tmpRow, clen - 1, rlen, 1, out, divideOp);
                            gCtx.cudaFreeHelper(instName, tmpRow);
                            break;
                        }
                    case REDUCTION_ROW:
                        {
                            reduceCol(gCtx, instName, "reduce_col_mean", in, out, rlen, clen);
                            // Subtract the columns-wise mean from every element in the matrix
                            BinaryOperator minusOp = new BinaryOperator(Minus.getMinusFnObject());
                            matrixMatrixOp(gCtx, instName, in, out, rlen, clen, VectorShape.NONE.code(), VectorShape.ROW.code(), tmp, minusOp);
                            squareMatrix(gCtx, instName, tmp, tmp2, rlen, clen);
                            Pointer tmpCol = gCtx.allocate(instName, clen * Sizeof.DOUBLE);
                            reduceCol(gCtx, instName, "reduce_col_sum", tmp2, tmpCol, rlen, clen);
                            ScalarOperator divideOp = new RightScalarOperator(Divide.getDivideFnObject(), rlen - 1);
                            matrixScalarOp(gCtx, instName, tmpCol, rlen - 1, 1, clen, out, divideOp);
                            gCtx.cudaFreeHelper(instName, tmpCol);
                            break;
                        }
                    default:
                        throw new DMLRuntimeException("Internal Error - Unsupported reduction direction for variance");
                }
                gCtx.cudaFreeHelper(instName, tmp);
                gCtx.cudaFreeHelper(instName, tmp2);
                break;
            }
        case OP_MAXINDEX:
            {
                switch(reductionDirection) {
                    case REDUCTION_COL:
                        throw new DMLRuntimeException("Internal Error - Column maxindex of matrix not implemented yet for GPU ");
                    default:
                        throw new DMLRuntimeException("Internal Error - Unsupported reduction direction for maxindex");
                }
            // break;
            }
        case OP_MININDEX:
            {
                switch(reductionDirection) {
                    case REDUCTION_COL:
                        throw new DMLRuntimeException("Internal Error - Column minindex of matrix not implemented yet for GPU ");
                    default:
                        throw new DMLRuntimeException("Internal Error - Unsupported reduction direction for minindex");
                }
            // break;
            }
        default:
            throw new DMLRuntimeException("Internal Error - Invalid GPU Unary aggregate function!");
    }
}
Also used : ReduceCol(org.apache.sysml.runtime.functionobjects.ReduceCol) ScalarOperator(org.apache.sysml.runtime.matrix.operators.ScalarOperator) LeftScalarOperator(org.apache.sysml.runtime.matrix.operators.LeftScalarOperator) RightScalarOperator(org.apache.sysml.runtime.matrix.operators.RightScalarOperator) ReduceAll(org.apache.sysml.runtime.functionobjects.ReduceAll) Mean(org.apache.sysml.runtime.functionobjects.Mean) MatrixObject(org.apache.sysml.runtime.controlprogram.caching.MatrixObject) ReduceDiag(org.apache.sysml.runtime.functionobjects.ReduceDiag) DoubleObject(org.apache.sysml.runtime.instructions.cp.DoubleObject) CM(org.apache.sysml.runtime.functionobjects.CM) CSRPointer(org.apache.sysml.runtime.instructions.gpu.context.CSRPointer) Pointer(jcuda.Pointer) RightScalarOperator(org.apache.sysml.runtime.matrix.operators.RightScalarOperator) ReduceRow(org.apache.sysml.runtime.functionobjects.ReduceRow) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException) IndexFunction(org.apache.sysml.runtime.functionobjects.IndexFunction) Multiply(org.apache.sysml.runtime.functionobjects.Multiply) AggregateOperator(org.apache.sysml.runtime.matrix.operators.AggregateOperator) KahanPlus(org.apache.sysml.runtime.functionobjects.KahanPlus) KahanPlusSq(org.apache.sysml.runtime.functionobjects.KahanPlusSq) KahanPlus(org.apache.sysml.runtime.functionobjects.KahanPlus) Plus(org.apache.sysml.runtime.functionobjects.Plus) BinaryOperator(org.apache.sysml.runtime.matrix.operators.BinaryOperator) Builtin(org.apache.sysml.runtime.functionobjects.Builtin)

Example 4 with KahanPlusSq

use of org.apache.sysml.runtime.functionobjects.KahanPlusSq in project incubator-systemml by apache.

the class CompressedMatrixBlock method aggregateUnaryOperations.

@Override
public MatrixValue aggregateUnaryOperations(AggregateUnaryOperator op, MatrixValue result, int blockingFactorRow, int blockingFactorCol, MatrixIndexes indexesIn, boolean inCP) throws DMLRuntimeException {
    //call uncompressed matrix mult if necessary
    if (!isCompressed()) {
        return super.aggregateUnaryOperations(op, result, blockingFactorRow, blockingFactorCol, indexesIn, inCP);
    }
    //check for supported operations
    if (!(op.aggOp.increOp.fn instanceof KahanPlus || op.aggOp.increOp.fn instanceof KahanPlusSq || (op.aggOp.increOp.fn instanceof Builtin && (((Builtin) op.aggOp.increOp.fn).getBuiltinCode() == BuiltinCode.MIN || ((Builtin) op.aggOp.increOp.fn).getBuiltinCode() == BuiltinCode.MAX)))) {
        throw new DMLRuntimeException("Unary aggregates other than sum/sumsq/min/max not supported yet.");
    }
    Timing time = LOG.isDebugEnabled() ? new Timing(true) : null;
    //prepare output dimensions
    CellIndex tempCellIndex = new CellIndex(-1, -1);
    op.indexFn.computeDimension(rlen, clen, tempCellIndex);
    if (op.aggOp.correctionExists) {
        switch(op.aggOp.correctionLocation) {
            case LASTROW:
                tempCellIndex.row++;
                break;
            case LASTCOLUMN:
                tempCellIndex.column++;
                break;
            case LASTTWOROWS:
                tempCellIndex.row += 2;
                break;
            case LASTTWOCOLUMNS:
                tempCellIndex.column += 2;
                break;
            default:
                throw new DMLRuntimeException("unrecognized correctionLocation: " + op.aggOp.correctionLocation);
        }
    }
    // initialize and allocate the result
    if (result == null)
        result = new MatrixBlock(tempCellIndex.row, tempCellIndex.column, false);
    else
        result.reset(tempCellIndex.row, tempCellIndex.column, false);
    MatrixBlock ret = (MatrixBlock) result;
    ret.allocateDenseBlock();
    //special handling init value for rowmins/rowmax
    if (op.indexFn instanceof ReduceCol && op.aggOp.increOp.fn instanceof Builtin) {
        double val = Double.MAX_VALUE * ((((Builtin) op.aggOp.increOp.fn).getBuiltinCode() == BuiltinCode.MAX) ? -1 : 1);
        Arrays.fill(ret.getDenseBlock(), val);
    }
    //core unary aggregate
    if (op.getNumThreads() > 1 && getExactSizeOnDisk() > MIN_PAR_AGG_THRESHOLD) {
        //multi-threaded execution of all groups 
        ArrayList<ColGroup>[] grpParts = createStaticTaskPartitioning((op.indexFn instanceof ReduceCol) ? 1 : op.getNumThreads(), false);
        ColGroupUncompressed uc = getUncompressedColGroup();
        try {
            //compute uncompressed column group in parallel (otherwise bottleneck)
            if (uc != null)
                ret = (MatrixBlock) uc.getData().aggregateUnaryOperations(op, ret, blockingFactorRow, blockingFactorCol, indexesIn, false);
            //compute all compressed column groups
            ExecutorService pool = Executors.newFixedThreadPool(op.getNumThreads());
            ArrayList<UnaryAggregateTask> tasks = new ArrayList<UnaryAggregateTask>();
            if (op.indexFn instanceof ReduceCol && grpParts.length > 0) {
                int blklen = BitmapEncoder.getAlignedBlocksize((int) (Math.ceil((double) rlen / op.getNumThreads())));
                for (int i = 0; i < op.getNumThreads() & i * blklen < rlen; i++) tasks.add(new UnaryAggregateTask(grpParts[0], ret, i * blklen, Math.min((i + 1) * blklen, rlen), op));
            } else
                for (ArrayList<ColGroup> grp : grpParts) tasks.add(new UnaryAggregateTask(grp, ret, 0, rlen, op));
            List<Future<MatrixBlock>> rtasks = pool.invokeAll(tasks);
            pool.shutdown();
            //aggregate partial results
            if (op.indexFn instanceof ReduceAll) {
                if (op.aggOp.increOp.fn instanceof KahanFunction) {
                    KahanObject kbuff = new KahanObject(ret.quickGetValue(0, 0), 0);
                    for (Future<MatrixBlock> rtask : rtasks) {
                        double tmp = rtask.get().quickGetValue(0, 0);
                        ((KahanFunction) op.aggOp.increOp.fn).execute2(kbuff, tmp);
                    }
                    ret.quickSetValue(0, 0, kbuff._sum);
                } else {
                    double val = ret.quickGetValue(0, 0);
                    for (Future<MatrixBlock> rtask : rtasks) {
                        double tmp = rtask.get().quickGetValue(0, 0);
                        val = op.aggOp.increOp.fn.execute(val, tmp);
                    }
                    ret.quickSetValue(0, 0, val);
                }
            }
        } catch (Exception ex) {
            throw new DMLRuntimeException(ex);
        }
    } else {
        //process UC column group
        for (ColGroup grp : _colGroups) if (grp instanceof ColGroupUncompressed)
            grp.unaryAggregateOperations(op, ret);
        //process OLE/RLE column groups
        aggregateUnaryOperations(op, _colGroups, ret, 0, rlen);
    }
    //special handling zeros for rowmins/rowmax
    if (op.indexFn instanceof ReduceCol && op.aggOp.increOp.fn instanceof Builtin) {
        int[] rnnz = new int[rlen];
        for (ColGroup grp : _colGroups) grp.countNonZerosPerRow(rnnz, 0, rlen);
        Builtin builtin = (Builtin) op.aggOp.increOp.fn;
        for (int i = 0; i < rlen; i++) if (rnnz[i] < clen)
            ret.quickSetValue(i, 0, builtin.execute2(ret.quickGetValue(i, 0), 0));
    }
    //drop correction if necessary
    if (op.aggOp.correctionExists && inCP)
        ret.dropLastRowsOrColums(op.aggOp.correctionLocation);
    //post-processing
    ret.recomputeNonZeros();
    if (LOG.isDebugEnabled())
        LOG.debug("Compressed uagg k=" + op.getNumThreads() + " in " + time.stop());
    return ret;
}
Also used : ReduceAll(org.apache.sysml.runtime.functionobjects.ReduceAll) MatrixBlock(org.apache.sysml.runtime.matrix.data.MatrixBlock) ArrayList(java.util.ArrayList) KahanFunction(org.apache.sysml.runtime.functionobjects.KahanFunction) KahanPlusSq(org.apache.sysml.runtime.functionobjects.KahanPlusSq) ReduceCol(org.apache.sysml.runtime.functionobjects.ReduceCol) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException) IOException(java.io.IOException) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException) ExecutorService(java.util.concurrent.ExecutorService) KahanObject(org.apache.sysml.runtime.instructions.cp.KahanObject) KahanPlus(org.apache.sysml.runtime.functionobjects.KahanPlus) Future(java.util.concurrent.Future) Timing(org.apache.sysml.runtime.controlprogram.parfor.stat.Timing) Builtin(org.apache.sysml.runtime.functionobjects.Builtin)

Example 5 with KahanPlusSq

use of org.apache.sysml.runtime.functionobjects.KahanPlusSq in project incubator-systemml by apache.

the class LibMatrixAgg method getAggType.

private static AggType getAggType(AggregateUnaryOperator op) {
    ValueFunction vfn = op.aggOp.increOp.fn;
    IndexFunction ifn = op.indexFn;
    //(kahan) sum / sum squared / trace (for ReduceDiag)
    if (vfn instanceof KahanFunction && (op.aggOp.correctionLocation == CorrectionLocationType.LASTCOLUMN || op.aggOp.correctionLocation == CorrectionLocationType.LASTROW) && (ifn instanceof ReduceAll || ifn instanceof ReduceCol || ifn instanceof ReduceRow || ifn instanceof ReduceDiag)) {
        if (vfn instanceof KahanPlus)
            return AggType.KAHAN_SUM;
        else if (vfn instanceof KahanPlusSq)
            return AggType.KAHAN_SUM_SQ;
    }
    //mean
    if (vfn instanceof Mean && (op.aggOp.correctionLocation == CorrectionLocationType.LASTTWOCOLUMNS || op.aggOp.correctionLocation == CorrectionLocationType.LASTTWOROWS) && (ifn instanceof ReduceAll || ifn instanceof ReduceCol || ifn instanceof ReduceRow)) {
        return AggType.MEAN;
    }
    //variance
    if (vfn instanceof CM && ((CM) vfn).getAggOpType() == AggregateOperationTypes.VARIANCE && (op.aggOp.correctionLocation == CorrectionLocationType.LASTFOURCOLUMNS || op.aggOp.correctionLocation == CorrectionLocationType.LASTFOURROWS) && (ifn instanceof ReduceAll || ifn instanceof ReduceCol || ifn instanceof ReduceRow)) {
        return AggType.VAR;
    }
    //prod
    if (vfn instanceof Multiply && ifn instanceof ReduceAll) {
        return AggType.PROD;
    }
    //min / max
    if (vfn instanceof Builtin && (ifn instanceof ReduceAll || ifn instanceof ReduceCol || ifn instanceof ReduceRow)) {
        BuiltinCode bfcode = ((Builtin) vfn).bFunc;
        switch(bfcode) {
            case MAX:
                return AggType.MAX;
            case MIN:
                return AggType.MIN;
            case MAXINDEX:
                return AggType.MAX_INDEX;
            case MININDEX:
                return AggType.MIN_INDEX;
            //do nothing
            default:
        }
    }
    return AggType.INVALID;
}
Also used : ValueFunction(org.apache.sysml.runtime.functionobjects.ValueFunction) ReduceCol(org.apache.sysml.runtime.functionobjects.ReduceCol) ReduceAll(org.apache.sysml.runtime.functionobjects.ReduceAll) Mean(org.apache.sysml.runtime.functionobjects.Mean) ReduceDiag(org.apache.sysml.runtime.functionobjects.ReduceDiag) CM(org.apache.sysml.runtime.functionobjects.CM) ReduceRow(org.apache.sysml.runtime.functionobjects.ReduceRow) IndexFunction(org.apache.sysml.runtime.functionobjects.IndexFunction) BuiltinCode(org.apache.sysml.runtime.functionobjects.Builtin.BuiltinCode) KahanFunction(org.apache.sysml.runtime.functionobjects.KahanFunction) Multiply(org.apache.sysml.runtime.functionobjects.Multiply) KahanPlus(org.apache.sysml.runtime.functionobjects.KahanPlus) KahanPlusSq(org.apache.sysml.runtime.functionobjects.KahanPlusSq) Builtin(org.apache.sysml.runtime.functionobjects.Builtin)

Aggregations

Builtin (org.apache.sysml.runtime.functionobjects.Builtin)5 KahanPlus (org.apache.sysml.runtime.functionobjects.KahanPlus)5 KahanPlusSq (org.apache.sysml.runtime.functionobjects.KahanPlusSq)5 ReduceAll (org.apache.sysml.runtime.functionobjects.ReduceAll)5 ReduceCol (org.apache.sysml.runtime.functionobjects.ReduceCol)5 DMLRuntimeException (org.apache.sysml.runtime.DMLRuntimeException)4 CM (org.apache.sysml.runtime.functionobjects.CM)4 Mean (org.apache.sysml.runtime.functionobjects.Mean)4 ReduceDiag (org.apache.sysml.runtime.functionobjects.ReduceDiag)4 ReduceRow (org.apache.sysml.runtime.functionobjects.ReduceRow)4 KahanObject (org.apache.sysml.runtime.instructions.cp.KahanObject)3 IndexFunction (org.apache.sysml.runtime.functionobjects.IndexFunction)2 KahanFunction (org.apache.sysml.runtime.functionobjects.KahanFunction)2 Multiply (org.apache.sysml.runtime.functionobjects.Multiply)2 CM_COV_Object (org.apache.sysml.runtime.instructions.cp.CM_COV_Object)2 IOException (java.io.IOException)1 ArrayList (java.util.ArrayList)1 ExecutorService (java.util.concurrent.ExecutorService)1 Future (java.util.concurrent.Future)1 Pointer (jcuda.Pointer)1