Search in sources :

Example 6 with KahanPlus

use of org.apache.sysml.runtime.functionobjects.KahanPlus in project incubator-systemml by apache.

the class CompressedMatrixBlock method aggregateUnaryOperations.

/**
 * Computes a unary aggregate over this block, working directly on the column groups
 * when the block is compressed and delegating to the superclass otherwise.
 *
 * <p>On the compressed path only sum, sum-of-squares, min, and max are supported.
 * With more than one thread and a sufficiently large block, column groups are
 * aggregated by tasks on a thread pool; failures of any task are propagated.
 *
 * @param op                the unary aggregate operator (aggregate function, index function, thread count)
 * @param result            output block to reuse, or {@code null} to allocate a new {@code MatrixBlock}
 * @param blockingFactorRow only forwarded to the superclass on the uncompressed path
 * @param blockingFactorCol only forwarded to the superclass on the uncompressed path
 * @param indexesIn         only forwarded to the superclass on the uncompressed path
 * @param inCP              if {@code true} and the operator carries a correction, the correction
 *                          rows/columns are dropped from the result before returning
 * @return the aggregated result block
 * @throws DMLRuntimeException if the aggregate is unsupported, the correction location is
 *                             unknown, or the parallel aggregation fails
 */
@Override
public MatrixValue aggregateUnaryOperations(AggregateUnaryOperator op, MatrixValue result, int blockingFactorRow, int blockingFactorCol, MatrixIndexes indexesIn, boolean inCP) {
    // fall back to the uncompressed unary aggregate if necessary
    if (!isCompressed()) {
        return super.aggregateUnaryOperations(op, result, blockingFactorRow, blockingFactorCol, indexesIn, inCP);
    }
    // check for supported operations
    if (!(op.aggOp.increOp.fn instanceof KahanPlus || op.aggOp.increOp.fn instanceof KahanPlusSq || (op.aggOp.increOp.fn instanceof Builtin && (((Builtin) op.aggOp.increOp.fn).getBuiltinCode() == BuiltinCode.MIN || ((Builtin) op.aggOp.increOp.fn).getBuiltinCode() == BuiltinCode.MAX)))) {
        throw new DMLRuntimeException("Unary aggregates other than sum/sumsq/min/max not supported yet.");
    }
    Timing time = LOG.isDebugEnabled() ? new Timing(true) : null;
    // prepare output dimensions (enlarged by the correction rows/columns, if any)
    CellIndex tempCellIndex = new CellIndex(-1, -1);
    op.indexFn.computeDimension(rlen, clen, tempCellIndex);
    if (op.aggOp.correctionExists) {
        switch(op.aggOp.correctionLocation) {
            case LASTROW:
                tempCellIndex.row++;
                break;
            case LASTCOLUMN:
                tempCellIndex.column++;
                break;
            case LASTTWOROWS:
                tempCellIndex.row += 2;
                break;
            case LASTTWOCOLUMNS:
                tempCellIndex.column += 2;
                break;
            default:
                throw new DMLRuntimeException("unrecognized correctionLocation: " + op.aggOp.correctionLocation);
        }
    }
    // initialize and allocate the result
    if (result == null)
        result = new MatrixBlock(tempCellIndex.row, tempCellIndex.column, false);
    else
        result.reset(tempCellIndex.row, tempCellIndex.column, false);
    MatrixBlock ret = (MatrixBlock) result;
    ret.allocateDenseBlock();
    // special handling init value for rowmins/rowmax
    if (op.indexFn instanceof ReduceCol && op.aggOp.increOp.fn instanceof Builtin) {
        double val = (((Builtin) op.aggOp.increOp.fn).getBuiltinCode() == BuiltinCode.MAX) ? Double.NEGATIVE_INFINITY : Double.POSITIVE_INFINITY;
        ret.getDenseBlock().set(val);
    }
    // core unary aggregate
    if (op.getNumThreads() > 1 && getExactSizeOnDisk() > MIN_PAR_AGG_THRESHOLD) {
        // multi-threaded execution of all groups
        ArrayList<ColGroup>[] grpParts = createStaticTaskPartitioning((op.indexFn instanceof ReduceCol) ? 1 : op.getNumThreads(), false);
        ColGroupUncompressed uc = getUncompressedColGroup();
        try {
            // compute uncompressed column group in parallel (otherwise bottleneck)
            if (uc != null)
                uc.unaryAggregateOperations(op, ret);
            // compute all compressed column groups
            ExecutorService pool = CommonThreadPool.get(op.getNumThreads());
            List<Future<MatrixBlock>> rtasks;
            try {
                ArrayList<UnaryAggregateTask> tasks = new ArrayList<>();
                if (op.indexFn instanceof ReduceCol && grpParts.length > 0) {
                    // row aggregates: one group partition, parallelized over aligned row ranges
                    int blklen = BitmapEncoder.getAlignedBlocksize((int) (Math.ceil((double) rlen / op.getNumThreads())));
                    for (int i = 0; i < op.getNumThreads() && i * blklen < rlen; i++)
                        tasks.add(new UnaryAggregateTask(grpParts[0], ret, i * blklen, Math.min((i + 1) * blklen, rlen), op));
                } else {
                    // otherwise: one task per group partition over all rows
                    for (ArrayList<ColGroup> grp : grpParts)
                        tasks.add(new UnaryAggregateTask(grp, ret, 0, rlen, op));
                }
                rtasks = pool.invokeAll(tasks);
            } finally {
                // always release the pool, even if task creation or invocation failed
                pool.shutdown();
            }
            // aggregate partial results
            if (op.indexFn instanceof ReduceAll) {
                if (op.aggOp.increOp.fn instanceof KahanFunction) {
                    KahanObject kbuff = new KahanObject(ret.quickGetValue(0, 0), 0);
                    for (Future<MatrixBlock> rtask : rtasks) {
                        double tmp = rtask.get().quickGetValue(0, 0);
                        ((KahanFunction) op.aggOp.increOp.fn).execute2(kbuff, tmp);
                    }
                    ret.quickSetValue(0, 0, kbuff._sum);
                } else {
                    double val = ret.quickGetValue(0, 0);
                    for (Future<MatrixBlock> rtask : rtasks) {
                        double tmp = rtask.get().quickGetValue(0, 0);
                        val = op.aggOp.increOp.fn.execute(val, tmp);
                    }
                    ret.quickSetValue(0, 0, val);
                }
            } else {
                // no partial results to merge here, but inspect every future so that
                // a failed task surfaces as an error instead of a silently wrong result
                for (Future<MatrixBlock> rtask : rtasks)
                    rtask.get();
            }
        } catch (Exception ex) {
            // preserve the interrupt status for callers further up the stack
            if (ex instanceof InterruptedException)
                Thread.currentThread().interrupt();
            throw new DMLRuntimeException(ex);
        }
    } else {
        // process UC column group
        for (ColGroup grp : _colGroups) if (grp instanceof ColGroupUncompressed)
            grp.unaryAggregateOperations(op, ret);
        // process OLE/RLE column groups
        aggregateUnaryOperations(op, _colGroups, ret, 0, rlen);
    }
    // special handling zeros for rowmins/rowmax: a row with fewer non-zeros than
    // columns contains at least one zero, which must take part in the min/max
    if (op.indexFn instanceof ReduceCol && op.aggOp.increOp.fn instanceof Builtin) {
        int[] rnnz = new int[rlen];
        for (ColGroup grp : _colGroups) grp.countNonZerosPerRow(rnnz, 0, rlen);
        Builtin builtin = (Builtin) op.aggOp.increOp.fn;
        for (int i = 0; i < rlen; i++) if (rnnz[i] < clen)
            ret.quickSetValue(i, 0, builtin.execute2(ret.quickGetValue(i, 0), 0));
    }
    // drop correction if necessary
    if (op.aggOp.correctionExists && inCP)
        ret.dropLastRowsOrColumns(op.aggOp.correctionLocation);
    // post-processing
    ret.recomputeNonZeros();
    // guard against debug logging having been enabled after the timer decision above
    if (time != null && LOG.isDebugEnabled())
        LOG.debug("Compressed uagg k=" + op.getNumThreads() + " in " + time.stop());
    return ret;
}
Also used : ReduceAll(org.apache.sysml.runtime.functionobjects.ReduceAll) MatrixBlock(org.apache.sysml.runtime.matrix.data.MatrixBlock) ArrayList(java.util.ArrayList) KahanFunction(org.apache.sysml.runtime.functionobjects.KahanFunction) KahanPlusSq(org.apache.sysml.runtime.functionobjects.KahanPlusSq) ReduceCol(org.apache.sysml.runtime.functionobjects.ReduceCol) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException) IOException(java.io.IOException) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException) ExecutorService(java.util.concurrent.ExecutorService) KahanObject(org.apache.sysml.runtime.instructions.cp.KahanObject) KahanPlus(org.apache.sysml.runtime.functionobjects.KahanPlus) Future(java.util.concurrent.Future) Timing(org.apache.sysml.runtime.controlprogram.parfor.stat.Timing) Builtin(org.apache.sysml.runtime.functionobjects.Builtin)

Example 7 with KahanPlus

use of org.apache.sysml.runtime.functionobjects.KahanPlus in project incubator-systemml by apache.

the class LibMatrixAgg method aggregateBinaryMatrixLastRowSparseGeneric.

/**
 * Adds the sparse input block into {@code aggVal} using Kahan-compensated addition,
 * where the last row ({@code in.rlen - 1}) of both blocks is read and written as the
 * per-column correction term.
 *
 * @param in     sparse input block; an empty block is a no-op
 * @param aggVal aggregate block updated in place (sums and last-row corrections)
 */
private static void aggregateBinaryMatrixLastRowSparseGeneric(MatrixBlock in, MatrixBlock aggVal) {
    // sparse-safe operation: nothing to add for an empty input
    if (in.isEmptyBlock(false)) {
        return;
    }
    final SparseBlock sblock = in.getSparseBlock();
    final KahanObject kbuff = new KahanObject(0, 0);
    final KahanPlus kplus = KahanPlus.getKahanPlusFnObject();
    final int corrRow = in.rlen - 1;
    // the final visited row is excluded from the value rows
    final int lastRow = Math.min(sblock.numRows(), in.rlen) - 1;
    for (int r = 0; r < lastRow; r++) {
        if (sblock.isEmpty(r)) {
            continue;
        }
        final int begin = sblock.pos(r);
        final int count = sblock.size(r);
        final int[] colIdx = sblock.indexes(r);
        final double[] vals = sblock.values(r);
        final int end = begin + count;
        for (int k = begin; k < end; k++) {
            final int col = colIdx[k];
            final double inCorr = in.quickGetValue(corrRow, col);
            // load the running sum and correction for this cell's column
            kbuff._sum = aggVal.quickGetValue(r, col);
            kbuff._correction = aggVal.quickGetValue(corrRow, col);
            kplus.execute(kbuff, vals[k], inCorr);
            // write the updated sum and correction back
            aggVal.quickSetValue(r, col, kbuff._sum);
            aggVal.quickSetValue(corrRow, col, kbuff._correction);
        }
    }
    // note: nnz of aggVal/aggCorr maintained internally
    aggVal.examSparsity();
}
Also used : KahanObject(org.apache.sysml.runtime.instructions.cp.KahanObject) KahanPlus(org.apache.sysml.runtime.functionobjects.KahanPlus)

Example 8 with KahanPlus

use of org.apache.sysml.runtime.functionobjects.KahanPlus in project incubator-systemml by apache.

the class LibMatrixAgg method cumaggregateUnaryMatrixDense.

/**
 * Dispatches a cumulative aggregate (cumsum, cumprod, cummin, cummax) over the dense
 * input block to the matching dense kernel for the row range {@code [rl, ru)}.
 *
 * @param in     dense input block
 * @param out    dense output block
 * @param optype cumulative aggregation type to compute
 * @param vFn    value function; cast to {@code Builtin} for cummin/cummax
 * @param agg    array passed through to the kernels (presumably the carried-in
 *               aggregate from preceding rows; confirm against the kernels)
 * @param rl     row lower bound passed to the kernel
 * @param ru     row upper bound passed to the kernel
 * @throws DMLRuntimeException if {@code optype} is not a supported cumulative aggregate
 */
private static void cumaggregateUnaryMatrixDense(MatrixBlock in, MatrixBlock out, AggType optype, ValueFunction vFn, double[] agg, int rl, int ru) {
    final int numCols = in.clen;
    final DenseBlock denseIn = in.getDenseBlock();
    final DenseBlock denseOut = out.getDenseBlock();
    final double[] valuesIn = in.getDenseBlockValues();
    final double[] valuesOut = out.getDenseBlockValues();
    switch (optype) {
        case CUM_KAHAN_SUM:
            // CUMSUM via Kahan addition
            d_ucumkp(denseIn, agg, denseOut, numCols, new KahanObject(0, 0), KahanPlus.getKahanPlusFnObject(), rl, ru);
            return;
        case CUM_PROD:
            // CUMPROD
            d_ucumm(valuesIn, agg, valuesOut, numCols, rl, ru);
            return;
        case CUM_MIN:
        case CUM_MAX:
            // CUMMIN/CUMMAX, seeded with the neutral element of the respective function
            final double seed;
            if (optype == AggType.CUM_MAX) {
                seed = Double.NEGATIVE_INFINITY;
            } else {
                seed = Double.POSITIVE_INFINITY;
            }
            d_ucummxx(valuesIn, agg, valuesOut, numCols, seed, (Builtin) vFn, rl, ru);
            return;
        default:
            throw new DMLRuntimeException("Unsupported cumulative aggregation type: " + optype);
    }
}
Also used : KahanObject(org.apache.sysml.runtime.instructions.cp.KahanObject) KahanPlus(org.apache.sysml.runtime.functionobjects.KahanPlus) Builtin(org.apache.sysml.runtime.functionobjects.Builtin) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException)

Example 9 with KahanPlus

use of org.apache.sysml.runtime.functionobjects.KahanPlus in project incubator-systemml by apache.

the class LibMatrixAgg method aggregateUnaryMatrixSparse.

/**
 * Dispatches a unary aggregate over the sparse input block to the matching sparse
 * kernel, selected by aggregation type and index function, for rows {@code [rl, ru)}.
 *
 * <p>Combinations of aggregation type and index function without a matching kernel
 * are silently ignored; only an unknown aggregation type raises an error.
 *
 * @param in     sparse input block
 * @param out    output block, accessed through its dense block
 * @param optype aggregation type to compute
 * @param vFn    value function, cast to the type required by the selected kernel
 * @param ixFn   index function selecting full, row-wise, column-wise, or diagonal aggregation
 * @param rl     row lower bound passed to the kernel
 * @param ru     row upper bound passed to the kernel
 * @throws DMLRuntimeException if {@code optype} is not a supported aggregation type
 */
private static void aggregateUnaryMatrixSparse(MatrixBlock in, MatrixBlock out, AggType optype, ValueFunction vFn, IndexFunction ixFn, int rl, int ru) {
    final int numRows = in.rlen;
    final int numCols = in.clen;
    // note: due to corrections, even the output might be a large dense block
    final SparseBlock sparseIn = in.getSparseBlock();
    final DenseBlock denseOut = out.getDenseBlock();
    switch (optype) {
        case KAHAN_SUM: {
            // SUM via k+
            final KahanObject sumBuf = new KahanObject(0, 0);
            if (ixFn instanceof ReduceAll) {
                // SUM
                s_uakp(sparseIn, denseOut, numCols, sumBuf, (KahanPlus) vFn, rl, ru);
            } else if (ixFn instanceof ReduceCol) {
                // ROWSUM
                s_uarkp(sparseIn, denseOut, numCols, sumBuf, (KahanPlus) vFn, rl, ru);
            } else if (ixFn instanceof ReduceRow) {
                // COLSUM
                s_uackp(sparseIn, denseOut, numCols, sumBuf, (KahanPlus) vFn, rl, ru);
            } else if (ixFn instanceof ReduceDiag) {
                // TRACE
                s_uakptrace(sparseIn, denseOut, numCols, sumBuf, (KahanPlus) vFn, rl, ru);
            }
            break;
        }
        case KAHAN_SUM_SQ: {
            // SUM_SQ via k+
            final KahanObject sqBuf = new KahanObject(0, 0);
            if (ixFn instanceof ReduceAll) {
                // SUM_SQ
                s_uasqkp(sparseIn, denseOut, numCols, sqBuf, (KahanPlusSq) vFn, rl, ru);
            } else if (ixFn instanceof ReduceCol) {
                // ROWSUM_SQ
                s_uarsqkp(sparseIn, denseOut, numCols, sqBuf, (KahanPlusSq) vFn, rl, ru);
            } else if (ixFn instanceof ReduceRow) {
                // COLSUM_SQ
                s_uacsqkp(sparseIn, denseOut, numCols, sqBuf, (KahanPlusSq) vFn, rl, ru);
            }
            break;
        }
        case CUM_KAHAN_SUM: {
            // CUMSUM
            final KahanObject cumBuf = new KahanObject(0, 0);
            final KahanPlus cumPlus = KahanPlus.getKahanPlusFnObject();
            s_ucumkp(sparseIn, null, out.getDenseBlock(), numRows, numCols, cumBuf, cumPlus, rl, ru);
            break;
        }
        case CUM_PROD: {
            // CUMPROD
            s_ucumm(sparseIn, null, out.getDenseBlockValues(), numCols, rl, ru);
            break;
        }
        case CUM_MIN:
        case CUM_MAX: {
            // CUMMIN/CUMMAX, seeded with the neutral element of the respective function
            final double cumSeed = (optype == AggType.CUM_MAX) ? Double.NEGATIVE_INFINITY : Double.POSITIVE_INFINITY;
            s_ucummxx(sparseIn, null, out.getDenseBlockValues(), numCols, cumSeed, (Builtin) vFn, rl, ru);
            break;
        }
        case MIN:
        case MAX: {
            // MAX/MIN, seeded with the neutral element of the respective function
            final double seed = (optype == AggType.MAX) ? Double.NEGATIVE_INFINITY : Double.POSITIVE_INFINITY;
            if (ixFn instanceof ReduceAll) {
                // MIN/MAX
                s_uamxx(sparseIn, denseOut, numCols, seed, (Builtin) vFn, rl, ru);
            } else if (ixFn instanceof ReduceCol) {
                // ROWMIN/ROWMAX
                s_uarmxx(sparseIn, denseOut, numCols, seed, (Builtin) vFn, rl, ru);
            } else if (ixFn instanceof ReduceRow) {
                // COLMIN/COLMAX
                s_uacmxx(sparseIn, denseOut, numRows, numCols, seed, (Builtin) vFn, rl, ru);
            }
            break;
        }
        case MAX_INDEX: {
            if (ixFn instanceof ReduceCol) {
                // ROWINDEXMAX
                s_uarimxx(sparseIn, denseOut, numCols, Double.NEGATIVE_INFINITY, (Builtin) vFn, rl, ru);
            }
            break;
        }
        case MIN_INDEX: {
            if (ixFn instanceof ReduceCol) {
                // ROWINDEXMIN
                s_uarimin(sparseIn, denseOut, numCols, Double.POSITIVE_INFINITY, (Builtin) vFn, rl, ru);
            }
            break;
        }
        case MEAN: {
            final KahanObject meanBuf = new KahanObject(0, 0);
            if (ixFn instanceof ReduceAll) {
                // MEAN
                s_uamean(sparseIn, denseOut, numCols, meanBuf, (Mean) vFn, rl, ru);
            } else if (ixFn instanceof ReduceCol) {
                // ROWMEAN
                s_uarmean(sparseIn, denseOut, numCols, meanBuf, (Mean) vFn, rl, ru);
            } else if (ixFn instanceof ReduceRow) {
                // COLMEAN
                s_uacmean(sparseIn, denseOut, numCols, meanBuf, (Mean) vFn, rl, ru);
            }
            break;
        }
        case VAR: {
            final CM_COV_Object varBuf = new CM_COV_Object();
            if (ixFn instanceof ReduceAll) {
                // VAR
                s_uavar(sparseIn, denseOut, numCols, varBuf, (CM) vFn, rl, ru);
            } else if (ixFn instanceof ReduceCol) {
                // ROWVAR
                s_uarvar(sparseIn, denseOut, numCols, varBuf, (CM) vFn, rl, ru);
            } else if (ixFn instanceof ReduceRow) {
                // COLVAR
                s_uacvar(sparseIn, denseOut, numCols, varBuf, (CM) vFn, rl, ru);
            }
            break;
        }
        case PROD: {
            if (ixFn instanceof ReduceAll) {
                // PROD
                s_uam(sparseIn, denseOut, numCols, rl, ru);
            }
            break;
        }
        default:
            throw new DMLRuntimeException("Unsupported aggregation type: " + optype);
    }
}
Also used : ReduceCol(org.apache.sysml.runtime.functionobjects.ReduceCol) CM_COV_Object(org.apache.sysml.runtime.instructions.cp.CM_COV_Object) ReduceAll(org.apache.sysml.runtime.functionobjects.ReduceAll) Mean(org.apache.sysml.runtime.functionobjects.Mean) ReduceDiag(org.apache.sysml.runtime.functionobjects.ReduceDiag) CM(org.apache.sysml.runtime.functionobjects.CM) ReduceRow(org.apache.sysml.runtime.functionobjects.ReduceRow) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException) KahanObject(org.apache.sysml.runtime.instructions.cp.KahanObject) KahanPlus(org.apache.sysml.runtime.functionobjects.KahanPlus) KahanPlusSq(org.apache.sysml.runtime.functionobjects.KahanPlusSq) Builtin(org.apache.sysml.runtime.functionobjects.Builtin)

Example 10 with KahanPlus

use of org.apache.sysml.runtime.functionobjects.KahanPlus in project incubator-systemml by apache.

the class LibMatrixAgg method aggregateTernaryDense.

/**
 * Computes the Kahan-compensated sum of the cell-wise product {@code in1 * in2 * in3}
 * over rows {@code [rl, ru)} of dense inputs, either over all cells or per column.
 *
 * @param in1  first dense input; its column count defines the row stride
 * @param in2  second dense input
 * @param in3  optional third dense input; {@code null} is treated as the literal 1
 * @param ret  result block; for a full aggregate, cell (0,0) receives the sum and (0,1)
 *             the correction; otherwise column sums are kept at offset {@code j} and
 *             their corrections at offset {@code j + n} of its dense values
 * @param ixFn index function; {@code ReduceAll} selects the full aggregate, anything
 *             else the per-column aggregate
 * @param rl   first row to process (inclusive)
 * @param ru   last row to process (exclusive)
 */
private static void aggregateTernaryDense(MatrixBlock in1, MatrixBlock in2, MatrixBlock in3, MatrixBlock ret, IndexFunction ixFn, int rl, int ru) {
    // compute block operations
    final KahanObject acc = new KahanObject(0, 0);
    final KahanPlus adder = KahanPlus.getKahanPlusFnObject();
    final double[] first = in1.getDenseBlockValues();
    final double[] second = in2.getDenseBlockValues();
    // if null, literal 1
    final double[] third = (in3 == null) ? null : in3.getDenseBlockValues();
    final int ncol = in1.clen;
    int pos = rl * ncol;

    if (ixFn instanceof ReduceAll) {
        // tak+*: a single running sum over all cells
        for (int row = rl; row < ru; row++) {
            for (int col = 0; col < ncol; col++) {
                final double factor = (third == null) ? 1 : third[pos];
                adder.execute2(acc, first[pos] * second[pos] * factor);
                pos++;
            }
        }
        ret.quickSetValue(0, 0, acc._sum);
        ret.quickSetValue(0, 1, acc._correction);
        return;
    }

    // tack+*: one running sum and correction per column, kept in the result values
    final double[] colAgg = ret.getDenseBlockValues();
    for (int row = rl; row < ru; row++) {
        for (int col = 0; col < ncol; col++) {
            final double factor = (third == null) ? 1 : third[pos];
            final double product = first[pos] * second[pos] * factor;
            acc._sum = colAgg[col];
            acc._correction = colAgg[col + ncol];
            adder.execute2(acc, product);
            colAgg[col] = acc._sum;
            colAgg[col + ncol] = acc._correction;
            pos++;
        }
    }
}
Also used : ReduceAll(org.apache.sysml.runtime.functionobjects.ReduceAll) KahanObject(org.apache.sysml.runtime.instructions.cp.KahanObject) KahanPlus(org.apache.sysml.runtime.functionobjects.KahanPlus)

Aggregations

KahanPlus (org.apache.sysml.runtime.functionobjects.KahanPlus)29 KahanObject (org.apache.sysml.runtime.instructions.cp.KahanObject)25 DMLRuntimeException (org.apache.sysml.runtime.DMLRuntimeException)11 Builtin (org.apache.sysml.runtime.functionobjects.Builtin)8 ReduceAll (org.apache.sysml.runtime.functionobjects.ReduceAll)7 CM (org.apache.sysml.runtime.functionobjects.CM)5 KahanFunction (org.apache.sysml.runtime.functionobjects.KahanFunction)5 KahanPlusSq (org.apache.sysml.runtime.functionobjects.KahanPlusSq)5 ReduceCol (org.apache.sysml.runtime.functionobjects.ReduceCol)5 ReduceRow (org.apache.sysml.runtime.functionobjects.ReduceRow)5 DenseBlock (org.apache.sysml.runtime.matrix.data.DenseBlock)5 Mean (org.apache.sysml.runtime.functionobjects.Mean)4 ReduceDiag (org.apache.sysml.runtime.functionobjects.ReduceDiag)4 ValueFunction (org.apache.sysml.runtime.functionobjects.ValueFunction)4 CM_COV_Object (org.apache.sysml.runtime.instructions.cp.CM_COV_Object)4 Multiply (org.apache.sysml.runtime.functionobjects.Multiply)3 MatrixBlock (org.apache.sysml.runtime.matrix.data.MatrixBlock)3 IOException (java.io.IOException)2 ArrayList (java.util.ArrayList)2 ExecutorService (java.util.concurrent.ExecutorService)2