Search in sources :

Example 16 with KahanFunction

use of org.apache.sysml.runtime.functionobjects.KahanFunction in project systemml by apache.

the class LibMatrixAgg method getAggType.

/**
 * Maps a unary aggregate operator to the internal aggregation type.
 *
 * The decision is based on the operator's value function, its index
 * function and its correction location; combinations that match none of
 * the recognized patterns yield {@code AggType.INVALID}.
 *
 * @param op unary aggregate operator to classify
 * @return the matching aggregation type, or {@code AggType.INVALID}
 */
private static AggType getAggType(AggregateUnaryOperator op) {
    final ValueFunction valueFn = op.aggOp.increOp.fn;
    final IndexFunction indexFn = op.indexFn;
    final CorrectionLocationType corr = op.aggOp.correctionLocation;

    // full, row-wise or column-wise reduction (shared by most patterns below)
    final boolean plainReduce = indexFn instanceof ReduceAll
        || indexFn instanceof ReduceCol
        || indexFn instanceof ReduceRow;

    // (kahan) sum / sum squared / trace (for ReduceDiag)
    final boolean singleCorr = corr == CorrectionLocationType.LASTCOLUMN
        || corr == CorrectionLocationType.LASTROW;
    if (valueFn instanceof KahanFunction && singleCorr && (plainReduce || indexFn instanceof ReduceDiag)) {
        if (valueFn instanceof KahanPlus) {
            return AggType.KAHAN_SUM;
        }
        if (valueFn instanceof KahanPlusSq) {
            return AggType.KAHAN_SUM_SQ;
        }
        // any other Kahan function falls through to the remaining checks
    }

    // mean
    final boolean doubleCorr = corr == CorrectionLocationType.LASTTWOCOLUMNS
        || corr == CorrectionLocationType.LASTTWOROWS;
    if (valueFn instanceof Mean && doubleCorr && plainReduce) {
        return AggType.MEAN;
    }

    // variance
    if (valueFn instanceof CM && ((CM) valueFn).getAggOpType() == AggregateOperationTypes.VARIANCE) {
        final boolean quadCorr = corr == CorrectionLocationType.LASTFOURCOLUMNS
            || corr == CorrectionLocationType.LASTFOURROWS;
        if (quadCorr && plainReduce) {
            return AggType.VAR;
        }
    }

    // prod (full reduction only)
    if (valueFn instanceof Multiply && indexFn instanceof ReduceAll) {
        return AggType.PROD;
    }

    // min / max and their index variants
    if (valueFn instanceof Builtin && plainReduce) {
        switch(((Builtin) valueFn).bFunc) {
            case MAX:
                return AggType.MAX;
            case MIN:
                return AggType.MIN;
            case MAXINDEX:
                return AggType.MAX_INDEX;
            case MININDEX:
                return AggType.MIN_INDEX;
            default:
                // other builtin codes are not handled here
                break;
        }
    }
    return AggType.INVALID;
}
Also used : ValueFunction(org.apache.sysml.runtime.functionobjects.ValueFunction) ReduceCol(org.apache.sysml.runtime.functionobjects.ReduceCol) ReduceAll(org.apache.sysml.runtime.functionobjects.ReduceAll) Mean(org.apache.sysml.runtime.functionobjects.Mean) ReduceDiag(org.apache.sysml.runtime.functionobjects.ReduceDiag) CM(org.apache.sysml.runtime.functionobjects.CM) ReduceRow(org.apache.sysml.runtime.functionobjects.ReduceRow) IndexFunction(org.apache.sysml.runtime.functionobjects.IndexFunction) BuiltinCode(org.apache.sysml.runtime.functionobjects.Builtin.BuiltinCode) KahanFunction(org.apache.sysml.runtime.functionobjects.KahanFunction) Multiply(org.apache.sysml.runtime.functionobjects.Multiply) KahanPlus(org.apache.sysml.runtime.functionobjects.KahanPlus) KahanPlusSq(org.apache.sysml.runtime.functionobjects.KahanPlusSq) Builtin(org.apache.sysml.runtime.functionobjects.Builtin)

Example 17 with KahanFunction

use of org.apache.sysml.runtime.functionobjects.KahanFunction in project systemml by apache.

the class CompressedMatrixBlock method aggregateUnaryOperations.

/**
 * Computes a unary aggregate over this (potentially compressed) matrix block.
 *
 * If the block is not compressed, the call is delegated to the superclass.
 * On compressed data only sum, sum of squares, min and max are accepted
 * (see the supported-operations check below).
 *
 * @param op                unary aggregate operator (value function, index function, correction)
 * @param result            output block to reuse, or {@code null} to allocate a new one
 * @param blockingFactorRow forwarded to the superclass in the uncompressed case
 * @param blockingFactorCol forwarded to the superclass in the uncompressed case
 * @param indexesIn         forwarded to the superclass in the uncompressed case
 * @param inCP              if true, correction rows/columns are dropped from the result
 * @return the aggregated result block
 * @throws DMLRuntimeException for unsupported aggregates, unknown correction
 *         locations, or any failure during multi-threaded execution
 */
@Override
public MatrixValue aggregateUnaryOperations(AggregateUnaryOperator op, MatrixValue result, int blockingFactorRow, int blockingFactorCol, MatrixIndexes indexesIn, boolean inCP) {
    // fall back to the uncompressed unary aggregate if this block is not compressed
    if (!isCompressed()) {
        return super.aggregateUnaryOperations(op, result, blockingFactorRow, blockingFactorCol, indexesIn, inCP);
    }
    // check for supported operations
    if (!(op.aggOp.increOp.fn instanceof KahanPlus || op.aggOp.increOp.fn instanceof KahanPlusSq || (op.aggOp.increOp.fn instanceof Builtin && (((Builtin) op.aggOp.increOp.fn).getBuiltinCode() == BuiltinCode.MIN || ((Builtin) op.aggOp.increOp.fn).getBuiltinCode() == BuiltinCode.MAX)))) {
        throw new DMLRuntimeException("Unary aggregates other than sum/sumsq/min/max not supported yet.");
    }
    Timing time = LOG.isDebugEnabled() ? new Timing(true) : null;
    // prepare output dimensions (enlarged by the correction rows/columns, if any)
    CellIndex tempCellIndex = new CellIndex(-1, -1);
    op.indexFn.computeDimension(rlen, clen, tempCellIndex);
    if (op.aggOp.correctionExists) {
        switch(op.aggOp.correctionLocation) {
            case LASTROW:
                tempCellIndex.row++;
                break;
            case LASTCOLUMN:
                tempCellIndex.column++;
                break;
            case LASTTWOROWS:
                tempCellIndex.row += 2;
                break;
            case LASTTWOCOLUMNS:
                tempCellIndex.column += 2;
                break;
            default:
                throw new DMLRuntimeException("unrecognized correctionLocation: " + op.aggOp.correctionLocation);
        }
    }
    // initialize and allocate the result
    if (result == null)
        result = new MatrixBlock(tempCellIndex.row, tempCellIndex.column, false);
    else
        result.reset(tempCellIndex.row, tempCellIndex.column, false);
    MatrixBlock ret = (MatrixBlock) result;
    ret.allocateDenseBlock();
    // special handling init value for rowmins/rowmax
    if (op.indexFn instanceof ReduceCol && op.aggOp.increOp.fn instanceof Builtin) {
        double val = (((Builtin) op.aggOp.increOp.fn).getBuiltinCode() == BuiltinCode.MAX) ? Double.NEGATIVE_INFINITY : Double.POSITIVE_INFINITY;
        ret.getDenseBlock().set(val);
    }
    // core unary aggregate
    if (op.getNumThreads() > 1 && getExactSizeOnDisk() > MIN_PAR_AGG_THRESHOLD) {
        // multi-threaded execution of all groups
        ArrayList<ColGroup>[] grpParts = createStaticTaskPartitioning((op.indexFn instanceof ReduceCol) ? 1 : op.getNumThreads(), false);
        ColGroupUncompressed uc = getUncompressedColGroup();
        ExecutorService pool = null;
        try {
            // compute the uncompressed column group up front
            // NOTE(review): presumably parallelized internally via op's thread count - confirm
            if (uc != null)
                uc.unaryAggregateOperations(op, ret);
            // compute all compressed column groups
            pool = CommonThreadPool.get(op.getNumThreads());
            ArrayList<UnaryAggregateTask> tasks = new ArrayList<>();
            if (op.indexFn instanceof ReduceCol && grpParts.length > 0) {
                // row aggregates: partition by aligned row ranges over the single group partition
                int blklen = BitmapEncoder.getAlignedBlocksize((int) (Math.ceil((double) rlen / op.getNumThreads())));
                for (int i = 0; i < op.getNumThreads() && i * blklen < rlen; i++) tasks.add(new UnaryAggregateTask(grpParts[0], ret, i * blklen, Math.min((i + 1) * blklen, rlen), op));
            } else
                for (ArrayList<ColGroup> grp : grpParts) tasks.add(new UnaryAggregateTask(grp, ret, 0, rlen, op));
            List<Future<MatrixBlock>> rtasks = pool.invokeAll(tasks);
            // aggregate partial results
            if (op.indexFn instanceof ReduceAll) {
                if (op.aggOp.increOp.fn instanceof KahanFunction) {
                    KahanObject kbuff = new KahanObject(ret.quickGetValue(0, 0), 0);
                    for (Future<MatrixBlock> rtask : rtasks) {
                        double tmp = rtask.get().quickGetValue(0, 0);
                        ((KahanFunction) op.aggOp.increOp.fn).execute2(kbuff, tmp);
                    }
                    ret.quickSetValue(0, 0, kbuff._sum);
                } else {
                    double val = ret.quickGetValue(0, 0);
                    for (Future<MatrixBlock> rtask : rtasks) {
                        double tmp = rtask.get().quickGetValue(0, 0);
                        val = op.aggOp.increOp.fn.execute(val, tmp);
                    }
                    ret.quickSetValue(0, 0, val);
                }
            } else {
                // no merge needed, but Future.get() is the only way a task failure
                // surfaces; without it errors in row/column aggregates are lost
                for (Future<MatrixBlock> rtask : rtasks) rtask.get();
            }
        } catch (Exception ex) {
            throw new DMLRuntimeException(ex);
        } finally {
            // release the pool on the error path as well
            if (pool != null)
                pool.shutdown();
        }
    } else {
        // process UC column group
        for (ColGroup grp : _colGroups) if (grp instanceof ColGroupUncompressed)
            grp.unaryAggregateOperations(op, ret);
        // process OLE/RLE column groups
        aggregateUnaryOperations(op, _colGroups, ret, 0, rlen);
    }
    // special handling zeros for rowmins/rowmax: rows with fewer than clen
    // non-zeros contain at least one zero that must take part in min/max
    if (op.indexFn instanceof ReduceCol && op.aggOp.increOp.fn instanceof Builtin) {
        int[] rnnz = new int[rlen];
        for (ColGroup grp : _colGroups) grp.countNonZerosPerRow(rnnz, 0, rlen);
        Builtin builtin = (Builtin) op.aggOp.increOp.fn;
        for (int i = 0; i < rlen; i++) if (rnnz[i] < clen)
            ret.quickSetValue(i, 0, builtin.execute2(ret.quickGetValue(i, 0), 0));
    }
    // drop correction if necessary
    if (op.aggOp.correctionExists && inCP)
        ret.dropLastRowsOrColumns(op.aggOp.correctionLocation);
    // post-processing
    ret.recomputeNonZeros();
    // time is null if debug logging was disabled when the method started
    if (time != null && LOG.isDebugEnabled())
        LOG.debug("Compressed uagg k=" + op.getNumThreads() + " in " + time.stop());
    return ret;
}
Also used : ReduceAll(org.apache.sysml.runtime.functionobjects.ReduceAll) MatrixBlock(org.apache.sysml.runtime.matrix.data.MatrixBlock) ArrayList(java.util.ArrayList) KahanFunction(org.apache.sysml.runtime.functionobjects.KahanFunction) KahanPlusSq(org.apache.sysml.runtime.functionobjects.KahanPlusSq) ReduceCol(org.apache.sysml.runtime.functionobjects.ReduceCol) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException) IOException(java.io.IOException) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException) ExecutorService(java.util.concurrent.ExecutorService) KahanObject(org.apache.sysml.runtime.instructions.cp.KahanObject) KahanPlus(org.apache.sysml.runtime.functionobjects.KahanPlus) Future(java.util.concurrent.Future) Timing(org.apache.sysml.runtime.controlprogram.parfor.stat.Timing) Builtin(org.apache.sysml.runtime.functionobjects.Builtin)

Example 18 with KahanFunction

use of org.apache.sysml.runtime.functionobjects.KahanFunction in project incubator-systemml by apache.

the class SpoofCellwise method execute.

/**
 * Executes this cell-wise operator with a full aggregation and returns the
 * aggregate as a scalar.
 *
 * Runs single-threaded when {@code k <= 1} or when the input is smaller than
 * {@code PAR_NUMCELL_THRESHOLD}; otherwise row ranges are processed in
 * parallel and their partial aggregates are merged.
 *
 * @param inputs        input matrices; the first one is the main input
 * @param scalarObjects scalar inputs
 * @param k             requested degree of parallelism
 * @return the aggregated value wrapped in a {@code DoubleObject}
 * @throws RuntimeException if {@code inputs} is null or empty
 * @throws DMLRuntimeException if the multi-threaded execution fails
 */
@Override
public ScalarObject execute(ArrayList<MatrixBlock> inputs, ArrayList<ScalarObject> scalarObjects, int k) {
    // sanity check
    if (inputs == null || inputs.size() < 1)
        throw new RuntimeException("Invalid input arguments.");
    // input preparation
    MatrixBlock a = inputs.get(0);
    SideInput[] b = prepInputMatrices(inputs);
    double[] scalars = prepInputScalars(scalarObjects);
    final int m = a.getNumRows();
    final int n = a.getNumColumns();
    // sparse safe check: declared sparse-safe, or (without side inputs) a zero input cell yields zero
    boolean sparseSafe = isSparseSafe() || (b.length == 0 && genexec(0, b, scalars, m, n, 0, 0) == 0);
    long inputSize = sparseSafe ? getTotalInputNnz(inputs) : getTotalInputSize(inputs);
    if (inputSize < PAR_NUMCELL_THRESHOLD) {
        // serial execution
        k = 1;
    }
    double ret = 0;
    if (k <= 1) {
        // SINGLE-THREADED
        if (inputs.get(0) instanceof CompressedMatrixBlock)
            ret = executeCompressedAndAgg((CompressedMatrixBlock) a, b, scalars, m, n, sparseSafe, 0, m);
        else if (!inputs.get(0).isInSparseFormat())
            ret = executeDenseAndAgg(a.getDenseBlock(), b, scalars, m, n, sparseSafe, 0, m);
        else
            ret = executeSparseAndAgg(a.getSparseBlock(), b, scalars, m, n, sparseSafe, 0, m);
    } else {
        // MULTI-THREADED
        ExecutorService pool = null;
        try {
            pool = CommonThreadPool.get(k);
            ArrayList<ParAggTask> tasks = new ArrayList<>();
            int nk = (a instanceof CompressedMatrixBlock) ? k : UtilFunctions.roundToNext(Math.min(8 * k, m / 32), k);
            int blklen = (int) (Math.ceil((double) m / nk));
            if (a instanceof CompressedMatrixBlock)
                blklen = BitmapEncoder.getAlignedBlocksize(blklen);
            for (int i = 0; i < nk && i * blklen < m; i++) tasks.add(new ParAggTask(a, b, scalars, m, n, sparseSafe, i * blklen, Math.min((i + 1) * blklen, m)));
            // execute tasks
            List<Future<Double>> taskret = pool.invokeAll(tasks);
            // aggregate partial results
            ValueFunction vfun = getAggFunction();
            if (vfun instanceof KahanFunction) {
                // partial sums (and partial sums of squares) are merged by plain Kahan addition
                KahanObject kbuff = new KahanObject(0, 0);
                KahanPlus kplus = KahanPlus.getKahanPlusFnObject();
                for (Future<Double> task : taskret) kplus.execute2(kbuff, task.get());
                ret = kbuff._sum;
            } else {
                // seed with the first partial result instead of 0; a 0 seed would
                // wrongly win a min over positive data or a max over negative data
                boolean first = true;
                for (Future<Double> task : taskret) {
                    double part = task.get();
                    ret = first ? part : vfun.execute(ret, part);
                    first = false;
                }
            }
        } catch (Exception ex) {
            throw new DMLRuntimeException(ex);
        } finally {
            // release the pool on the error path as well
            if (pool != null)
                pool.shutdown();
        }
    }
    // correction for min/max; cell count computed in long to avoid int overflow
    if ((_aggOp == AggOp.MIN || _aggOp == AggOp.MAX) && sparseSafe && a.getNonZeros() < (long) a.getNumRows() * a.getNumColumns())
        // unseen 0 might be max or min value
        ret = getAggFunction().execute(ret, 0);
    return new DoubleObject(ret);
}
Also used : ValueFunction(org.apache.sysml.runtime.functionobjects.ValueFunction) MatrixBlock(org.apache.sysml.runtime.matrix.data.MatrixBlock) CompressedMatrixBlock(org.apache.sysml.runtime.compress.CompressedMatrixBlock) DoubleObject(org.apache.sysml.runtime.instructions.cp.DoubleObject) ArrayList(java.util.ArrayList) CompressedMatrixBlock(org.apache.sysml.runtime.compress.CompressedMatrixBlock) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException) KahanFunction(org.apache.sysml.runtime.functionobjects.KahanFunction) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException) ExecutorService(java.util.concurrent.ExecutorService) KahanObject(org.apache.sysml.runtime.instructions.cp.KahanObject) Future(java.util.concurrent.Future) KahanPlus(org.apache.sysml.runtime.functionobjects.KahanPlus)

Example 19 with KahanFunction

use of org.apache.sysml.runtime.functionobjects.KahanFunction in project incubator-systemml by apache.

the class SpoofCellwise method execute.

/**
 * Executes this cell-wise operator and writes the result into {@code out}.
 *
 * Depending on the cell type the output is the full m x n matrix (NO_AGG),
 * an m x 1 column vector (ROW_AGG) or a 1 x n row vector (COL_AGG). Runs
 * single-threaded when {@code k <= 1} or when the input is smaller than
 * {@code PAR_NUMCELL_THRESHOLD}; otherwise row ranges are processed in parallel.
 *
 * @param inputs        input matrices; the first one is the main input
 * @param scalarObjects scalar inputs
 * @param out           output block, reset and allocated by this method
 * @param k             requested degree of parallelism
 * @return {@code out}, filled with the result
 * @throws RuntimeException if {@code inputs} is null or empty, or {@code out} is null
 * @throws DMLRuntimeException for an invalid cell type or a failed multi-threaded execution
 */
@Override
public MatrixBlock execute(ArrayList<MatrixBlock> inputs, ArrayList<ScalarObject> scalarObjects, MatrixBlock out, int k) {
    // sanity check
    if (inputs == null || inputs.size() < 1 || out == null)
        throw new RuntimeException("Invalid input arguments.");
    // input preparation
    MatrixBlock a = inputs.get(0);
    SideInput[] b = prepInputMatrices(inputs);
    double[] scalars = prepInputScalars(scalarObjects);
    final int m = a.getNumRows();
    final int n = a.getNumColumns();
    // sparse safe check: declared sparse-safe, or (without side inputs) a zero input cell yields zero
    boolean sparseSafe = isSparseSafe() || (b.length == 0 && genexec(0, b, scalars, m, n, 0, 0) == 0);
    long inputSize = sparseSafe ? getTotalInputNnz(inputs) : getTotalInputSize(inputs);
    if (inputSize < PAR_NUMCELL_THRESHOLD) {
        // serial execution
        k = 1;
    }
    // result allocation and preparations
    boolean sparseOut = _type == CellType.NO_AGG && sparseSafe && a.isInSparseFormat();
    switch(_type) {
        case NO_AGG:
            out.reset(m, n, sparseOut);
            break;
        case ROW_AGG:
            out.reset(m, 1, false);
            break;
        case COL_AGG:
            out.reset(1, n, false);
            break;
        default:
            throw new DMLRuntimeException("Invalid cell type: " + _type);
    }
    out.allocateBlock();
    long lnnz = 0;
    if (k <= 1) {
        // SINGLE-THREADED
        if (inputs.get(0) instanceof CompressedMatrixBlock)
            lnnz = executeCompressed((CompressedMatrixBlock) a, b, scalars, out, m, n, sparseSafe, 0, m);
        else if (!inputs.get(0).isInSparseFormat())
            lnnz = executeDense(a.getDenseBlock(), b, scalars, out, m, n, sparseSafe, 0, m);
        else
            lnnz = executeSparse(a.getSparseBlock(), b, scalars, out, m, n, sparseSafe, 0, m);
    } else {
        // MULTI-THREADED
        ExecutorService pool = null;
        try {
            pool = CommonThreadPool.get(k);
            ArrayList<ParExecTask> tasks = new ArrayList<>();
            int nk = UtilFunctions.roundToNext(Math.min(8 * k, m / 32), k);
            int blklen = (int) (Math.ceil((double) m / nk));
            if (a instanceof CompressedMatrixBlock)
                blklen = BitmapEncoder.getAlignedBlocksize(blklen);
            for (int i = 0; i < nk && i * blklen < m; i++) tasks.add(new ParExecTask(a, b, scalars, out, m, n, sparseSafe, i * blklen, Math.min((i + 1) * blklen, m)));
            // execute tasks
            List<Future<Long>> taskret = pool.invokeAll(tasks);
            // aggregate nnz and error handling
            for (Future<Long> task : taskret) lnnz += task.get();
            if (_type == CellType.COL_AGG) {
                // aggregate partial results
                double[] c = out.getDenseBlockValues();
                ValueFunction vfun = getAggFunction();
                if (vfun instanceof KahanFunction) {
                    for (ParExecTask task : tasks) LibMatrixMult.vectAdd(task.getResult().getDenseBlockValues(), c, 0, 0, n);
                } else {
                    // seed with the first partial result instead of the zeroed output;
                    // a 0 seed would wrongly win a min over positive data or a max
                    // over negative data
                    boolean first = true;
                    for (ParExecTask task : tasks) {
                        double[] tmp = task.getResult().getDenseBlockValues();
                        if (first) {
                            System.arraycopy(tmp, 0, c, 0, n);
                            first = false;
                        } else {
                            for (int j = 0; j < n; j++) c[j] = vfun.execute(c[j], tmp[j]);
                        }
                    }
                }
                lnnz = out.recomputeNonZeros();
            }
        } catch (Exception ex) {
            throw new DMLRuntimeException(ex);
        } finally {
            // release the pool on the error path as well
            if (pool != null)
                pool.shutdown();
        }
    }
    // post-processing
    out.setNonZeros(lnnz);
    out.examSparsity();
    return out;
}
Also used : ValueFunction(org.apache.sysml.runtime.functionobjects.ValueFunction) MatrixBlock(org.apache.sysml.runtime.matrix.data.MatrixBlock) CompressedMatrixBlock(org.apache.sysml.runtime.compress.CompressedMatrixBlock) ArrayList(java.util.ArrayList) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException) CompressedMatrixBlock(org.apache.sysml.runtime.compress.CompressedMatrixBlock) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException) KahanFunction(org.apache.sysml.runtime.functionobjects.KahanFunction) ExecutorService(java.util.concurrent.ExecutorService) Future(java.util.concurrent.Future)

Example 20 with KahanFunction

use of org.apache.sysml.runtime.functionobjects.KahanFunction in project incubator-systemml by apache.

the class SpoofCellwise method executeSparseColAggSum.

/**
 * Column-wise Kahan aggregation over rows {@code [rl, ru)} of a sparse block.
 *
 * Per-column sums are accumulated directly in the dense values of
 * {@code out}; the matching Kahan corrections live in a local array. Each
 * row is scanned sequentially; for sparse-unsafe operators the zero cells
 * are evaluated as well, which avoids a binary search into the sparse row.
 *
 * @param sblock     sparse input block, may be {@code null}
 * @param b          side inputs passed through to {@code genexec}
 * @param scalars    scalar inputs passed through to {@code genexec}
 * @param out        output block holding the per-column sums
 * @param m          number of rows, passed through to {@code genexec}
 * @param n          number of columns
 * @param sparseSafe if true, zero cells are skipped
 * @param rl         first row (inclusive)
 * @param ru         last row (exclusive)
 * @return always -1
 */
private long executeSparseColAggSum(SparseBlock sblock, SideInput[] b, double[] scalars, MatrixBlock out, int m, int n, boolean sparseSafe, int rl, int ru) {
    final KahanFunction aggFn = (KahanFunction) getAggFunction();
    final KahanObject acc = new KahanObject(0, 0);
    final double[] corrections = new double[n];
    final double[] sums = out.getDenseBlockValues();

    for (int row = rl; row < ru; row++) {
        acc.set(0, 0);
        // first column of this row that has not been processed yet
        int nextCol = 0;

        // handle non-empty rows
        if (sblock != null && !sblock.isEmpty(row)) {
            final int start = sblock.pos(row);
            final int len = sblock.size(row);
            final int[] colIdx = sblock.indexes(row);
            final double[] vals = sblock.values(row);
            final int end = start + len;

            for (int p = start; p < end; p++) {
                final int col = colIdx[p];

                // zeros in front of the current non-zero (sparse-unsafe only)
                if (!sparseSafe) {
                    for (int j = nextCol; j < col; j++) {
                        acc.set(sums[j], corrections[j]);
                        aggFn.execute2(acc, genexec(0, b, scalars, m, n, row, j));
                        sums[j] = acc._sum;
                        corrections[j] = acc._correction;
                    }
                }

                // the current non-zero itself
                acc.set(sums[col], corrections[col]);
                aggFn.execute2(acc, genexec(vals[p], b, scalars, m, n, row, col));
                sums[col] = acc._sum;
                corrections[col] = acc._correction;
                nextCol = col + 1;
            }
        }

        // empty rows or trailing zeros (sparse-unsafe only)
        if (!sparseSafe) {
            for (int j = nextCol; j < n; j++) {
                acc.set(sums[j], corrections[j]);
                aggFn.execute2(acc, genexec(0, b, scalars, m, n, row, j));
                sums[j] = acc._sum;
                corrections[j] = acc._correction;
            }
        }
    }
    return -1;
}
Also used : KahanFunction(org.apache.sysml.runtime.functionobjects.KahanFunction) KahanObject(org.apache.sysml.runtime.instructions.cp.KahanObject)

Aggregations

KahanFunction (org.apache.sysml.runtime.functionobjects.KahanFunction): 32, KahanObject (org.apache.sysml.runtime.instructions.cp.KahanObject): 28, KahanPlus (org.apache.sysml.runtime.functionobjects.KahanPlus): 10, ValueFunction (org.apache.sysml.runtime.functionobjects.ValueFunction): 10, ArrayList (java.util.ArrayList): 6, ExecutorService (java.util.concurrent.ExecutorService): 6, Future (java.util.concurrent.Future): 6, DMLRuntimeException (org.apache.sysml.runtime.DMLRuntimeException): 6, IJV (org.apache.sysml.runtime.matrix.data.IJV): 6, MatrixBlock (org.apache.sysml.runtime.matrix.data.MatrixBlock): 6, CompressedMatrixBlock (org.apache.sysml.runtime.compress.CompressedMatrixBlock): 4, Builtin (org.apache.sysml.runtime.functionobjects.Builtin): 4, KahanPlusSq (org.apache.sysml.runtime.functionobjects.KahanPlusSq): 4, ReduceAll (org.apache.sysml.runtime.functionobjects.ReduceAll): 4, ReduceCol (org.apache.sysml.runtime.functionobjects.ReduceCol): 4, IOException (java.io.IOException): 2, ColGroup (org.apache.sysml.runtime.compress.ColGroup): 2, ColGroupValue (org.apache.sysml.runtime.compress.ColGroupValue): 2, Timing (org.apache.sysml.runtime.controlprogram.parfor.stat.Timing): 2, BuiltinCode (org.apache.sysml.runtime.functionobjects.Builtin.BuiltinCode): 2