Examples with Timing - org.apache.sysml.runtime.controlprogram.parfor.stat.Timing

Example 41 with Timing

use of org.apache.sysml.runtime.controlprogram.parfor.stat.Timing in project systemml by apache.

the class CompressedMatrixBlock method aggregateUnaryOperations.

/**
 * Computes a unary aggregate over this block. If the block is not compressed the call is
 * delegated to the superclass; otherwise the aggregate is evaluated on the column groups.
 *
 * <p>Only aggregates whose increment function is {@code KahanPlus}, {@code KahanPlusSq}, or a
 * {@code Builtin} with code {@code MIN} or {@code MAX} are accepted.
 *
 * @param op aggregate operator (aggregate function, index function, number of threads)
 * @param result optional output block to reuse; a new {@code MatrixBlock} is created if null
 * @param blockingFactorRow only forwarded to the superclass for uncompressed blocks
 * @param blockingFactorCol only forwarded to the superclass for uncompressed blocks
 * @param indexesIn only forwarded to the superclass for uncompressed blocks
 * @param inCP if true and the operator carries a correction, the correction rows/columns are
 *        dropped from the output before returning
 * @return the output block holding the aggregate
 * @throws DMLRuntimeException if the aggregate is unsupported, the correction location is not
 *         recognized, or the multi-threaded execution (including any single task) fails
 */
@Override
public MatrixValue aggregateUnaryOperations(AggregateUnaryOperator op, MatrixValue result, int blockingFactorRow, int blockingFactorCol, MatrixIndexes indexesIn, boolean inCP) {
    // call uncompressed matrix mult if necessary
    if (!isCompressed()) {
        return super.aggregateUnaryOperations(op, result, blockingFactorRow, blockingFactorCol, indexesIn, inCP);
    }
    // check for supported operations
    if (!(op.aggOp.increOp.fn instanceof KahanPlus || op.aggOp.increOp.fn instanceof KahanPlusSq || (op.aggOp.increOp.fn instanceof Builtin && (((Builtin) op.aggOp.increOp.fn).getBuiltinCode() == BuiltinCode.MIN || ((Builtin) op.aggOp.increOp.fn).getBuiltinCode() == BuiltinCode.MAX)))) {
        throw new DMLRuntimeException("Unary aggregates other than sum/sumsq/min/max not supported yet.");
    }
    Timing time = LOG.isDebugEnabled() ? new Timing(true) : null;
    // prepare output dimensions (extended by the space needed for the correction, if any)
    CellIndex tempCellIndex = new CellIndex(-1, -1);
    op.indexFn.computeDimension(rlen, clen, tempCellIndex);
    if (op.aggOp.correctionExists) {
        switch(op.aggOp.correctionLocation) {
            case LASTROW:
                tempCellIndex.row++;
                break;
            case LASTCOLUMN:
                tempCellIndex.column++;
                break;
            case LASTTWOROWS:
                tempCellIndex.row += 2;
                break;
            case LASTTWOCOLUMNS:
                tempCellIndex.column += 2;
                break;
            default:
                throw new DMLRuntimeException("unrecognized correctionLocation: " + op.aggOp.correctionLocation);
        }
    }
    // initialize and allocate the result
    if (result == null)
        result = new MatrixBlock(tempCellIndex.row, tempCellIndex.column, false);
    else
        result.reset(tempCellIndex.row, tempCellIndex.column, false);
    MatrixBlock ret = (MatrixBlock) result;
    ret.allocateDenseBlock();
    // special handling init value for rowmins/rowmax
    if (op.indexFn instanceof ReduceCol && op.aggOp.increOp.fn instanceof Builtin) {
        double val = (((Builtin) op.aggOp.increOp.fn).getBuiltinCode() == BuiltinCode.MAX) ? Double.NEGATIVE_INFINITY : Double.POSITIVE_INFINITY;
        ret.getDenseBlock().set(val);
    }
    // core unary aggregate
    if (op.getNumThreads() > 1 && getExactSizeOnDisk() > MIN_PAR_AGG_THRESHOLD) {
        // multi-threaded execution of all groups
        ArrayList<ColGroup>[] grpParts = createStaticTaskPartitioning((op.indexFn instanceof ReduceCol) ? 1 : op.getNumThreads(), false);
        ColGroupUncompressed uc = getUncompressedColGroup();
        try {
            // compute uncompressed column group up front (otherwise bottleneck)
            if (uc != null)
                uc.unaryAggregateOperations(op, ret);
            // compute all compressed column groups
            ExecutorService pool = CommonThreadPool.get(op.getNumThreads());
            List<Future<MatrixBlock>> rtasks;
            try {
                ArrayList<UnaryAggregateTask> tasks = new ArrayList<>();
                if (op.indexFn instanceof ReduceCol && grpParts.length > 0) {
                    // row aggregates: one partition of groups, split by aligned row ranges
                    int blklen = BitmapEncoder.getAlignedBlocksize((int) (Math.ceil((double) rlen / op.getNumThreads())));
                    for (int i = 0; i < op.getNumThreads() && i * blklen < rlen; i++) {
                        tasks.add(new UnaryAggregateTask(grpParts[0], ret, i * blklen, Math.min((i + 1) * blklen, rlen), op));
                    }
                } else {
                    // otherwise: one task per partition of column groups over all rows
                    for (ArrayList<ColGroup> grp : grpParts) {
                        tasks.add(new UnaryAggregateTask(grp, ret, 0, rlen, op));
                    }
                }
                rtasks = pool.invokeAll(tasks);
            } finally {
                // release the pool even if task creation or invokeAll fails
                pool.shutdown();
            }
            // aggregate partial results
            if (op.indexFn instanceof ReduceAll) {
                if (op.aggOp.increOp.fn instanceof KahanFunction) {
                    KahanObject kbuff = new KahanObject(ret.quickGetValue(0, 0), 0);
                    for (Future<MatrixBlock> rtask : rtasks) {
                        double tmp = rtask.get().quickGetValue(0, 0);
                        ((KahanFunction) op.aggOp.increOp.fn).execute2(kbuff, tmp);
                    }
                    ret.quickSetValue(0, 0, kbuff._sum);
                } else {
                    double val = ret.quickGetValue(0, 0);
                    for (Future<MatrixBlock> rtask : rtasks) {
                        double tmp = rtask.get().quickGetValue(0, 0);
                        val = op.aggOp.increOp.fn.execute(val, tmp);
                    }
                    ret.quickSetValue(0, 0, val);
                }
            } else {
                // error handling: Future.get() rethrows a task failure that would
                // otherwise go unnoticed and leave an incomplete result in ret
                for (Future<MatrixBlock> rtask : rtasks) {
                    rtask.get();
                }
            }
        } catch (Exception ex) {
            throw new DMLRuntimeException(ex);
        }
    } else {
        // process UC column group
        for (ColGroup grp : _colGroups) if (grp instanceof ColGroupUncompressed)
            grp.unaryAggregateOperations(op, ret);
        // process OLE/RLE column groups
        aggregateUnaryOperations(op, _colGroups, ret, 0, rlen);
    }
    // special handling zeros for rowmins/rowmax: a row with fewer than clen
    // non-zeros contains at least one zero, which is folded into its min/max
    if (op.indexFn instanceof ReduceCol && op.aggOp.increOp.fn instanceof Builtin) {
        int[] rnnz = new int[rlen];
        for (ColGroup grp : _colGroups) grp.countNonZerosPerRow(rnnz, 0, rlen);
        Builtin builtin = (Builtin) op.aggOp.increOp.fn;
        for (int i = 0; i < rlen; i++) if (rnnz[i] < clen)
            ret.quickSetValue(i, 0, builtin.execute2(ret.quickGetValue(i, 0), 0));
    }
    // drop correction if necessary
    if (op.aggOp.correctionExists && inCP)
        ret.dropLastRowsOrColumns(op.aggOp.correctionLocation);
    // post-processing
    ret.recomputeNonZeros();
    // the timer only exists if debug logging was enabled on entry
    if (time != null && LOG.isDebugEnabled())
        LOG.debug("Compressed uagg k=" + op.getNumThreads() + " in " + time.stop());
    return ret;
}

Also used : ReduceAll(org.apache.sysml.runtime.functionobjects.ReduceAll) MatrixBlock(org.apache.sysml.runtime.matrix.data.MatrixBlock) ArrayList(java.util.ArrayList) KahanFunction(org.apache.sysml.runtime.functionobjects.KahanFunction) KahanPlusSq(org.apache.sysml.runtime.functionobjects.KahanPlusSq) ReduceCol(org.apache.sysml.runtime.functionobjects.ReduceCol) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException) IOException(java.io.IOException) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException) ExecutorService(java.util.concurrent.ExecutorService) KahanObject(org.apache.sysml.runtime.instructions.cp.KahanObject) KahanPlus(org.apache.sysml.runtime.functionobjects.KahanPlus) Future(java.util.concurrent.Future) Timing(org.apache.sysml.runtime.controlprogram.parfor.stat.Timing) Builtin(org.apache.sysml.runtime.functionobjects.Builtin)

Example 42 with Timing

use of org.apache.sysml.runtime.controlprogram.parfor.stat.Timing in project systemml by apache.

the class CompressedMatrixBlock method decompress.

/**
 * Decompress block using multiple threads.
 *
 * <p>Blocks that are not compressed are copied, and for {@code k <= 1} the call is delegated
 * to the single-threaded {@link #decompress()}. Otherwise the rows are split into aligned
 * ranges, one decompression task per range.
 *
 * @param k degree of parallelism
 * @return a new uncompressed matrix block containing the contents
 * of this block
 * @throws DMLRuntimeException if the parallel decompression (including any single task) fails
 */
public MatrixBlock decompress(int k) {
    // early abort for not yet compressed blocks
    if (!isCompressed())
        return new MatrixBlock(this);
    if (k <= 1)
        return decompress();
    Timing time = LOG.isDebugEnabled() ? new Timing(true) : null;
    MatrixBlock ret = new MatrixBlock(rlen, clen, sparse, nonZeros).allocateBlock();
    // multi-threaded decompression
    try {
        ExecutorService pool = CommonThreadPool.get(k);
        List<Future<Object>> rtasks;
        try {
            int numRows = getNumRows();
            int blklen = BitmapEncoder.getAlignedBlocksize((int) (Math.ceil((double) numRows / k)));
            ArrayList<DecompressTask> tasks = new ArrayList<>();
            for (int i = 0; i < k && i * blklen < numRows; i++) {
                tasks.add(new DecompressTask(_colGroups, ret, i * blklen, Math.min((i + 1) * blklen, numRows)));
            }
            rtasks = pool.invokeAll(tasks);
        } finally {
            // release the pool even if task creation or invokeAll fails
            pool.shutdown();
        }
        // error handling: Future.get() rethrows any task failure
        for (Future<Object> rt : rtasks) {
            rt.get();
        }
    } catch (Exception ex) {
        throw new DMLRuntimeException(ex);
    }
    // post-processing
    ret.setNonZeros(nonZeros);
    // the timer only exists if debug logging was enabled on entry
    if (time != null && LOG.isDebugEnabled())
        LOG.debug("decompressed block w/ k=" + k + " in " + time.stop() + "ms.");
    return ret;
}

Also used : MatrixBlock(org.apache.sysml.runtime.matrix.data.MatrixBlock) ArrayList(java.util.ArrayList) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException) IOException(java.io.IOException) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException) ExecutorService(java.util.concurrent.ExecutorService) Future(java.util.concurrent.Future) KahanObject(org.apache.sysml.runtime.instructions.cp.KahanObject) ScalarObject(org.apache.sysml.runtime.instructions.cp.ScalarObject) CM_COV_Object(org.apache.sysml.runtime.instructions.cp.CM_COV_Object) Timing(org.apache.sysml.runtime.controlprogram.parfor.stat.Timing)

Example 43 with Timing

use of org.apache.sysml.runtime.controlprogram.parfor.stat.Timing in project systemml by apache.

the class CompressedMatrixBlock method transposeSelfMatrixMultOperations.

/**
 * Transpose-self matrix multiplication on this block.
 *
 * <p>Uncompressed blocks are handled by the superclass, and a block consisting of a single
 * uncompressed column group is handled by that group's underlying data. For all other
 * blocks only {@code MMTSJType.LEFT} is accepted.
 *
 * @param out optional output block to reuse; a new clen x clen block is created if null
 * @param tstype transpose-self type
 * @return the output block holding the result
 * @throws DMLRuntimeException if {@code tstype} is not {@code MMTSJType.LEFT}
 */
@Override
public MatrixBlock transposeSelfMatrixMultOperations(MatrixBlock out, MMTSJType tstype) {
    // not compressed: fall back to the uncompressed implementation
    if (!isCompressed()) {
        return super.transposeSelfMatrixMultOperations(out, tstype);
    }
    // single uncompressed colgroup: single-threaded tsmm on its data
    if (isSingleUncompressedGroup()) {
        ColGroupUncompressed onlyGroup = (ColGroupUncompressed) _colGroups.get(0);
        return onlyGroup.getData().transposeSelfMatrixMultOperations(out, tstype);
    }
    Timing timer = null;
    if (LOG.isDebugEnabled()) {
        timer = new Timing(true);
    }
    // check for transpose type (right not supported yet)
    if (tstype != MMTSJType.LEFT) {
        throw new DMLRuntimeException("Invalid MMTSJ type '" + tstype.toString() + "'.");
    }
    // create or reset the dense clen x clen output block
    MatrixBlock product = out;
    if (product != null) {
        product.reset(clen, clen, false);
    } else {
        product = new MatrixBlock(clen, clen, false);
    }
    product.allocateDenseBlock();
    if (!isEmptyBlock(false)) {
        // compute matrix mult over all column groups
        leftMultByTransposeSelf(_colGroups, product, 0, _colGroups.size());
        // post-processing: mirror the upper triangle and set the non-zero count
        product.setNonZeros(LinearAlgebraUtils.copyUpperToLowerTriangle(product));
    }
    if (LOG.isDebugEnabled()) {
        LOG.debug("Compressed TSMM in " + timer.stop());
    }
    return product;
}

Also used : MatrixBlock(org.apache.sysml.runtime.matrix.data.MatrixBlock) Timing(org.apache.sysml.runtime.controlprogram.parfor.stat.Timing) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException)

Example 44 with Timing

use of org.apache.sysml.runtime.controlprogram.parfor.stat.Timing in project systemml by apache.

the class CompressedMatrixBlock method decompress.

/**
 * Decompress block (single-threaded).
 *
 * <p>A block that is not compressed is simply copied. Otherwise every column group is
 * decompressed into a freshly created block; for sparse output the rows are preallocated
 * from per-row non-zero counts and sorted afterwards.
 *
 * @return a new uncompressed matrix block containing the contents of this
 *         block
 */
public MatrixBlock decompress() {
    // nothing to decompress yet: return a copy
    if (!isCompressed()) {
        return new MatrixBlock(this);
    }
    Timing timer = new Timing(true);
    MatrixBlock target = new MatrixBlock(getNumRows(), getNumColumns(), isInSparseFormat(), getNonZeros());
    // preallocate sparse rows to avoid repeated reallocations
    if (target.isInSparseFormat()) {
        int[] rowNnz = new int[rlen];
        for (ColGroup group : _colGroups) {
            group.countNonZerosPerRow(rowNnz, 0, rlen);
        }
        target.allocateSparseRowsBlock();
        SparseBlock sparseRows = target.getSparseBlock();
        int row = 0;
        while (row < rlen) {
            sparseRows.allocate(row, rowNnz[row]);
            row++;
        }
    }
    // core decompression (append if sparse)
    for (ColGroup group : _colGroups) {
        group.decompressToBlock(target, 0, rlen);
    }
    // post-processing (for append in decompress)
    target.setNonZeros(nonZeros);
    if (target.isInSparseFormat()) {
        target.sortSparseRows();
    }
    if (LOG.isDebugEnabled()) {
        LOG.debug("decompressed block in " + timer.stop() + "ms.");
    }
    return target;
}

Also used : MatrixBlock(org.apache.sysml.runtime.matrix.data.MatrixBlock) Timing(org.apache.sysml.runtime.controlprogram.parfor.stat.Timing) SparseBlock(org.apache.sysml.runtime.matrix.data.SparseBlock)

Example 45 with Timing

use of org.apache.sysml.runtime.controlprogram.parfor.stat.Timing in project incubator-systemml by apache.

the class SortUtils method main.

/**
 * Small timing driver: fills two parallel arrays with random indexes and values, sorts them
 * by index via {@code SortUtils.sortByIndex}, then checks the order via
 * {@code SortUtils.isSorted}, printing the elapsed time reported for each step.
 *
 * @param args command-line arguments (unused)
 */
public static void main(String[] args) {
    int n = 10000000;
    int[] indexes = new int[n];
    double[] values = new double[n];
    // populate the input with random index/value pairs
    Random rand = new Random();
    int pos = 0;
    while (pos < n) {
        indexes[pos] = rand.nextInt();
        values[pos] = rand.nextDouble();
        pos++;
    }
    System.out.println("Running quicksort test ...");
    Timing time = new Timing();
    // step 1: timed sort
    time.start();
    SortUtils.sortByIndex(0, indexes.length, indexes, values);
    String sortReport = "quicksort n=" + n + " in " + time.stop() + "ms.";
    System.out.println(sortReport);
    // step 2: timed verification of the sort order
    time.start();
    boolean flag = SortUtils.isSorted(0, indexes.length, indexes);
    String checkReport = "check sorted n=" + n + " in " + time.stop() + "ms, " + flag + ".";
    System.out.println(checkReport);
}

Also used : Random(java.util.Random) Timing(org.apache.sysml.runtime.controlprogram.parfor.stat.Timing)

Aggregations

Timing (org.apache.sysml.runtime.controlprogram.parfor.stat.Timing): 78; IOException (java.io.IOException): 31; ArrayList (java.util.ArrayList): 29; DMLRuntimeException (org.apache.sysml.runtime.DMLRuntimeException): 25; HashMap (java.util.HashMap): 24; Connection (org.apache.sysml.api.jmlc.Connection): 17; PreparedScript (org.apache.sysml.api.jmlc.PreparedScript): 17; ResultVariables (org.apache.sysml.api.jmlc.ResultVariables): 17; MatrixBlock (org.apache.sysml.runtime.matrix.data.MatrixBlock): 17; MatrixObject (org.apache.sysml.runtime.controlprogram.caching.MatrixObject): 14; ParForStatementBlock (org.apache.sysml.parser.ParForStatementBlock): 10; TaskPartitioner (org.apache.sysml.runtime.controlprogram.parfor.TaskPartitioner): 10; ParForBody (org.apache.sysml.runtime.controlprogram.parfor.ParForBody): 8; RemoteParForJobReturn (org.apache.sysml.runtime.controlprogram.parfor.RemoteParForJobReturn): 8; LocalVariableMap (org.apache.sysml.runtime.controlprogram.LocalVariableMap): 7; ProgramBlock (org.apache.sysml.runtime.controlprogram.ProgramBlock): 7; ExecutorService (java.util.concurrent.ExecutorService): 6; Future (java.util.concurrent.Future): 6; LocalTaskQueue (org.apache.sysml.runtime.controlprogram.parfor.LocalTaskQueue): 6; Task (org.apache.sysml.runtime.controlprogram.parfor.Task): 6