Search in sources :

Example 61 with Timing

use of org.apache.sysml.runtime.controlprogram.parfor.stat.Timing in project incubator-systemml by apache.

the class ParForProgramBlock method executeRemoteSparkParForDP.

/**
 * Executes this parfor via {@code RemoteDPParForSpark.runJob}, using the
 * co-located matrix {@code _colocatedDPMatrix} as the partitioned job input.
 * <p>
 * The input matrix is flagged as partitioned for the duration of the call and
 * the flag is cleared again after the results were consolidated into the
 * symbol table of {@code ec}. When {@code _monitor} is set, the elapsed time
 * of each phase and the executed task/iteration counts are reported to
 * {@code StatisticMonitor}.
 *
 * @param ec execution context that provides the input matrix and receives the results
 * @param itervar iteration variable (not read by this method)
 * @param from lower bound handed to the task partitioner
 * @param to upper bound handed to the task partitioner
 * @param incr increment handed to the task partitioner
 * @throws IOException declared for the invoked helpers; not thrown directly here
 */
private void executeRemoteSparkParForDP(ExecutionContext ec, IntObject itervar, IntObject from, IntObject to, IntObject incr) throws IOException {
    // the stopwatch only exists in monitoring mode; every use below is guarded by _monitor
    Timing stopwatch = null;
    if (_monitor) {
        stopwatch = new Timing(true);
    }

    // Step 0) check and compile to CP (if forced remote parfor)
    final boolean forcedRecompile = checkMRAndRecompileToCP(0);

    // Step 1) prepare the partitioned input matrix; this has to happen
    // before the program is serialized
    ParForStatementBlock stmtBlock = (ParForStatementBlock) getStatementBlock();
    MatrixObject dpInput = ec.getMatrixObject(_colocatedDPMatrix);
    PartitionFormat dpFormat = stmtBlock.determineDataPartitionFormat(_colocatedDPMatrix);
    // flag the matrix variable as partitioned
    dpInput.setPartitioned(dpFormat._dpf, dpFormat._N);

    // Step 2) init parallel workers (serialize the program blocks)
    // NOTE: a single job is submitted and each worker adapts file names by its ID;
    // the serialized string is not reused because variables are serialized with it.
    HashMap<String, byte[]> classMap = new HashMap<>();
    ParForBody parforBody = new ParForBody(_childBlocks, _resultVars, ec);
    String serializedProgram = ProgramConverter.serializeParForBody(parforBody, classMap);
    if (_monitor) {
        StatisticMonitor.putPFStat(_ID, Stat.PARFOR_INIT_PARWRK_T, stopwatch.stop());
    }

    // Step 3) create tasks
    TaskPartitioner taskPartitioner = createTaskPartitioner(from, to, incr);
    String resultFileName = constructResultFileName();
    long totalIterations = taskPartitioner.getNumIterations();
    // tasks are not materialized here, the iteration count doubles as created-task count
    long createdTasks = totalIterations;
    if (_monitor) {
        StatisticMonitor.putPFStat(_ID, Stat.PARFOR_INIT_TASKS_T, stopwatch.stop());
    }

    // write matrices to HDFS, except the DP matrix which is the input to the RemoteDPParForSpark job
    exportMatricesToHDFS(ec, _colocatedDPMatrix);

    // Step 4) submit the remote Spark job (runJob returns the job result)
    // TODO runtime support for binary cell partitioning
    OutputInfo inputInfo = OutputInfo.BinaryBlockOutputInfo;
    RemoteParForJobReturn jobResult = RemoteDPParForSpark.runJob(_ID, _iterPredVar, _colocatedDPMatrix,
        serializedProgram, classMap, resultFileName, dpInput, ec, dpFormat, inputInfo,
        _tSparseCol, _enableCPCaching, _numThreads);
    if (_monitor) {
        StatisticMonitor.putPFStat(_ID, Stat.PARFOR_WAIT_EXEC_T, stopwatch.stop());
    }

    // Step 5) collect the results of the parallel workers
    int executedTasks = jobResult.getNumExecutedTasks();
    int executedIterations = jobResult.getNumExecutedIterations();
    // consolidate results into the global symbol table
    consolidateAndCheckResults(ec, totalIterations, createdTasks, executedIterations, executedTasks, jobResult.getVariables());

    // undo the forced recompile of step 0
    if (forcedRecompile) {
        releaseForcedRecompile(0);
    }
    dpInput.unsetPartitioned();

    if (_monitor) {
        StatisticMonitor.putPFStat(_ID, Stat.PARFOR_WAIT_RESULTS_T, stopwatch.stop());
        StatisticMonitor.putPFStat(_ID, Stat.PARFOR_NUMTASKS, executedTasks);
        StatisticMonitor.putPFStat(_ID, Stat.PARFOR_NUMITERS, executedIterations);
    }
}
Also used : ParForBody(org.apache.sysml.runtime.controlprogram.parfor.ParForBody) MatrixObject(org.apache.sysml.runtime.controlprogram.caching.MatrixObject) HashMap(java.util.HashMap) OutputInfo(org.apache.sysml.runtime.matrix.data.OutputInfo) RemoteParForJobReturn(org.apache.sysml.runtime.controlprogram.parfor.RemoteParForJobReturn) ParForStatementBlock(org.apache.sysml.parser.ParForStatementBlock) Timing(org.apache.sysml.runtime.controlprogram.parfor.stat.Timing) TaskPartitioner(org.apache.sysml.runtime.controlprogram.parfor.TaskPartitioner)

Example 62 with Timing

use of org.apache.sysml.runtime.controlprogram.parfor.stat.Timing in project incubator-systemml by apache.

the class ParForProgramBlock method handleDataPartitioning.

/**
 * Partitions the read-only matrix inputs of this parfor and recompiles the
 * parfor body against the partitioned variables.
 * <p>
 * Nothing happens if {@code _dataPartitioner} is {@code NONE}. Otherwise, for
 * every read-only parent variable that is bound to a {@code MatrixObject} and
 * has a partition format other than {@code PartitionFormat.NONE}, the variable
 * in {@code ec} is replaced by a partitioned matrix (taken from
 * {@code _variablesDPReuse} if present, otherwise newly created), and the
 * original matrix is remembered in {@code _variablesDPOriginal}.
 *
 * @param ec execution context whose variables are inspected and replaced
 * @throws DMLRuntimeException if this program block has no statement block
 */
private void handleDataPartitioning(ExecutionContext ec) {
    // work on a local copy, the mode may be switched to REMOTE_SPARK below
    PDataPartitioner partitionerMode = _dataPartitioner;
    if (partitionerMode == PDataPartitioner.NONE) {
        return;
    }

    ParForStatementBlock stmtBlock = (ParForStatementBlock) getStatementBlock();
    if (stmtBlock == null) {
        throw new DMLRuntimeException("ParFor statement block required for reasoning about data partitioning.");
    }

    for (String var : stmtBlock.getReadOnlyParentVars()) {
        // variables that are unbound or not matrices are ignored
        Data candidate = ec.getVariable(var);
        if (!(candidate instanceof MatrixObject)) {
            continue;
        }

        // unpartitioned input
        MatrixObject original = (MatrixObject) candidate;
        PartitionFormat format = stmtBlock.determineDataPartitionFormat(var);
        // NOTE(review): the message prints the field _dataPartitioner, not the
        // (possibly already switched) local mode
        LOG.trace("PARFOR ID = " + _ID + ", Partitioning read-only input variable " + var + " (format=" + format + ", mode=" + _dataPartitioner + ")");
        if (format == PartitionFormat.NONE) {
            continue;
        }

        // blockwise formats are handled by the REMOTE_SPARK partitioner only
        if (partitionerMode != PDataPartitioner.REMOTE_SPARK && format.isBlockwise()) {
            LOG.warn("PARFOR ID = " + _ID + ", Switching data partitioner from " + partitionerMode + " to " + PDataPartitioner.REMOTE_SPARK.name() + " for blockwise-n partitioning.");
            partitionerMode = PDataPartitioner.REMOTE_SPARK;
        }

        Timing stopwatch = new Timing(true);

        // input data partitioning (reuse if possible)
        Data partitioned = _variablesDPReuse.get(var);
        if (partitioned == null) {
            // no reuse opportunity, partition now
            DataPartitioner partitioner = createDataPartitioner(format, partitionerMode, ec);
            // disable binary cell for sparse if consumed by MR jobs
            // TODO support for binarycell
            boolean binaryCellDisallowed = !OptimizerRuleBased.allowsBinaryCellPartitions(original, format)
                || OptimizerUtils.isSparkExecutionMode();
            if (binaryCellDisallowed) {
                partitioner.disableBinaryCell();
            }
            MatrixObject freshlyPartitioned = partitioner.createPartitionedMatrixObject(original, constructDataPartitionsFileName());
            partitioned = freshlyPartitioned;
            // the partitioner handed back the input itself (e.g., too small):
            // skip the remaining partitioning logic and go to the next variable
            if (original == freshlyPartitioned) {
                continue;
            }
        }
        ec.setVariable(var, partitioned);

        // recompile parfor body program
        ProgramRecompiler.rFindAndRecompileIndexingHOP(stmtBlock, this, var, ec, true);

        // store original and partitioned matrix (for reuse if applicable)
        _variablesDPOriginal.put(var, original);
        if (ALLOW_REUSE_PARTITION_VARS && ProgramRecompiler.isApplicableForReuseVariable(stmtBlock.getDMLProg(), stmtBlock, var)) {
            _variablesDPReuse.put(var, partitioned);
        }

        LOG.trace("Partitioning and recompilation done in " + stopwatch.stop() + "ms");
    }
}
Also used : MatrixObject(org.apache.sysml.runtime.controlprogram.caching.MatrixObject) DataPartitioner(org.apache.sysml.runtime.controlprogram.parfor.DataPartitioner) ParForStatementBlock(org.apache.sysml.parser.ParForStatementBlock) Data(org.apache.sysml.runtime.instructions.cp.Data) Timing(org.apache.sysml.runtime.controlprogram.parfor.stat.Timing) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException)

Example 63 with Timing

use of org.apache.sysml.runtime.controlprogram.parfor.stat.Timing in project incubator-systemml by apache.

the class CompressedMatrixBlock method decompress.

/**
 * Decompress block using up to {@code k} parallel tasks.
 * <p>
 * A block that is not compressed is copied, and {@code k <= 1} delegates to
 * the single-threaded {@code decompress()}. Otherwise the rows are split into
 * at most {@code k} aligned row ranges that are decompressed concurrently
 * into one shared output block.
 *
 * @param k degree of parallelism
 * @return a new uncompressed matrix block containing the contents
 * of this block
 * @throws DMLRuntimeException if a decompression task fails or the calling
 * thread is interrupted while waiting (the interrupt flag is restored)
 */
public MatrixBlock decompress(int k) {
    // early abort for not yet compressed blocks
    if (!isCompressed())
        return new MatrixBlock(this);
    if (k <= 1)
        return decompress();
    Timing time = LOG.isDebugEnabled() ? new Timing(true) : null;
    MatrixBlock ret = new MatrixBlock(rlen, clen, sparse, nonZeros).allocateBlock();
    // multi-threaded decompression
    try {
        ExecutorService pool = CommonThreadPool.get(k);
        try {
            // named numRows to avoid shadowing the rlen field used above
            int numRows = getNumRows();
            int blklen = BitmapEncoder.getAlignedBlocksize((int) (Math.ceil((double) numRows / k)));
            ArrayList<DecompressTask> tasks = new ArrayList<>();
            for (int i = 0; i < k && i * blklen < numRows; i++) {
                tasks.add(new DecompressTask(_colGroups, ret, i * blklen, Math.min((i + 1) * blklen, numRows)));
            }
            // invokeAll blocks until all tasks are done
            List<Future<Object>> rtasks = pool.invokeAll(tasks);
            // error handling: get() rethrows any task failure
            for (Future<Object> rt : rtasks) {
                rt.get();
            }
        } finally {
            // always release the pool, also if invokeAll or a task failed
            pool.shutdown();
        }
    } catch (InterruptedException ex) {
        // keep the interrupt visible to callers further up the stack
        Thread.currentThread().interrupt();
        throw new DMLRuntimeException(ex);
    } catch (Exception ex) {
        throw new DMLRuntimeException(ex);
    }
    // post-processing
    ret.setNonZeros(nonZeros);
    // time is only non-null if debug logging was enabled at method entry
    if (time != null && LOG.isDebugEnabled())
        LOG.debug("decompressed block w/ k=" + k + " in " + time.stop() + "ms.");
    return ret;
}
Also used : MatrixBlock(org.apache.sysml.runtime.matrix.data.MatrixBlock) ArrayList(java.util.ArrayList) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException) IOException(java.io.IOException) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException) ExecutorService(java.util.concurrent.ExecutorService) Future(java.util.concurrent.Future) KahanObject(org.apache.sysml.runtime.instructions.cp.KahanObject) ScalarObject(org.apache.sysml.runtime.instructions.cp.ScalarObject) CM_COV_Object(org.apache.sysml.runtime.instructions.cp.CM_COV_Object) Timing(org.apache.sysml.runtime.controlprogram.parfor.stat.Timing)

Example 64 with Timing

use of org.apache.sysml.runtime.controlprogram.parfor.stat.Timing in project incubator-systemml by apache.

the class CompressedMatrixBlock method aggregateBinaryOperations.

/**
 * Matrix multiplication {@code m1 %*% m2} where this compressed block is one
 * of the two operands.
 * <p>
 * Falls back to the uncompressed implementation if this block is not
 * compressed, or operates on the underlying data if it consists of a single
 * uncompressed column group. Otherwise the product is computed as a
 * matrix-vector multiplication (right or left) or, for the general case, as a
 * sequence of vector multiplications whose results are written into
 * {@code ret} row by row or column by column.
 *
 * @param m1 left-hand-side input
 * @param m2 right-hand-side input
 * @param ret output block; allocated if {@code null}, otherwise reset to a
 * dense {@code m1.getNumRows() x m2.getNumColumns()} block
 * @param op operator providing the number of threads
 * @return the output block
 */
@Override
public MatrixBlock aggregateBinaryOperations(MatrixBlock m1, MatrixBlock m2, MatrixBlock ret, AggregateBinaryOperator op) {
    // call uncompressed matrix mult if necessary
    if (!isCompressed()) {
        return super.aggregateBinaryOperations(m1, m2, ret, op);
    }
    // multi-threaded mm of single uncompressed colgroup
    if (isSingleUncompressedGroup()) {
        MatrixBlock tmp = ((ColGroupUncompressed) _colGroups.get(0)).getData();
        return tmp.aggregateBinaryOperations(this == m1 ? tmp : m1, this == m2 ? tmp : m2, ret, op);
    }
    Timing time = LOG.isDebugEnabled() ? new Timing(true) : null;
    // setup meta data (dimensions, sparsity)
    int rl = m1.getNumRows();
    int cl = m2.getNumColumns();
    // dense output: number of cells, computed in long arithmetic because
    // the int product rl * cl overflows for large outputs
    long estNnz = (long) rl * cl;
    // create output matrix block
    if (ret == null)
        ret = new MatrixBlock(rl, cl, false, estNnz);
    else
        ret.reset(rl, cl, false, estNnz);
    // compute matrix mult
    if (m1.getNumRows() > 1 && m2.getNumColumns() == 1) {
        // MV right
        // NOTE(review): assumes m1 is the compressed operand in this case - confirm
        CompressedMatrixBlock cmb = (CompressedMatrixBlock) m1;
        if (op.getNumThreads() > 1)
            cmb.rightMultByVector(m2, ret, op.getNumThreads());
        else
            cmb.rightMultByVector(m2, ret);
    } else if (m1.getNumRows() == 1 && m2.getNumColumns() > 1) {
        // MV left
        if (op.getNumThreads() > 1)
            leftMultByVectorTranspose(_colGroups, m1, ret, false, op.getNumThreads());
        else
            leftMultByVectorTranspose(_colGroups, m1, ret, false, true);
    } else {
        // MM
        // prepare the other input (including decompression if necessary)
        boolean right = (m1 == this);
        MatrixBlock that = right ? m2 : m1;
        if (that instanceof CompressedMatrixBlock) {
            that = ((CompressedMatrixBlock) that).isCompressed() ? ((CompressedMatrixBlock) that).decompress() : that;
        }
        // transpose for sequential repeated column access
        if (right) {
            that = LibMatrixReorg.transpose(that, new MatrixBlock(that.getNumColumns(), that.getNumRows(), that.isInSparseFormat()), op.getNumThreads());
        }
        // reusable buffers for one input row and one output row/column
        MatrixBlock tmpIn = new MatrixBlock(1, that.getNumColumns(), false).allocateBlock();
        MatrixBlock tmpOut = new MatrixBlock(right ? rl : 1, right ? 1 : cl, false).allocateBlock();
        if (right) {
            // MM right: row i of the transposed input is column i of m2
            for (int i = 0; i < that.getNumRows(); i++) {
                tmpIn = that.slice(i, i, 0, that.getNumColumns() - 1, tmpIn);
                // turn the row vector back into a column vector (meta data op)
                MatrixBlock tmpIn2 = LibMatrixReorg.transpose(tmpIn, new MatrixBlock(tmpIn.getNumColumns(), tmpIn.getNumRows(), false));
                tmpOut.reset(tmpOut.getNumRows(), tmpOut.getNumColumns());
                if (op.getNumThreads() > 1)
                    rightMultByVector(tmpIn2, tmpOut, op.getNumThreads());
                else
                    rightMultByVector(tmpIn2, tmpOut);
                // write the result as column i of the output
                ret.leftIndexingOperations(tmpOut, 0, ret.getNumRows() - 1, i, i, ret, UpdateType.INPLACE);
            }
        } else {
            // MM left: one vector-matrix multiplication per row of m1
            for (int i = 0; i < that.getNumRows(); i++) {
                tmpIn = that.slice(i, i, 0, that.getNumColumns() - 1, tmpIn);
                if (op.getNumThreads() > 1)
                    leftMultByVectorTranspose(_colGroups, tmpIn, tmpOut, false, op.getNumThreads());
                else
                    leftMultByVectorTranspose(_colGroups, tmpIn, tmpOut, false, true);
                // write the result as row i of the output
                ret.leftIndexingOperations(tmpOut, i, i, 0, ret.getNumColumns() - 1, ret, UpdateType.INPLACE);
            }
        }
    }
    if (LOG.isDebugEnabled())
        LOG.debug("Compressed MM in " + time.stop());
    return ret;
}
Also used : MatrixBlock(org.apache.sysml.runtime.matrix.data.MatrixBlock) Timing(org.apache.sysml.runtime.controlprogram.parfor.stat.Timing)

Example 65 with Timing

use of org.apache.sysml.runtime.controlprogram.parfor.stat.Timing in project systemml by apache.

the class PiggybackingWorker method mergeMRJobInstructions.

/**
 * Greedily merges the MR-job instructions of the working set, in iteration
 * order, into as few merged instructions as possible.
 * <p>
 * An instruction is merged into the currently open merged instruction as long
 * as {@code isMergableMRJobInstruction} accepts it; otherwise the open one is
 * closed and a new one is started from a copy of the instruction. The merged
 * instruction that is open at the end is always appended to the result.
 *
 * @param workingSet pairs of instruction ID and MR-job instruction
 * @return list of merged MR-job instructions
 * @throws IllegalAccessException declared for the invoked merge helpers
 */
protected LinkedList<MergedMRJobInstruction> mergeMRJobInstructions(LinkedList<Pair<Long, MRJobInstruction>> workingSet) throws IllegalAccessException {
    LinkedList<MergedMRJobInstruction> merged = new LinkedList<>();
    Timing stopwatch = new Timing(true);

    // NOTE currently all merged into one (might be invalid due to memory constraints)
    MergedMRJobInstruction open = new MergedMRJobInstruction();
    for (Pair<Long, MRJobInstruction> entry : workingSet) {
        long id = entry.getKey();
        MRJobInstruction current = entry.getValue();
        int outputCount = current.getOutputs().length;

        boolean started = (open.inst != null);
        if (started && open.inst.isMergableMRJobInstruction(current)) {
            // add instruction to the open merged instruction; the output offset
            // has to be taken before the merge
            int outputOffset = open.inst.getOutputs().length;
            open.inst.mergeMRJobInstruction(current);
            open.addInstructionMetaData(id, outputOffset, outputCount);
            continue;
        }

        if (started) {
            // not mergable: close the current merged instruction and open a new one
            merged.add(open);
            open = new MergedMRJobInstruction();
        }
        // start the (new or still empty) merged instruction with a deep copy
        open.inst = new MRJobInstruction(current);
        open.addInstructionMetaData(id, 0, outputCount);
    }
    // close last open merged instruction
    merged.add(open);

    // output log info for better understandability for users
    LOG.info("Merged MR-Job instructions: " + workingSet.size() + " --> " + merged.size() + " in " + stopwatch.stop() + "ms.");
    return merged;
}
Also used : MRJobInstruction(org.apache.sysml.runtime.instructions.MRJobInstruction) Timing(org.apache.sysml.runtime.controlprogram.parfor.stat.Timing) LinkedList(java.util.LinkedList)

Aggregations

Timing (org.apache.sysml.runtime.controlprogram.parfor.stat.Timing): 78; IOException (java.io.IOException): 31; ArrayList (java.util.ArrayList): 29; DMLRuntimeException (org.apache.sysml.runtime.DMLRuntimeException): 25; HashMap (java.util.HashMap): 24; Connection (org.apache.sysml.api.jmlc.Connection): 17; PreparedScript (org.apache.sysml.api.jmlc.PreparedScript): 17; ResultVariables (org.apache.sysml.api.jmlc.ResultVariables): 17; MatrixBlock (org.apache.sysml.runtime.matrix.data.MatrixBlock): 17; MatrixObject (org.apache.sysml.runtime.controlprogram.caching.MatrixObject): 14; ParForStatementBlock (org.apache.sysml.parser.ParForStatementBlock): 10; TaskPartitioner (org.apache.sysml.runtime.controlprogram.parfor.TaskPartitioner): 10; ParForBody (org.apache.sysml.runtime.controlprogram.parfor.ParForBody): 8; RemoteParForJobReturn (org.apache.sysml.runtime.controlprogram.parfor.RemoteParForJobReturn): 8; LocalVariableMap (org.apache.sysml.runtime.controlprogram.LocalVariableMap): 7; ProgramBlock (org.apache.sysml.runtime.controlprogram.ProgramBlock): 7; ExecutorService (java.util.concurrent.ExecutorService): 6; Future (java.util.concurrent.Future): 6; LocalTaskQueue (org.apache.sysml.runtime.controlprogram.parfor.LocalTaskQueue): 6; Task (org.apache.sysml.runtime.controlprogram.parfor.Task): 6