use of org.apache.sysml.runtime.controlprogram.parfor.stat.Timing in project incubator-systemml by apache.
the class ParForProgramBlock method executeRemoteSparkParForDP.
/**
 * Runs the parfor body through {@code RemoteDPParForSpark.runJob}, using the
 * matrix named by {@code _colocatedDPMatrix} as the partitioned job input, and
 * consolidates the variables returned by the job into the given execution context.
 *
 * If {@code _monitor} is set, the time of each phase and the number of executed
 * tasks/iterations are reported to {@code StatisticMonitor}.
 *
 * @param ec execution context providing the input matrix and receiving the results
 * @param itervar iteration variable (not referenced inside this method)
 * @param from passed to {@code createTaskPartitioner}
 * @param to passed to {@code createTaskPartitioner}
 * @param incr passed to {@code createTaskPartitioner}
 * @throws IOException declared by the signature; presumably raised by one of the
 *         export/serialization helpers - TODO confirm
 */
private void executeRemoteSparkParForDP(ExecutionContext ec, IntObject itervar, IntObject from, IntObject to, IntObject incr) throws IOException {
    // the timer only exists when monitoring is on; every use below is guarded by _monitor
    Timing timer = null;
    if (_monitor) {
        timer = new Timing(true);
    }

    // Step 0) check and compile to CP (if forced remote parfor)
    boolean forcedRecompile = checkMRAndRecompileToCP(0);

    // Step 1) prepare partitioned input matrix (needs to happen before serializing the program)
    ParForStatementBlock parforSb = (ParForStatementBlock) getStatementBlock();
    MatrixObject dpMatrix = ec.getMatrixObject(_colocatedDPMatrix);
    PartitionFormat dpFormat = parforSb.determineDataPartitionFormat(_colocatedDPMatrix);
    // flag the matrix variable as partitioned for the duration of the job
    dpMatrix.setPartitioned(dpFormat._dpf, dpFormat._N);

    // Step 2) init parallel workers (serialize PBs)
    // NOTES: each mapper changes filenames with regard to his ID as we submit a single
    // job, cannot reuse serialized string, since variables are serialized as well.
    ParForBody parforBody = new ParForBody(_childBlocks, _resultVars, ec);
    HashMap<String, byte[]> classMap = new HashMap<>();
    String serializedBody = ProgramConverter.serializeParForBody(parforBody, classMap);
    if (_monitor) {
        StatisticMonitor.putPFStat(_ID, Stat.PARFOR_INIT_PARWRK_T, timer.stop());
    }

    // Step 3) create tasks
    TaskPartitioner taskPartitioner = createTaskPartitioner(from, to, incr);
    String resultFileName = constructResultFileName();
    long iterationCount = taskPartitioner.getNumIterations();
    // tasks are not materialized here, so the iteration count doubles as the created-task count
    long createdTaskCount = iterationCount;
    if (_monitor) {
        StatisticMonitor.putPFStat(_ID, Stat.PARFOR_INIT_TASKS_T, timer.stop());
    }

    // write matrices to HDFS, except DP matrix which is the input to the RemoteDPParForSpark job
    exportMatricesToHDFS(ec, _colocatedDPMatrix);

    // Step 4) submit the job (wait for finished work)
    // TODO runtime support for binary cell partitioning
    OutputInfo inputInfo = OutputInfo.BinaryBlockOutputInfo;
    RemoteParForJobReturn jobResult = RemoteDPParForSpark.runJob(_ID, _iterPredVar, _colocatedDPMatrix, serializedBody, classMap, resultFileName, dpMatrix, ec, dpFormat, inputInfo, _tSparseCol, _enableCPCaching, _numThreads);
    if (_monitor) {
        StatisticMonitor.putPFStat(_ID, Stat.PARFOR_WAIT_EXEC_T, timer.stop());
    }

    // Step 5) collecting results from each parallel worker
    int executedTaskCount = jobResult.getNumExecutedTasks();
    int executedIterationCount = jobResult.getNumExecutedIterations();
    // consolidate results into global symbol table
    consolidateAndCheckResults(ec, iterationCount, createdTaskCount, executedIterationCount, executedTaskCount, jobResult.getVariables());

    // undo the forced recompile of step 0, if it took place
    if (forcedRecompile) {
        releaseForcedRecompile(0);
    }
    dpMatrix.unsetPartitioned();

    if (_monitor) {
        StatisticMonitor.putPFStat(_ID, Stat.PARFOR_WAIT_RESULTS_T, timer.stop());
        StatisticMonitor.putPFStat(_ID, Stat.PARFOR_NUMTASKS, executedTaskCount);
        StatisticMonitor.putPFStat(_ID, Stat.PARFOR_NUMITERS, executedIterationCount);
    }
}
use of org.apache.sysml.runtime.controlprogram.parfor.stat.Timing in project incubator-systemml by apache.
the class ParForProgramBlock method handleDataPartitioning.
/**
 * Partitions the read-only matrix inputs of this parfor, as decided per variable
 * by the statement block, and swaps the partitioned matrix objects into the
 * execution context. Nothing happens when the configured data partitioner is
 * {@code PDataPartitioner.NONE}.
 *
 * For every partitioned variable the original matrix object is remembered in
 * {@code _variablesDPOriginal}; the partitioned object is additionally cached in
 * {@code _variablesDPReuse} when reuse is allowed and applicable.
 *
 * @param ec execution context whose variables are inspected and replaced
 * @throws DMLRuntimeException if no parfor statement block is available
 */
private void handleDataPartitioning(ExecutionContext ec) {
    // local copy: may be switched to REMOTE_SPARK below without touching the field
    PDataPartitioner effectivePartitioner = _dataPartitioner;
    if (effectivePartitioner == PDataPartitioner.NONE) {
        return;
    }

    ParForStatementBlock parforSb = (ParForStatementBlock) getStatementBlock();
    if (parforSb == null) {
        throw new DMLRuntimeException("ParFor statement block required for reasoning about data partitioning.");
    }

    for (String varName : parforSb.getReadOnlyParentVars()) {
        Data candidate = ec.getVariable(varName);
        // variables that are absent from the symbol table or are not matrices are skipped
        if (!(candidate instanceof MatrixObject)) {
            continue;
        }

        // unpartitioned input
        MatrixObject original = (MatrixObject) candidate;
        PartitionFormat format = parforSb.determineDataPartitionFormat(varName);
        LOG.trace("PARFOR ID = " + _ID + ", Partitioning read-only input variable " + varName + " (format=" + format + ", mode=" + _dataPartitioner + ")");
        if (format == PartitionFormat.NONE) {
            continue;
        }

        // blockwise formats are always handled by the REMOTE_SPARK partitioner
        if (effectivePartitioner != PDataPartitioner.REMOTE_SPARK && format.isBlockwise()) {
            LOG.warn("PARFOR ID = " + _ID + ", Switching data partitioner from " + effectivePartitioner + " to " + PDataPartitioner.REMOTE_SPARK.name() + " for blockwise-n partitioning.");
            effectivePartitioner = PDataPartitioner.REMOTE_SPARK;
        }

        Timing timer = new Timing(true);

        // input data partitioning (reuse if possible)
        Data partitioned = _variablesDPReuse.get(varName);
        if (partitioned == null) {
            // no reuse opportunity: partition now
            DataPartitioner partitioner = createDataPartitioner(format, effectivePartitioner, ec);
            // disable binary cell for sparse if consumed by MR jobs
            // TODO support for binarycell
            boolean binaryCellAllowed = OptimizerRuleBased.allowsBinaryCellPartitions(original, format);
            if (!binaryCellAllowed || OptimizerUtils.isSparkExecutionMode()) {
                partitioner.disableBinaryCell();
            }
            MatrixObject created = partitioner.createPartitionedMatrixObject(original, constructDataPartitionsFileName());
            // the same object coming back means it was not partitioned (e.g., too small): skip to next
            if (created == original) {
                continue;
            }
            partitioned = created;
        }
        ec.setVariable(varName, partitioned);

        // recompile parfor body program
        ProgramRecompiler.rFindAndRecompileIndexingHOP(parforSb, this, varName, ec, true);

        // store original and partitioned matrix (for reuse if applicable)
        _variablesDPOriginal.put(varName, original);
        if (ALLOW_REUSE_PARTITION_VARS && ProgramRecompiler.isApplicableForReuseVariable(parforSb.getDMLProg(), parforSb, varName)) {
            _variablesDPReuse.put(varName, partitioned);
        }
        LOG.trace("Partitioning and recompilation done in " + timer.stop() + "ms");
    }
}
use of org.apache.sysml.runtime.controlprogram.parfor.stat.Timing in project incubator-systemml by apache.
the class CompressedMatrixBlock method decompress.
/**
 * Decompress block.
 *
 * Blocks that are not compressed are copied, and {@code k <= 1} delegates to
 * the single-threaded {@code decompress()}. Otherwise the rows are split into
 * at most {@code k} aligned ranges that are decompressed by parallel tasks.
 *
 * @param k degree of parallelism
 * @return a new uncompressed matrix block containing the contents
 * of this block
 * @throws DMLRuntimeException wrapping any failure of the parallel tasks,
 * including interruption of the calling thread
 */
public MatrixBlock decompress(int k) {
    // early abort for not yet compressed blocks
    if (!isCompressed())
        return new MatrixBlock(this);
    if (k <= 1)
        return decompress();
    // timer is only created when debug logging is on; null otherwise
    Timing time = LOG.isDebugEnabled() ? new Timing(true) : null;
    MatrixBlock ret = new MatrixBlock(rlen, clen, sparse, nonZeros).allocateBlock();
    // multi-threaded decompression
    ExecutorService pool = null;
    try {
        pool = CommonThreadPool.get(k);
        int numRows = getNumRows();
        int blklen = BitmapEncoder.getAlignedBlocksize((int) (Math.ceil((double) numRows / k)));
        ArrayList<DecompressTask> tasks = new ArrayList<>();
        // one task per row range; fewer than k tasks if the aligned ranges already cover all rows
        for (int i = 0; i < k && i * blklen < numRows; i++) {
            tasks.add(new DecompressTask(_colGroups, ret, i * blklen, Math.min((i + 1) * blklen, numRows)));
        }
        List<Future<Object>> rtasks = pool.invokeAll(tasks);
        // error handling: get() rethrows any exception raised inside a task
        for (Future<Object> rt : rtasks) {
            rt.get();
        }
    } catch (InterruptedException ex) {
        // keep the interrupt visible to callers instead of silently clearing it
        Thread.currentThread().interrupt();
        throw new DMLRuntimeException(ex);
    } catch (Exception ex) {
        throw new DMLRuntimeException(ex);
    } finally {
        // release the pool on every path, including a failing invokeAll
        if (pool != null)
            pool.shutdown();
    }
    // post-processing
    ret.setNonZeros(nonZeros);
    // guard on the timer itself so a log-level change mid-call cannot cause a NullPointerException
    if (time != null)
        LOG.debug("decompressed block w/ k=" + k + " in " + time.stop() + "ms.");
    return ret;
}
use of org.apache.sysml.runtime.controlprogram.parfor.stat.Timing in project incubator-systemml by apache.
the class CompressedMatrixBlock method aggregateBinaryOperations.
/**
 * Matrix multiplication {@code m1 %*% m2} where this block is one of the inputs.
 *
 * Uncompressed blocks and blocks consisting of a single uncompressed column
 * group are forwarded to the uncompressed implementation. Otherwise the product
 * is computed as a matrix-vector multiply (right or left), or, for the general
 * case, as a sequence of matrix-vector multiplies whose results are written
 * into {@code ret} row by row or column by column.
 *
 * @param m1 left input
 * @param m2 right input
 * @param ret output block to reuse, or {@code null} to allocate a new one
 * @param op operator; its thread count selects the multi-threaded kernels
 * @return the dense result block
 */
@Override
public MatrixBlock aggregateBinaryOperations(MatrixBlock m1, MatrixBlock m2, MatrixBlock ret, AggregateBinaryOperator op) {
    // call uncompressed matrix mult if necessary
    if (!isCompressed()) {
        return super.aggregateBinaryOperations(m1, m2, ret, op);
    }
    // multi-threaded mm of single uncompressed colgroup
    if (isSingleUncompressedGroup()) {
        MatrixBlock tmp = ((ColGroupUncompressed) _colGroups.get(0)).getData();
        return tmp.aggregateBinaryOperations(this == m1 ? tmp : m1, this == m2 ? tmp : m2, ret, op);
    }
    // timer is only created when debug logging is on; null otherwise
    Timing time = LOG.isDebugEnabled() ? new Timing(true) : null;
    // setup meta data (dimensions, sparsity)
    int rl = m1.getNumRows();
    int cl = m2.getNumColumns();
    // number of output cells, computed in long so large outputs do not overflow int
    long estNnz = (long) rl * cl;
    // create output matrix block
    if (ret == null)
        ret = new MatrixBlock(rl, cl, false, estNnz);
    else
        ret.reset(rl, cl, false, estNnz);
    // compute matrix mult
    if (m1.getNumRows() > 1 && m2.getNumColumns() == 1) {
        // MV right
        // NOTE(review): the cast assumes m1 is the compressed input in this branch - confirm
        // that callers never reach it with an uncompressed m1 and a compressed vector m2
        CompressedMatrixBlock cmb = (CompressedMatrixBlock) m1;
        if (op.getNumThreads() > 1)
            cmb.rightMultByVector(m2, ret, op.getNumThreads());
        else
            cmb.rightMultByVector(m2, ret);
    } else if (m1.getNumRows() == 1 && m2.getNumColumns() > 1) {
        // MV left
        if (op.getNumThreads() > 1)
            leftMultByVectorTranspose(_colGroups, m1, ret, false, op.getNumThreads());
        else
            leftMultByVectorTranspose(_colGroups, m1, ret, false, true);
    } else {
        // MM
        // prepare the other input (including decompression if necessary)
        boolean right = (m1 == this);
        MatrixBlock that = right ? m2 : m1;
        if (that instanceof CompressedMatrixBlock) {
            that = ((CompressedMatrixBlock) that).isCompressed() ? ((CompressedMatrixBlock) that).decompress() : that;
        }
        // transpose for sequential repeated column access
        if (right) {
            that = LibMatrixReorg.transpose(that, new MatrixBlock(that.getNumColumns(), that.getNumRows(), that.isInSparseFormat()), op.getNumThreads());
        }
        // reusable buffers: one input row of 'that' and one output vector
        MatrixBlock tmpIn = new MatrixBlock(1, that.getNumColumns(), false).allocateBlock();
        MatrixBlock tmpOut = new MatrixBlock(right ? rl : 1, right ? 1 : cl, false).allocateBlock();
        if (right) {
            // MM right: each row of the transposed input is one column of m2
            for (int i = 0; i < that.getNumRows(); i++) {
                // on transpose
                tmpIn = that.slice(i, i, 0, that.getNumColumns() - 1, tmpIn);
                // meta data op: turn the row vector back into a column vector
                MatrixBlock tmpIn2 = LibMatrixReorg.transpose(tmpIn, new MatrixBlock(tmpIn.getNumColumns(), tmpIn.getNumRows(), false));
                tmpOut.reset(tmpOut.getNumRows(), tmpOut.getNumColumns());
                if (op.getNumThreads() > 1)
                    rightMultByVector(tmpIn2, tmpOut, op.getNumThreads());
                else
                    rightMultByVector(tmpIn2, tmpOut);
                // write the result vector into column i of the output
                ret.leftIndexingOperations(tmpOut, 0, ret.getNumRows() - 1, i, i, ret, UpdateType.INPLACE);
            }
        } else {
            // MM left: each row of m1 yields one row of the output
            for (int i = 0; i < that.getNumRows(); i++) {
                tmpIn = that.slice(i, i, 0, that.getNumColumns() - 1, tmpIn);
                if (op.getNumThreads() > 1)
                    leftMultByVectorTranspose(_colGroups, tmpIn, tmpOut, false, op.getNumThreads());
                else
                    leftMultByVectorTranspose(_colGroups, tmpIn, tmpOut, false, true);
                ret.leftIndexingOperations(tmpOut, i, i, 0, ret.getNumColumns() - 1, ret, UpdateType.INPLACE);
            }
        }
    }
    // guard on the timer itself so a log-level change mid-call cannot cause a NullPointerException
    if (time != null)
        LOG.debug("Compressed MM in " + time.stop());
    return ret;
}
use of org.apache.sysml.runtime.controlprogram.parfor.stat.Timing in project systemml by apache.
the class PiggybackingWorker method mergeMRJobInstructions.
/**
 * Greedily merges the MR job instructions of the working set, in order: each
 * instruction is merged into the currently open merged instruction if
 * {@code isMergableMRJobInstruction} accepts it; otherwise the open one is
 * closed and a new one is started from a copy of that instruction.
 *
 * The last open merged instruction is always appended, so an empty working set
 * yields a list holding a single merged instruction without any job.
 *
 * @param workingSet pairs of instruction ID and MR job instruction
 * @return the merged instructions in the order they were closed
 * @throws IllegalAccessException declared by the signature; presumably raised
 *         while merging instructions - TODO confirm
 */
protected LinkedList<MergedMRJobInstruction> mergeMRJobInstructions(LinkedList<Pair<Long, MRJobInstruction>> workingSet) throws IllegalAccessException {
    LinkedList<MergedMRJobInstruction> merged = new LinkedList<>();
    Timing timer = new Timing(true);

    // NOTE currently all merged into one (might be invalid due to memory constraints)
    MergedMRJobInstruction open = new MergedMRJobInstruction();
    for (Pair<Long, MRJobInstruction> entry : workingSet) {
        long id = entry.getKey();
        MRJobInstruction job = entry.getValue();
        int outputCount = job.getOutputs().length;

        boolean hasOpenJob = (open.inst != null);
        if (hasOpenJob && open.inst.isMergableMRJobInstruction(job)) {
            // append to the open merged instruction; the offset is the output count before the merge
            int outputOffset = open.inst.getOutputs().length;
            open.inst.mergeMRJobInstruction(job);
            open.addInstructionMetaData(id, outputOffset, outputCount);
            continue;
        }

        if (hasOpenJob) {
            // not mergable: close the current merged instruction and open a fresh one
            merged.add(open);
            open = new MergedMRJobInstruction();
        }
        // first instruction of a merged instruction is deep copied
        open.inst = new MRJobInstruction(job);
        open.addInstructionMetaData(id, 0, outputCount);
    }

    // close last open merged instruction
    merged.add(open);

    // output log info for better understandability for users
    LOG.info("Merged MR-Job instructions: " + workingSet.size() + " --> " + merged.size() + " in " + timer.stop() + "ms.");
    return merged;
}
Aggregations