use of org.apache.sysml.runtime.controlprogram.parfor.DataPartitioner in project incubator-systemml by apache.
the class DataPartitionMR method processPartitionInstructions.
private static void processPartitionInstructions(String shuffleInst, MatrixObject[] inputMatrices, byte[] resultIndices, MatrixObject[] outputMatrices, int numReducers, int replication, MatrixCharacteristics[] sts) {
    int i = 0;
    for (String inst : shuffleInst.split(Instruction.INSTRUCTION_DELIM)) {
        if (InstructionUtils.getOpCode(inst).equalsIgnoreCase("partition")) {
            // long begin = System.currentTimeMillis();
            String[] parts = InstructionUtils.getInstructionParts(inst);
            int input_index = Integer.parseInt(parts[1]);
            int output_index = Integer.parseInt(parts[2]);
            MatrixObject in = inputMatrices[input_index];
            MatrixObject out = outputMatrices[findResultIndex(resultIndices, output_index)];
            PDataPartitionFormat pformat = PDataPartitionFormat.valueOf(parts[3]);
            long rlen = in.getNumRows();
            long clen = in.getNumColumns();
            long brlen = in.getNumRowsPerBlock();
            long bclen = in.getNumColumnsPerBlock();
            long N = -1;
            switch (pformat) {
                case ROW_BLOCK_WISE_N: {
                    long numRowBlocks = (long) Math.ceil(((double) DistributedCacheInput.PARTITION_SIZE) / clen / brlen);
                    N = numRowBlocks * brlen;
                    break;
                }
                case COLUMN_BLOCK_WISE_N: {
                    long numColBlocks = (long) Math.ceil(((double) DistributedCacheInput.PARTITION_SIZE) / rlen / bclen);
                    N = numColBlocks * bclen;
                    break;
                }
                default:
                    throw new DMLRuntimeException("Unsupported partition format for distributed cache input: " + pformat);
            }
            PartitionFormat pf = new PartitionFormat(pformat, (int) N);
            DataPartitioner dpart = new DataPartitionerRemoteMR(pf, -1, numReducers, replication, false, true);
            out = dpart.createPartitionedMatrixObject(in, out, true);
            sts[i] = out.getMatrixCharacteristics();
            i++;
        }
    }
}
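In the switch above, N is the number of rows (or columns) per partition, rounded up to a multiple of the block size so that each partition fits the distributed-cache budget. A minimal standalone sketch of the ROW_BLOCK_WISE_N case, using hypothetical values for DistributedCacheInput.PARTITION_SIZE and the matrix dimensions:

public class PartitionSizeSketch {
    // hypothetical stand-in for DistributedCacheInput.PARTITION_SIZE (cells per partition)
    private static final long PARTITION_SIZE = 4_000_000L;

    public static void main(String[] args) {
        long clen = 1000;   // hypothetical number of columns
        long brlen = 1000;  // hypothetical rows per block
        // number of row blocks whose cells fit into one partition of PARTITION_SIZE cells
        long numRowBlocks = (long) Math.ceil(((double) PARTITION_SIZE) / clen / brlen);
        // N is rounded up to a multiple of the block size
        long N = numRowBlocks * brlen;
        System.out.println("rows per ROW_BLOCK_WISE_N partition: " + N); // 4000 for these inputs
    }
}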
use of org.apache.sysml.runtime.controlprogram.parfor.DataPartitioner in project systemml by apache.
the class ParForProgramBlock method handleDataPartitioning.
private void handleDataPartitioning(ExecutionContext ec) {
    PDataPartitioner dataPartitioner = _dataPartitioner;
    if (dataPartitioner != PDataPartitioner.NONE) {
        ParForStatementBlock sb = (ParForStatementBlock) getStatementBlock();
        if (sb == null)
            throw new DMLRuntimeException("ParFor statement block required for reasoning about data partitioning.");
        for (String var : sb.getReadOnlyParentVars()) {
            Data dat = ec.getVariable(var);
            // skip non-existing input matrices (marked for partitioning due to unknown sizes,
            // but typically the related branches are never executed)
            if (dat != null && dat instanceof MatrixObject) {
                // unpartitioned input
                MatrixObject moVar = (MatrixObject) dat;
                PartitionFormat dpf = sb.determineDataPartitionFormat(var);
                LOG.trace("PARFOR ID = " + _ID + ", Partitioning read-only input variable " + var + " (format=" + dpf + ", mode=" + _dataPartitioner + ")");
                if (dpf != PartitionFormat.NONE) {
                    if (dataPartitioner != PDataPartitioner.REMOTE_SPARK && dpf.isBlockwise()) {
                        LOG.warn("PARFOR ID = " + _ID + ", Switching data partitioner from " + dataPartitioner + " to " + PDataPartitioner.REMOTE_SPARK.name() + " for blockwise-n partitioning.");
                        dataPartitioner = PDataPartitioner.REMOTE_SPARK;
                    }
                    Timing ltime = new Timing(true);
                    // input data partitioning (reuse if possible)
                    Data dpdatNew = _variablesDPReuse.get(var);
                    if (dpdatNew == null) { // no reuse opportunity
                        DataPartitioner dp = createDataPartitioner(dpf, dataPartitioner, ec);
                        // disable binary cell for sparse if consumed by MR jobs (TODO: support for binarycell)
                        if (!OptimizerRuleBased.allowsBinaryCellPartitions(moVar, dpf) || OptimizerUtils.isSparkExecutionMode()) {
                            dp.disableBinaryCell();
                        }
                        MatrixObject moVarNew = dp.createPartitionedMatrixObject(moVar, constructDataPartitionsFileName());
                        dpdatNew = moVarNew;
                        // skip remaining partitioning logic if not partitioned (e.g., too small)
                        if (moVar == moVarNew)
                            continue; // skip to next variable
                    }
                    ec.setVariable(var, dpdatNew);
                    // recompile parfor body program
                    ProgramRecompiler.rFindAndRecompileIndexingHOP(sb, this, var, ec, true);
                    // store original and partitioned matrix (for reuse if applicable)
                    _variablesDPOriginal.put(var, moVar);
                    if (ALLOW_REUSE_PARTITION_VARS && ProgramRecompiler.isApplicableForReuseVariable(sb.getDMLProg(), sb, var)) {
                        _variablesDPReuse.put(var, dpdatNew);
                    }
                    LOG.trace("Partitioning and recompilation done in " + ltime.stop() + "ms");
                }
            }
        }
    }
}
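Stripped of the reuse and recompilation handling, the DataPartitioner usage inside the loop reduces to constructing a concrete partitioner and requesting a partitioned copy of the input. A minimal sketch of that flow, assuming hypothetical variable names (mo, numThreads, ec), a hypothetical partition file name, and ROW_WISE as the chosen format:

// minimal sketch, assuming mo is a bound MatrixObject and numThreads a configured parallelism
PartitionFormat dpf = new PartitionFormat(PDataPartitionFormat.ROW_WISE, -1); // hypothetical format choice
DataPartitioner dp = new DataPartitionerLocal(dpf, numThreads);
// returns either a new, partitioned MatrixObject or the unmodified input if partitioning is not worthwhile
MatrixObject moNew = dp.createPartitionedMatrixObject(mo, "scratch_space/pdata_V1"); // hypothetical file name
if (moNew != mo) {
    // a partitioned copy was created: rebind the variable so indexed reads hit the partitions
    ec.setVariable("V1", moNew); // hypothetical variable name
}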
use of org.apache.sysml.runtime.controlprogram.parfor.DataPartitioner in project incubator-systemml by apache.
the class ParForProgramBlock method createDataPartitioner.
/**
* Creates a new data partitioner according to the specified runtime parameter.
*
* @param dpf data partition format
* @param dataPartitioner data partitioner
* @param ec execution context
* @return data partitioner
*/
private DataPartitioner createDataPartitioner(PartitionFormat dpf, PDataPartitioner dataPartitioner, ExecutionContext ec) {
    DataPartitioner dp = null;
    // determine max degree of parallelism
    int numReducers = ConfigurationManager.getNumReducers();
    int maxNumRed = InfrastructureAnalyzer.getRemoteParallelReduceTasks();
    // correct the max number of reducers on YARN clusters
    if (InfrastructureAnalyzer.isYarnEnabled())
        maxNumRed = (int) Math.max(maxNumRed, YarnClusterAnalyzer.getNumCores() / 2);
    int numRed = Math.min(numReducers, maxNumRed);
    // create data partitioner
    switch (dataPartitioner) {
        case LOCAL:
            dp = new DataPartitionerLocal(dpf, _numThreads);
            break;
        case REMOTE_MR:
            dp = new DataPartitionerRemoteMR(dpf, _ID, numRed, _replicationDP, ALLOW_REUSE_MR_JVMS, false);
            break;
        case REMOTE_SPARK:
            dp = new DataPartitionerRemoteSpark(dpf, ec, numRed, _replicationDP, false);
            break;
        default:
            throw new DMLRuntimeException("Unknown data partitioner: '" + dataPartitioner.name() + "'.");
    }
    return dp;
}
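The degree of parallelism passed to the remote partitioners is the configured reducer count, capped by the cluster's reduce-task capacity; on YARN the cap is raised to at least half of the available cores. A self-contained sketch of that arithmetic with hypothetical cluster values:

public class ReducerCapSketch {
    public static void main(String[] args) {
        int numReducers = 40;  // hypothetical ConfigurationManager.getNumReducers()
        int maxNumRed   = 16;  // hypothetical InfrastructureAnalyzer.getRemoteParallelReduceTasks()
        long yarnCores  = 64;  // hypothetical YarnClusterAnalyzer.getNumCores()
        boolean yarn    = true; // hypothetical InfrastructureAnalyzer.isYarnEnabled()

        // on YARN, raise the cap to at least half the available cores
        if (yarn)
            maxNumRed = (int) Math.max(maxNumRed, yarnCores / 2);
        // final degree of parallelism handed to the remote data partitioner
        int numRed = Math.min(numReducers, maxNumRed);
        System.out.println("reduce tasks used for partitioning: " + numRed); // 32 for these inputs
    }
}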