Example usage of org.apache.sysml.runtime.instructions.cp.IntObject in the Apache SystemML project: class TaskPartitionerFactoring, method createTasks (list variant).
@Override
public List<Task> createTasks() {
	// Factoring scheme: tasks are created in waves of _numThreads tasks each,
	// where the per-task batch size shrinks as fewer iterations remain.
	final long from = _fromVal.getLongValue();
	final long to = _toVal.getLongValue();
	final long incr = _incrVal.getLongValue();
	final int numWorkers = _numThreads;
	LinkedList<Task> result = new LinkedList<>();
	// remaining number of iterations (drives the shrinking batch size)
	long remaining = _numIter;
	long pos = from;
	while (pos <= to) {
		// next batch size, shared by all tasks of this wave
		long batch = determineNextBatchSize(remaining, numWorkers);
		remaining -= batch * numWorkers;
		// range encoding (similar to run-length encoding) only pays off for batch size > 3
		TaskType taskType = (ParForProgramBlock.USE_RANGE_TASKS_IF_USEFUL && batch > 3)
			? TaskType.RANGE : TaskType.SET;
		// one task per logical worker, until iterations are exhausted
		for (int w = 0; w < numWorkers && pos <= to; w++) {
			Task task = new Task(_iterVarName, taskType);
			result.addLast(task);
			if (taskType == TaskType.SET) {
				// value-based task: enumerate the individual iteration values
				for (long c = 0; c < batch && pos <= to; c++, pos += incr)
					task.addIteration(new IntObject(pos));
			}
			else {
				// range-based task: encode [from, to, increment] as a triple
				long end = Math.min(pos + (batch - 1) * incr, to);
				task.addIteration(new IntObject(pos));
				task.addIteration(new IntObject(end));
				task.addIteration(new IntObject(incr));
				pos = end + incr;
			}
		}
	}
	return result;
}
Example usage of org.apache.sysml.runtime.instructions.cp.IntObject in the Apache SystemML project: class TaskPartitionerFactoring, method createTasks (queue-streaming variant).
// Streaming variant of the factoring task partitioner: instead of materializing
// all tasks in a list, each task is pushed into the given queue as soon as it is
// fully assembled, so consuming parallel workers can start before partitioning ends.
// Returns the total number of tasks created.
@Override
public long createTasks(LocalTaskQueue<Task> queue) {
long numCreatedTasks = 0;
long lFrom = _fromVal.getLongValue();
long lTo = _toVal.getLongValue();
long lIncr = _incrVal.getLongValue();
// number of parallel workers
int P = _numThreads;
// total number of iterations
long N = _numIter;
// remaining number of iterations
long R = N;
// next _numThreads task sizes
long K = -1;
// type of iterations: range tasks (similar to run-length encoding) make only sense if taskSize>3
TaskType type = null;
try {
for (long i = lFrom; i <= lTo; ) {
// factoring: the batch size shrinks as fewer iterations remain
K = determineNextBatchSize(R, P);
R -= (K * P);
type = (ParForProgramBlock.USE_RANGE_TASKS_IF_USEFUL && K > 3) ? TaskType.RANGE : TaskType.SET;
// for each logical processor create one task of this wave
for (int j = 0; j < P; j++) {
// no more iterations
if (i > lTo)
break;
// create new task (enqueued below once fully assembled)
Task lTask = new Task(_iterVarName, type);
// add iterations to task
if (type == TaskType.SET) {
// value-based task: enumerate individual iteration values
for (long k = 0; k < K && i <= lTo; k++, i += lIncr) lTask.addIteration(new IntObject(i));
} else {
// determine end of task
long to = Math.min(i + (K - 1) * lIncr, lTo);
// range-based task: encode [from, to, increment] as a triple
// from
lTask.addIteration(new IntObject(i));
// to
lTask.addIteration(new IntObject(to));
// increment
lTask.addIteration(new IntObject(lIncr));
i = to + lIncr;
}
// add task to queue only after all iterations were added (prevents race conditions)
queue.enqueueTask(lTask);
numCreatedTasks++;
}
}
// mark end of task input stream so consumers can terminate
queue.closeInput();
} catch (Exception ex) {
throw new DMLRuntimeException(ex);
}
return numCreatedTasks;
}
Example usage of org.apache.sysml.runtime.instructions.cp.IntObject in the Apache SystemML project: class TaskPartitionerFixedsize, method createTasks.
@Override
public long createTasks(LocalTaskQueue<Task> queue) {
	// Fixed-size partitioning with streaming hand-over: every finished task is
	// enqueued immediately so parallel workers can consume while we still produce.
	long createdCount = 0;
	// range encoding (similar to run-length encoding) only pays off for task size > 3
	final TaskType taskType = (ParForProgramBlock.USE_RANGE_TASKS_IF_USEFUL && _taskSize > 3)
		? TaskType.RANGE : TaskType.SET;
	final long from = _fromVal.getLongValue();
	final long to = _toVal.getLongValue();
	final long incr = _incrVal.getLongValue();
	// budget of tasks receiving one extra iteration (static partitioner correction)
	long extraBudget = _firstnPlus1;
	try {
		long pos = from;
		while (pos <= to) {
			Task task = new Task(_iterVarName, taskType);
			// correction for static partitioner (the last task might have less)
			int extra = (extraBudget-- > 0) ? 1 : 0;
			if (taskType == TaskType.SET) {
				// value-based task: enumerate the individual iteration values
				for (long c = 0; c < _taskSize + extra && pos <= to; c++, pos += incr)
					task.addIteration(new IntObject(pos));
			}
			else {
				// range-based task: encode [from, to, increment] as a triple
				long end = Math.min(pos + (_taskSize - 1 + extra) * incr, to);
				task.addIteration(new IntObject(pos));
				task.addIteration(new IntObject(end));
				task.addIteration(new IntObject(incr));
				pos = end + incr;
			}
			// enqueue only after all iterations were added (prevents race conditions)
			queue.enqueueTask(task);
			createdCount++;
		}
		// mark end of task input stream so consumers can terminate
		queue.closeInput();
	}
	catch (Exception ex) {
		throw new DMLRuntimeException(ex);
	}
	return createdCount;
}
Example usage of org.apache.sysml.runtime.instructions.cp.IntObject in the Apache SystemML project: class ForProgramBlock, method execute.
// Executes a DML for-loop: evaluates the predicate bounds once, iterates the
// loop variable over the resulting sequence running all child program blocks
// per iteration, and finally executes the loop's exit instructions.
@Override
public void execute(ExecutionContext ec) {
// evaluate from, to, incr only once (assumption: known at for entry)
IntObject from = executePredicateInstructions(1, _fromInstructions, ec);
IntObject to = executePredicateInstructions(2, _toInstructions, ec);
// default increment if none given: +1 for ascending, -1 for descending ranges
IntObject incr = (_incrementInstructions == null || _incrementInstructions.isEmpty()) ? new IntObject((from.getLongValue() <= to.getLongValue()) ? 1 : -1) : executePredicateInstructions(3, _incrementInstructions, ec);
// an increment of zero would produce an infinite loop
if (incr.getLongValue() == 0)
throw new DMLRuntimeException(printBlockErrorLocation() + "Expression for increment " + "of variable '" + _iterPredVar + "' must evaluate to a non-zero value.");
// execute for loop
try {
// prepare update in-place variables
UpdateType[] flags = prepareUpdateInPlaceVariables(ec, _tid);
// run for loop body for each instance of predicate sequence
SequenceIterator seqIter = new SequenceIterator(from, to, incr);
for (IntObject iterVar : seqIter) {
// set iteration variable
ec.setVariable(_iterPredVar, iterVar);
// execute all child blocks
for (int i = 0; i < this._childBlocks.size(); i++) {
ec.updateDebugState(i);
_childBlocks.get(i).execute(ec);
}
}
// reset update-in-place variables
resetUpdateInPlaceVariableFlags(ec, flags);
} catch (DMLScriptException e) {
// propagate stop call unchanged (must not be wrapped as a runtime error)
throw e;
} catch (Exception e) {
throw new DMLRuntimeException(printBlockErrorLocation() + "Error evaluating for program block", e);
}
// execute exit instructions
try {
executeInstructions(_exitInstructions, ec);
} catch (Exception e) {
throw new DMLRuntimeException(printBlockErrorLocation() + "Error evaluating for exit instructions", e);
}
}
Example usage of org.apache.sysml.runtime.instructions.cp.IntObject in the Apache SystemML project: class ParForProgramBlock, method execute.
// Orchestrates parallel execution of a parfor loop: evaluates the loop predicate,
// optionally runs the plan optimizer, performs data partitioning / Spark
// repartitioning and caching, dispatches the loop body to local or remote
// (MR/Spark) workers according to _execMode, and afterwards restores variable
// state for semantic equivalence with a sequential for loop.
@Override
public void execute(ExecutionContext ec) {
ParForStatementBlock sb = (ParForStatementBlock) getStatementBlock();
// evaluate from, to, incr only once (assumption: known at for entry)
IntObject from = executePredicateInstructions(1, _fromInstructions, ec);
IntObject to = executePredicateInstructions(2, _toInstructions, ec);
// default increment if none given: +1 for ascending, -1 for descending ranges
IntObject incr = (_incrementInstructions == null || _incrementInstructions.isEmpty()) ? new IntObject((from.getLongValue() <= to.getLongValue()) ? 1 : -1) : executePredicateInstructions(3, _incrementInstructions, ec);
// an increment of zero would produce an infinite loop
if (incr.getLongValue() == 0)
throw new DMLRuntimeException(this.printBlockErrorLocation() + "Expression for increment " + "of variable '" + _iterPredVar + "' must evaluate to a non-zero value.");
// early exit on num iterations = zero
_numIterations = computeNumIterations(from, to, incr);
if (_numIterations <= 0)
// avoid unnecessary optimization/initialization
return;
// /////
// OPTIMIZATION of parfor plan (if enabled)
if (_optMode != POptMode.NONE) {
// set optimizer log level
OptimizationWrapper.setLogLevel(_optLogLevel);
// core optimize
OptimizationWrapper.optimize(_optMode, sb, this, ec, _monitor);
}
// /////
// DATA PARTITIONING of read-only parent variables of type (matrix,unpartitioned)
// /////
Timing time = _monitor ? new Timing(true) : null;
// partitioning on demand (note: for fused data partitioning and execute the optimizer set
// the data partitioner to NONE in order to prevent any side effects)
handleDataPartitioning(ec);
// repartitioning of variables for spark cpmm/zipmm in order prevent unnecessary shuffle
handleSparkRepartitioning(ec);
// eager rdd caching of variables for spark in order prevent read/write contention
handleSparkEagerCaching(ec);
if (_monitor)
StatisticMonitor.putPFStat(_ID, Stat.PARFOR_INIT_DATA_T, time.stop());
// initialize iter var to from value
IntObject iterVar = new IntObject(from.getLongValue());
// /////
// begin PARALLEL EXECUTION of (PAR)FOR body
// /////
LOG.trace("EXECUTE PARFOR ID = " + _ID + " with mode = " + _execMode + ", numThreads = " + _numThreads + ", taskpartitioner = " + _taskPartitioner);
if (_monitor) {
StatisticMonitor.putPFStat(_ID, Stat.PARFOR_NUMTHREADS, _numThreads);
StatisticMonitor.putPFStat(_ID, Stat.PARFOR_TASKSIZE, _taskSize);
StatisticMonitor.putPFStat(_ID, Stat.PARFOR_TASKPARTITIONER, _taskPartitioner.ordinal());
StatisticMonitor.putPFStat(_ID, Stat.PARFOR_DATAPARTITIONER, _dataPartitioner.ordinal());
StatisticMonitor.putPFStat(_ID, Stat.PARFOR_EXECMODE, _execMode.ordinal());
}
// preserve shared input/result variables of cleanup
ArrayList<String> varList = ec.getVarList();
boolean[] varState = ec.pinVariables(varList);
try {
// dispatch to the configured execution backend
switch(_execMode) {
// create parworkers as local threads
case LOCAL:
executeLocalParFor(ec, iterVar, from, to, incr);
break;
// create parworkers as MR tasks (one job per parfor)
case REMOTE_MR:
executeRemoteMRParFor(ec, iterVar, from, to, incr);
break;
// create parworkers as MR tasks with fused data partitioning (one job per parfor)
case REMOTE_MR_DP:
executeRemoteMRParForDP(ec, iterVar, from, to, incr);
break;
// create parworkers as Spark tasks (one job per parfor)
case REMOTE_SPARK:
executeRemoteSparkParFor(ec, iterVar, from, to, incr);
break;
// create parworkers as Spark tasks with fused data partitioning (one job per parfor)
case REMOTE_SPARK_DP:
executeRemoteSparkParForDP(ec, iterVar, from, to, incr);
break;
default:
throw new DMLRuntimeException("Undefined execution mode: '" + _execMode + "'.");
}
} catch (Exception ex) {
throw new DMLRuntimeException("PARFOR: Failed to execute loop in parallel.", ex);
}
// reset state of shared input/result variables
ec.unpinVariables(varList, varState);
// cleanup unpinned shared variables
cleanupSharedVariables(ec, varState);
// set iteration var to TO value (+ increment) for FOR equivalence
// consistent with for
iterVar = new IntObject(to.getLongValue());
ec.setVariable(_iterPredVar, iterVar);
// we can replace those variables, because partitioning only applied for read-only matrices
for (String var : _variablesDPOriginal.keySet()) {
// cleanup partitioned matrix (if not reused)
if (!_variablesDPReuse.keySet().contains(var))
VariableCPInstruction.processRemoveVariableInstruction(ec, var);
// reset to original matrix
MatrixObject mo = (MatrixObject) _variablesDPOriginal.get(var);
ec.setVariable(var, mo);
}
// print profiling report (only if top-level parfor because otherwise in parallel context)
if (_monitorReport)
LOG.info("\n" + StatisticMonitor.createReport());
// TODO reset of hop parallelism constraint (e.g., ba+*)
// release forced exectypes
for (String dpvar : _variablesDPOriginal.keySet()) ProgramRecompiler.rFindAndRecompileIndexingHOP(sb, this, dpvar, ec, false);
// release forced exectypes for fused dp/exec
if (_execMode == PExecMode.REMOTE_MR_DP || _execMode == PExecMode.REMOTE_SPARK_DP)
ProgramRecompiler.rFindAndRecompileIndexingHOP(sb, this, _colocatedDPMatrix, ec, false);
// after release, deletes dp_varnames
resetOptimizerFlags();
// execute exit instructions (usually empty)
executeInstructions(_exitInstructions, ec);
}
Aggregations