use of org.apache.sysml.runtime.controlprogram.parfor.stat.Timing in project incubator-systemml by apache.
the class LocalParWorker method run.
/**
 * Main loop of the local parallel worker thread.
 *
 * <p>Dequeues tasks from the task queue and executes them until the worker is
 * stopped, the queue returns {@code LocalTaskQueue.NO_MORE_TASKS}, or reading
 * from the queue fails. A failing task is re-executed up to {@code _max_retry}
 * additional times before it is given up.
 *
 * <p>No exception is propagated for task or queue failures; they are only
 * logged. If initializing the GPU context for this thread fails, the method
 * returns early without executing tasks and without recording statistics.
 *
 * <p>The thread-local Spark scheduler pool is cleaned up on every exit path,
 * including the early return on GPU initialization failure.
 */
@Override
public void run() {
    // monitoring start
    Timing time1 = (_monitor ? new Timing(true) : null);

    // setup fair scheduler pool for worker thread (only if a Spark context already exists)
    if (OptimizerUtils.isSparkExecutionMode() && SparkExecutionContext.isSparkContextCreated()) {
        SparkExecutionContext sec = (SparkExecutionContext) _ec;
        sec.setThreadLocalSchedulerPool("parforPool" + _workerID);
    }

    try {
        // Initialize this GPUContext to this thread
        if (DMLScript.USE_ACCELERATOR) {
            try {
                _ec.getGPUContext(0).initializeThread();
            } catch (DMLRuntimeException e) {
                LOG.error("Error executing task because of failure in GPU backend: ", e);
                LOG.error("Stopping LocalParWorker.");
                // the finally block below still cleans up the scheduler pool
                return;
            }
        }

        // setup compiler config for worker thread
        ConfigurationManager.setLocalConfig(_cconf);

        // continuous execution (execute tasks until (1) stopped or (2) no more tasks)
        Task lTask = null;
        while (!_stopped) {
            // dequeue the next task (abort on NO_MORE_TASKS or error)
            try {
                lTask = _taskQueue.dequeueTask();
                if (lTask == LocalTaskQueue.NO_MORE_TASKS) {
                    // task queue closed (no more tasks): normal end of parallel worker
                    break;
                }
            } catch (Exception ex) {
                // abort on taskqueue error; keep the stack trace in the log
                LOG.warn("Error reading from task queue: " + ex.getMessage(), ex);
                LOG.warn("Stopping LocalParWorker.");
                // no exception thrown to prevent blocking on join
                break;
            }

            // execute the task sequentially (re-try on error)
            boolean success = false;
            int retrys = _max_retry;
            while (!success) {
                try {
                    // core execution (see ParWorker)
                    executeTask(lTask);
                    success = true;
                } catch (Exception ex) {
                    LOG.error("Failed to execute " + lTask.toString() + ", retry:" + retrys, ex);
                    if (retrys > 0) {
                        // retry on task error
                        retrys--;
                    } else {
                        // abort on no remaining retrys
                        LOG.error("Error executing task: ", ex);
                        LOG.error("Stopping LocalParWorker.");
                        // no exception thrown to prevent blocking on join
                        // NOTE(review): this break only leaves the retry loop; the outer
                        // loop goes on to dequeue the next task despite the log message
                        // above - confirm whether the worker should really keep running.
                        break;
                    }
                }
            }
        }
    } finally {
        // cleanup fair scheduler pool for worker thread (on all exit paths)
        if (OptimizerUtils.isSparkExecutionMode() && SparkExecutionContext.isSparkContextCreated()) {
            SparkExecutionContext sec = (SparkExecutionContext) _ec;
            sec.cleanupThreadLocalSchedulerPool();
        }
    }

    // monitoring end
    if (_monitor) {
        StatisticMonitor.putPWStat(_workerID, Stat.PARWRK_NUMTASKS, _numTasks);
        StatisticMonitor.putPWStat(_workerID, Stat.PARWRK_NUMITERS, _numIters);
        StatisticMonitor.putPWStat(_workerID, Stat.PARWRK_EXEC_T, time1.stop());
    }
}
use of org.apache.sysml.runtime.controlprogram.parfor.stat.Timing in project incubator-systemml by apache.
the class ParWorker method executeRangeTask.
/**
 * Executes a range task by running all child program blocks once per
 * iteration value.
 *
 * <p>The first three entries of the task's iteration list are read as the
 * lower bound, the inclusive upper bound and the increment of the range.
 * For every value the iteration variable is set in the execution context
 * before the child blocks run. Iteration and task counters are updated, and
 * timing statistics are reported when monitoring is enabled.
 *
 * @param task the range task to execute
 */
private void executeRangeTask(Task task) {
    // monitoring start: one timer reported per iteration, one for the whole task
    final Timing iterTimer = _monitor ? new Timing(true) : null;
    final Timing taskTimer = _monitor ? new Timing(true) : null;

    // unpack the range description (from, to, increment)
    final List<IntObject> bounds = task.getIterations();
    final String iterVarName = task.getVarName();
    final long first = bounds.get(0).getLongValue();
    final long last = bounds.get(1).getLongValue();
    final long step = bounds.get(2).getLongValue();

    // core execution
    long current = first;
    while (current <= last) {
        // publish the current index value
        _ec.setVariable(iterVarName, new IntObject(current));

        // run the loop body
        for (ProgramBlock block : _childBlocks) {
            block.execute(_ec);
        }

        _numIters++;
        if (_monitor) {
            StatisticMonitor.putPWStat(_workerID, Stat.PARWRK_ITER_T, iterTimer.stop());
        }
        current += step;
    }
    _numTasks++;

    // monitoring end
    if (_monitor) {
        StatisticMonitor.putPWStat(_workerID, Stat.PARWRK_TASKSIZE, task.size());
        StatisticMonitor.putPWStat(_workerID, Stat.PARWRK_TASK_T, taskTimer.stop());
    }
}
use of org.apache.sysml.runtime.controlprogram.parfor.stat.Timing in project incubator-systemml by apache.
the class ParWorker method executeSetTask.
/**
 * Executes a set task by running all child program blocks once for each
 * iteration value listed in the task.
 *
 * <p>For every value the iteration variable is set in the execution context
 * before the child blocks run. Iteration and task counters are updated, and
 * timing statistics are reported when monitoring is enabled.
 *
 * @param task the set task to execute
 */
private void executeSetTask(Task task) {
    // monitoring start: one timer reported per iteration, one for the whole task
    final Timing iterTimer = _monitor ? new Timing(true) : null;
    final Timing taskTimer = _monitor ? new Timing(true) : null;

    // core execution: run the loop body for each listed index value
    final String iterVarName = task.getVarName();
    final List<IntObject> indexValues = task.getIterations();
    for (IntObject value : indexValues) {
        // publish the current index value
        _ec.setVariable(iterVarName, value);

        // run the loop body
        for (ProgramBlock block : _childBlocks) {
            block.execute(_ec);
        }

        _numIters++;
        if (_monitor) {
            StatisticMonitor.putPWStat(_workerID, Stat.PARWRK_ITER_T, iterTimer.stop());
        }
    }
    _numTasks++;

    // monitoring end
    if (_monitor) {
        StatisticMonitor.putPWStat(_workerID, Stat.PARWRK_TASKSIZE, task.size());
        StatisticMonitor.putPWStat(_workerID, Stat.PARWRK_TASK_T, taskTimer.stop());
    }
}
use of org.apache.sysml.runtime.controlprogram.parfor.stat.Timing in project incubator-systemml by apache.
the class CostEstimationWrapper method getTimeEstimate.
/**
 * Returns the time estimate computed by the configured cost estimator for
 * the given program block.
 *
 * @param pb the program block to estimate
 * @param ec execution context whose variables are passed to the estimator;
 *        if {@code null}, an empty variable map is used instead
 * @param recursive forwarded unchanged to the cost estimator
 * @return the estimate returned by the cost estimator
 */
public static double getTimeEstimate(ProgramBlock pb, ExecutionContext ec, boolean recursive) {
    final Timing timer = new Timing(true);
    final HashMap<String, VarStats> varStats = new HashMap<>();

    // fall back to an empty symbol table when no execution context is given
    final LocalVariableMap symbols;
    if (ec == null) {
        symbols = new LocalVariableMap();
    } else {
        symbols = ec.getVariables();
    }

    final double estimate = _costEstim.getTimeEstimate(pb, symbols, varStats, recursive);
    LOG.debug("Finished estimation in " + timer.stop() + "ms.");
    return estimate;
}
use of org.apache.sysml.runtime.controlprogram.parfor.stat.Timing in project incubator-systemml by apache.
the class ParForProgramBlock method executeRemoteSparkParFor.
/**
 * Executes this parfor as a remote Spark job.
 *
 * <p>Phases: optionally recompile to CP, serialize the parfor body, create
 * the tasks, export matrices to HDFS, run the Spark job, and consolidate the
 * returned results into the given execution context. When monitoring is
 * enabled, the timer is stopped and reported after each phase.
 *
 * @param ec execution context used for serialization, export, job submission
 *        and result consolidation
 * @param itervar iteration variable (not referenced in this method body)
 * @param from lower bound handed to the task partitioner
 * @param to upper bound handed to the task partitioner
 * @param incr increment handed to the task partitioner
 */
private void executeRemoteSparkParFor(ExecutionContext ec, IntObject itervar, IntObject from, IntObject to, IntObject incr) {
    final Timing timer = _monitor ? new Timing(true) : null;

    // Step 0) check and compile to CP (if forced remote parfor)
    boolean recompiledToCP = false;
    if (FORCE_CP_ON_REMOTE_MR) {
        final boolean noOptimizer = (_optMode == POptMode.NONE);
        final boolean constrainedRemoteSpark =
            (_optMode == POptMode.CONSTRAINED && _execMode == PExecMode.REMOTE_SPARK);
        if (noOptimizer || constrainedRemoteSpark) {
            // tid = 0 because replaced in remote parworker
            recompiledToCP = checkMRAndRecompileToCP(0);
        }
    }

    // Step 1) init parallel workers (serialize PBs)
    // NOTES: each mapper changes filenames with regard to his ID as we submit a single
    // job, cannot reuse serialized string, since variables are serialized as well.
    final ParForBody parforBody = new ParForBody(_childBlocks, _resultVars, ec);
    final HashMap<String, byte[]> classMap = new HashMap<>();
    final String serializedBody = ProgramConverter.serializeParForBody(parforBody, classMap);
    if (_monitor) {
        StatisticMonitor.putPFStat(_ID, Stat.PARFOR_INIT_PARWRK_T, timer.stop());
    }

    // Step 2) create tasks (sequentially, as input to the parfor job)
    final TaskPartitioner taskPartitioner = createTaskPartitioner(from, to, incr);
    final long totalIterations = taskPartitioner.getNumIterations();
    final List<Task> taskList = taskPartitioner.createTasks();
    final long createdTasks = taskList.size();
    if (_monitor) {
        StatisticMonitor.putPFStat(_ID, Stat.PARFOR_INIT_TASKS_T, timer.stop());
    }

    // write matrices to HDFS
    exportMatricesToHDFS(ec);

    // Step 3) submit Spark parfor job (no lazy evaluation, since collect on result)
    final RemoteParForJobReturn jobResult = RemoteParForSpark.runJob(
        _ID, serializedBody, classMap, taskList, ec, _enableCPCaching, _numThreads);
    if (_monitor) {
        StatisticMonitor.putPFStat(_ID, Stat.PARFOR_WAIT_EXEC_T, timer.stop());
    }

    // Step 4) collecting results from each parallel worker
    final int executedTasks = jobResult.getNumExecutedTasks();
    final int executedIterations = jobResult.getNumExecutedIterations();

    // consolidate results into global symbol table
    consolidateAndCheckResults(ec, totalIterations, createdTasks,
        executedIterations, executedTasks, jobResult.getVariables());

    // undo the forced recompile from step 0
    if (recompiledToCP) {
        releaseForcedRecompile(0);
    }

    if (_monitor) {
        StatisticMonitor.putPFStat(_ID, Stat.PARFOR_WAIT_RESULTS_T, timer.stop());
        StatisticMonitor.putPFStat(_ID, Stat.PARFOR_NUMTASKS, executedTasks);
        StatisticMonitor.putPFStat(_ID, Stat.PARFOR_NUMITERS, executedIterations);
    }
}
Aggregations