Search in sources :

Example 6 with Timing

use of org.apache.sysml.runtime.controlprogram.parfor.stat.Timing in project incubator-systemml by apache.

the class LocalParWorker method run.

/**
 * Worker main loop: dequeues tasks from the local task queue and executes them
 * via {@code executeTask} until the worker is stopped, the queue hands out
 * {@code LocalTaskQueue.NO_MORE_TASKS}, or reading from the queue fails.
 *
 * <p>No exception is propagated out of this method for queue or task failures;
 * they are logged instead (see the inline comments: this avoids blocking the
 * thread that joins on this worker).
 *
 * <p>If a Spark context already exists, a worker-specific scheduler pool is set
 * for this thread before any work is done. The pool is released in a
 * {@code finally} block so that it is also cleaned up when the worker aborts
 * early because GPU initialization failed.
 *
 * <p>When monitoring is enabled, the number of executed tasks and iterations and
 * the measured execution time are reported at normal termination. They are not
 * reported on the GPU-initialization abort path.
 */
@Override
public void run() {
    // monitoring start
    Timing time1 = (_monitor ? new Timing(true) : null);
    // setup fair scheduler pool for worker thread (only if a spark context was already created)
    if (OptimizerUtils.isSparkExecutionMode() && SparkExecutionContext.isSparkContextCreated()) {
        SparkExecutionContext sec = (SparkExecutionContext) _ec;
        sec.setThreadLocalSchedulerPool("parforPool" + _workerID);
    }
    try {
        // Initialize this GPUContext to this thread
        if (DMLScript.USE_ACCELERATOR) {
            try {
                _ec.getGPUContext(0).initializeThread();
            } catch (DMLRuntimeException e) {
                LOG.error("Error executing task because of failure in GPU backend: ", e);
                LOG.error("Stopping LocalParWorker.");
                // abort without executing any task; the finally block below
                // still releases the scheduler pool set above
                return;
            }
        }
        // setup compiler config for worker thread
        ConfigurationManager.setLocalConfig(_cconf);
        // continuous execution (execute tasks until (1) stopped or (2) no more tasks)
        Task lTask = null;
        while (!_stopped) {
            // dequeue the next task (abort on NO_MORE_TASKS or error)
            try {
                lTask = _taskQueue.dequeueTask();
                if (lTask == LocalTaskQueue.NO_MORE_TASKS) {
                    // task queue closed (no more tasks): normal end of parallel worker
                    break;
                }
            } catch (Exception ex) {
                // abort on taskqueue error
                LOG.warn("Error reading from task queue: " + ex.getMessage());
                LOG.warn("Stopping LocalParWorker.");
                // no exception thrown to prevent blocking on join
                break;
            }
            // execute the task sequentially (re-try on error)
            boolean success = false;
            int retrys = _max_retry;
            while (!success) {
                try {
                    // core execution (see ParWorker)
                    executeTask(lTask);
                    success = true;
                } catch (Exception ex) {
                    LOG.error("Failed to execute " + lTask.toString() + ", retry:" + retrys, ex);
                    if (retrys > 0) {
                        // retry on task error
                        retrys--;
                    } else {
                        // abort on no remaining retrys
                        LOG.error("Error executing task: ", ex);
                        LOG.error("Stopping LocalParWorker.");
                        // no exception thrown to prevent blocking on join
                        // NOTE(review): this break only leaves the retry loop; the outer
                        // loop then dequeues the next task, so the worker keeps running
                        // despite the log message above - confirm this is intended.
                        break;
                    }
                }
            }
        }
    } finally {
        // cleanup fair scheduler pool for worker thread; the condition is re-evaluated
        // here because a spark context may have been created while tasks were executed
        if (OptimizerUtils.isSparkExecutionMode() && SparkExecutionContext.isSparkContextCreated()) {
            SparkExecutionContext sec = (SparkExecutionContext) _ec;
            sec.cleanupThreadLocalSchedulerPool();
        }
    }
    // monitoring end
    if (_monitor) {
        StatisticMonitor.putPWStat(_workerID, Stat.PARWRK_NUMTASKS, _numTasks);
        StatisticMonitor.putPWStat(_workerID, Stat.PARWRK_NUMITERS, _numIters);
        StatisticMonitor.putPWStat(_workerID, Stat.PARWRK_EXEC_T, time1.stop());
    }
}
Also used : Timing(org.apache.sysml.runtime.controlprogram.parfor.stat.Timing) SparkExecutionContext(org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException)

Example 7 with Timing

use of org.apache.sysml.runtime.controlprogram.parfor.stat.Timing in project incubator-systemml by apache.

the class ParWorker method executeRangeTask.

/**
 * Executes a range task: the task's iteration list is read as the triple
 * (from, to, increment), and the child program blocks are executed once for
 * every index value {@code from, from+increment, ...} that is {@code <= to}.
 *
 * <p>Updates the iteration and task counters and, if monitoring is enabled,
 * records per-iteration time, task size and total task time.
 *
 * @param task the range task to execute
 */
private void executeRangeTask(Task task) {
    // timers exist only when monitoring is switched on
    final boolean timed = _monitor;
    final Timing iterTimer = timed ? new Timing(true) : null;
    final Timing taskTimer = timed ? new Timing(true) : null;

    // decode the range description carried by the task
    final List<IntObject> bounds = task.getIterations();
    final String indexName = task.getVarName();
    final long first = bounds.get(0).getLongValue();
    final long last = bounds.get(1).getLongValue();
    final long step = bounds.get(2).getLongValue();

    long current = first;
    while (current <= last) {
        // bind the loop index in the execution context
        _ec.setVariable(indexName, new IntObject(current));
        // run the parfor body
        for (ProgramBlock block : _childBlocks) {
            block.execute(_ec);
        }
        _numIters++;
        if (_monitor) {
            // presumably stop() reports the time since the previous stop - confirm against Timing
            StatisticMonitor.putPWStat(_workerID, Stat.PARWRK_ITER_T, iterTimer.stop());
        }
        current += step;
    }
    _numTasks++;

    // nothing left to report without monitoring
    if (!_monitor) {
        return;
    }
    StatisticMonitor.putPWStat(_workerID, Stat.PARWRK_TASKSIZE, task.size());
    StatisticMonitor.putPWStat(_workerID, Stat.PARWRK_TASK_T, taskTimer.stop());
}
Also used : IntObject(org.apache.sysml.runtime.instructions.cp.IntObject) ProgramBlock(org.apache.sysml.runtime.controlprogram.ProgramBlock) Timing(org.apache.sysml.runtime.controlprogram.parfor.stat.Timing)

Example 8 with Timing

use of org.apache.sysml.runtime.controlprogram.parfor.stat.Timing in project incubator-systemml by apache.

the class ParWorker method executeSetTask.

/**
 * Executes a set task: for every index value listed in the task, the value is
 * bound to the task's variable name and all child program blocks are executed.
 *
 * <p>Updates the iteration and task counters and, if monitoring is enabled,
 * records per-iteration time, task size and total task time.
 *
 * @param task the set task to execute
 */
private void executeSetTask(Task task) {
    // timers exist only when monitoring is switched on
    final boolean timed = _monitor;
    final Timing iterTimer = timed ? new Timing(true) : null;
    final Timing taskTimer = timed ? new Timing(true) : null;

    final String indexName = task.getVarName();
    for (IntObject value : task.getIterations()) {
        // bind the loop index in the execution context
        _ec.setVariable(indexName, value);
        // run the parfor body
        for (ProgramBlock block : _childBlocks) {
            block.execute(_ec);
        }
        _numIters++;
        if (_monitor) {
            // presumably stop() reports the time since the previous stop - confirm against Timing
            StatisticMonitor.putPWStat(_workerID, Stat.PARWRK_ITER_T, iterTimer.stop());
        }
    }
    _numTasks++;

    // nothing left to report without monitoring
    if (!_monitor) {
        return;
    }
    StatisticMonitor.putPWStat(_workerID, Stat.PARWRK_TASKSIZE, task.size());
    StatisticMonitor.putPWStat(_workerID, Stat.PARWRK_TASK_T, taskTimer.stop());
}
Also used : IntObject(org.apache.sysml.runtime.instructions.cp.IntObject) ProgramBlock(org.apache.sysml.runtime.controlprogram.ProgramBlock) Timing(org.apache.sysml.runtime.controlprogram.parfor.stat.Timing)

Example 9 with Timing

use of org.apache.sysml.runtime.controlprogram.parfor.stat.Timing in project incubator-systemml by apache.

the class CostEstimationWrapper method getTimeEstimate.

/**
 * Delegates to the configured cost estimator to obtain a time estimate for the
 * given program block and logs (at debug level) how long the estimation took.
 *
 * @param pb        program block to estimate
 * @param ec        execution context supplying the variables; may be {@code null},
 *                  in which case an empty variable map is used
 * @param recursive passed through unchanged to the cost estimator
 * @return the estimate returned by the cost estimator
 */
public static double getTimeEstimate(ProgramBlock pb, ExecutionContext ec, boolean recursive) {
    final Timing timer = new Timing(true);
    final HashMap<String, VarStats> varStats = new HashMap<>();

    // fall back to an empty symbol table when no execution context is given
    final LocalVariableMap symbols;
    if (ec == null) {
        symbols = new LocalVariableMap();
    } else {
        symbols = ec.getVariables();
    }

    final double estimate = _costEstim.getTimeEstimate(pb, symbols, varStats, recursive);
    LOG.debug("Finished estimation in " + timer.stop() + "ms.");
    return estimate;
}
Also used : HashMap(java.util.HashMap) LocalVariableMap(org.apache.sysml.runtime.controlprogram.LocalVariableMap) Timing(org.apache.sysml.runtime.controlprogram.parfor.stat.Timing)

Example 10 with Timing

use of org.apache.sysml.runtime.controlprogram.parfor.stat.Timing in project incubator-systemml by apache.

the class ParForProgramBlock method executeRemoteSparkParFor.

/**
 * Runs this parfor as a remote Spark job: optionally forces recompilation to CP,
 * serializes the parfor body, creates the tasks, exports matrices to HDFS,
 * submits the job, and finally consolidates the returned results into the
 * given execution context.
 *
 * <p>If monitoring is enabled, the time of each phase as well as the number of
 * executed tasks and iterations are recorded.
 *
 * @param ec      execution context the results are consolidated into
 * @param itervar iteration variable (not read by this method)
 * @param from    lower bound handed to the task partitioner
 * @param to      upper bound handed to the task partitioner
 * @param incr    increment handed to the task partitioner
 */
private void executeRemoteSparkParFor(ExecutionContext ec, IntObject itervar, IntObject from, IntObject to, IntObject incr) {
    final Timing timer = (_monitor ? new Timing(true) : null);

    // Step 0) check and compile to CP (if forced remote parfor)
    final boolean noOptimizer = (_optMode == POptMode.NONE);
    final boolean constrainedToSpark = (_optMode == POptMode.CONSTRAINED && _execMode == PExecMode.REMOTE_SPARK);
    boolean forcedRecompile = false;
    if (FORCE_CP_ON_REMOTE_MR && (noOptimizer || constrainedToSpark)) {
        // tid = 0 because it is replaced in the remote parworker
        forcedRecompile = checkMRAndRecompileToCP(0);
    }

    // Step 1) init parallel workers (serialize PBs)
    // NOTES: each mapper changes filenames with regard to its ID as a single job is
    // submitted; the serialized string cannot be reused since variables are serialized too.
    final ParForBody parforBody = new ParForBody(_childBlocks, _resultVars, ec);
    final HashMap<String, byte[]> classMap = new HashMap<>();
    final String serializedBody = ProgramConverter.serializeParForBody(parforBody, classMap);
    if (_monitor) {
        StatisticMonitor.putPFStat(_ID, Stat.PARFOR_INIT_PARWRK_T, timer.stop());
    }

    // Step 2) create tasks (sequentially, as input to the parfor job)
    final TaskPartitioner taskPartitioner = createTaskPartitioner(from, to, incr);
    final long totalIterations = taskPartitioner.getNumIterations();
    final List<Task> taskList = taskPartitioner.createTasks();
    final long createdTasks = taskList.size();
    if (_monitor) {
        StatisticMonitor.putPFStat(_ID, Stat.PARFOR_INIT_TASKS_T, timer.stop());
    }

    // write matrices to HDFS
    exportMatricesToHDFS(ec);

    // Step 3) submit Spark parfor job (no lazy evaluation, since collect on result)
    final RemoteParForJobReturn jobResult = RemoteParForSpark.runJob(_ID, serializedBody, classMap, taskList, ec, _enableCPCaching, _numThreads);
    if (_monitor) {
        StatisticMonitor.putPFStat(_ID, Stat.PARFOR_WAIT_EXEC_T, timer.stop());
    }

    // Step 4) collect results from the parallel workers
    final int executedTasks = jobResult.getNumExecutedTasks();
    final int executedIterations = jobResult.getNumExecutedIterations();
    // consolidate results into global symbol table
    consolidateAndCheckResults(ec, totalIterations, createdTasks, executedIterations, executedTasks, jobResult.getVariables());

    // undo the forced recompile of step 0
    if (forcedRecompile) {
        releaseForcedRecompile(0);
    }

    if (_monitor) {
        StatisticMonitor.putPFStat(_ID, Stat.PARFOR_WAIT_RESULTS_T, timer.stop());
        StatisticMonitor.putPFStat(_ID, Stat.PARFOR_NUMTASKS, executedTasks);
        StatisticMonitor.putPFStat(_ID, Stat.PARFOR_NUMITERS, executedIterations);
    }
}
Also used : ParForBody(org.apache.sysml.runtime.controlprogram.parfor.ParForBody) RemoteParForJobReturn(org.apache.sysml.runtime.controlprogram.parfor.RemoteParForJobReturn) Task(org.apache.sysml.runtime.controlprogram.parfor.Task) HashMap(java.util.HashMap) Timing(org.apache.sysml.runtime.controlprogram.parfor.stat.Timing) TaskPartitioner(org.apache.sysml.runtime.controlprogram.parfor.TaskPartitioner)

Aggregations

Timing (org.apache.sysml.runtime.controlprogram.parfor.stat.Timing) 78; IOException (java.io.IOException) 31; ArrayList (java.util.ArrayList) 29; DMLRuntimeException (org.apache.sysml.runtime.DMLRuntimeException) 25; HashMap (java.util.HashMap) 24; Connection (org.apache.sysml.api.jmlc.Connection) 17; PreparedScript (org.apache.sysml.api.jmlc.PreparedScript) 17; ResultVariables (org.apache.sysml.api.jmlc.ResultVariables) 17; MatrixBlock (org.apache.sysml.runtime.matrix.data.MatrixBlock) 17; MatrixObject (org.apache.sysml.runtime.controlprogram.caching.MatrixObject) 14; ParForStatementBlock (org.apache.sysml.parser.ParForStatementBlock) 10; TaskPartitioner (org.apache.sysml.runtime.controlprogram.parfor.TaskPartitioner) 10; ParForBody (org.apache.sysml.runtime.controlprogram.parfor.ParForBody) 8; RemoteParForJobReturn (org.apache.sysml.runtime.controlprogram.parfor.RemoteParForJobReturn) 8; LocalVariableMap (org.apache.sysml.runtime.controlprogram.LocalVariableMap) 7; ProgramBlock (org.apache.sysml.runtime.controlprogram.ProgramBlock) 7; ExecutorService (java.util.concurrent.ExecutorService) 6; Future (java.util.concurrent.Future) 6; LocalTaskQueue (org.apache.sysml.runtime.controlprogram.parfor.LocalTaskQueue) 6; Task (org.apache.sysml.runtime.controlprogram.parfor.Task) 6