Search in sources :

Example 1 with ParForBody

use of org.apache.sysml.runtime.controlprogram.parfor.ParForBody in project incubator-systemml by apache.

the class ParForProgramBlock method executeRemoteSparkParFor.

/**
 * Executes this parfor as a single remote Spark job: serializes the parfor body,
 * creates all tasks up front, exports matrices to HDFS, submits the job, and merges
 * the returned variables back into the given execution context.
 *
 * @param ec execution context handed to the serialized body, the export and the job
 * @param itervar iteration variable (not read by this method)
 * @param from lower bound passed to the task partitioner
 * @param to upper bound passed to the task partitioner
 * @param incr increment passed to the task partitioner
 * @throws DMLRuntimeException if DMLRuntimeException occurs
 */
private void executeRemoteSparkParFor(ExecutionContext ec, IntObject itervar, IntObject from, IntObject to, IntObject incr) throws DMLRuntimeException {
    // the timer only exists when monitoring is on; every use below is guarded by _monitor
    final Timing timer = _monitor ? new Timing(true) : null;

    // Step 0) check and compile to CP (if forced remote parfor)
    boolean forcedRecompile = false;
    if (FORCE_CP_ON_REMOTE_MR) {
        final boolean noOptimizer = (_optMode == POptMode.NONE);
        final boolean constrainedToSpark = (_optMode == POptMode.CONSTRAINED && _execMode == PExecMode.REMOTE_SPARK);
        if (noOptimizer || constrainedToSpark) {
            // tid = 0 because it is replaced in the remote parworker
            forcedRecompile = checkMRAndRecompileToCP(0);
        }
    }

    // Step 1) init parallel workers (serialize program blocks)
    // NOTES: each mapper changes filenames with regard to its ID as a single job is submitted;
    //        the serialized string cannot be reused, since variables are serialized as well.
    final HashMap<String, byte[]> classMap = new HashMap<String, byte[]>();
    final String serializedBody = ProgramConverter.serializeParForBody(new ParForBody(_childBlocks, _resultVars, ec), classMap);
    if (_monitor) {
        StatisticMonitor.putPFStat(_ID, Stat.PARFOR_INIT_PARWRK_T, timer.stop());
    }

    // Step 2) create tasks (sequentially, as input to the parfor job)
    final TaskPartitioner taskPartitioner = createTaskPartitioner(from, to, incr);
    final long expectedIterations = taskPartitioner.getNumIterations();
    final List<Task> taskList = taskPartitioner.createTasks();
    final long expectedTasks = taskList.size();
    if (_monitor) {
        StatisticMonitor.putPFStat(_ID, Stat.PARFOR_INIT_TASKS_T, timer.stop());
    }

    // write matrices to HDFS before the job is submitted
    exportMatricesToHDFS(ec);

    // Step 3) submit Spark parfor job (no lazy evaluation, since collect on result)
    final RemoteParForJobReturn jobResult = RemoteParForSpark.runJob(_ID, serializedBody, classMap, taskList, ec, _enableCPCaching, _numThreads);
    if (_monitor) {
        StatisticMonitor.putPFStat(_ID, Stat.PARFOR_WAIT_EXEC_T, timer.stop());
    }

    // Step 4) collect results from each parallel worker
    final int executedTasks = jobResult.getNumExecutedTasks();
    final int executedIterations = jobResult.getNumExecutedIterations();

    // consolidate results into the global symbol table
    consolidateAndCheckResults(ec, expectedIterations, expectedTasks, executedIterations, executedTasks, jobResult.getVariables());

    // undo the forced recompile of step 0, if it happened
    if (forcedRecompile) {
        releaseForcedRecompile(0);
    }

    if (_monitor) {
        StatisticMonitor.putPFStat(_ID, Stat.PARFOR_WAIT_RESULTS_T, timer.stop());
        StatisticMonitor.putPFStat(_ID, Stat.PARFOR_NUMTASKS, executedTasks);
        StatisticMonitor.putPFStat(_ID, Stat.PARFOR_NUMITERS, executedIterations);
    }
}
Also used : ParForBody(org.apache.sysml.runtime.controlprogram.parfor.ParForBody) RemoteParForJobReturn(org.apache.sysml.runtime.controlprogram.parfor.RemoteParForJobReturn) Task(org.apache.sysml.runtime.controlprogram.parfor.Task) HashMap(java.util.HashMap) Timing(org.apache.sysml.runtime.controlprogram.parfor.stat.Timing) TaskPartitioner(org.apache.sysml.runtime.controlprogram.parfor.TaskPartitioner)

Example 2 with ParForBody

use of org.apache.sysml.runtime.controlprogram.parfor.ParForBody in project incubator-systemml by apache.

the class ParForProgramBlock method createParallelWorker.

/**
 * Builds a local parallel worker, either from scratch or by partially recycling earlier work.
 * The child program blocks and the execution context are deep copied for the worker; entries
 * of the symbol table are not deep copied because they are replaced anyway on the next write.
 * When the program block cache is enabled, the deep-copied program blocks of a previous
 * execution of this parfor are reused for the same worker id.
 *
 * @param pwID parworker id
 * @param queue task queue
 * @param ec execution context
 * @return local parworker
 * @throws DMLRuntimeException if DMLRuntimeException occurs
 */
private LocalParWorker createParallelWorker(long pwID, LocalTaskQueue<Task> queue, ExecutionContext ec) throws DMLRuntimeException {
    try {
        // function names are only collected when the blocks are freshly copied;
        // on a cache hit the set stays empty
        HashSet<String> functionNames = new HashSet<String>();

        // obtain the worker's private copy of the child blocks (cached or new)
        ArrayList<ProgramBlock> workerBlocks;
        if (USE_PB_CACHE && _pbcache.containsKey(pwID)) {
            workerBlocks = _pbcache.get(pwID);
        } else {
            workerBlocks = ProgramConverter.rcreateDeepCopyProgramBlocks(_childBlocks, pwID, _IDPrefix, new HashSet<String>(), functionNames, false, false);
            if (USE_PB_CACHE) {
                _pbcache.put(pwID, workerBlocks);
            }
        }

        // deep copy execution context (including prepare parfor update-in-place)
        ExecutionContext workerEc = ProgramConverter.createDeepCopyExecutionContext(ec);

        // with the accelerator enabled, take a GPU context from the pool and set it on the copy
        if (DMLScript.USE_ACCELERATOR) {
            workerEc.setGPUContext(GPUContextPool.getFromPool());
        }

        // prepare basic update-in-place variables (vars dropped on result merge)
        prepareUpdateInPlaceVariables(workerEc, pwID);

        // copy compiler configuration (for jmlc w/o global config)
        CompilerConfig compilerConfig = ConfigurationManager.getCompilerConfig();

        // assemble the actual parallel worker
        ParForBody workerBody = new ParForBody(workerBlocks, _resultVars, workerEc);
        LocalParWorker worker = new LocalParWorker(pwID, queue, workerBody, compilerConfig, MAX_RETRYS_ON_ERROR, _monitor);
        worker.setFunctionNames(functionNames);
        return worker;
    } catch (Exception ex) {
        // any failure during setup is reported as a DMLRuntimeException with the cause attached
        throw new DMLRuntimeException(ex);
    }
}
Also used : ParForBody(org.apache.sysml.runtime.controlprogram.parfor.ParForBody) SparkExecutionContext(org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext) ExecutionContext(org.apache.sysml.runtime.controlprogram.context.ExecutionContext) LocalParWorker(org.apache.sysml.runtime.controlprogram.parfor.LocalParWorker) CompilerConfig(org.apache.sysml.conf.CompilerConfig) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException) IOException(java.io.IOException) CacheException(org.apache.sysml.runtime.controlprogram.caching.CacheException) HashSet(java.util.HashSet) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException)

Example 3 with ParForBody

use of org.apache.sysml.runtime.controlprogram.parfor.ParForBody in project incubator-systemml by apache.

the class ParForProgramBlock method executeRemoteSparkParForDP.

/**
 * Executes this parfor as a remote Spark job with data partitioning: marks the co-located
 * input matrix as partitioned, serializes the parfor body, exports the remaining matrices
 * to HDFS, submits the job, merges the returned variables into the given execution context
 * and finally clears the partitioned flag of the input matrix again.
 *
 * @param ec execution context handed to the serialized body, the export and the job
 * @param itervar iteration variable; its name is passed to the job
 * @param from lower bound passed to the task partitioner
 * @param to upper bound passed to the task partitioner
 * @param incr increment passed to the task partitioner
 * @throws DMLRuntimeException if DMLRuntimeException occurs
 * @throws IOException if IOException occurs
 */
private void executeRemoteSparkParForDP(ExecutionContext ec, IntObject itervar, IntObject from, IntObject to, IntObject incr) throws DMLRuntimeException, IOException {
    // the timer only exists when monitoring is on; every use below is guarded by _monitor
    final Timing timer = _monitor ? new Timing(true) : null;

    // Step 0) check and compile to CP (if forced remote parfor)
    final boolean forcedRecompile = checkMRAndRecompileToCP(0);

    // Step 1) prepare partitioned input matrix (needs to happen before serializing the program)
    final ParForStatementBlock parforSB = (ParForStatementBlock) getStatementBlock();
    final MatrixObject dpInput = ec.getMatrixObject(_colocatedDPMatrix);
    final PartitionFormat dpFormat = parforSB.determineDataPartitionFormat(_colocatedDPMatrix);
    // mark matrix var as partitioned
    dpInput.setPartitioned(dpFormat._dpf, dpFormat._N);

    // Step 2) init parallel workers (serialize program blocks)
    // NOTES: each mapper changes filenames with regard to its ID as a single job is submitted;
    //        the serialized string cannot be reused, since variables are serialized as well.
    final HashMap<String, byte[]> classMap = new HashMap<String, byte[]>();
    final String serializedBody = ProgramConverter.serializeParForBody(new ParForBody(_childBlocks, _resultVars, ec), classMap);
    if (_monitor) {
        StatisticMonitor.putPFStat(_ID, Stat.PARFOR_INIT_PARWRK_T, timer.stop());
    }

    // Step 3) create tasks
    final TaskPartitioner taskPartitioner = createTaskPartitioner(from, to, incr);
    final String resultFileName = constructResultFileName();
    final long expectedIterations = taskPartitioner.getNumIterations();
    // no task list is materialized here; the created-task count is taken to be the iteration count
    final long expectedTasks = expectedIterations;
    if (_monitor) {
        StatisticMonitor.putPFStat(_ID, Stat.PARFOR_INIT_TASKS_T, timer.stop());
    }

    // write matrices to HDFS, except the DP matrix which is the input to the RemoteDPParForSpark job
    exportMatricesToHDFS(ec, _colocatedDPMatrix);

    // Step 4) submit Spark job (wait for finished work)
    // TODO runtime support for binary cell partitioning; until then the input is always binary block
    final OutputInfo inputInfo = OutputInfo.BinaryBlockOutputInfo;
    final RemoteParForJobReturn jobResult = RemoteDPParForSpark.runJob(_ID, itervar.getName(), _colocatedDPMatrix, serializedBody, classMap, resultFileName, dpInput, ec, dpFormat, inputInfo, _tSparseCol, _enableCPCaching, _numThreads);
    if (_monitor) {
        StatisticMonitor.putPFStat(_ID, Stat.PARFOR_WAIT_EXEC_T, timer.stop());
    }

    // Step 5) collect results from each parallel worker
    final int executedTasks = jobResult.getNumExecutedTasks();
    final int executedIterations = jobResult.getNumExecutedIterations();

    // consolidate results into the global symbol table
    consolidateAndCheckResults(ec, expectedIterations, expectedTasks, executedIterations, executedTasks, jobResult.getVariables());

    // undo the forced recompile of step 0, if it happened
    if (forcedRecompile) {
        releaseForcedRecompile(0);
    }

    // revert the partitioned marker set in step 1
    dpInput.unsetPartitioned();

    if (_monitor) {
        StatisticMonitor.putPFStat(_ID, Stat.PARFOR_WAIT_RESULTS_T, timer.stop());
        StatisticMonitor.putPFStat(_ID, Stat.PARFOR_NUMTASKS, executedTasks);
        StatisticMonitor.putPFStat(_ID, Stat.PARFOR_NUMITERS, executedIterations);
    }
}
Also used : ParForBody(org.apache.sysml.runtime.controlprogram.parfor.ParForBody) MatrixObject(org.apache.sysml.runtime.controlprogram.caching.MatrixObject) HashMap(java.util.HashMap) OutputInfo(org.apache.sysml.runtime.matrix.data.OutputInfo) RemoteParForJobReturn(org.apache.sysml.runtime.controlprogram.parfor.RemoteParForJobReturn) ParForStatementBlock(org.apache.sysml.parser.ParForStatementBlock) Timing(org.apache.sysml.runtime.controlprogram.parfor.stat.Timing) TaskPartitioner(org.apache.sysml.runtime.controlprogram.parfor.TaskPartitioner)

Example 4 with ParForBody

use of org.apache.sysml.runtime.controlprogram.parfor.ParForBody in project incubator-systemml by apache.

the class ParForProgramBlock method executeRemoteMRParFor.

/**
 * Executes this parfor as a remote MR job.
 *
 * <ul>
 * <li>Step 0) check and recompile MR instructions</li>
 * <li>Step 1) serialize child program blocks and instructions</li>
 * <li>Step 2) create tasks and write them to the task file</li>
 * <li>Step 3) submit the MR job and wait for results</li>
 * <li>Step 4) collect results from each parallel worker</li>
 * </ul>
 *
 * @param ec execution context handed to the serialized body and the export
 * @param itervar iteration variable (not read by this method)
 * @param from lower bound passed to the task partitioner
 * @param to upper bound passed to the task partitioner; also determines the digit count used when writing tasks
 * @param incr increment passed to the task partitioner
 * @throws DMLRuntimeException if DMLRuntimeException occurs
 * @throws IOException if IOException occurs
 */
private void executeRemoteMRParFor(ExecutionContext ec, IntObject itervar, IntObject from, IntObject to, IntObject incr) throws DMLRuntimeException, IOException {
    // the timer only exists when monitoring is on; every use below is guarded by _monitor
    final Timing timer = _monitor ? new Timing(true) : null;

    // Step 0) check and compile to CP (if forced remote parfor)
    boolean forcedRecompile = false;
    if (FORCE_CP_ON_REMOTE_MR) {
        final boolean noOptimizer = (_optMode == POptMode.NONE);
        final boolean constrainedToMR = (_optMode == POptMode.CONSTRAINED && _execMode == PExecMode.REMOTE_MR);
        if (noOptimizer || constrainedToMR) {
            // tid = 0 because it is replaced in the remote parworker
            forcedRecompile = checkMRAndRecompileToCP(0);
        }
    }

    // Step 1) init parallel workers (serialize program blocks)
    // NOTES: each mapper changes filenames with regard to its ID as a single job is submitted;
    //        the serialized string cannot be reused, since variables are serialized as well.
    final String serializedBody = ProgramConverter.serializeParForBody(new ParForBody(_childBlocks, _resultVars, ec));
    if (_monitor) {
        StatisticMonitor.putPFStat(_ID, Stat.PARFOR_INIT_PARWRK_T, timer.stop());
    }

    // Step 2) create tasks
    final TaskPartitioner taskPartitioner = createTaskPartitioner(from, to, incr);
    String taskFileName = constructTaskFileName();
    final String resultFileName = constructResultFileName();
    final long expectedIterations = taskPartitioner.getNumIterations();
    // number of decimal digits of the upper bound
    // NOTE(review): Math.log10 yields NaN / -Infinity for arguments <= 0 - confirm 'to' is always positive here
    final int maxDigits = ((int) Math.log10(to.getLongValue())) + 1;
    final long expectedTasks;
    if (USE_STREAMING_TASK_CREATION) {
        // put tasks into a queue and write them to the task file from there
        final LocalTaskQueue<Task> taskQueue = new LocalTaskQueue<Task>();
        expectedTasks = taskPartitioner.createTasks(taskQueue);
        taskFileName = writeTasksToFile(taskFileName, taskQueue, maxDigits);
    } else {
        // sequentially create all tasks and write them to disk
        final List<Task> taskList = taskPartitioner.createTasks();
        expectedTasks = taskList.size();
        taskFileName = writeTasksToFile(taskFileName, taskList, maxDigits);
    }
    if (_monitor) {
        StatisticMonitor.putPFStat(_ID, Stat.PARFOR_INIT_TASKS_T, timer.stop());
    }

    // write matrices to HDFS before the job is submitted
    exportMatricesToHDFS(ec);

    // Step 3) submit MR job (wait for finished work)
    MatrixObject colocatedInput = null;
    if (_colocatedDPMatrix != null) {
        colocatedInput = ec.getMatrixObject(_colocatedDPMatrix);
    }
    final RemoteParForJobReturn jobResult = RemoteParForMR.runJob(_ID, serializedBody, taskFileName, resultFileName, colocatedInput, _enableCPCaching, _numThreads, WRITE_REPLICATION_FACTOR, MAX_RETRYS_ON_ERROR, getMinMemory(ec), (ALLOW_REUSE_MR_JVMS & _jvmReuse));
    if (_monitor) {
        StatisticMonitor.putPFStat(_ID, Stat.PARFOR_WAIT_EXEC_T, timer.stop());
    }

    // Step 4) collect results from each parallel worker
    final int executedTasks = jobResult.getNumExecutedTasks();
    final int executedIterations = jobResult.getNumExecutedIterations();

    // consolidate results into the global symbol table
    consolidateAndCheckResults(ec, expectedIterations, expectedTasks, executedIterations, executedTasks, jobResult.getVariables());

    // undo the forced recompile of step 0, if it happened
    if (forcedRecompile) {
        releaseForcedRecompile(0);
    }

    if (_monitor) {
        StatisticMonitor.putPFStat(_ID, Stat.PARFOR_WAIT_RESULTS_T, timer.stop());
        StatisticMonitor.putPFStat(_ID, Stat.PARFOR_NUMTASKS, executedTasks);
        StatisticMonitor.putPFStat(_ID, Stat.PARFOR_NUMITERS, executedIterations);
    }
}
Also used : ParForBody(org.apache.sysml.runtime.controlprogram.parfor.ParForBody) Task(org.apache.sysml.runtime.controlprogram.parfor.Task) MatrixObject(org.apache.sysml.runtime.controlprogram.caching.MatrixObject) RemoteParForJobReturn(org.apache.sysml.runtime.controlprogram.parfor.RemoteParForJobReturn) LocalTaskQueue(org.apache.sysml.runtime.controlprogram.parfor.LocalTaskQueue) Timing(org.apache.sysml.runtime.controlprogram.parfor.stat.Timing) TaskPartitioner(org.apache.sysml.runtime.controlprogram.parfor.TaskPartitioner)

Example 5 with ParForBody

use of org.apache.sysml.runtime.controlprogram.parfor.ParForBody in project incubator-systemml by apache.

the class ParForProgramBlock method executeRemoteMRParForDP.

/**
 * Executes this parfor as a remote MR job with data partitioning.
 *
 * <ul>
 * <li>Step 0) check and recompile MR instructions</li>
 * <li>Step 1) mark the co-located input matrix as partitioned</li>
 * <li>Step 2) serialize child program blocks and instructions</li>
 * <li>Step 3) determine the number of iterations/tasks</li>
 * <li>Step 4) submit the MR job and wait for results</li>
 * <li>Step 5) collect results from each parallel worker and clear the partitioned flag</li>
 * </ul>
 *
 * @param ec execution context handed to the serialized body and the export
 * @param itervar iteration variable; its name is passed to the job
 * @param from lower bound passed to the task partitioner
 * @param to upper bound passed to the task partitioner
 * @param incr increment passed to the task partitioner
 * @throws DMLRuntimeException if DMLRuntimeException occurs
 * @throws IOException if IOException occurs
 */
private void executeRemoteMRParForDP(ExecutionContext ec, IntObject itervar, IntObject from, IntObject to, IntObject incr) throws DMLRuntimeException, IOException {
    // the timer only exists when monitoring is on; every use below is guarded by _monitor
    final Timing timer = _monitor ? new Timing(true) : null;

    // Step 0) check and compile to CP (if forced remote parfor)
    final boolean forcedRecompile = checkMRAndRecompileToCP(0);

    // Step 1) prepare partitioned input matrix (needs to happen before serializing the program)
    final ParForStatementBlock parforSB = (ParForStatementBlock) getStatementBlock();
    final MatrixObject dpInput = ec.getMatrixObject(_colocatedDPMatrix);
    final PartitionFormat dpFormat = parforSB.determineDataPartitionFormat(_colocatedDPMatrix);
    // mark matrix var as partitioned
    dpInput.setPartitioned(dpFormat._dpf, dpFormat._N);

    // Step 2) init parallel workers (serialize program blocks)
    // NOTES: each mapper changes filenames with regard to its ID as a single job is submitted;
    //        the serialized string cannot be reused, since variables are serialized as well.
    final String serializedBody = ProgramConverter.serializeParForBody(new ParForBody(_childBlocks, _resultVars, ec));
    if (_monitor) {
        StatisticMonitor.putPFStat(_ID, Stat.PARFOR_INIT_PARWRK_T, timer.stop());
    }

    // Step 3) create tasks
    final TaskPartitioner taskPartitioner = createTaskPartitioner(from, to, incr);
    final String resultFileName = constructResultFileName();
    final long expectedIterations = taskPartitioner.getNumIterations();
    // no task list is materialized here; the created-task count is taken to be the iteration count
    final long expectedTasks = expectedIterations;
    if (_monitor) {
        StatisticMonitor.putPFStat(_ID, Stat.PARFOR_INIT_TASKS_T, timer.stop());
    }

    // write matrices to HDFS before the job is submitted
    exportMatricesToHDFS(ec);

    // Step 4) submit MR job (wait for finished work)
    // binary cell is chosen for sparse column-wise (< 0.1) or very sparse row-wise (< 0.001)
    // inputs, binary block otherwise
    // NOTE(review): the format is compared by reference against the PartitionFormat constants -
    // confirm determineDataPartitionFormat returns those shared instances
    final OutputInfo inputInfo;
    if (dpInput.getSparsity() < 0.1 && dpFormat == PartitionFormat.COLUMN_WISE) {
        inputInfo = OutputInfo.BinaryCellOutputInfo;
    } else if (dpInput.getSparsity() < 0.001 && dpFormat == PartitionFormat.ROW_WISE) {
        inputInfo = OutputInfo.BinaryCellOutputInfo;
    } else {
        inputInfo = OutputInfo.BinaryBlockOutputInfo;
    }
    final RemoteParForJobReturn jobResult = RemoteDPParForMR.runJob(_ID, itervar.getName(), _colocatedDPMatrix, serializedBody, resultFileName, dpInput, dpFormat, inputInfo, _tSparseCol, _enableCPCaching, _numThreads, _replicationDP);
    if (_monitor) {
        StatisticMonitor.putPFStat(_ID, Stat.PARFOR_WAIT_EXEC_T, timer.stop());
    }

    // Step 5) collect results from each parallel worker
    final int executedTasks = jobResult.getNumExecutedTasks();
    final int executedIterations = jobResult.getNumExecutedIterations();

    // consolidate results into the global symbol table
    consolidateAndCheckResults(ec, expectedIterations, expectedTasks, executedIterations, executedTasks, jobResult.getVariables());

    // undo the forced recompile of step 0, if it happened
    if (forcedRecompile) {
        releaseForcedRecompile(0);
    }

    // revert the partitioned marker set in step 1
    dpInput.unsetPartitioned();

    if (_monitor) {
        StatisticMonitor.putPFStat(_ID, Stat.PARFOR_WAIT_RESULTS_T, timer.stop());
        StatisticMonitor.putPFStat(_ID, Stat.PARFOR_NUMTASKS, executedTasks);
        StatisticMonitor.putPFStat(_ID, Stat.PARFOR_NUMITERS, executedIterations);
    }
}
Also used : OutputInfo(org.apache.sysml.runtime.matrix.data.OutputInfo) ParForBody(org.apache.sysml.runtime.controlprogram.parfor.ParForBody) RemoteParForJobReturn(org.apache.sysml.runtime.controlprogram.parfor.RemoteParForJobReturn) MatrixObject(org.apache.sysml.runtime.controlprogram.caching.MatrixObject) ParForStatementBlock(org.apache.sysml.parser.ParForStatementBlock) Timing(org.apache.sysml.runtime.controlprogram.parfor.stat.Timing) TaskPartitioner(org.apache.sysml.runtime.controlprogram.parfor.TaskPartitioner)

Aggregations

ParForBody (org.apache.sysml.runtime.controlprogram.parfor.ParForBody): 5, RemoteParForJobReturn (org.apache.sysml.runtime.controlprogram.parfor.RemoteParForJobReturn): 4, TaskPartitioner (org.apache.sysml.runtime.controlprogram.parfor.TaskPartitioner): 4, Timing (org.apache.sysml.runtime.controlprogram.parfor.stat.Timing): 4, MatrixObject (org.apache.sysml.runtime.controlprogram.caching.MatrixObject): 3, HashMap (java.util.HashMap): 2, ParForStatementBlock (org.apache.sysml.parser.ParForStatementBlock): 2, Task (org.apache.sysml.runtime.controlprogram.parfor.Task): 2, OutputInfo (org.apache.sysml.runtime.matrix.data.OutputInfo): 2, IOException (java.io.IOException): 1, HashSet (java.util.HashSet): 1, CompilerConfig (org.apache.sysml.conf.CompilerConfig): 1, DMLRuntimeException (org.apache.sysml.runtime.DMLRuntimeException): 1, CacheException (org.apache.sysml.runtime.controlprogram.caching.CacheException): 1, ExecutionContext (org.apache.sysml.runtime.controlprogram.context.ExecutionContext): 1, SparkExecutionContext (org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext): 1, LocalParWorker (org.apache.sysml.runtime.controlprogram.parfor.LocalParWorker): 1, LocalTaskQueue (org.apache.sysml.runtime.controlprogram.parfor.LocalTaskQueue): 1