Search in sources :

Example 1 with ResultMerge

use of org.apache.sysml.runtime.controlprogram.parfor.ResultMerge in project incubator-systemml by apache.

the class ParForProgramBlock method createResultMerge.

/**
 * Instantiates the result merge implementation selected by {@code prm}.
 *
 * <p>Before dispatching, upper bounds for the number of map and reduce tasks are
 * derived from the execution mode (Spark default parallelism, or the remote
 * map/reduce task limits with a YARN core-based adjustment). These bounds are
 * only handed to the {@code REMOTE_MR} and {@code REMOTE_SPARK} implementations.
 *
 * @param prm   result merge type to instantiate
 * @param out   output matrix passed through to the merge implementation
 * @param in    input matrices passed through to the merge implementation
 * @param fname file name passed through to the merge implementation
 * @param accum accumulation flag passed through to the merge implementation
 * @param ec    execution context, used by the {@code REMOTE_SPARK} implementation only
 * @return the newly created result merge instance
 * @throws DMLRuntimeException if {@code prm} is not one of the handled types
 */
private ResultMerge createResultMerge(PResultMerge prm, MatrixObject out, MatrixObject[] in, String fname, boolean accum, ExecutionContext ec) {
    // upper bounds for the number of map and reduce tasks
    int mapCap;
    int redCap;
    if (!OptimizerUtils.isSparkExecutionMode()) {
        final int configuredReducers = ConfigurationManager.getNumReducers();
        mapCap = InfrastructureAnalyzer.getRemoteParallelMapTasks();
        redCap = Math.min(configuredReducers, InfrastructureAnalyzer.getRemoteParallelReduceTasks());
        if (InfrastructureAnalyzer.isYarnEnabled()) {
            // on yarn, raise both bounds to the cluster core count (half of it for reducers)
            mapCap = (int) Math.max(mapCap, YarnClusterAnalyzer.getNumCores());
            redCap = (int) Math.max(redCap, YarnClusterAnalyzer.getNumCores() / 2);
        }
    } else {
        // spark: same bound for the map and the reduce side
        mapCap = (int) SparkExecutionContext.getDefaultParallelism(true);
        redCap = mapCap;
    }
    // the map side uses at least as many tasks as there are parfor threads
    final int numMap = Math.max(_numThreads, mapCap);
    final int numRed = redCap;

    // dispatch on the requested result merge type
    switch (prm) {
        case LOCAL_MEM:
            return new ResultMergeLocalMemory(out, in, fname, accum);
        case LOCAL_FILE:
            return new ResultMergeLocalFile(out, in, fname, accum);
        case LOCAL_AUTOMATIC:
            return new ResultMergeLocalAutomatic(out, in, fname, accum);
        case REMOTE_MR:
            return new ResultMergeRemoteMR(out, in, fname, accum, _ID, numMap, numRed, WRITE_REPLICATION_FACTOR, MAX_RETRYS_ON_ERROR, ALLOW_REUSE_MR_JVMS);
        case REMOTE_SPARK:
            return new ResultMergeRemoteSpark(out, in, fname, accum, ec, numMap, numRed);
        default:
            throw new DMLRuntimeException("Undefined result merge: '" + prm.toString() + "'.");
    }
}
Also used : ResultMergeRemoteMR(org.apache.sysml.runtime.controlprogram.parfor.ResultMergeRemoteMR) ResultMergeLocalMemory(org.apache.sysml.runtime.controlprogram.parfor.ResultMergeLocalMemory) ResultMergeLocalAutomatic(org.apache.sysml.runtime.controlprogram.parfor.ResultMergeLocalAutomatic) ResultMergeRemoteSpark(org.apache.sysml.runtime.controlprogram.parfor.ResultMergeRemoteSpark) ResultMerge(org.apache.sysml.runtime.controlprogram.parfor.ResultMerge) ResultMergeLocalFile(org.apache.sysml.runtime.controlprogram.parfor.ResultMergeLocalFile) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException)

Example 2 with ResultMerge

use of org.apache.sysml.runtime.controlprogram.parfor.ResultMerge in project incubator-systemml by apache.

the class ParForProgramBlock method consolidateAndCheckResults.

/**
 * Merges the worker results of all result variables into the given execution
 * context and afterwards validates the task and iteration counters.
 *
 * <p>If {@code checkParallelRemoteResultMerge()} holds, the matrix-typed result
 * variables are put on a task queue and processed by {@code ResultMergeWorker}s;
 * otherwise each matrix-typed result variable is merged in turn through
 * {@link #createResultMerge}, and the merged matrix replaces the existing
 * variable in {@code ec}. Result variables not bound to a {@code MatrixObject}
 * are skipped in both paths.
 *
 * @param ec       execution context whose result variables are read and replaced
 * @param expIters expected number of iterations
 * @param expTasks expected number of tasks
 * @param numIters number of executed iterations
 * @param numTasks number of executed tasks
 * @param results  one variable map per worker, looked up by result variable name
 * @throws DMLRuntimeException if a merge worker finished with an error, if any
 *         exception occurs during the parallel merge (wrapped as cause), or if
 *         the executed counters differ from the expected ones
 */
private void consolidateAndCheckResults(ExecutionContext ec, long expIters, long expTasks, long numIters, long numTasks, LocalVariableMap[] results) {
    Timing time = new Timing(true);
    // result merge
    if (checkParallelRemoteResultMerge()) {
        // execute result merge in parallel for all result vars
        int par = Math.min(_resultVars.size(), InfrastructureAnalyzer.getLocalParallelism());
        if (InfrastructureAnalyzer.isLocalMode()) {
            // reduce the degree of parallelism if necessary; the memory-derived bound is at least 1
            int parmem = (int) Math.floor(OptimizerUtils.getLocalMemBudget() / InfrastructureAnalyzer.getRemoteMaxMemorySortBuffer());
            par = Math.min(par, Math.max(parmem, 1));
        }
        try {
            // enqueue all matrix-typed result vars as tasks (robustness: scalars are skipped)
            LocalTaskQueue<ResultVar> q = new LocalTaskQueue<>();
            for (ResultVar var : _resultVars) {
                if (ec.getVariable(var._name) instanceof MatrixObject)
                    q.enqueueTask(var);
            }
            q.closeInput();
            // create all result merge workers
            ResultMergeWorker[] rmWorkers = new ResultMergeWorker[par];
            for (int i = 0; i < par; i++)
                rmWorkers[i] = new ResultMergeWorker(q, results, ec);
            // start all
            for (int i = 0; i < par; i++)
                rmWorkers[i].start();
            // wait for all and fail on the first worker that reports an error
            for (int i = 0; i < par; i++) {
                rmWorkers[i].join();
                if (!rmWorkers[i].finishedNoError())
                    throw new DMLRuntimeException("Error occured in parallel result merge worker.");
            }
        } catch (Exception ex) {
            // An InterruptedException caught here would otherwise leave the thread's
            // interrupt status cleared; restore it before wrapping so that callers
            // can still observe the interruption.
            if (ex instanceof InterruptedException)
                Thread.currentThread().interrupt();
            throw new DMLRuntimeException(ex);
        }
    } else {
        // execute result merge sequentially for all result vars
        for (ResultVar var : _resultVars) {
            Data dat = ec.getVariable(var._name);
            // robustness: only matrix-typed result vars are merged, scalars are skipped
            if (dat instanceof MatrixObject) {
                MatrixObject out = (MatrixObject) dat;
                // collect the worker-local versions of this result variable
                MatrixObject[] in = new MatrixObject[results.length];
                for (int i = 0; i < results.length; i++)
                    in[i] = (MatrixObject) results[i].get(var._name);
                String fname = constructResultMergeFileName();
                ResultMerge rm = createResultMerge(_resultMerge, out, in, fname, var._isAccum, ec);
                MatrixObject outNew = null;
                if (USE_PARALLEL_RESULT_MERGE)
                    outNew = rm.executeParallelMerge(_numThreads);
                else
                    outNew = rm.executeSerialMerge();
                // cleanup existing var, unless it is the very object returned by the merge
                Data exdata = ec.removeVariable(var._name);
                if (exdata != null && exdata != outNew && exdata instanceof MatrixObject)
                    ec.cleanupCacheableData((MatrixObject) exdata);
                // cleanup of intermediate result variables
                cleanWorkerResultVariables(ec, out, in);
                // set merged result variable
                ec.setVariable(var._name, outNew);
            }
        }
    }
    // handle unscoped variables (vars created in parfor, but potentially used afterwards);
    // sb might be null for nested parallelism
    ParForStatementBlock sb = (ParForStatementBlock) getStatementBlock();
    if (CREATE_UNSCOPED_RESULTVARS && sb != null && ec.getVariables() != null)
        createEmptyUnscopedVariables(ec.getVariables(), sb);
    // consistency check of the expected counters
    if (numTasks != expTasks || numIters != expIters)
        throw new DMLRuntimeException("PARFOR: Number of executed tasks does not match the number of created tasks: tasks " + numTasks + "/" + expTasks + ", iters " + numIters + "/" + expIters + ".");
    if (DMLScript.STATISTICS)
        Statistics.incrementParForMergeTime((long) time.stop());
}
Also used : MatrixObject(org.apache.sysml.runtime.controlprogram.caching.MatrixObject) Data(org.apache.sysml.runtime.instructions.cp.Data) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException) IOException(java.io.IOException) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException) LocalTaskQueue(org.apache.sysml.runtime.controlprogram.parfor.LocalTaskQueue) ResultVar(org.apache.sysml.parser.ParForStatementBlock.ResultVar) ResultMerge(org.apache.sysml.runtime.controlprogram.parfor.ResultMerge) ParForStatementBlock(org.apache.sysml.parser.ParForStatementBlock) Timing(org.apache.sysml.runtime.controlprogram.parfor.stat.Timing)

Example 3 with ResultMerge

use of org.apache.sysml.runtime.controlprogram.parfor.ResultMerge in project systemml by apache.

the class ParForProgramBlock method consolidateAndCheckResults.

/**
 * Merges the worker results of all result variables into the given execution
 * context and afterwards validates the task and iteration counters.
 *
 * <p>If {@code checkParallelRemoteResultMerge()} holds, the matrix-typed result
 * variables are put on a task queue and processed by {@code ResultMergeWorker}s;
 * otherwise each matrix-typed result variable is merged in turn through
 * {@link #createResultMerge}, and the merged matrix replaces the existing
 * variable in {@code ec}. Result variables not bound to a {@code MatrixObject}
 * are skipped in both paths.
 *
 * @param ec       execution context whose result variables are read and replaced
 * @param expIters expected number of iterations
 * @param expTasks expected number of tasks
 * @param numIters number of executed iterations
 * @param numTasks number of executed tasks
 * @param results  one variable map per worker, looked up by result variable name
 * @throws DMLRuntimeException if a merge worker finished with an error, if any
 *         exception occurs during the parallel merge (wrapped as cause), or if
 *         the executed counters differ from the expected ones
 */
private void consolidateAndCheckResults(ExecutionContext ec, long expIters, long expTasks, long numIters, long numTasks, LocalVariableMap[] results) {
    Timing time = new Timing(true);
    // result merge
    if (checkParallelRemoteResultMerge()) {
        // execute result merge in parallel for all result vars
        int par = Math.min(_resultVars.size(), InfrastructureAnalyzer.getLocalParallelism());
        if (InfrastructureAnalyzer.isLocalMode()) {
            // reduce the degree of parallelism if necessary; the memory-derived bound is at least 1
            int parmem = (int) Math.floor(OptimizerUtils.getLocalMemBudget() / InfrastructureAnalyzer.getRemoteMaxMemorySortBuffer());
            par = Math.min(par, Math.max(parmem, 1));
        }
        try {
            // enqueue all matrix-typed result vars as tasks (robustness: scalars are skipped)
            LocalTaskQueue<ResultVar> q = new LocalTaskQueue<>();
            for (ResultVar var : _resultVars) {
                if (ec.getVariable(var._name) instanceof MatrixObject)
                    q.enqueueTask(var);
            }
            q.closeInput();
            // create all result merge workers
            ResultMergeWorker[] rmWorkers = new ResultMergeWorker[par];
            for (int i = 0; i < par; i++)
                rmWorkers[i] = new ResultMergeWorker(q, results, ec);
            // start all
            for (int i = 0; i < par; i++)
                rmWorkers[i].start();
            // wait for all and fail on the first worker that reports an error
            for (int i = 0; i < par; i++) {
                rmWorkers[i].join();
                if (!rmWorkers[i].finishedNoError())
                    throw new DMLRuntimeException("Error occured in parallel result merge worker.");
            }
        } catch (Exception ex) {
            // An InterruptedException caught here would otherwise leave the thread's
            // interrupt status cleared; restore it before wrapping so that callers
            // can still observe the interruption.
            if (ex instanceof InterruptedException)
                Thread.currentThread().interrupt();
            throw new DMLRuntimeException(ex);
        }
    } else {
        // execute result merge sequentially for all result vars
        for (ResultVar var : _resultVars) {
            Data dat = ec.getVariable(var._name);
            // robustness: only matrix-typed result vars are merged, scalars are skipped
            if (dat instanceof MatrixObject) {
                MatrixObject out = (MatrixObject) dat;
                // collect the worker-local versions of this result variable
                MatrixObject[] in = new MatrixObject[results.length];
                for (int i = 0; i < results.length; i++)
                    in[i] = (MatrixObject) results[i].get(var._name);
                String fname = constructResultMergeFileName();
                ResultMerge rm = createResultMerge(_resultMerge, out, in, fname, var._isAccum, ec);
                MatrixObject outNew = null;
                if (USE_PARALLEL_RESULT_MERGE)
                    outNew = rm.executeParallelMerge(_numThreads);
                else
                    outNew = rm.executeSerialMerge();
                // cleanup existing var, unless it is the very object returned by the merge
                Data exdata = ec.removeVariable(var._name);
                if (exdata != null && exdata != outNew && exdata instanceof MatrixObject)
                    ec.cleanupCacheableData((MatrixObject) exdata);
                // cleanup of intermediate result variables
                cleanWorkerResultVariables(ec, out, in);
                // set merged result variable
                ec.setVariable(var._name, outNew);
            }
        }
    }
    // handle unscoped variables (vars created in parfor, but potentially used afterwards);
    // sb might be null for nested parallelism
    ParForStatementBlock sb = (ParForStatementBlock) getStatementBlock();
    if (CREATE_UNSCOPED_RESULTVARS && sb != null && ec.getVariables() != null)
        createEmptyUnscopedVariables(ec.getVariables(), sb);
    // consistency check of the expected counters
    if (numTasks != expTasks || numIters != expIters)
        throw new DMLRuntimeException("PARFOR: Number of executed tasks does not match the number of created tasks: tasks " + numTasks + "/" + expTasks + ", iters " + numIters + "/" + expIters + ".");
    if (DMLScript.STATISTICS)
        Statistics.incrementParForMergeTime((long) time.stop());
}
Also used : MatrixObject(org.apache.sysml.runtime.controlprogram.caching.MatrixObject) Data(org.apache.sysml.runtime.instructions.cp.Data) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException) IOException(java.io.IOException) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException) LocalTaskQueue(org.apache.sysml.runtime.controlprogram.parfor.LocalTaskQueue) ResultVar(org.apache.sysml.parser.ParForStatementBlock.ResultVar) ResultMerge(org.apache.sysml.runtime.controlprogram.parfor.ResultMerge) ParForStatementBlock(org.apache.sysml.parser.ParForStatementBlock) Timing(org.apache.sysml.runtime.controlprogram.parfor.stat.Timing)

Example 4 with ResultMerge

use of org.apache.sysml.runtime.controlprogram.parfor.ResultMerge in project systemml by apache.

the class ParForProgramBlock method createResultMerge.

/**
 * Instantiates the result merge implementation selected by {@code prm}.
 *
 * <p>Before dispatching, upper bounds for the number of map and reduce tasks are
 * derived from the execution mode (Spark default parallelism, or the remote
 * map/reduce task limits with a YARN core-based adjustment). These bounds are
 * only handed to the {@code REMOTE_MR} and {@code REMOTE_SPARK} implementations.
 *
 * @param prm   result merge type to instantiate
 * @param out   output matrix passed through to the merge implementation
 * @param in    input matrices passed through to the merge implementation
 * @param fname file name passed through to the merge implementation
 * @param accum accumulation flag passed through to the merge implementation
 * @param ec    execution context, used by the {@code REMOTE_SPARK} implementation only
 * @return the newly created result merge instance
 * @throws DMLRuntimeException if {@code prm} is not one of the handled types
 */
private ResultMerge createResultMerge(PResultMerge prm, MatrixObject out, MatrixObject[] in, String fname, boolean accum, ExecutionContext ec) {
    // upper bounds for the number of map and reduce tasks
    int mapCap;
    int redCap;
    if (!OptimizerUtils.isSparkExecutionMode()) {
        final int configuredReducers = ConfigurationManager.getNumReducers();
        mapCap = InfrastructureAnalyzer.getRemoteParallelMapTasks();
        redCap = Math.min(configuredReducers, InfrastructureAnalyzer.getRemoteParallelReduceTasks());
        if (InfrastructureAnalyzer.isYarnEnabled()) {
            // on yarn, raise both bounds to the cluster core count (half of it for reducers)
            mapCap = (int) Math.max(mapCap, YarnClusterAnalyzer.getNumCores());
            redCap = (int) Math.max(redCap, YarnClusterAnalyzer.getNumCores() / 2);
        }
    } else {
        // spark: same bound for the map and the reduce side
        mapCap = (int) SparkExecutionContext.getDefaultParallelism(true);
        redCap = mapCap;
    }
    // the map side uses at least as many tasks as there are parfor threads
    final int numMap = Math.max(_numThreads, mapCap);
    final int numRed = redCap;

    // dispatch on the requested result merge type
    switch (prm) {
        case LOCAL_MEM:
            return new ResultMergeLocalMemory(out, in, fname, accum);
        case LOCAL_FILE:
            return new ResultMergeLocalFile(out, in, fname, accum);
        case LOCAL_AUTOMATIC:
            return new ResultMergeLocalAutomatic(out, in, fname, accum);
        case REMOTE_MR:
            return new ResultMergeRemoteMR(out, in, fname, accum, _ID, numMap, numRed, WRITE_REPLICATION_FACTOR, MAX_RETRYS_ON_ERROR, ALLOW_REUSE_MR_JVMS);
        case REMOTE_SPARK:
            return new ResultMergeRemoteSpark(out, in, fname, accum, ec, numMap, numRed);
        default:
            throw new DMLRuntimeException("Undefined result merge: '" + prm.toString() + "'.");
    }
}
Also used : ResultMergeRemoteMR(org.apache.sysml.runtime.controlprogram.parfor.ResultMergeRemoteMR) ResultMergeLocalMemory(org.apache.sysml.runtime.controlprogram.parfor.ResultMergeLocalMemory) ResultMergeLocalAutomatic(org.apache.sysml.runtime.controlprogram.parfor.ResultMergeLocalAutomatic) ResultMergeRemoteSpark(org.apache.sysml.runtime.controlprogram.parfor.ResultMergeRemoteSpark) ResultMerge(org.apache.sysml.runtime.controlprogram.parfor.ResultMerge) ResultMergeLocalFile(org.apache.sysml.runtime.controlprogram.parfor.ResultMergeLocalFile) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException)

Example 5 with ResultMerge

use of org.apache.sysml.runtime.controlprogram.parfor.ResultMerge in project incubator-systemml by apache.

the class ParForProgramBlock method createResultMerge.

/**
 * Instantiates the result merge implementation selected by {@code prm}.
 *
 * <p>Before dispatching, upper bounds for the number of map and reduce tasks are
 * derived from the execution mode (Spark default parallelism, or the remote
 * map/reduce task limits with a YARN core-based adjustment). These bounds are
 * only handed to the {@code REMOTE_MR} and {@code REMOTE_SPARK} implementations.
 *
 * @param prm   result merge type to instantiate
 * @param out   output matrix passed through to the merge implementation
 * @param in    input matrices passed through to the merge implementation
 * @param fname file name passed through to the merge implementation
 * @param ec    execution context, used by the {@code REMOTE_SPARK} implementation only
 * @return the newly created result merge instance
 * @throws DMLRuntimeException if {@code prm} is not one of the handled types
 */
private ResultMerge createResultMerge(PResultMerge prm, MatrixObject out, MatrixObject[] in, String fname, ExecutionContext ec) throws DMLRuntimeException {
    //upper bounds for the number of map and reduce tasks
    int mapCap;
    int redCap;
    if (!OptimizerUtils.isSparkExecutionMode()) {
        final int configuredReducers = ConfigurationManager.getNumReducers();
        mapCap = InfrastructureAnalyzer.getRemoteParallelMapTasks();
        redCap = Math.min(configuredReducers, InfrastructureAnalyzer.getRemoteParallelReduceTasks());
        if (InfrastructureAnalyzer.isYarnEnabled()) {
            //on yarn, raise both bounds to the cluster core count (half of it for reducers)
            mapCap = (int) Math.max(mapCap, YarnClusterAnalyzer.getNumCores());
            redCap = (int) Math.max(redCap, YarnClusterAnalyzer.getNumCores() / 2);
        }
    } else {
        //spark: same bound for the map and the reduce side
        mapCap = (int) SparkExecutionContext.getDefaultParallelism(true);
        redCap = mapCap;
    }
    //the map side uses at least as many tasks as there are parfor threads
    final int numMap = Math.max(_numThreads, mapCap);
    final int numRed = redCap;

    //dispatch on the requested result merge type
    switch (prm) {
        case LOCAL_MEM:
            return new ResultMergeLocalMemory(out, in, fname);
        case LOCAL_FILE:
            return new ResultMergeLocalFile(out, in, fname);
        case LOCAL_AUTOMATIC:
            return new ResultMergeLocalAutomatic(out, in, fname);
        case REMOTE_MR:
            return new ResultMergeRemoteMR(out, in, fname, _ID, numMap, numRed, WRITE_REPLICATION_FACTOR, MAX_RETRYS_ON_ERROR, ALLOW_REUSE_MR_JVMS);
        case REMOTE_SPARK:
            return new ResultMergeRemoteSpark(out, in, fname, ec, numMap, numRed);
        default:
            throw new DMLRuntimeException("Undefined result merge: '" + prm.toString() + "'.");
    }
}
Also used : ResultMergeRemoteMR(org.apache.sysml.runtime.controlprogram.parfor.ResultMergeRemoteMR) ResultMergeLocalMemory(org.apache.sysml.runtime.controlprogram.parfor.ResultMergeLocalMemory) ResultMergeLocalAutomatic(org.apache.sysml.runtime.controlprogram.parfor.ResultMergeLocalAutomatic) ResultMergeRemoteSpark(org.apache.sysml.runtime.controlprogram.parfor.ResultMergeRemoteSpark) ResultMerge(org.apache.sysml.runtime.controlprogram.parfor.ResultMerge) ResultMergeLocalFile(org.apache.sysml.runtime.controlprogram.parfor.ResultMergeLocalFile) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException)

Aggregations

DMLRuntimeException (org.apache.sysml.runtime.DMLRuntimeException)5 ResultMerge (org.apache.sysml.runtime.controlprogram.parfor.ResultMerge)5 ResultMergeLocalAutomatic (org.apache.sysml.runtime.controlprogram.parfor.ResultMergeLocalAutomatic)3 ResultMergeLocalFile (org.apache.sysml.runtime.controlprogram.parfor.ResultMergeLocalFile)3 ResultMergeLocalMemory (org.apache.sysml.runtime.controlprogram.parfor.ResultMergeLocalMemory)3 ResultMergeRemoteMR (org.apache.sysml.runtime.controlprogram.parfor.ResultMergeRemoteMR)3 ResultMergeRemoteSpark (org.apache.sysml.runtime.controlprogram.parfor.ResultMergeRemoteSpark)3 IOException (java.io.IOException)2 ParForStatementBlock (org.apache.sysml.parser.ParForStatementBlock)2 ResultVar (org.apache.sysml.parser.ParForStatementBlock.ResultVar)2 MatrixObject (org.apache.sysml.runtime.controlprogram.caching.MatrixObject)2 LocalTaskQueue (org.apache.sysml.runtime.controlprogram.parfor.LocalTaskQueue)2 Timing (org.apache.sysml.runtime.controlprogram.parfor.stat.Timing)2 Data (org.apache.sysml.runtime.instructions.cp.Data)2