Search in sources :

Example 1 with DataPartitioner

use of org.apache.sysml.runtime.controlprogram.parfor.DataPartitioner in project incubator-systemml by apache.

the class DataPartitionMR method processPartitionInstructions.

/**
 * Runs every "partition" instruction contained in the given shuffle instruction string.
 *
 * For each such instruction, the referenced input matrix is partitioned via a
 * {@link DataPartitionerRemoteMR} and the matrix characteristics of the resulting
 * matrix object are recorded in {@code sts}, in the order the partition
 * instructions appear. Instructions with any other opcode are ignored.
 *
 * @param shuffleInst shuffle instructions, separated by {@code Instruction.INSTRUCTION_DELIM}
 * @param inputMatrices input matrix objects, addressed by the instruction's input index
 * @param resultIndices result indices used to locate the output matrix object
 * @param outputMatrices output matrix objects
 * @param numReducers number of reducers handed to the data partitioner
 * @param replication replication factor handed to the data partitioner
 * @param sts receives the matrix characteristics of each partitioned output
 */
private static void processPartitionInstructions(String shuffleInst, MatrixObject[] inputMatrices, byte[] resultIndices, MatrixObject[] outputMatrices, int numReducers, int replication, MatrixCharacteristics[] sts) {
    int statsPos = 0;
    String[] instructions = shuffleInst.split(Instruction.INSTRUCTION_DELIM);
    for (String instruction : instructions) {
        // everything except partition instructions is skipped
        if (!InstructionUtils.getOpCode(instruction).equalsIgnoreCase("partition")) {
            continue;
        }
        // instruction fields: [1] input index, [2] output index, [3] partition format
        String[] fields = InstructionUtils.getInstructionParts(instruction);
        int inIx = Integer.parseInt(fields[1]);
        int outIx = Integer.parseInt(fields[2]);
        MatrixObject source = inputMatrices[inIx];
        MatrixObject target = outputMatrices[findResultIndex(resultIndices, outIx)];
        PDataPartitionFormat format = PDataPartitionFormat.valueOf(fields[3]);
        long rows = source.getNumRows();
        long cols = source.getNumColumns();
        long rowsPerBlock = source.getNumRowsPerBlock();
        long colsPerBlock = source.getNumColumnsPerBlock();
        // pick the operands of the partition-length formula for the requested format
        long otherDim;
        long blockSize;
        switch(format) {
            case ROW_BLOCK_WISE_N:
                otherDim = cols;
                blockSize = rowsPerBlock;
                break;
            case COLUMN_BLOCK_WISE_N:
                otherDim = rows;
                blockSize = colsPerBlock;
                break;
            default:
                throw new DMLRuntimeException("Unsupported partition format for distributed cache input: " + format);
        }
        // blocks per partition = ceil(PARTITION_SIZE / other dimension / block size);
        // the partition length is that block count expressed in rows (or columns)
        long blocksPerPartition = (long) Math.ceil(((double) DistributedCacheInput.PARTITION_SIZE) / otherDim / blockSize);
        long partitionLength = blocksPerPartition * blockSize;
        PartitionFormat partitionFormat = new PartitionFormat(format, (int) partitionLength);
        DataPartitioner partitioner = new DataPartitionerRemoteMR(partitionFormat, -1, numReducers, replication, false, true);
        MatrixObject partitioned = partitioner.createPartitionedMatrixObject(source, target, true);
        sts[statsPos] = partitioned.getMatrixCharacteristics();
        statsPos++;
    }
}
Also used : PDataPartitionFormat(org.apache.sysml.runtime.controlprogram.ParForProgramBlock.PDataPartitionFormat) MatrixObject(org.apache.sysml.runtime.controlprogram.caching.MatrixObject) DataPartitioner(org.apache.sysml.runtime.controlprogram.parfor.DataPartitioner) DataPartitionerRemoteMR(org.apache.sysml.runtime.controlprogram.parfor.DataPartitionerRemoteMR) PDataPartitionFormat(org.apache.sysml.runtime.controlprogram.ParForProgramBlock.PDataPartitionFormat) PartitionFormat(org.apache.sysml.runtime.controlprogram.ParForProgramBlock.PartitionFormat) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException)

Example 2 with DataPartitioner

use of org.apache.sysml.runtime.controlprogram.parfor.DataPartitioner in project systemml by apache.

the class ParForProgramBlock method handleDataPartitioning.

/**
 * Partitions the read-only matrix inputs of this parfor according to the configured
 * data partitioner, unless the partitioner is {@code PDataPartitioner.NONE}.
 *
 * Each partitioned variable replaces the original in the execution context, the
 * parfor body is recompiled for it, and the original matrix is remembered in
 * {@code _variablesDPOriginal}. Previously partitioned variables found in
 * {@code _variablesDPReuse} are reused instead of being partitioned again.
 *
 * @param ec execution context holding the variables to partition
 */
private void handleDataPartitioning(ExecutionContext ec) {
    PDataPartitioner partitionerMode = _dataPartitioner;
    if (partitionerMode == PDataPartitioner.NONE) {
        return;
    }
    ParForStatementBlock sb = (ParForStatementBlock) getStatementBlock();
    if (sb == null) {
        throw new DMLRuntimeException("ParFor statement block required for reasoning about data partitioning.");
    }
    for (String varName : sb.getReadOnlyParentVars()) {
        Data value = ec.getVariable(varName);
        // only matrix inputs are considered; absent variables and other data types are skipped
        if (!(value instanceof MatrixObject)) {
            continue;
        }
        // unpartitioned input
        MatrixObject original = (MatrixObject) value;
        PartitionFormat format = sb.determineDataPartitionFormat(varName);
        LOG.trace("PARFOR ID = " + _ID + ", Partitioning read-only input variable " + varName + " (format=" + format + ", mode=" + _dataPartitioner + ")");
        if (format == PartitionFormat.NONE) {
            continue;
        }
        // blockwise formats are always handled by the remote spark partitioner;
        // once switched, the new mode also applies to all remaining variables
        if (partitionerMode != PDataPartitioner.REMOTE_SPARK && format.isBlockwise()) {
            LOG.warn("PARFOR ID = " + _ID + ", Switching data partitioner from " + partitionerMode + " to " + PDataPartitioner.REMOTE_SPARK.name() + " for blockwise-n partitioning.");
            partitionerMode = PDataPartitioner.REMOTE_SPARK;
        }
        Timing timer = new Timing(true);
        // look for an already partitioned instance before partitioning again
        Data partitioned = _variablesDPReuse.get(varName);
        if (partitioned == null) {
            DataPartitioner partitioner = createDataPartitioner(format, partitionerMode, ec);
            // binary cell output is disabled when not allowed for this input/format
            // or when running in spark execution mode
            boolean binaryCellAllowed = OptimizerRuleBased.allowsBinaryCellPartitions(original, format);
            if (!binaryCellAllowed || OptimizerUtils.isSparkExecutionMode()) {
                partitioner.disableBinaryCell();
            }
            MatrixObject freshlyPartitioned = partitioner.createPartitionedMatrixObject(original, constructDataPartitionsFileName());
            partitioned = freshlyPartitioned;
            // the partitioner handed back the very same object: nothing was partitioned,
            // so the remaining steps are skipped for this variable
            if (original == freshlyPartitioned) {
                continue;
            }
        }
        ec.setVariable(varName, partitioned);
        // recompile parfor body program
        ProgramRecompiler.rFindAndRecompileIndexingHOP(sb, this, varName, ec, true);
        // keep the original matrix, and the partitioned one if it qualifies for reuse
        _variablesDPOriginal.put(varName, original);
        if (ALLOW_REUSE_PARTITION_VARS && ProgramRecompiler.isApplicableForReuseVariable(sb.getDMLProg(), sb, varName)) {
            _variablesDPReuse.put(varName, partitioned);
        }
        LOG.trace("Partitioning and recompilation done in " + timer.stop() + "ms");
    }
}
Also used : MatrixObject(org.apache.sysml.runtime.controlprogram.caching.MatrixObject) DataPartitioner(org.apache.sysml.runtime.controlprogram.parfor.DataPartitioner) ParForStatementBlock(org.apache.sysml.parser.ParForStatementBlock) Data(org.apache.sysml.runtime.instructions.cp.Data) Timing(org.apache.sysml.runtime.controlprogram.parfor.stat.Timing) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException)

Example 3 with DataPartitioner

use of org.apache.sysml.runtime.controlprogram.parfor.DataPartitioner in project incubator-systemml by apache.

the class ParForProgramBlock method createDataPartitioner.

/**
 * Builds the data partitioner implementation that matches the requested partitioner type.
 *
 * The number of reducers passed to the remote partitioners is the configured
 * number of reducers, capped by the remote parallel reduce tasks; on yarn
 * clusters this cap is raised to at least half the number of cluster cores.
 *
 * @param dpf data partition format
 * @param dataPartitioner data partitioner
 * @param ec execution context
 * @return data partitioner
 */
private DataPartitioner createDataPartitioner(PartitionFormat dpf, PDataPartitioner dataPartitioner, ExecutionContext ec) {
    // configured reducers and the infrastructure limit bound the degree of parallelism
    int configuredReducers = ConfigurationManager.getNumReducers();
    int baseLimit = InfrastructureAnalyzer.getRemoteParallelReduceTasks();
    // on yarn clusters the limit is corrected to at least half the cluster cores
    int reducerLimit = InfrastructureAnalyzer.isYarnEnabled()
        ? (int) Math.max(baseLimit, YarnClusterAnalyzer.getNumCores() / 2)
        : baseLimit;
    int effectiveReducers = Math.min(configuredReducers, reducerLimit);
    // every branch either returns the matching partitioner or fails
    switch(dataPartitioner) {
        case LOCAL:
            return new DataPartitionerLocal(dpf, _numThreads);
        case REMOTE_MR:
            return new DataPartitionerRemoteMR(dpf, _ID, effectiveReducers, _replicationDP, ALLOW_REUSE_MR_JVMS, false);
        case REMOTE_SPARK:
            return new DataPartitionerRemoteSpark(dpf, ec, effectiveReducers, _replicationDP, false);
        default:
            throw new DMLRuntimeException("Unknown data partitioner: '" + dataPartitioner.name() + "'.");
    }
}
Also used : DataPartitioner(org.apache.sysml.runtime.controlprogram.parfor.DataPartitioner) DataPartitionerLocal(org.apache.sysml.runtime.controlprogram.parfor.DataPartitionerLocal) DataPartitionerRemoteSpark(org.apache.sysml.runtime.controlprogram.parfor.DataPartitionerRemoteSpark) DataPartitionerRemoteMR(org.apache.sysml.runtime.controlprogram.parfor.DataPartitionerRemoteMR) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException)

Example 4 with DataPartitioner

use of org.apache.sysml.runtime.controlprogram.parfor.DataPartitioner in project incubator-systemml by apache.

the class ParForProgramBlock method handleDataPartitioning.

/**
 * Applies data partitioning to the read-only matrix variables of this parfor,
 * provided the configured data partitioner is not {@code PDataPartitioner.NONE}.
 *
 * For every variable that ends up partitioned, the execution context is updated
 * with the partitioned matrix, the parfor body is recompiled, and the original
 * matrix is stored in {@code _variablesDPOriginal}. Entries in
 * {@code _variablesDPReuse} are reused rather than partitioned a second time.
 *
 * @param ec execution context holding the variables to partition
 */
private void handleDataPartitioning(ExecutionContext ec) {
    PDataPartitioner activePartitioner = _dataPartitioner;
    if (activePartitioner == PDataPartitioner.NONE) {
        return;
    }
    ParForStatementBlock sb = (ParForStatementBlock) getStatementBlock();
    if (sb == null) {
        throw new DMLRuntimeException("ParFor statement block required for reasoning about data partitioning.");
    }
    for (String inputName : sb.getReadOnlyParentVars()) {
        Data current = ec.getVariable(inputName);
        // variables that are missing or do not hold a matrix are not partitioned
        if (!(current instanceof MatrixObject)) {
            continue;
        }
        // unpartitioned input
        MatrixObject unpartitioned = (MatrixObject) current;
        PartitionFormat chosenFormat = sb.determineDataPartitionFormat(inputName);
        LOG.trace("PARFOR ID = " + _ID + ", Partitioning read-only input variable " + inputName + " (format=" + chosenFormat + ", mode=" + _dataPartitioner + ")");
        if (chosenFormat == PartitionFormat.NONE) {
            continue;
        }
        // blockwise formats force the remote spark partitioner; the switch
        // stays in effect for the variables processed afterwards
        if (activePartitioner != PDataPartitioner.REMOTE_SPARK && chosenFormat.isBlockwise()) {
            LOG.warn("PARFOR ID = " + _ID + ", Switching data partitioner from " + activePartitioner + " to " + PDataPartitioner.REMOTE_SPARK.name() + " for blockwise-n partitioning.");
            activePartitioner = PDataPartitioner.REMOTE_SPARK;
        }
        Timing stopwatch = new Timing(true);
        // input data partitioning (reuse if possible)
        Data result = _variablesDPReuse.get(inputName);
        if (result == null) {
            // no reuse opportunity: partition now
            DataPartitioner partitioner = createDataPartitioner(chosenFormat, activePartitioner, ec);
            // binary cell is turned off if not allowed for this input/format
            // or if spark execution mode is active
            boolean binaryCellOk = OptimizerRuleBased.allowsBinaryCellPartitions(unpartitioned, chosenFormat);
            if (!binaryCellOk || OptimizerUtils.isSparkExecutionMode()) {
                partitioner.disableBinaryCell();
            }
            MatrixObject created = partitioner.createPartitionedMatrixObject(unpartitioned, constructDataPartitionsFileName());
            result = created;
            // same object returned means the input was left unpartitioned;
            // move on to the next variable
            if (unpartitioned == created) {
                continue;
            }
        }
        ec.setVariable(inputName, result);
        // recompile parfor body program
        ProgramRecompiler.rFindAndRecompileIndexingHOP(sb, this, inputName, ec, true);
        // remember the original matrix and, where applicable, the partitioned one for reuse
        _variablesDPOriginal.put(inputName, unpartitioned);
        if (ALLOW_REUSE_PARTITION_VARS && ProgramRecompiler.isApplicableForReuseVariable(sb.getDMLProg(), sb, inputName)) {
            _variablesDPReuse.put(inputName, result);
        }
        LOG.trace("Partitioning and recompilation done in " + stopwatch.stop() + "ms");
    }
}
Also used : MatrixObject(org.apache.sysml.runtime.controlprogram.caching.MatrixObject) DataPartitioner(org.apache.sysml.runtime.controlprogram.parfor.DataPartitioner) ParForStatementBlock(org.apache.sysml.parser.ParForStatementBlock) Data(org.apache.sysml.runtime.instructions.cp.Data) Timing(org.apache.sysml.runtime.controlprogram.parfor.stat.Timing) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException)

Example 5 with DataPartitioner

use of org.apache.sysml.runtime.controlprogram.parfor.DataPartitioner in project systemml by apache.

the class ParForProgramBlock method createDataPartitioner.

/**
 * Instantiates the data partitioner selected by the given runtime parameter.
 *
 * Remote partitioners receive the configured number of reducers, limited by the
 * remote parallel reduce tasks (raised on yarn clusters to at least half of the
 * cluster cores).
 *
 * @param dpf data partition format
 * @param dataPartitioner data partitioner
 * @param ec execution context
 * @return data partitioner
 */
private DataPartitioner createDataPartitioner(PartitionFormat dpf, PDataPartitioner dataPartitioner, ExecutionContext ec) {
    // determine max degree of parallelism
    int requested = ConfigurationManager.getNumReducers();
    int upperBound = InfrastructureAnalyzer.getRemoteParallelReduceTasks();
    // yarn clusters: lift the bound to at least half the number of cores
    if (InfrastructureAnalyzer.isYarnEnabled()) {
        upperBound = (int) Math.max(upperBound, YarnClusterAnalyzer.getNumCores() / 2);
    }
    int reducers = Math.min(requested, upperBound);
    // return the partitioner for the requested type, or fail for unknown types
    switch(dataPartitioner) {
        case LOCAL:
            return new DataPartitionerLocal(dpf, _numThreads);
        case REMOTE_MR:
            return new DataPartitionerRemoteMR(dpf, _ID, reducers, _replicationDP, ALLOW_REUSE_MR_JVMS, false);
        case REMOTE_SPARK:
            return new DataPartitionerRemoteSpark(dpf, ec, reducers, _replicationDP, false);
        default:
            throw new DMLRuntimeException("Unknown data partitioner: '" + dataPartitioner.name() + "'.");
    }
}
Also used : DataPartitioner(org.apache.sysml.runtime.controlprogram.parfor.DataPartitioner) DataPartitionerLocal(org.apache.sysml.runtime.controlprogram.parfor.DataPartitionerLocal) DataPartitionerRemoteSpark(org.apache.sysml.runtime.controlprogram.parfor.DataPartitionerRemoteSpark) DataPartitionerRemoteMR(org.apache.sysml.runtime.controlprogram.parfor.DataPartitionerRemoteMR) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException)

Aggregations

DMLRuntimeException (org.apache.sysml.runtime.DMLRuntimeException): 6, DataPartitioner (org.apache.sysml.runtime.controlprogram.parfor.DataPartitioner): 6, MatrixObject (org.apache.sysml.runtime.controlprogram.caching.MatrixObject): 4, DataPartitionerRemoteMR (org.apache.sysml.runtime.controlprogram.parfor.DataPartitionerRemoteMR): 4, ParForStatementBlock (org.apache.sysml.parser.ParForStatementBlock): 2, PDataPartitionFormat (org.apache.sysml.runtime.controlprogram.ParForProgramBlock.PDataPartitionFormat): 2, PartitionFormat (org.apache.sysml.runtime.controlprogram.ParForProgramBlock.PartitionFormat): 2, DataPartitionerLocal (org.apache.sysml.runtime.controlprogram.parfor.DataPartitionerLocal): 2, DataPartitionerRemoteSpark (org.apache.sysml.runtime.controlprogram.parfor.DataPartitionerRemoteSpark): 2, Timing (org.apache.sysml.runtime.controlprogram.parfor.stat.Timing): 2, Data (org.apache.sysml.runtime.instructions.cp.Data): 2