Search in sources:

Example 1 with DataPartitionerRemoteMR

use of org.apache.sysml.runtime.controlprogram.parfor.DataPartitionerRemoteMR in project incubator-systemml by apache.

the class DataPartitionMR method processPartitionInstructions.

/**
 * Executes every "partition" instruction contained in the given shuffle instruction string.
 * <p>
 * Each matching instruction is split into its parts: part 1 is parsed as the index into
 * {@code inputMatrices}, part 2 as the output index that is resolved through
 * {@code findResultIndex(resultIndices, ...)} into {@code outputMatrices}, and part 3 as the
 * name of a {@link PDataPartitionFormat} constant. The partition length N is derived from
 * {@code DistributedCacheInput.PARTITION_SIZE} and the input dimensions, rounded up to a whole
 * number of blocks. The matrix characteristics of each partitioned output are stored in
 * {@code sts}, in the order in which the partition instructions appear.
 *
 * @param shuffleInst shuffle instructions, separated by {@code Instruction.INSTRUCTION_DELIM}
 * @param inputMatrices input matrices, addressed by the instruction's input index
 * @param resultIndices result indices used to locate the output matrix of an instruction
 * @param outputMatrices output matrices
 * @param numReducers number of reducers passed to the remote MR data partitioner
 * @param replication replication factor passed to the remote MR data partitioner
 * @param sts receives the matrix characteristics of each partitioned output
 * @throws DMLRuntimeException if the partition format is neither ROW_BLOCK_WISE_N nor
 *         COLUMN_BLOCK_WISE_N
 */
private static void processPartitionInstructions(String shuffleInst, MatrixObject[] inputMatrices, byte[] resultIndices, MatrixObject[] outputMatrices, int numReducers, int replication, MatrixCharacteristics[] sts) {
    // next free slot in sts; advances once per processed partition instruction
    int statsPos = 0;
    String[] instructions = shuffleInst.split(Instruction.INSTRUCTION_DELIM);
    for (String instruction : instructions) {
        // all other opcodes are ignored by this method
        if (!InstructionUtils.getOpCode(instruction).equalsIgnoreCase("partition")) {
            continue;
        }

        String[] fields = InstructionUtils.getInstructionParts(instruction);
        int inputPos = Integer.parseInt(fields[1]);
        int outputId = Integer.parseInt(fields[2]);
        MatrixObject source = inputMatrices[inputPos];
        MatrixObject target = outputMatrices[findResultIndex(resultIndices, outputId)];
        PDataPartitionFormat format = PDataPartitionFormat.valueOf(fields[3]);

        long numRows = source.getNumRows();
        long numCols = source.getNumColumns();
        long rowsPerBlock = source.getNumRowsPerBlock();
        long colsPerBlock = source.getNumColumnsPerBlock();

        // partition length N: PARTITION_SIZE divided by the other dimension, rounded up
        // to a whole number of blocks and expressed in rows (or columns)
        long partitionLength;
        switch (format) {
            case ROW_BLOCK_WISE_N:
                partitionLength = rowsPerBlock * (long) Math.ceil(((double) DistributedCacheInput.PARTITION_SIZE) / numCols / rowsPerBlock);
                break;
            case COLUMN_BLOCK_WISE_N:
                partitionLength = colsPerBlock * (long) Math.ceil(((double) DistributedCacheInput.PARTITION_SIZE) / numRows / colsPerBlock);
                break;
            default:
                throw new DMLRuntimeException("Unsupported partition format for distributed cache input: " + format);
        }

        PartitionFormat partitionFormat = new PartitionFormat(format, (int) partitionLength);
        DataPartitioner partitioner = new DataPartitionerRemoteMR(partitionFormat, -1, numReducers, replication, false, true);
        target = partitioner.createPartitionedMatrixObject(source, target, true);
        sts[statsPos++] = target.getMatrixCharacteristics();
    }
}
Also used : PDataPartitionFormat(org.apache.sysml.runtime.controlprogram.ParForProgramBlock.PDataPartitionFormat) MatrixObject(org.apache.sysml.runtime.controlprogram.caching.MatrixObject) DataPartitioner(org.apache.sysml.runtime.controlprogram.parfor.DataPartitioner) DataPartitionerRemoteMR(org.apache.sysml.runtime.controlprogram.parfor.DataPartitionerRemoteMR) PDataPartitionFormat(org.apache.sysml.runtime.controlprogram.ParForProgramBlock.PDataPartitionFormat) PartitionFormat(org.apache.sysml.runtime.controlprogram.ParForProgramBlock.PartitionFormat) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException)

Example 2 with DataPartitionerRemoteMR

use of org.apache.sysml.runtime.controlprogram.parfor.DataPartitionerRemoteMR in project incubator-systemml by apache.

the class ParForProgramBlock method createDataPartitioner.

/**
 * Instantiates the data partitioner implementation selected by the given partitioner type.
 * <p>
 * The reducer count handed to the remote partitioners is the configured number of reducers,
 * capped by the value of {@code InfrastructureAnalyzer.getRemoteParallelReduceTasks()}; when
 * YARN is enabled that cap is raised to at least half of
 * {@code YarnClusterAnalyzer.getNumCores()}.
 *
 * @param dpf data partition format
 * @param dataPartitioner data partitioner type to instantiate
 * @param ec execution context (only passed on to the Spark partitioner)
 * @return the newly created data partitioner
 * @throws DMLRuntimeException if the partitioner type is not LOCAL, REMOTE_MR or REMOTE_SPARK
 */
private DataPartitioner createDataPartitioner(PartitionFormat dpf, PDataPartitioner dataPartitioner, ExecutionContext ec) {
    // derive the effective degree of parallelism
    int configuredReducers = ConfigurationManager.getNumReducers();
    int reducerCap = InfrastructureAnalyzer.getRemoteParallelReduceTasks();
    if (InfrastructureAnalyzer.isYarnEnabled()) {
        // on yarn clusters, allow at least half the number of cores
        reducerCap = (int) Math.max(reducerCap, YarnClusterAnalyzer.getNumCores() / 2);
    }
    int effectiveReducers = Math.min(configuredReducers, reducerCap);

    // instantiate the requested partitioner
    switch (dataPartitioner) {
        case LOCAL:
            return new DataPartitionerLocal(dpf, _numThreads);
        case REMOTE_MR:
            return new DataPartitionerRemoteMR(dpf, _ID, effectiveReducers, _replicationDP, ALLOW_REUSE_MR_JVMS, false);
        case REMOTE_SPARK:
            return new DataPartitionerRemoteSpark(dpf, ec, effectiveReducers, _replicationDP, false);
        default:
            throw new DMLRuntimeException("Unknown data partitioner: '" + dataPartitioner.name() + "'.");
    }
}
Also used : DataPartitioner(org.apache.sysml.runtime.controlprogram.parfor.DataPartitioner) DataPartitionerLocal(org.apache.sysml.runtime.controlprogram.parfor.DataPartitionerLocal) DataPartitionerRemoteSpark(org.apache.sysml.runtime.controlprogram.parfor.DataPartitionerRemoteSpark) DataPartitionerRemoteMR(org.apache.sysml.runtime.controlprogram.parfor.DataPartitionerRemoteMR) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException)

Example 3 with DataPartitionerRemoteMR

use of org.apache.sysml.runtime.controlprogram.parfor.DataPartitionerRemoteMR in project systemml by apache.

the class ParForProgramBlock method createDataPartitioner.

/**
 * Instantiates the data partitioner implementation selected by the given partitioner type.
 * <p>
 * The reducer count handed to the remote partitioners is the configured number of reducers,
 * capped by the value of {@code InfrastructureAnalyzer.getRemoteParallelReduceTasks()}; when
 * YARN is enabled that cap is raised to at least half of
 * {@code YarnClusterAnalyzer.getNumCores()}.
 *
 * @param dpf data partition format
 * @param dataPartitioner data partitioner type to instantiate
 * @param ec execution context (only passed on to the Spark partitioner)
 * @return the newly created data partitioner
 * @throws DMLRuntimeException if the partitioner type is not LOCAL, REMOTE_MR or REMOTE_SPARK
 */
private DataPartitioner createDataPartitioner(PartitionFormat dpf, PDataPartitioner dataPartitioner, ExecutionContext ec) {
    // derive the effective degree of parallelism
    int configuredReducers = ConfigurationManager.getNumReducers();
    int reducerCap = InfrastructureAnalyzer.getRemoteParallelReduceTasks();
    if (InfrastructureAnalyzer.isYarnEnabled()) {
        // on yarn clusters, allow at least half the number of cores
        reducerCap = (int) Math.max(reducerCap, YarnClusterAnalyzer.getNumCores() / 2);
    }
    int effectiveReducers = Math.min(configuredReducers, reducerCap);

    // instantiate the requested partitioner
    switch (dataPartitioner) {
        case LOCAL:
            return new DataPartitionerLocal(dpf, _numThreads);
        case REMOTE_MR:
            return new DataPartitionerRemoteMR(dpf, _ID, effectiveReducers, _replicationDP, ALLOW_REUSE_MR_JVMS, false);
        case REMOTE_SPARK:
            return new DataPartitionerRemoteSpark(dpf, ec, effectiveReducers, _replicationDP, false);
        default:
            throw new DMLRuntimeException("Unknown data partitioner: '" + dataPartitioner.name() + "'.");
    }
}
Also used : DataPartitioner(org.apache.sysml.runtime.controlprogram.parfor.DataPartitioner) DataPartitionerLocal(org.apache.sysml.runtime.controlprogram.parfor.DataPartitionerLocal) DataPartitionerRemoteSpark(org.apache.sysml.runtime.controlprogram.parfor.DataPartitionerRemoteSpark) DataPartitionerRemoteMR(org.apache.sysml.runtime.controlprogram.parfor.DataPartitionerRemoteMR) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException)

Example 4 with DataPartitionerRemoteMR

use of org.apache.sysml.runtime.controlprogram.parfor.DataPartitionerRemoteMR in project systemml by apache.

the class DataPartitionMR method processPartitionInstructions.

/**
 * Executes every "partition" instruction contained in the given shuffle instruction string.
 * <p>
 * Each matching instruction is split into its parts: part 1 is parsed as the index into
 * {@code inputMatrices}, part 2 as the output index that is resolved through
 * {@code findResultIndex(resultIndices, ...)} into {@code outputMatrices}, and part 3 as the
 * name of a {@link PDataPartitionFormat} constant. The partition length N is derived from
 * {@code DistributedCacheInput.PARTITION_SIZE} and the input dimensions, rounded up to a whole
 * number of blocks. The matrix characteristics of each partitioned output are stored in
 * {@code sts}, in the order in which the partition instructions appear.
 *
 * @param shuffleInst shuffle instructions, separated by {@code Instruction.INSTRUCTION_DELIM}
 * @param inputMatrices input matrices, addressed by the instruction's input index
 * @param resultIndices result indices used to locate the output matrix of an instruction
 * @param outputMatrices output matrices
 * @param numReducers number of reducers passed to the remote MR data partitioner
 * @param replication replication factor passed to the remote MR data partitioner
 * @param sts receives the matrix characteristics of each partitioned output
 * @throws DMLRuntimeException if the partition format is neither ROW_BLOCK_WISE_N nor
 *         COLUMN_BLOCK_WISE_N
 */
private static void processPartitionInstructions(String shuffleInst, MatrixObject[] inputMatrices, byte[] resultIndices, MatrixObject[] outputMatrices, int numReducers, int replication, MatrixCharacteristics[] sts) {
    // next free slot in sts; advances once per processed partition instruction
    int statsPos = 0;
    String[] instructions = shuffleInst.split(Instruction.INSTRUCTION_DELIM);
    for (String instruction : instructions) {
        // all other opcodes are ignored by this method
        if (!InstructionUtils.getOpCode(instruction).equalsIgnoreCase("partition")) {
            continue;
        }

        String[] fields = InstructionUtils.getInstructionParts(instruction);
        int inputPos = Integer.parseInt(fields[1]);
        int outputId = Integer.parseInt(fields[2]);
        MatrixObject source = inputMatrices[inputPos];
        MatrixObject target = outputMatrices[findResultIndex(resultIndices, outputId)];
        PDataPartitionFormat format = PDataPartitionFormat.valueOf(fields[3]);

        long numRows = source.getNumRows();
        long numCols = source.getNumColumns();
        long rowsPerBlock = source.getNumRowsPerBlock();
        long colsPerBlock = source.getNumColumnsPerBlock();

        // partition length N: PARTITION_SIZE divided by the other dimension, rounded up
        // to a whole number of blocks and expressed in rows (or columns)
        long partitionLength;
        switch (format) {
            case ROW_BLOCK_WISE_N:
                partitionLength = rowsPerBlock * (long) Math.ceil(((double) DistributedCacheInput.PARTITION_SIZE) / numCols / rowsPerBlock);
                break;
            case COLUMN_BLOCK_WISE_N:
                partitionLength = colsPerBlock * (long) Math.ceil(((double) DistributedCacheInput.PARTITION_SIZE) / numRows / colsPerBlock);
                break;
            default:
                throw new DMLRuntimeException("Unsupported partition format for distributed cache input: " + format);
        }

        PartitionFormat partitionFormat = new PartitionFormat(format, (int) partitionLength);
        DataPartitioner partitioner = new DataPartitionerRemoteMR(partitionFormat, -1, numReducers, replication, false, true);
        target = partitioner.createPartitionedMatrixObject(source, target, true);
        sts[statsPos++] = target.getMatrixCharacteristics();
    }
}
Also used : PDataPartitionFormat(org.apache.sysml.runtime.controlprogram.ParForProgramBlock.PDataPartitionFormat) MatrixObject(org.apache.sysml.runtime.controlprogram.caching.MatrixObject) DataPartitioner(org.apache.sysml.runtime.controlprogram.parfor.DataPartitioner) DataPartitionerRemoteMR(org.apache.sysml.runtime.controlprogram.parfor.DataPartitionerRemoteMR) PDataPartitionFormat(org.apache.sysml.runtime.controlprogram.ParForProgramBlock.PDataPartitionFormat) PartitionFormat(org.apache.sysml.runtime.controlprogram.ParForProgramBlock.PartitionFormat) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException)

Aggregations

DMLRuntimeException (org.apache.sysml.runtime.DMLRuntimeException)4 DataPartitioner (org.apache.sysml.runtime.controlprogram.parfor.DataPartitioner)4 DataPartitionerRemoteMR (org.apache.sysml.runtime.controlprogram.parfor.DataPartitionerRemoteMR)4 PDataPartitionFormat (org.apache.sysml.runtime.controlprogram.ParForProgramBlock.PDataPartitionFormat)2 PartitionFormat (org.apache.sysml.runtime.controlprogram.ParForProgramBlock.PartitionFormat)2 MatrixObject (org.apache.sysml.runtime.controlprogram.caching.MatrixObject)2 DataPartitionerLocal (org.apache.sysml.runtime.controlprogram.parfor.DataPartitionerLocal)2 DataPartitionerRemoteSpark (org.apache.sysml.runtime.controlprogram.parfor.DataPartitionerRemoteSpark)2