Search in sources :

Example 1 with PDataPartitionFormat

use of org.apache.sysml.runtime.controlprogram.ParForProgramBlock.PDataPartitionFormat in project incubator-systemml by apache.

the class DataPartitionerRemoteMapper method configure.

/**
 * Selects and instantiates the partitioning mapper delegate for this task, based on the
 * partitioning input and output formats stored in the job configuration.
 *
 * <p>Text-cell and binary-cell inputs each get a dedicated mapper. Binary-block input is
 * accepted only together with binary-block or binary-cell output.
 *
 * @param job job configuration from which all partitioning settings are read
 * @throws RuntimeException if the input info is not one of the handled kinds, or if
 *         binary-block input is combined with an unsupported output info
 */
@Override
public void configure(JobConf job) {
    MatrixCharacteristics mc = MRJobConfiguration.getPartitionedMatrixSize(job);
    InputInfo ii = MRJobConfiguration.getPartitioningInputInfo(job);
    OutputInfo oi = MRJobConfiguration.getPartitioningOutputInfo(job);
    PDataPartitionFormat pdf = MRJobConfiguration.getPartitioningFormat(job);
    int n = MRJobConfiguration.getPartitioningSizeN(job);
    boolean keepIndexes = MRJobConfiguration.getPartitioningIndexFlag(job);

    if (ii == InputInfo.TextCellInputInfo) {
        _mapper = new DataPartitionerMapperTextcell(mc.getRows(), mc.getCols(), mc.getRowsPerBlock(), mc.getColsPerBlock(), pdf, n);
    } else if (ii == InputInfo.BinaryCellInputInfo) {
        _mapper = new DataPartitionerMapperBinarycell(mc.getRows(), mc.getCols(), mc.getRowsPerBlock(), mc.getColsPerBlock(), pdf, n);
    } else if (ii == InputInfo.BinaryBlockInputInfo) {
        if (oi == OutputInfo.BinaryBlockOutputInfo) {
            _mapper = new DataPartitionerMapperBinaryblock(mc.getRows(), mc.getCols(), mc.getRowsPerBlock(), mc.getColsPerBlock(), pdf, n, keepIndexes);
        } else if (oi == OutputInfo.BinaryCellOutputInfo) {
            // fused parfor: empty output is requested whenever program blocks are
            // present in the job configuration
            boolean outputEmpty = MRJobConfiguration.getProgramBlocks(job) != null;
            _mapper = new DataPartitionerMapperBinaryblock2Binarycell(job, mc.getRows(), mc.getCols(), mc.getRowsPerBlock(), mc.getColsPerBlock(), pdf, n, keepIndexes, outputEmpty);
        } else {
            throw new RuntimeException("Partitioning from '" + ii + "' to '" + oi + "' not supported");
        }
    } else {
        // Plain concatenation instead of ii.toString(): a null input info must still
        // produce this descriptive error rather than a bare NullPointerException.
        throw new RuntimeException("Unable to configure mapper with unknown input info: " + ii);
    }
}
Also used : OutputInfo(org.apache.sysml.runtime.matrix.data.OutputInfo) PDataPartitionFormat(org.apache.sysml.runtime.controlprogram.ParForProgramBlock.PDataPartitionFormat) InputInfo(org.apache.sysml.runtime.matrix.data.InputInfo) MatrixCharacteristics(org.apache.sysml.runtime.matrix.MatrixCharacteristics)

Example 2 with PDataPartitionFormat

use of org.apache.sysml.runtime.controlprogram.ParForProgramBlock.PDataPartitionFormat in project incubator-systemml by apache.

the class DataPartitionCPInstruction method parseInstruction.

/**
 * Builds a {@code DataPartitionCPInstruction} from its string encoding.
 *
 * <p>The instruction is split into fields, where field 0 is the opcode, field 1 the
 * input operand, field 2 the output operand and field 3 the name of a
 * {@code PDataPartitionFormat} constant. The field count is validated through
 * {@code InstructionUtils.checkNumFields} before any field is read.
 *
 * @param str the encoded instruction
 * @return the parsed instruction
 * @throws DMLRuntimeException if the opcode is not {@code partition} (case-insensitive)
 */
public static DataPartitionCPInstruction parseInstruction(String str) {
    final String[] fields = InstructionUtils.getInstructionPartsWithValueType(str);
    InstructionUtils.checkNumFields(fields, 3);

    final String opcode = fields[0];
    final CPOperand input = new CPOperand(fields[1]);
    final CPOperand output = new CPOperand(fields[2]);
    final PDataPartitionFormat format = PDataPartitionFormat.valueOf(fields[3]);

    // The opcode is checked only after all fields have been parsed, so a malformed
    // operand or format name is reported before an unknown opcode.
    if (opcode.equalsIgnoreCase("partition")) {
        return new DataPartitionCPInstruction(new Operator(true), input, format, output, opcode, str);
    }
    throw new DMLRuntimeException("Unknown opcode while parsing an DataPartitionCPInstruction: " + str);
}
Also used : Operator(org.apache.sysml.runtime.matrix.operators.Operator) PDataPartitionFormat(org.apache.sysml.runtime.controlprogram.ParForProgramBlock.PDataPartitionFormat) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException)

Example 3 with PDataPartitionFormat

use of org.apache.sysml.runtime.controlprogram.ParForProgramBlock.PDataPartitionFormat in project incubator-systemml by apache.

the class MRBaseForCommonInstructions method setupDistCacheFiles.

/**
 * Registers a {@code DistributedCacheInput} in {@code dcValues} for every distributed
 * cache input index configured on the job, skipping indices that are already present.
 *
 * <p>Does nothing when the job has no distributed cache input indices. As a side effect
 * the {@code isJobLocal} field is refreshed from the job configuration.
 *
 * @param job job configuration describing the distributed cache inputs
 * @throws IOException declared for the distributed cache file lookup
 */
protected void setupDistCacheFiles(JobConf job) throws IOException {
    final String encodedIndices = MRJobConfiguration.getDistCacheInputIndices(job);
    if (encodedIndices == null) {
        return;
    }

    isJobLocal = InfrastructureAnalyzer.isLocalMode(job);
    final String[] inputPaths = MRJobConfiguration.getInputPaths(job);
    final String[] cacheIndices = encodedIndices.split(Instruction.INSTRUCTION_DELIM);
    final Path[] localCacheFiles = DistributedCache.getLocalCacheFiles(job);
    final PDataPartitionFormat[] partitionFormats = MRJobConfiguration.getInputPartitionFormats(job);

    for (int pos = 0; pos < cacheIndices.length; pos++) {
        final byte inputIndex = Byte.parseByte(cacheIndices[pos]);

        // Already loaded by an earlier task in the same JVM (jvm reuse).
        if (dcValues.containsKey(inputIndex)) {
            continue;
        }

        // In local mode the file can be read from HDFS directly, so the job's input
        // path is used instead of the "local" path prepared by DistributedCache.
        final Path source;
        if (isJobLocal) {
            source = new Path(inputPaths[inputIndex]);
        } else {
            source = localCacheFiles[pos];
        }

        final DistributedCacheInput cacheInput = new DistributedCacheInput(
            source,
            MRJobConfiguration.getNumRows(job, inputIndex),
            MRJobConfiguration.getNumColumns(job, inputIndex),
            MRJobConfiguration.getNumRowsPerBlock(job, inputIndex),
            MRJobConfiguration.getNumColumnsPerBlock(job, inputIndex),
            partitionFormats[inputIndex]);
        dcValues.put(inputIndex, cacheInput);
    }
}
Also used : Path(org.apache.hadoop.fs.Path) PDataPartitionFormat(org.apache.sysml.runtime.controlprogram.ParForProgramBlock.PDataPartitionFormat)

Example 4 with PDataPartitionFormat

use of org.apache.sysml.runtime.controlprogram.ParForProgramBlock.PDataPartitionFormat in project incubator-systemml by apache.

the class DataPartitionMR method processPartitionInstructions.

/**
 * Runs every {@code partition} instruction contained in the shuffle instruction string
 * and records the characteristics of each partitioned output.
 *
 * <p>Instructions with any other opcode are ignored. For each partition instruction,
 * field 1 is the index into {@code inputMatrices}, field 2 the output index (resolved
 * through {@code findResultIndex}) and field 3 the partition format name. Results are
 * written to {@code sts} consecutively, starting at position 0, in instruction order.
 *
 * @param shuffleInst delimiter-separated shuffle instructions
 * @param inputMatrices candidate input matrices, addressed by instruction input index
 * @param resultIndices result indices used to locate the output matrix
 * @param outputMatrices output matrices, addressed via {@code findResultIndex}
 * @param numReducers number of reducers handed to the remote partitioner
 * @param replication replication factor handed to the remote partitioner
 * @param sts receives the matrix characteristics of each partitioned output
 * @throws DMLRuntimeException if the partition format is neither
 *         {@code ROW_BLOCK_WISE_N} nor {@code COLUMN_BLOCK_WISE_N}
 */
private static void processPartitionInstructions(String shuffleInst, MatrixObject[] inputMatrices, byte[] resultIndices, MatrixObject[] outputMatrices, int numReducers, int replication, MatrixCharacteristics[] sts) {
    int nextStat = 0;
    final String[] instructions = shuffleInst.split(Instruction.INSTRUCTION_DELIM);
    for (String inst : instructions) {
        if (!InstructionUtils.getOpCode(inst).equalsIgnoreCase("partition")) {
            continue;
        }

        final String[] fields = InstructionUtils.getInstructionParts(inst);
        final int inputIndex = Integer.parseInt(fields[1]);
        final int outputIndex = Integer.parseInt(fields[2]);
        final MatrixObject source = inputMatrices[inputIndex];
        final MatrixObject target = outputMatrices[findResultIndex(resultIndices, outputIndex)];
        final PDataPartitionFormat pformat = PDataPartitionFormat.valueOf(fields[3]);

        final long rlen = source.getNumRows();
        final long clen = source.getNumColumns();
        final long brlen = source.getNumRowsPerBlock();
        final long bclen = source.getNumColumnsPerBlock();

        // Partition length N: the number of blocks along the partitioned dimension,
        // derived from PARTITION_SIZE, rounded up and scaled back to rows/columns.
        final long partitionLength;
        if (pformat == PDataPartitionFormat.ROW_BLOCK_WISE_N) {
            final long rowBlocks = (long) Math.ceil(((double) DistributedCacheInput.PARTITION_SIZE) / clen / brlen);
            partitionLength = rowBlocks * brlen;
        } else if (pformat == PDataPartitionFormat.COLUMN_BLOCK_WISE_N) {
            final long colBlocks = (long) Math.ceil(((double) DistributedCacheInput.PARTITION_SIZE) / rlen / bclen);
            partitionLength = colBlocks * bclen;
        } else {
            throw new DMLRuntimeException("Unsupported partition format for distributed cache input: " + pformat);
        }

        final PartitionFormat format = new PartitionFormat(pformat, (int) partitionLength);
        final DataPartitioner partitioner = new DataPartitionerRemoteMR(format, -1, numReducers, replication, false, true);
        final MatrixObject partitioned = partitioner.createPartitionedMatrixObject(source, target, true);
        sts[nextStat] = partitioned.getMatrixCharacteristics();
        nextStat++;
    }
}
Also used : PDataPartitionFormat(org.apache.sysml.runtime.controlprogram.ParForProgramBlock.PDataPartitionFormat) MatrixObject(org.apache.sysml.runtime.controlprogram.caching.MatrixObject) DataPartitioner(org.apache.sysml.runtime.controlprogram.parfor.DataPartitioner) DataPartitionerRemoteMR(org.apache.sysml.runtime.controlprogram.parfor.DataPartitionerRemoteMR) PartitionFormat(org.apache.sysml.runtime.controlprogram.ParForProgramBlock.PartitionFormat) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException)

Example 5 with PDataPartitionFormat

use of org.apache.sysml.runtime.controlprogram.ParForProgramBlock.PDataPartitionFormat in project systemml by apache.

the class RemoteParForColocatedNLineInputFormat method getSplits.

/**
 * Computes the splits of the parent input format and wraps each {@code FileSplit} in a
 * {@code RemoteParForColocatedFileSplit} carrying the partitioned file name and the
 * partition length.
 *
 * <p>The partition length is the number of rows of a partition for row-wise formats and
 * the number of columns otherwise, as reported by {@code PartitionFormat} for the
 * partitioned matrix size. Splits that are not {@code FileSplit}s are passed through
 * unchanged.
 *
 * @param job job configuration holding the partitioning settings
 * @param numSplits split count hint forwarded to the parent implementation
 * @return one split per parent split, in the same order
 * @throws IOException if the parent implementation fails to compute splits
 */
@Override
public InputSplit[] getSplits(JobConf job, int numSplits) throws IOException {
    final InputSplit[] baseSplits = super.getSplits(job, numSplits);

    // partitioning information from the job configuration
    final MatrixCharacteristics mc = MRJobConfiguration.getPartitionedMatrixSize(job);
    final PDataPartitionFormat dpf = MRJobConfiguration.getPartitioningFormat(job);
    final PartitionFormat pf = new PartitionFormat(dpf, -1);
    final int blen;
    if (pf.isRowwise()) {
        blen = (int) pf.getNumRows(mc);
    } else {
        blen = (int) pf.getNumColumns(mc);
    }
    final String fname = MRJobConfiguration.getPartitioningFilename(job);

    // wrapper splits; the instanceof test guards the subsequent cast
    final InputSplit[] wrapped = new InputSplit[baseSplits.length];
    int pos = 0;
    for (InputSplit split : baseSplits) {
        if (split instanceof FileSplit) {
            wrapped[pos] = new RemoteParForColocatedFileSplit((FileSplit) split, fname, blen);
        } else {
            wrapped[pos] = split;
        }
        pos++;
    }
    return wrapped;
}
Also used : PDataPartitionFormat(org.apache.sysml.runtime.controlprogram.ParForProgramBlock.PDataPartitionFormat) PartitionFormat(org.apache.sysml.runtime.controlprogram.ParForProgramBlock.PartitionFormat) FileSplit(org.apache.hadoop.mapred.FileSplit) InputSplit(org.apache.hadoop.mapred.InputSplit) MatrixCharacteristics(org.apache.sysml.runtime.matrix.MatrixCharacteristics)

Aggregations

PDataPartitionFormat (org.apache.sysml.runtime.controlprogram.ParForProgramBlock.PDataPartitionFormat)10 DMLRuntimeException (org.apache.sysml.runtime.DMLRuntimeException)4 PartitionFormat (org.apache.sysml.runtime.controlprogram.ParForProgramBlock.PartitionFormat)4 MatrixCharacteristics (org.apache.sysml.runtime.matrix.MatrixCharacteristics)4 Path (org.apache.hadoop.fs.Path)2 FileSplit (org.apache.hadoop.mapred.FileSplit)2 InputSplit (org.apache.hadoop.mapred.InputSplit)2 MatrixObject (org.apache.sysml.runtime.controlprogram.caching.MatrixObject)2 DataPartitioner (org.apache.sysml.runtime.controlprogram.parfor.DataPartitioner)2 DataPartitionerRemoteMR (org.apache.sysml.runtime.controlprogram.parfor.DataPartitionerRemoteMR)2 InputInfo (org.apache.sysml.runtime.matrix.data.InputInfo)2 OutputInfo (org.apache.sysml.runtime.matrix.data.OutputInfo)2 Operator (org.apache.sysml.runtime.matrix.operators.Operator)2