Search in sources :

Example 21 with PartitionFormat

use of org.apache.sysml.runtime.controlprogram.ParForProgramBlock.PartitionFormat in project systemml by apache.

the class ParForStatementBlock method determineDataPartitionFormat.

/**
 * Chooses a data partition format for a read-only parent variable based on how
 * that variable is accessed inside the body of the first statement of this
 * parfor statement block.
 *
 * <p>All partitioning candidates collected for the variable must agree with
 * each other; as soon as two candidates differ, the result falls back to
 * {@code PartitionFormat.NONE}. The same fallback is used when no candidate is
 * found at all, or when collecting the candidates raises a
 * {@code LanguageException} (which is logged at trace level, not rethrown).
 *
 * @param var identifier of the variable whose access pattern is analyzed
 * @return the format all candidates agree on, otherwise {@code PartitionFormat.NONE}
 */
public PartitionFormat determineDataPartitionFormat(String var) {
    final List<PartitionFormat> candidates = new LinkedList<>();
    try {
        // collect one candidate per relevant access found in the parfor body
        final ParForStatement parfor = (ParForStatement) _statements.get(0);
        rDeterminePartitioningCandidates(var, parfor.getBody(), candidates);

        // fold the candidates into a single answer; any disagreement yields NONE
        PartitionFormat agreed = null;
        for (PartitionFormat candidate : candidates) {
            final boolean conflict = (agreed != null) && !agreed.equals(candidate);
            if (conflict) {
                agreed = PartitionFormat.NONE;
            } else {
                agreed = candidate;
            }
        }

        // no candidates at all: nothing to partition by
        return (agreed != null) ? agreed : PartitionFormat.NONE;
    } catch (LanguageException e) {
        LOG.trace("Unable to determine partitioning candidates.", e);
        return PartitionFormat.NONE;
    }
}
Also used : PartitionFormat(org.apache.sysml.runtime.controlprogram.ParForProgramBlock.PartitionFormat) PDataPartitionFormat(org.apache.sysml.runtime.controlprogram.ParForProgramBlock.PDataPartitionFormat) LinkedList(java.util.LinkedList)

Example 22 with PartitionFormat

use of org.apache.sysml.runtime.controlprogram.ParForProgramBlock.PartitionFormat in project systemml by apache.

the class ParForStatementBlock method determineAccessPattern.

/**
 * Classifies a single indexed access by inspecting its row and column bound
 * expressions and maps it to a partition format.
 *
 * <ul>
 *   <li>no row bounds and identical column bounds: {@code COLUMN_WISE}</li>
 *   <li>no column bounds and identical row bounds: {@code ROW_WISE}</li>
 *   <li>Spark execution mode, no row bounds, differing column bound references:
 *       {@code COLUMN_BLOCK_WISE_N} if the bounds are block aligned, else {@code NONE}</li>
 *   <li>Spark execution mode, no column bounds, differing row bound references:
 *       {@code ROW_BLOCK_WISE_N} if the bounds are block aligned, else {@code NONE}</li>
 *   <li>anything else: {@code NONE} (conservative default)</li>
 * </ul>
 *
 * @param dat the indexed identifier whose bounds are examined
 * @return the partition format matching the access pattern
 * @throws RuntimeException wrapping any exception raised while classifying the bounds
 */
private PartitionFormat determineAccessPattern(IndexedIdentifier dat) {
    final boolean sparkMode = OptimizerUtils.isSparkExecutionMode();
    final int blockSize = ConfigurationManager.getBlocksize();

    // fetch the four index bounds; a missing pair means "all rows" / "all columns"
    final Expression rowLower = dat.getRowLowerBound();
    final Expression rowUpper = dat.getRowUpperBound();
    final Expression colLower = dat.getColLowerBound();
    final Expression colUpper = dat.getColUpperBound();
    final boolean fullRowRange = (rowLower == null) && (rowUpper == null);
    final boolean fullColRange = (colLower == null) && (colUpper == null);

    try {
        // single column over all rows
        if (fullRowRange && colLower != null && colLower.equals(colUpper)) {
            return PartitionFormat.COLUMN_WISE;
        }

        // single row over all columns
        if (fullColRange && rowLower != null && rowLower.equals(rowUpper)) {
            return PartitionFormat.ROW_WISE;
        }

        // column range over all rows (Spark only)
        if (sparkMode && fullRowRange && colLower != colUpper) {
            LinearFunction lower = getLinearFunction(colLower, true);
            LinearFunction upper = getLinearFunction(colUpper, true);
            if (isAlignedBlocking(lower, upper, blockSize)) {
                // partition size is taken from the first coefficient of the lower bound
                return new PartitionFormat(PDataPartitionFormat.COLUMN_BLOCK_WISE_N, (int) lower._b[0]);
            }
            return PartitionFormat.NONE;
        }

        // row range over all columns (Spark only)
        if (sparkMode && fullColRange && rowLower != rowUpper) {
            LinearFunction lower = getLinearFunction(rowLower, true);
            LinearFunction upper = getLinearFunction(rowUpper, true);
            if (isAlignedBlocking(lower, upper, blockSize)) {
                // partition size is taken from the first coefficient of the lower bound
                return new PartitionFormat(PDataPartitionFormat.ROW_BLOCK_WISE_N, (int) lower._b[0]);
            }
            return PartitionFormat.NONE;
        }

        // no recognizable pattern: stay conservative
        return PartitionFormat.NONE;
    } catch (Exception ex) {
        throw new RuntimeException(ex);
    }
}
Also used : PartitionFormat(org.apache.sysml.runtime.controlprogram.ParForProgramBlock.PartitionFormat) PDataPartitionFormat(org.apache.sysml.runtime.controlprogram.ParForProgramBlock.PDataPartitionFormat)

Example 23 with PartitionFormat

use of org.apache.sysml.runtime.controlprogram.ParForProgramBlock.PartitionFormat in project systemml by apache.

the class ProgramConverter method serializeDataObject.

/**
 * Serializes a named data object into a single delimited string.
 *
 * <p>Layout: {@code <name>|<datatype>|<valuetype>|<value>}, with the fields
 * joined by {@code DATA_FIELD_DELIM}. For scalars the value is the scalar's
 * string value. For matrices the value is the file name, followed by nine
 * extra fields: rows, columns, rows per block, columns per block, non-zeros,
 * input info, output info, partition format and update type.
 *
 * @param key name under which the object is serialized
 * @param dat the data object to serialize
 * @return the serialized representation
 * @throws DMLRuntimeException if the data type is neither scalar nor matrix
 */
public static String serializeDataObject(String key, Data dat) {
    DataType dataType = dat.getDataType();
    ValueType valueType = dat.getValueType();

    // gather the value field and, for matrices, the trailing metadata fields
    String payload = null;
    String[] matrixFields = null;
    switch (dataType) {
        case SCALAR:
            payload = ((ScalarObject) dat).getStringValue();
            break;
        case MATRIX:
            MatrixObject matrix = (MatrixObject) dat;
            MetaDataFormat meta = (MetaDataFormat) dat.getMetaData();
            MatrixCharacteristics dims = meta.getMatrixCharacteristics();
            payload = matrix.getFileName();
            PartitionFormat partitioning;
            if (matrix.getPartitionFormat() != null) {
                partitioning = new PartitionFormat(matrix.getPartitionFormat(), matrix.getPartitionSize());
            } else {
                partitioning = PartitionFormat.NONE;
            }
            matrixFields = new String[] {
                String.valueOf(dims.getRows()),
                String.valueOf(dims.getCols()),
                String.valueOf(dims.getRowsPerBlock()),
                String.valueOf(dims.getColsPerBlock()),
                String.valueOf(dims.getNonZeros()),
                InputInfo.inputInfoToString(meta.getInputInfo()),
                OutputInfo.outputInfoToString(meta.getOutputInfo()),
                String.valueOf(partitioning),
                String.valueOf(matrix.getUpdateType())
            };
            break;
        default:
            throw new DMLRuntimeException("Unable to serialize datatype " + dataType);
    }

    // emit the header fields, then any matrix metadata, each preceded by the delimiter
    StringBuilder out = new StringBuilder();
    out.append(key)
        .append(DATA_FIELD_DELIM).append(dataType)
        .append(DATA_FIELD_DELIM).append(valueType)
        .append(DATA_FIELD_DELIM).append(payload);
    if (matrixFields != null) {
        for (String field : matrixFields) {
            out.append(DATA_FIELD_DELIM).append(field);
        }
    }
    return out.toString();
}
Also used : ScalarObject(org.apache.sysml.runtime.instructions.cp.ScalarObject) MetaDataFormat(org.apache.sysml.runtime.matrix.MetaDataFormat) MatrixObject(org.apache.sysml.runtime.controlprogram.caching.MatrixObject) ValueType(org.apache.sysml.parser.Expression.ValueType) DataType(org.apache.sysml.parser.Expression.DataType) PartitionFormat(org.apache.sysml.runtime.controlprogram.ParForProgramBlock.PartitionFormat) PDataPartitionFormat(org.apache.sysml.runtime.controlprogram.ParForProgramBlock.PDataPartitionFormat) MatrixCharacteristics(org.apache.sysml.runtime.matrix.MatrixCharacteristics) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException)

Example 24 with PartitionFormat

use of org.apache.sysml.runtime.controlprogram.ParForProgramBlock.PartitionFormat in project systemml by apache.

the class DataPartitionMR method processPartitionInstructions.

/**
 * Executes every "partition" instruction contained in the given shuffle
 * instruction string and records the characteristics of each partitioned
 * output.
 *
 * <p>For each partition instruction the input and output matrices are looked
 * up by the indices encoded in the instruction, a block-wise partition size is
 * derived from {@code DistributedCacheInput.PARTITION_SIZE} and the input
 * dimensions, and the input is partitioned via a {@code DataPartitionerRemoteMR}.
 * Results are written to {@code sts} in the order the partition instructions
 * appear. Instructions with any other opcode are skipped.
 *
 * @param shuffleInst    instruction string, split on {@code Instruction.INSTRUCTION_DELIM}
 * @param inputMatrices  input matrices, addressed by the instruction's input index
 * @param resultIndices  result indices used to locate the output matrix
 * @param outputMatrices output matrices, addressed via {@code findResultIndex}
 * @param numReducers    passed through to the data partitioner
 * @param replication    passed through to the data partitioner
 * @param sts            receives the matrix characteristics of each partitioned output
 * @throws DMLRuntimeException if the partition format is not row- or column-block-wise-N
 */
private static void processPartitionInstructions(String shuffleInst, MatrixObject[] inputMatrices, byte[] resultIndices, MatrixObject[] outputMatrices, int numReducers, int replication, MatrixCharacteristics[] sts) {
    int statSlot = 0;
    for (String instruction : shuffleInst.split(Instruction.INSTRUCTION_DELIM)) {
        // only partition instructions are handled here
        if (!InstructionUtils.getOpCode(instruction).equalsIgnoreCase("partition")) {
            continue;
        }

        // decode operands: [1] input index, [2] output index, [3] partition format name
        String[] fields = InstructionUtils.getInstructionParts(instruction);
        int inputIx = Integer.parseInt(fields[1]);
        int outputIx = Integer.parseInt(fields[2]);
        MatrixObject source = inputMatrices[inputIx];
        MatrixObject target = outputMatrices[findResultIndex(resultIndices, outputIx)];
        PDataPartitionFormat format = PDataPartitionFormat.valueOf(fields[3]);

        long rows = source.getNumRows();
        long cols = source.getNumColumns();
        long rowsPerBlock = source.getNumRowsPerBlock();
        long colsPerBlock = source.getNumColumnsPerBlock();

        // partition length: whole blocks, rounded up, derived from the partition size
        long partitionLen;
        if (format == PDataPartitionFormat.ROW_BLOCK_WISE_N) {
            long rowBlocks = (long) Math.ceil(((double) DistributedCacheInput.PARTITION_SIZE) / cols / rowsPerBlock);
            partitionLen = rowBlocks * rowsPerBlock;
        } else if (format == PDataPartitionFormat.COLUMN_BLOCK_WISE_N) {
            long colBlocks = (long) Math.ceil(((double) DistributedCacheInput.PARTITION_SIZE) / rows / colsPerBlock);
            partitionLen = colBlocks * colsPerBlock;
        } else {
            throw new DMLRuntimeException("Unsupported partition format for distributed cache input: " + format);
        }

        // run the partitioner and remember the characteristics of its output
        PartitionFormat partitioning = new PartitionFormat(format, (int) partitionLen);
        DataPartitioner partitioner = new DataPartitionerRemoteMR(partitioning, -1, numReducers, replication, false, true);
        target = partitioner.createPartitionedMatrixObject(source, target, true);
        sts[statSlot] = target.getMatrixCharacteristics();
        statSlot++;
    }
}
Also used : PDataPartitionFormat(org.apache.sysml.runtime.controlprogram.ParForProgramBlock.PDataPartitionFormat) MatrixObject(org.apache.sysml.runtime.controlprogram.caching.MatrixObject) DataPartitioner(org.apache.sysml.runtime.controlprogram.parfor.DataPartitioner) DataPartitionerRemoteMR(org.apache.sysml.runtime.controlprogram.parfor.DataPartitionerRemoteMR) PartitionFormat(org.apache.sysml.runtime.controlprogram.ParForProgramBlock.PartitionFormat) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException)

Aggregations

PDataPartitionFormat (org.apache.sysml.runtime.controlprogram.ParForProgramBlock.PDataPartitionFormat)24 PartitionFormat (org.apache.sysml.runtime.controlprogram.ParForProgramBlock.PartitionFormat)24 MatrixObject (org.apache.sysml.runtime.controlprogram.caching.MatrixObject)14 DMLRuntimeException (org.apache.sysml.runtime.DMLRuntimeException)8 ParForProgramBlock (org.apache.sysml.runtime.controlprogram.ParForProgramBlock)8 MatrixCharacteristics (org.apache.sysml.runtime.matrix.MatrixCharacteristics)8 HashMap (java.util.HashMap)4 DataType (org.apache.sysml.parser.Expression.DataType)4 ValueType (org.apache.sysml.parser.Expression.ValueType)4 PDataPartitioner (org.apache.sysml.runtime.controlprogram.ParForProgramBlock.PDataPartitioner)4 PExecMode (org.apache.sysml.runtime.controlprogram.ParForProgramBlock.PExecMode)4 Data (org.apache.sysml.runtime.instructions.cp.Data)4 ScalarObject (org.apache.sysml.runtime.instructions.cp.ScalarObject)4 MetaDataFormat (org.apache.sysml.runtime.matrix.MetaDataFormat)4 IOException (java.io.IOException)2 HashSet (java.util.HashSet)2 LinkedList (java.util.LinkedList)2 StringTokenizer (java.util.StringTokenizer)2 FileSplit (org.apache.hadoop.mapred.FileSplit)2 InputSplit (org.apache.hadoop.mapred.InputSplit)2