Search in sources :

Example 1 with PartitionFormat

use of org.apache.sysml.runtime.controlprogram.ParForProgramBlock.PartitionFormat in project incubator-systemml by apache.

the class ProgramConverter method serializeDataObject.

/**
 * Serializes a single named data object into one delimited string.
 *
 * Layout: name, datatype, valuetype and value, separated by DATA_FIELD_DELIM.
 * Scalars carry their string value; matrices carry their file name followed by
 * nine additional metadata fields (dimensions, block sizes, non-zeros,
 * input/output info, partition format, update type).
 *
 * @param key name under which the data object is serialized
 * @param dat data object to serialize (scalar or matrix)
 * @return serialized representation of the data object
 * @throws DMLRuntimeException if the data type is neither SCALAR nor MATRIX
 */
public static String serializeDataObject(String key, Data dat) {
    // SCHEMA: <name>|<datatype>|<valuetype>|value
    // (scalars are serialized by value, matrices by filename)
    DataType datatype = dat.getDataType();
    ValueType valuetype = dat.getValueType();
    String value;
    // stays null for scalars, which have no trailing metadata fields
    String[] matrixMetaData = null;
    switch(datatype) {
        case SCALAR: {
            ScalarObject scalar = (ScalarObject) dat;
            value = scalar.getStringValue();
            break;
        }
        case MATRIX: {
            MatrixObject mo = (MatrixObject) dat;
            MetaDataFormat md = (MetaDataFormat) dat.getMetaData();
            MatrixCharacteristics mc = md.getMatrixCharacteristics();
            value = mo.getFileName();
            // unpartitioned matrices are marked with the NONE format
            PartitionFormat partFormat;
            if (mo.getPartitionFormat() != null) {
                partFormat = new PartitionFormat(mo.getPartitionFormat(), mo.getPartitionSize());
            } else {
                partFormat = PartitionFormat.NONE;
            }
            matrixMetaData = new String[] {
                String.valueOf(mc.getRows()),
                String.valueOf(mc.getCols()),
                String.valueOf(mc.getRowsPerBlock()),
                String.valueOf(mc.getColsPerBlock()),
                String.valueOf(mc.getNonZeros()),
                InputInfo.inputInfoToString(md.getInputInfo()),
                OutputInfo.outputInfoToString(md.getOutputInfo()),
                String.valueOf(partFormat),
                String.valueOf(mo.getUpdateType())
            };
            break;
        }
        default:
            throw new DMLRuntimeException("Unable to serialize datatype " + datatype);
    }
    // assemble the common header fields
    StringBuilder out = new StringBuilder();
    out.append(key).append(DATA_FIELD_DELIM)
        .append(datatype).append(DATA_FIELD_DELIM)
        .append(valuetype).append(DATA_FIELD_DELIM)
        .append(value);
    // matrices additionally carry their metadata, one field each
    if (matrixMetaData != null) {
        for (String field : matrixMetaData) {
            out.append(DATA_FIELD_DELIM).append(field);
        }
    }
    return out.toString();
}
Also used : ScalarObject(org.apache.sysml.runtime.instructions.cp.ScalarObject) MetaDataFormat(org.apache.sysml.runtime.matrix.MetaDataFormat) MatrixObject(org.apache.sysml.runtime.controlprogram.caching.MatrixObject) ValueType(org.apache.sysml.parser.Expression.ValueType) DataType(org.apache.sysml.parser.Expression.DataType) PartitionFormat(org.apache.sysml.runtime.controlprogram.ParForProgramBlock.PartitionFormat) PDataPartitionFormat(org.apache.sysml.runtime.controlprogram.ParForProgramBlock.PDataPartitionFormat) MatrixCharacteristics(org.apache.sysml.runtime.matrix.MatrixCharacteristics) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException)

Example 2 with PartitionFormat

use of org.apache.sysml.runtime.controlprogram.ParForProgramBlock.PartitionFormat in project incubator-systemml by apache.

the class RemoteDPParWorkerReducer method configure.

/**
 * Configures this reducer from the given job configuration.
 *
 * Reads the data partitioning settings, allocates the reuse partition block,
 * parses the serialized parfor body into a local runtime program, sets up
 * caching for this worker, and finally disables parfor stat monitoring.
 *
 * @param job job configuration to read all settings from
 * @throws RuntimeException wrapping any exception raised while setting up the parworker
 */
@Override
public void configure(JobConf job) {
    // Step 1: configure data partitioning information
    _dpf = MRJobConfiguration.getPartitioningFormat(job);
    MatrixCharacteristics mc = MRJobConfiguration.getPartitionedMatrixSize(job);
    PartitionFormat pf = new PartitionFormat(_dpf, MRJobConfiguration.getPartitioningSizeN(job));
    // dimensions as reported by the partition format for the partitioned matrix size
    _rlen = (int) pf.getNumRows(mc);
    _clen = (int) pf.getNumColumns(mc);
    _brlen = mc.getRowsPerBlock();
    _bclen = mc.getColsPerBlock();
    _iterVar = MRJobConfiguration.getPartitioningItervar(job);
    _inputVar = MRJobConfiguration.getPartitioningMatrixvar(job);
    _info = MRJobConfiguration.getPartitioningOutputInfo(job);
    _tSparseCol = MRJobConfiguration.getPartitioningTransposedCol(job);
    // with the transposed-column flag set, the block is allocated with swapped dimensions
    // NOTE(review): third constructor argument is presumably the sparse flag - confirm
    if (_tSparseCol)
        _partition = new MatrixBlock((int) _clen, _rlen, true);
    else
        _partition = new MatrixBlock((int) _rlen, _clen, false);
    // Step 2: configure parworker
    String taskID = job.get(MRConfigurationNames.MR_TASK_ID);
    LOG.trace("configure RemoteDPParWorkerReducer " + taskID);
    try {
        _stringID = taskID;
        // int task ID
        _workerID = IDHandler.extractIntID(_stringID);
        // outside of local mode, register the given job conf as the cached job conf
        // NOTE(review): the original comment here is truncated; the remaining fragment reads:
        // "in the context of mr jobs (for example this config points to local fs instead of hdfs by default)."
        if (!InfrastructureAnalyzer.isLocalMode(job)) {
            ConfigurationManager.setCachedJobConf(job);
        }
        // create local runtime program
        String in = MRJobConfiguration.getProgramBlocks(job);
        ParForBody body = ProgramConverter.parseParForBody(in, (int) _workerID);
        _childBlocks = body.getChildBlocks();
        _ec = body.getEc();
        _resultVars = body.getResultVariables();
        // init local cache manager
        if (!CacheableData.isCachingActive()) {
            String uuid = IDHandler.createDistributedUniqueID();
            LocalFileUtils.createWorkingDirectoryWithUUID(uuid);
            // incl activation, cache dir creation (each map task gets its own dir for simplified cleanup)
            CacheableData.initCaching(uuid);
        }
        // append the worker id to the eviction file prefix unless it already contains an underscore
        if (!CacheableData.cacheEvictionLocalFilePrefix.contains("_")) {
            // account for local mode
            CacheableData.cacheEvictionLocalFilePrefix = CacheableData.cacheEvictionLocalFilePrefix + "_" + _workerID;
        }
        // ensure that resultvar files are not removed
        super.pinResultVariables();
        // enable/disable caching (if required)
        boolean cpCaching = MRJobConfiguration.getParforCachingConfig(job);
        if (!cpCaching)
            CacheableData.disableCaching();
        // reset task and iteration counters
        _numTasks = 0;
        _numIters = 0;
    } catch (Exception ex) {
        // rethrow unchecked, keeping the original exception as cause
        throw new RuntimeException(ex);
    }
    // disable parfor stat monitoring, reporting execution times via counters not useful
    StatisticMonitor.disableStatMonitoring();
    // always reset stats because counters per map task (for case of JVM reuse)
    if (DMLScript.STATISTICS && !InfrastructureAnalyzer.isLocalMode(job))
        Statistics.reset();
}
Also used : MatrixBlock(org.apache.sysml.runtime.matrix.data.MatrixBlock) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException) PartitionFormat(org.apache.sysml.runtime.controlprogram.ParForProgramBlock.PartitionFormat) PDataPartitionFormat(org.apache.sysml.runtime.controlprogram.ParForProgramBlock.PDataPartitionFormat) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException) IOException(java.io.IOException) MatrixCharacteristics(org.apache.sysml.runtime.matrix.MatrixCharacteristics)

Example 3 with PartitionFormat

use of org.apache.sysml.runtime.controlprogram.ParForProgramBlock.PartitionFormat in project incubator-systemml by apache.

the class OptimizerConstrained method optimize.

/**
 * Main optimization procedure.
 *
 * Transformation-based heuristic (rule-based) optimization
 * (no use of sb, direct change of pb).
 *
 * The rewrites are applied in a fixed order to the root node of the given plan;
 * memory estimates are refreshed after rewrites that can change them. Exactly one
 * plan is evaluated.
 *
 * @param sb parfor statement block (not used by this method)
 * @param pb parfor program block (not referenced directly in this method body)
 * @param plan optimization tree whose root node is rewritten
 * @param est cost estimator, stored in _cost and used for all memory estimates
 * @param ec execution context providing the variables passed to the rewrites
 * @return always true, including the early abort for an empty parfor body
 */
@Override
public boolean optimize(ParForStatementBlock sb, ParForProgramBlock pb, OptTree plan, CostEstimator est, ExecutionContext ec) {
    LOG.debug("--- " + getOptMode() + " OPTIMIZER -------");
    OptNode pn = plan.getRoot();
    // early abort for empty parfor body
    if (pn.isLeaf())
        return true;
    // ANALYZE infrastructure properties
    super.analyzeProblemAndInfrastructure(pn);
    _cost = est;
    // debug and warnings output
    // NOTE(review): the message below ends with ")." although no "(" is opened
    LOG.debug(getOptMode() + " OPT: Optimize with local_max_mem=" + toMB(_lm) + " and remote_max_mem=" + toMB(_rm) + ").");
    if (_rnk <= 0 || _rk <= 0)
        LOG.warn(getOptMode() + " OPT: Optimize for inactive cluster (num_nodes=" + _rnk + ", num_map_slots=" + _rk + ").");
    // ESTIMATE memory consumption
    // remember exec type and degree of parallelism so they can be restored after the serial estimate
    ExecType oldET = pn.getExecType();
    int oldK = pn.getK();
    // for basic mem consumption
    pn.setSerialParFor();
    double M0a = _cost.getEstimate(TestMeasure.MEMORY_USAGE, pn);
    pn.setExecType(oldET);
    pn.setK(oldK);
    LOG.debug(getOptMode() + " OPT: estimated mem (serial exec) M=" + toMB(M0a));
    // OPTIMIZE PARFOR PLAN
    // rewrite 1: data partitioning (incl. log. recompile RIX)
    HashMap<String, PartitionFormat> partitionedMatrices = new HashMap<>();
    rewriteSetDataPartitioner(pn, ec.getVariables(), partitionedMatrices, OptimizerUtils.getLocalMemBudget());
    // reestimate
    double M0b = _cost.getEstimate(TestMeasure.MEMORY_USAGE, pn);
    // rewrite 2: remove unnecessary compare matrix
    rewriteRemoveUnnecessaryCompareMatrix(pn, ec);
    // rewrite 3: rewrite result partitioning (incl. log/phy recompile LIX)
    boolean flagLIX = super.rewriteSetResultPartitioning(pn, M0b, ec.getVariables());
    // reestimate
    double M1 = _cost.getEstimate(TestMeasure.MEMORY_USAGE, pn);
    LOG.debug(getOptMode() + " OPT: estimated new mem (serial exec) M=" + toMB(M1));
    // determine memory consumption for what-if: all-cp or partitioned
    double M2 = _cost.getEstimate(TestMeasure.MEMORY_USAGE, pn, LopProperties.ExecType.CP);
    LOG.debug(getOptMode() + " OPT: estimated new mem (serial exec, all CP) M=" + toMB(M2));
    double M3 = _cost.getEstimate(TestMeasure.MEMORY_USAGE, pn, true);
    LOG.debug(getOptMode() + " OPT: estimated new mem (cond partitioning) M=" + toMB(M3));
    // rewrite 4: execution strategy
    // keep old
    PExecMode tmpmode = getPExecMode(pn);
    boolean flagRecompMR = rewriteSetExecutionStategy(pn, M0a, M1, M2, M3, flagLIX);
    // exec-type-specific rewrites
    if (pn.getExecType() == getRemoteExecType()) {
        // only when the plain estimate exceeds _rm but the conditional-partitioning estimate fits
        if (M1 > _rm && M3 <= _rm) {
            // rewrite 1: data partitioning (apply conditional partitioning)
            rewriteSetDataPartitioner(pn, ec.getVariables(), partitionedMatrices, M3);
            // reestimate
            M1 = _cost.getEstimate(TestMeasure.MEMORY_USAGE, pn);
        }
        if (flagRecompMR) {
            // rewrite 5: set operations exec type
            rewriteSetOperationsExecType(pn, flagRecompMR);
            // reestimate
            M1 = _cost.getEstimate(TestMeasure.MEMORY_USAGE, pn);
        }
        // rewrite 6: data colocation
        super.rewriteDataColocation(pn, ec.getVariables());
        // rewrite 7: rewrite set partition replication factor
        super.rewriteSetPartitionReplicationFactor(pn, partitionedMatrices, ec.getVariables());
        // rewrite 8: rewrite set export replication factor
        super.rewriteSetExportReplicationFactor(pn, ec.getVariables());
        // (the numbering skips rewrite 9 in this method)
        // rewrite 10: determine parallelism
        rewriteSetDegreeOfParallelism(pn, M1, false);
        // rewrite 11: task partitioning
        rewriteSetTaskPartitioner(pn, false, flagLIX);
        // rewrite 12: fused data partitioning and execution
        rewriteSetFusedDataPartitioningExecution(pn, M1, flagLIX, partitionedMatrices, ec.getVariables(), tmpmode);
        // rewrite 13: transpose sparse vector operations
        super.rewriteSetTranposeSparseVectorOperations(pn, partitionedMatrices, ec.getVariables());
        // rewrite 14: set in-place result indexing
        HashSet<ResultVar> inplaceResultVars = new HashSet<>();
        super.rewriteSetInPlaceResultIndexing(pn, M1, ec.getVariables(), inplaceResultVars, ec);
        // rewrite 15: disable CP caching
        super.rewriteDisableCPCaching(pn, inplaceResultVars, ec.getVariables());
    } else // if( pn.getExecType() == ExecType.CP )
    {
        // rewrite 10: determine parallelism
        rewriteSetDegreeOfParallelism(pn, M1, false);
        // rewrite 11: task partitioning
        // flagLIX always false
        rewriteSetTaskPartitioner(pn, false, false);
        // rewrite 14: set in-place result indexing
        HashSet<ResultVar> inplaceResultVars = new HashSet<>();
        super.rewriteSetInPlaceResultIndexing(pn, M1, ec.getVariables(), inplaceResultVars, ec);
        if (!OptimizerUtils.isSparkExecutionMode()) {
            // rewrite 16: runtime piggybacking
            super.rewriteEnableRuntimePiggybacking(pn, ec.getVariables(), partitionedMatrices);
        } else {
            // rewrite 17: checkpoint injection for parfor loop body
            super.rewriteInjectSparkLoopCheckpointing(pn);
            // rewrite 18: repartition read-only inputs for zipmm
            super.rewriteInjectSparkRepartition(pn, ec.getVariables());
            // rewrite 19: eager caching for checkpoint rdds
            super.rewriteSetSparkEagerRDDCaching(pn, ec.getVariables());
        }
    }
    // rewrite 20: set result merge
    rewriteSetResultMerge(pn, ec.getVariables(), true);
    // rewrite 21: set local recompile memory budget
    super.rewriteSetRecompileMemoryBudget(pn);
    // /////
    // Final rewrites for cleanup / minor improvements
    // rewrite 22: parfor (in recursive functions) to for
    super.rewriteRemoveRecursiveParFor(pn, ec.getVariables());
    // rewrite 23: parfor (par=1) to for
    super.rewriteRemoveUnnecessaryParFor(pn);
    // info optimization result
    _numEvaluatedPlans = 1;
    return true;
}
Also used : HashMap(java.util.HashMap) PDataPartitionFormat(org.apache.sysml.runtime.controlprogram.ParForProgramBlock.PDataPartitionFormat) PartitionFormat(org.apache.sysml.runtime.controlprogram.ParForProgramBlock.PartitionFormat) PExecMode(org.apache.sysml.runtime.controlprogram.ParForProgramBlock.PExecMode) ResultVar(org.apache.sysml.parser.ParForStatementBlock.ResultVar) ExecType(org.apache.sysml.runtime.controlprogram.parfor.opt.OptNode.ExecType) HashSet(java.util.HashSet)

Example 4 with PartitionFormat

use of org.apache.sysml.runtime.controlprogram.ParForProgramBlock.PartitionFormat in project incubator-systemml by apache.

the class DataPartitionMR method processPartitionInstructions.

/**
 * Executes every "partition" instruction contained in the given shuffle
 * instruction string; all other instructions are skipped.
 *
 * For each partition instruction the referenced input matrix is partitioned
 * through a DataPartitionerRemoteMR, and the matrix characteristics of the
 * result are stored in the next free slot of sts.
 *
 * @param shuffleInst instruction string, split at Instruction.INSTRUCTION_DELIM
 * @param inputMatrices input matrices, addressed by the instruction's input index
 * @param resultIndices result indices used to locate the output matrix
 * @param outputMatrices output matrices
 * @param numReducers number of reducers handed to the data partitioner
 * @param replication replication factor handed to the data partitioner
 * @param sts receives the characteristics of each partitioned output, in instruction order
 * @throws DMLRuntimeException for any format other than ROW_BLOCK_WISE_N or COLUMN_BLOCK_WISE_N
 */
private static void processPartitionInstructions(String shuffleInst, MatrixObject[] inputMatrices, byte[] resultIndices, MatrixObject[] outputMatrices, int numReducers, int replication, MatrixCharacteristics[] sts) {
    // next free slot in sts; advances once per processed partition instruction
    int stsPos = 0;
    String[] instructions = shuffleInst.split(Instruction.INSTRUCTION_DELIM);
    for (String inst : instructions) {
        if (!InstructionUtils.getOpCode(inst).equalsIgnoreCase("partition")) {
            continue;
        }
        // instruction parts: [1] input index, [2] output index, [3] partition format
        String[] parts = InstructionUtils.getInstructionParts(inst);
        int inputIndex = Integer.parseInt(parts[1]);
        int outputIndex = Integer.parseInt(parts[2]);
        MatrixObject in = inputMatrices[inputIndex];
        MatrixObject out = outputMatrices[findResultIndex(resultIndices, outputIndex)];
        PDataPartitionFormat pformat = PDataPartitionFormat.valueOf(parts[3]);
        long rlen = in.getNumRows();
        long clen = in.getNumColumns();
        long brlen = in.getNumRowsPerBlock();
        long bclen = in.getNumColumnsPerBlock();
        // partition size N: number of whole blocks per partition times the block size
        long partitionSize;
        switch(pformat) {
            case ROW_BLOCK_WISE_N:
                partitionSize = ((long) Math.ceil(((double) DistributedCacheInput.PARTITION_SIZE) / clen / brlen)) * brlen;
                break;
            case COLUMN_BLOCK_WISE_N:
                partitionSize = ((long) Math.ceil(((double) DistributedCacheInput.PARTITION_SIZE) / rlen / bclen)) * bclen;
                break;
            default:
                throw new DMLRuntimeException("Unsupported partition format for distributed cache input: " + pformat);
        }
        DataPartitioner dpart = new DataPartitionerRemoteMR(new PartitionFormat(pformat, (int) partitionSize), -1, numReducers, replication, false, true);
        out = dpart.createPartitionedMatrixObject(in, out, true);
        sts[stsPos++] = out.getMatrixCharacteristics();
    }
}
Also used : PDataPartitionFormat(org.apache.sysml.runtime.controlprogram.ParForProgramBlock.PDataPartitionFormat) MatrixObject(org.apache.sysml.runtime.controlprogram.caching.MatrixObject) DataPartitioner(org.apache.sysml.runtime.controlprogram.parfor.DataPartitioner) DataPartitionerRemoteMR(org.apache.sysml.runtime.controlprogram.parfor.DataPartitionerRemoteMR) PDataPartitionFormat(org.apache.sysml.runtime.controlprogram.ParForProgramBlock.PDataPartitionFormat) PartitionFormat(org.apache.sysml.runtime.controlprogram.ParForProgramBlock.PartitionFormat) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException)

Example 5 with PartitionFormat

use of org.apache.sysml.runtime.controlprogram.ParForProgramBlock.PartitionFormat in project systemml by apache.

the class ProgramConverter method parseDataObject.

/**
 * Parses one serialized data object back into its name and runtime object.
 *
 * The input is tokenized at DATA_FIELD_DELIM into name, datatype, valuetype and
 * value. Scalars are rebuilt from the value string; matrices are rebuilt from
 * the file name plus nine metadata tokens (rows, cols, block sizes, non-zeros,
 * input info, output info, partition format, update type).
 *
 * NOTE: MRJobConfiguration cannot be used for the general case because program blocks and
 * related symbol tables can be hierarchically structured.
 *
 * @param in data object as string
 * @return array of objects: index 0 holds the name, index 1 the parsed data object
 * @throws DMLRuntimeException if the datatype or the scalar valuetype is not supported
 */
public static Object[] parseDataObject(String in) {
    StringTokenizer tokens = new StringTokenizer(in, DATA_FIELD_DELIM);
    String name = tokens.nextToken();
    DataType datatype = DataType.valueOf(tokens.nextToken());
    ValueType valuetype = ValueType.valueOf(tokens.nextToken());
    // the value token may be absent, in which case the value is the empty string
    String valString = "";
    if (tokens.hasMoreTokens()) {
        valString = tokens.nextToken();
    }
    Data dat;
    switch(datatype) {
        case SCALAR:
            switch(valuetype) {
                case INT:
                    dat = new IntObject(Long.parseLong(valString));
                    break;
                case DOUBLE:
                    dat = new DoubleObject(Double.parseDouble(valString));
                    break;
                case BOOLEAN:
                    dat = new BooleanObject(Boolean.parseBoolean(valString));
                    break;
                case STRING:
                    dat = new StringObject(valString);
                    break;
                default:
                    throw new DMLRuntimeException("Unable to parse valuetype " + valuetype);
            }
            break;
        case MATRIX: {
            // for matrices the value field is passed to the MatrixObject constructor
            MatrixObject matrix = new MatrixObject(valuetype, valString);
            // metadata tokens must be consumed in exactly this order
            long numRows = Long.parseLong(tokens.nextToken());
            long numCols = Long.parseLong(tokens.nextToken());
            int rowsPerBlock = Integer.parseInt(tokens.nextToken());
            int colsPerBlock = Integer.parseInt(tokens.nextToken());
            long nonZeros = Long.parseLong(tokens.nextToken());
            InputInfo inputInfo = InputInfo.stringToInputInfo(tokens.nextToken());
            OutputInfo outputInfo = OutputInfo.stringToOutputInfo(tokens.nextToken());
            PartitionFormat partFormat = PartitionFormat.valueOf(tokens.nextToken());
            UpdateType updateType = UpdateType.valueOf(tokens.nextToken());
            matrix.setMetaData(new MetaDataFormat(new MatrixCharacteristics(numRows, numCols, rowsPerBlock, colsPerBlock, nonZeros), outputInfo, inputInfo));
            // partitioning is only applied when the parsed format is not NONE
            if (partFormat._dpf != PDataPartitionFormat.NONE) {
                matrix.setPartitioned(partFormat._dpf, partFormat._N);
            }
            matrix.setUpdateType(updateType);
            dat = matrix;
            break;
        }
        default:
            throw new DMLRuntimeException("Unable to parse datatype " + datatype);
    }
    return new Object[] { name, dat };
}
Also used : MetaDataFormat(org.apache.sysml.runtime.matrix.MetaDataFormat) MatrixObject(org.apache.sysml.runtime.controlprogram.caching.MatrixObject) ValueType(org.apache.sysml.parser.Expression.ValueType) DoubleObject(org.apache.sysml.runtime.instructions.cp.DoubleObject) Data(org.apache.sysml.runtime.instructions.cp.Data) PartitionFormat(org.apache.sysml.runtime.controlprogram.ParForProgramBlock.PartitionFormat) PDataPartitionFormat(org.apache.sysml.runtime.controlprogram.ParForProgramBlock.PDataPartitionFormat) UpdateType(org.apache.sysml.runtime.controlprogram.caching.MatrixObject.UpdateType) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException) MatrixCharacteristics(org.apache.sysml.runtime.matrix.MatrixCharacteristics) OutputInfo(org.apache.sysml.runtime.matrix.data.OutputInfo) StringTokenizer(java.util.StringTokenizer) IntObject(org.apache.sysml.runtime.instructions.cp.IntObject) InputInfo(org.apache.sysml.runtime.matrix.data.InputInfo) StringObject(org.apache.sysml.runtime.instructions.cp.StringObject) DataType(org.apache.sysml.parser.Expression.DataType) MatrixObject(org.apache.sysml.runtime.controlprogram.caching.MatrixObject) ScalarObject(org.apache.sysml.runtime.instructions.cp.ScalarObject) DoubleObject(org.apache.sysml.runtime.instructions.cp.DoubleObject) BooleanObject(org.apache.sysml.runtime.instructions.cp.BooleanObject) IntObject(org.apache.sysml.runtime.instructions.cp.IntObject) StringObject(org.apache.sysml.runtime.instructions.cp.StringObject) BooleanObject(org.apache.sysml.runtime.instructions.cp.BooleanObject)

Aggregations

PDataPartitionFormat (org.apache.sysml.runtime.controlprogram.ParForProgramBlock.PDataPartitionFormat)24 PartitionFormat (org.apache.sysml.runtime.controlprogram.ParForProgramBlock.PartitionFormat)24 MatrixObject (org.apache.sysml.runtime.controlprogram.caching.MatrixObject)14 DMLRuntimeException (org.apache.sysml.runtime.DMLRuntimeException)8 ParForProgramBlock (org.apache.sysml.runtime.controlprogram.ParForProgramBlock)8 MatrixCharacteristics (org.apache.sysml.runtime.matrix.MatrixCharacteristics)8 HashMap (java.util.HashMap)4 DataType (org.apache.sysml.parser.Expression.DataType)4 ValueType (org.apache.sysml.parser.Expression.ValueType)4 PDataPartitioner (org.apache.sysml.runtime.controlprogram.ParForProgramBlock.PDataPartitioner)4 PExecMode (org.apache.sysml.runtime.controlprogram.ParForProgramBlock.PExecMode)4 Data (org.apache.sysml.runtime.instructions.cp.Data)4 ScalarObject (org.apache.sysml.runtime.instructions.cp.ScalarObject)4 MetaDataFormat (org.apache.sysml.runtime.matrix.MetaDataFormat)4 IOException (java.io.IOException)2 HashSet (java.util.HashSet)2 LinkedList (java.util.LinkedList)2 StringTokenizer (java.util.StringTokenizer)2 FileSplit (org.apache.hadoop.mapred.FileSplit)2 InputSplit (org.apache.hadoop.mapred.InputSplit)2