Search in sources :

Example 56 with MatrixObject

use of org.apache.sysml.runtime.controlprogram.caching.MatrixObject in project incubator-systemml by apache.

the class OptimizerRuleBased method rewriteSetInPlaceResultIndexing.

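// -----------------------------------------------------------------
// REWRITE: set in-place result indexing
// -----------------------------------------------------------------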
/**
 * Decides whether the result variables of a parfor may be updated in-place and, if so,
 * flags every matrix result variable in the symbol table as in-place pinned.
 *
 * The decision is taken jointly for all result variables: the rewrite is applied only
 * if the combined memory estimate fits the budget of the chosen execution mode.
 *
 * NOTE: the estimate is conservative (result variables are assumed dense and may be
 * counted twice when they are also part of the intermediates term).
 *
 * @param pn plan node, expected to be a parfor node (a warning is logged otherwise)
 * @param M memory estimate that is added to the total size of the pinned result variables
 * @param vars local variable map holding the result variables
 * @param inPlaceResultVars output set; receives all result variables if the rewrite applies
 * @param ec execution context (not used by this rewrite)
 */
protected void rewriteSetInPlaceResultIndexing(OptNode pn, double M, LocalVariableMap vars, HashSet<ResultVar> inPlaceResultVars, ExecutionContext ec) {
    // sanity check only: a non-parfor node hints at a corrupt optimizer decision
    if (pn.getNodeType() != NodeType.PARFOR) {
        LOG.warn(getOptMode() + " OPT: Set in-place result update is only applicable for a ParFor node.");
    }

    ParForProgramBlock pfpb = (ParForProgramBlock) OptTreeConverter.getAbstractPlanMapping().getMappedProg(pn.getID())[1];
    ArrayList<ResultVar> retVars = pfpb.getResultVariables();

    // memory estimate: pinned result variables on top of M vs. sum of intermediates
    double pinnedSize = computeTotalSizeResultVariables(retVars, vars, pfpb.getDegreeOfParallelism());
    double intermediates = rComputeSumMemoryIntermediates(pn, new HashSet<ResultVar>());
    double totalMem = Math.max((M + pinnedSize), intermediates);

    // optimization decision (basic correctness constraint first, then memory constraints)
    boolean apply = false;
    if (rHasOnlyInPlaceSafeLeftIndexing(pn, retVars)) {
        PExecMode mode = pfpb.getExecMode();
        boolean remoteMode = mode == PExecMode.REMOTE_MR_DP
            || mode == PExecMode.REMOTE_MR
            || mode == PExecMode.REMOTE_SPARK_DP
            || mode == PExecMode.REMOTE_SPARK;

        // MR/Spark: remote memory constraint; CP: local memory constraint scaled by the
        // degree of parallelism, and only without forced mr/spark execution
        apply = (remoteMode && totalMem < _rm)
            || (mode == PExecMode.LOCAL
                && totalMem * pfpb.getDegreeOfParallelism() < _lm
                && pn.isCPOnly());
    }

    // modify result variable meta data, if rewrite applied
    if (apply) {
        // the update type will be serialized and transferred via the symbol table
        for (ResultVar resultVar : retVars) {
            Data data = vars.get(resultVar._name);
            if (data instanceof MatrixObject) {
                MatrixObject matrix = (MatrixObject) data;
                matrix.setUpdateType(UpdateType.INPLACE_PINNED);
            }
        }
        inPlaceResultVars.addAll(retVars);
    }

    String inPlaceNames = Arrays.toString(inPlaceResultVars.toArray(new ResultVar[0]));
    LOG.debug(getOptMode() + " OPT: rewrite 'set in-place result indexing' - result=" + apply + " (" + inPlaceNames + ", M=" + toMB(totalMem) + ")");
}
Also used : MatrixObject(org.apache.sysml.runtime.controlprogram.caching.MatrixObject) ResultVar(org.apache.sysml.parser.ParForStatementBlock.ResultVar) Data(org.apache.sysml.runtime.instructions.cp.Data) ParForProgramBlock(org.apache.sysml.runtime.controlprogram.ParForProgramBlock) HashSet(java.util.HashSet)

Example 57 with MatrixObject

use of org.apache.sysml.runtime.controlprogram.caching.MatrixObject in project incubator-systemml by apache.

the class OptimizerRuleBased method hasOnlyInMemoryResults.

/**
 * Recursively checks that every left-indexing leaf writing to a known result variable
 * passes the {@code isInMemoryResultMerge} test for the selected memory budget.
 *
 * @param n root of the (sub)plan to inspect
 * @param resultVars parfor result variables
 * @param vars local variable map used to look up the result matrices
 * @param inLocal if true the local memory budget is used, otherwise the remote map budget
 * @return false if at least one matching leaf fails the in-memory test, true otherwise
 */
protected boolean hasOnlyInMemoryResults(OptNode n, ArrayList<ResultVar> resultVars, LocalVariableMap vars, boolean inLocal) {
    // inner node: conjunction over all children (every child is visited, no short-circuit)
    if (!n.isLeaf()) {
        boolean allInMemory = true;
        for (OptNode child : n.getChilds()) {
            allInMemory &= hasOnlyInMemoryResults(child, resultVars, vars, inLocal);
        }
        return allInMemory;
    }

    // leaf: only left indexing operations are of interest
    String opName = n.getParam(ParamType.OPSTRING);
    if (!opName.equals(LeftIndexingOp.OPSTRING)) {
        return true;
    }

    // check the indexed target against the set of result variable names
    LeftIndexingOp hop = (LeftIndexingOp) OptTreeConverter.getAbstractPlanMapping().getMappedHop(n.getID());
    String varName = hop.getInput().get(0).getName();
    boolean isKnownResultVar = ResultVar.contains(resultVars, varName) && vars.keySet().contains(varName);
    if (!isKnownResultVar) {
        return true;
    }

    // dims of result vars must be known at this point in time
    MatrixObject target = (MatrixObject) vars.get(varName);
    long numRows = target.getNumRows();
    long numCols = target.getNumColumns();
    double budget;
    if (inLocal) {
        budget = OptimizerUtils.getLocalMemBudget();
    } else {
        budget = OptimizerUtils.getRemoteMemBudgetMap();
    }
    return isInMemoryResultMerge(numRows, numCols, budget);
}
Also used : MatrixObject(org.apache.sysml.runtime.controlprogram.caching.MatrixObject) LeftIndexingOp(org.apache.sysml.hops.LeftIndexingOp)

Example 58 with MatrixObject

use of org.apache.sysml.runtime.controlprogram.caching.MatrixObject in project incubator-systemml by apache.

the class OptimizerRuleBased method rewriteSetPartitionReplicationFactor.

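// -----------------------------------------------------------------
// REWRITE: set partition replication factor
// -----------------------------------------------------------------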
/**
 * Raises the replication factor used for partitioned inputs when partitions are read
 * multiple times (e.g., in nested loops): partitioning (done once) gets slightly slower,
 * but the probability of local access increases.
 *
 * The factor is bounded by problem/cluster constraints, an internal maximum, and the
 * remaining hdfs capacity relative to the total size of the partitioned inputs.
 *
 * NOTE: this rewrite requires 'set data partitioner' to be executed beforehand in order
 * to leverage the partitioning information in the plan tree.
 *
 * @param n internal representation of a plan alternative for program blocks and instructions
 * @param partitionedMatrices map of data partition formats
 * @param vars local variable map
 */
protected void rewriteSetPartitionReplicationFactor(OptNode n, HashMap<String, PartitionFormat> partitionedMatrices, LocalVariableMap vars) {
    ParForProgramBlock pfpb = (ParForProgramBlock) OptTreeConverter.getAbstractPlanMapping().getMappedProg(n.getID())[1];
    int replication = ParForProgramBlock.WRITE_REPLICATION_FACTOR;
    double sizeReplicated = 0;

    // applicable only for remote partitioning on MR/Spark with nested parallelism and
    // nested partition reads (evaluation order deliberately left to right)
    boolean remoteMRPartitioning = n.getExecType() == ExecType.MR
        && n.getParam(ParamType.DATA_PARTITIONER).equals(PDataPartitioner.REMOTE_MR.name());
    boolean apply = (remoteMRPartitioning
            || (n.getExecType() == ExecType.SPARK
                && n.getParam(ParamType.DATA_PARTITIONER).equals(PDataPartitioner.REMOTE_SPARK.name())))
        && n.hasNestedParallelism(false)
        && n.hasNestedPartitionReads(false);

    if (apply) {
        // account for problem and cluster constraints
        replication = (int) Math.min(_N, _rnk);
        // account for internal max constraint (note hadoop will warn if max > 10)
        replication = (int) Math.min(replication, MAX_REPLICATION_FACTOR_PARTITIONING);

        // account for remaining hdfs capacity
        try {
            FileSystem fs = IOUtilFunctions.getFileSystem(ConfigurationManager.getCachedJobConf());
            long remainingCapacity = fs.getStatus().getRemaining();

            // sum of all input sizes (w/o replication)
            long totalInputSize = 0;
            for (String matrixName : partitionedMatrices.keySet()) {
                MatrixObject matrix = (MatrixObject) vars.get(matrixName);
                Path file = new Path(matrix.getFileName());
                // non-existing (e.g., CP) -> small file, contributes nothing
                if (!fs.exists(file)) {
                    continue;
                }
                totalInputSize += fs.getContentSummary(file).getLength();
            }

            double capacityBound = Math.floor(0.9 * remainingCapacity / totalInputSize);
            replication = (int) Math.min(replication, capacityBound);
            // ensure at least the default write replication factor
            replication = Math.max(replication, ParForProgramBlock.WRITE_REPLICATION_FACTOR);
            sizeReplicated = replication * totalInputSize;
        } catch (Exception ex) {
            throw new DMLRuntimeException("Failed to analyze remaining hdfs capacity.", ex);
        }

        // modify the runtime plan
        pfpb.setPartitionReplicationFactor(replication);
    }

    _numEvaluatedPlans++;
    String details = apply ? " (" + replication + ", " + toMB(sizeReplicated) + ")" : "";
    LOG.debug(getOptMode() + " OPT: rewrite 'set partition replication factor' - result=" + apply + details);
}
Also used : Path(org.apache.hadoop.fs.Path) MatrixObject(org.apache.sysml.runtime.controlprogram.caching.MatrixObject) FileSystem(org.apache.hadoop.fs.FileSystem) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException) ParForProgramBlock(org.apache.sysml.runtime.controlprogram.ParForProgramBlock) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException)

Example 59 with MatrixObject

use of org.apache.sysml.runtime.controlprogram.caching.MatrixObject in project incubator-systemml by apache.

the class OptimizerRuleBased method rewriteRemoveUnnecessaryCompareMatrix.

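// -----------------------------------------------------------------
// REWRITE: remove compare matrix
// (for result merge; needs to be invoked before setting result merge)
// -----------------------------------------------------------------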
/**
 * Replaces result matrices that would otherwise be subject to result merge with compare
 * by empty matrix blocks of the same dimensions, whenever the loop body is guaranteed to
 * fully overwrite them and never reads them via right indexing.
 *
 * @param n plan node mapped to the parfor program block
 * @param ec execution context holding the result variables; modified in place
 */
protected void rewriteRemoveUnnecessaryCompareMatrix(OptNode n, ExecutionContext ec) {
    ParForProgramBlock pfpb = (ParForProgramBlock) OptTreeConverter.getAbstractPlanMapping().getMappedProg(n.getID())[1];
    ArrayList<ResultVar> cleanedVars = new ArrayList<>();
    ArrayList<ResultVar> resultVars = pfpb.getResultVariables();
    String itervar = pfpb.getIterVar();

    for (ResultVar rvar : resultVars) {
        Data dat = ec.getVariable(rvar._name);
        if (!(dat instanceof MatrixObject)) {
            continue;
        }
        MatrixObject mo = (MatrixObject) dat;

        boolean removable =
            // subject to result merge with compare
            mo.getNnz() != 0
            // guaranteed no conditional indexing
            && n.hasOnlySimpleChilds()
            // guaranteed full matrix replace
            && rContainsResultFullReplace(n, rvar._name, itervar, mo)
            // never read variable in loop body
            && !rIsReadInRightIndexing(n, rvar._name)
            // dimensions must fit the int-based matrix block constructor
            && mo.getNumRows() <= Integer.MAX_VALUE
            && mo.getNumColumns() <= Integer.MAX_VALUE;
        if (!removable) {
            continue;
        }

        // replace existing matrix object with empty matrix
        ec.cleanupCacheableData(mo);
        MatrixBlock empty = new MatrixBlock((int) mo.getNumRows(), (int) mo.getNumColumns(), false);
        ec.setMatrixOutput(rvar._name, empty, null);

        // keep track of cleaned result variables
        cleanedVars.add(rvar);
    }

    _numEvaluatedPlans++;
    boolean applied = !cleanedVars.isEmpty();
    LOG.debug(getOptMode() + " OPT: rewrite 'remove unnecessary compare matrix' - result=" + applied + " (" + ProgramConverter.serializeResultVariables(cleanedVars) + ")");
}
Also used : MatrixBlock(org.apache.sysml.runtime.matrix.data.MatrixBlock) MatrixObject(org.apache.sysml.runtime.controlprogram.caching.MatrixObject) ResultVar(org.apache.sysml.parser.ParForStatementBlock.ResultVar) ArrayList(java.util.ArrayList) Data(org.apache.sysml.runtime.instructions.cp.Data) ParForProgramBlock(org.apache.sysml.runtime.controlprogram.ParForProgramBlock)

Example 60 with MatrixObject

use of org.apache.sysml.runtime.controlprogram.caching.MatrixObject in project incubator-systemml by apache.

the class OptimizerRuleBased method determineFlagCellFormatWoCompare.

/**
 * Checks whether all result variables are matrices that are not in binary block output
 * format and have a recorded number of non-zeros equal to zero.
 *
 * @param resultVars parfor result variables
 * @param vars local variable map used to look up the result variables
 * @return true only if every result variable satisfies the conditions above
 */
protected boolean determineFlagCellFormatWoCompare(ArrayList<ResultVar> resultVars, LocalVariableMap vars) {
    for (ResultVar rVar : resultVars) {
        Data dat = vars.get(rVar._name);
        // missing or non-matrix result variable disqualifies immediately
        if (!(dat instanceof MatrixObject)) {
            return false;
        }

        MetaDataFormat meta = (MetaDataFormat) ((MatrixObject) dat).getMetaData();
        OutputInfo format = meta.getOutputInfo();
        long nonZeros = meta.getMatrixCharacteristics().getNonZeros();
        if (format == OutputInfo.BinaryBlockOutputInfo || nonZeros != 0) {
            return false;
        }
    }
    return true;
}
Also used : OutputInfo(org.apache.sysml.runtime.matrix.data.OutputInfo) MetaDataFormat(org.apache.sysml.runtime.matrix.MetaDataFormat) MatrixObject(org.apache.sysml.runtime.controlprogram.caching.MatrixObject) ResultVar(org.apache.sysml.parser.ParForStatementBlock.ResultVar) Data(org.apache.sysml.runtime.instructions.cp.Data)

Aggregations

MatrixObject (org.apache.sysml.runtime.controlprogram.caching.MatrixObject)201 DMLRuntimeException (org.apache.sysml.runtime.DMLRuntimeException)74 MatrixCharacteristics (org.apache.sysml.runtime.matrix.MatrixCharacteristics)45 MatrixBlock (org.apache.sysml.runtime.matrix.data.MatrixBlock)39 Data (org.apache.sysml.runtime.instructions.cp.Data)37 MetaDataFormat (org.apache.sysml.runtime.matrix.MetaDataFormat)26 Pointer (jcuda.Pointer)20 CSRPointer (org.apache.sysml.runtime.instructions.gpu.context.CSRPointer)20 IOException (java.io.IOException)17 ArrayList (java.util.ArrayList)16 ScalarObject (org.apache.sysml.runtime.instructions.cp.ScalarObject)14 OutputInfo (org.apache.sysml.runtime.matrix.data.OutputInfo)13 CacheableData (org.apache.sysml.runtime.controlprogram.caching.CacheableData)12 RDDObject (org.apache.sysml.runtime.instructions.spark.data.RDDObject)12 Hop (org.apache.sysml.hops.Hop)11 MatrixFormatMetaData (org.apache.sysml.runtime.matrix.MatrixFormatMetaData)11 ParForProgramBlock (org.apache.sysml.runtime.controlprogram.ParForProgramBlock)10 MatrixIndexes (org.apache.sysml.runtime.matrix.data.MatrixIndexes)10 Path (org.apache.hadoop.fs.Path)9 LongWritable (org.apache.hadoop.io.LongWritable)9