Search in sources :

Example 61 with MatrixObject

use of org.apache.sysml.runtime.controlprogram.caching.MatrixObject in project incubator-systemml by apache.

the class OptimizerRuleBased method rewriteSetSparkEagerRDDCaching.

// /////
// REWRITE set spark eager rdd caching
// /
/**
 * Selects input matrices of a parfor that should be eagerly cached as Spark RDDs
 * and registers them on the parfor program block.
 *
 * <p>The rewrite is only considered when Spark execution mode is active, the
 * node's exec type is CP, and {@code _N} is greater than one. A variable read by
 * the parfor body is selected when it is a {@link MatrixObject} with an RDD
 * handle, is not among the Spark repartition variables, its RDD reports
 * checkpoint children, and {@code _lm / k} is smaller than its estimated size.
 *
 * @param n    plan node of the parfor under optimization
 * @param vars local variable map used to look up the variables read by the parfor
 */
protected void rewriteSetSparkEagerRDDCaching(OptNode n, LocalVariableMap vars) {
    // resolve the statement block / program block pair mapped to this plan node
    Object[] mapped = OptTreeConverter.getAbstractPlanMapping().getMappedProg(n.getID());
    ParForStatementBlock stmtBlock = (ParForStatementBlock) mapped[0];
    ParForProgramBlock progBlock = (ParForProgramBlock) mapped[1];
    ArrayList<String> eagerVars = new ArrayList<>();

    // spark exec mode, local (CP) parfor, at least two iterations
    boolean applicable = OptimizerUtils.isSparkExecutionMode()
        && n.getExecType() == ExecType.CP
        && _N > 1;

    if (applicable) {
        Set<String> readVars = stmtBlock.variablesRead().getVariableNames();
        Collection<String> repartitionVars = progBlock.getSparkRepartitionVariables();

        for (String name : readVars) {
            Data value = vars.get(name);
            if (!(value instanceof MatrixObject)) {
                // unknown variable or not a matrix
                continue;
            }
            MatrixObject matrix = (MatrixObject) value;
            RDDObject handle = matrix.getRDDHandle();
            if (handle == null) {
                // matrix has no RDD handle, nothing to cache
                continue;
            }
            MatrixCharacteristics dims = matrix.getMatrixCharacteristics();

            if (repartitionVars != null && repartitionVars.contains(name)) {
                // repartition variables are excluded from eager caching
                continue;
            }
            if (!handle.rHasCheckpointRDDChilds()) {
                // only rdds with checkpoint children qualify
                continue;
            }
            // out-of-core dataset: estimated size exceeds the per-worker share _lm / k
            if (_lm / n.getK() < OptimizerUtils.estimateSizeExactSparsity(dims)) {
                eagerVars.add(name);
            }
        }

        // apply rewrite to the parfor program block
        if (!eagerVars.isEmpty()) {
            progBlock.setSparkEagerCacheVariables(eagerVars);
        }
    }

    _numEvaluatedPlans++;
    LOG.debug(getOptMode() + " OPT: rewrite 'set spark eager rdd caching' - result=" + eagerVars.size() + " (" + ProgramConverter.serializeStringCollection(eagerVars) + ")");
}
Also used : MatrixObject(org.apache.sysml.runtime.controlprogram.caching.MatrixObject) ArrayList(java.util.ArrayList) Data(org.apache.sysml.runtime.instructions.cp.Data) ParForProgramBlock(org.apache.sysml.runtime.controlprogram.ParForProgramBlock) MatrixCharacteristics(org.apache.sysml.runtime.matrix.MatrixCharacteristics) ParForStatementBlock(org.apache.sysml.parser.ParForStatementBlock) RDDObject(org.apache.sysml.runtime.instructions.spark.data.RDDObject) MatrixObject(org.apache.sysml.runtime.controlprogram.caching.MatrixObject) RDDObject(org.apache.sysml.runtime.instructions.spark.data.RDDObject)

Example 62 with MatrixObject

use of org.apache.sysml.runtime.controlprogram.caching.MatrixObject in project incubator-systemml by apache.

the class OptimizerRuleBased method rewriteDataColocation.

// /////
// REWRITE enable data colocation
// /
/**
 * Enables data colocation for the candidate matrix with the largest number of
 * non-zeros, provided the node uses the REMOTE_MR data partitioner and the MR
 * exec type.
 *
 * <p>NOTE: if MAX_REPLICATION_FACTOR_PARTITIONING is set larger than 10, co-location may
 * throw warnings per split since this exceeds "max block locations".
 *
 * @param n internal representation of a plan alternative for program blocks and instructions
 * @param vars local variable map
 */
protected void rewriteDataColocation(OptNode n, LocalVariableMap vars) {
    // colocation is considered for dp=REMOTE_MR, etype=REMOTE_MR, with at least one
    // direct col-/row-wise access via the index variable on the partitioned matrix
    String dataPartitioner = n.getParam(ParamType.DATA_PARTITIONER);
    ParForProgramBlock progBlock = (ParForProgramBlock) OptTreeConverter.getAbstractPlanMapping().getMappedProg(n.getID())[1];

    boolean colocate = false;
    String selected = null;

    boolean remoteMrPartitioned = dataPartitioner != null
        && dataPartitioner.equals(PDataPartitioner.REMOTE_MR.toString());
    if (remoteMrPartitioned && n.getExecType() == ExecType.MR) {
        // collect all matrices with at least one partitioned access via the iteration variable
        HashSet<String> candidates = new HashSet<>();
        rFindDataColocationCandidates(n, candidates, progBlock.getIterVar());

        // pick the candidate with the most non-zeros (accounts for sparsity)
        long bestNnz = Long.MIN_VALUE;
        for (String candidate : candidates) {
            MatrixObject matrix = (MatrixObject) vars.get(candidate);
            if (matrix == null) {
                continue;
            }
            long nnz = matrix.getNnz();
            if (nnz > bestNnz) {
                bestNnz = nnz;
                selected = candidate;
                colocate = true;
            }
        }
    }

    // modify the runtime plan only if a candidate was selected
    if (colocate) {
        progBlock.enableColocatedPartitionedMatrix(selected);
    }

    _numEvaluatedPlans++;
    String suffix = colocate ? " (" + selected + ")" : "";
    LOG.debug(getOptMode() + " OPT: rewrite 'enable data colocation' - result=" + colocate + suffix);
}
Also used : MatrixObject(org.apache.sysml.runtime.controlprogram.caching.MatrixObject) ParForProgramBlock(org.apache.sysml.runtime.controlprogram.ParForProgramBlock) HashSet(java.util.HashSet)

Example 63 with MatrixObject

use of org.apache.sysml.runtime.controlprogram.caching.MatrixObject in project incubator-systemml by apache.

the class ExecutionContext method releaseMatrixInput.

/**
 * Looks up the matrix object bound to the given variable and releases it,
 * passing {@code null} as the argument to {@code release}.
 *
 * @param varName name of the variable whose matrix object is released
 */
public void releaseMatrixInput(String varName) {
    getMatrixObject(varName).release(null);
}
Also used : MatrixObject(org.apache.sysml.runtime.controlprogram.caching.MatrixObject)

Example 64 with MatrixObject

use of org.apache.sysml.runtime.controlprogram.caching.MatrixObject in project incubator-systemml by apache.

the class SparkExecutionContext method cacheMatrixObject.

/**
 * Forces caching of the RDD behind the given matrix variable, if the matrix
 * exceeds the caching threshold and its RDD is not already cached.
 *
 * @param var name of the matrix variable to cache
 */
@SuppressWarnings("unchecked")
public void cacheMatrixObject(String var) {
    MatrixObject target = getMatrixObject(var);

    // check the size first to avoid unnecessary spark context creation
    boolean worthCaching = OptimizerUtils.exceedsCachingThreshold(
        target.getNumColumns(),
        (double) OptimizerUtils.estimateSizeExactSparsity(target.getMatrixCharacteristics()));

    if (worthCaching) {
        // obtain the binary-block input rdd for the matrix
        JavaPairRDD<MatrixIndexes, MatrixBlock> blocks =
            (JavaPairRDD<MatrixIndexes, MatrixBlock>) getRDDHandleForMatrixObject(target, InputInfo.BinaryBlockInputInfo);

        // if not cached yet, run an action to trigger caching up front and prevent contention
        if (!isRDDCached(blocks.id())) {
            blocks.count();
        }
    }
}
Also used : MatrixBlock(org.apache.sysml.runtime.matrix.data.MatrixBlock) CompressedMatrixBlock(org.apache.sysml.runtime.compress.CompressedMatrixBlock) MatrixObject(org.apache.sysml.runtime.controlprogram.caching.MatrixObject) MatrixIndexes(org.apache.sysml.runtime.matrix.data.MatrixIndexes) JavaPairRDD(org.apache.spark.api.java.JavaPairRDD)

Example 65 with MatrixObject

use of org.apache.sysml.runtime.controlprogram.caching.MatrixObject in project incubator-systemml by apache.

the class InterProceduralAnalysis method extractExternalFunctionCallReturnStatistics.

/**
 * Derives output statistics for a call to an external function and records an
 * output matrix for the first output variable in {@code callVars}, where possible.
 *
 * <ul>
 *   <li>{@code OrderWrapper}: the output takes the dimensions and number of
 *       non-zeros of the first input.</li>
 *   <li>{@code DynamicReadMatrixCP} / {@code DynamicReadMatrixRcCP}: if inputs 1
 *       (rows) and 2 (cols) are both literals, the output takes those dimensions,
 *       with -1 passed as the non-zero count; otherwise nothing is recorded.</li>
 *   <li>Any other class: delegates to
 *       {@code extractFunctionCallUnknownReturnStatistics}.</li>
 * </ul>
 *
 * @param fstmt    external function statement providing the implementing class name
 * @param fop      function call operator providing inputs and output variable names
 * @param callVars variable map that receives the derived output matrix object
 */
private static void extractExternalFunctionCallReturnStatistics(ExternalFunctionStatement fstmt, FunctionOp fop, LocalVariableMap callVars) {
    String className = fstmt.getOtherParams().get(ExternalFunctionStatement.CLASS_NAME);
    if (className.equals(OrderWrapper.class.getName())) {
        // this branch is only reached for OrderWrapper, so the input nnz is used
        // directly; the former re-check of the class name here had a dead -1 arm
        Hop input = fop.getInput().get(0);
        long lnnz = input.getNnz();
        MatrixObject moOut = createOutputMatrix(input.getDim1(), input.getDim2(), lnnz);
        callVars.put(fop.getOutputVariableNames()[0], moOut);
    } else if (className.equals(DynamicReadMatrixCP.class.getName()) || className.equals(DynamicReadMatrixRcCP.class.getName())) {
        // rows
        Hop input1 = fop.getInput().get(1);
        // cols
        Hop input2 = fop.getInput().get(2);
        // dimensions can only be derived when both are literals
        if (input1 instanceof LiteralOp && input2 instanceof LiteralOp) {
            callVars.put(fop.getOutputVariableNames()[0], createOutputMatrix(((LiteralOp) input1).getLongValue(), ((LiteralOp) input2).getLongValue(), -1));
        }
    } else {
        extractFunctionCallUnknownReturnStatistics(fstmt, fop, callVars);
    }
}
Also used : MatrixObject(org.apache.sysml.runtime.controlprogram.caching.MatrixObject) OrderWrapper(org.apache.sysml.udf.lib.OrderWrapper) DynamicReadMatrixRcCP(org.apache.sysml.udf.lib.DynamicReadMatrixRcCP) Hop(org.apache.sysml.hops.Hop) DynamicReadMatrixCP(org.apache.sysml.udf.lib.DynamicReadMatrixCP) LiteralOp(org.apache.sysml.hops.LiteralOp)

Aggregations

MatrixObject (org.apache.sysml.runtime.controlprogram.caching.MatrixObject)201 DMLRuntimeException (org.apache.sysml.runtime.DMLRuntimeException)74 MatrixCharacteristics (org.apache.sysml.runtime.matrix.MatrixCharacteristics)45 MatrixBlock (org.apache.sysml.runtime.matrix.data.MatrixBlock)39 Data (org.apache.sysml.runtime.instructions.cp.Data)37 MetaDataFormat (org.apache.sysml.runtime.matrix.MetaDataFormat)26 Pointer (jcuda.Pointer)20 CSRPointer (org.apache.sysml.runtime.instructions.gpu.context.CSRPointer)20 IOException (java.io.IOException)17 ArrayList (java.util.ArrayList)16 ScalarObject (org.apache.sysml.runtime.instructions.cp.ScalarObject)14 OutputInfo (org.apache.sysml.runtime.matrix.data.OutputInfo)13 CacheableData (org.apache.sysml.runtime.controlprogram.caching.CacheableData)12 RDDObject (org.apache.sysml.runtime.instructions.spark.data.RDDObject)12 Hop (org.apache.sysml.hops.Hop)11 MatrixFormatMetaData (org.apache.sysml.runtime.matrix.MatrixFormatMetaData)11 ParForProgramBlock (org.apache.sysml.runtime.controlprogram.ParForProgramBlock)10 MatrixIndexes (org.apache.sysml.runtime.matrix.data.MatrixIndexes)10 Path (org.apache.hadoop.fs.Path)9 LongWritable (org.apache.hadoop.io.LongWritable)9