use of org.apache.sysml.runtime.controlprogram.caching.MatrixObject in project incubator-systemml by apache.
the class OptimizerRuleBased method rewriteSetSparkEagerRDDCaching.
// /////
// REWRITE set spark eager rdd caching
// /
/**
 * Determines which variables read by the root parfor should be eagerly cached
 * as Spark RDDs and, if any qualify, registers them on the parfor program block.
 *
 * The rewrite is only considered in Spark execution mode, for a parfor whose
 * exec type is CP (local parfor), and when {@code _N > 1} (at least 2 iterations).
 * A variable qualifies if it is a {@code MatrixObject} with an RDD handle, is not
 * one of the parfor's Spark repartition variables, its RDD reports checkpoint RDD
 * children (is cached rdd), and {@code _lm / n.getK()} is smaller than the
 * estimated size of the matrix (is out-of-core dataset).
 *
 * @param n internal representation of a plan alternative for program blocks and instructions
 * @param vars local variable map
 */
protected void rewriteSetSparkEagerRDDCaching(OptNode n, LocalVariableMap vars) {
    // resolve statement block and program block of the root parfor
    Object[] mapped = OptTreeConverter.getAbstractPlanMapping().getMappedProg(n.getID());
    ParForStatementBlock stmtBlock = (ParForStatementBlock) mapped[0];
    ParForProgramBlock progBlock = (ParForProgramBlock) mapped[1];
    ArrayList<String> ret = new ArrayList<>();

    // preconditions: spark exec mode, local parfor, at least 2 iterations
    boolean applicable = OptimizerUtils.isSparkExecutionMode()
        && n.getExecType() == ExecType.CP
        && _N > 1;

    if (applicable) {
        Set<String> readVars = stmtBlock.variablesRead().getVariableNames();
        Collection<String> repartVars = progBlock.getSparkRepartitionVariables();

        for (String name : readVars) {
            // only matrix objects that already carry an rdd handle are candidates
            Data data = vars.get(name);
            if (!(data instanceof MatrixObject)) {
                continue;
            }
            MatrixObject matrix = (MatrixObject) data;
            RDDObject handle = matrix.getRDDHandle();
            if (handle == null) {
                continue;
            }

            // skip repartition variables
            if (repartVars != null && repartVars.contains(name)) {
                continue;
            }
            // skip rdds that are not cached
            if (!handle.rHasCheckpointRDDChilds()) {
                continue;
            }
            // keep only out-of-core datasets
            MatrixCharacteristics mc = matrix.getMatrixCharacteristics();
            if (_lm / n.getK() < OptimizerUtils.estimateSizeExactSparsity(mc)) {
                ret.add(name);
            }
        }

        // apply rewrite to parfor pb
        if (!ret.isEmpty()) {
            progBlock.setSparkEagerCacheVariables(ret);
        }
    }

    _numEvaluatedPlans++;
    LOG.debug(getOptMode() + " OPT: rewrite 'set spark eager rdd caching' - result=" + ret.size() + " (" + ProgramConverter.serializeStringCollection(ret) + ")");
}
use of org.apache.sysml.runtime.controlprogram.caching.MatrixObject in project incubator-systemml by apache.
the class OptimizerRuleBased method rewriteDataColocation.
// /////
// REWRITE enable data colocation
// /
/**
 * Enables data colocation for the partitioned matrix with the largest number of
 * non-zeros among the colocation candidates of the given parfor node.
 *
 * The rewrite is only considered if the node's data partitioner parameter equals
 * {@code PDataPartitioner.REMOTE_MR} and its exec type is {@code ExecType.MR}.
 *
 * NOTE: if MAX_REPLICATION_FACTOR_PARTITIONING is set larger than 10, co-location may
 * throw warnings per split since this exceeds "max block locations"
 *
 * @param n internal representation of a plan alternative for program blocks and instructions
 * @param vars local variable map
 */
protected void rewriteDataColocation(OptNode n, LocalVariableMap vars) {
    // data colocation is beneficial if we have dp=REMOTE_MR, etype=REMOTE_MR
    // and there is at least one direct col-/row-wise access with the index variable
    // on the partitioned matrix
    String partitioner = n.getParam(ParamType.DATA_PARTITIONER);
    ParForProgramBlock pfpb = (ParForProgramBlock) OptTreeConverter.getAbstractPlanMapping().getMappedProg(n.getID())[1];

    boolean apply = false;
    String varname = null;

    boolean remoteMR = partitioner != null
        && partitioner.equals(PDataPartitioner.REMOTE_MR.toString())
        && n.getExecType() == ExecType.MR;

    if (remoteMR) {
        // collect all candidate matrices (at least one partitioned access via iterVar)
        HashSet<String> candidates = new HashSet<>();
        rFindDataColocationCandidates(n, candidates, pfpb.getIterVar());

        // pick the largest matrix (based on nnz to account for sparsity)
        long bestNnz = Long.MIN_VALUE;
        for (String name : candidates) {
            MatrixObject matrix = (MatrixObject) vars.get(name);
            if (matrix == null) {
                continue;
            }
            long nnz = matrix.getNnz();
            if (nnz > bestNnz) {
                bestNnz = nnz;
                varname = name;
                apply = true;
            }
        }
    }

    // modify the runtime plan (apply is true if at least one candidate was selected)
    if (apply) {
        pfpb.enableColocatedPartitionedMatrix(varname);
    }

    _numEvaluatedPlans++;
    String selected = apply ? " (" + varname + ")" : "";
    LOG.debug(getOptMode() + " OPT: rewrite 'enable data colocation' - result=" + apply + selected);
}
use of org.apache.sysml.runtime.controlprogram.caching.MatrixObject in project incubator-systemml by apache.
the class ExecutionContext method releaseMatrixInput.
/**
 * Looks up the matrix object bound to the given variable name and releases it,
 * passing {@code null} as the argument to {@code release}.
 *
 * @param varName name of the variable whose matrix object is released
 */
public void releaseMatrixInput(String varName) {
    getMatrixObject(varName).release(null);
}
use of org.apache.sysml.runtime.controlprogram.caching.MatrixObject in project incubator-systemml by apache.
the class SparkExecutionContext method cacheMatrixObject.
/**
 * Forces caching of the RDD behind the matrix object bound to the given variable,
 * provided the matrix exceeds the caching threshold and its RDD is not cached yet.
 *
 * @param var name of the variable whose matrix object should be cached
 */
@SuppressWarnings("unchecked")
public void cacheMatrixObject(String var) {
    MatrixObject matrix = getMatrixObject(var);

    // double check size first to avoid unnecessary spark context creation
    boolean exceedsThreshold = OptimizerUtils.exceedsCachingThreshold(
        matrix.getNumColumns(),
        (double) OptimizerUtils.estimateSizeExactSparsity(matrix.getMatrixCharacteristics()));

    if (exceedsThreshold) {
        // get input rdd in binary block format
        JavaPairRDD<MatrixIndexes, MatrixBlock> rdd =
            (JavaPairRDD<MatrixIndexes, MatrixBlock>) getRDDHandleForMatrixObject(matrix, InputInfo.BinaryBlockInputInfo);

        // force rdd caching if not already cached; the count triggers
        // caching up front to prevent contention
        if (!isRDDCached(rdd.id())) {
            rdd.count();
        }
    }
}
use of org.apache.sysml.runtime.controlprogram.caching.MatrixObject in project incubator-systemml by apache.
the class InterProceduralAnalysis method extractExternalFunctionCallReturnStatistics.
/**
 * Registers output statistics for a call to an external function, based on the
 * implementing class named in the function statement's parameters.
 *
 * <ul>
 * <li>{@code OrderWrapper}: the output matrix gets the dimensions and nnz of the
 * first input.</li>
 * <li>{@code DynamicReadMatrixCP} / {@code DynamicReadMatrixRcCP}: if inputs 1 (rows)
 * and 2 (cols) are literals, the output matrix gets those dimensions with nnz
 * {@code -1}; otherwise nothing is registered.</li>
 * <li>any other class: delegates to
 * {@code extractFunctionCallUnknownReturnStatistics}.</li>
 * </ul>
 *
 * @param fstmt external function statement providing the class name parameter
 * @param fop function call operator providing inputs and output variable names
 * @param callVars variable map that receives the output matrix object
 */
private static void extractExternalFunctionCallReturnStatistics(ExternalFunctionStatement fstmt, FunctionOp fop, LocalVariableMap callVars) {
    String className = fstmt.getOtherParams().get(ExternalFunctionStatement.CLASS_NAME);
    if (className.equals(OrderWrapper.class.getName())) {
        // this branch is only entered for OrderWrapper, so the input nnz is
        // always propagated (the former "-1" alternative was unreachable)
        Hop input = fop.getInput().get(0);
        MatrixObject moOut = createOutputMatrix(input.getDim1(), input.getDim2(), input.getNnz());
        callVars.put(fop.getOutputVariableNames()[0], moOut);
    } else if (className.equals(DynamicReadMatrixCP.class.getName()) || className.equals(DynamicReadMatrixRcCP.class.getName())) {
        // rows
        Hop input1 = fop.getInput().get(1);
        // cols
        Hop input2 = fop.getInput().get(2);
        // output dimensions are only known if both are given as literals
        if (input1 instanceof LiteralOp && input2 instanceof LiteralOp) {
            callVars.put(fop.getOutputVariableNames()[0], createOutputMatrix(((LiteralOp) input1).getLongValue(), ((LiteralOp) input2).getLongValue(), -1));
        }
    } else {
        extractFunctionCallUnknownReturnStatistics(fstmt, fop, callVars);
    }
}
Aggregations