Usage of org.apache.sysml.hops.DataOp in the Apache SystemML project: the rewriteStatementBlock method of the RewriteInjectSparkLoopCheckpointing class.
/**
 * Injects Spark checkpoint operations for loop statement blocks: every matrix
 * that is read but never updated inside a while/for/parfor body is wrapped in a
 * transient read (marked as requiring a checkpoint) plus a transient write, emitted
 * in a new statement block placed directly before the loop.
 *
 * @param sb     the statement block to inspect (only loop blocks are rewritten)
 * @param status global rewrite status (provides block size, parfor context, and
 *               records that checkpoints were injected)
 * @return the (possibly extended) list of statement blocks replacing {@code sb}
 */
@Override
public List<StatementBlock> rewriteStatementBlock(StatementBlock sb, ProgramRewriteStatus status) {
    // checkpoints only pay off in Spark execution mode; otherwise pass through
    if (!OptimizerUtils.isSparkExecutionMode())
        return Arrays.asList(sb);

    // NOTE: (1) checkpoints are added without global program knowledge, relying on
    // runtime (instruction-level) suppression of redundant checkpoints; (2) size
    // information is ignored, so candidates used only by CP operations still qualify.
    ArrayList<StatementBlock> result = new ArrayList<>();
    int blocksize = status.getBlocksize(); // block size set by reblock rewrite

    boolean isLoopBlock = (sb instanceof WhileStatementBlock || sb instanceof ForStatementBlock); // incl parfor
    // context check disabled or not inside parfor (otherwise we would prevent remote parfor)
    boolean contextOk = !_checkCtx || !status.isInParforContext();
    if (isLoopBlock && contextOk) {
        // step 1: determine checkpointing candidates (read-only matrices)
        VariableSet read = sb.variablesRead();
        VariableSet updated = sb.variablesUpdated();
        ArrayList<String> candidates = new ArrayList<>();
        for (String rvar : read.getVariableNames()) {
            if (!updated.containsVariable(rvar)
                && read.getVariable(rvar).getDataType() == DataType.MATRIX) {
                candidates.add(rvar);
            }
        }
        // step 2: insert statement block with checkpointing operations
        if (!candidates.isEmpty()) {
            StatementBlock cpBlock = new StatementBlock();
            cpBlock.setDMLProg(sb.getDMLProg());
            cpBlock.setParseInfo(sb);
            ArrayList<Hop> roots = new ArrayList<>();
            VariableSet livein = new VariableSet();
            VariableSet liveout = new VariableSet();
            for (String var : candidates) {
                DataIdentifier dat = read.getVariable(var);
                // indexed identifiers expose their original (pre-indexing) dimensions
                boolean indexed = (dat instanceof IndexedIdentifier);
                long rows = indexed ? ((IndexedIdentifier) dat).getOrigDim1() : dat.getDim1();
                long cols = indexed ? ((IndexedIdentifier) dat).getOrigDim2() : dat.getDim2();
                // transient read marked for checkpointing, fed into a transient write
                DataOp tread = new DataOp(var, DataType.MATRIX, ValueType.DOUBLE,
                    DataOpTypes.TRANSIENTREAD, dat.getFilename(), rows, cols,
                    dat.getNnz(), blocksize, blocksize);
                tread.setRequiresCheckpoint(true);
                DataOp twrite = new DataOp(var, DataType.MATRIX, ValueType.DOUBLE,
                    tread, DataOpTypes.TRANSIENTWRITE, null);
                HopRewriteUtils.setOutputParameters(twrite, rows, cols, blocksize, blocksize, dat.getNnz());
                roots.add(twrite);
                livein.addVariable(var, dat);
                liveout.addVariable(var, dat);
            }
            cpBlock.setHops(roots);
            cpBlock.setLiveIn(livein);
            cpBlock.setLiveOut(liveout);
            cpBlock.setSplitDag(true);
            result.add(cpBlock);
            // maintain rewrite status
            status.setInjectedCheckpoints();
        }
    }
    // the original statement block always follows the (optional) checkpoint block
    result.add(sb);
    return result;
}
Usage of org.apache.sysml.hops.DataOp in the Apache SystemML project: the rule_RemovePersistentDataOp method of the RewriteRemovePersistentReadWrite class.
/**
 * Recursively converts registered persistent reads/writes of the given hop dag
 * into transient reads/writes (for in-memory variable binding), removing the
 * now-unnecessary file-name input for scalars and disabling reblocks for binary
 * block inputs whose block sizes already match.
 *
 * @param hop root of the (sub-)dag to process; visited hops are skipped
 */
private void rule_RemovePersistentDataOp(Hop hop) {
    // skip hops already processed in this pass (shared sub-dags)
    if (hop.isVisited())
        return;

    // process children bottom-up; index loop tolerates child-list growth
    ArrayList<Hop> children = hop.getInput();
    for (int i = 0; i < children.size(); i++)
        rule_RemovePersistentDataOp(children.get(i));

    if (hop instanceof DataOp) {
        DataOp dop = (DataOp) hop;
        DataOpTypes opType = dop.getDataOpType();
        if (opType == DataOpTypes.PERSISTENTREAD) {
            if (_inputs.contains(dop.getName())) {
                dop.setDataOpType(DataOpTypes.TRANSIENTREAD);
                // transient scalar reads carry no file name input
                if (hop.getDataType() == DataType.SCALAR)
                    dop.removeInput("iofilename");
                // disable unnecessary reblock of binary block w/ equal block sizes
                if (dop.requiresReblock() && _inputsMeta.containsKey(dop.getName())
                    && _inputsMeta.get(dop.getName()) instanceof MetaDataFormat) {
                    MetaDataFormat meta = (MetaDataFormat) _inputsMeta.get(dop.getName());
                    MatrixCharacteristics mc = meta.getMatrixCharacteristics();
                    boolean matchingBlksz = mc.getRowsPerBlock() == dop.getRowsInBlock()
                        && mc.getColsPerBlock() == dop.getColsInBlock();
                    // binary matrix w/ matching dims and frames do not require reblock
                    if (meta.getInputInfo() == InputInfo.BinaryBlockInputInfo
                        && (matchingBlksz || dop.getDataType() == DataType.FRAME)) {
                        dop.setRequiresReblock(false);
                    }
                }
            }
            else {
                LOG.warn("Non-registered persistent read of variable '" + dop.getName()
                    + "' (line " + dop.getBeginLine() + ").");
            }
        }
        else if (opType == DataOpTypes.PERSISTENTWRITE) {
            if (_outputs.contains(dop.getName())) {
                dop.setDataOpType(DataOpTypes.TRANSIENTWRITE);
                // inherit block sizes from the written input
                dop.setRowsInBlock(dop.getInput().get(0).getRowsInBlock());
                dop.setColsInBlock(dop.getInput().get(0).getColsInBlock());
                // transient scalar writes carry no file name input
                if (hop.getDataType() == DataType.SCALAR)
                    dop.removeInput("iofilename");
            }
            else {
                LOG.warn("Non-registered persistent write of variable '" + dop.getName()
                    + "' (line " + dop.getBeginLine() + ").");
            }
        }
        // all other data op types remain untouched
    }

    // mark processed
    hop.setVisited();
}
Usage of org.apache.sysml.hops.DataOp in the Apache SystemML project: the rewriteStatementBlock method of the RewriteSplitDagDataDependentOperators class.
/**
 * Splits the hop dag of a last-level statement block at data-dependent operators
 * (operators whose output size is unknown at compile time), so that sizes can be
 * recompiled between the resulting dags. Each candidate sub-dag is moved into a
 * new preceding statement block and reconnected via transient write/read pairs.
 *
 * @param sb    the statement block whose hop dag may be split
 * @param state global rewrite status (passed through to recursive applications)
 * @return the list of statement blocks replacing {@code sb} (split blocks first,
 *         then {@code sb} itself with the remaining hops)
 * @throws HopsException if rewiring the hop dag fails
 */
@Override
public List<StatementBlock> rewriteStatementBlock(StatementBlock sb, ProgramRewriteStatus state) {
    // DAG splits not required for forced single node,
    // and only last-level statement blocks carry a hop dag to split
    if (DMLScript.rtplatform == RUNTIME_PLATFORM.SINGLE_NODE || !HopRewriteUtils.isLastLevelStatementBlock(sb))
        return Arrays.asList(sb);
    ArrayList<StatementBlock> ret = new ArrayList<>();
    // collect all data-dependent operator candidates from the hop dag
    ArrayList<Hop> cand = new ArrayList<>();
    collectDataDependentOperators(sb.getHops(), cand);
    Hop.resetVisitStatus(sb.getHops());
    // split hop dag on demand
    if (!cand.isEmpty()) {
        // collect child operators of candidates (to prevent rewrite anomalies)
        HashSet<Hop> candChilds = new HashSet<>();
        collectCandidateChildOperators(cand, candChilds);
        try {
            // duplicate sb incl live variable sets
            StatementBlock sb1 = new StatementBlock();
            sb1.setDMLProg(sb.getDMLProg());
            sb1.setParseInfo(sb);
            sb1.setLiveIn(new VariableSet());
            sb1.setLiveOut(new VariableSet());
            // move data-dependent ops incl transient writes to new statement block
            // (and replace original persistent read with transient read)
            ArrayList<Hop> sb1hops = new ArrayList<>();
            for (Hop c : cand) {
                // if there are already transient writes use them and don't introduce artificial variables;
                // unless there are transient reads w/ the same variable name in the current dag which can
                // lead to invalid reordering if variable consumers are not feeding into the candidate op.
                boolean hasTWrites = hasTransientWriteParents(c);
                boolean moveTWrite = hasTWrites ? HopRewriteUtils.rHasSimpleReadChain(c, getFirstTransientWriteParent(c).getName()) : false;
                String varname = null;
                // capture output characteristics of the candidate for the cut variable
                long rlen = c.getDim1();
                long clen = c.getDim2();
                long nnz = c.getNnz();
                UpdateType update = c.getUpdateType();
                int brlen = c.getRowsInBlock();
                int bclen = c.getColsInBlock();
                // case 1: reuse existing transient_write
                if (hasTWrites && moveTWrite) {
                    Hop twrite = getFirstTransientWriteParent(c);
                    varname = twrite.getName();
                    // create new transient read
                    DataOp tread = new DataOp(varname, c.getDataType(), c.getValueType(), DataOpTypes.TRANSIENTREAD, null, rlen, clen, nnz, update, brlen, bclen);
                    tread.setVisited();
                    HopRewriteUtils.copyLineNumbers(c, tread);
                    // replace data-dependent operator with transient read
                    // (copy of parent list prevents concurrent modification by index access)
                    ArrayList<Hop> parents = new ArrayList<>(c.getParent());
                    for (int i = 0; i < parents.size(); i++) {
                        Hop parent = parents.get(i);
                        // anomaly filter: skip parents that are themselves candidate children
                        if (!candChilds.contains(parent)) {
                            if (parent != twrite)
                                HopRewriteUtils.replaceChildReference(parent, c, tread);
                            else
                                // the reused write moves to sb1, so drop it as a root of sb
                                sb.getHops().remove(parent);
                        }
                    }
                    // add data-dependent operator sub dag to first statement block
                    sb1hops.add(twrite);
                }
                // case 2: create transient write to artificial variables
                else {
                    varname = createCutVarName(false);
                    // create new transient read
                    DataOp tread = new DataOp(varname, c.getDataType(), c.getValueType(), DataOpTypes.TRANSIENTREAD, null, rlen, clen, nnz, update, brlen, bclen);
                    tread.setVisited();
                    HopRewriteUtils.copyLineNumbers(c, tread);
                    // replace data-dependent operator with transient read
                    // (copy of parent list prevents concurrent modification by index access)
                    ArrayList<Hop> parents = new ArrayList<>(c.getParent());
                    for (int i = 0; i < parents.size(); i++) {
                        Hop parent = parents.get(i);
                        // anomaly filter: skip parents that are themselves candidate children
                        if (!candChilds.contains(parent))
                            HopRewriteUtils.replaceChildReference(parent, c, tread);
                    }
                    // add data-dependent operator sub dag to first statement block
                    DataOp twrite = new DataOp(varname, c.getDataType(), c.getValueType(), c, DataOpTypes.TRANSIENTWRITE, null);
                    twrite.setVisited();
                    twrite.setOutputParams(rlen, clen, nnz, update, brlen, bclen);
                    HopRewriteUtils.copyLineNumbers(c, twrite);
                    sb1hops.add(twrite);
                }
                // update live in and out of new statement block (for piggybacking)
                DataIdentifier diVar = new DataIdentifier(varname);
                diVar.setDimensions(rlen, clen);
                diVar.setBlockDimensions(brlen, bclen);
                diVar.setDataType(c.getDataType());
                diVar.setValueType(c.getValueType());
                sb1.liveOut().addVariable(varname, new DataIdentifier(diVar));
                sb.liveIn().addVariable(varname, new DataIdentifier(diVar));
            }
            // ensure disjoint operators across DAGs (prevent replicated operations)
            handleReplicatedOperators(sb1hops, sb.getHops(), sb1.liveOut(), sb.liveIn());
            // deep copy new dag (in order to prevent any dangling references)
            sb1.setHops(Recompiler.deepCopyHopsDag(sb1hops));
            sb1.updateRecompilationFlag();
            // avoid later merge by other rewrites
            sb1.setSplitDag(true);
            // recursive application of rewrite rule (in case of multiple data dependent operators
            // with data dependencies in between each other)
            List<StatementBlock> tmp = rewriteStatementBlock(sb1, state);
            // add new statement blocks to output:
            // statement block(s) with data dependent hops first
            ret.addAll(tmp);
            // then the statement block with remaining hops
            ret.add(sb);
            // avoid later merge by other rewrites
            sb.setSplitDag(true);
        } catch (Exception ex) {
            throw new HopsException("Failed to split hops dag for data dependent operators with unknown size.", ex);
        }
        LOG.debug("Applied splitDagDataDependentOperators (lines " + sb.getBeginLine() + "-" + sb.getEndLine() + ").");
    }
    // no candidates: keep original hop dag
    else {
        ret.add(sb);
    }
    return ret;
}
Usage of org.apache.sysml.hops.DataOp in the Apache SystemML project: the handleReplicatedOperators method of the RewriteSplitDagDataDependentOperators class.
/**
 * Ensures the two split hop dags share no operators: every operator of the second
 * dag that also appears in the first dag is cut out and replaced by a transient
 * read, backed by a transient write added as a new root of the first dag.
 * Transient reads are reused (cached by hop id) for repeated references.
 *
 * @param rootsSB1 roots of the first dag (extended with new transient writes)
 * @param rootsSB2 roots of the second dag (rewired to transient reads)
 * @param sb1out   live-out variables of the first block (extended per cut)
 * @param sb2in    live-in variables of the second block (extended per cut)
 */
private void handleReplicatedOperators(ArrayList<Hop> rootsSB1, ArrayList<Hop> rootsSB2, VariableSet sb1out, VariableSet sb2in) {
    // step 1: collect all operators of the first dag as the probe set
    HashSet<Hop> probeSet = new HashSet<>();
    Hop.resetVisitStatus(rootsSB1);
    for (Hop root : rootsSB1)
        rAddHopsToProbeSet(root, probeSet);

    // step 2: probe second dag top-down, collecting (parent, replicated child) cut candidates
    HashSet<Pair<Hop, Hop>> candSet = new HashSet<>();
    Hop.resetVisitStatus(rootsSB2);
    for (Hop root : rootsSB2)
        rProbeAndAddHopsToCandidateSet(root, probeSet, candSet);

    // step 3: create additional cuts, reusing transient reads for common references
    HashMap<Long, DataOp> readCache = new HashMap<>();
    for (Pair<Hop, Hop> cut : candSet) {
        Hop parent = cut.getKey();
        Hop child = cut.getValue();
        DataOp tread = readCache.get(child.getHopID());
        if (tread == null) {
            // first cut of this child: create the write/read pair once
            String varname = createCutVarName(false);
            tread = new DataOp(varname, child.getDataType(), child.getValueType(),
                DataOpTypes.TRANSIENTREAD, null, child.getDim1(), child.getDim2(), child.getNnz(),
                child.getUpdateType(), child.getRowsInBlock(), child.getColsInBlock());
            tread.setVisited();
            HopRewriteUtils.copyLineNumbers(child, tread);
            readCache.put(child.getHopID(), tread);
            DataOp twrite = new DataOp(varname, child.getDataType(), child.getValueType(),
                child, DataOpTypes.TRANSIENTWRITE, null);
            twrite.setVisited();
            twrite.setOutputParams(child.getDim1(), child.getDim2(), child.getNnz(),
                child.getUpdateType(), child.getRowsInBlock(), child.getColsInBlock());
            HopRewriteUtils.copyLineNumbers(child, twrite);
            // update live in and out of new statement block (for piggybacking)
            DataIdentifier diVar = new DataIdentifier(varname);
            diVar.setDimensions(child.getDim1(), child.getDim2());
            diVar.setBlockDimensions(child.getRowsInBlock(), child.getColsInBlock());
            diVar.setDataType(child.getDataType());
            diVar.setValueType(child.getValueType());
            sb1out.addVariable(varname, new DataIdentifier(diVar));
            sb2in.addVariable(varname, new DataIdentifier(diVar));
            rootsSB1.add(twrite);
        }
        // create additional cut by rewiring the parent from the child to the transient read
        int pos = HopRewriteUtils.getChildReferencePos(parent, child);
        HopRewriteUtils.removeChildReferenceByPos(parent, child, pos);
        HopRewriteUtils.addChildReference(parent, tread, pos);
    }
}
Usage of org.apache.sysml.hops.DataOp in the Apache SystemML project: the rule_RehangTransientWriteParents method of the RewriteTransientWriteParentHandling class.
/**
 * Turns transient writes that still have parents into dag roots: each parent is
 * rewired to consume the write's input directly, the write is detached from its
 * parents and appended to the statement block's root list, and the same rule is
 * applied recursively to the write's inputs.
 *
 * @param hop    hop to inspect (only transient writes with parents are rewritten)
 * @param sbHops root list of the statement block's hop dag (extended in place)
 */
private void rule_RehangTransientWriteParents(Hop hop, ArrayList<Hop> sbHops) {
    // guard: only transient writes that still have parents need rehanging
    boolean isTransientWrite = hop instanceof DataOp
        && ((DataOp) hop).getDataOpType() == DataOpTypes.TRANSIENTWRITE;
    if (!isTransientWrite || hop.getParent().isEmpty())
        return;

    Hop writeInput = hop.getInput().get(0);
    // rewire each parent to consume the write's input directly
    for (Hop parent : hop.getParent())
        parent.getInput().set(parent.getInput().indexOf(hop), writeInput);
    // register the rewired parents with the input (the write itself stays attached)
    writeInput.getParent().addAll(hop.getParent());
    // detach the write from its former parents
    hop.getParent().clear();
    // the transient write becomes a root of this hops dag
    sbHops.add(hop);
    // apply the same rule to the inputs (children)
    for (Hop child : hop.getInput())
        rule_RehangTransientWriteParents(child, sbHops);
}
Aggregations