use of org.apache.sysml.parser.VariableSet in project incubator-systemml by apache.
the class RewriteMarkLoopVariablesUpdateInPlace method rewriteStatementBlock.
/**
 * Marks loop variables that qualify for update-in-place on while/for (incl. parfor)
 * statement blocks.
 *
 * A variable becomes a candidate if it is updated in the loop, is of data type MATRIX,
 * is live-out of the loop (i.e., not a loop-local variable), and passes
 * rIsApplicableForUpdateInPlace on the loop body. For HADOOP and SPARK runtime
 * platforms and for non-loop blocks, the statement block is returned unmodified.
 *
 * @param sb statement block to inspect (annotated in place for loops)
 * @param status program rewrite status (not used by this rewrite)
 * @return single-element list holding the given statement block
 */
@Override
public List<StatementBlock> rewriteStatementBlock(StatementBlock sb, ProgramRewriteStatus status) {
    // nothing to do for these platforms, return original statement block
    if (DMLScript.rtplatform == RUNTIME_PLATFORM.HADOOP || DMLScript.rtplatform == RUNTIME_PLATFORM.SPARK) {
        return Arrays.asList(sb);
    }

    // only loops are of interest (ForStatementBlock covers parfor as well)
    final boolean isWhile = sb instanceof WhileStatementBlock;
    if (!isWhile && !(sb instanceof ForStatementBlock)) {
        return Arrays.asList(sb);
    }

    ArrayList<String> inPlaceVars = new ArrayList<>();
    VariableSet updatedVars = sb.variablesUpdated();
    VariableSet liveOutVars = sb.liveOut();
    for (String name : updatedVars.getVariableNames()) {
        if (updatedVars.getVariable(name).getDataType() != DataType.MATRIX) {
            continue;
        }
        // exclude loop-local variables
        if (!liveOutVars.containsVariable(name)) {
            continue;
        }
        // the loop body is obtained from the first statement of the block
        boolean applicable = isWhile
            ? rIsApplicableForUpdateInPlace(((WhileStatement) sb.getStatement(0)).getBody(), name)
            : rIsApplicableForUpdateInPlace(((ForStatement) sb.getStatement(0)).getBody(), name);
        if (applicable) {
            inPlaceVars.add(name);
        }
    }
    sb.setUpdateInPlaceVars(inPlaceVars);

    // return modified statement block
    return Arrays.asList(sb);
}
use of org.apache.sysml.parser.VariableSet in project incubator-systemml by apache.
the class RewriteSplitDagDataDependentOperators method rewriteStatementBlock.
/**
 * Splits the hop DAG of a statement block at data-dependent operators.
 *
 * If the runtime platform is SINGLE_NODE or the block is not a last-level statement
 * block, the input block is returned as is. Otherwise, candidates are gathered via
 * collectDataDependentOperators; when there are none, the block is returned unchanged.
 * For each candidate, its consumers in the original DAG are rewired to a new transient
 * read, and a transient write of the candidate (an existing one, or a new one on an
 * artificial variable name) becomes a root of a new statement block that is placed
 * before the original block. The new block is rewritten recursively by this same method.
 *
 * @param sb statement block whose hop DAG may be split (modified in place)
 * @param state program rewrite status, passed through to the recursive call
 * @return list of statement blocks: the block(s) holding the data-dependent operators
 *         followed by the original block, or just the original block if nothing was split
 * @throws HopsException wrapping any exception raised while splitting the DAG
 */
@Override
public List<StatementBlock> rewriteStatementBlock(StatementBlock sb, ProgramRewriteStatus state) {
// DAG splits not required for forced single node
if (DMLScript.rtplatform == RUNTIME_PLATFORM.SINGLE_NODE || !HopRewriteUtils.isLastLevelStatementBlock(sb))
return Arrays.asList(sb);
ArrayList<StatementBlock> ret = new ArrayList<>();
// collect all data-dependent operator hops
ArrayList<Hop> cand = new ArrayList<>();
collectDataDependentOperators(sb.getHops(), cand);
// reset visit flags of the dag (presumably set during candidate collection)
Hop.resetVisitStatus(sb.getHops());
// split hop dag on demand
if (!cand.isEmpty()) {
// collect child operators of candidates (to prevent rewrite anomalies)
HashSet<Hop> candChilds = new HashSet<>();
collectCandidateChildOperators(cand, candChilds);
try {
// create new sb with same program and parse info, but empty live variable sets
StatementBlock sb1 = new StatementBlock();
sb1.setDMLProg(sb.getDMLProg());
sb1.setParseInfo(sb);
sb1.setLiveIn(new VariableSet());
sb1.setLiveOut(new VariableSet());
// move data-dependent ops incl transient writes to new statement block
// (and replace the data-dependent operator with a transient read in the original dag)
ArrayList<Hop> sb1hops = new ArrayList<>();
for (Hop c : cand) {
// if there are already transient writes use them and don't introduce artificial variables;
// unless there are transient reads w/ the same variable name in the current dag which can
// lead to invalid reordering if variable consumers are not feeding into the candidate op.
boolean hasTWrites = hasTransientWriteParents(c);
boolean moveTWrite = hasTWrites ? HopRewriteUtils.rHasSimpleReadChain(c, getFirstTransientWriteParent(c).getName()) : false;
String varname = null;
// capture output characteristics of the candidate for the new transient read/write
long rlen = c.getDim1();
long clen = c.getDim2();
long nnz = c.getNnz();
UpdateType update = c.getUpdateType();
int brlen = c.getRowsInBlock();
int bclen = c.getColsInBlock();
if (// reuse existing transient_write
hasTWrites && moveTWrite) {
Hop twrite = getFirstTransientWriteParent(c);
varname = twrite.getName();
// create new transient read
DataOp tread = new DataOp(varname, c.getDataType(), c.getValueType(), DataOpTypes.TRANSIENTREAD, null, rlen, clen, nnz, update, brlen, bclen);
tread.setVisited();
HopRewriteUtils.copyLineNumbers(c, tread);
// replace data-dependent operator with transient read
// (iterate over a copy of the parent list because parents are rewired in the loop)
ArrayList<Hop> parents = new ArrayList<>(c.getParent());
for (int i = 0; i < parents.size(); i++) {
// prevent concurrent modification by index access
Hop parent = parents.get(i);
if (!candChilds.contains(parent)) {
// anomaly filter: parents that are child operators of candidates are left untouched
if (parent != twrite)
HopRewriteUtils.replaceChildReference(parent, c, tread);
else
// the reused transient write moves to sb1, hence drop it from the roots of sb
sb.getHops().remove(parent);
}
}
// add data-dependent operator sub dag to first statement block
sb1hops.add(twrite);
} else // create transient write to artificial variables
{
varname = createCutVarName(false);
// create new transient read
DataOp tread = new DataOp(varname, c.getDataType(), c.getValueType(), DataOpTypes.TRANSIENTREAD, null, rlen, clen, nnz, update, brlen, bclen);
tread.setVisited();
HopRewriteUtils.copyLineNumbers(c, tread);
// replace data-dependent operator with transient read
// (iterate over a copy of the parent list because parents are rewired in the loop)
ArrayList<Hop> parents = new ArrayList<>(c.getParent());
for (int i = 0; i < parents.size(); i++) {
// prevent concurrent modification by index access
Hop parent = parents.get(i);
if (// anomaly filter: skip parents that are child operators of candidates
!candChilds.contains(parent))
HopRewriteUtils.replaceChildReference(parent, c, tread);
}
// add data-dependent operator sub dag to first statement block
DataOp twrite = new DataOp(varname, c.getDataType(), c.getValueType(), c, DataOpTypes.TRANSIENTWRITE, null);
twrite.setVisited();
twrite.setOutputParams(rlen, clen, nnz, update, brlen, bclen);
HopRewriteUtils.copyLineNumbers(c, twrite);
sb1hops.add(twrite);
}
// update live in and out of new statement block (for piggybacking)
// (the variable is registered as live-out of sb1 and live-in of sb, each with its own copy)
DataIdentifier diVar = new DataIdentifier(varname);
diVar.setDimensions(rlen, clen);
diVar.setBlockDimensions(brlen, bclen);
diVar.setDataType(c.getDataType());
diVar.setValueType(c.getValueType());
sb1.liveOut().addVariable(varname, new DataIdentifier(diVar));
sb.liveIn().addVariable(varname, new DataIdentifier(diVar));
}
// ensure disjoint operators across DAGs (prevent replicated operations)
handleReplicatedOperators(sb1hops, sb.getHops(), sb1.liveOut(), sb.liveIn());
// deep copy new dag (in order to prevent any dangling references)
sb1.setHops(Recompiler.deepCopyHopsDag(sb1hops));
sb1.updateRecompilationFlag();
// avoid later merge by other rewrites
sb1.setSplitDag(true);
// recursive application of rewrite rule (in case of multiple data dependent operators
// with data dependencies in between each other)
List<StatementBlock> tmp = rewriteStatementBlock(sb1, state);
// add new statement blocks to output
// statement block with data dependent hops
ret.addAll(tmp);
// statement block with remaining hops
ret.add(sb);
// avoid later merge by other rewrites
sb.setSplitDag(true);
} catch (Exception ex) {
// wrap any failure, preserving the original cause
throw new HopsException("Failed to split hops dag for data dependent operators with unknown size.", ex);
}
LOG.debug("Applied splitDagDataDependentOperators (lines " + sb.getBeginLine() + "-" + sb.getEndLine() + ").");
} else // keep original hop dag
{
ret.add(sb);
}
return ret;
}
use of org.apache.sysml.parser.VariableSet in project systemml by apache.
the class ParForProgramBlock method exportMatricesToHDFS.
/**
 * Calls exportData on the matrix objects in the symbol table of the given execution context.
 *
 * If LIVEVAR_AWARE_EXPORT is enabled and a statement block is available, only variables
 * that are read in the parfor body are considered (to prevent unnecessary exports);
 * otherwise all variables in the symbol table are considered. Blacklisted names and
 * non-matrix variables are always skipped.
 *
 * @param ec execution context holding the variables
 * @param blacklistNames names of variables that must not be exported
 */
private void exportMatricesToHDFS(ExecutionContext ec, String... blacklistNames) {
    ParForStatementBlock sb = (ParForStatementBlock) getStatementBlock();
    Set<String> blacklist = UtilFunctions.asSet(blacklistNames);

    // optimization: restrict the export to variables read in the body, if possible
    final boolean onlyReadVars = LIVEVAR_AWARE_EXPORT && sb != null;
    final VariableSet varsRead = onlyReadVars ? sb.variablesRead() : null;

    for (String name : ec.getVariables().keySet()) {
        if (onlyReadVars && !varsRead.containsVariable(name)) {
            continue;
        }
        if (blacklist.contains(name)) {
            continue;
        }
        Data value = ec.getVariable(name);
        if (value.getDataType() == DataType.MATRIX) {
            ((MatrixObject) value).exportData(_replicationExport);
        }
    }
}
use of org.apache.sysml.parser.VariableSet in project systemml by apache.
the class RewriteMarkLoopVariablesUpdateInPlace method rewriteStatementBlock.
/**
 * Determines, for while and for (incl. parfor) statement blocks, which updated matrix
 * variables can be marked for update-in-place, and stores them on the block.
 *
 * Candidates are variables updated in the loop that have data type MATRIX, are contained
 * in the live-out set of the loop (which excludes loop-local variables), and for which
 * rIsApplicableForUpdateInPlace holds on the loop body. Nothing is marked if the runtime
 * platform is HADOOP or SPARK, or if the block is not a loop.
 *
 * @param sb statement block to inspect (annotated in place for loops)
 * @param status program rewrite status (not used by this rewrite)
 * @return single-element list holding the given statement block
 */
@Override
public List<StatementBlock> rewriteStatementBlock(StatementBlock sb, ProgramRewriteStatus status) {
    final boolean skipPlatform = DMLScript.rtplatform == RUNTIME_PLATFORM.HADOOP
        || DMLScript.rtplatform == RUNTIME_PLATFORM.SPARK;
    final boolean whileLoop = sb instanceof WhileStatementBlock;
    // ForStatementBlock includes parfor
    final boolean anyLoop = whileLoop || sb instanceof ForStatementBlock;

    if (!skipPlatform && anyLoop) {
        ArrayList<String> marked = new ArrayList<>();
        VariableSet updates = sb.variablesUpdated();
        VariableSet liveAfterLoop = sb.liveOut();
        for (String var : updates.getVariableNames()) {
            // matrix variables only; the live-out check excludes local vars
            boolean liveMatrix = updates.getVariable(var).getDataType() == DataType.MATRIX
                && liveAfterLoop.containsVariable(var);
            if (!liveMatrix) {
                continue;
            }
            boolean applicable;
            if (whileLoop) {
                WhileStatement loop = (WhileStatement) sb.getStatement(0);
                applicable = rIsApplicableForUpdateInPlace(loop.getBody(), var);
            } else {
                ForStatement loop = (ForStatement) sb.getStatement(0);
                applicable = rIsApplicableForUpdateInPlace(loop.getBody(), var);
            }
            if (applicable) {
                marked.add(var);
            }
        }
        sb.setUpdateInPlaceVars(marked);
    }

    // original block for skipped platforms / non-loops, modified block otherwise
    return Arrays.asList(sb);
}
use of org.apache.sysml.parser.VariableSet in project systemml by apache.
the class RewriteSplitDagUnknownCSVRead method rewriteStatementBlock.
/**
 * Splits the hop DAG of a statement block at csv reads of unknown size.
 *
 * If the runtime platform is SINGLE_NODE or the block is not a last-level statement
 * block, the input block is returned as is. Otherwise, candidates are gathered via
 * collectCSVReadHopsUnknownSize. Each candidate (reblock) is moved, behind a new transient
 * write, into a new statement block that precedes the original one, while its consumers
 * in the original DAG are rewired to a transient read of the same variable name.
 *
 * @param sb statement block whose hop DAG may be split (modified in place)
 * @param state program rewrite status (not used by this rewrite)
 * @return the new block followed by the original block if a split happened, otherwise
 *         only the original block
 * @throws HopsException wrapping any exception raised while splitting the DAG
 */
@Override
public List<StatementBlock> rewriteStatementBlock(StatementBlock sb, ProgramRewriteStatus state) {
    // DAG splits not required for forced single node
    final boolean forcedSingleNode = DMLScript.rtplatform == RUNTIME_PLATFORM.SINGLE_NODE;
    if (forcedSingleNode || !HopRewriteUtils.isLastLevelStatementBlock(sb)) {
        return Arrays.asList(sb);
    }

    ArrayList<StatementBlock> result = new ArrayList<>();

    // collect all csv read hops of unknown size
    ArrayList<Hop> csvReblocks = new ArrayList<>();
    collectCSVReadHopsUnknownSize(sb.getHops(), csvReblocks);

    // keep original hop dag if there is nothing to split
    if (csvReblocks.isEmpty()) {
        result.add(sb);
        return result;
    }

    try {
        // new sb with same program and parse info, but empty live variable sets
        StatementBlock readBlock = new StatementBlock();
        readBlock.setDMLProg(sb.getDMLProg());
        readBlock.setParseInfo(sb);
        readBlock.setLiveIn(new VariableSet());
        readBlock.setLiveOut(new VariableSet());

        // move csv reads incl reblock to new statement block
        // (and replace them in the original dag with transient reads)
        ArrayList<Hop> readRoots = new ArrayList<>();
        for (Hop reblock : csvReblocks) {
            long numRows = reblock.getDim1();
            long numCols = reblock.getDim2();
            long nonZeros = reblock.getNnz();
            UpdateType updateType = reblock.getUpdateType();
            int rowsPerBlock = reblock.getRowsInBlock();
            int colsPerBlock = reblock.getColsInBlock();

            // give the reblock private copies of its literal inputs
            // (otherwise, for instance, literal ops are shared across dags)
            for (int pos = 0; pos < reblock.getInput().size(); pos++) {
                Hop input = reblock.getInput().get(pos);
                if (input instanceof LiteralOp) {
                    HopRewriteUtils.replaceChildReference(reblock, input, new LiteralOp((LiteralOp) input));
                }
            }

            // create new transient read
            DataOp transientRead = new DataOp(reblock.getName(), reblock.getDataType(), reblock.getValueType(),
                DataOpTypes.TRANSIENTREAD, null, numRows, numCols, nonZeros, updateType, rowsPerBlock, colsPerBlock);
            HopRewriteUtils.copyLineNumbers(reblock, transientRead);

            // replace reblock with transient read (iterate over a snapshot of the parents)
            ArrayList<Hop> consumers = new ArrayList<>(reblock.getParent());
            for (Hop consumer : consumers) {
                HopRewriteUtils.replaceChildReference(consumer, reblock, transientRead);
            }

            // add reblock sub dag to first statement block
            DataOp transientWrite = new DataOp(reblock.getName(), reblock.getDataType(), reblock.getValueType(),
                reblock, DataOpTypes.TRANSIENTWRITE, null);
            transientWrite.setOutputParams(numRows, numCols, nonZeros, updateType, rowsPerBlock, colsPerBlock);
            HopRewriteUtils.copyLineNumbers(reblock, transientWrite);
            readRoots.add(transientWrite);

            // update live in and out of new statement block (for piggybacking)
            DataIdentifier readVar = sb.variablesRead().getVariable(reblock.getName());
            if (readVar != null) {
                // var read should always exist because persistent read
                readBlock.liveOut().addVariable(reblock.getName(), new DataIdentifier(readVar));
                sb.liveIn().addVariable(reblock.getName(), new DataIdentifier(readVar));
            }
        }
        readBlock.setHops(readRoots);
        readBlock.updateRecompilationFlag();

        // statement block with csv reblocks, followed by the block with remaining hops
        result.add(readBlock);
        result.add(sb);

        // avoid later merge by other rewrites
        sb.setSplitDag(true);
    } catch (Exception ex) {
        throw new HopsException("Failed to split hops dag for csv read with unknown size.", ex);
    }
    LOG.debug("Applied splitDagUnknownCSVRead.");
    return result;
}
Aggregations