Use of org.apache.sysml.parser.VariableSet in project incubator-systemml by Apache.
Class OptimizerRuleBased, method rIsInLoop.
/**
 * Recursively checks whether candidate LeftIndexingOp hops are executed inside a
 * sequential loop (while, for, or a parfor with degree of parallelism 1). For each
 * UIP candidate it records where the intermediate is defined (DataGenOp) and marks
 * the candidate as loop-applicable if the definition precedes the left-indexing
 * within such a loop.
 *
 * @param pn OptNode of the parfor loop (or a descendant node during recursion)
 * @param uipCandHopHM map of UIPCandidateHop lists, keyed by variable name
 * @param bInLoop true if an enclosing sequential loop has already been entered
 * @throws DMLRuntimeException if plan-mapping lookup or processing fails
 */
private void rIsInLoop(OptNode pn, HashMap<String, ArrayList<UIPCandidateHop>> uipCandHopHM, boolean bInLoop) throws DMLRuntimeException {
if (!pn.isLeaf()) {
// Inner node: resolve the mapped program block to inspect its updated-variables set.
ProgramBlock pb = (ProgramBlock) OptTreeConverter.getAbstractPlanMapping().getMappedProg(pn.getID())[1];
VariableSet varUpdated = pb.getStatementBlock().variablesUpdated();
boolean bUIPCandHopUpdated = false;
for (Entry<String, ArrayList<UIPCandidateHop>> entry : uipCandHopHM.entrySet()) {
String uipCandHopID = entry.getKey();
if (varUpdated.containsVariable(uipCandHopID)) {
bUIPCandHopUpdated = true;
break;
}
}
// As none of the UIP candidates is updated in this DAG, no need for further processing within this DAG.
if (!bUIPCandHopUpdated)
return;
// Propagate the loop flag: set once we are inside any sequential loop
// (while, for, or parfor with dop==1; a parallel parfor does not qualify).
boolean bLoop = false;
if (bInLoop || pb instanceof WhileProgramBlock || (pb instanceof ParForProgramBlock && ((ParForProgramBlock) pb).getDegreeOfParallelism() == 1) || (pb instanceof ForProgramBlock && !(pb instanceof ParForProgramBlock)))
bLoop = true;
for (OptNode optNode : pn.getChilds()) {
rIsInLoop(optNode, uipCandHopHM, bLoop);
}
} else {
// Leaf node: inspect the mapped hop against every UIP candidate.
Hop hop = (Hop) OptTreeConverter.getAbstractPlanMapping().getMappedHop(pn.getID());
for (Entry<String, ArrayList<UIPCandidateHop>> entry : uipCandHopHM.entrySet()) {
ArrayList<UIPCandidateHop> uipCandHopList = entry.getValue();
if (uipCandHopList != null) {
for (UIPCandidateHop uipCandHop : uipCandHopList) {
//Identify where the intermediate object has been defined (same name as the lix target).
if (hop instanceof DataGenOp && hop.getName().equals(uipCandHop.getLixHop().getName())) {
uipCandHop.setHop(hop);
uipCandHop.setLocation(hop.getBeginLine());
uipCandHop.setIntermediate(true);
}
//Mark applicable if the candidate hop is defined before this hop and the
//left-indexing falls within this loop (line-number based containment check).
if ((bInLoop) && (uipCandHop.getLocation() <= hop.getBeginLine() && uipCandHop.getLixHop().getBeginLine() <= hop.getEndLine()))
uipCandHop.setIsLoopApplicable(true);
}
}
}
}
}
Use of org.apache.sysml.parser.VariableSet in project incubator-systemml by Apache.
Class ParForProgramBlock, method exportMatricesToHDFS.
/**
 * Exports all in-memory matrix variables of the given execution context to HDFS,
 * except those on the blacklist. If live-variable analysis is available
 * (LIVEVAR_AWARE_EXPORT and a statement block exist), only variables actually
 * read in the parfor body are exported, preventing unnecessary exports.
 *
 * @param ec execution context whose symbol table is scanned
 * @param blacklistNames variable names to exclude from export
 */
private void exportMatricesToHDFS(ExecutionContext ec, String... blacklistNames) {
    ParForStatementBlock sb = (ParForStatementBlock) getStatementBlock();
    Set<String> blacklist = UtilFunctions.asSet(blacklistNames);
    // Optimization: restrict the export to variables read in the body when
    // live-variable information is available; otherwise export all matrices.
    boolean liveVarAware = LIVEVAR_AWARE_EXPORT && sb != null;
    VariableSet varsRead = liveVarAware ? sb.variablesRead() : null;
    // Single loop replaces the previously duplicated per-branch export loops.
    for (String key : ec.getVariables().keySet()) {
        if (blacklist.contains(key))
            continue;
        if (liveVarAware && !varsRead.containsVariable(key))
            continue;
        Data d = ec.getVariable(key);
        if (d.getDataType() == DataType.MATRIX)
            ((MatrixObject) d).exportData(_replicationExport);
    }
}
Use of org.apache.sysml.parser.VariableSet in project incubator-systemml by Apache.
Class ParForProgramBlock, method createEmptyUnscopedVariables.
/**
 * Creates empty matrix objects and default scalars for all unscoped variables,
 * i.e., variables created within the parfor body that are not live-in.
 *
 * NOTE: parfor gives no guarantees on the values of those objects - hence
 * we return -1 for scalars and empty matrix objects.
 *
 * @param out local variable map receiving the created placeholder objects
 * @param sb statement block providing updated and live-in variable sets
 */
private static void createEmptyUnscopedVariables(LocalVariableMap out, StatementBlock sb) {
    VariableSet updated = sb.variablesUpdated();
    VariableSet livein = sb.liveIn();
    // Consider every variable that is updated but not live-in (i.e., parfor-local).
    for (String var : updated.getVariableNames()) {
        if (livein.containsVariable(var))
            continue;
        DataIdentifier dat = updated.getVariable(var);
        Data dataObj = createDefaultDataObject(dat.getDataType(), dat.getValueType());
        // Matrix/frame/unknown yield no placeholder (metadata not known here).
        if (dataObj != null)
            out.put(var, dataObj);
    }
}

/**
 * Builds the default placeholder object for a scalar of the given value type,
 * or returns null for matrix/frame/unknown data types.
 *
 * @param datatype data type of the unscoped variable
 * @param valuetype value type (only relevant for scalars)
 * @return placeholder Data object, or null if none is created
 */
private static Data createDefaultDataObject(DataType datatype, ValueType valuetype) {
    switch(datatype) {
        case SCALAR:
            switch(valuetype) {
                case BOOLEAN:
                    return new BooleanObject(false);
                case INT:
                    return new IntObject(-1);
                case DOUBLE:
                    return new DoubleObject(-1d);
                case STRING:
                    return new StringObject("-1");
                default:
                    throw new DMLRuntimeException("Value type not supported: " + valuetype);
            }
        case MATRIX:
        case FRAME:
            // because metadata (e.g., outputinfo) not known at this place.
            return null;
        case UNKNOWN:
            return null;
        default:
            throw new DMLRuntimeException("Data type not supported: " + datatype);
    }
}
Use of org.apache.sysml.parser.VariableSet in project incubator-systemml by Apache.
Class RewriteInjectSparkLoopCheckpointing, method rewriteStatementBlock.
/**
 * Injects a preceding statement block with Spark checkpoint (caching) operations
 * for all matrices that are read but not updated inside a loop statement block.
 *
 * 1) Checkpoints are added without global program-structure information; redundant
 * checkpointing is assumed to be prevented at runtime (instruction level).
 * 2) Size information is not taken into account; candidates are checkpointed even
 * if only used by CP operations.
 *
 * @param sb statement block to rewrite
 * @param status rewrite status (block size, parfor context, injection flag)
 * @return list containing the optional checkpoint block followed by the original block
 */
@Override
public List<StatementBlock> rewriteStatementBlock(StatementBlock sb, ProgramRewriteStatus status) {
    // Nothing to do outside spark execution mode; return original statement block.
    if (!OptimizerUtils.isSparkExecutionMode())
        return Arrays.asList(sb);
    ArrayList<StatementBlock> ret = new ArrayList<>();
    // block size set by reblock rewrite
    int blocksize = status.getBlocksize();
    // Applies to while/for loops (incl parfor); optionally skipped inside a parfor
    // context because otherwise we would prevent remote parfor.
    boolean applicable = (sb instanceof WhileStatementBlock || sb instanceof ForStatementBlock)
        && (!_checkCtx || !status.isInParforContext());
    if (applicable) {
        // step 1: determine checkpointing candidates (read-only matrices)
        VariableSet read = sb.variablesRead();
        VariableSet updated = sb.variablesUpdated();
        ArrayList<String> candidates = new ArrayList<>();
        for (String rvar : read.getVariableNames()) {
            if (!updated.containsVariable(rvar) && read.getVariable(rvar).getDataType() == DataType.MATRIX)
                candidates.add(rvar);
        }
        // step 2: insert statement block with checkpointing operations
        if (!candidates.isEmpty()) {
            StatementBlock sb0 = new StatementBlock();
            sb0.setDMLProg(sb.getDMLProg());
            sb0.setParseInfo(sb);
            ArrayList<Hop> hops = new ArrayList<>();
            VariableSet livein = new VariableSet();
            VariableSet liveout = new VariableSet();
            for (String var : candidates) {
                DataIdentifier dat = read.getVariable(var);
                boolean indexed = dat instanceof IndexedIdentifier;
                long dim1 = indexed ? ((IndexedIdentifier) dat).getOrigDim1() : dat.getDim1();
                long dim2 = indexed ? ((IndexedIdentifier) dat).getOrigDim2() : dat.getDim2();
                // transient read marked for checkpointing, followed by transient write
                DataOp tread = new DataOp(var, DataType.MATRIX, ValueType.DOUBLE, DataOpTypes.TRANSIENTREAD, dat.getFilename(), dim1, dim2, dat.getNnz(), blocksize, blocksize);
                tread.setRequiresCheckpoint(true);
                DataOp twrite = new DataOp(var, DataType.MATRIX, ValueType.DOUBLE, tread, DataOpTypes.TRANSIENTWRITE, null);
                HopRewriteUtils.setOutputParameters(twrite, dim1, dim2, blocksize, blocksize, dat.getNnz());
                hops.add(twrite);
                livein.addVariable(var, dat);
                liveout.addVariable(var, dat);
            }
            sb0.setHops(hops);
            sb0.setLiveIn(livein);
            sb0.setLiveOut(liveout);
            sb0.setSplitDag(true);
            ret.add(sb0);
            // maintain rewrite status
            status.setInjectedCheckpoints();
        }
    }
    // add original statement block to end
    ret.add(sb);
    return ret;
}
Use of org.apache.sysml.parser.VariableSet in project incubator-systemml by Apache.
Class RewriteSplitDagUnknownCSVRead, method rewriteStatementBlock.
/**
 * Splits the hop DAG of a last-level statement block whenever it contains CSV
 * persistent reads of unknown size: the reads (incl. reblock) are moved into a new
 * preceding statement block terminated by transient writes, and the original DAG
 * consumes them via transient reads instead. This forces recompilation with known
 * sizes before the remaining operations execute.
 *
 * @param sb statement block to rewrite
 * @param state program rewrite status (unused here)
 * @return list of one (unchanged) or two (split) statement blocks
 */
@Override
public List<StatementBlock> rewriteStatementBlock(StatementBlock sb, ProgramRewriteStatus state) {
// DAG splits not required for forced single node
if (DMLScript.rtplatform == RUNTIME_PLATFORM.SINGLE_NODE || !HopRewriteUtils.isLastLevelStatementBlock(sb))
return Arrays.asList(sb);
ArrayList<StatementBlock> ret = new ArrayList<>();
// collect all unknown csv read hops
ArrayList<Hop> cand = new ArrayList<>();
collectCSVReadHopsUnknownSize(sb.getHops(), cand);
// split hop dag on demand
if (!cand.isEmpty()) {
try {
// duplicate sb incl live variable sets
StatementBlock sb1 = new StatementBlock();
sb1.setDMLProg(sb.getDMLProg());
sb1.setParseInfo(sb);
sb1.setLiveIn(new VariableSet());
sb1.setLiveOut(new VariableSet());
// move csv reads incl reblock to new statement block
// (and replace original persistent read with transient read)
ArrayList<Hop> sb1hops = new ArrayList<>();
for (Hop reblock : cand) {
// capture metadata of the read before rewiring the DAG
long rlen = reblock.getDim1();
long clen = reblock.getDim2();
long nnz = reblock.getNnz();
UpdateType update = reblock.getUpdateType();
int brlen = reblock.getRowsInBlock();
int bclen = reblock.getColsInBlock();
// deep-copy literal inputs so each DAG owns its own literal ops
// (otherwise, for instance, literal ops are shared across dags)
for (int i = 0; i < reblock.getInput().size(); i++) if (reblock.getInput().get(i) instanceof LiteralOp)
HopRewriteUtils.replaceChildReference(reblock, reblock.getInput().get(i), new LiteralOp((LiteralOp) reblock.getInput().get(i)));
// create new transient read
DataOp tread = new DataOp(reblock.getName(), reblock.getDataType(), reblock.getValueType(), DataOpTypes.TRANSIENTREAD, null, rlen, clen, nnz, update, brlen, bclen);
HopRewriteUtils.copyLineNumbers(reblock, tread);
// replace reblock with transient read in all consumers
// (snapshot of parents required: replaceChildReference mutates the parent list)
ArrayList<Hop> parents = new ArrayList<>(reblock.getParent());
for (int i = 0; i < parents.size(); i++) {
Hop parent = parents.get(i);
HopRewriteUtils.replaceChildReference(parent, reblock, tread);
}
// add reblock sub dag to first statement block, terminated by a transient write
DataOp twrite = new DataOp(reblock.getName(), reblock.getDataType(), reblock.getValueType(), reblock, DataOpTypes.TRANSIENTWRITE, null);
twrite.setOutputParams(rlen, clen, nnz, update, brlen, bclen);
HopRewriteUtils.copyLineNumbers(reblock, twrite);
sb1hops.add(twrite);
// update live in and out of new statement block (for piggybacking)
DataIdentifier diVar = sb.variablesRead().getVariable(reblock.getName());
if (diVar != null) {
// var read should always exist because persistent read
sb1.liveOut().addVariable(reblock.getName(), new DataIdentifier(diVar));
sb.liveIn().addVariable(reblock.getName(), new DataIdentifier(diVar));
}
}
sb1.setHops(sb1hops);
sb1.updateRecompilationFlag();
// statement block with csv reblocks
ret.add(sb1);
// statement block with remaining hops
ret.add(sb);
// avoid later merge by other rewrites
sb.setSplitDag(true);
} catch (Exception ex) {
throw new HopsException("Failed to split hops dag for csv read with unknown size.", ex);
}
LOG.debug("Applied splitDagUnknownCSVRead.");
} else // keep original hop dag
{
ret.add(sb);
}
return ret;
}
Aggregations