use of org.apache.sysml.parser.DataIdentifier in project incubator-systemml by apache.
the class RewriteSplitDagDataDependentOperators method handleReplicatedOperators.
private void handleReplicatedOperators(ArrayList<Hop> rootsSB1, ArrayList<Hop> rootsSB2, VariableSet sb1out, VariableSet sb2in) {
// step 1: create probe set SB1
HashSet<Hop> probeSet = new HashSet<>();
Hop.resetVisitStatus(rootsSB1);
for (Hop h : rootsSB1) rAddHopsToProbeSet(h, probeSet);
// step 2: probe SB2 operators top-down (collect cut candidates)
HashSet<Pair<Hop, Hop>> candSet = new HashSet<>();
Hop.resetVisitStatus(rootsSB2);
for (Hop h : rootsSB2) rProbeAndAddHopsToCandidateSet(h, probeSet, candSet);
// step 3: create additional cuts with reuse for common references
HashMap<Long, DataOp> reuseTRead = new HashMap<>();
for (Pair<Hop, Hop> p : candSet) {
Hop hop = p.getKey();
Hop c = p.getValue();
DataOp tread = reuseTRead.get(c.getHopID());
if (tread == null) {
String varname = createCutVarName(false);
tread = new DataOp(varname, c.getDataType(), c.getValueType(), DataOpTypes.TRANSIENTREAD, null, c.getDim1(), c.getDim2(), c.getNnz(), c.getUpdateType(), c.getRowsInBlock(), c.getColsInBlock());
tread.setVisited();
HopRewriteUtils.copyLineNumbers(c, tread);
reuseTRead.put(c.getHopID(), tread);
DataOp twrite = new DataOp(varname, c.getDataType(), c.getValueType(), c, DataOpTypes.TRANSIENTWRITE, null);
twrite.setVisited();
twrite.setOutputParams(c.getDim1(), c.getDim2(), c.getNnz(), c.getUpdateType(), c.getRowsInBlock(), c.getColsInBlock());
HopRewriteUtils.copyLineNumbers(c, twrite);
// update live in and out of new statement block (for piggybacking)
DataIdentifier diVar = new DataIdentifier(varname);
diVar.setDimensions(c.getDim1(), c.getDim2());
diVar.setBlockDimensions(c.getRowsInBlock(), c.getColsInBlock());
diVar.setDataType(c.getDataType());
diVar.setValueType(c.getValueType());
sb1out.addVariable(varname, new DataIdentifier(diVar));
sb2in.addVariable(varname, new DataIdentifier(diVar));
rootsSB1.add(twrite);
}
// create additional cut by rewriting both hop dags
int pos = HopRewriteUtils.getChildReferencePos(hop, c);
HopRewriteUtils.removeChildReferenceByPos(hop, c, pos);
HopRewriteUtils.addChildReference(hop, tread, pos);
}
}
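The detail worth noting above is how the new cut variable is made visible across the split: a DataIdentifier carrying the child hop's dimensions, block sizes, and data/value types is added to the live-out set of the first block and the live-in set of the second. Below is a minimal sketch of that bookkeeping as a stand-alone helper; registerCutVariable is a hypothetical name (not part of the rewrite class) and the sketch assumes the same org.apache.sysml classes (Hop, DataIdentifier, VariableSet) imported as in the method above.
// Hypothetical helper mirroring the live-variable bookkeeping above: describe the cut
// variable via a DataIdentifier (dims, block sizes, data/value type) and register it as
// live-out of the producing block and live-in of the consuming block.
private static void registerCutVariable(String varname, Hop c, VariableSet sb1out, VariableSet sb2in) {
    DataIdentifier diVar = new DataIdentifier(varname);
    diVar.setDimensions(c.getDim1(), c.getDim2());
    diVar.setBlockDimensions(c.getRowsInBlock(), c.getColsInBlock());
    diVar.setDataType(c.getDataType());
    diVar.setValueType(c.getValueType());
    // use copies so the two VariableSets do not share a single DataIdentifier instance
    sb1out.addVariable(varname, new DataIdentifier(diVar));
    sb2in.addVariable(varname, new DataIdentifier(diVar));
}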
use of org.apache.sysml.parser.DataIdentifier in project incubator-systemml by apache.
the class RewriteSplitDagDataDependentOperators method rewriteStatementBlock.
@Override
public List<StatementBlock> rewriteStatementBlock(StatementBlock sb, ProgramRewriteStatus state) {
// DAG splits not required for forced single node
if (DMLScript.rtplatform == RUNTIME_PLATFORM.SINGLE_NODE || !HopRewriteUtils.isLastLevelStatementBlock(sb))
return Arrays.asList(sb);
ArrayList<StatementBlock> ret = new ArrayList<>();
// collect all data-dependent operator hops
ArrayList<Hop> cand = new ArrayList<>();
collectDataDependentOperators(sb.getHops(), cand);
Hop.resetVisitStatus(sb.getHops());
// split hop dag on demand
if (!cand.isEmpty()) {
// collect child operators of candidates (to prevent rewrite anomalies)
HashSet<Hop> candChilds = new HashSet<>();
collectCandidateChildOperators(cand, candChilds);
try {
// duplicate sb incl live variable sets
StatementBlock sb1 = new StatementBlock();
sb1.setDMLProg(sb.getDMLProg());
sb1.setParseInfo(sb);
sb1.setLiveIn(new VariableSet());
sb1.setLiveOut(new VariableSet());
// move data-dependent ops incl transient writes to new statement block
// (and replace the original operators with transient reads)
ArrayList<Hop> sb1hops = new ArrayList<>();
for (Hop c : cand) {
// if there are already transient writes use them and don't introduce artificial variables;
// unless there are transient reads w/ the same variable name in the current dag which can
// lead to invalid reordering if variable consumers are not feeding into the candidate op.
boolean hasTWrites = hasTransientWriteParents(c);
boolean moveTWrite = hasTWrites ? HopRewriteUtils.rHasSimpleReadChain(c, getFirstTransientWriteParent(c).getName()) : false;
String varname = null;
long rlen = c.getDim1();
long clen = c.getDim2();
long nnz = c.getNnz();
UpdateType update = c.getUpdateType();
int brlen = c.getRowsInBlock();
int bclen = c.getColsInBlock();
if (hasTWrites && moveTWrite) { // reuse existing transient_write
Hop twrite = getFirstTransientWriteParent(c);
varname = twrite.getName();
// create new transient read
DataOp tread = new DataOp(varname, c.getDataType(), c.getValueType(), DataOpTypes.TRANSIENTREAD, null, rlen, clen, nnz, update, brlen, bclen);
tread.setVisited();
HopRewriteUtils.copyLineNumbers(c, tread);
// replace data-dependent operator with transient read
ArrayList<Hop> parents = new ArrayList<>(c.getParent());
for (int i = 0; i < parents.size(); i++) {
// prevent concurrent modification by index access
Hop parent = parents.get(i);
if (!candChilds.contains(parent)) { // anomaly filter
if (parent != twrite)
HopRewriteUtils.replaceChildReference(parent, c, tread);
else
sb.getHops().remove(parent);
}
}
// add data-dependent operator sub dag to first statement block
sb1hops.add(twrite);
} else { // create transient write to artificial variables
varname = createCutVarName(false);
// create new transient read
DataOp tread = new DataOp(varname, c.getDataType(), c.getValueType(), DataOpTypes.TRANSIENTREAD, null, rlen, clen, nnz, update, brlen, bclen);
tread.setVisited();
HopRewriteUtils.copyLineNumbers(c, tread);
// replace data-dependent operator with transient read
ArrayList<Hop> parents = new ArrayList<>(c.getParent());
for (int i = 0; i < parents.size(); i++) {
// prevent concurrent modification by index access
Hop parent = parents.get(i);
if (!candChilds.contains(parent)) // anomaly filter
HopRewriteUtils.replaceChildReference(parent, c, tread);
}
// add data-dependent operator sub dag to first statement block
DataOp twrite = new DataOp(varname, c.getDataType(), c.getValueType(), c, DataOpTypes.TRANSIENTWRITE, null);
twrite.setVisited();
twrite.setOutputParams(rlen, clen, nnz, update, brlen, bclen);
HopRewriteUtils.copyLineNumbers(c, twrite);
sb1hops.add(twrite);
}
// update live in and out of new statement block (for piggybacking)
DataIdentifier diVar = new DataIdentifier(varname);
diVar.setDimensions(rlen, clen);
diVar.setBlockDimensions(brlen, bclen);
diVar.setDataType(c.getDataType());
diVar.setValueType(c.getValueType());
sb1.liveOut().addVariable(varname, new DataIdentifier(diVar));
sb.liveIn().addVariable(varname, new DataIdentifier(diVar));
}
// ensure disjoint operators across DAGs (prevent replicated operations)
handleReplicatedOperators(sb1hops, sb.getHops(), sb1.liveOut(), sb.liveIn());
// deep copy new dag (in order to prevent any dangling references)
sb1.setHops(Recompiler.deepCopyHopsDag(sb1hops));
sb1.updateRecompilationFlag();
// avoid later merge by other rewrites
sb1.setSplitDag(true);
// recursive application of rewrite rule (in case of multiple data dependent operators
// with data dependencies in between each other)
List<StatementBlock> tmp = rewriteStatementBlock(sb1, state);
// add new statement blocks to output
// statement block with data dependent hops
ret.addAll(tmp);
// statement block with remaining hops
ret.add(sb);
// avoid later merge by other rewrites
sb.setSplitDag(true);
} catch (Exception ex) {
throw new HopsException("Failed to split hops dag for data dependent operators with unknown size.", ex);
}
LOG.debug("Applied splitDagDataDependentOperators (lines " + sb.getBeginLine() + "-" + sb.getEndLine() + ").");
} else { // keep original hop dag
ret.add(sb);
}
return ret;
}
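Both branches above perform the same basic cut: write the candidate hop to a transient variable and substitute a transient read with identical output metadata for the candidate in its parents. The sketch below isolates that pattern; createCutPair is a hypothetical helper, not part of the rewrite class, and it assumes the same DataOp and HopRewriteUtils classes used above.
// Hypothetical sketch of the cut pattern: a transient write of hop c under varname and a
// matching transient read (same dims, nnz, blocking) that can replace c in its parent hops.
private static DataOp[] createCutPair(Hop c, String varname) {
    DataOp tread = new DataOp(varname, c.getDataType(), c.getValueType(), DataOpTypes.TRANSIENTREAD, null, c.getDim1(), c.getDim2(), c.getNnz(), c.getUpdateType(), c.getRowsInBlock(), c.getColsInBlock());
    tread.setVisited();
    HopRewriteUtils.copyLineNumbers(c, tread);
    DataOp twrite = new DataOp(varname, c.getDataType(), c.getValueType(), c, DataOpTypes.TRANSIENTWRITE, null);
    twrite.setVisited();
    twrite.setOutputParams(c.getDim1(), c.getDim2(), c.getNnz(), c.getUpdateType(), c.getRowsInBlock(), c.getColsInBlock());
    HopRewriteUtils.copyLineNumbers(c, twrite);
    return new DataOp[] { twrite, tread };
}
A caller would add the returned transient write to the first DAG's roots and call HopRewriteUtils.replaceChildReference(parent, c, tread) for each parent that is not itself a candidate child, exactly as done in both branches above.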
use of org.apache.sysml.parser.DataIdentifier in project incubator-systemml by apache.
the class InterProceduralAnalysis method extractFunctionCallReturnStatistics.
/**
* Extract return variable statistics from this function into the
* calling program.
*
* @param fstmt The function statement.
* @param fop The function op.
* @param tmpVars Function's map of variables eligible for
* extraction.
* @param callVars Calling program's map of variables.
* @param overwrite Whether or not to overwrite variables in the
* calling program's variable map.
*/
private static void extractFunctionCallReturnStatistics(FunctionStatement fstmt, FunctionOp fop, LocalVariableMap tmpVars, LocalVariableMap callVars, boolean overwrite) {
ArrayList<DataIdentifier> foutputOps = fstmt.getOutputParams();
String[] outputVars = fop.getOutputVariableNames();
String fkey = fop.getFunctionKey();
try {
for (int i = 0; i < foutputOps.size(); i++) {
DataIdentifier di = foutputOps.get(i);
// name in function signature
String fvarname = di.getName();
// name in calling program
String pvarname = outputVars[i];
// if the variable already exists in the calling program but with a different data type
// than the function output, remove it from the calling program's variable map.
if (callVars.keySet().contains(pvarname)) {
DataType fdataType = di.getDataType();
DataType pdataType = callVars.get(pvarname).getDataType();
if (fdataType != pdataType) {
// datatype has changed, and the calling program is reassigning
// the variable, so remove it from the calling variable map
callVars.remove(pvarname);
}
}
// Update or add to the calling program's variable map.
if (di.getDataType() == DataType.MATRIX && tmpVars.keySet().contains(fvarname)) {
MatrixObject moIn = (MatrixObject) tmpVars.get(fvarname);
if (!callVars.keySet().contains(pvarname) || overwrite) { // not existing so far
MatrixObject moOut = createOutputMatrix(moIn.getNumRows(), moIn.getNumColumns(), moIn.getNnz());
callVars.put(pvarname, moOut);
} else { // already existing: take largest
Data dat = callVars.get(pvarname);
if (dat instanceof MatrixObject) {
MatrixObject moOut = (MatrixObject) dat;
MatrixCharacteristics mc = moOut.getMatrixCharacteristics();
if (OptimizerUtils.estimateSizeExactSparsity(mc.getRows(), mc.getCols(), (mc.getNonZeros() > 0) ? OptimizerUtils.getSparsity(mc) : 1.0) < OptimizerUtils.estimateSize(moIn.getNumRows(), moIn.getNumColumns())) {
// update statistics if necessary
mc.setDimension(moIn.getNumRows(), moIn.getNumColumns());
mc.setNonZeros(moIn.getNnz());
}
}
}
}
}
} catch (Exception ex) {
throw new HopsException("Failed to extract output statistics of function " + fkey + ".", ex);
}
}
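The statistics transfer relies purely on positional correspondence: the i-th DataIdentifier in the function's output parameters maps to the i-th output variable name at the call site. A small, hedged sketch of just that mapping is shown below; mapOutputNames is a hypothetical helper and assumes java.util.Map/HashMap plus the SystemML classes used above.
// Hypothetical sketch: positional mapping from formal output names (DataIdentifier in the
// function signature) to the variable names bound at the call site.
private static Map<String, String> mapOutputNames(FunctionStatement fstmt, FunctionOp fop) {
    ArrayList<DataIdentifier> fouts = fstmt.getOutputParams();
    String[] outputVars = fop.getOutputVariableNames();
    Map<String, String> ret = new HashMap<>();
    for (int i = 0; i < fouts.size() && i < outputVars.length; i++)
        ret.put(fouts.get(i).getName(), outputVars[i]);
    return ret;
}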
use of org.apache.sysml.parser.DataIdentifier in project incubator-systemml by apache.
the class InterProceduralAnalysis method populateLocalVariableMapForFunctionCall.
private static void populateLocalVariableMapForFunctionCall(FunctionStatement fstmt, FunctionOp fop, LocalVariableMap callvars, LocalVariableMap vars, FunctionCallSizeInfo fcallSizes) {
ArrayList<DataIdentifier> inputVars = fstmt.getInputParams();
ArrayList<Hop> inputOps = fop.getInput();
String fkey = fop.getFunctionKey();
for (int i = 0; i < inputVars.size(); i++) {
// create mapping between input hops and vars
DataIdentifier dat = inputVars.get(i);
Hop input = inputOps.get(i);
if (input.getDataType() == DataType.MATRIX) {
// propagate matrix characteristics
MatrixObject mo = new MatrixObject(ValueType.DOUBLE, null);
MatrixCharacteristics mc = new MatrixCharacteristics(input.getDim1(), input.getDim2(), ConfigurationManager.getBlocksize(), ConfigurationManager.getBlocksize(), fcallSizes.isSafeNnz(fkey, i) ? input.getNnz() : -1);
MetaDataFormat meta = new MetaDataFormat(mc, null, null);
mo.setMetaData(meta);
vars.put(dat.getName(), mo);
} else if (input.getDataType() == DataType.SCALAR) {
// propagate literal scalars into functions (for multiple calls, literal equivalence already checked)
if (input instanceof LiteralOp) {
vars.put(dat.getName(), ScalarObjectFactory.createScalarObject(input.getValueType(), (LiteralOp) input));
} else if (PROPAGATE_SCALAR_VARS_INTO_FUN && fcallSizes.getFunctionCallCount(fkey) == 1 && input instanceof DataOp) {
// propagate scalar variables into functions called only once, provided the
// input scalar is an existing variable in the symbol table
Data scalar = callvars.get(input.getName());
if (scalar != null && scalar instanceof ScalarObject) {
vars.put(dat.getName(), scalar);
}
}
}
}
}
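For matrix inputs, the loop above essentially wraps the input hop's size information in MatrixCharacteristics and binds it, via a MatrixObject, to the formal parameter's name. A minimal sketch of that binding follows; bindMatrixInput is a hypothetical helper, and an nnz of -1 denotes an unknown number of non-zeros, as in the call above.
// Hypothetical sketch of the matrix-input binding above: wrap the hop's dimensions, block
// size, and (possibly unknown) nnz in a MatrixObject and bind it to the parameter name.
private static void bindMatrixInput(DataIdentifier dat, Hop input, long nnz, LocalVariableMap vars) {
    MatrixObject mo = new MatrixObject(ValueType.DOUBLE, null);
    MatrixCharacteristics mc = new MatrixCharacteristics(input.getDim1(), input.getDim2(), ConfigurationManager.getBlocksize(), ConfigurationManager.getBlocksize(), nnz);
    mo.setMetaData(new MetaDataFormat(mc, null, null));
    vars.put(dat.getName(), mo);
}
The caller would pass fcallSizes.isSafeNnz(fkey, i) ? input.getNnz() : -1 as nnz, mirroring the matrix branch above.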
use of org.apache.sysml.parser.DataIdentifier in project incubator-systemml by apache.
the class HopRewriteUtils method createDataGenOp.
public static Hop createDataGenOp(Hop input, double value) {
Hop rows = input.rowsKnown() ? new LiteralOp(input.getDim1()) : new UnaryOp("tmprows", DataType.SCALAR, ValueType.INT, OpOp1.NROW, input);
Hop cols = input.colsKnown() ? new LiteralOp(input.getDim2()) : new UnaryOp("tmpcols", DataType.SCALAR, ValueType.INT, OpOp1.NCOL, input);
Hop val = new LiteralOp(value);
HashMap<String, Hop> params = new HashMap<>();
params.put(DataExpression.RAND_ROWS, rows);
params.put(DataExpression.RAND_COLS, cols);
params.put(DataExpression.RAND_MIN, val);
params.put(DataExpression.RAND_MAX, val);
params.put(DataExpression.RAND_PDF, new LiteralOp(DataExpression.RAND_PDF_UNIFORM));
params.put(DataExpression.RAND_LAMBDA, new LiteralOp(-1.0));
params.put(DataExpression.RAND_SPARSITY, new LiteralOp(1.0));
params.put(DataExpression.RAND_SEED, new LiteralOp(DataGenOp.UNSPECIFIED_SEED));
// note: size information is refreshed internally
Hop datagen = new DataGenOp(DataGenMethod.RAND, new DataIdentifier("tmp"), params);
datagen.setOutputBlocksizes(input.getRowsInBlock(), input.getColsInBlock());
copyLineNumbers(input, datagen);
if (value == 0)
datagen.setNnz(0);
return datagen;
}
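Because min, max, lambda, sparsity, and seed are fixed, the generated rand hop is effectively a constant fill with the shape of the given input; the final zero check additionally marks an all-zero result as fully sparse. A hedged usage sketch, where X stands for any existing matrix hop in scope:
// Hypothetical usage: constant matrices with the same dimensions and blocking as X.
Hop zeros = HopRewriteUtils.createDataGenOp(X, 0); // all-zero matrix, nnz set to 0
Hop ones = HopRewriteUtils.createDataGenOp(X, 1);  // all-one matrix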