Use of org.apache.sysml.parser.DataIdentifier in project incubator-systemml by apache.
Class RewriteInjectSparkLoopCheckpointing, method rewriteStatementBlock.
/**
 * Statement-block rewrite that prepares checkpointing for loop blocks in Spark execution mode.
 * <p>
 * What the visible code does: when Spark execution mode is off, the input block is returned
 * unchanged as a single-element list. Otherwise, for a while/for statement block (and, when
 * {@code _checkCtx} is set, only outside a parfor context), it looks at variables that are
 * read but not updated by the block and have data type MATRIX. For every entry in
 * {@code candidates} it builds a transient-read / transient-write hop pair and registers the
 * variable in the {@code livein} and {@code liveout} sets.
 * <p>
 * NOTE(review): this excerpt appears truncated. Closing braces, the statement that fills
 * {@code candidates}, and the statements that attach {@code hops}/{@code livein}/{@code liveout}
 * to {@code sb0} and add blocks to {@code ret} are not visible here; confirm against the
 * full source before relying on this description.
 *
 * @param sb     the statement block to rewrite
 * @param status rewrite status; supplies the block size and the parfor-context flag
 * @return {@code ret}, or a single-element list holding {@code sb} when Spark execution mode is off
 */
public List<StatementBlock> rewriteStatementBlock(StatementBlock sb, ProgramRewriteStatus status) {
// guard: the rewrite is only applied when OptimizerUtils reports Spark execution mode
if (!OptimizerUtils.isSparkExecutionMode()) {
// nothing to do here, return original statement block
return Arrays.asList(sb);
// NOTE(review): the closing brace of the guard above is missing in this excerpt
// 1) We currently add checkpoint operations without information about the global program structure,
// this assumes that redundant checkpointing is prevented at runtime level (instruction-level)
// 2) Also, we do not take size information into account right now. This means that all candidates
// are checkpointed even if they are only used by CP operations.
ArrayList<StatementBlock> ret = new ArrayList<>();
// block size set by reblock rewrite
int blocksize = status.getBlocksize();
// the parfor-context check below is described by the original author as an
// optimization because otherwise we would prevent remote parfor)
if (// incl parfor
(sb instanceof WhileStatementBlock || sb instanceof ForStatementBlock) && (_checkCtx ? !status.isInParforContext() : true)) {
// step 1: determine checkpointing candidates
ArrayList<String> candidates = new ArrayList<>();
VariableSet read = sb.variablesRead();
VariableSet updated = sb.variablesUpdated();
// filter: read-only (not in the updated set) variables of data type MATRIX
// NOTE(review): the statement executed for a matching variable is not visible here;
// presumably it adds rvar to candidates - confirm against the full source
for (String rvar : read.getVariableNames()) if (!updated.containsVariable(rvar) && read.getVariable(rvar).getDataType() == DataType.MATRIX)
// step 2: insert statement block with checkpointing operations
if (// existing candidates
!candidates.isEmpty()) {
// new statement block plus the hop list and live-variable sets built for it
StatementBlock sb0 = new StatementBlock();
ArrayList<Hop> hops = new ArrayList<>();
VariableSet livein = new VariableSet();
VariableSet liveout = new VariableSet();
for (String var : candidates) {
DataIdentifier dat = read.getVariable(var);
// for indexed identifiers use the original dimensions rather than getDim1/getDim2
long dim1 = (dat instanceof IndexedIdentifier) ? ((IndexedIdentifier) dat).getOrigDim1() : dat.getDim1();
long dim2 = (dat instanceof IndexedIdentifier) ? ((IndexedIdentifier) dat).getOrigDim2() : dat.getDim2();
// transient read of the variable, followed by a transient write of that read under the same name
DataOp tread = new DataOp(var, DataType.MATRIX, ValueType.DOUBLE, DataOpTypes.TRANSIENTREAD, dat.getFilename(), dim1, dim2, dat.getNnz(), blocksize, blocksize);
DataOp twrite = new DataOp(var, DataType.MATRIX, ValueType.DOUBLE, tread, DataOpTypes.TRANSIENTWRITE, null);
HopRewriteUtils.setOutputParameters(twrite, dim1, dim2, blocksize, blocksize, dat.getNnz());
// the variable is both live-in and live-out of the new block
livein.addVariable(var, read.getVariable(var));
liveout.addVariable(var, read.getVariable(var));
// NOTE(review): the statements belonging to the next two comments are missing in this excerpt
// maintain rewrite status
// add original statement block to end
return ret;
Use of org.apache.sysml.parser.DataIdentifier in project incubator-systemml by apache.
Class RewriteSplitDagUnknownCSVRead, method rewriteStatementBlock.
/**
 * Statement-block rewrite that splits the hop DAG at CSV reads of unknown size.
 * <p>
 * What the visible code does: the input block is returned unchanged (as a single-element
 * list) when the runtime platform is SINGLE_NODE or the block is not a last-level statement
 * block. Otherwise the candidate hops are collected via
 * {@code collectCSVReadHopsUnknownSize}. For each candidate ({@code reblock}) a transient
 * read replaces it in all of its parents, a transient write is created on top of it, and the
 * live-out set of the new block {@code sb1} and the live-in set of {@code sb} are updated.
 * <p>
 * NOTE(review): this excerpt appears truncated. Closing braces, the statements that populate
 * {@code sb1hops} and {@code ret}, and the body of the trailing {@code else} are not visible
 * here; confirm against the full source.
 *
 * @param sb    the statement block to rewrite
 * @param state rewrite status (not referenced in the visible lines)
 * @return {@code ret}, or a single-element list holding {@code sb} when no split is attempted
 * @throws HopsException wrapping any exception raised while splitting the DAG
 */
public List<StatementBlock> rewriteStatementBlock(StatementBlock sb, ProgramRewriteStatus state) {
// DAG splits not required for forced single node
// (the same early return is taken when sb is not a last-level statement block)
if (DMLScript.rtplatform == RUNTIME_PLATFORM.SINGLE_NODE || !HopRewriteUtils.isLastLevelStatementBlock(sb))
return Arrays.asList(sb);
ArrayList<StatementBlock> ret = new ArrayList<>();
// collect all unknown csv reads hops
ArrayList<Hop> cand = new ArrayList<>();
collectCSVReadHopsUnknownSize(sb.getHops(), cand);
// split hop dag on demand
if (!cand.isEmpty()) {
try {
// duplicate sb incl live variable sets
// (the visible code creates a fresh block and gives it empty live-in/live-out sets)
StatementBlock sb1 = new StatementBlock();
sb1.setLiveIn(new VariableSet());
sb1.setLiveOut(new VariableSet());
// move csv reads incl reblock to new statement block
// (and replace original persistent read with transient read)
ArrayList<Hop> sb1hops = new ArrayList<>();
for (Hop reblock : cand) {
// snapshot size, sparsity, update type and block sizes of the candidate;
// they are reused for both the transient read and the transient write below
long rlen = reblock.getDim1();
long clen = reblock.getDim2();
long nnz = reblock.getNnz();
UpdateType update = reblock.getUpdateType();
int brlen = reblock.getRowsInBlock();
int bclen = reblock.getColsInBlock();
// replace literal inputs of the candidate with fresh LiteralOp copies
// (otherwise, for instance, literal ops are shared across dags)
for (int i = 0; i < reblock.getInput().size(); i++) if (reblock.getInput().get(i) instanceof LiteralOp)
HopRewriteUtils.replaceChildReference(reblock, reblock.getInput().get(i), new LiteralOp((LiteralOp) reblock.getInput().get(i)));
// create new transient read
DataOp tread = new DataOp(reblock.getName(), reblock.getDataType(), reblock.getValueType(), DataOpTypes.TRANSIENTREAD, null, rlen, clen, nnz, update, brlen, bclen);
HopRewriteUtils.copyLineNumbers(reblock, tread);
// replace reblock with transient read
// (iterates over a copy of the parent list, since the references are rewired inside the loop)
ArrayList<Hop> parents = new ArrayList<>(reblock.getParent());
for (int i = 0; i < parents.size(); i++) {
Hop parent = parents.get(i);
HopRewriteUtils.replaceChildReference(parent, reblock, tread);
// add reblock sub dag to first statement block
DataOp twrite = new DataOp(reblock.getName(), reblock.getDataType(), reblock.getValueType(), reblock, DataOpTypes.TRANSIENTWRITE, null);
twrite.setOutputParams(rlen, clen, nnz, update, brlen, bclen);
HopRewriteUtils.copyLineNumbers(reblock, twrite);
// update live in and out of new statement block (for piggybacking)
DataIdentifier diVar = sb.variablesRead().getVariable(reblock.getName());
if (diVar != null) {
// var read should always exist because persistent read
// each set receives its own copy of the identifier
sb1.liveOut().addVariable(reblock.getName(), new DataIdentifier(diVar));
sb.liveIn().addVariable(reblock.getName(), new DataIdentifier(diVar));
// NOTE(review): the statements belonging to the next three comments are missing in this excerpt
// statement block with csv reblocks
// statement block with remaining hops
// avoid later merge by other rewrites
} catch (Exception ex) {
// rethrow as HopsException, keeping the original exception as the cause
throw new HopsException("Failed to split hops dag for csv read with unknown size.", ex);
LOG.debug("Applied splitDagUnknownCSVRead.");
// NOTE(review): the else-body (keeping the original block) is not visible in this excerpt
} else // keep original hop dag
return ret;
Use of org.apache.sysml.parser.DataIdentifier in project incubator-systemml by apache.
Class GenerateClassesForMLContext, method generateFunctionCallMethodMLResults.
/**
 * Obtain method for invoking a script function and returning the results as
 * an MLResults object. Currently this method is not used.
 *
 * @param scriptFilePath
 *            the path to a script file
 * @param fs
 *            a SystemML function statement
 * @param dmlFunctionCall
 *            a string representing the invocation of a script function
 * @return string representation of a method that performs a function call
 */
// Builds, as text, the Java source of a method that returns
// org.apache.sysml.api.mlcontext.MLResults: the generated code creates a Script from
// dmlFunctionCall, chains .in(...) / .out(...) calls for the function's input and output
// parameters, executes the script and returns the results.
// NOTE(review): this excerpt appears truncated - closing braces and several append
// statements (e.g. the generated method name and parameter names) are not visible here.
// NOTE(review): scriptFilePath is not referenced in the visible lines.
public static String generateFunctionCallMethodMLResults(String scriptFilePath, FunctionStatement fs, String dmlFunctionCall) {
StringBuilder sb = new StringBuilder();
// return type of the generated method
sb.append("public org.apache.sysml.api.mlcontext.MLResults ");
ArrayList<DataIdentifier> inputParams = fs.getInputParams();
// one generated parameter per input parameter, separated by ", "
for (int i = 0; i < inputParams.size(); i++) {
if (i > 0) {
sb.append(", ");
DataIdentifier inputParam = inputParams.get(i);
// NOTE(review): the next five lines read as a block comment whose delimiters are missing in this excerpt
* Note: Using Object is currently preferrable to using
* datatype/valuetype to explicitly set the input type to
* Integer/Double/Boolean/String since Object allows the automatic
* handling of things such as automatic conversions from longs to
* ints.
sb.append("Object ");
sb.append(") {\n");
// NOTE(review): dmlFunctionCall is embedded unescaped into a Java string literal of the
// generated code; a value containing a double quote or backslash would presumably yield
// invalid generated source - confirm what callers pass in
sb.append("String scriptString = \"" + dmlFunctionCall + "\";\n");
sb.append("org.apache.sysml.api.mlcontext.Script script = new org.apache.sysml.api.mlcontext.Script(scriptString);\n");
ArrayList<DataIdentifier> outputParams = fs.getOutputParams();
// the chained in/out section is only emitted when there is at least one parameter
if ((inputParams.size() > 0) || (outputParams.size() > 0)) {
// bind each input: the generated argument variable carries the same name as the DML parameter
for (int i = 0; i < inputParams.size(); i++) {
DataIdentifier inputParam = inputParams.get(i);
String name = inputParam.getName();
sb.append(".in(\"" + name + "\", " + name + ")");
// register each output parameter by name
for (int i = 0; i < outputParams.size(); i++) {
DataIdentifier outputParam = outputParams.get(i);
String name = outputParam.getName();
sb.append(".out(\"" + name + "\")");
// NOTE(review): the body of the following condition is not visible in this excerpt
if ((inputParams.size() > 0) || (outputParams.size() > 0)) {
sb.append("org.apache.sysml.api.mlcontext.MLResults results = script.execute();\n");
sb.append("return results;\n");
return sb.toString();
Use of org.apache.sysml.parser.DataIdentifier in project incubator-systemml by apache.
Class GenerateClassesForMLContext, method generateDmlFunctionCall.
/**
 * Obtain the DML representing a function invocation.
 *
 * @param scriptFilePath
 *            the path to a script file
 * @param fs
 *            a SystemML function statement
 * @return string representation of a DML function invocation
 */
// Builds a DML snippet as a string: a source(...) statement that loads scriptFilePath under
// the namespace alias "mlcontextns", followed by text that depends on the number of output
// parameters (0, 1, or more than 1) and on the input parameters of fs.
// NOTE(review): this excerpt appears truncated - closing braces and the statements that
// append the parameter/function names are not visible here.
public static String generateDmlFunctionCall(String scriptFilePath, FunctionStatement fs) {
StringBuilder sb = new StringBuilder();
// NOTE(review): scriptFilePath is placed unescaped between single quotes - confirm callers
// never pass a path containing a single quote
sb.append("source('" + scriptFilePath + "') as mlcontextns;");
ArrayList<DataIdentifier> outputParams = fs.getOutputParams();
// NOTE(review): the body of the zero-output case is not visible in this excerpt
if (outputParams.size() == 0) {
// exactly one output: text ending in " = mlcontextns::"
if (outputParams.size() == 1) {
DataIdentifier outputParam = outputParams.get(0);
sb.append(" = mlcontextns::");
// several outputs: comma-separated entries closed by "] = mlcontextns::"
} else if (outputParams.size() > 1) {
for (int i = 0; i < outputParams.size(); i++) {
if (i > 0) {
sb.append(", ");
sb.append("] = mlcontextns::");
ArrayList<DataIdentifier> inputParams = fs.getInputParams();
// input parameters are emitted comma-separated
for (int i = 0; i < inputParams.size(); i++) {
if (i > 0) {
sb.append(", ");
DataIdentifier inputParam = inputParams.get(i);
return sb.toString();
Use of org.apache.sysml.parser.DataIdentifier in project incubator-systemml by apache.
Class IPAPassPropagateReplaceLiterals, method rewriteProgram.
/**
 * For every reachable function that has safe literals according to {@code fcallSizes},
 * builds a variable map from the literal arguments of the function's first call and passes
 * it, together with each statement block of the function body, to {@code rReplaceLiterals}.
 * <p>
 * NOTE(review): closing braces are missing in this excerpt, so the exact nesting of the
 * final loop is inferred from the comments - confirm against the full source.
 *
 * @param prog       the program whose function statement blocks are looked up by key
 * @param fgraph     function call graph; supplies reachable function keys and their calls
 * @param fcallSizes per-function call information; decides which arguments are safe literals
 */
public void rewriteProgram(DMLProgram prog, FunctionCallGraph fgraph, FunctionCallSizeInfo fcallSizes) {
for (String fkey : fgraph.getReachableFunctions()) {
// the first recorded call of the function is the source of the literal values
// NOTE(review): get(0) assumes every reachable function has at least one recorded call - confirm
FunctionOp first = fgraph.getFunctionCalls(fkey).get(0);
// propagate and replace amenable literals into function
if (fcallSizes.hasSafeLiterals(fkey)) {
FunctionStatementBlock fsb = prog.getFunctionStatementBlock(fkey);
// the function statement is taken from position 0 of the function statement block
FunctionStatement fstmt = (FunctionStatement) fsb.getStatement(0);
ArrayList<DataIdentifier> finputs = fstmt.getInputParams();
// populate call vars with amenable literals
LocalVariableMap callVars = new LocalVariableMap();
// input position j of the call is matched with input parameter j of the function;
// the cast to LiteralOp is guarded only by isSafeLiteral(fkey, j)
for (int j = 0; j < finputs.size(); j++) if (fcallSizes.isSafeLiteral(fkey, j)) {
LiteralOp lit = (LiteralOp) first.getInput().get(j);
callVars.put(finputs.get(j).getName(), ScalarObjectFactory.createScalarObject(lit.getValueType(), lit));
// propagate and replace literals
for (StatementBlock sb : fstmt.getBody()) rReplaceLiterals(sb, callVars);