Search in sources :

Example 11 with VariableSet

use of org.apache.sysml.parser.VariableSet in project incubator-systemml by apache.

the class RewriteMarkLoopVariablesUpdateInPlace method rewriteStatementBlock.

/**
 * Marks loop variables that qualify for update-in-place on while and for
 * statement blocks.
 *
 * <p>Nothing is marked when the runtime platform is HADOOP or SPARK, or when
 * {@code sb} is neither a {@code WhileStatementBlock} nor a
 * {@code ForStatementBlock}. Otherwise every updated variable of data type
 * MATRIX that is also contained in the block's live-out set is checked via
 * {@code rIsApplicableForUpdateInPlace} against the loop body, and the
 * accepted names are registered on {@code sb}.
 *
 * @param sb     statement block to inspect (marked in place)
 * @param status rewrite status (not used by this method)
 * @return single-element list holding {@code sb}
 */
@Override
public List<StatementBlock> rewriteStatementBlock(StatementBlock sb, ProgramRewriteStatus status) {
    // nothing to do for these platforms, hand back the original statement block
    if (DMLScript.rtplatform == RUNTIME_PLATFORM.HADOOP || DMLScript.rtplatform == RUNTIME_PLATFORM.SPARK) {
        return Arrays.asList(sb);
    }

    final boolean whileLoop = sb instanceof WhileStatementBlock;
    // the for-check also covers parfor
    final boolean anyLoop = whileLoop || sb instanceof ForStatementBlock;
    if (!anyLoop) {
        return Arrays.asList(sb);
    }

    VariableSet updatedVars = sb.variablesUpdated();
    VariableSet liveOutVars = sb.liveOut();
    ArrayList<String> inPlaceVars = new ArrayList<>();
    for (String name : updatedVars.getVariableNames()) {
        // only matrices are considered
        if (updatedVars.getVariable(name).getDataType() != DataType.MATRIX) {
            continue;
        }
        // exclude loop-local variables
        if (!liveOutVars.containsVariable(name)) {
            continue;
        }
        boolean applicable;
        if (whileLoop) {
            WhileStatement loopStmt = (WhileStatement) sb.getStatement(0);
            applicable = rIsApplicableForUpdateInPlace(loopStmt.getBody(), name);
        } else {
            ForStatement loopStmt = (ForStatement) sb.getStatement(0);
            applicable = rIsApplicableForUpdateInPlace(loopStmt.getBody(), name);
        }
        if (applicable) {
            inPlaceVars.add(name);
        }
    }
    sb.setUpdateInPlaceVars(inPlaceVars);

    // return modified statement block
    return Arrays.asList(sb);
}
Also used : ForStatementBlock(org.apache.sysml.parser.ForStatementBlock) VariableSet(org.apache.sysml.parser.VariableSet) ArrayList(java.util.ArrayList) WhileStatement(org.apache.sysml.parser.WhileStatement) ForStatement(org.apache.sysml.parser.ForStatement) WhileStatementBlock(org.apache.sysml.parser.WhileStatementBlock)

Example 12 with VariableSet

use of org.apache.sysml.parser.VariableSet in project incubator-systemml by apache.

the class RewriteSplitDagDataDependentOperators method rewriteStatementBlock.

/**
 * Splits the hop dag of a statement block at data-dependent operators.
 *
 * <p>The candidate operators found by {@code collectDataDependentOperators}
 * are moved, each behind a transient write, into a newly created statement
 * block {@code sb1}; in the original dag each candidate is replaced by a
 * transient read of the same variable. The rewrite is then applied
 * recursively to {@code sb1}.
 *
 * <p>No split is attempted if the runtime platform is SINGLE_NODE or if
 * {@code HopRewriteUtils.isLastLevelStatementBlock(sb)} is false.
 *
 * @param sb    statement block whose hop dag is inspected (modified in place on split)
 * @param state rewrite status, passed through to the recursive invocation
 * @return on split: the block(s) resulting from rewriting {@code sb1},
 *         followed by {@code sb}; otherwise a list holding only {@code sb}
 * @throws HopsException wrapping any exception raised while splitting the dag
 */
@Override
public List<StatementBlock> rewriteStatementBlock(StatementBlock sb, ProgramRewriteStatus state) {
    // DAG splits not required for forced single node
    if (DMLScript.rtplatform == RUNTIME_PLATFORM.SINGLE_NODE || !HopRewriteUtils.isLastLevelStatementBlock(sb))
        return Arrays.asList(sb);
    ArrayList<StatementBlock> ret = new ArrayList<>();
    // collect all data-dependent operator hops of this dag (split candidates)
    ArrayList<Hop> cand = new ArrayList<>();
    collectDataDependentOperators(sb.getHops(), cand);
    // reset visit status after the collection pass over the dag
    Hop.resetVisitStatus(sb.getHops());
    // split hop dag on demand
    if (!cand.isEmpty()) {
        // collect child operators of candidates (to prevent rewrite anomalies)
        HashSet<Hop> candChilds = new HashSet<>();
        collectCandidateChildOperators(cand, candChilds);
        try {
            // create new statement block with program and parse info of sb,
            // but with fresh (initially empty) live variable sets
            StatementBlock sb1 = new StatementBlock();
            sb1.setDMLProg(sb.getDMLProg());
            sb1.setParseInfo(sb);
            sb1.setLiveIn(new VariableSet());
            sb1.setLiveOut(new VariableSet());
            // move data-dependent ops incl transient writes to new statement block
            // (and replace the operator in the original dag with a transient read)
            ArrayList<Hop> sb1hops = new ArrayList<>();
            for (Hop c : cand) {
                // if there are already transient writes use them and don't introduce artificial variables;
                // unless there are transient reads w/ the same variable name in the current dag which can
                // lead to invalid reordering if variable consumers are not feeding into the candidate op.
                boolean hasTWrites = hasTransientWriteParents(c);
                boolean moveTWrite = hasTWrites ? HopRewriteUtils.rHasSimpleReadChain(c, getFirstTransientWriteParent(c).getName()) : false;
                String varname = null;
                // capture output characteristics of the candidate for the new read/write hops
                long rlen = c.getDim1();
                long clen = c.getDim2();
                long nnz = c.getNnz();
                UpdateType update = c.getUpdateType();
                int brlen = c.getRowsInBlock();
                int bclen = c.getColsInBlock();
                if (// reuse existing transient_write
                hasTWrites && moveTWrite) {
                    Hop twrite = getFirstTransientWriteParent(c);
                    varname = twrite.getName();
                    // create new transient read
                    DataOp tread = new DataOp(varname, c.getDataType(), c.getValueType(), DataOpTypes.TRANSIENTREAD, null, rlen, clen, nnz, update, brlen, bclen);
                    tread.setVisited();
                    HopRewriteUtils.copyLineNumbers(c, tread);
                    // replace data-dependent operator with transient read
                    // (iterate over a copy of the parent list because the rewiring below modifies parent links)
                    ArrayList<Hop> parents = new ArrayList<>(c.getParent());
                    for (int i = 0; i < parents.size(); i++) {
                        // prevent concurrent modification by index access
                        Hop parent = parents.get(i);
                        if (!candChilds.contains(parent)) {
                            // anomaly filter
                            if (parent != twrite)
                                HopRewriteUtils.replaceChildReference(parent, c, tread);
                            else
                                // the reused transient write moves to sb1, hence drop it from the roots of sb
                                sb.getHops().remove(parent);
                        }
                    }
                    // add data-dependent operator sub dag to first statement block
                    sb1hops.add(twrite);
                } else // create transient write to artificial variables
                {
                    varname = createCutVarName(false);
                    // create new transient read
                    DataOp tread = new DataOp(varname, c.getDataType(), c.getValueType(), DataOpTypes.TRANSIENTREAD, null, rlen, clen, nnz, update, brlen, bclen);
                    tread.setVisited();
                    HopRewriteUtils.copyLineNumbers(c, tread);
                    // replace data-dependent operator with transient read
                    // (iterate over a copy of the parent list because the rewiring below modifies parent links)
                    ArrayList<Hop> parents = new ArrayList<>(c.getParent());
                    for (int i = 0; i < parents.size(); i++) {
                        // prevent concurrent modification by index access
                        Hop parent = parents.get(i);
                        if (// anomaly filter
                        !candChilds.contains(parent))
                            HopRewriteUtils.replaceChildReference(parent, c, tread);
                    }
                    // add data-dependent operator sub dag to first statement block
                    DataOp twrite = new DataOp(varname, c.getDataType(), c.getValueType(), c, DataOpTypes.TRANSIENTWRITE, null);
                    twrite.setVisited();
                    twrite.setOutputParams(rlen, clen, nnz, update, brlen, bclen);
                    HopRewriteUtils.copyLineNumbers(c, twrite);
                    sb1hops.add(twrite);
                }
                // update live in and out of new statement block (for piggybacking);
                // each set receives its own copy of the identifier
                DataIdentifier diVar = new DataIdentifier(varname);
                diVar.setDimensions(rlen, clen);
                diVar.setBlockDimensions(brlen, bclen);
                diVar.setDataType(c.getDataType());
                diVar.setValueType(c.getValueType());
                sb1.liveOut().addVariable(varname, new DataIdentifier(diVar));
                sb.liveIn().addVariable(varname, new DataIdentifier(diVar));
            }
            // ensure disjoint operators across DAGs (prevent replicated operations)
            handleReplicatedOperators(sb1hops, sb.getHops(), sb1.liveOut(), sb.liveIn());
            // deep copy new dag (in order to prevent any dangling references)
            sb1.setHops(Recompiler.deepCopyHopsDag(sb1hops));
            sb1.updateRecompilationFlag();
            // avoid later merge by other rewrites
            sb1.setSplitDag(true);
            // recursive application of rewrite rule (in case of multiple data dependent operators
            // with data dependencies in between each other)
            List<StatementBlock> tmp = rewriteStatementBlock(sb1, state);
            // add new statement blocks to output
            // statement block with data dependent hops
            ret.addAll(tmp);
            // statement block with remaining hops
            ret.add(sb);
            // avoid later merge by other rewrites
            sb.setSplitDag(true);
        } catch (Exception ex) {
            // any failure during the split is surfaced as HopsException with the cause preserved
            throw new HopsException("Failed to split hops dag for data dependent operators with unknown size.", ex);
        }
        LOG.debug("Applied splitDagDataDependentOperators (lines " + sb.getBeginLine() + "-" + sb.getEndLine() + ").");
    } else // keep original hop dag
    {
        ret.add(sb);
    }
    return ret;
}
Also used : DataIdentifier(org.apache.sysml.parser.DataIdentifier) ArrayList(java.util.ArrayList) Hop(org.apache.sysml.hops.Hop) HopsException(org.apache.sysml.hops.HopsException) UpdateType(org.apache.sysml.runtime.controlprogram.caching.MatrixObject.UpdateType) HopsException(org.apache.sysml.hops.HopsException) VariableSet(org.apache.sysml.parser.VariableSet) DataOp(org.apache.sysml.hops.DataOp) StatementBlock(org.apache.sysml.parser.StatementBlock) HashSet(java.util.HashSet)

Example 13 with VariableSet

use of org.apache.sysml.parser.VariableSet in project systemml by apache.

the class ParForProgramBlock method exportMatricesToHDFS.

/**
 * Exports matrix variables of the given execution context by calling
 * {@code exportData(_replicationExport)} on them.
 *
 * <p>When {@code LIVEVAR_AWARE_EXPORT} is set and a statement block is
 * available, only variables contained in the block's read set are exported;
 * otherwise all matrices in the symbol table are exported. Variables named
 * in {@code blacklistNames} are always skipped.
 *
 * @param ec             execution context providing the symbol table
 * @param blacklistNames names of variables that must not be exported
 */
private void exportMatricesToHDFS(ExecutionContext ec, String... blacklistNames) {
    ParForStatementBlock sb = (ParForStatementBlock) getStatementBlock();
    Set<String> blacklist = UtilFunctions.asSet(blacklistNames);

    // optimization to prevent unnecessary export of matrices:
    // if possible, restrict the export to variables read in the body
    final boolean onlyReadVars = LIVEVAR_AWARE_EXPORT && sb != null;
    final VariableSet varsRead = onlyReadVars ? sb.variablesRead() : null;

    for (String name : ec.getVariables().keySet()) {
        if (onlyReadVars && !varsRead.containsVariable(name)) {
            continue;
        }
        if (blacklist.contains(name)) {
            continue;
        }
        Data value = ec.getVariable(name);
        if (value.getDataType() == DataType.MATRIX) {
            ((MatrixObject) value).exportData(_replicationExport);
        }
    }
}
Also used : VariableSet(org.apache.sysml.parser.VariableSet) ParForStatementBlock(org.apache.sysml.parser.ParForStatementBlock) Data(org.apache.sysml.runtime.instructions.cp.Data)

Example 14 with VariableSet

use of org.apache.sysml.parser.VariableSet in project systemml by apache.

the class RewriteMarkLoopVariablesUpdateInPlace method rewriteStatementBlock.

/**
 * Determines, for while and for statement blocks, which updated matrix
 * variables are candidates for update-in-place and records them on the block.
 *
 * <p>The block is returned untouched if the runtime platform is HADOOP or
 * SPARK, or if {@code sb} is not a {@code WhileStatementBlock} or
 * {@code ForStatementBlock}. A variable becomes a candidate when it is
 * updated in the block, has data type MATRIX, is part of the live-out set,
 * and {@code rIsApplicableForUpdateInPlace} accepts it for the loop body.
 *
 * @param sb     statement block to inspect (marked in place)
 * @param status rewrite status (not used by this method)
 * @return single-element list holding {@code sb}
 */
@Override
public List<StatementBlock> rewriteStatementBlock(StatementBlock sb, ProgramRewriteStatus status) {
    // nothing to do for these platforms, hand back the original statement block
    if (DMLScript.rtplatform == RUNTIME_PLATFORM.HADOOP || DMLScript.rtplatform == RUNTIME_PLATFORM.SPARK) {
        return Arrays.asList(sb);
    }

    final boolean whileLoop = sb instanceof WhileStatementBlock;
    // the for-check also covers parfor
    final boolean anyLoop = whileLoop || sb instanceof ForStatementBlock;
    if (!anyLoop) {
        return Arrays.asList(sb);
    }

    VariableSet updatedVars = sb.variablesUpdated();
    VariableSet liveOutVars = sb.liveOut();
    ArrayList<String> inPlaceVars = new ArrayList<>();
    for (String name : updatedVars.getVariableNames()) {
        // only matrices are considered
        if (updatedVars.getVariable(name).getDataType() != DataType.MATRIX) {
            continue;
        }
        // exclude loop-local variables
        if (!liveOutVars.containsVariable(name)) {
            continue;
        }
        boolean applicable;
        if (whileLoop) {
            WhileStatement loopStmt = (WhileStatement) sb.getStatement(0);
            applicable = rIsApplicableForUpdateInPlace(loopStmt.getBody(), name);
        } else {
            ForStatement loopStmt = (ForStatement) sb.getStatement(0);
            applicable = rIsApplicableForUpdateInPlace(loopStmt.getBody(), name);
        }
        if (applicable) {
            inPlaceVars.add(name);
        }
    }
    sb.setUpdateInPlaceVars(inPlaceVars);

    // return modified statement block
    return Arrays.asList(sb);
}
Also used : ForStatementBlock(org.apache.sysml.parser.ForStatementBlock) VariableSet(org.apache.sysml.parser.VariableSet) ArrayList(java.util.ArrayList) WhileStatement(org.apache.sysml.parser.WhileStatement) ForStatement(org.apache.sysml.parser.ForStatement) WhileStatementBlock(org.apache.sysml.parser.WhileStatementBlock)

Example 15 with VariableSet

use of org.apache.sysml.parser.VariableSet in project systemml by apache.

the class RewriteSplitDagUnknownCSVRead method rewriteStatementBlock.

/**
 * Splits the hop dag of a statement block at csv read hops of unknown size.
 *
 * <p>The candidates found by {@code collectCSVReadHopsUnknownSize} are moved,
 * each behind a transient write, into a newly created statement block
 * {@code sb1}; in the original dag each candidate is replaced by a transient
 * read of the same variable name.
 *
 * <p>No split is attempted if the runtime platform is SINGLE_NODE or if
 * {@code HopRewriteUtils.isLastLevelStatementBlock(sb)} is false.
 *
 * @param sb    statement block whose hop dag is inspected (modified in place on split)
 * @param state rewrite status (not used by this method)
 * @return on split: {@code sb1} followed by {@code sb}; otherwise a list
 *         holding only {@code sb}
 * @throws HopsException wrapping any exception raised while splitting the dag
 */
@Override
public List<StatementBlock> rewriteStatementBlock(StatementBlock sb, ProgramRewriteStatus state) {
    // DAG splits not required for forced single node
    if (DMLScript.rtplatform == RUNTIME_PLATFORM.SINGLE_NODE || !HopRewriteUtils.isLastLevelStatementBlock(sb))
        return Arrays.asList(sb);
    ArrayList<StatementBlock> ret = new ArrayList<>();
    // collect all unknown csv reads hops
    ArrayList<Hop> cand = new ArrayList<>();
    collectCSVReadHopsUnknownSize(sb.getHops(), cand);
    // split hop dag on demand
    if (!cand.isEmpty()) {
        try {
            // create new statement block with program and parse info of sb,
            // but with fresh (initially empty) live variable sets
            StatementBlock sb1 = new StatementBlock();
            sb1.setDMLProg(sb.getDMLProg());
            sb1.setParseInfo(sb);
            sb1.setLiveIn(new VariableSet());
            sb1.setLiveOut(new VariableSet());
            // move csv reads incl reblock to new statement block
            // (and replace original persistent read with transient read)
            ArrayList<Hop> sb1hops = new ArrayList<>();
            for (Hop reblock : cand) {
                // capture output characteristics of the candidate for the new read/write hops
                long rlen = reblock.getDim1();
                long clen = reblock.getDim2();
                long nnz = reblock.getNnz();
                UpdateType update = reblock.getUpdateType();
                int brlen = reblock.getRowsInBlock();
                int bclen = reblock.getColsInBlock();
                // replace literal inputs of the candidate with fresh copies
                // (otherwise, for instance, literal ops are shared across dags)
                for (int i = 0; i < reblock.getInput().size(); i++) if (reblock.getInput().get(i) instanceof LiteralOp)
                    HopRewriteUtils.replaceChildReference(reblock, reblock.getInput().get(i), new LiteralOp((LiteralOp) reblock.getInput().get(i)));
                // create new transient read
                DataOp tread = new DataOp(reblock.getName(), reblock.getDataType(), reblock.getValueType(), DataOpTypes.TRANSIENTREAD, null, rlen, clen, nnz, update, brlen, bclen);
                HopRewriteUtils.copyLineNumbers(reblock, tread);
                // replace reblock with transient read
                // (iterate over a copy of the parent list because the rewiring below modifies parent links)
                ArrayList<Hop> parents = new ArrayList<>(reblock.getParent());
                for (int i = 0; i < parents.size(); i++) {
                    Hop parent = parents.get(i);
                    HopRewriteUtils.replaceChildReference(parent, reblock, tread);
                }
                // add reblock sub dag to first statement block
                DataOp twrite = new DataOp(reblock.getName(), reblock.getDataType(), reblock.getValueType(), reblock, DataOpTypes.TRANSIENTWRITE, null);
                twrite.setOutputParams(rlen, clen, nnz, update, brlen, bclen);
                HopRewriteUtils.copyLineNumbers(reblock, twrite);
                sb1hops.add(twrite);
                // update live in and out of new statement block (for piggybacking)
                DataIdentifier diVar = sb.variablesRead().getVariable(reblock.getName());
                if (diVar != null) {
                    // var read should always exist because persistent read
                    sb1.liveOut().addVariable(reblock.getName(), new DataIdentifier(diVar));
                    sb.liveIn().addVariable(reblock.getName(), new DataIdentifier(diVar));
                }
            }
            sb1.setHops(sb1hops);
            sb1.updateRecompilationFlag();
            // statement block with csv reblocks
            ret.add(sb1);
            // statement block with remaining hops
            ret.add(sb);
            // avoid later merge by other rewrites
            // NOTE(review): only sb is flagged as split dag here, sb1 is not - confirm this is intended
            sb.setSplitDag(true);
        } catch (Exception ex) {
            // any failure during the split is surfaced as HopsException with the cause preserved
            throw new HopsException("Failed to split hops dag for csv read with unknown size.", ex);
        }
        LOG.debug("Applied splitDagUnknownCSVRead.");
    } else // keep original hop dag
    {
        ret.add(sb);
    }
    return ret;
}
Also used : DataIdentifier(org.apache.sysml.parser.DataIdentifier) ArrayList(java.util.ArrayList) Hop(org.apache.sysml.hops.Hop) HopsException(org.apache.sysml.hops.HopsException) UpdateType(org.apache.sysml.runtime.controlprogram.caching.MatrixObject.UpdateType) HopsException(org.apache.sysml.hops.HopsException) VariableSet(org.apache.sysml.parser.VariableSet) LiteralOp(org.apache.sysml.hops.LiteralOp) DataOp(org.apache.sysml.hops.DataOp) StatementBlock(org.apache.sysml.parser.StatementBlock)

Aggregations

VariableSet (org.apache.sysml.parser.VariableSet)15 ArrayList (java.util.ArrayList)11 Hop (org.apache.sysml.hops.Hop)9 DataIdentifier (org.apache.sysml.parser.DataIdentifier)8 DataOp (org.apache.sysml.hops.DataOp)6 StatementBlock (org.apache.sysml.parser.StatementBlock)6 HopsException (org.apache.sysml.hops.HopsException)4 ForStatementBlock (org.apache.sysml.parser.ForStatementBlock)4 WhileStatementBlock (org.apache.sysml.parser.WhileStatementBlock)4 UpdateType (org.apache.sysml.runtime.controlprogram.caching.MatrixObject.UpdateType)4 Data (org.apache.sysml.runtime.instructions.cp.Data)4 MultiThreadedHop (org.apache.sysml.hops.Hop.MultiThreadedHop)3 ForProgramBlock (org.apache.sysml.runtime.controlprogram.ForProgramBlock)3 FunctionProgramBlock (org.apache.sysml.runtime.controlprogram.FunctionProgramBlock)3 IfProgramBlock (org.apache.sysml.runtime.controlprogram.IfProgramBlock)3 ParForProgramBlock (org.apache.sysml.runtime.controlprogram.ParForProgramBlock)3 ProgramBlock (org.apache.sysml.runtime.controlprogram.ProgramBlock)3 WhileProgramBlock (org.apache.sysml.runtime.controlprogram.WhileProgramBlock)3 HashSet (java.util.HashSet)2 LiteralOp (org.apache.sysml.hops.LiteralOp)2