Search in sources :

Example 31 with DataOp

use of org.apache.sysml.hops.DataOp in project systemml by apache.

the class RewriteInjectSparkLoopCheckpointing method rewriteStatementBlock.

/**
 * Places a statement block with checkpoint operations in front of a while/for
 * statement block for every matrix variable that the loop reads but never updates.
 *
 * <p>Limitations carried by this rewrite: (1) checkpoints are added without knowledge
 * of the global program structure, assuming redundant checkpointing is prevented at
 * runtime (instruction) level; (2) size information is not considered, so every
 * candidate is checkpointed even if it is only used by CP operations.
 *
 * @param sb     statement block to inspect
 * @param status rewrite status; provides the block size and parfor context and is
 *               flagged once checkpoints have been injected
 * @return the optional checkpoint block followed by {@code sb}; only {@code sb} if
 *         nothing was injected
 */
@Override
public List<StatementBlock> rewriteStatementBlock(StatementBlock sb, ProgramRewriteStatus status) {
    // outside of spark execution mode there is nothing to do: hand back the original block
    if (!OptimizerUtils.isSparkExecutionMode()) {
        return Arrays.asList(sb);
    }
    ArrayList<StatementBlock> result = new ArrayList<>();
    // block size as set by the reblock rewrite
    int blksz = status.getBlocksize();
    // only loops qualify (for incl parfor); with context checking enabled, loops inside
    // a parfor context are skipped because otherwise we would prevent remote parfor
    boolean isLoop = sb instanceof WhileStatementBlock || sb instanceof ForStatementBlock;
    if (isLoop && (!_checkCtx || !status.isInParforContext())) {
        // step 1: candidates are matrix variables that are read but not updated
        ArrayList<String> readOnlyMatrices = new ArrayList<>();
        VariableSet readVars = sb.variablesRead();
        VariableSet updatedVars = sb.variablesUpdated();
        for (String name : readVars.getVariableNames()) {
            if (updatedVars.containsVariable(name)) {
                continue;
            }
            if (readVars.getVariable(name).getDataType() == DataType.MATRIX) {
                readOnlyMatrices.add(name);
            }
        }
        // step 2: build the checkpoint statement block if there is anything to checkpoint
        if (!readOnlyMatrices.isEmpty()) {
            StatementBlock ckptBlock = new StatementBlock();
            ckptBlock.setDMLProg(sb.getDMLProg());
            ckptBlock.setParseInfo(sb);
            ArrayList<Hop> roots = new ArrayList<>();
            VariableSet liveIn = new VariableSet();
            VariableSet liveOut = new VariableSet();
            for (String name : readOnlyMatrices) {
                DataIdentifier id = readVars.getVariable(name);
                // indexed identifiers carry the dimensions of the original (un-indexed) data
                long rows;
                long cols;
                if (id instanceof IndexedIdentifier) {
                    rows = ((IndexedIdentifier) id).getOrigDim1();
                    cols = ((IndexedIdentifier) id).getOrigDim2();
                } else {
                    rows = id.getDim1();
                    cols = id.getDim2();
                }
                // transient read flagged for checkpointing, fed straight into a transient write
                DataOp ckptRead = new DataOp(name, DataType.MATRIX, ValueType.DOUBLE, DataOpTypes.TRANSIENTREAD, id.getFilename(), rows, cols, id.getNnz(), blksz, blksz);
                ckptRead.setRequiresCheckpoint(true);
                DataOp ckptWrite = new DataOp(name, DataType.MATRIX, ValueType.DOUBLE, ckptRead, DataOpTypes.TRANSIENTWRITE, null);
                HopRewriteUtils.setOutputParameters(ckptWrite, rows, cols, blksz, blksz, id.getNnz());
                roots.add(ckptWrite);
                liveIn.addVariable(name, readVars.getVariable(name));
                liveOut.addVariable(name, readVars.getVariable(name));
            }
            ckptBlock.setHops(roots);
            ckptBlock.setLiveIn(liveIn);
            ckptBlock.setLiveOut(liveOut);
            ckptBlock.setSplitDag(true);
            result.add(ckptBlock);
            // record in the rewrite status that checkpoints were injected
            status.setInjectedCheckpoints();
        }
    }
    // the original statement block always comes last
    result.add(sb);
    return result;
}
Also used : ForStatementBlock(org.apache.sysml.parser.ForStatementBlock) DataIdentifier(org.apache.sysml.parser.DataIdentifier) ArrayList(java.util.ArrayList) Hop(org.apache.sysml.hops.Hop) VariableSet(org.apache.sysml.parser.VariableSet) DataOp(org.apache.sysml.hops.DataOp) WhileStatementBlock(org.apache.sysml.parser.WhileStatementBlock) ForStatementBlock(org.apache.sysml.parser.ForStatementBlock) StatementBlock(org.apache.sysml.parser.StatementBlock) WhileStatementBlock(org.apache.sysml.parser.WhileStatementBlock) IndexedIdentifier(org.apache.sysml.parser.IndexedIdentifier)

Example 32 with DataOp

use of org.apache.sysml.hops.DataOp in project systemml by apache.

the class RewriteRemovePersistentReadWrite method rule_RemovePersistentDataOp.

/**
 * Walks the hop DAG below {@code hop} depth-first and turns persistent reads/writes
 * of registered variables into transient reads/writes. Persistent data ops whose
 * variable is not registered are left unchanged and reported with a warning.
 *
 * @param hop current hop; already visited hops are skipped
 */
private void rule_RemovePersistentDataOp(Hop hop) {
    // nothing to do for hops that were processed before
    if (hop.isVisited()) {
        return;
    }
    // handle all children first
    ArrayList<Hop> children = hop.getInput();
    for (int pos = 0; pos < children.size(); pos++) {
        rule_RemovePersistentDataOp(children.get(pos));
    }
    // convert persistent data ops of registered inputs/outputs
    if (hop instanceof DataOp) {
        DataOp dop = (DataOp) hop;
        String name = dop.getName();
        switch (dop.getDataOpType()) {
            case PERSISTENTREAD:
                if (!_inputs.contains(name)) {
                    LOG.warn("Non-registered persistent read of variable '" + name + "' (line " + dop.getBeginLine() + ").");
                    break;
                }
                dop.setDataOpType(DataOpTypes.TRANSIENTREAD);
                if (hop.getDataType() == DataType.SCALAR) {
                    dop.removeInput("iofilename");
                }
                // disable unnecessary reblock of binary block w/ equal block sizes
                if (dop.requiresReblock() && _inputsMeta.containsKey(name)) {
                    Object metaObj = _inputsMeta.get(name);
                    if (metaObj instanceof MetaDataFormat) {
                        MetaDataFormat meta = (MetaDataFormat) metaObj;
                        MatrixCharacteristics mc = meta.getMatrixCharacteristics();
                        boolean sameRowBlk = mc.getRowsPerBlock() == dop.getRowsInBlock();
                        boolean sameBlksz = sameRowBlk && mc.getColsPerBlock() == dop.getColsInBlock();
                        // binary matrix w/ matching dims and frames do not require reblock
                        boolean binaryBlock = meta.getInputInfo() == InputInfo.BinaryBlockInputInfo;
                        if (binaryBlock && (sameBlksz || dop.getDataType() == DataType.FRAME)) {
                            dop.setRequiresReblock(false);
                        }
                    }
                }
                break;
            case PERSISTENTWRITE:
                if (!_outputs.contains(name)) {
                    LOG.warn("Non-registered persistent write of variable '" + name + "' (line " + dop.getBeginLine() + ").");
                    break;
                }
                dop.setDataOpType(DataOpTypes.TRANSIENTWRITE);
                // take over the block sizes of the written input
                dop.setRowsInBlock(dop.getInput().get(0).getRowsInBlock());
                dop.setColsInBlock(dop.getInput().get(0).getColsInBlock());
                if (hop.getDataType() == DataType.SCALAR) {
                    dop.removeInput("iofilename");
                }
                break;
            default:
        }
    }
    // remember that this hop is done
    hop.setVisited();
}
Also used : MetaDataFormat(org.apache.sysml.runtime.matrix.MetaDataFormat) DataOpTypes(org.apache.sysml.hops.Hop.DataOpTypes) Hop(org.apache.sysml.hops.Hop) DataOp(org.apache.sysml.hops.DataOp) MatrixCharacteristics(org.apache.sysml.runtime.matrix.MatrixCharacteristics)

Example 33 with DataOp

use of org.apache.sysml.hops.DataOp in project systemml by apache.

the class RewriteSplitDagDataDependentOperators method rewriteStatementBlock.

/**
 * Splits the hop DAG of a statement block at the operators found by
 * {@code collectDataDependentOperators}. Each such operator (with its sub-DAG) is moved
 * behind a transient write into a new statement block {@code sb1}, and its consumers in
 * {@code sb} are rewired to a transient read of the same variable. The new block is
 * rewritten recursively before being returned in front of {@code sb}.
 *
 * <p>The block is returned unchanged (as a single-element list) when the runtime platform
 * is SINGLE_NODE, when {@code sb} is not a last-level statement block, or when no
 * candidates are found. Any exception raised while splitting is wrapped in a
 * {@code HopsException}.
 *
 * @param sb    statement block whose hop DAG is inspected and possibly modified in place
 * @param state rewrite status, passed through to the recursive invocation
 * @return the statement blocks produced for {@code sb1} followed by {@code sb}, or only
 *         {@code sb} if no split happened
 */
@Override
public List<StatementBlock> rewriteStatementBlock(StatementBlock sb, ProgramRewriteStatus state) {
    // DAG splits not required for forced single node
    if (DMLScript.rtplatform == RUNTIME_PLATFORM.SINGLE_NODE || !HopRewriteUtils.isLastLevelStatementBlock(sb))
        return Arrays.asList(sb);
    ArrayList<StatementBlock> ret = new ArrayList<>();
    // collect candidate hops via collectDataDependentOperators (cut points for the split);
    // the visit flags are reset afterwards
    ArrayList<Hop> cand = new ArrayList<>();
    collectDataDependentOperators(sb.getHops(), cand);
    Hop.resetVisitStatus(sb.getHops());
    // split hop dag on demand
    if (!cand.isEmpty()) {
        // collect child operators of candidates (to prevent rewrite anomalies)
        HashSet<Hop> candChilds = new HashSet<>();
        collectCandidateChildOperators(cand, candChilds);
        try {
            // new statement block sharing program and parse info with sb, starting with
            // empty live variable sets that are filled per candidate below
            StatementBlock sb1 = new StatementBlock();
            sb1.setDMLProg(sb.getDMLProg());
            sb1.setParseInfo(sb);
            sb1.setLiveIn(new VariableSet());
            sb1.setLiveOut(new VariableSet());
            // move data-dependent ops incl transient writes to new statement block
            // (and replace original persistent read with transient read)
            ArrayList<Hop> sb1hops = new ArrayList<>();
            for (Hop c : cand) {
                // if there are already transient writes use them and don't introduce artificial variables;
                // unless there are transient reads w/ the same variable name in the current dag which can
                // lead to invalid reordering if variable consumers are not feeding into the candidate op.
                boolean hasTWrites = hasTransientWriteParents(c);
                boolean moveTWrite = hasTWrites ? HopRewriteUtils.rHasSimpleReadChain(c, getFirstTransientWriteParent(c).getName()) : false;
                String varname = null;
                // output characteristics of the candidate, reused for the new read/write hops
                // and for the live variable entry
                long rlen = c.getDim1();
                long clen = c.getDim2();
                long nnz = c.getNnz();
                UpdateType update = c.getUpdateType();
                int brlen = c.getRowsInBlock();
                int bclen = c.getColsInBlock();
                if (// reuse existing transient_write
                hasTWrites && moveTWrite) {
                    Hop twrite = getFirstTransientWriteParent(c);
                    varname = twrite.getName();
                    // create new transient read
                    DataOp tread = new DataOp(varname, c.getDataType(), c.getValueType(), DataOpTypes.TRANSIENTREAD, null, rlen, clen, nnz, update, brlen, bclen);
                    tread.setVisited();
                    HopRewriteUtils.copyLineNumbers(c, tread);
                    // replace data-dependent operator with transient read
                    // (iterate over a copy because the parent list of c changes during rewiring)
                    ArrayList<Hop> parents = new ArrayList<>(c.getParent());
                    for (int i = 0; i < parents.size(); i++) {
                        // prevent concurrent modification by index access
                        Hop parent = parents.get(i);
                        if (!candChilds.contains(parent)) {
                            // anomaly filter
                            // the reused transient write keeps c as input and is instead
                            // removed from the roots of sb (it is added to sb1 below)
                            if (parent != twrite)
                                HopRewriteUtils.replaceChildReference(parent, c, tread);
                            else
                                sb.getHops().remove(parent);
                        }
                    }
                    // add data-dependent operator sub dag to first statement block
                    sb1hops.add(twrite);
                } else // create transient write to artificial variables
                {
                    varname = createCutVarName(false);
                    // create new transient read
                    DataOp tread = new DataOp(varname, c.getDataType(), c.getValueType(), DataOpTypes.TRANSIENTREAD, null, rlen, clen, nnz, update, brlen, bclen);
                    tread.setVisited();
                    HopRewriteUtils.copyLineNumbers(c, tread);
                    // replace data-dependent operator with transient read
                    // (iterate over a copy because the parent list of c changes during rewiring)
                    ArrayList<Hop> parents = new ArrayList<>(c.getParent());
                    for (int i = 0; i < parents.size(); i++) {
                        // prevent concurrent modification by index access
                        Hop parent = parents.get(i);
                        if (// anomaly filter
                        !candChilds.contains(parent))
                            HopRewriteUtils.replaceChildReference(parent, c, tread);
                    }
                    // add data-dependent operator sub dag to first statement block
                    DataOp twrite = new DataOp(varname, c.getDataType(), c.getValueType(), c, DataOpTypes.TRANSIENTWRITE, null);
                    twrite.setVisited();
                    twrite.setOutputParams(rlen, clen, nnz, update, brlen, bclen);
                    HopRewriteUtils.copyLineNumbers(c, twrite);
                    sb1hops.add(twrite);
                }
                // update live in and out of new statement block (for piggybacking)
                // (each variable set receives its own copy of the identifier)
                DataIdentifier diVar = new DataIdentifier(varname);
                diVar.setDimensions(rlen, clen);
                diVar.setBlockDimensions(brlen, bclen);
                diVar.setDataType(c.getDataType());
                diVar.setValueType(c.getValueType());
                sb1.liveOut().addVariable(varname, new DataIdentifier(diVar));
                sb.liveIn().addVariable(varname, new DataIdentifier(diVar));
            }
            // ensure disjoint operators across DAGs (prevent replicated operations)
            handleReplicatedOperators(sb1hops, sb.getHops(), sb1.liveOut(), sb.liveIn());
            // deep copy new dag (in order to prevent any dangling references)
            sb1.setHops(Recompiler.deepCopyHopsDag(sb1hops));
            sb1.updateRecompilationFlag();
            // avoid later merge by other rewrites
            sb1.setSplitDag(true);
            // recursive application of rewrite rule (in case of multiple data dependent operators
            // with data dependencies in between each other)
            List<StatementBlock> tmp = rewriteStatementBlock(sb1, state);
            // add new statement blocks to output
            // statement block with data dependent hops
            ret.addAll(tmp);
            // statement block with remaining hops
            ret.add(sb);
            // avoid later merge by other rewrites
            sb.setSplitDag(true);
        } catch (Exception ex) {
            // any failure during the split is reported as a HopsException with the cause attached
            throw new HopsException("Failed to split hops dag for data dependent operators with unknown size.", ex);
        }
        LOG.debug("Applied splitDagDataDependentOperators (lines " + sb.getBeginLine() + "-" + sb.getEndLine() + ").");
    } else // keep original hop dag
    {
        ret.add(sb);
    }
    return ret;
}
Also used : DataIdentifier(org.apache.sysml.parser.DataIdentifier) ArrayList(java.util.ArrayList) Hop(org.apache.sysml.hops.Hop) HopsException(org.apache.sysml.hops.HopsException) UpdateType(org.apache.sysml.runtime.controlprogram.caching.MatrixObject.UpdateType) HopsException(org.apache.sysml.hops.HopsException) VariableSet(org.apache.sysml.parser.VariableSet) DataOp(org.apache.sysml.hops.DataOp) StatementBlock(org.apache.sysml.parser.StatementBlock) HashSet(java.util.HashSet)

Example 34 with DataOp

use of org.apache.sysml.hops.DataOp in project systemml by apache.

the class RewriteSplitDagDataDependentOperators method handleReplicatedOperators.

/**
 * Keeps the operators of two hop DAGs disjoint. Operators that
 * {@code rProbeAndAddHopsToCandidateSet} reports as (consumer, shared input) pairs are
 * cut: the shared input is written to an artificial variable by a new transient write
 * root in the first DAG, and each consumer in the second DAG reads it through a
 * transient read instead. One transient read/write pair is created per shared input
 * and reused for all of its consumers.
 *
 * @param rootsSB1 roots of the first DAG; new transient writes are appended
 * @param rootsSB2 roots of the second DAG, whose operators are probed
 * @param sb1out   live-out variables of the first block; receives the cut variables
 * @param sb2in    live-in variables of the second block; receives the cut variables
 */
private void handleReplicatedOperators(ArrayList<Hop> rootsSB1, ArrayList<Hop> rootsSB2, VariableSet sb1out, VariableSet sb2in) {
    // step 1: create probe set SB1
    HashSet<Hop> sb1Ops = new HashSet<>();
    Hop.resetVisitStatus(rootsSB1);
    for (Hop root : rootsSB1) {
        rAddHopsToProbeSet(root, sb1Ops);
    }
    // step 2: probe SB2 operators top-down (collect cut candidates)
    HashSet<Pair<Hop, Hop>> cuts = new HashSet<>();
    Hop.resetVisitStatus(rootsSB2);
    for (Hop root : rootsSB2) {
        rProbeAndAddHopsToCandidateSet(root, sb1Ops, cuts);
    }
    // step 3: create additional cuts with reuse for common references
    HashMap<Long, DataOp> treadByInputId = new HashMap<>();
    for (Pair<Hop, Hop> cut : cuts) {
        Hop consumer = cut.getKey();
        Hop shared = cut.getValue();
        DataOp tread = treadByInputId.get(shared.getHopID());
        if (tread == null) {
            // first consumer of this input: introduce the artificial variable
            String cutVar = createCutVarName(false);
            long rows = shared.getDim1();
            long cols = shared.getDim2();
            long nnz = shared.getNnz();
            int brlen = shared.getRowsInBlock();
            int bclen = shared.getColsInBlock();
            tread = new DataOp(cutVar, shared.getDataType(), shared.getValueType(), DataOpTypes.TRANSIENTREAD, null, rows, cols, nnz, shared.getUpdateType(), brlen, bclen);
            tread.setVisited();
            HopRewriteUtils.copyLineNumbers(shared, tread);
            treadByInputId.put(shared.getHopID(), tread);
            DataOp twrite = new DataOp(cutVar, shared.getDataType(), shared.getValueType(), shared, DataOpTypes.TRANSIENTWRITE, null);
            twrite.setVisited();
            twrite.setOutputParams(rows, cols, nnz, shared.getUpdateType(), brlen, bclen);
            HopRewriteUtils.copyLineNumbers(shared, twrite);
            // update live in and out of new statement block (for piggybacking)
            DataIdentifier cutId = new DataIdentifier(cutVar);
            cutId.setDimensions(rows, cols);
            cutId.setBlockDimensions(brlen, bclen);
            cutId.setDataType(shared.getDataType());
            cutId.setValueType(shared.getValueType());
            sb1out.addVariable(cutVar, new DataIdentifier(cutId));
            sb2in.addVariable(cutVar, new DataIdentifier(cutId));
            rootsSB1.add(twrite);
        }
        // create additional cut by rewriting both hop dags:
        // the consumer now reads the transient read at the position of the shared input
        int childPos = HopRewriteUtils.getChildReferencePos(consumer, shared);
        HopRewriteUtils.removeChildReferenceByPos(consumer, shared, childPos);
        HopRewriteUtils.addChildReference(consumer, tread, childPos);
    }
}
Also used : DataIdentifier(org.apache.sysml.parser.DataIdentifier) HashMap(java.util.HashMap) Hop(org.apache.sysml.hops.Hop) DataOp(org.apache.sysml.hops.DataOp) HashSet(java.util.HashSet) Pair(org.apache.sysml.runtime.matrix.data.Pair)

Example 35 with DataOp

use of org.apache.sysml.hops.DataOp in project systemml by apache.

the class RewriteTransientWriteParentHandling method rule_RehangTransientWriteParents.

/**
 * Detaches the parents of a transient write and hangs them onto the write's first
 * input instead, making the transient write a root of the DAG. The same is then applied
 * to the inputs of the transient write. Hops that are not transient writes, or that
 * have no parents, are left untouched (and their inputs are not visited).
 *
 * @param hop    hop to inspect
 * @param sbHops root list of the DAG; re-hung transient writes are appended
 */
private void rule_RehangTransientWriteParents(Hop hop, ArrayList<Hop> sbHops) {
    boolean isTransientWrite = hop instanceof DataOp && ((DataOp) hop).getDataOpType() == DataOpTypes.TRANSIENTWRITE;
    if (!isTransientWrite || hop.getParent().isEmpty()) {
        return;
    }
    // the data written by the transient write
    Hop written = hop.getInput().get(0);
    // let every parent consume the written data directly instead of the transient write
    for (Hop consumer : hop.getParent()) {
        ArrayList<Hop> consumerInputs = consumer.getInput();
        consumerInputs.set(consumerInputs.indexOf(hop), written);
    }
    // register the former parents of the transient write as parents of the written data
    written.getParent().addAll(hop.getParent());
    // the transient write itself no longer has parents ...
    hop.getParent().clear();
    // ... and therefore becomes a root of this hops DAG
    sbHops.add(hop);
    // continue with the inputs (children) of the transient write
    for (Hop child : hop.getInput()) {
        rule_RehangTransientWriteParents(child, sbHops);
    }
}
Also used : Hop(org.apache.sysml.hops.Hop) DataOp(org.apache.sysml.hops.DataOp)

Aggregations

DataOp (org.apache.sysml.hops.DataOp): 86, Hop (org.apache.sysml.hops.Hop): 75, LiteralOp (org.apache.sysml.hops.LiteralOp): 44, ArrayList (java.util.ArrayList): 23, AggUnaryOp (org.apache.sysml.hops.AggUnaryOp): 20, UnaryOp (org.apache.sysml.hops.UnaryOp): 18, StatementBlock (org.apache.sysml.parser.StatementBlock): 17, MatrixObject (org.apache.sysml.runtime.controlprogram.caching.MatrixObject): 17, HopsException (org.apache.sysml.hops.HopsException): 16, IndexingOp (org.apache.sysml.hops.IndexingOp): 16, HashMap (java.util.HashMap): 13, FunctionOp (org.apache.sysml.hops.FunctionOp): 13, ForStatementBlock (org.apache.sysml.parser.ForStatementBlock): 13, WhileStatementBlock (org.apache.sysml.parser.WhileStatementBlock): 13, DMLRuntimeException (org.apache.sysml.runtime.DMLRuntimeException): 12, DataIdentifier (org.apache.sysml.parser.DataIdentifier): 11, IfStatementBlock (org.apache.sysml.parser.IfStatementBlock): 11, Data (org.apache.sysml.runtime.instructions.cp.Data): 11, BinaryOp (org.apache.sysml.hops.BinaryOp): 9, LeftIndexingOp (org.apache.sysml.hops.LeftIndexingOp): 9