Search in sources :

Example 81 with Instruction

use of org.apache.sysml.runtime.instructions.Instruction in project incubator-systemml by apache.

the class Dag method deleteUpdatedTransientReadVariables.

/**
 * Appends remove-variable instructions for transient matrix variables that are
 * read and then overwritten within the given list of lops.
 *
 * A variable qualifies when (1) a transient matrix READ with its label exists and
 * none of that READ's outputs is a Data lop, and (2) a transient matrix WRITE with
 * the same label exists whose first input is not one of the READs found in (1).
 *
 * @param sb statement block; when null the method returns without doing anything
 * @param nodeV list of low-level operators to inspect
 * @param inst list that receives the generated remove instructions
 */
private static void deleteUpdatedTransientReadVariables(StatementBlock sb, ArrayList<Lop> nodeV, ArrayList<Instruction> inst) {
    if (sb == null) {
        return;
    }
    if (LOG.isTraceEnabled()) {
        LOG.trace("In delete updated variables");
    }

    // Pass 1: transient matrix READs whose old value is not forwarded to any Data lop.
    // These are only CANDIDATES for removal, keyed by variable label.
    HashMap<String, Lop> candidateReads = new HashMap<>();
    for (Lop lop : nodeV) {
        if (lop.getExecLocation() != ExecLocation.Data) {
            continue;
        }
        Data data = (Data) lop;
        if (!data.isTransient() || data.getOperationType() != OperationTypes.READ || data.getDataType() != DataType.MATRIX) {
            continue;
        }
        // a consumer of type Data has to be a WRITE, i.e., the old value is still in use
        boolean feedsWrite = false;
        for (Lop consumer : lop.getOutputs()) {
            if (consumer.getExecLocation() == ExecLocation.Data) {
                feedsWrite = true;
                break;
            }
        }
        if (!feedsWrite) {
            candidateReads.put(lop.getOutputParameters().getLabel(), lop);
        }
    }

    // Pass 2: transient matrix WRITEs that actually replace one of the candidates.
    // overwrittenLabels holds the labels, writeByLabel the lop used for the location info.
    HashSet<String> overwrittenLabels = new HashSet<>();
    HashMap<String, Lop> writeByLabel = new HashMap<>();
    for (Lop lop : nodeV) {
        if (lop.getExecLocation() != ExecLocation.Data) {
            continue;
        }
        Data data = (Data) lop;
        if (!data.isTransient() || data.getOperationType() != OperationTypes.WRITE || data.getDataType() != DataType.MATRIX) {
            continue;
        }
        String label = lop.getOutputParameters().getLabel();
        // a transient read with the same label/name must be present
        if (!candidateReads.containsKey(label)) {
            continue;
        }
        // skip writes that are fed directly by one of the candidate transient reads
        if (candidateReads.containsValue(lop.getInputs().get(0))) {
            continue;
        }
        overwrittenLabels.add(label);
        writeByLabel.put(label, lop);
    }

    // Pass 3: one remove instruction per overwritten label
    for (String label : overwrittenLabels) {
        Instruction removal = VariableCPInstruction.prepareRemoveInstruction(label);
        removal.setLocation(writeByLabel.get(label));
        if (LOG.isTraceEnabled()) {
            LOG.trace(removal.toString());
        }
        inst.add(removal);
    }
}
Also used : HashMap(java.util.HashMap) Data(org.apache.sysml.lops.Data) Lop(org.apache.sysml.lops.Lop) MRJobInstruction(org.apache.sysml.runtime.instructions.MRJobInstruction) CPInstruction(org.apache.sysml.runtime.instructions.cp.CPInstruction) Instruction(org.apache.sysml.runtime.instructions.Instruction) VariableCPInstruction(org.apache.sysml.runtime.instructions.cp.VariableCPInstruction) HashSet(java.util.HashSet)

Example 82 with Instruction

use of org.apache.sysml.runtime.instructions.Instruction in project incubator-systemml by apache.

the class Dag method doGreedyGrouping.

/**
 * Method to group a vector of sorted lops.
 *
 * The method repeatedly scans <code>node_v</code>. In every pass each lop that is not yet
 * in <code>finishedNodes</code> is either added to <code>execNodes</code> (and, where
 * applicable, to a per-job-type node vector via <code>addNodeByJobType</code>) or put into
 * <code>queuedNodes</code> for a later pass. After a pass, control program and MR job
 * instructions are generated for the selected lops. The loop ends when a pass selects
 * no lops; if lops are still queued at that point a <code>LopsException</code> is thrown.
 *
 * The returned list holds the instructions generated during the passes, followed by the
 * collected write instructions, delete instructions and end-of-block instructions, in
 * that order.
 *
 * @param sb statement block
 * @param node_v list of low-level operators
 * @return list of instructions
 */
private ArrayList<Instruction> doGreedyGrouping(StatementBlock sb, ArrayList<Lop> node_v) {
    if (LOG.isTraceEnabled())
        LOG.trace("Grouping DAG ============");
    // nodes to be executed in current iteration
    ArrayList<Lop> execNodes = new ArrayList<>();
    // nodes that have already been processed
    ArrayList<Lop> finishedNodes = new ArrayList<>();
    // nodes that are queued for the following iteration
    ArrayList<Lop> queuedNodes = new ArrayList<>();
    // one node vector per job type, indexed by job type id
    ArrayList<ArrayList<Lop>> jobNodes = createNodeVectors(JobType.getNumJobTypes());
    // list of instructions
    ArrayList<Instruction> inst = new ArrayList<>();
    // ArrayList<Instruction> preWriteDeleteInst = new ArrayList<Instruction>();
    // instructions that are appended to "inst" only after the main loop (see end of method)
    ArrayList<Instruction> writeInst = new ArrayList<>();
    ArrayList<Instruction> deleteInst = new ArrayList<>();
    ArrayList<Instruction> endOfBlockInst = new ArrayList<>();
    // remove files for transient reads that are updated.
    // note: the generated remove instructions are collected in writeInst
    deleteUpdatedTransientReadVariables(sb, node_v, writeInst);
    generateRemoveInstructions(sb, endOfBlockInst);
    generateInstructionsForInputVariables(node_v, inst);
    boolean done = false;
    // prefix used for the per-node trace messages
    String indent = "    ";
    while (!done) {
        if (LOG.isTraceEnabled())
            LOG.trace("Grouping nodes in DAG");
        // reset the per-iteration state
        execNodes.clear();
        queuedNodes.clear();
        clearNodeVectors(jobNodes);
        gmrMapperFootprint = 0;
        for (Lop node : node_v) {
            // finished nodes don't need to be processed
            if (finishedNodes.contains(node))
                continue;
            if (LOG.isTraceEnabled())
                LOG.trace("Processing node (" + node.getID() + ") " + node.toString() + " exec nodes size is " + execNodes.size());
            // (code 1) queue a node that defines a MR job if it is not compatible
            // with its children nodes in execNodes
            if (node.definesMRJob() && !compatibleWithChildrenInExecNodes(execNodes, node)) {
                if (LOG.isTraceEnabled())
                    LOG.trace(indent + "Queueing node " + node.toString() + " (code 1)");
                queuedNodes.add(node);
                removeNodesForNextIteration(node, finishedNodes, execNodes, queuedNodes, jobNodes);
                continue;
            }
            // (code 2) queue the node if hasChildNode reports a child of it among the
            // nodes that are already queued for the next iteration
            if (hasChildNode(node, queuedNodes)) {
                if (LOG.isTraceEnabled())
                    LOG.trace(indent + "Queueing node " + node.toString() + " (code 2)");
                queuedNodes.add(node);
                // if node has more than two inputs,
                // remove children that will be needed in a future
                // iterations
                // may also have to remove parent nodes of these children
                removeNodesForNextIteration(node, finishedNodes, execNodes, queuedNodes, jobNodes);
                continue;
            }
            // (code 3) if inputs come from different jobs, then queue
            if (node.getInputs().size() >= 2) {
                // Integer.MIN_VALUE marks "no job id seen yet"; inputs for which
                // jobType returns -1 are ignored in the comparison
                int jobid = Integer.MIN_VALUE;
                boolean queueit = false;
                for (int idx = 0; idx < node.getInputs().size(); idx++) {
                    int input_jobid = jobType(node.getInputs().get(idx), jobNodes);
                    if (input_jobid != -1) {
                        if (jobid == Integer.MIN_VALUE)
                            jobid = input_jobid;
                        else if (jobid != input_jobid) {
                            queueit = true;
                            break;
                        }
                    }
                }
                if (queueit) {
                    if (LOG.isTraceEnabled())
                        LOG.trace(indent + "Queueing node " + node.toString() + " (code 3)");
                    queuedNodes.add(node);
                    removeNodesForNextIteration(node, finishedNodes, execNodes, queuedNodes, jobNodes);
                    continue;
                }
            }
            // See if this lop can be eliminated
            // This check is for "aligner" lops (e.g., group)
            boolean eliminate = false;
            eliminate = canEliminateLop(node, execNodes);
            if (eliminate) {
                if (LOG.isTraceEnabled())
                    LOG.trace(indent + "Adding -" + node.toString());
                execNodes.add(node);
                finishedNodes.add(node);
                addNodeByJobType(node, jobNodes, execNodes, eliminate);
                continue;
            }
            // (code 4) queue a node that defines a MR job if hasMRJobChildNode reports
            // that children that define a MR Job are present in execNodes
            if (node.definesMRJob()) {
                if (hasMRJobChildNode(node, execNodes)) {
                    // exception: a Grouping lop with a data generation child is not queued;
                    // this is because "group" can be pushed into the "Rand" job.
                    if (!(node.getType() == Lop.Type.Grouping && checkDataGenAsChildNode(node, execNodes))) {
                        if (LOG.isTraceEnabled())
                            LOG.trace(indent + "Queueing node " + node.toString() + " (code 4)");
                        queuedNodes.add(node);
                        removeNodesForNextIteration(node, finishedNodes, execNodes, queuedNodes, jobNodes);
                        continue;
                    }
                }
            }
            // (code 5) if "node" has more than one input and a RecordReader child in execNodes,
            // check every input against that RecordReader lop; if the check fails for any
            // input, queue "node"
            if (node.getInputs().size() > 1 && hasChildNode(node, execNodes, ExecLocation.RecordReader)) {
                // get the actual RecordReader lop
                Lop rr_node = getChildNode(node, execNodes, ExecLocation.RecordReader);
                // all inputs of "node" must be ancestors of rr_node
                boolean queue_it = false;
                for (Lop n : node.getInputs()) {
                    // each input should be ancestor of RecordReader lop
                    if (!n.equals(rr_node) && !isChild(rr_node, n, IDMap)) {
                        // i.e., "node" must be queued
                        queue_it = true;
                        break;
                    }
                }
                if (queue_it) {
                    // queue node
                    if (LOG.isTraceEnabled())
                        LOG.trace(indent + "Queueing -" + node.toString() + " (code 5)");
                    queuedNodes.add(node);
                    // TODO: does this have to be modified to handle
                    // recordreader lops?
                    removeNodesForNextIteration(node, finishedNodes, execNodes, queuedNodes, jobNodes);
                    continue;
                } else {
                // nothing here.. subsequent checks have to be performed
                // on "node"
                }
            }
            // Data lops: reads are only added to execNodes for scalars that have a file name;
            // otherwise only write nodes are kept in execnodes
            if (node.getExecLocation() == ExecLocation.Data) {
                Data dnode = (Data) node;
                // set when the write lop is queued, so that it is not marked as finished below
                boolean dnode_queued = false;
                if (dnode.getOperationType() == OperationTypes.READ) {
                    if (LOG.isTraceEnabled())
                        LOG.trace(indent + "Adding Data -" + node.toString());
                    // TODO: avoid readScalar instruction, and read it on-demand just like the way Matrices are read in control program
                    if (node.getDataType() == DataType.SCALAR && // TODO: LEO check the following condition is still needed
                    node.getOutputParameters().getFile_name() != null) {
                        // this lop corresponds to reading a scalar from HDFS file
                        // add it to execNodes so that "readScalar" instruction gets generated
                        execNodes.add(node);
                    // note: no need to add it to any job vector
                    }
                } else if (dnode.getOperationType() == OperationTypes.WRITE) {
                    // Skip the transient write <code>node</code> if the input is a
                    // transient read with the same variable name. i.e., a dummy copy.
                    // Hence, <code>node</code> can be avoided.
                    // TODO: this case should ideally be handled in the language layer
                    // prior to the construction of Hops Dag
                    Lop input = dnode.getInputs().get(0);
                    if (dnode.isTransient() && input.getExecLocation() == ExecLocation.Data && ((Data) input).isTransient() && dnode.getOutputParameters().getLabel().equals(input.getOutputParameters().getLabel())) {
                    // do nothing, <code>node</code> must not processed any further.
                    } else if (execNodes.contains(input) && !isCompatible(node, input) && sendWriteLopToMR(node)) {
                        // input is in execNodes but it is not compatible with write lop. So, queue the write lop.
                        // note: unlike the other queueing cases, removeNodesForNextIteration is not called here
                        if (LOG.isTraceEnabled())
                            LOG.trace(indent + "Queueing -" + node.toString());
                        queuedNodes.add(node);
                        dnode_queued = true;
                    } else {
                        if (LOG.isTraceEnabled())
                            LOG.trace(indent + "Adding Data -" + node.toString());
                        execNodes.add(node);
                        // only writes for which sendWriteLopToMR returns true are assigned to a job vector
                        if (sendWriteLopToMR(node)) {
                            addNodeByJobType(node, jobNodes, execNodes, false);
                        }
                    }
                }
                if (!dnode_queued)
                    finishedNodes.add(node);
                continue;
            }
            // map or reduce node, can always be piggybacked with parent
            if (node.getExecLocation() == ExecLocation.MapOrReduce) {
                if (LOG.isTraceEnabled())
                    LOG.trace(indent + "Adding -" + node.toString());
                execNodes.add(node);
                finishedNodes.add(node);
                addNodeByJobType(node, jobNodes, execNodes, false);
                continue;
            }
            // RecordReader node, add, if no parent needs reduce, else queue
            if (node.getExecLocation() == ExecLocation.RecordReader) {
                // (code 6) add only if "node" has no Map or MapAndReduce child in
                // execNodes .. it has to be the first one in the job!
                if (!hasChildNode(node, execNodes, ExecLocation.Map) && !hasChildNode(node, execNodes, ExecLocation.MapAndReduce)) {
                    if (LOG.isTraceEnabled())
                        LOG.trace(indent + "Adding -" + node.toString());
                    execNodes.add(node);
                    finishedNodes.add(node);
                    addNodeByJobType(node, jobNodes, execNodes, false);
                } else {
                    if (LOG.isTraceEnabled())
                        LOG.trace(indent + "Queueing -" + node.toString() + " (code 6)");
                    queuedNodes.add(node);
                    removeNodesForNextIteration(node, finishedNodes, execNodes, queuedNodes, jobNodes);
                }
                continue;
            }
            // map node, add, if no parent needs reduce, else queue
            if (node.getExecLocation() == ExecLocation.Map) {
                boolean queueThisNode = false;
                // (code 7) subcode is only used in the trace message: 1 = distributed cache input
                // is in execNodes, 2 = mapper memory limit exceeded, -1 = neither
                int subcode = -1;
                if (node.usesDistributedCache()) {
                    // if an input to <code>node</code> comes from distributed cache
                    // then that input must get executed in one of the previous jobs.
                    int[] dcInputIndexes = node.distributedCacheInputIndex();
                    for (int dcInputIndex : dcInputIndexes) {
                        // the index is decremented by one before it is used as a list position
                        Lop dcInput = node.getInputs().get(dcInputIndex - 1);
                        if ((dcInput.getType() != Lop.Type.Data && dcInput.getExecType() == ExecType.MR) && execNodes.contains(dcInput)) {
                            queueThisNode = true;
                            subcode = 1;
                        }
                    }
                    // Limit the number of distributed cache inputs based on the available memory in mappers
                    double memsize = computeFootprintInMapper(node);
                    if (gmrMapperFootprint > 0 && !checkMemoryLimits(node, gmrMapperFootprint + memsize)) {
                        queueThisNode = true;
                        subcode = 2;
                    }
                    // accumulate the footprint only for nodes that are not queued by the checks above
                    if (!queueThisNode)
                        gmrMapperFootprint += memsize;
                }
                if (!queueThisNode && !hasChildNode(node, execNodes, ExecLocation.MapAndReduce) && !hasMRJobChildNode(node, execNodes)) {
                    if (LOG.isTraceEnabled())
                        LOG.trace(indent + "Adding -" + node.toString());
                    execNodes.add(node);
                    finishedNodes.add(node);
                    addNodeByJobType(node, jobNodes, execNodes, false);
                } else {
                    if (LOG.isTraceEnabled())
                        LOG.trace(indent + "Queueing -" + node.toString() + " (code 7 - " + "subcode " + subcode + ")");
                    queuedNodes.add(node);
                    removeNodesForNextIteration(node, finishedNodes, execNodes, queuedNodes, jobNodes);
                }
                continue;
            }
            // reduce node, make sure no parent needs reduce, else queue
            if (node.getExecLocation() == ExecLocation.MapAndReduce) {
                // a MapAndReduce node that reaches this point is always added;
                // presumably it does not define a job (those are handled above) - TODO confirm
                if (LOG.isTraceEnabled())
                    LOG.trace(indent + "Adding -" + node.toString());
                execNodes.add(node);
                finishedNodes.add(node);
                // note: "eliminate" is always false here, since eliminated lops "continue" earlier
                addNodeByJobType(node, jobNodes, execNodes, eliminate);
                continue;
            }
            // aligned reduce, make sure a parent that is reduce exists
            if (node.getExecLocation() == ExecLocation.Reduce) {
                // (code 8) add only if compatible with its children in execNodes and a
                // MapAndReduce or Map child is present in execNodes; else queue
                if (compatibleWithChildrenInExecNodes(execNodes, node) && (hasChildNode(node, execNodes, ExecLocation.MapAndReduce) || hasChildNode(node, execNodes, ExecLocation.Map))) {
                    if (LOG.isTraceEnabled())
                        LOG.trace(indent + "Adding -" + node.toString());
                    execNodes.add(node);
                    finishedNodes.add(node);
                    addNodeByJobType(node, jobNodes, execNodes, false);
                } else {
                    if (LOG.isTraceEnabled())
                        LOG.trace(indent + "Queueing -" + node.toString() + " (code 8)");
                    queuedNodes.add(node);
                    removeNodesForNextIteration(node, finishedNodes, execNodes, queuedNodes, jobNodes);
                }
                continue;
            }
            // (code 9) control program node: queue it if any of its inputs is in execNodes
            // and is neither a Data nor a ControlProgram lop, i.e., an input
            // that will be executed in a MR job.
            if (node.getExecLocation() == ExecLocation.ControlProgram) {
                for (Lop lop : node.getInputs()) {
                    if (execNodes.contains(lop) && !(lop.getExecLocation() == ExecLocation.Data) && !(lop.getExecLocation() == ExecLocation.ControlProgram)) {
                        if (LOG.isTraceEnabled())
                            LOG.trace(indent + "Queueing -" + node.toString() + " (code 9)");
                        queuedNodes.add(node);
                        removeNodesForNextIteration(node, finishedNodes, execNodes, queuedNodes, jobNodes);
                        break;
                    }
                }
                // the loop above signals "queued" through membership in queuedNodes
                if (queuedNodes.contains(node))
                    continue;
                if (LOG.isTraceEnabled())
                    LOG.trace(indent + "Adding - scalar" + node.toString());
                execNodes.add(node);
                addNodeByJobType(node, jobNodes, execNodes, false);
                finishedNodes.add(node);
                continue;
            }
        }
        // no work to do
        if (execNodes.isEmpty()) {
            // nothing was selected in this pass: queued nodes could never be scheduled.
            // NOTE(review): the exception is thrown when queuedNodes is NOT empty, which
            // reads oddly against the message text - confirm the intended wording
            if (!queuedNodes.isEmpty())
                throw new LopsException("Queued nodes should not be 0 at this point \n");
            if (LOG.isTraceEnabled())
                LOG.trace("All done! queuedNodes = " + queuedNodes.size());
            done = true;
        } else {
            if (LOG.isTraceEnabled())
                LOG.trace("Generating jobs for group -- Node count=" + execNodes.size());
            // first process scalar instructions
            generateControlProgramJobs(execNodes, inst, writeInst, deleteInst);
            // copy unassigned lops in execnodes to gmrnodes
            // (lops for which jobType returns -1 go to GMRCELL if they have non-blocked
            // inputs, else to GMR; lops not compatible with GMR are queued, code 10)
            for (int i = 0; i < execNodes.size(); i++) {
                Lop node = execNodes.get(i);
                if (jobType(node, jobNodes) == -1) {
                    if (isCompatible(node, JobType.GMR)) {
                        if (node.hasNonBlockedInputs()) {
                            jobNodes.get(JobType.GMRCELL.getId()).add(node);
                            addChildren(node, jobNodes.get(JobType.GMRCELL.getId()), execNodes);
                        } else {
                            jobNodes.get(JobType.GMR.getId()).add(node);
                            addChildren(node, jobNodes.get(JobType.GMR.getId()), execNodes);
                        }
                    } else {
                        if (LOG.isTraceEnabled())
                            LOG.trace(indent + "Queueing -" + node.toString() + " (code 10)");
                        // NOTE(review): execNodes shrinks while it is iterated by index and "i" is
                        // not adjusted, so the element that moves into position i is not visited;
                        // removeNodesForNextIteration also receives execNodes - confirm this is intended
                        execNodes.remove(i);
                        finishedNodes.remove(node);
                        queuedNodes.add(node);
                        removeNodesForNextIteration(node, finishedNodes, execNodes, queuedNodes, jobNodes);
                    }
                }
            }
            // next generate MR instructions
            if (!execNodes.isEmpty())
                generateMRJobs(execNodes, inst, writeInst, deleteInst, jobNodes);
            handleSingleOutputJobs(execNodes, jobNodes, finishedNodes);
        }
    }
    // add write and delete inst at the very end.
    // inst.addAll(preWriteDeleteInst);
    inst.addAll(writeInst);
    inst.addAll(deleteInst);
    inst.addAll(endOfBlockInst);
    return inst;
}
Also used : ArrayList(java.util.ArrayList) Data(org.apache.sysml.lops.Data) Lop(org.apache.sysml.lops.Lop) MRJobInstruction(org.apache.sysml.runtime.instructions.MRJobInstruction) CPInstruction(org.apache.sysml.runtime.instructions.cp.CPInstruction) Instruction(org.apache.sysml.runtime.instructions.Instruction) VariableCPInstruction(org.apache.sysml.runtime.instructions.cp.VariableCPInstruction) LopsException(org.apache.sysml.lops.LopsException)

Example 83 with Instruction

use of org.apache.sysml.runtime.instructions.Instruction in project incubator-systemml by apache.

the class Dag method createPackedRmvarInstructions.

/**
 * Collapses every run of consecutive remove-variable instructions (those for which
 * <code>isRemoveVariableNoFile()</code> is true) into a single remove instruction
 * that carries all variable names of the run. All other instructions are copied
 * to the result unchanged and in their original order.
 *
 * @param insts list of instructions to compact
 * @return new list with packed remove-variable instructions
 */
private static ArrayList<Instruction> createPackedRmvarInstructions(ArrayList<Instruction> insts) {
    ArrayList<Instruction> packed = new ArrayList<>();
    // variable names of the rmvar run that is currently being collected
    ArrayList<String> pendingNames = new ArrayList<>();
    for (Instruction current : insts) {
        boolean plainRmvar = current instanceof VariableCPInstruction && ((VariableCPInstruction) current).isRemoveVariableNoFile();
        if (plainRmvar) {
            // still inside a run: remember the name, emit nothing yet
            VariableCPInstruction rmvar = (VariableCPInstruction) current;
            pendingNames.add(rmvar.getInput1().getName());
            continue;
        }
        // the run (if any) ends here: emit it as one packed instruction
        if (!pendingNames.isEmpty()) {
            String[] names = pendingNames.toArray(new String[0]);
            packed.add(VariableCPInstruction.prepareRemoveInstruction(names));
            pendingNames.clear();
        }
        // then pass the non-rmvar instruction through
        packed.add(current);
    }
    // a run that reaches the end of the input still has to be emitted
    if (!pendingNames.isEmpty()) {
        String[] names = pendingNames.toArray(new String[0]);
        packed.add(VariableCPInstruction.prepareRemoveInstruction(names));
    }
    return packed;
}
Also used : VariableCPInstruction(org.apache.sysml.runtime.instructions.cp.VariableCPInstruction) ArrayList(java.util.ArrayList) MRJobInstruction(org.apache.sysml.runtime.instructions.MRJobInstruction) CPInstruction(org.apache.sysml.runtime.instructions.cp.CPInstruction) Instruction(org.apache.sysml.runtime.instructions.Instruction)

Example 84 with Instruction

use of org.apache.sysml.runtime.instructions.Instruction in project incubator-systemml by apache.

the class Dag method setupNodeOutputs.

/**
 * Method to setup output filenames and outputInfos, and to generate related instructions
 *
 * @param node low-level operator
 * @param et exec type
 * @param cellModeOverride override mode
 * @param copyTWrite ?
 * @return node output
 */
private NodeOutput setupNodeOutputs(Lop node, ExecType et, boolean cellModeOverride, boolean copyTWrite) {
    OutputParameters oparams = node.getOutputParameters();
    NodeOutput out = new NodeOutput();
    node.setConsumerCount(node.getOutputs().size());
    // Compute the output format for this node
    out.setOutInfo(getOutputInfo(node, cellModeOverride));
    // since outputs are explicitly specified
    if (node.getExecLocation() != ExecLocation.Data) {
        if (node.getDataType() == DataType.SCALAR) {
            oparams.setLabel(Lop.SCALAR_VAR_NAME_PREFIX + var_index.getNextID());
            out.setVarName(oparams.getLabel());
            Instruction currInstr = VariableCPInstruction.prepareRemoveInstruction(oparams.getLabel());
            currInstr.setLocation(node);
            out.addLastInstruction(currInstr);
        } else if (// general case
        !(node instanceof FunctionCallCP)) {
            // generate temporary filename and a variable name to hold the
            // output produced by "rootNode"
            oparams.setFile_name(getNextUniqueFilename());
            oparams.setLabel(getNextUniqueVarname(node.getDataType()));
            // generate an instruction that creates a symbol table entry for the new variable
            // String createInst = prepareVariableInstruction("createvar", node);
            // out.addPreInstruction(CPInstructionParser.parseSingleInstruction(createInst));
            int rpb = (int) oparams.getRowsInBlock();
            int cpb = (int) oparams.getColsInBlock();
            Instruction createvarInst = VariableCPInstruction.prepareCreateVariableInstruction(oparams.getLabel(), oparams.getFile_name(), true, node.getDataType(), OutputInfo.outputInfoToString(getOutputInfo(node, false)), new MatrixCharacteristics(oparams.getNumRows(), oparams.getNumCols(), rpb, cpb, oparams.getNnz()), oparams.getUpdateType());
            createvarInst.setLocation(node);
            out.addPreInstruction(createvarInst);
            // temp file as well as the variable has to be deleted at the end
            Instruction currInstr = VariableCPInstruction.prepareRemoveInstruction(oparams.getLabel());
            currInstr.setLocation(node);
            out.addLastInstruction(currInstr);
            // finally, add the generated filename and variable name to the list of outputs
            out.setFileName(oparams.getFile_name());
            out.setVarName(oparams.getLabel());
        } else {
            // If the function call is set with output lops (e.g., multi return builtin),
            // generate a createvar instruction for each function output
            FunctionCallCP fcall = (FunctionCallCP) node;
            if (fcall.getFunctionOutputs() != null) {
                for (Lop fnOut : fcall.getFunctionOutputs()) {
                    OutputParameters fnOutParams = fnOut.getOutputParameters();
                    // OutputInfo oinfo = getOutputInfo((N)fnOut, false);
                    Instruction createvarInst = VariableCPInstruction.prepareCreateVariableInstruction(fnOutParams.getLabel(), getFilePath() + fnOutParams.getLabel(), true, fnOut.getDataType(), OutputInfo.outputInfoToString(getOutputInfo(fnOut, false)), new MatrixCharacteristics(fnOutParams.getNumRows(), fnOutParams.getNumCols(), (int) fnOutParams.getRowsInBlock(), (int) fnOutParams.getColsInBlock(), fnOutParams.getNnz()), oparams.getUpdateType());
                    if (node._beginLine != 0)
                        createvarInst.setLocation(node);
                    else
                        createvarInst.setLocation(fnOut);
                    out.addPreInstruction(createvarInst);
                }
            }
        }
    } else // rootNode is of type Data
    {
        if (node.getDataType() == DataType.SCALAR) {
            // generate assignment operations for final and transient writes
            if (oparams.getFile_name() == null && !(node instanceof Data && ((Data) node).isPersistentWrite())) {
                String io_inst = prepareAssignVarInstruction(node.getInputs().get(0), node);
                CPInstruction currInstr = CPInstructionParser.parseSingleInstruction(io_inst);
                if (node._beginLine != 0)
                    currInstr.setLocation(node);
                else if (!node.getInputs().isEmpty())
                    currInstr.setLocation(node.getInputs().get(0));
                out.addLastInstruction(currInstr);
            } else {
                // CP PERSISTENT WRITE SCALARS
                Lop fname = ((Data) node).getNamedInputLop(DataExpression.IO_FILENAME);
                String io_inst = node.getInstructions(node.getInputs().get(0).getOutputParameters().getLabel(), fname.getOutputParameters().getLabel());
                CPInstruction currInstr = CPInstructionParser.parseSingleInstruction(io_inst);
                if (node._beginLine != 0)
                    currInstr.setLocation(node);
                else if (!node.getInputs().isEmpty())
                    currInstr.setLocation(node.getInputs().get(0));
                out.addLastInstruction(currInstr);
            }
        } else {
            if (((Data) node).isTransient()) {
                if (et == ExecType.CP) {
                    // If transient matrix write is in CP then its input MUST be executed in CP as well.
                    // get variable and filename associated with the input
                    String inputFileName = node.getInputs().get(0).getOutputParameters().getFile_name();
                    String inputVarName = node.getInputs().get(0).getOutputParameters().getLabel();
                    String constVarName = oparams.getLabel();
                    String constFileName = inputFileName + constVarName;
                    /*
						 * Symbol Table state must change as follows:
						 * 
						 * FROM:
						 *     mvar1 -> temp21
						 *  
						 * TO:
						 *     mVar1 -> temp21
						 *     tVarH -> temp21
						 */
                    Instruction currInstr = VariableCPInstruction.prepareCopyInstruction(inputVarName, constVarName);
                    currInstr.setLocation(node);
                    out.addLastInstruction(currInstr);
                    out.setFileName(constFileName);
                } else {
                    if (copyTWrite) {
                        Instruction currInstr = VariableCPInstruction.prepareCopyInstruction(node.getInputs().get(0).getOutputParameters().getLabel(), oparams.getLabel());
                        currInstr.setLocation(node);
                        out.addLastInstruction(currInstr);
                        return out;
                    }
                    /*
						 * Since the "rootNode" is a transient data node, we first need to generate a 
						 * temporary filename as well as a variable name to hold the <i>immediate</i> 
						 * output produced by "rootNode". These generated HDFS filename and the 
						 * variable name must be changed at the end of an iteration/program block 
						 * so that the subsequent iteration/program block can correctly access the 
						 * generated data. Therefore, we need to distinguish between the following:
						 * 
						 *   1) Temporary file name & variable name: They hold the immediate output 
						 *   produced by "rootNode". Both names are generated below.
						 *   
						 *   2) Constant file name & variable name: They are constant across iterations. 
						 *   Variable name is given by rootNode's label that is created in the upper layers.  
						 *   File name is generated by concatenating "temporary file name" and "constant variable name".
						 *   
						 * Temporary files must be moved to constant files at the end of the iteration/program block.
						 */
                    // generate temporary filename & var name
                    String tempVarName = oparams.getLabel() + "temp";
                    String tempFileName = getNextUniqueFilename();
                    int rpb = (int) oparams.getRowsInBlock();
                    int cpb = (int) oparams.getColsInBlock();
                    Instruction createvarInst = VariableCPInstruction.prepareCreateVariableInstruction(tempVarName, tempFileName, true, node.getDataType(), OutputInfo.outputInfoToString(out.getOutInfo()), new MatrixCharacteristics(oparams.getNumRows(), oparams.getNumCols(), rpb, cpb, oparams.getNnz()), oparams.getUpdateType());
                    createvarInst.setLocation(node);
                    out.addPreInstruction(createvarInst);
                    String constVarName = oparams.getLabel();
                    String constFileName = tempFileName + constVarName;
                    oparams.setFile_name(getFilePath() + constFileName);
                    /*
						 * Since this is a node that denotes a transient read/write, we need to make sure 
						 * that the data computed for a given variable in a given iteration is passed on 
						 * to the next iteration. This is done by generating miscellaneous instructions 
						 * that gets executed at the end of the program block.
						 * 
						 * The state of the symbol table must change 
						 * 
						 * FROM: 
						 *     tVarA -> temp21tVarA (old copy of temp21)
						 *     tVarAtemp -> temp21  (new copy that should override the old copy) 
						 *
						 * TO:
						 *     tVarA -> temp21tVarA
						 */
                    // Generate a single mvvar instruction (e.g., mvvar tempA A)
                    // instead of two instructions "cpvar tempA A" and "rmvar tempA"
                    Instruction currInstr = VariableCPInstruction.prepareMoveInstruction(tempVarName, constVarName);
                    currInstr.setLocation(node);
                    out.addLastInstruction(currInstr);
                    // finally, add the temporary filename and variable name to the list of outputs
                    out.setFileName(tempFileName);
                    out.setVarName(tempVarName);
                }
            } else // rootNode is not a transient write. It is a persistent write.
            {
                if (et == ExecType.MR) {
                    // MR PERSISTENT WRITE
                    // create a variable to hold the result produced by this "rootNode"
                    oparams.setLabel("pVar" + var_index.getNextID());
                    int rpb = (int) oparams.getRowsInBlock();
                    int cpb = (int) oparams.getColsInBlock();
                    Lop fnameLop = ((Data) node).getNamedInputLop(DataExpression.IO_FILENAME);
                    String fnameStr = (fnameLop instanceof Data && ((Data) fnameLop).isLiteral()) ? fnameLop.getOutputParameters().getLabel() : Lop.VARIABLE_NAME_PLACEHOLDER + fnameLop.getOutputParameters().getLabel() + Lop.VARIABLE_NAME_PLACEHOLDER;
                    Instruction createvarInst;
                    // part MM format file on hdfs.
                    if (oparams.getFormat() == Format.CSV) {
                        String tempFileName = getNextUniqueFilename();
                        String createInst = node.getInstructions(tempFileName);
                        createvarInst = CPInstructionParser.parseSingleInstruction(createInst);
                        // NOTE: no instruction patching because final write from cp instruction
                        String writeInst = node.getInstructions(oparams.getLabel(), fnameLop.getOutputParameters().getLabel());
                        CPInstruction currInstr = CPInstructionParser.parseSingleInstruction(writeInst);
                        currInstr.setLocation(node);
                        out.addPostInstruction(currInstr);
                        // remove the variable
                        CPInstruction tempInstr = CPInstructionParser.parseSingleInstruction("CP" + Lop.OPERAND_DELIMITOR + "rmfilevar" + Lop.OPERAND_DELIMITOR + oparams.getLabel() + Lop.VALUETYPE_PREFIX + Expression.ValueType.UNKNOWN + Lop.OPERAND_DELIMITOR + "true" + Lop.VALUETYPE_PREFIX + "BOOLEAN");
                        tempInstr.setLocation(node);
                        out.addLastInstruction(tempInstr);
                    } else if (oparams.getFormat() == Format.MM) {
                        createvarInst = VariableCPInstruction.prepareCreateVariableInstruction(oparams.getLabel(), getNextUniqueFilename(), false, node.getDataType(), OutputInfo.outputInfoToString(getOutputInfo(node, false)), new MatrixCharacteristics(oparams.getNumRows(), oparams.getNumCols(), rpb, cpb, oparams.getNnz()), oparams.getUpdateType());
                        // NOTE: no instruction patching because final write from cp instruction
                        String writeInst = node.getInstructions(oparams.getLabel(), fnameLop.getOutputParameters().getLabel());
                        CPInstruction currInstr = CPInstructionParser.parseSingleInstruction(writeInst);
                        currInstr.setLocation(node);
                        out.addPostInstruction(currInstr);
                        // remove the variable
                        CPInstruction tempInstr = CPInstructionParser.parseSingleInstruction("CP" + Lop.OPERAND_DELIMITOR + "rmfilevar" + Lop.OPERAND_DELIMITOR + oparams.getLabel() + Lop.VALUETYPE_PREFIX + Expression.ValueType.UNKNOWN + Lop.OPERAND_DELIMITOR + "true" + Lop.VALUETYPE_PREFIX + "BOOLEAN");
                        tempInstr.setLocation(node);
                        out.addLastInstruction(tempInstr);
                    } else {
                        createvarInst = VariableCPInstruction.prepareCreateVariableInstruction(oparams.getLabel(), fnameStr, false, node.getDataType(), OutputInfo.outputInfoToString(getOutputInfo(node, false)), new MatrixCharacteristics(oparams.getNumRows(), oparams.getNumCols(), rpb, cpb, oparams.getNnz()), oparams.getUpdateType());
                        // remove the variable
                        CPInstruction currInstr = CPInstructionParser.parseSingleInstruction("CP" + Lop.OPERAND_DELIMITOR + "rmfilevar" + Lop.OPERAND_DELIMITOR + oparams.getLabel() + Lop.VALUETYPE_PREFIX + Expression.ValueType.UNKNOWN + Lop.OPERAND_DELIMITOR + "false" + Lop.VALUETYPE_PREFIX + "BOOLEAN");
                        currInstr.setLocation(node);
                        out.addLastInstruction(currInstr);
                    }
                    createvarInst.setLocation(node);
                    out.addPreInstruction(createvarInst);
                    // finally, add the filename and variable name to the list of outputs
                    out.setFileName(oparams.getFile_name());
                    out.setVarName(oparams.getLabel());
                } else {
                    // CP PERSISTENT WRITE
                    // generate a write instruction that writes matrix to HDFS
                    Lop fname = ((Data) node).getNamedInputLop(DataExpression.IO_FILENAME);
                    String io_inst = node.getInstructions(node.getInputs().get(0).getOutputParameters().getLabel(), fname.getOutputParameters().getLabel());
                    Instruction currInstr = (node.getExecType() == ExecType.SPARK) ? SPInstructionParser.parseSingleInstruction(io_inst) : CPInstructionParser.parseSingleInstruction(io_inst);
                    currInstr.setLocation((!node.getInputs().isEmpty() && node.getInputs().get(0)._beginLine != 0) ? node.getInputs().get(0) : node);
                    out.addLastInstruction(currInstr);
                }
            }
        }
    }
    return out;
}
Also used : CPInstruction(org.apache.sysml.runtime.instructions.cp.CPInstruction) VariableCPInstruction(org.apache.sysml.runtime.instructions.cp.VariableCPInstruction) OutputParameters(org.apache.sysml.lops.OutputParameters) FunctionCallCP(org.apache.sysml.lops.FunctionCallCP) Data(org.apache.sysml.lops.Data) MRJobInstruction(org.apache.sysml.runtime.instructions.MRJobInstruction) CPInstruction(org.apache.sysml.runtime.instructions.cp.CPInstruction) Instruction(org.apache.sysml.runtime.instructions.Instruction) VariableCPInstruction(org.apache.sysml.runtime.instructions.cp.VariableCPInstruction) Lop(org.apache.sysml.lops.Lop) MatrixCharacteristics(org.apache.sysml.runtime.matrix.MatrixCharacteristics)

Example 85 with Instruction

use of org.apache.sysml.runtime.instructions.Instruction in project incubator-systemml by apache.

the class Dag method generateMapReduceInstructions.

/**
 * Assembles one MapReduce job instruction from the given lops and hands the
 * job, plus the instructions that surround it, to the caller-supplied lists.
 * <p>
 * For GMR and GMRCELL jobs, transient matrix writes that merely copy a
 * transient read with the same label are dropped from the set of root nodes;
 * if nothing remains, the method returns without emitting anything.
 *
 * @param execNodes list of exec nodes from which the job is built
 * @param inst receives, in this order, the pre-job variable instructions, the
 *        MR job instruction itself, and the post-job instructions
 * @param writeinst receives the copy instructions generated for transient
 *        writes whose input is itself a root node
 * @param deleteinst receives the rename (last) instructions of the regular
 *        outputs; it is also passed on to {@code processConsumers}
 * @param rmvarinst list of rmvar instructions, passed on to {@code processConsumers}
 * @param jt job type
 * @throws LopsException if more than one recordreader instruction is produced,
 *         or if no result index could be determined for a root node
 */
private void generateMapReduceInstructions(ArrayList<Lop> execNodes, ArrayList<Instruction> inst, ArrayList<Instruction> writeinst, ArrayList<Instruction> deleteinst, ArrayList<Instruction> rmvarinst, JobType jt) {
    final boolean isGmrJob = (jt == JobType.GMR || jt == JobType.GMRCELL);
    final boolean isDatagenJob = (jt == JobType.DATAGEN);

    // ---- job inputs -------------------------------------------------------
    ArrayList<String> inputs = new ArrayList<>();
    ArrayList<String> inputLabels = new ArrayList<>();
    ArrayList<InputInfo> inputInfos = new ArrayList<>();
    ArrayList<Lop> inputLops = new ArrayList<>();
    ArrayList<Long> numRows = new ArrayList<>();
    ArrayList<Long> numCols = new ArrayList<>();
    ArrayList<Long> numRowsPerBlock = new ArrayList<>();
    ArrayList<Long> numColsPerBlock = new ArrayList<>();

    // ---- job outputs ------------------------------------------------------
    ArrayList<Byte> resultIndices = new ArrayList<>();
    ArrayList<String> outputs = new ArrayList<>();
    ArrayList<String> outputLabels = new ArrayList<>();
    ArrayList<OutputInfo> outputInfos = new ArrayList<>();

    // ---- instruction strings that end up inside the job -------------------
    ArrayList<String> mapperInstructions = new ArrayList<>();
    ArrayList<String> randInstructions = new ArrayList<>();
    ArrayList<String> recordReaderInstructions = new ArrayList<>();

    // ---- instructions executed around the job -----------------------------
    ArrayList<Instruction> renameInstructions = new ArrayList<>();
    ArrayList<Instruction> variableInstructions = new ArrayList<>();
    ArrayList<Instruction> postInstructions = new ArrayList<>();

    final int replication = 1;

    // line numbers are only collected in debug mode; otherwise null is passed along
    ArrayList<Integer> jobLineNumbers = DMLScript.ENABLE_DEBUG_MODE ? new ArrayList<Integer>() : null;

    /* Find the nodes that produce an output */
    ArrayList<Lop> rootNodes = new ArrayList<>();
    getOutputNodes(execNodes, rootNodes, jt);
    if (LOG.isTraceEnabled()) {
        LOG.trace("# of root nodes = " + rootNodes.size());
    }

    /* Remove transient writes that are simple copy of transient reads */
    if (isGmrJob) {
        ArrayList<Lop> copyOnlyWrites = new ArrayList<>();
        for (Lop root : rootNodes) {
            boolean isTransientMatrixWrite = root.getExecLocation() == ExecLocation.Data
                && ((Data) root).isTransient()
                && ((Data) root).getOperationType() == OperationTypes.WRITE
                && ((Data) root).getDataType() == DataType.MATRIX;
            if (!isTransientMatrixWrite) {
                continue;
            }
            // no computation, just a copy: the input is a transient data lop with the same label
            Lop source = root.getInputs().get(0);
            boolean isPlainCopy = source.getExecLocation() == ExecLocation.Data
                && ((Data) source).isTransient()
                && root.getOutputParameters().getLabel().equals(source.getOutputParameters().getLabel());
            if (isPlainCopy) {
                copyOnlyWrites.add(root);
            }
        }
        rootNodes.removeAll(copyOnlyWrites);
        if (rootNodes.isEmpty()) {
            return;
        }
    }

    // structure that maps node to their indices that will be used in the instructions
    HashMap<Lop, Integer> nodeIndexMapping = new HashMap<>();
    for (Lop root : rootNodes) {
        getInputPathsAndParameters(root, execNodes, inputs, inputInfos, numRows, numCols, numRowsPerBlock, numColsPerBlock, nodeIndexMapping, inputLabels, inputLops, jobLineNumbers);
    }

    // In case of RAND job, instructions are defined in the input file.
    // NOTE: this deliberately aliases the inputs list rather than copying it.
    if (isDatagenJob) {
        randInstructions = inputs;
    }

    // single-element array so that the helpers can advance the index
    int[] startIndex = { inputs.size() };

    // currently, recordreader instructions are allowed only in GMR jobs
    if (isGmrJob) {
        for (Lop root : rootNodes) {
            getRecordReaderInstructions(root, execNodes, inputs, recordReaderInstructions, nodeIndexMapping, startIndex, inputLabels, inputLops, jobLineNumbers);
            if (recordReaderInstructions.size() > 1) {
                throw new LopsException("MapReduce job can only have a single recordreader instruction: " + recordReaderInstructions.toString());
            }
        }
    }

    // cell mode is forced by a recordreader instruction, by a grouped aggregate job,
    // or (outside of reblock/datagen jobs) by any input in a cell format
    boolean cellModeOverride = !recordReaderInstructions.isEmpty() || jt == JobType.GROUPED_AGG;
    if (!cellModeOverride && jt != JobType.REBLOCK && jt != JobType.CSV_REBLOCK && !isDatagenJob) {
        for (InputInfo info : inputInfos) {
            if (info == InputInfo.BinaryCellInputInfo || info == InputInfo.TextCellInputInfo) {
                cellModeOverride = true;
                break;
            }
        }
    }

    for (Lop root : rootNodes) {
        getMapperInstructions(root, execNodes, inputs, mapperInstructions, nodeIndexMapping, startIndex, inputLabels, inputLops, jobLineNumbers);
    }

    if (LOG.isTraceEnabled()) {
        LOG.trace("    Input strings: " + inputs.toString());
        if (isDatagenJob) {
            LOG.trace("    Rand instructions: " + getCSVString(randInstructions));
        }
        if (jt == JobType.GMR) {
            LOG.trace("    RecordReader instructions: " + getCSVString(recordReaderInstructions));
        }
        LOG.trace("    Mapper instructions: " + getCSVString(mapperInstructions));
    }

    /* Get Shuffle and Reducer Instructions */
    ArrayList<String> shuffleInstructions = new ArrayList<>();
    ArrayList<String> aggInstructionsReducer = new ArrayList<>();
    ArrayList<String> otherInstructionsReducer = new ArrayList<>();
    for (Lop root : rootNodes) {
        int resultIndex = getAggAndOtherInstructions(root, execNodes, shuffleInstructions, aggInstructionsReducer, otherInstructionsReducer, nodeIndexMapping, startIndex, inputLabels, inputLops, jobLineNumbers);
        if (resultIndex == -1) {
            throw new LopsException("Unexpected error in piggybacking!");
        }

        boolean copiesAnotherRoot = root.getExecLocation() == ExecLocation.Data
            && ((Data) root).getOperationType() == Data.OperationTypes.WRITE
            && ((Data) root).isTransient()
            && rootNodes.contains(root.getInputs().get(0));
        if (copiesAnotherRoot) {
            // Both the transient write and its input are root nodes.
            // Instead of creating two copies of the data, simply generate a cpvar instruction
            writeinst.addAll(setupNodeOutputs(root, ExecType.MR, cellModeOverride, true).getLastInstructions());
            continue;
        }

        resultIndices.add((byte) resultIndex);
        // setup output filenames and outputInfos and generate related instructions
        NodeOutput nodeOut = setupNodeOutputs(root, ExecType.MR, cellModeOverride, false);
        outputLabels.add(nodeOut.getVarName());
        outputs.add(nodeOut.getFileName());
        outputInfos.add(nodeOut.getOutInfo());
        if (LOG.isTraceEnabled()) {
            LOG.trace("    Output Info: " + nodeOut.getFileName() + ";" + OutputInfo.outputInfoToString(nodeOut.getOutInfo()) + ";" + nodeOut.getVarName());
        }
        renameInstructions.addAll(nodeOut.getLastInstructions());
        variableInstructions.addAll(nodeOut.getPreInstructions());
        postInstructions.addAll(nodeOut.getPostInstructions());
    }

    // flatten the boxed result indices into the primitive array expected by the job
    byte[] resultIndicesByte = new byte[resultIndices.size()];
    int pos = 0;
    for (Byte idx : resultIndices) {
        resultIndicesByte[pos++] = idx;
    }

    if (LOG.isTraceEnabled()) {
        LOG.trace("    Shuffle Instructions: " + getCSVString(shuffleInstructions));
        LOG.trace("    Aggregate Instructions: " + getCSVString(aggInstructionsReducer));
        LOG.trace("    Other instructions =" + getCSVString(otherInstructionsReducer));
        LOG.trace("    Output strings: " + outputs.toString());
        LOG.trace("    ResultIndices = " + resultIndices.toString());
    }

    /* Prepare the MapReduce job instruction */
    MRJobInstruction mr = new MRJobInstruction(jt);

    // a job without shuffle/aggregate/other reducer instructions is map-only and keeps 0 reducers
    boolean mapOnly = shuffleInstructions.isEmpty() && aggInstructionsReducer.isEmpty() && otherInstructionsReducer.isEmpty();
    int numReducers = mapOnly ? 0 : total_reducers;

    // set inputs, outputs, and other properties for the job
    String[] inputLabelArray = inputLabels.toArray(new String[0]);
    String[] outputLabelArray = outputLabels.toArray(new String[0]);
    mr.setInputOutputLabels(inputLabelArray, outputLabelArray);
    mr.setOutputs(resultIndicesByte);
    mr.setDimsUnknownFilePrefix(getFilePath());
    mr.setNumberOfReducers(numReducers);
    mr.setReplication(replication);

    // set instructions for recordReader and mapper
    mr.setRecordReaderInstructions(getCSVString(recordReaderInstructions));
    mr.setMapperInstructions(getCSVString(mapperInstructions));

    // compute and set mapper memory requirements (for consistency of runtime piggybacking)
    if (jt == JobType.GMR) {
        double mapperFootprint = 0;
        for (Lop node : execNodes) {
            mapperFootprint += computeFootprintInMapper(node);
        }
        mr.setMemoryRequirements(mapperFootprint);
    }
    if (isDatagenJob) {
        mr.setRandInstructions(getCSVString(randInstructions));
    }

    // set shuffle instructions
    mr.setShuffleInstructions(getCSVString(shuffleInstructions));

    // set reducer instructions
    mr.setAggregateInstructionsInReducer(getCSVString(aggInstructionsReducer));
    mr.setOtherInstructionsInReducer(getCSVString(otherInstructionsReducer));

    if (DMLScript.ENABLE_DEBUG_MODE) {
        // set line number information for each MR instruction
        mr.setMRJobInstructionsLineNumbers(jobLineNumbers);
    }

    /* Add the prepared instructions to output set */
    inst.addAll(variableInstructions);
    inst.add(mr);
    inst.addAll(postInstructions);
    deleteinst.addAll(renameInstructions);

    // the input lop itself is passed as the last argument only in debug mode
    for (Lop inputLop : inputLops) {
        processConsumers(inputLop, rmvarinst, deleteinst, DMLScript.ENABLE_DEBUG_MODE ? inputLop : null);
    }
}
Also used : HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) MRJobInstruction(org.apache.sysml.runtime.instructions.MRJobInstruction) CPInstruction(org.apache.sysml.runtime.instructions.cp.CPInstruction) Instruction(org.apache.sysml.runtime.instructions.Instruction) VariableCPInstruction(org.apache.sysml.runtime.instructions.cp.VariableCPInstruction) InputInfo(org.apache.sysml.runtime.matrix.data.InputInfo) MRJobInstruction(org.apache.sysml.runtime.instructions.MRJobInstruction) Data(org.apache.sysml.lops.Data) Lop(org.apache.sysml.lops.Lop) OutputInfo(org.apache.sysml.runtime.matrix.data.OutputInfo) LopsException(org.apache.sysml.lops.LopsException)

Aggregations

Instruction (org.apache.sysml.runtime.instructions.Instruction) 132, MRJobInstruction (org.apache.sysml.runtime.instructions.MRJobInstruction) 90, FunctionCallCPInstruction (org.apache.sysml.runtime.instructions.cp.FunctionCallCPInstruction) 60, VariableCPInstruction (org.apache.sysml.runtime.instructions.cp.VariableCPInstruction) 60, CPInstruction (org.apache.sysml.runtime.instructions.cp.CPInstruction) 56, ArrayList (java.util.ArrayList) 40, ForProgramBlock (org.apache.sysml.runtime.controlprogram.ForProgramBlock) 35, ExternalFunctionInvocationInstruction (org.apache.sysml.udf.ExternalFunctionInvocationInstruction) 35, FunctionProgramBlock (org.apache.sysml.runtime.controlprogram.FunctionProgramBlock) 33, IfProgramBlock (org.apache.sysml.runtime.controlprogram.IfProgramBlock) 33, ProgramBlock (org.apache.sysml.runtime.controlprogram.ProgramBlock) 33, WhileProgramBlock (org.apache.sysml.runtime.controlprogram.WhileProgramBlock) 33, DMLRuntimeException (org.apache.sysml.runtime.DMLRuntimeException) 32, SPInstruction (org.apache.sysml.runtime.instructions.spark.SPInstruction) 32, MRInstruction (org.apache.sysml.runtime.instructions.mr.MRInstruction) 30, GPUInstruction (org.apache.sysml.runtime.instructions.gpu.GPUInstruction) 28, SpoofCPInstruction (org.apache.sysml.runtime.instructions.cp.SpoofCPInstruction) 26, ParForProgramBlock (org.apache.sysml.runtime.controlprogram.ParForProgramBlock) 24, Lop (org.apache.sysml.lops.Lop) 23, ExternalFunctionProgramBlock (org.apache.sysml.runtime.controlprogram.ExternalFunctionProgramBlock) 19