Examples with Data - org.apache.sysml.lops.Data

Example 6 with Data

use of org.apache.sysml.lops.Data in project incubator-systemml by apache.

the class UnaryOp method constructLopsIQM.

/**
 * Builds the lop (low-level operator) plan for the IQM operation over this
 * hop's first input.
 * <p>
 * For every execution type other than MR the plan is a value sort followed
 * by a single {@code PickByCount} in {@code IQM} mode. For MR the plan is a
 * chain: combine, sort by value, range pick, partial aggregate (sum), group,
 * aggregate (sum), cast to scalar, and finally a CP {@code MR_IQM} unary lop
 * that consumes both the sorted data and the scalar sum.
 *
 * @return the root lop of the constructed plan
 * @throws HopsException if HopsException occurs
 * @throws LopsException if LopsException occurs
 */
private Lop constructLopsIQM() throws HopsException, LopsException {
    final ExecType execType = optFindExecType();
    final Hop in = getInput().get(0);

    if (execType != ExecType.MR) {
        // Non-MR plan: sort the input by value, then pick in IQM mode.
        SortKeys sorted = SortKeys.constructSortByValueLop(in.constructLops(), SortKeys.OperationTypes.WithoutWeights, DataType.MATRIX, ValueType.DOUBLE, execType);
        sorted.getOutputParameters().setDimensions(in.getDim1(), in.getDim2(), in.getRowsInBlock(), in.getColsInBlock(), in.getNnz());

        PickByCount iqmPick = new PickByCount(sorted, null, getDataType(), getValueType(), PickByCount.OperationTypes.IQM, execType, true);
        iqmPick.getOutputParameters().setDimensions(getDim1(), getDim2(), getRowsInBlock(), getColsInBlock(), getNnz());
        setLineNumbers(iqmPick);
        return iqmPick;
    }

    // MR plan, stage 1: combine the input and sort it by value.
    CombineUnary combined = CombineUnary.constructCombineLop(in.constructLops(), DataType.MATRIX, getValueType());
    combined.getOutputParameters().setDimensions(in.getDim1(), in.getDim2(), in.getRowsInBlock(), in.getColsInBlock(), in.getNnz());

    SortKeys sorted = SortKeys.constructSortByValueLop(combined, SortKeys.OperationTypes.WithoutWeights, DataType.MATRIX, ValueType.DOUBLE, ExecType.MR);
    // The sorted output carries the same dimensions as the first input.
    sorted.getOutputParameters().setDimensions(in.getDim1(), in.getDim2(), in.getRowsInBlock(), in.getColsInBlock(), in.getNnz());

    // Stage 2: range pick driven by the literal 0.25
    // (presumably the quartile fraction -- confirm against PickByCount).
    Data quarter = Data.createLiteralLop(ValueType.DOUBLE, Double.toString(0.25));
    quarter.setAllPositions(getBeginLine(), getBeginColumn(), getEndLine(), getEndColumn());

    PickByCount rangePick = new PickByCount(sorted, quarter, DataType.MATRIX, getValueType(), PickByCount.OperationTypes.RANGEPICK);
    // Dimensions and nnz of the picked range are left unknown (-1).
    rangePick.getOutputParameters().setDimensions(-1, -1, getRowsInBlock(), getColsInBlock(), -1);
    setLineNumbers(rangePick);

    // Stage 3: sum the picked values (partial aggregate -> group -> aggregate).
    PartialAggregate partialSum = new PartialAggregate(rangePick, HopsAgg2Lops.get(Hop.AggOp.SUM), HopsDirection2Lops.get(Hop.Direction.RowCol), DataType.MATRIX, getValueType());
    setLineNumbers(partialSum);
    // The partial aggregate derives its output dimensions from the
    // direction in which the aggregation is performed.
    partialSum.setDimensionsBasedOnDirection(getDim1(), getDim2(), getRowsInBlock(), getColsInBlock());

    Group grouped = new Group(partialSum, Group.OperationTypes.Sort, DataType.MATRIX, getValueType());
    grouped.getOutputParameters().setDimensions(getDim1(), getDim2(), getRowsInBlock(), getColsInBlock(), getNnz());
    setLineNumbers(grouped);

    Aggregate fullSum = new Aggregate(grouped, HopsAgg2Lops.get(Hop.AggOp.SUM), DataType.MATRIX, getValueType(), ExecType.MR);
    fullSum.getOutputParameters().setDimensions(getDim1(), getDim2(), getRowsInBlock(), getColsInBlock(), getNnz());
    // The final aggregate reuses the correction location of the partial aggregate.
    fullSum.setupCorrectionLocation(partialSum.getCorrectionLocation());
    setLineNumbers(fullSum);

    // Stage 4: cast the aggregated result to a scalar.
    UnaryCP sumAsScalar = new UnaryCP(fullSum, HopsOpOp1LopsUS.get(OpOp1.CAST_AS_SCALAR), getDataType(), getValueType());
    sumAsScalar.getOutputParameters().setDimensions(0, 0, 0, 0, -1);
    setLineNumbers(sumAsScalar);

    // Stage 5: the MR_IQM lop runs in CP on the sorted data and the scalar sum.
    Unary result = new Unary(sorted, sumAsScalar, Unary.OperationTypes.MR_IQM, DataType.SCALAR, ValueType.DOUBLE, ExecType.CP);
    result.getOutputParameters().setDimensions(0, 0, 0, 0, -1);
    setLineNumbers(result);
    return result;
}

Also used : PartialAggregate(org.apache.sysml.lops.PartialAggregate) CumulativePartialAggregate(org.apache.sysml.lops.CumulativePartialAggregate) SortKeys(org.apache.sysml.lops.SortKeys) Group(org.apache.sysml.lops.Group) PickByCount(org.apache.sysml.lops.PickByCount) CombineUnary(org.apache.sysml.lops.CombineUnary) MultiThreadedHop(org.apache.sysml.hops.Hop.MultiThreadedHop) ExecType(org.apache.sysml.lops.LopProperties.ExecType) Data(org.apache.sysml.lops.Data) PartialAggregate(org.apache.sysml.lops.PartialAggregate) CumulativeSplitAggregate(org.apache.sysml.lops.CumulativeSplitAggregate) Aggregate(org.apache.sysml.lops.Aggregate) CumulativePartialAggregate(org.apache.sysml.lops.CumulativePartialAggregate) CombineUnary(org.apache.sysml.lops.CombineUnary) Unary(org.apache.sysml.lops.Unary) UnaryCP(org.apache.sysml.lops.UnaryCP)

Example 7 with Data

use of org.apache.sysml.lops.Data in project incubator-systemml by apache.

the class BinaryOp method constructLopsIQM.

/**
 * Builds the lop plan for the weighted IQM operation (first input: data,
 * second input: weights) and registers its root through {@code setLops}.
 * <p>
 * For every execution type other than MR the plan is a weighted value sort
 * followed by a single {@code PickByCount} in {@code IQM} mode. For MR the
 * plan is a chain: pre-sort combine of both inputs, weighted sort, range
 * pick, partial aggregate (sum), group, aggregate (sum), cast to scalar,
 * and finally a CP {@code MR_IQM} unary lop.
 *
 * @param et execution type to build the plan for
 * @throws HopsException if HopsException occurs
 * @throws LopsException if LopsException occurs
 */
private void constructLopsIQM(ExecType et) throws HopsException, LopsException {
    // The first input supplies the dimensions for the combine and sort lops.
    final Hop first = getInput().get(0);

    if (et != ExecType.MR) {
        // Non-MR plan: weighted sort, then pick in IQM mode.
        SortKeys sorted = SortKeys.constructSortByValueLop(first.constructLops(), getInput().get(1).constructLops(), SortKeys.OperationTypes.WithWeights, first.getDataType(), first.getValueType(), et);
        sorted.getOutputParameters().setDimensions(first.getDim1(), first.getDim2(), first.getRowsInBlock(), first.getColsInBlock(), first.getNnz());

        PickByCount iqmPick = new PickByCount(sorted, null, getDataType(), getValueType(), PickByCount.OperationTypes.IQM, et, true);
        setOutputDimensions(iqmPick);
        setLineNumbers(iqmPick);
        setLops(iqmPick);
        return;
    }

    // MR plan, stage 1: combine data and weights, then sort by value.
    CombineBinary combined = CombineBinary.constructCombineLop(OperationTypes.PreSort, (Lop) first.constructLops(), (Lop) getInput().get(1).constructLops(), DataType.MATRIX, getValueType());
    combined.getOutputParameters().setDimensions(first.getDim1(), first.getDim2(), first.getRowsInBlock(), first.getColsInBlock(), first.getNnz());

    SortKeys sorted = SortKeys.constructSortByValueLop(combined, SortKeys.OperationTypes.WithWeights, DataType.MATRIX, ValueType.DOUBLE, ExecType.MR);
    // The sorted output carries the same dimensions as the first input.
    sorted.getOutputParameters().setDimensions(first.getDim1(), first.getDim2(), first.getRowsInBlock(), first.getColsInBlock(), first.getNnz());

    // Stage 2: range pick driven by the literal 0.25
    // (presumably the quartile fraction -- confirm against PickByCount).
    Data quarter = Data.createLiteralLop(ValueType.DOUBLE, Double.toString(0.25));
    setLineNumbers(quarter);

    PickByCount rangePick = new PickByCount(sorted, quarter, DataType.MATRIX, getValueType(), PickByCount.OperationTypes.RANGEPICK);
    // Dimensions and nnz of the picked range are left unknown (-1).
    rangePick.getOutputParameters().setDimensions(-1, -1, getRowsInBlock(), getColsInBlock(), -1);
    setLineNumbers(rangePick);

    // Stage 3: sum the picked values (partial aggregate -> group -> aggregate).
    PartialAggregate partialSum = new PartialAggregate(rangePick, HopsAgg2Lops.get(Hop.AggOp.SUM), HopsDirection2Lops.get(Hop.Direction.RowCol), DataType.MATRIX, getValueType());
    setLineNumbers(partialSum);
    // The partial aggregate derives its output dimensions from the
    // direction in which the aggregation is performed.
    partialSum.setDimensionsBasedOnDirection(getDim1(), getDim2(), getRowsInBlock(), getColsInBlock());

    Group grouped = new Group(partialSum, Group.OperationTypes.Sort, DataType.MATRIX, getValueType());
    setOutputDimensions(grouped);
    setLineNumbers(grouped);

    Aggregate fullSum = new Aggregate(grouped, HopsAgg2Lops.get(Hop.AggOp.SUM), DataType.MATRIX, getValueType(), ExecType.MR);
    setOutputDimensions(fullSum);
    // The final aggregate reuses the correction location of the partial aggregate.
    fullSum.setupCorrectionLocation(partialSum.getCorrectionLocation());
    setLineNumbers(fullSum);

    // Stage 4: cast the aggregated result to a scalar.
    UnaryCP sumAsScalar = new UnaryCP(fullSum, HopsOpOp1LopsUS.get(OpOp1.CAST_AS_SCALAR), DataType.SCALAR, getValueType());
    sumAsScalar.getOutputParameters().setDimensions(0, 0, 0, 0, -1);
    setLineNumbers(sumAsScalar);

    // Stage 5: the MR_IQM lop runs in CP on the sorted data and the scalar sum.
    Unary result = new Unary(sorted, sumAsScalar, Unary.OperationTypes.MR_IQM, DataType.SCALAR, ValueType.DOUBLE, ExecType.CP);
    result.getOutputParameters().setDimensions(0, 0, 0, 0, -1);
    setLineNumbers(result);
    setLops(result);
}

Also used : PartialAggregate(org.apache.sysml.lops.PartialAggregate) CombineBinary(org.apache.sysml.lops.CombineBinary) SortKeys(org.apache.sysml.lops.SortKeys) Group(org.apache.sysml.lops.Group) PickByCount(org.apache.sysml.lops.PickByCount) Data(org.apache.sysml.lops.Data) PartialAggregate(org.apache.sysml.lops.PartialAggregate) Aggregate(org.apache.sysml.lops.Aggregate) Unary(org.apache.sysml.lops.Unary) CombineUnary(org.apache.sysml.lops.CombineUnary) UnaryCP(org.apache.sysml.lops.UnaryCP)

Example 8 with Data

use of org.apache.sysml.lops.Data in project incubator-systemml by apache.

the class Dag method getRecordReaderInstructions.

/**
 * Recursively collects the record reader instructions for a MR job.
 * <p>
 * The method first recurses into every input of {@code node}. If
 * {@code node} itself executes in the RecordReader location, it is assigned
 * the next free index from {@code start_index}, registered in
 * {@code nodeIndexMapping}, and its instruction is appended to
 * {@code recordReaderInstructions}.
 *
 * @param node low-level operator
 * @param execNodes list of exec nodes
 * @param inputStrings list of input strings (not read by this method; only
 *        passed along on recursion)
 * @param recordReaderInstructions list of record reader instructions (appended to)
 * @param nodeIndexMapping node index mapping (read, and extended for
 *        RecordReader lops)
 * @param start_index single-element holder of the next free index;
 *        incremented when an index is allocated
 * @param inputLabels list of input labels (appended to for non-literal
 *        scalar inputs of RANGEPICK lops)
 * @param inputLops list of input lops (appended to alongside {@code inputLabels})
 * @param MRJobLineNumbers MR job line numbers (appended to only in debug mode)
 * @return the index mapped to {@code node} (pre-existing or newly
 *         allocated); -1 if {@code node} is neither mapped nor contained in
 *         {@code execNodes}, or if it is not a RecordReader lop
 * @throws LopsException if a RecordReader lop does not have exactly two inputs
 */
private static int getRecordReaderInstructions(Lop node, ArrayList<Lop> execNodes, ArrayList<String> inputStrings, ArrayList<String> recordReaderInstructions, HashMap<Lop, Integer> nodeIndexMapping, int[] start_index, ArrayList<String> inputLabels, ArrayList<Lop> inputLops, ArrayList<Integer> MRJobLineNumbers) throws LopsException {
    // if input source, return index
    if (nodeIndexMapping.containsKey(node))
        return nodeIndexMapping.get(node);
    // not input source and not in exec nodes, then return.
    if (!execNodes.contains(node))
        return -1;
    // Recurse into all inputs first and remember the index of each one,
    // in input order.
    ArrayList<Integer> inputIndices = new ArrayList<Integer>();
    for (Lop childNode : node.getInputs()) {
        inputIndices.add(getRecordReaderInstructions(childNode, execNodes, inputStrings, recordReaderInstructions, nodeIndexMapping, start_index, inputLabels, inputLops, MRJobLineNumbers));
    }
    // only RecordReader lops produce an instruction here
    if (node.getExecLocation() != ExecLocation.RecordReader)
        return -1;
    // Input indices are never reused: always allocate a fresh output index.
    // (need to add better indexing schemes)
    int output_index = start_index[0];
    start_index[0]++;
    nodeIndexMapping.put(node, output_index);
    // only Rangepick lop can contribute to labels
    if (node.getType() == Type.PickValues) {
        PickByCount pbc = (PickByCount) node;
        if (pbc.getOperationType() == PickByCount.OperationTypes.RANGEPICK) {
            // always the second input is a scalar
            Lop scalarInput = node.getInputs().get(1);
            // Add a label unless the scalar is a literal data lop: either
            // it is not a data lop at all (an intermediate variable), or
            // it is a data lop that is not a literal.
            if (scalarInput.getExecLocation() != ExecLocation.Data || !((Data) scalarInput).isLiteral()) {
                inputLabels.add(scalarInput.getOutputParameters().getLabel());
                inputLops.add(scalarInput);
            }
        }
    }
    // get recordreader instruction.
    if (node.getInputs().size() != 2)
        throw new LopsException("Unexpected number of inputs while generating a RecordReader Instruction");
    recordReaderInstructions.add(node.getInstructions(inputIndices.get(0), inputIndices.get(1), output_index));
    if (DMLScript.ENABLE_DEBUG_MODE) {
        MRJobLineNumbers.add(node._beginLine);
    }
    return output_index;
}

Also used : PickByCount(org.apache.sysml.lops.PickByCount) LopsException(org.apache.sysml.lops.LopsException) ArrayList(java.util.ArrayList) Data(org.apache.sysml.lops.Data) Lop(org.apache.sysml.lops.Lop)

Example 9 with Data

use of org.apache.sysml.lops.Data in project incubator-systemml by apache.

the class Dag method doGreedyGrouping.

/**
 * Greedily groups a list of sorted lops into batches and generates the
 * instructions for each batch.
 * <p>
 * Each pass of the outer loop walks {@code node_v}, and for every lop that
 * is not yet finished decides whether it joins the current batch
 * ({@code execNodes}) or is deferred ({@code queuedNodes}). Afterwards the
 * control program instructions and MR jobs for the batch are generated.
 * The loop ends when a pass produces an empty batch. The numeric "code N"
 * suffixes in the trace messages identify which rule queued a lop.
 *
 * @param sb statement block
 * @param node_v list of low-level operators
 * @return list of instructions: the generated instructions followed by the
 *         write, delete and end-of-block instructions
 * @throws LopsException if a pass makes no progress while lops are still
 *         queued, or if LopsException occurs in a helper
 * @throws IOException if IOException occurs
 * @throws DMLRuntimeException if DMLRuntimeException occurs
 */
private ArrayList<Instruction> doGreedyGrouping(StatementBlock sb, ArrayList<Lop> node_v) throws LopsException, IOException, DMLRuntimeException {
    if (LOG.isTraceEnabled())
        LOG.trace("Grouping DAG ============");
    // nodes to be executed in current iteration
    ArrayList<Lop> execNodes = new ArrayList<Lop>();
    // nodes that have already been processed
    ArrayList<Lop> finishedNodes = new ArrayList<Lop>();
    // nodes that are queued for the following iteration
    ArrayList<Lop> queuedNodes = new ArrayList<Lop>();
    // one node list per job type
    ArrayList<ArrayList<Lop>> jobNodes = createNodeVectors(JobType.getNumJobTypes());
    // list of instructions
    ArrayList<Instruction> inst = new ArrayList<Instruction>();
    // instructions that are appended to "inst" only at the very end
    ArrayList<Instruction> writeInst = new ArrayList<Instruction>();
    ArrayList<Instruction> deleteInst = new ArrayList<Instruction>();
    ArrayList<Instruction> endOfBlockInst = new ArrayList<Instruction>();
    // remove files for transient reads that are updated.
    deleteUpdatedTransientReadVariables(sb, node_v, writeInst);
    generateRemoveInstructions(sb, endOfBlockInst);
    generateInstructionsForInputVariables(node_v, inst);
    boolean done = false;
    String indent = "    ";
    while (!done) {
        if (LOG.isTraceEnabled())
            LOG.trace("Grouping nodes in DAG");
        // start every pass with an empty batch
        execNodes.clear();
        queuedNodes.clear();
        clearNodeVectors(jobNodes);
        gmrMapperFootprint = 0;
        for (Lop node : node_v) {
            // finished nodes don't need to be processed
            if (finishedNodes.contains(node))
                continue;
            if (LOG.isTraceEnabled())
                LOG.trace("Processing node (" + node.getID() + ") " + node.toString() + " exec nodes size is " + execNodes.size());
            // if node defines a MR job, it must be compatible with
            // its children nodes in execNodes; otherwise queue it
            if (node.definesMRJob() && !compatibleWithChildrenInExecNodes(execNodes, node)) {
                if (LOG.isTraceEnabled())
                    LOG.trace(indent + "Queueing node " + node.toString() + " (code 1)");
                queuedNodes.add(node);
                removeNodesForNextIteration(node, finishedNodes, execNodes, queuedNodes, jobNodes);
                continue;
            }
            // if a child is queued, this node is deferred to a later
            // iteration as well
            if (hasChildNode(node, queuedNodes)) {
                if (LOG.isTraceEnabled())
                    LOG.trace(indent + "Queueing node " + node.toString() + " (code 2)");
                queuedNodes.add(node);
                // if node has more than two inputs,
                // remove children that will be needed in a future
                // iterations
                // may also have to remove parent nodes of these children
                removeNodesForNextIteration(node, finishedNodes, execNodes, queuedNodes, jobNodes);
                continue;
            }
            // if inputs come from different jobs, then queue
            if (node.getInputs().size() >= 2) {
                // Integer.MIN_VALUE marks "no job id seen yet"
                int jobid = Integer.MIN_VALUE;
                boolean queueit = false;
                for (int idx = 0; idx < node.getInputs().size(); idx++) {
                    int input_jobid = jobType(node.getInputs().get(idx), jobNodes);
                    // inputs with job id -1 are ignored in this comparison
                    if (input_jobid != -1) {
                        if (jobid == Integer.MIN_VALUE)
                            jobid = input_jobid;
                        else if (jobid != input_jobid) {
                            queueit = true;
                            break;
                        }
                    }
                }
                if (queueit) {
                    if (LOG.isTraceEnabled())
                        LOG.trace(indent + "Queueing node " + node.toString() + " (code 3)");
                    queuedNodes.add(node);
                    removeNodesForNextIteration(node, finishedNodes, execNodes, queuedNodes, jobNodes);
                    continue;
                }
            }
            // See if this lop can be eliminated
            // This check is for "aligner" lops (e.g., group)
            boolean eliminate = canEliminateLop(node, execNodes);
            if (eliminate) {
                if (LOG.isTraceEnabled())
                    LOG.trace(indent + "Adding -" + node.toString());
                execNodes.add(node);
                finishedNodes.add(node);
                addNodeByJobType(node, jobNodes, execNodes, eliminate);
                continue;
            }
            // if the node defines a MR job, make sure that none of its
            // children that defines a MR Job are present in execNodes
            if (node.definesMRJob()) {
                if (hasMRJobChildNode(node, execNodes)) {
                    // exception: a Grouping lop with a data generation child
                    // in execNodes is not queued --
                    // this is because "group" can be pushed into the "Rand" job.
                    if (!(node.getType() == Lop.Type.Grouping && checkDataGenAsChildNode(node, execNodes))) {
                        if (LOG.isTraceEnabled())
                            LOG.trace(indent + "Queueing node " + node.toString() + " (code 4)");
                        queuedNodes.add(node);
                        removeNodesForNextIteration(node, finishedNodes, execNodes, queuedNodes, jobNodes);
                        continue;
                    }
                }
            }
            // if "node" has more than one input and has a RecordReader
            // lop among its children in execNodes, every input must either
            // be that RecordReader lop or pass the isChild check against
            // it. If not, queue "node"
            if (node.getInputs().size() > 1 && hasChildNode(node, execNodes, ExecLocation.RecordReader)) {
                // get the actual RecordReader lop
                Lop rr_node = getChildNode(node, execNodes, ExecLocation.RecordReader);
                // all inputs of "node" must be ancestors of rr_node
                boolean queue_it = false;
                for (Lop n : node.getInputs()) {
                    // each input should be ancestor of RecordReader lop
                    if (!n.equals(rr_node) && !isChild(rr_node, n, IDMap)) {
                        // i.e., "node" must be queued
                        queue_it = true;
                        break;
                    }
                }
                if (queue_it) {
                    // queue node
                    if (LOG.isTraceEnabled())
                        LOG.trace(indent + "Queueing -" + node.toString() + " (code 5)");
                    queuedNodes.add(node);
                    // TODO: does this have to be modified to handle
                    // recordreader lops?
                    removeNodesForNextIteration(node, finishedNodes, execNodes, queuedNodes, jobNodes);
                    continue;
                }
                // otherwise nothing to do here: the subsequent checks are
                // still performed on "node"
            }
            // data node: reads are finished immediately,
            // only write nodes are kept in execnodes
            if (node.getExecLocation() == ExecLocation.Data) {
                Data dnode = (Data) node;
                boolean dnode_queued = false;
                if (dnode.getOperationType() == OperationTypes.READ) {
                    if (LOG.isTraceEnabled())
                        LOG.trace(indent + "Adding Data -" + node.toString());
                    // TODO: avoid readScalar instruction, and read it on-demand just like the way Matrices are read in control program
                    //TODO: LEO check the following condition is still needed
                    if (node.getDataType() == DataType.SCALAR && node.getOutputParameters().getFile_name() != null) {
                        // this lop corresponds to reading a scalar from HDFS file
                        // add it to execNodes so that "readScalar" instruction gets generated
                        execNodes.add(node);
                        // note: no need to add it to any job vector
                    }
                } else if (dnode.getOperationType() == OperationTypes.WRITE) {
                    // Skip the transient write <code>node</code> if the input is a 
                    // transient read with the same variable name. i.e., a dummy copy. 
                    // Hence, <code>node</code> can be avoided.
                    // TODO: this case should ideally be handled in the language layer 
                    //       prior to the construction of Hops Dag 
                    Lop input = dnode.getInputs().get(0);
                    if (dnode.isTransient() && input.getExecLocation() == ExecLocation.Data && ((Data) input).isTransient() && dnode.getOutputParameters().getLabel().equals(input.getOutputParameters().getLabel())) {
                        // do nothing, <code>node</code> must not processed any further.
                        ;
                    } else if (execNodes.contains(input) && !isCompatible(node, input) && sendWriteLopToMR(node)) {
                        // input is in execNodes but it is not compatible with write lop. So, queue the write lop.
                        if (LOG.isTraceEnabled())
                            LOG.trace(indent + "Queueing -" + node.toString());
                        queuedNodes.add(node);
                        dnode_queued = true;
                    } else {
                        if (LOG.isTraceEnabled())
                            LOG.trace(indent + "Adding Data -" + node.toString());
                        execNodes.add(node);
                        if (sendWriteLopToMR(node)) {
                            addNodeByJobType(node, jobNodes, execNodes, false);
                        }
                    }
                }
                // every data node that was not queued is finished
                if (!dnode_queued)
                    finishedNodes.add(node);
                continue;
            }
            // map or reduce node, can always be piggybacked with parent
            if (node.getExecLocation() == ExecLocation.MapOrReduce) {
                if (LOG.isTraceEnabled())
                    LOG.trace(indent + "Adding -" + node.toString());
                execNodes.add(node);
                finishedNodes.add(node);
                addNodeByJobType(node, jobNodes, execNodes, false);
                continue;
            }
            // RecordReader node, add, if no parent needs reduce, else queue
            if (node.getExecLocation() == ExecLocation.RecordReader) {
                // "node" must not have any Map or MapAndReduce children in
                // execNodes .. it has to be the first one in the job!
                if (!hasChildNode(node, execNodes, ExecLocation.Map) && !hasChildNode(node, execNodes, ExecLocation.MapAndReduce)) {
                    if (LOG.isTraceEnabled())
                        LOG.trace(indent + "Adding -" + node.toString());
                    execNodes.add(node);
                    finishedNodes.add(node);
                    addNodeByJobType(node, jobNodes, execNodes, false);
                } else {
                    if (LOG.isTraceEnabled())
                        LOG.trace(indent + "Queueing -" + node.toString() + " (code 6)");
                    queuedNodes.add(node);
                    removeNodesForNextIteration(node, finishedNodes, execNodes, queuedNodes, jobNodes);
                }
                continue;
            }
            // map node, add, if no parent needs reduce, else queue
            if (node.getExecLocation() == ExecLocation.Map) {
                boolean queueThisNode = false;
                // subcode is only reported in the trace message below
                int subcode = -1;
                if (node.usesDistributedCache()) {
                    // if an input to <code>node</code> comes from distributed cache
                    // then that input must get executed in one of the previous jobs.
                    int[] dcInputIndexes = node.distributedCacheInputIndex();
                    for (int dcInputIndex : dcInputIndexes) {
                        // distributed cache input indexes are 1-based positions into getInputs()
                        Lop dcInput = node.getInputs().get(dcInputIndex - 1);
                        if ((dcInput.getType() != Lop.Type.Data && dcInput.getExecType() == ExecType.MR) && execNodes.contains(dcInput)) {
                            queueThisNode = true;
                            subcode = 1;
                        }
                    }
                    // Limit the number of distributed cache inputs based on the available memory in mappers
                    double memsize = computeFootprintInMapper(node);
                    if (gmrMapperFootprint > 0 && !checkMemoryLimits(node, gmrMapperFootprint + memsize)) {
                        queueThisNode = true;
                        subcode = 2;
                    }
                    // the footprint is only charged when the node is not queued by the checks above
                    if (!queueThisNode)
                        gmrMapperFootprint += memsize;
                }
                if (!queueThisNode && !hasChildNode(node, execNodes, ExecLocation.MapAndReduce) && !hasMRJobChildNode(node, execNodes)) {
                    if (LOG.isTraceEnabled())
                        LOG.trace(indent + "Adding -" + node.toString());
                    execNodes.add(node);
                    finishedNodes.add(node);
                    addNodeByJobType(node, jobNodes, execNodes, false);
                } else {
                    if (LOG.isTraceEnabled())
                        LOG.trace(indent + "Queueing -" + node.toString() + " (code 7 - " + "subcode " + subcode + ")");
                    queuedNodes.add(node);
                    removeNodesForNextIteration(node, finishedNodes, execNodes, queuedNodes, jobNodes);
                }
                continue;
            }
            // MapAndReduce node: always added to the current batch.
            // "eliminate" is necessarily false here, because eliminated
            // lops were already handled above.
            if (node.getExecLocation() == ExecLocation.MapAndReduce) {
                if (LOG.isTraceEnabled())
                    LOG.trace(indent + "Adding -" + node.toString());
                execNodes.add(node);
                finishedNodes.add(node);
                addNodeByJobType(node, jobNodes, execNodes, eliminate);
                continue;
            }
            // aligned reduce, make sure a parent that is reduce exists
            if (node.getExecLocation() == ExecLocation.Reduce) {
                if (compatibleWithChildrenInExecNodes(execNodes, node) && (hasChildNode(node, execNodes, ExecLocation.MapAndReduce) || hasChildNode(node, execNodes, ExecLocation.Map))) {
                    if (LOG.isTraceEnabled())
                        LOG.trace(indent + "Adding -" + node.toString());
                    execNodes.add(node);
                    finishedNodes.add(node);
                    addNodeByJobType(node, jobNodes, execNodes, false);
                } else {
                    if (LOG.isTraceEnabled())
                        LOG.trace(indent + "Queueing -" + node.toString() + " (code 8)");
                    queuedNodes.add(node);
                    removeNodesForNextIteration(node, finishedNodes, execNodes, queuedNodes, jobNodes);
                }
                continue;
            }
            // control program node: add it to execNodes only if none of
            // its inputs in execNodes is something other than a Data or
            // ControlProgram lop, i.e., an input
            // that will be executed in a MR job.
            if (node.getExecLocation() == ExecLocation.ControlProgram) {
                for (Lop lop : node.getInputs()) {
                    if (execNodes.contains(lop) && !(lop.getExecLocation() == ExecLocation.Data) && !(lop.getExecLocation() == ExecLocation.ControlProgram)) {
                        if (LOG.isTraceEnabled())
                            LOG.trace(indent + "Queueing -" + node.toString() + " (code 9)");
                        queuedNodes.add(node);
                        removeNodesForNextIteration(node, finishedNodes, execNodes, queuedNodes, jobNodes);
                        break;
                    }
                }
                // the loop above may have queued the node
                if (queuedNodes.contains(node))
                    continue;
                if (LOG.isTraceEnabled())
                    LOG.trace(indent + "Adding - scalar" + node.toString());
                execNodes.add(node);
                addNodeByJobType(node, jobNodes, execNodes, false);
                finishedNodes.add(node);
                continue;
            }
        }
        // no work to do
        if (execNodes.isEmpty()) {
            // An empty batch with lops still queued means that no progress
            // can be made anymore.
            if (!queuedNodes.isEmpty()) {
                throw new LopsException("Queued nodes should be 0 at this point, but " + queuedNodes.size() + " node(s) are still queued \n");
            }
            if (LOG.isTraceEnabled())
                LOG.trace("All done! queuedNodes = " + queuedNodes.size());
            done = true;
        } else {
            if (LOG.isTraceEnabled())
                LOG.trace("Generating jobs for group -- Node count=" + execNodes.size());
            // first process scalar instructions
            generateControlProgramJobs(execNodes, inst, writeInst, deleteInst);
            // copy unassigned lops in execnodes to gmrnodes
            // NOTE(review): execNodes.remove(i) below shifts the following
            // element into position i, which the i++ then skips; also
            // removeNodesForNextIteration receives execNodes and may change
            // it further -- confirm that skipping is intended.
            for (int i = 0; i < execNodes.size(); i++) {
                Lop node = execNodes.get(i);
                if (jobType(node, jobNodes) == -1) {
                    if (isCompatible(node, JobType.GMR)) {
                        // lops with non-blocked inputs go to GMRCELL, all others to GMR
                        if (node.hasNonBlockedInputs()) {
                            jobNodes.get(JobType.GMRCELL.getId()).add(node);
                            addChildren(node, jobNodes.get(JobType.GMRCELL.getId()), execNodes);
                        } else {
                            jobNodes.get(JobType.GMR.getId()).add(node);
                            addChildren(node, jobNodes.get(JobType.GMR.getId()), execNodes);
                        }
                    } else {
                        // not compatible with GMR: undo the scheduling of
                        // this lop and defer it to the next pass
                        if (LOG.isTraceEnabled())
                            LOG.trace(indent + "Queueing -" + node.toString() + " (code 10)");
                        execNodes.remove(i);
                        finishedNodes.remove(node);
                        queuedNodes.add(node);
                        removeNodesForNextIteration(node, finishedNodes, execNodes, queuedNodes, jobNodes);
                    }
                }
            }
            // next generate MR instructions
            if (!execNodes.isEmpty())
                generateMRJobs(execNodes, inst, writeInst, deleteInst, jobNodes);
            handleSingleOutputJobs(execNodes, jobNodes, finishedNodes);
        }
    }
    // add write and delete inst at the very end.
    inst.addAll(writeInst);
    inst.addAll(deleteInst);
    inst.addAll(endOfBlockInst);
    return inst;
}

Also used : ArrayList(java.util.ArrayList) Data(org.apache.sysml.lops.Data) Lop(org.apache.sysml.lops.Lop) MRJobInstruction(org.apache.sysml.runtime.instructions.MRJobInstruction) CPInstruction(org.apache.sysml.runtime.instructions.cp.CPInstruction) Instruction(org.apache.sysml.runtime.instructions.Instruction) VariableCPInstruction(org.apache.sysml.runtime.instructions.cp.VariableCPInstruction) LopsException(org.apache.sysml.lops.LopsException)

Example 10 with Data

use of org.apache.sysml.lops.Data in project incubator-systemml by apache.

the class Dag method getMapperInstructions.

/**
 * Recursively collects the mapper instructions of an MR job for
 * {@code node}, visiting the node's inputs before the node itself.
 * <p>
 * A node that is already present in {@code nodeIndexMapping} is treated as
 * an input source and its recorded index is returned unchanged. A node that
 * is not contained in {@code execNodes} yields {@code -1}. Otherwise, if the
 * node's execution location is {@code Map} or {@code MapOrReduce} and
 * {@code hasChildNode} reports neither a {@code MapAndReduce} nor a
 * {@code Reduce} node for it, the node is given the next free index from
 * {@code start_index[0]}, that index is recorded in
 * {@code nodeIndexMapping}, and the node's instruction string is appended to
 * {@code instructionsInMapper}.
 *
 * @param node low-level operator to generate mapper instructions for
 * @param execNodes list of exec nodes; nodes outside this list are ignored
 * @param inputStrings list of input strings (only handed on to the
 *            recursive calls by this method)
 * @param instructionsInMapper list the generated mapper instructions are
 *            appended to
 * @param nodeIndexMapping map from a node to the index assigned to its
 *            output; read for already-indexed nodes and updated for every
 *            node that is added to the mapper
 * @param start_index single-element holder of the next free index;
 *            {@code start_index[0]} is incremented for every node added
 * @param inputLabels labels of non-literal scalar inputs of multi-input
 *            {@code Unary} nodes are appended to this list
 * @param inputLops the lops belonging to the labels added to
 *            {@code inputLabels} are appended to this list
 * @param MRJobLineNumbers when {@code DMLScript.ENABLE_DEBUG_MODE} is set,
 *            the begin line of every node added to the mapper is appended
 * @return the index associated with {@code node}, or {@code -1} if the node
 *         does not contribute a mapper instruction
 * @throws LopsException if the node has an unsupported number of inputs
 */
private int getMapperInstructions(Lop node, ArrayList<Lop> execNodes, ArrayList<String> inputStrings, ArrayList<String> instructionsInMapper, HashMap<Lop, Integer> nodeIndexMapping, int[] start_index, ArrayList<String> inputLabels, ArrayList<Lop> inputLops, ArrayList<Integer> MRJobLineNumbers) throws LopsException {
    // if input source, return index
    if (nodeIndexMapping.containsKey(node))
        return nodeIndexMapping.get(node);
    // not input source and not in exec nodes, then return.
    if (!execNodes.contains(node))
        return -1;
    // get mapper instructions of all inputs first and remember the index
    // each of them resolved to (-1 for inputs that produced none)
    ArrayList<Integer> inputIndices = new ArrayList<Integer>();
    for (Lop childNode : node.getInputs()) {
        inputIndices.add(getMapperInstructions(childNode, execNodes, inputStrings, instructionsInMapper, nodeIndexMapping, start_index, inputLabels, inputLops, MRJobLineNumbers));
    }
    // only Map / MapOrReduce nodes for which hasChildNode finds neither a
    // MapAndReduce nor a Reduce node can be added to mapper instructions.
    boolean mapSide = node.getExecLocation() == ExecLocation.Map || node.getExecLocation() == ExecLocation.MapOrReduce;
    if (!mapSide || hasChildNode(node, execNodes, ExecLocation.MapAndReduce) || hasChildNode(node, execNodes, ExecLocation.Reduce))
        return -1;
    // Input indices are never reused for the output: every node added here
    // takes a fresh index (a better indexing scheme is a known to-do).
    int output_index = start_index[0];
    start_index[0]++;
    nodeIndexMapping.put(node, output_index);
    if (node instanceof Unary && node.getInputs().size() > 1) {
        // Following code must be executed only for those Unary
        // operators that have more than one input
        // It should not be executed for "true" unary operators like
        // cos(A).
        // Locate the first scalar input; falls back to input 0 if none.
        int index = 0;
        for (int i1 = 0; i1 < node.getInputs().size(); i1++) {
            if (node.getInputs().get(i1).getDataType() == DataType.SCALAR) {
                index = i1;
                break;
            }
        }
        Lop scalarInput = node.getInputs().get(index);
        // A label is needed when the input is an intermediate variable
        // (not a data lop) or a data lop that is not a literal.
        if (scalarInput.getExecLocation() != ExecLocation.Data || !((Data) scalarInput).isLiteral()) {
            inputLabels.add(scalarInput.getOutputParameters().getLabel());
            inputLops.add(scalarInput);
        }
    }
    // get mapper instruction; the overload is chosen by the input count.
    int numInputs = node.getInputs().size();
    switch (numInputs) {
        case 1:
            instructionsInMapper.add(node.getInstructions(inputIndices.get(0), output_index));
            break;
        case 2:
            instructionsInMapper.add(node.getInstructions(inputIndices.get(0), inputIndices.get(1), output_index));
            break;
        case 3:
            instructionsInMapper.add(node.getInstructions(inputIndices.get(0), inputIndices.get(1), inputIndices.get(2), output_index));
            break;
        case 4:
            // Example: Reshape
            instructionsInMapper.add(node.getInstructions(inputIndices.get(0), inputIndices.get(1), inputIndices.get(2), inputIndices.get(3), output_index));
            break;
        case 5:
            // Example: RangeBasedReIndex A[row_l:row_u, col_l:col_u]
            instructionsInMapper.add(node.getInstructions(inputIndices.get(0), inputIndices.get(1), inputIndices.get(2), inputIndices.get(3), inputIndices.get(4), output_index));
            break;
        case 7:
            // Example: RangeBasedReIndex A[row_l:row_u, col_l:col_u] = B
            instructionsInMapper.add(node.getInstructions(inputIndices.get(0), inputIndices.get(1), inputIndices.get(2), inputIndices.get(3), inputIndices.get(4), inputIndices.get(5), inputIndices.get(6), output_index));
            break;
        default:
            throw new LopsException("Node with " + numInputs + " inputs is not supported in dag.java.");
    }
    if (DMLScript.ENABLE_DEBUG_MODE) {
        MRJobLineNumbers.add(node._beginLine);
    }
    return output_index;
}

Also used : LopsException(org.apache.sysml.lops.LopsException) ArrayList(java.util.ArrayList) Data(org.apache.sysml.lops.Data) Lop(org.apache.sysml.lops.Lop) Unary(org.apache.sysml.lops.Unary)

Aggregations

Data (org.apache.sysml.lops.Data)14 Lop (org.apache.sysml.lops.Lop)12 LopsException (org.apache.sysml.lops.LopsException)10 ArrayList (java.util.ArrayList)6 CPInstruction (org.apache.sysml.runtime.instructions.cp.CPInstruction)6 VariableCPInstruction (org.apache.sysml.runtime.instructions.cp.VariableCPInstruction)6 Instruction (org.apache.sysml.runtime.instructions.Instruction)5 MRJobInstruction (org.apache.sysml.runtime.instructions.MRJobInstruction)5 HashMap (java.util.HashMap)4 Unary (org.apache.sysml.lops.Unary)4 Aggregate (org.apache.sysml.lops.Aggregate)3 Group (org.apache.sysml.lops.Group)3 ExecType (org.apache.sysml.lops.LopProperties.ExecType)3 PickByCount (org.apache.sysml.lops.PickByCount)3 MultiThreadedHop (org.apache.sysml.hops.Hop.MultiThreadedHop)2 CombineBinary (org.apache.sysml.lops.CombineBinary)2 CombineUnary (org.apache.sysml.lops.CombineUnary)2 ParameterizedBuiltin (org.apache.sysml.lops.ParameterizedBuiltin)2 PartialAggregate (org.apache.sysml.lops.PartialAggregate)2 SortKeys (org.apache.sysml.lops.SortKeys)2