Search in sources :

Example 1 with Data

use of org.apache.sysml.lops.Data in project incubator-systemml by apache.

the class ParameterizedBuiltinOp method constructLopsGroupedAggregate.

/**
 * Constructs the lops for a grouped aggregate and registers the result via {@code setLops}.
 *
 * <p>If this hop's dimensions are unknown but the number of groups is given as a literal,
 * the output dimensions are derived from the combined input lop (when present); otherwise
 * they remain -1. The physical operator is then selected per execution type:
 * <ul>
 *   <li>MR, weighted: append chain of target/groups/weights followed by a GroupedAggregate.</li>
 *   <li>MR, unweighted: GroupedAggregateM plus Group/Aggregate post-aggregation if the groups
 *       fit the remote map budget, the function is the literal "sum" and the number of groups
 *       is given; otherwise an append of target and groups followed by a GroupedAggregate.</li>
 *   <li>CP: a single GroupedAggregate using the constrained number of threads.</li>
 *   <li>SPARK: GroupedAggregateM under the analogous broadcast conditions, otherwise
 *       GroupedAggregate.</li>
 * </ul>
 *
 * @param inputlops named input lops; modified in place (target/groups/weights entries may be
 *                  replaced by a combined input or a partitioned groups lop)
 * @param et execution type to compile for
 * @throws HopsException if thrown by one of the hop-level calls made during construction
 * @throws LopsException if thrown by one of the lop-level calls made during construction
 */
private void constructLopsGroupedAggregate(HashMap<String, Lop> inputlops, ExecType et) throws HopsException, LopsException {
    //reset reblock requirement (see MR aggregate / construct lops)
    setRequiresReblock(false);
    //determine output dimensions (-1 = unknown)
    long outputDim1 = -1, outputDim2 = -1;
    Lop numGroups = inputlops.get(Statement.GAGG_NUM_GROUPS);
    //note: instanceof is false for null, so no separate null check is needed
    if (!dimsKnown() && numGroups instanceof Data && ((Data) numGroups).isLiteral()) {
        long ngroups = ((Data) numGroups).getLongValue();
        Lop input = inputlops.get(GroupedAggregate.COMBINEDINPUT);
        //the combined input is only registered further below in this method, so it is
        //absent here unless the caller supplied it; keep the dimensions unknown in that case
        if (input != null) {
            long inDim1 = input.getOutputParameters().getNumRows();
            long inDim2 = input.getOutputParameters().getNumCols();
            boolean rowwise = (inDim1 == 1 && inDim2 > 1);
            if (rowwise) {
                //vector
                outputDim1 = ngroups;
                outputDim2 = 1;
            } else {
                //vector or matrix
                outputDim1 = inDim2;
                outputDim2 = ngroups;
            }
        }
    }
    //construct lops
    if (et == ExecType.MR) {
        Lop grp_agg = null;
        // construct necessary lops: combineBinary/combineTertiary and groupedAgg
        boolean isWeighted = (_paramIndexMap.get(Statement.GAGG_WEIGHTS) != null);
        if (isWeighted) {
            Hop target = getInput().get(_paramIndexMap.get(Statement.GAGG_TARGET));
            Hop groups = getInput().get(_paramIndexMap.get(Statement.GAGG_GROUPS));
            Hop weights = getInput().get(_paramIndexMap.get(Statement.GAGG_WEIGHTS));
            Lop append = BinaryOp.constructAppendLopChain(target, groups, weights, DataType.MATRIX, getValueType(), true, target);
            // add the combine lop to parameter list, with a new name "combinedinput"
            inputlops.put(GroupedAggregate.COMBINEDINPUT, append);
            inputlops.remove(Statement.GAGG_TARGET);
            inputlops.remove(Statement.GAGG_GROUPS);
            inputlops.remove(Statement.GAGG_WEIGHTS);
            grp_agg = new GroupedAggregate(inputlops, isWeighted, getDataType(), getValueType());
            grp_agg.getOutputParameters().setDimensions(outputDim1, outputDim2, getRowsInBlock(), getColsInBlock(), -1);
            setRequiresReblock(true);
        } else {
            Hop target = getInput().get(_paramIndexMap.get(Statement.GAGG_TARGET));
            Hop groups = getInput().get(_paramIndexMap.get(Statement.GAGG_GROUPS));
            Lop append = null;
            //physical operator selection
            double groupsSizeP = OptimizerUtils.estimatePartitionedSizeExactSparsity(groups.getDim1(), groups.getDim2(), groups.getRowsInBlock(), groups.getColsInBlock(), groups.getNnz());
            if (groupsSizeP < OptimizerUtils.getRemoteMemBudgetMap(true)
                    && getInput().get(_paramIndexMap.get(Statement.GAGG_FN)) instanceof LiteralOp
                    && ((LiteralOp) getInput().get(_paramIndexMap.get(Statement.GAGG_FN))).getStringValue().equals("sum")
                    && inputlops.get(Statement.GAGG_NUM_GROUPS) != null) {
                //mapgroupedagg
                //pre partitioning
                boolean needPart = (groups.dimsKnown() && groups.getDim1() * groups.getDim2() > DistributedCacheInput.PARTITION_SIZE);
                if (needPart) {
                    //operator selection for the partitioning step
                    ExecType etPart = (OptimizerUtils.estimateSizeExactSparsity(groups.getDim1(), groups.getDim2(), 1.0) < OptimizerUtils.getLocalMemBudget()) ? ExecType.CP : ExecType.MR;
                    Lop dcinput = new DataPartition(groups.constructLops(), DataType.MATRIX, ValueType.DOUBLE, etPart, PDataPartitionFormat.ROW_BLOCK_WISE_N);
                    dcinput.getOutputParameters().setDimensions(groups.getDim1(), groups.getDim2(), target.getRowsInBlock(), target.getColsInBlock(), groups.getNnz());
                    setLineNumbers(dcinput);
                    inputlops.put(Statement.GAGG_GROUPS, dcinput);
                }
                Lop grp_agg_m = new GroupedAggregateM(inputlops, getDataType(), getValueType(), needPart, ExecType.MR);
                grp_agg_m.getOutputParameters().setDimensions(outputDim1, outputDim2, target.getRowsInBlock(), target.getColsInBlock(), -1);
                setLineNumbers(grp_agg_m);
                //post aggregation
                Group grp = new Group(grp_agg_m, Group.OperationTypes.Sort, getDataType(), getValueType());
                grp.getOutputParameters().setDimensions(outputDim1, outputDim2, target.getRowsInBlock(), target.getColsInBlock(), -1);
                setLineNumbers(grp);
                Aggregate agg1 = new Aggregate(grp, HopsAgg2Lops.get(AggOp.SUM), getDataType(), getValueType(), ExecType.MR);
                agg1.setupCorrectionLocation(CorrectionLocationType.NONE);
                agg1.getOutputParameters().setDimensions(outputDim1, outputDim2, target.getRowsInBlock(), target.getColsInBlock(), -1);
                grp_agg = agg1;
                //note: no reblock required
            } else {
                //general case: groupedagg
                if (target.getDim2() >= target.getColsInBlock() || target.getDim2() <= 0) {
                    //multi-column-block result matrix, or number of columns unknown
                    long m1_dim1 = target.getDim1();
                    long m1_dim2 = target.getDim2();
                    long m2_dim1 = groups.getDim1();
                    long m2_dim2 = groups.getDim2();
                    long m3_dim1 = m1_dim1;
                    long m3_dim2 = ((m1_dim2 > 0 && m2_dim2 > 0) ? (m1_dim2 + m2_dim2) : -1);
                    long m3_nnz = (target.getNnz() > 0 && groups.getNnz() > 0) ? (target.getNnz() + groups.getNnz()) : -1;
                    long brlen = target.getRowsInBlock();
                    long bclen = target.getColsInBlock();
                    Lop offset = createOffsetLop(target, true);
                    Lop rep = new RepMat(groups.constructLops(), offset, true, groups.getDataType(), groups.getValueType());
                    setOutputDimensions(rep);
                    setLineNumbers(rep);
                    Group group1 = new Group(target.constructLops(), Group.OperationTypes.Sort, DataType.MATRIX, target.getValueType());
                    group1.getOutputParameters().setDimensions(m1_dim1, m1_dim2, brlen, bclen, target.getNnz());
                    setLineNumbers(group1);
                    Group group2 = new Group(rep, Group.OperationTypes.Sort, DataType.MATRIX, groups.getValueType());
                    //fix: the groups-side dimensions belong on group2; they were previously set on
                    //group1, clobbering the target-side dimensions and leaving group2 without any
                    group2.getOutputParameters().setDimensions(m2_dim1, m2_dim2, brlen, bclen, groups.getNnz());
                    setLineNumbers(group2);
                    append = new AppendR(group1, group2, DataType.MATRIX, ValueType.DOUBLE, true, ExecType.MR);
                    append.getOutputParameters().setDimensions(m3_dim1, m3_dim2, brlen, bclen, m3_nnz);
                    setLineNumbers(append);
                } else {
                    //single-column-block vector or matrix
                    append = BinaryOp.constructMRAppendLop(target, groups, DataType.MATRIX, getValueType(), true, target);
                }
                // add the combine lop to parameter list, with a new name "combinedinput"
                inputlops.put(GroupedAggregate.COMBINEDINPUT, append);
                inputlops.remove(Statement.GAGG_TARGET);
                inputlops.remove(Statement.GAGG_GROUPS);
                grp_agg = new GroupedAggregate(inputlops, isWeighted, getDataType(), getValueType());
                grp_agg.getOutputParameters().setDimensions(outputDim1, outputDim2, getRowsInBlock(), getColsInBlock(), -1);
                setRequiresReblock(true);
            }
        }
        setLineNumbers(grp_agg);
        setLops(grp_agg);
    } else {
        //CP/Spark
        Lop grp_agg = null;
        if (et == ExecType.CP) {
            int k = OptimizerUtils.getConstrainedNumThreads(_maxNumThreads);
            grp_agg = new GroupedAggregate(inputlops, getDataType(), getValueType(), et, k);
            grp_agg.getOutputParameters().setDimensions(outputDim1, outputDim2, getRowsInBlock(), getColsInBlock(), -1);
        } else if (et == ExecType.SPARK) {
            //physical operator selection
            Hop groups = getInput().get(_paramIndexMap.get(Statement.GAGG_GROUPS));
            boolean broadcastGroups = (_paramIndexMap.get(Statement.GAGG_WEIGHTS) == null && OptimizerUtils.checkSparkBroadcastMemoryBudget(groups.getDim1(), groups.getDim2(), groups.getRowsInBlock(), groups.getColsInBlock(), groups.getNnz()));
            if (broadcastGroups
                    && getInput().get(_paramIndexMap.get(Statement.GAGG_FN)) instanceof LiteralOp
                    && ((LiteralOp) getInput().get(_paramIndexMap.get(Statement.GAGG_FN))).getStringValue().equals("sum")
                    && inputlops.get(Statement.GAGG_NUM_GROUPS) != null) {
                //mapgroupedagg
                Hop target = getInput().get(_paramIndexMap.get(Statement.GAGG_TARGET));
                grp_agg = new GroupedAggregateM(inputlops, getDataType(), getValueType(), true, ExecType.SPARK);
                grp_agg.getOutputParameters().setDimensions(outputDim1, outputDim2, target.getRowsInBlock(), target.getColsInBlock(), -1);
                //no reblock required (directly output binary block)
            } else {
                //groupedagg (w/ or w/o broadcast)
                grp_agg = new GroupedAggregate(inputlops, getDataType(), getValueType(), et, broadcastGroups);
                grp_agg.getOutputParameters().setDimensions(outputDim1, outputDim2, -1, -1, -1);
                setRequiresReblock(true);
            }
        }
        setLineNumbers(grp_agg);
        setLops(grp_agg);
    }
}
Also used : Group(org.apache.sysml.lops.Group) MultiThreadedHop(org.apache.sysml.hops.Hop.MultiThreadedHop) Data(org.apache.sysml.lops.Data) Lop(org.apache.sysml.lops.Lop) RepMat(org.apache.sysml.lops.RepMat) AppendR(org.apache.sysml.lops.AppendR) ExecType(org.apache.sysml.lops.LopProperties.ExecType) GroupedAggregate(org.apache.sysml.lops.GroupedAggregate) Aggregate(org.apache.sysml.lops.Aggregate) GroupedAggregate(org.apache.sysml.lops.GroupedAggregate) DataPartition(org.apache.sysml.lops.DataPartition) GroupedAggregateM(org.apache.sysml.lops.GroupedAggregateM)

Example 2 with Data

use of org.apache.sysml.lops.Data in project incubator-systemml by apache.

the class DataOp method constructLops.

/**
 * Returns the lops of this data operation, building them on first use.
 *
 * <p>All named parameter inputs are compiled to lops first; a {@code Data} lop matching the
 * data operation type is then created, tagged with line numbers and registered through
 * {@code setLops}, after which reblock/checkpoint properties are applied.
 *
 * @return the lops registered for this hop
 * @throws HopsException if thrown by one of the hop-level calls made during construction
 * @throws LopsException if the data operation type is not handled here
 */
@Override
public Lop constructLops() throws HopsException, LopsException {
    // lops were already built by an earlier call
    if (getLops() != null) {
        return getLops();
    }

    ExecType execType = optFindExecType();

    // compile every named parameter input into its lop
    HashMap<String, Lop> paramLops = new HashMap<String, Lop>();
    for (Entry<String, Integer> param : _paramIndexMap.entrySet()) {
        String paramName = param.getKey();
        Lop paramLop = getInput().get(param.getValue()).constructLops();
        paramLops.put(paramName, paramLop);
    }

    // create the Data lop for the concrete operation type
    Lop dataLop = null;
    switch(_dataop) {
        case TRANSIENTREAD: {
            dataLop = new Data(HopsData2Lops.get(_dataop), null, paramLops, getName(), null, getDataType(), getValueType(), true, getInputFormatType());
            setOutputDimensions(dataLop);
            break;
        }
        case PERSISTENTREAD: {
            dataLop = new Data(HopsData2Lops.get(_dataop), null, paramLops, getName(), null, getDataType(), getValueType(), false, getInputFormatType());
            // persistent reads take their block sizes from the input-specific fields
            dataLop.getOutputParameters().setDimensions(getDim1(), getDim2(), _inRowsInBlock, _inColsInBlock, getNnz(), getUpdateType());
            break;
        }
        case PERSISTENTWRITE: {
            Data persistentWrite = new Data(HopsData2Lops.get(_dataop), getInput().get(0).constructLops(), paramLops, getName(), null, getDataType(), getValueType(), false, getInputFormatType());
            persistentWrite.setExecType(execType);
            setOutputDimensions(persistentWrite);
            dataLop = persistentWrite;
            break;
        }
        case TRANSIENTWRITE: {
            dataLop = new Data(HopsData2Lops.get(_dataop), getInput().get(0).constructLops(), paramLops, getName(), null, getDataType(), getValueType(), true, getInputFormatType());
            setOutputDimensions(dataLop);
            break;
        }
        case FUNCTIONOUTPUT: {
            Data functionOutput = new Data(HopsData2Lops.get(_dataop), getInput().get(0).constructLops(), paramLops, getName(), null, getDataType(), getValueType(), true, getInputFormatType());
            functionOutput.setExecType(execType);
            setOutputDimensions(functionOutput);
            dataLop = functionOutput;
            break;
        }
        default:
            throw new LopsException("Invalid operation type for Data LOP: " + _dataop);
    }

    setLineNumbers(dataLop);
    setLops(dataLop);

    // add reblock/checkpoint lops if necessary
    constructAndSetLopsDataFlowProperties();
    return getLops();
}
Also used : LopsException(org.apache.sysml.lops.LopsException) HashMap(java.util.HashMap) ExecType(org.apache.sysml.lops.LopProperties.ExecType) Data(org.apache.sysml.lops.Data) Lop(org.apache.sysml.lops.Lop)

Example 3 with Data

use of org.apache.sysml.lops.Data in project incubator-systemml by apache.

the class Dag method getInputPathsAndParameters.

/**
 * Populates the job inputs reachable from {@code node} and the node index mapping.
 *
 * <p>A DataGen lop that belongs to the job is treated as an input by itself. A lop outside
 * the job becomes an input (file name or variable placeholder, label, dimensions, block
 * sizes and input info) unless it is a scalar control-program lop or a Data lop that is not
 * a non-scalar READ. Lops inside the job are traversed recursively through their inputs.
 *
 * @param node lop to inspect
 * @param execNodes lops that belong to the job
 * @param inputStrings receives input file names, placeholders or DataGen instructions
 * @param inputInfos receives the input info per registered input
 * @param numRows receives the number of rows per registered input
 * @param numCols receives the number of columns per registered input
 * @param numRowsPerBlock receives the rows per block per registered input
 * @param numColsPerBlock receives the columns per block per registered input
 * @param nodeIndexMapping receives the input position assigned to each registered lop
 * @param inputLabels receives the labels of registered non-DataGen inputs
 * @param inputLops receives the registered non-DataGen input lops
 * @param MRJobLineNumbers receives begin lines of DataGen lops when debug mode is enabled
 * @throws LopsException if a blocked input does not use the binary format
 */
private static void getInputPathsAndParameters(Lop node, ArrayList<Lop> execNodes, ArrayList<String> inputStrings, ArrayList<InputInfo> inputInfos, ArrayList<Long> numRows, ArrayList<Long> numCols, ArrayList<Long> numRowsPerBlock, ArrayList<Long> numColsPerBlock, HashMap<Lop, Integer> nodeIndexMapping, ArrayList<String> inputLabels, ArrayList<Lop> inputLops, ArrayList<Integer> MRJobLineNumbers) throws LopsException {
    final boolean partOfJob = execNodes.contains(node);
    final boolean alreadyIndexed = nodeIndexMapping.containsKey(node);

    if (partOfJob) {
        // treat rand as an input.
        if (node.getType() == Type.DataGen && !alreadyIndexed) {
            final int slot = inputStrings.size();
            numRows.add(node.getOutputParameters().getNumRows());
            numCols.add(node.getOutputParameters().getNumCols());
            numRowsPerBlock.add(node.getOutputParameters().getRowsInBlock());
            numColsPerBlock.add(node.getOutputParameters().getColsInBlock());
            inputStrings.add(node.getInstructions(slot, slot));
            if (DMLScript.ENABLE_DEBUG_MODE) {
                MRJobLineNumbers.add(node._beginLine);
            }
            inputInfos.add(InputInfo.TextCellInputInfo);
            nodeIndexMapping.put(node, slot);
            return;
        }
        // process children recursively
        for (Lop child : node.getInputs()) {
            getInputPathsAndParameters(child, execNodes, inputStrings, inputInfos, numRows, numCols, numRowsPerBlock, numColsPerBlock, nodeIndexMapping, inputLabels, inputLops, MRJobLineNumbers);
        }
        return;
    }

    // node is outside the job: decide whether it has to be registered as a job input
    boolean registerAsInput;
    if (node.getExecLocation() == ExecLocation.Data) {
        // only non-scalar reads qualify; the cast is confined to Data-located lops
        Data dataNode = (Data) node;
        registerAsInput = dataNode.getOperationType() == Data.OperationTypes.READ && dataNode.getDataType() != DataType.SCALAR && !alreadyIndexed;
    } else {
        boolean cpScalar = node.getExecLocation() == ExecLocation.ControlProgram && node.getDataType() == DataType.SCALAR;
        registerAsInput = !alreadyIndexed && !cpScalar;
    }
    // if exec nodes does not contain node and it is no input, there is nothing to do
    if (!registerAsInput) {
        return;
    }

    // get input file names
    final int slot = inputStrings.size();
    String fileName = node.getOutputParameters().getFile_name();
    if (fileName == null) {
        // use label name
        inputStrings.add(Lop.VARIABLE_NAME_PLACEHOLDER + node.getOutputParameters().getLabel() + Lop.VARIABLE_NAME_PLACEHOLDER);
    } else {
        inputStrings.add(node.getOutputParameters().getFile_name());
    }
    inputLabels.add(node.getOutputParameters().getLabel());
    inputLops.add(node);
    numRows.add(node.getOutputParameters().getNumRows());
    numCols.add(node.getOutputParameters().getNumCols());
    numRowsPerBlock.add(node.getOutputParameters().getRowsInBlock());
    numColsPerBlock.add(node.getOutputParameters().getColsInBlock());

    // Check if file format type is binary or text and update infos
    InputInfo nodeInputInfo;
    if (node.getOutputParameters().isBlocked()) {
        if (node.getOutputParameters().getFormat() != Format.BINARY) {
            throw new LopsException("Invalid format (" + node.getOutputParameters().getFormat() + ") encountered for a node/lop (ID=" + node.getID() + ") with blocked output.");
        }
        nodeInputInfo = InputInfo.BinaryBlockInputInfo;
    } else {
        nodeInputInfo = (node.getOutputParameters().getFormat() == Format.TEXT) ? InputInfo.TextCellInputInfo : InputInfo.BinaryCellInputInfo;
    }

    // the information on key/value classes
    if (node.getType() == Type.SortKeys) {
        // SortKeys is the input to some other lop (say, L)
        // InputInfo of L is the outputInfo of SortKeys, which is
        // (compactformat, doubleWriteable, IntWritable)
        nodeInputInfo = new InputInfo(PickFromCompactInputFormat.class, DoubleWritable.class, IntWritable.class);
    } else if (node.getType() == Type.CombineBinary) {
        // CombineBinary is the input to some other lop (say, L)
        // InputInfo of L is the outputInfo of CombineBinary
        // And, the outputInfo of CombineBinary depends on the operation!
        org.apache.sysml.lops.CombineBinary.OperationTypes combineOp = ((CombineBinary) node).getOperation();
        if (combineOp == org.apache.sysml.lops.CombineBinary.OperationTypes.PreSort) {
            nodeInputInfo = new InputInfo(SequenceFileInputFormat.class, DoubleWritable.class, IntWritable.class);
        } else if (combineOp == org.apache.sysml.lops.CombineBinary.OperationTypes.PreCentralMoment
                || combineOp == org.apache.sysml.lops.CombineBinary.OperationTypes.PreCovUnweighted
                || combineOp == org.apache.sysml.lops.CombineBinary.OperationTypes.PreGroupedAggUnweighted) {
            nodeInputInfo = InputInfo.WeightedPairInputInfo;
        }
    } else if (node.getType() == Type.CombineTernary) {
        nodeInputInfo = InputInfo.WeightedPairInputInfo;
    }
    inputInfos.add(nodeInputInfo);
    nodeIndexMapping.put(node, slot);
}
Also used : CombineBinary(org.apache.sysml.lops.CombineBinary) InputInfo(org.apache.sysml.runtime.matrix.data.InputInfo) LopsException(org.apache.sysml.lops.LopsException) PickFromCompactInputFormat(org.apache.sysml.runtime.matrix.sort.PickFromCompactInputFormat) Data(org.apache.sysml.lops.Data) DoubleWritable(org.apache.hadoop.io.DoubleWritable) Lop(org.apache.sysml.lops.Lop) IntWritable(org.apache.hadoop.io.IntWritable)

Example 4 with Data

use of org.apache.sysml.lops.Data in project incubator-systemml by apache.

the class Dag method getAggAndOtherInstructions.

/**
 * Recursively generates the shuffle, aggregate and other reducer instructions for
 * {@code node} and the lops below it, and assigns output indices to the processed lops.
 *
 * @param node low-level operator
 * @param execNodes list of exec nodes
 * @param shuffleInstructions receives instructions of MapAndReduce lops and CSV writes
 * @param aggInstructionsReducer receives instructions of single-input Aggregate lops
 * @param otherInstructionsReducer receives all remaining reducer instructions
 * @param nodeIndexMapping maps processed lops to their output index (read and updated)
 * @param start_index single-element array holding the next free output index (updated)
 * @param inputLabels receives labels of non-literal scalar inputs of two-input Unary lops
 * @param inputLops receives the lops belonging to {@code inputLabels}
 * @param MRJobLineNumbers receives begin lines of generated instructions in debug mode
 * @return the output index associated with {@code node}, the index of an input where no
 *         instruction is generated for the node itself, or -1 if none applies
 * @throws LopsException if a reducer lop has an unsupported number of inputs
 */
private int getAggAndOtherInstructions(Lop node, ArrayList<Lop> execNodes, ArrayList<String> shuffleInstructions, ArrayList<String> aggInstructionsReducer, ArrayList<String> otherInstructionsReducer, HashMap<Lop, Integer> nodeIndexMapping, int[] start_index, ArrayList<String> inputLabels, ArrayList<Lop> inputLops, ArrayList<Integer> MRJobLineNumbers) throws LopsException {
    if (nodeIndexMapping.containsKey(node)) {
        return nodeIndexMapping.get(node);
    }
    if (!execNodes.contains(node)) {
        return -1;
    }

    // resolve the inputs first; a WRITE only follows its first input
    int lastInputIndex = -1;
    ArrayList<Integer> inputIndices = new ArrayList<Integer>();
    boolean isWrite = node.getType() == Lop.Type.Data && ((Data) node).getOperationType() == Data.OperationTypes.WRITE;
    if (isWrite) {
        lastInputIndex = getAggAndOtherInstructions(node.getInputs().get(0), execNodes, shuffleInstructions, aggInstructionsReducer, otherInstructionsReducer, nodeIndexMapping, start_index, inputLabels, inputLops, MRJobLineNumbers);
        inputIndices.add(lastInputIndex);
    } else {
        for (Lop input : node.getInputs()) {
            lastInputIndex = getAggAndOtherInstructions(input, execNodes, shuffleInstructions, aggInstructionsReducer, otherInstructionsReducer, nodeIndexMapping, start_index, inputLabels, inputLops, MRJobLineNumbers);
            inputIndices.add(lastInputIndex);
        }
    }

    if (node.getExecLocation() == ExecLocation.Data) {
        // only CSV outputs that are not fed by a TRANSFORM get their own instruction
        if (((Data) node).getFileFormatType() != FileFormatTypes.CSV) {
            return lastInputIndex;
        }
        Lop producer = node.getInputs().get(0);
        if (producer instanceof ParameterizedBuiltin && ((ParameterizedBuiltin) producer).getOp() == org.apache.sysml.lops.ParameterizedBuiltin.OperationTypes.TRANSFORM) {
            return lastInputIndex;
        }
        // Generate write instruction, which goes into CSV_WRITE Job
        int csvOutIndex = start_index[0];
        shuffleInstructions.add(node.getInstructions(inputIndices.get(0), csvOutIndex));
        if (DMLScript.ENABLE_DEBUG_MODE) {
            MRJobLineNumbers.add(node._beginLine);
        }
        nodeIndexMapping.put(node, csvOutIndex);
        start_index[0]++;
        return csvOutIndex;
    }

    if (node.getExecLocation() == ExecLocation.MapAndReduce) {
        /* Generate Shuffle Instruction for "node", and return the index associated with produced output */
        int shuffleOutIndex = start_index[0];
        // handled: the lop type is covered by the switch; emitted: an instruction was appended
        boolean handled = true;
        boolean emitted = true;
        switch(node.getType()) {
            /* Lop types that take a single input */
            case ReBlock:
            case CSVReBlock:
            case SortKeys:
            case CentralMoment:
            case CoVariance:
            case GroupedAgg:
            case DataPartition:
                shuffleInstructions.add(node.getInstructions(inputIndices.get(0), shuffleOutIndex));
                break;
            case ParameterizedBuiltin:
                if (((ParameterizedBuiltin) node).getOp() == org.apache.sysml.lops.ParameterizedBuiltin.OperationTypes.TRANSFORM) {
                    shuffleInstructions.add(node.getInstructions(shuffleOutIndex));
                } else {
                    // still counts as handled, but nothing is appended
                    emitted = false;
                }
                break;
            /* Lop types that take two inputs */
            case MMCJ:
            case MMRJ:
            case CombineBinary:
                shuffleInstructions.add(node.getInstructions(inputIndices.get(0), inputIndices.get(1), shuffleOutIndex));
                break;
            /* Lop types that take three inputs */
            case CombineTernary:
                shuffleInstructions.add(node.getInstructions(inputIndices.get(0), inputIndices.get(1), inputIndices.get(2), shuffleOutIndex));
                break;
            default:
                handled = false;
                emitted = false;
                break;
        }
        if (emitted && DMLScript.ENABLE_DEBUG_MODE) {
            MRJobLineNumbers.add(node._beginLine);
        }
        if (!handled) {
            return inputIndices.get(0);
        }
        nodeIndexMapping.put(node, shuffleOutIndex);
        start_index[0]++;
        return shuffleOutIndex;
    }

    /* Get instructions for aligned reduce and other lops below the reduce. */
    boolean runsInReducer = node.getExecLocation() == ExecLocation.Reduce || node.getExecLocation() == ExecLocation.MapOrReduce || hasChildNode(node, execNodes, ExecLocation.MapAndReduce);
    if (!runsInReducer) {
        return -1;
    }

    final int arity = inputIndices.size();
    final boolean ternary = (node.getType() == Type.Ternary);
    // reject unsupported arities before an output index is consumed
    if (!ternary && (arity < 1 || arity > 4)) {
        throw new LopsException("Invalid number of inputs to a lop: " + arity);
    }
    final int outIndex = start_index[0]++;

    if (arity == 1) {
        if (node.getType() == Type.Aggregate) {
            aggInstructionsReducer.add(node.getInstructions(inputIndices.get(0), outIndex));
            // NOTE(review): together with the shared bookkeeping below, the begin line of an
            // Aggregate is recorded twice in debug mode - kept as is, confirm whether intended
            if (DMLScript.ENABLE_DEBUG_MODE) {
                MRJobLineNumbers.add(node._beginLine);
            }
        } else {
            otherInstructionsReducer.add(node.getInstructions(inputIndices.get(0), outIndex));
        }
    } else if (arity == 2) {
        otherInstructionsReducer.add(node.getInstructions(inputIndices.get(0), inputIndices.get(1), outIndex));
    } else if (arity == 3 || ternary) {
        // in case of CTABLE_TRANSFORM_SCALAR_WEIGHT: inputIndices.get(2) would be -1
        otherInstructionsReducer.add(node.getInstructions(inputIndices.get(0), inputIndices.get(1), inputIndices.get(2), outIndex));
    } else {
        // arity == 4
        otherInstructionsReducer.add(node.getInstructions(inputIndices.get(0), inputIndices.get(1), inputIndices.get(2), inputIndices.get(3), outIndex));
    }

    // bookkeeping shared by all arities
    if (DMLScript.ENABLE_DEBUG_MODE) {
        MRJobLineNumbers.add(node._beginLine);
    }
    nodeIndexMapping.put(node, outIndex);

    // two-input Unary: expose its scalar operand (first scalar input, else the first input)
    // as a job input unless it is a literal
    if (arity == 2 && node instanceof Unary && node.getInputs().size() > 1) {
        Lop scalarInput = node.getInputs().get(0);
        for (Lop candidate : node.getInputs()) {
            if (candidate.getDataType() == DataType.SCALAR) {
                scalarInput = candidate;
                break;
            }
        }
        if (scalarInput.getExecLocation() != ExecLocation.Data || !((Data) scalarInput).isLiteral()) {
            inputLabels.add(scalarInput.getOutputParameters().getLabel());
            inputLops.add(scalarInput);
        }
    }
    return outIndex;
}
Also used : LopsException(org.apache.sysml.lops.LopsException) ParameterizedBuiltin(org.apache.sysml.lops.ParameterizedBuiltin) ArrayList(java.util.ArrayList) Data(org.apache.sysml.lops.Data) Lop(org.apache.sysml.lops.Lop) Unary(org.apache.sysml.lops.Unary)

Example 5 with Data

use of org.apache.sysml.lops.Data in project incubator-systemml by apache.

the class Dag method generateControlProgramJobs.

/**
	 * Method to generate instructions that are executed in Control Program. At
	 * this point, this DAG has no dependencies on the MR dag. ie. none of the
	 * inputs are outputs of MR jobs
	 * 
	 * Every lop that is handled here is removed from <code>execNodes</code>
	 * before the method returns; lops that are not handled (e.g., Data WRITE
	 * lops that are sent to MR) stay in the list.
	 * 
	 * @param execNodes list of low-level operators; processed lops are removed from it
	 * @param inst list of instructions; receives the generated instructions
	 * @param writeInst list of write instructions; receives the instructions of scalar writes
	 * @param deleteInst list of delete instructions; receives the cleanup (e.g., rmvar) instructions
	 * @throws LopsException if an instruction cannot be generated or parsed, or if a
	 *         non-scalar Data READ lop is encountered
	 * @throws DMLRuntimeException if DMLRuntimeException occurs
	 */
private void generateControlProgramJobs(ArrayList<Lop> execNodes, ArrayList<Instruction> inst, ArrayList<Instruction> writeInst, ArrayList<Instruction> deleteInst) throws LopsException, DMLRuntimeException {
    // nodes to be deleted from execNodes
    ArrayList<Lop> markedNodes = new ArrayList<Lop>();
    // variable names to be deleted, and for each name the lop it stems from
    // (the lop is used to set the location of the generated rmvar instruction)
    ArrayList<String> var_deletions = new ArrayList<String>();
    HashMap<String, Lop> var_deletionsLineNum = new HashMap<String, Lop>();
    // index-based loop kept on purpose: helpers invoked below receive the node
    // and it is not visible here whether they touch execNodes
    for (int i = 0; i < execNodes.size(); i++) {
        Lop node = execNodes.get(i);
        // set to true if rmvar instructions may be generated for the node's inputs
        boolean doRmVar = false;
        // TODO: statiko -- check if this condition ever evaluated to TRUE
        if (node.getExecLocation() == ExecLocation.Data && ((Data) node).getOperationType() == Data.OperationTypes.READ && ((Data) node).getDataType() == DataType.SCALAR && node.getOutputParameters().getFile_name() == null) {
            markedNodes.add(node);
            continue;
        }
        if (node.getExecLocation() == ExecLocation.ControlProgram) {
            // Output from lops with SCALAR data type must go into Temporary
            // Variables (Var0, Var1, etc.); output from lops with non-SCALAR
            // data type must go into Temporary Files (temp0, temp1, etc.)
            boolean isScalar = (node.getDataType() == DataType.SCALAR);
            NodeOutput out = setupNodeOutputs(node, ExecType.CP, false, false);
            inst.addAll(out.getPreInstructions());
            boolean hasTransientWriteParent = false;
            if (!isScalar) {
                for (Lop parent : node.getOutputs()) {
                    if (parent.getExecLocation() == ExecLocation.Data && ((Data) parent).getOperationType() == Data.OperationTypes.WRITE && ((Data) parent).isTransient()) {
                        hasTransientWriteParent = true;
                        break;
                    }
                }
            }
            if (!hasTransientWriteParent) {
                deleteInst.addAll(out.getLastInstructions());
            } else {
                // non-scalar output consumed by a transient write: the variable
                // is removed via an explicit rmvar appended after the main loop
                var_deletions.add(node.getOutputParameters().getLabel());
                var_deletionsLineNum.put(node.getOutputParameters().getLabel(), node);
            }
            String inst_string = getControlProgramInstructionString(node);
            try {
                if (LOG.isTraceEnabled())
                    LOG.trace("Generating instruction - " + inst_string);
                Instruction currInstr = InstructionParser.parseSingleInstruction(inst_string);
                if (currInstr == null) {
                    throw new LopsException("Error parsing the instruction:" + inst_string);
                }
                // take the location from the node itself if it has one,
                // otherwise fall back to its first output or first input
                if (node._beginLine != 0)
                    currInstr.setLocation(node);
                else if (!node.getOutputs().isEmpty())
                    currInstr.setLocation(node.getOutputs().get(0));
                else if (!node.getInputs().isEmpty())
                    currInstr.setLocation(node.getInputs().get(0));
                inst.add(currInstr);
            } catch (Exception e) {
                throw new LopsException(node.printErrorLocation() + "Problem generating simple inst - " + inst_string, e);
            }
            markedNodes.add(node);
            doRmVar = true;
        } else if (node.getExecLocation() == ExecLocation.Data) {
            Data dnode = (Data) node;
            Data.OperationTypes op = dnode.getOperationType();
            if (op == Data.OperationTypes.WRITE) {
                // If the Data WRITE lop goes into MR, we don't have to do
                // anything here (the node is neither marked nor cleaned up)
                if (!sendWriteLopToMR(node)) {
                    // setupNodeOutputs() handles both transient and persistent writes
                    NodeOutput out = setupNodeOutputs(node, ExecType.CP, false, false);
                    if (dnode.getDataType() == DataType.SCALAR) {
                        // processing is same for both transient and persistent scalar writes
                        writeInst.addAll(out.getLastInstructions());
                    } else if (dnode.isTransient()) {
                        deleteInst.addAll(out.getLastInstructions());
                    } else {
                        // In case of persistent write lop, write instruction will be generated
                        // and that instruction must be added to <code>inst</code> so that it gets
                        // executed immediately. If it is added to <code>deleteInst</code> then it
                        // gets executed at the end of program block's execution
                        inst.addAll(out.getLastInstructions());
                        doRmVar = true;
                    }
                    markedNodes.add(node);
                }
            } else {
                if (node.getDataType() != DataType.SCALAR) {
                    throw new LopsException("Matrix READs are not handled in CP yet!");
                }
                // generate a temp label to hold the value that is read from HDFS
                node.getOutputParameters().setLabel(Lop.SCALAR_VAR_NAME_PREFIX + var_index.getNextID());
                String io_inst = node.getInstructions(node.getOutputParameters().getLabel(), node.getOutputParameters().getFile_name());
                CPInstruction currInstr = CPInstructionParser.parseSingleInstruction(io_inst);
                // fail with a descriptive error instead of a NullPointerException,
                // consistent with the check on the ControlProgram path above
                if (currInstr == null) {
                    throw new LopsException(node.printErrorLocation() + "Error parsing the instruction:" + io_inst);
                }
                currInstr.setLocation(node);
                inst.add(currInstr);
                // the temporary scalar variable is removed again via deleteInst
                Instruction tempInstr = VariableCPInstruction.prepareRemoveInstruction(node.getOutputParameters().getLabel());
                tempInstr.setLocation(node);
                deleteInst.add(tempInstr);
                markedNodes.add(node);
                doRmVar = true;
            }
        }
        // see if rmvar instructions can be generated for node's inputs
        if (doRmVar)
            processConsumersForInputs(node, inst, deleteInst);
    }
    for (String var : var_deletions) {
        Instruction rmInst = VariableCPInstruction.prepareRemoveInstruction(var);
        if (LOG.isTraceEnabled())
            LOG.trace("  Adding var_deletions: " + rmInst.toString());
        rmInst.setLocation(var_deletionsLineNum.get(var));
        deleteInst.add(rmInst);
    }
    // delete all marked nodes
    for (Lop node : markedNodes) {
        execNodes.remove(node);
    }
}

/**
	 * Builds the instruction string of a lop that is executed in the Control
	 * Program by calling the <code>getInstructions()</code> variant that matches
	 * the lop's type and number of inputs. All arguments are the labels of the
	 * output parameters of the involved lops.
	 * 
	 * @param node low-level operator to generate the instruction string for
	 * @return instruction string as returned by the lop
	 * @throws LopsException if LopsException occurs
	 */
private String getControlProgramInstructionString(Lop node) throws LopsException {
    String outLabel = node.getOutputParameters().getLabel();
    // ParameterizedBuiltin, GroupedAgg, DataGen, and MULTIPLE_CP (ie, MultipleCP class)
    // lops are handled separately, by simply passing ONLY the output variable to getInstructions()
    if (node.getType() == Lop.Type.ParameterizedBuiltin || node.getType() == Lop.Type.GroupedAgg || node.getType() == Lop.Type.DataGen || node.getType() == Lop.Type.MULTIPLE_CP) {
        return node.getInstructions(outLabel);
    }
    // FunctionCallCP lops are handled separately as well,
    // by passing arrays of inputs and outputs
    if (node.getType() == Lop.Type.FunctionCallCP) {
        String[] inputs = new String[node.getInputs().size()];
        String[] outputs = new String[node.getOutputs().size()];
        int count = 0;
        for (Lop in : node.getInputs()) {
            inputs[count++] = in.getOutputParameters().getLabel();
        }
        count = 0;
        for (Lop out : node.getOutputs()) {
            outputs[count++] = out.getOutputParameters().getLabel();
        }
        return node.getInstructions(inputs, outputs);
    }
    // all remaining lops: dispatch on the number of inputs
    int numInputs = node.getInputs().size();
    String[] in = new String[numInputs];
    for (int j = 0; j < numInputs; j++) {
        in[j] = node.getInputs().get(j).getOutputParameters().getLabel();
    }
    if (numInputs == 0) {
        // currently, such a case exists only for Rand lop
        return node.getInstructions(outLabel);
    } else if (numInputs == 1) {
        return node.getInstructions(in[0], outLabel);
    } else if (numInputs == 2) {
        return node.getInstructions(in[0], in[1], outLabel);
    } else if (numInputs == 3 || node.getType() == Lop.Type.Ternary) {
        // Ternary lops always use the three-input variant, even with more than three inputs
        return node.getInstructions(in[0], in[1], in[2], outLabel);
    } else if (numInputs == 4) {
        return node.getInstructions(in[0], in[1], in[2], in[3], outLabel);
    } else if (numInputs == 5) {
        return node.getInstructions(in[0], in[1], in[2], in[3], in[4], outLabel);
    } else if (numInputs == 6) {
        return node.getInstructions(in[0], in[1], in[2], in[3], in[4], in[5], outLabel);
    } else if (numInputs == 7) {
        return node.getInstructions(in[0], in[1], in[2], in[3], in[4], in[5], in[6], outLabel);
    }
    // more than seven inputs: pass all input labels as an array
    return node.getInstructions(in, outLabel);
}
Also used : HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) Data(org.apache.sysml.lops.Data) Lop(org.apache.sysml.lops.Lop) MRJobInstruction(org.apache.sysml.runtime.instructions.MRJobInstruction) CPInstruction(org.apache.sysml.runtime.instructions.cp.CPInstruction) Instruction(org.apache.sysml.runtime.instructions.Instruction) VariableCPInstruction(org.apache.sysml.runtime.instructions.cp.VariableCPInstruction) HopsException(org.apache.sysml.hops.HopsException) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException) LopsException(org.apache.sysml.lops.LopsException) IOException(java.io.IOException) CPInstruction(org.apache.sysml.runtime.instructions.cp.CPInstruction) VariableCPInstruction(org.apache.sysml.runtime.instructions.cp.VariableCPInstruction) LopsException(org.apache.sysml.lops.LopsException) OperationTypes(org.apache.sysml.lops.Data.OperationTypes)

Aggregations

Data (org.apache.sysml.lops.Data)14 Lop (org.apache.sysml.lops.Lop)12 LopsException (org.apache.sysml.lops.LopsException)10 ArrayList (java.util.ArrayList)6 CPInstruction (org.apache.sysml.runtime.instructions.cp.CPInstruction)6 VariableCPInstruction (org.apache.sysml.runtime.instructions.cp.VariableCPInstruction)6 Instruction (org.apache.sysml.runtime.instructions.Instruction)5 MRJobInstruction (org.apache.sysml.runtime.instructions.MRJobInstruction)5 HashMap (java.util.HashMap)4 Unary (org.apache.sysml.lops.Unary)4 Aggregate (org.apache.sysml.lops.Aggregate)3 Group (org.apache.sysml.lops.Group)3 ExecType (org.apache.sysml.lops.LopProperties.ExecType)3 PickByCount (org.apache.sysml.lops.PickByCount)3 MultiThreadedHop (org.apache.sysml.hops.Hop.MultiThreadedHop)2 CombineBinary (org.apache.sysml.lops.CombineBinary)2 CombineUnary (org.apache.sysml.lops.CombineUnary)2 ParameterizedBuiltin (org.apache.sysml.lops.ParameterizedBuiltin)2 PartialAggregate (org.apache.sysml.lops.PartialAggregate)2 SortKeys (org.apache.sysml.lops.SortKeys)2