Search in sources :

Example 21 with Lop

use of org.apache.sysml.lops.Lop in project incubator-systemml by apache.

the class UnaryOp method constructLopsMRCumulativeUnary.

/**
 * MR Cumsum is currently based on a multipass algorithm of (1) preaggregation and (2) subsequent offsetting.
 * Note that we currently support one robust physical operator but many alternative
 * realizations are possible for specific scenarios (e.g., when the preaggregated intermediate
 * fit into the map task memory budget) or by creating custom job types.
 *
 * @return low-level operator
 */
private Lop constructLopsMRCumulativeUnary() {
    Hop input = getInput().get(0);
    long rlen = input.getDim1();
    long clen = input.getDim2();
    long brlen = input.getRowsInBlock();
    long bclen = input.getColsInBlock();
    boolean force = !dimsKnown() || _etypeForced == ExecType.MR;
    OperationTypes aggtype = getCumulativeAggType();
    Lop X = input.constructLops();
    Lop TEMP = X;
    ArrayList<Lop> DATA = new ArrayList<>();
    int level = 0;
    // recursive preaggregation until aggregates fit into CP memory budget
    while (((2 * OptimizerUtils.estimateSize(TEMP.getOutputParameters().getNumRows(), clen) + OptimizerUtils.estimateSize(1, clen)) > OptimizerUtils.getLocalMemBudget() && TEMP.getOutputParameters().getNumRows() > 1) || force) {
        DATA.add(TEMP);
        // preaggregation per block
        long rlenAgg = (long) Math.ceil((double) TEMP.getOutputParameters().getNumRows() / brlen);
        Lop preagg = new CumulativePartialAggregate(TEMP, DataType.MATRIX, ValueType.DOUBLE, aggtype, ExecType.MR);
        preagg.getOutputParameters().setDimensions(rlenAgg, clen, brlen, bclen, -1);
        setLineNumbers(preagg);
        Group group = new Group(preagg, Group.OperationTypes.Sort, DataType.MATRIX, ValueType.DOUBLE);
        group.getOutputParameters().setDimensions(rlenAgg, clen, brlen, bclen, -1);
        setLineNumbers(group);
        Aggregate agg = new Aggregate(group, HopsAgg2Lops.get(AggOp.SUM), getDataType(), getValueType(), ExecType.MR);
        agg.getOutputParameters().setDimensions(rlenAgg, clen, brlen, bclen, -1);
        // aggregation uses kahanSum but the inputs do not have correction values
        agg.setupCorrectionLocation(CorrectionLocationType.NONE);
        setLineNumbers(agg);
        TEMP = agg;
        level++;
        // in case of unknowns, generate one level
        force = false;
    }
    // in-memory cum sum (of partial aggregates)
    if (TEMP.getOutputParameters().getNumRows() != 1) {
        int k = OptimizerUtils.getConstrainedNumThreads(_maxNumThreads);
        Unary unary1 = new Unary(TEMP, HopsOpOp1LopsU.get(_op), DataType.MATRIX, ValueType.DOUBLE, ExecType.CP, k);
        unary1.getOutputParameters().setDimensions(TEMP.getOutputParameters().getNumRows(), clen, brlen, bclen, -1);
        setLineNumbers(unary1);
        TEMP = unary1;
    }
    // split, group and mr cumsum
    while (level-- > 0) {
        double init = getCumulativeInitValue();
        CumulativeSplitAggregate split = new CumulativeSplitAggregate(TEMP, DataType.MATRIX, ValueType.DOUBLE, init);
        split.getOutputParameters().setDimensions(rlen, clen, brlen, bclen, -1);
        setLineNumbers(split);
        Group group1 = new Group(DATA.get(level), Group.OperationTypes.Sort, DataType.MATRIX, ValueType.DOUBLE);
        group1.getOutputParameters().setDimensions(rlen, clen, brlen, bclen, -1);
        setLineNumbers(group1);
        Group group2 = new Group(split, Group.OperationTypes.Sort, DataType.MATRIX, ValueType.DOUBLE);
        group2.getOutputParameters().setDimensions(rlen, clen, brlen, bclen, -1);
        setLineNumbers(group2);
        CumulativeOffsetBinary binary = new CumulativeOffsetBinary(group1, group2, DataType.MATRIX, ValueType.DOUBLE, aggtype, ExecType.MR);
        binary.getOutputParameters().setDimensions(rlen, clen, brlen, bclen, -1);
        setLineNumbers(binary);
        TEMP = binary;
    }
    return TEMP;
}
Also used : Group(org.apache.sysml.lops.Group) CumulativeSplitAggregate(org.apache.sysml.lops.CumulativeSplitAggregate) MultiThreadedHop(org.apache.sysml.hops.Hop.MultiThreadedHop) ArrayList(java.util.ArrayList) Lop(org.apache.sysml.lops.Lop) CombineUnary(org.apache.sysml.lops.CombineUnary) Unary(org.apache.sysml.lops.Unary) CumulativePartialAggregate(org.apache.sysml.lops.CumulativePartialAggregate) OperationTypes(org.apache.sysml.lops.Aggregate.OperationTypes) CumulativeOffsetBinary(org.apache.sysml.lops.CumulativeOffsetBinary) PartialAggregate(org.apache.sysml.lops.PartialAggregate) CumulativeSplitAggregate(org.apache.sysml.lops.CumulativeSplitAggregate) Aggregate(org.apache.sysml.lops.Aggregate) CumulativePartialAggregate(org.apache.sysml.lops.CumulativePartialAggregate)

Example 22 with Lop

use of org.apache.sysml.lops.Lop in project incubator-systemml by apache.

the class AggBinaryOp method constructSparkLopsZIPMM.

private void constructSparkLopsZIPMM() {
    // zipmm applies to t(X)%*%y if ncol(X)<=blocksize and it prevents
    // unnecessary reshuffling by keeping the original indexes (and partitioning)
    // joining the datasets, and internally doing the necessary transpose operations
    // x out of t(X)
    Hop left = getInput().get(0).getInput().get(0);
    // y
    Hop right = getInput().get(1);
    // determine left-transpose rewrite beneficial
    boolean tRewrite = (left.getDim1() * left.getDim2() >= right.getDim1() * right.getDim2());
    Lop zipmm = new MMZip(left.constructLops(), right.constructLops(), getDataType(), getValueType(), tRewrite, ExecType.SPARK);
    setOutputDimensions(zipmm);
    setLineNumbers(zipmm);
    setLops(zipmm);
}
Also used : MMZip(org.apache.sysml.lops.MMZip) MultiThreadedHop(org.apache.sysml.hops.Hop.MultiThreadedHop) Lop(org.apache.sysml.lops.Lop)

Example 23 with Lop

use of org.apache.sysml.lops.Lop in project incubator-systemml by apache.

the class AggBinaryOp method constructMRLopsPMM.

private void constructMRLopsPMM() {
    // PMM has two potential modes (a) w/ full permutation matrix input, and
    // (b) w/ already condensed input vector of target row positions.
    Hop pmInput = getInput().get(0);
    Hop rightInput = getInput().get(1);
    Lop lpmInput = pmInput.constructLops();
    Hop nrow = null;
    double mestPM = OptimizerUtils.estimateSize(pmInput.getDim1(), 1);
    ExecType etVect = (mestPM > OptimizerUtils.getLocalMemBudget()) ? ExecType.MR : ExecType.CP;
    // a) full permutation matrix input (potentially without empty block materialized)
    if (// not a vector
    pmInput.getDim2() != 1) {
        // compute condensed permutation matrix vector input
        // v = rowMaxIndex(t(pm)) * rowMax(t(pm))
        ReorgOp transpose = HopRewriteUtils.createTranspose(pmInput);
        transpose.setForcedExecType(ExecType.MR);
        AggUnaryOp agg1 = HopRewriteUtils.createAggUnaryOp(transpose, AggOp.MAXINDEX, Direction.Row);
        agg1.setForcedExecType(ExecType.MR);
        AggUnaryOp agg2 = HopRewriteUtils.createAggUnaryOp(transpose, AggOp.MAX, Direction.Row);
        agg2.setForcedExecType(ExecType.MR);
        BinaryOp mult = HopRewriteUtils.createBinary(agg1, agg2, OpOp2.MULT);
        mult.setForcedExecType(ExecType.MR);
        // compute NROW target via nrow(m)
        nrow = HopRewriteUtils.createValueHop(pmInput, true);
        nrow.setOutputBlocksizes(0, 0);
        nrow.setForcedExecType(ExecType.CP);
        HopRewriteUtils.copyLineNumbers(this, nrow);
        lpmInput = mult.constructLops();
        HopRewriteUtils.removeChildReference(pmInput, transpose);
    } else // input vector
    {
        // compute NROW target via max(v)
        nrow = HopRewriteUtils.createAggUnaryOp(pmInput, AggOp.MAX, Direction.RowCol);
        nrow.setOutputBlocksizes(0, 0);
        nrow.setForcedExecType(etVect);
        HopRewriteUtils.copyLineNumbers(this, nrow);
    }
    // b) condensed permutation matrix vector input (target rows)
    boolean needPart = !pmInput.dimsKnown() || pmInput.getDim1() > DistributedCacheInput.PARTITION_SIZE;
    if (needPart) {
        // requires partitioning
        lpmInput = new DataPartition(lpmInput, DataType.MATRIX, ValueType.DOUBLE, etVect, PDataPartitionFormat.ROW_BLOCK_WISE_N);
        lpmInput.getOutputParameters().setDimensions(pmInput.getDim1(), 1, getRowsInBlock(), getColsInBlock(), pmInput.getDim1());
        setLineNumbers(lpmInput);
    }
    _outputEmptyBlocks = !OptimizerUtils.allowsToFilterEmptyBlockOutputs(this);
    PMMJ pmm = new PMMJ(lpmInput, rightInput.constructLops(), nrow.constructLops(), getDataType(), getValueType(), needPart, _outputEmptyBlocks, ExecType.MR);
    pmm.getOutputParameters().setDimensions(getDim1(), getDim2(), getRowsInBlock(), getColsInBlock(), getNnz());
    setLineNumbers(pmm);
    Aggregate aggregate = new Aggregate(pmm, HopsAgg2Lops.get(outerOp), getDataType(), getValueType(), ExecType.MR);
    aggregate.getOutputParameters().setDimensions(getDim1(), getDim2(), getRowsInBlock(), getColsInBlock(), getNnz());
    // aggregation uses kahanSum but the inputs do not have correction values
    aggregate.setupCorrectionLocation(CorrectionLocationType.NONE);
    setLineNumbers(aggregate);
    setLops(aggregate);
    HopRewriteUtils.removeChildReference(pmInput, nrow);
}
Also used : MultiThreadedHop(org.apache.sysml.hops.Hop.MultiThreadedHop) ExecType(org.apache.sysml.lops.LopProperties.ExecType) Lop(org.apache.sysml.lops.Lop) Aggregate(org.apache.sysml.lops.Aggregate) DataPartition(org.apache.sysml.lops.DataPartition) PMMJ(org.apache.sysml.lops.PMMJ)

Example 24 with Lop

use of org.apache.sysml.lops.Lop in project incubator-systemml by apache.

the class AggBinaryOp method constructMRLopsMapMMChain.

private void constructMRLopsMapMMChain(ChainType chainType) {
    Lop mapmult = null;
    if (chainType == ChainType.XtXv) {
        // v never needs partitioning because always single block
        Hop hX = getInput().get(0).getInput().get(0);
        Hop hv = getInput().get(1).getInput().get(1);
        // core matrix mult
        mapmult = new MapMultChain(hX.constructLops(), hv.constructLops(), getDataType(), getValueType(), ExecType.MR);
        mapmult.getOutputParameters().setDimensions(getDim1(), getDim2(), getRowsInBlock(), getColsInBlock(), getNnz());
        setLineNumbers(mapmult);
    } else // ChainType.XtwXv / ChainType.XtXvy
    {
        // v never needs partitioning because always single block
        int wix = (chainType == ChainType.XtwXv) ? 0 : 1;
        int vix = (chainType == ChainType.XtwXv) ? 1 : 0;
        Hop hX = getInput().get(0).getInput().get(0);
        Hop hw = getInput().get(1).getInput().get(wix);
        Hop hv = getInput().get(1).getInput().get(vix).getInput().get(1);
        double mestW = OptimizerUtils.estimateSize(hw.getDim1(), hw.getDim2());
        boolean needPart = !hw.dimsKnown() || hw.getDim1() * hw.getDim2() > DistributedCacheInput.PARTITION_SIZE;
        Lop X = hX.constructLops(), v = hv.constructLops(), w = null;
        if (needPart) {
            // requires partitioning
            w = new DataPartition(hw.constructLops(), DataType.MATRIX, ValueType.DOUBLE, (mestW > OptimizerUtils.getLocalMemBudget()) ? ExecType.MR : ExecType.CP, PDataPartitionFormat.ROW_BLOCK_WISE_N);
            w.getOutputParameters().setDimensions(hw.getDim1(), hw.getDim2(), getRowsInBlock(), getColsInBlock(), hw.getNnz());
            setLineNumbers(w);
        } else
            w = hw.constructLops();
        // core matrix mult
        mapmult = new MapMultChain(X, v, w, chainType, getDataType(), getValueType(), ExecType.MR);
        mapmult.getOutputParameters().setDimensions(getDim1(), getDim2(), getRowsInBlock(), getColsInBlock(), getNnz());
        setLineNumbers(mapmult);
    }
    // post aggregation
    Group grp = new Group(mapmult, Group.OperationTypes.Sort, getDataType(), getValueType());
    grp.getOutputParameters().setDimensions(getDim1(), getDim2(), getRowsInBlock(), getColsInBlock(), getNnz());
    Aggregate agg1 = new Aggregate(grp, HopsAgg2Lops.get(outerOp), getDataType(), getValueType(), ExecType.MR);
    agg1.getOutputParameters().setDimensions(getDim1(), getDim2(), getRowsInBlock(), getColsInBlock(), getNnz());
    // aggregation uses kahanSum
    agg1.setupCorrectionLocation(CorrectionLocationType.NONE);
    setLineNumbers(agg1);
    setLops(agg1);
}
Also used : Group(org.apache.sysml.lops.Group) MultiThreadedHop(org.apache.sysml.hops.Hop.MultiThreadedHop) Lop(org.apache.sysml.lops.Lop) Aggregate(org.apache.sysml.lops.Aggregate) DataPartition(org.apache.sysml.lops.DataPartition) MapMultChain(org.apache.sysml.lops.MapMultChain)

Example 25 with Lop

use of org.apache.sysml.lops.Lop in project incubator-systemml by apache.

the class AggBinaryOp method constructMRLopsMapMMWithLeftTransposeRewrite.

private Lop constructMRLopsMapMMWithLeftTransposeRewrite() {
    // guaranteed to exists
    Hop X = getInput().get(0).getInput().get(0);
    Hop Y = getInput().get(1);
    // right vector transpose CP
    Lop tY = new Transform(Y.constructLops(), OperationTypes.Transpose, getDataType(), getValueType(), ExecType.CP);
    tY.getOutputParameters().setDimensions(Y.getDim2(), Y.getDim1(), getRowsInBlock(), getColsInBlock(), Y.getNnz());
    setLineNumbers(tY);
    // matrix mult
    // If number of columns is smaller than block size then explicit aggregation is not required.
    // i.e., entire matrix multiplication can be performed in the mappers.
    boolean needAgg = (X.getDim1() <= 0 || X.getDim1() > X.getRowsInBlock());
    // R disregarding transpose rewrite
    boolean needPart = requiresPartitioning(MMultMethod.MAPMM_R, true);
    // pre partitioning
    Lop dcinput = null;
    if (needPart) {
        ExecType etPart = (OptimizerUtils.estimateSizeExactSparsity(Y.getDim2(), Y.getDim1(), OptimizerUtils.getSparsity(Y.getDim2(), Y.getDim1(), Y.getNnz())) < OptimizerUtils.getLocalMemBudget()) ? ExecType.CP : // operator selection
        ExecType.MR;
        dcinput = new DataPartition(tY, DataType.MATRIX, ValueType.DOUBLE, etPart, PDataPartitionFormat.COLUMN_BLOCK_WISE_N);
        dcinput.getOutputParameters().setDimensions(Y.getDim2(), Y.getDim1(), getRowsInBlock(), getColsInBlock(), Y.getNnz());
        setLineNumbers(dcinput);
    } else
        dcinput = tY;
    MapMult mapmult = new MapMult(dcinput, X.constructLops(), getDataType(), getValueType(), false, needPart, false);
    mapmult.getOutputParameters().setDimensions(Y.getDim2(), X.getDim2(), getRowsInBlock(), getColsInBlock(), getNnz());
    setLineNumbers(mapmult);
    // post aggregation
    Lop mult = null;
    if (needAgg) {
        Group grp = new Group(mapmult, Group.OperationTypes.Sort, getDataType(), getValueType());
        grp.getOutputParameters().setDimensions(Y.getDim2(), X.getDim2(), getRowsInBlock(), getColsInBlock(), getNnz());
        setLineNumbers(grp);
        Aggregate agg1 = new Aggregate(grp, HopsAgg2Lops.get(outerOp), getDataType(), getValueType(), ExecType.MR);
        agg1.getOutputParameters().setDimensions(Y.getDim2(), X.getDim2(), getRowsInBlock(), getColsInBlock(), getNnz());
        setLineNumbers(agg1);
        agg1.setupCorrectionLocation(CorrectionLocationType.NONE);
        mult = agg1;
    } else
        mult = mapmult;
    // result transpose CP
    Lop out = new Transform(mult, OperationTypes.Transpose, getDataType(), getValueType(), ExecType.CP);
    out.getOutputParameters().setDimensions(X.getDim2(), Y.getDim2(), getRowsInBlock(), getColsInBlock(), getNnz());
    return out;
}
Also used : Group(org.apache.sysml.lops.Group) MultiThreadedHop(org.apache.sysml.hops.Hop.MultiThreadedHop) PMapMult(org.apache.sysml.lops.PMapMult) MapMult(org.apache.sysml.lops.MapMult) ExecType(org.apache.sysml.lops.LopProperties.ExecType) Lop(org.apache.sysml.lops.Lop) Transform(org.apache.sysml.lops.Transform) Aggregate(org.apache.sysml.lops.Aggregate) DataPartition(org.apache.sysml.lops.DataPartition)

Aggregations

Lop (org.apache.sysml.lops.Lop)171 MultiThreadedHop (org.apache.sysml.hops.Hop.MultiThreadedHop)66 ExecType (org.apache.sysml.lops.LopProperties.ExecType)52 Group (org.apache.sysml.lops.Group)45 ArrayList (java.util.ArrayList)35 Aggregate (org.apache.sysml.lops.Aggregate)32 DataPartition (org.apache.sysml.lops.DataPartition)30 LopsException (org.apache.sysml.lops.LopsException)30 Data (org.apache.sysml.lops.Data)24 Instruction (org.apache.sysml.runtime.instructions.Instruction)23 MRJobInstruction (org.apache.sysml.runtime.instructions.MRJobInstruction)18 Unary (org.apache.sysml.lops.Unary)16 Transform (org.apache.sysml.lops.Transform)15 HashMap (java.util.HashMap)14 UnaryCP (org.apache.sysml.lops.UnaryCP)14 Dag (org.apache.sysml.lops.compile.Dag)13 Hop (org.apache.sysml.hops.Hop)11 RepMat (org.apache.sysml.lops.RepMat)11 Binary (org.apache.sysml.lops.Binary)9 CPInstruction (org.apache.sysml.runtime.instructions.cp.CPInstruction)9