
Example 6 with Group

use of org.apache.sysml.lops.Group in project incubator-systemml by apache.

the class QuaternaryOp method constructMRLopsWeightedCeMM.

private void constructMRLopsWeightedCeMM(WCeMMType wtype) {
    // NOTE: the common case for wcemm is factors U/V with ranks in the 10s to 100s; the current runtime only
    // supports single block outer products (U/V rank <= blocksize, i.e., 1000 by default); we enforce this
    // by applying the hop rewrite for Weighted Cross Entropy only if this constraint holds.
    Hop X = getInput().get(0);
    Hop U = getInput().get(1);
    Hop V = getInput().get(2);
    Hop eps = getInput().get(3);
    // MR operator selection, part1
    // size U
    double m1Size = OptimizerUtils.estimateSize(U.getDim1(), U.getDim2());
    // size V
    double m2Size = OptimizerUtils.estimateSize(V.getDim1(), V.getDim2());
    boolean isMapWcemm = (m1Size + m2Size < OptimizerUtils.getRemoteMemBudgetMap(true));
    // broadcast
    if (!FORCE_REPLICATION && isMapWcemm) {
        // partitioning of U
        boolean needPartU = !U.dimsKnown() || U.getDim1() * U.getDim2() > DistributedCacheInput.PARTITION_SIZE;
        Lop lU = U.constructLops();
        if (needPartU) {
            // requires partitioning
            lU = new DataPartition(lU, DataType.MATRIX, ValueType.DOUBLE, (m1Size > OptimizerUtils.getLocalMemBudget()) ? ExecType.MR : ExecType.CP, PDataPartitionFormat.ROW_BLOCK_WISE_N);
            lU.getOutputParameters().setDimensions(U.getDim1(), U.getDim2(), getRowsInBlock(), getColsInBlock(), U.getNnz());
            setLineNumbers(lU);
        }
        // partitioning of V
        boolean needPartV = !V.dimsKnown() || V.getDim1() * V.getDim2() > DistributedCacheInput.PARTITION_SIZE;
        Lop lV = V.constructLops();
        if (needPartV) {
            // requires partitioning
            lV = new DataPartition(lV, DataType.MATRIX, ValueType.DOUBLE, (m2Size > OptimizerUtils.getLocalMemBudget()) ? ExecType.MR : ExecType.CP, PDataPartitionFormat.ROW_BLOCK_WISE_N);
            lV.getOutputParameters().setDimensions(V.getDim1(), V.getDim2(), getRowsInBlock(), getColsInBlock(), V.getNnz());
            setLineNumbers(lV);
        }
        // map-side wcemm always with broadcast
        Lop wcemm = new WeightedCrossEntropy(X.constructLops(), lU, lV, eps.constructLops(), DataType.MATRIX, ValueType.DOUBLE, wtype, ExecType.MR);
        wcemm.getOutputParameters().setDimensions(1, 1, X.getRowsInBlock(), X.getColsInBlock(), -1);
        setLineNumbers(wcemm);
        Group grp = new Group(wcemm, Group.OperationTypes.Sort, DataType.MATRIX, ValueType.DOUBLE);
        grp.getOutputParameters().setDimensions(1, 1, X.getRowsInBlock(), X.getColsInBlock(), -1);
        setLineNumbers(grp);
        Aggregate agg1 = new Aggregate(grp, HopsAgg2Lops.get(AggOp.SUM), DataType.MATRIX, ValueType.DOUBLE, ExecType.MR);
        // aggregation uses kahanSum but the inputs do not have correction values
        agg1.setupCorrectionLocation(CorrectionLocationType.NONE);
        agg1.getOutputParameters().setDimensions(1, 1, X.getRowsInBlock(), X.getColsInBlock(), -1);
        setLineNumbers(agg1);
        UnaryCP unary1 = new UnaryCP(agg1, HopsOpOp1LopsUS.get(OpOp1.CAST_AS_SCALAR), getDataType(), getValueType());
        unary1.getOutputParameters().setDimensions(0, 0, 0, 0, -1);
        setLineNumbers(unary1);
        setLops(unary1);
    } else { // general case
        // MR operator selection part 2
        boolean cacheU = !FORCE_REPLICATION && (m1Size < OptimizerUtils.getRemoteMemBudgetReduce());
        boolean cacheV = !FORCE_REPLICATION && ((!cacheU && m2Size < OptimizerUtils.getRemoteMemBudgetReduce()) || (cacheU && m1Size + m2Size < OptimizerUtils.getRemoteMemBudgetReduce()));
        Group grpX = new Group(X.constructLops(), Group.OperationTypes.Sort, DataType.MATRIX, ValueType.DOUBLE);
        grpX.getOutputParameters().setDimensions(X.getDim1(), X.getDim2(), X.getRowsInBlock(), X.getColsInBlock(), -1);
        setLineNumbers(grpX);
        Lop lU = constructLeftFactorMRLop(U, V, cacheU, m1Size);
        Lop lV = constructRightFactorMRLop(U, V, cacheV, m2Size);
        // reduce-side wcemm w/ or without broadcast
        Lop wcemm = new WeightedCrossEntropyR(grpX, lU, lV, eps.constructLops(), DataType.MATRIX, ValueType.DOUBLE, wtype, cacheU, cacheV, ExecType.MR);
        wcemm.getOutputParameters().setDimensions(1, 1, X.getRowsInBlock(), X.getColsInBlock(), -1);
        setLineNumbers(wcemm);
        Group grp = new Group(wcemm, Group.OperationTypes.Sort, DataType.MATRIX, ValueType.DOUBLE);
        grp.getOutputParameters().setDimensions(1, 1, X.getRowsInBlock(), X.getColsInBlock(), -1);
        setLineNumbers(grp);
        Aggregate agg1 = new Aggregate(grp, HopsAgg2Lops.get(AggOp.SUM), DataType.MATRIX, ValueType.DOUBLE, ExecType.MR);
        // aggregation uses kahanSum but the inputs do not have correction values
        agg1.setupCorrectionLocation(CorrectionLocationType.NONE);
        agg1.getOutputParameters().setDimensions(1, 1, X.getRowsInBlock(), X.getColsInBlock(), -1);
        setLineNumbers(agg1);
        UnaryCP unary1 = new UnaryCP(agg1, HopsOpOp1LopsUS.get(OpOp1.CAST_AS_SCALAR), getDataType(), getValueType());
        unary1.getOutputParameters().setDimensions(0, 0, 0, 0, -1);
        setLineNumbers(unary1);
        setLops(unary1);
    }
}
Also used : Group(org.apache.sysml.lops.Group) WeightedCrossEntropyR(org.apache.sysml.lops.WeightedCrossEntropyR) MultiThreadedHop(org.apache.sysml.hops.Hop.MultiThreadedHop) WeightedCrossEntropy(org.apache.sysml.lops.WeightedCrossEntropy) Lop(org.apache.sysml.lops.Lop) Aggregate(org.apache.sysml.lops.Aggregate) DataPartition(org.apache.sysml.lops.DataPartition) UnaryCP(org.apache.sysml.lops.UnaryCP)
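
The choice above between the map-side (broadcast) and the reduce-side wcemm operator comes down to whether the estimated sizes of both factors U and V fit into the remote map memory budget. Below is a minimal, standalone sketch of that decision; the 8-bytes-per-cell estimate only roughly mirrors OptimizerUtils.estimateSize, and the class name, dimensions, and 2 GB budget are illustrative assumptions, not SystemML configuration values.

// Hypothetical sketch of the map-side vs. reduce-side wcemm decision.
public class WcemmSelectionSketch {
    // Roughly mirrors OptimizerUtils.estimateSize: dense double matrix, 8 bytes per cell.
    static double estimateDenseSize(long rows, long cols) {
        return 8d * rows * cols;
    }

    public static void main(String[] args) {
        long uRows = 1_000_000, rank = 100, vRows = 10_000;
        double m1Size = estimateDenseSize(uRows, rank); // size of U
        double m2Size = estimateDenseSize(vRows, rank); // size of V
        double mapBudget = 2L * 1024 * 1024 * 1024;     // assumed 2 GB map budget
        boolean isMapWcemm = (m1Size + m2Size < mapBudget);
        System.out.println(isMapWcemm
            ? "map-side wcemm (broadcast U and V)"
            : "reduce-side wcemm (shuffle X, optionally cache U/V)");
    }
}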

Example 7 with Group

use of org.apache.sysml.lops.Group in project incubator-systemml by apache.

the class TernaryOp method constructLopsTernaryDefault.

private void constructLopsTernaryDefault() {
    ExecType et = optFindExecType();
    if (getInput().stream().allMatch(h -> h.getDataType().isScalar()))
        // always CP for pure scalar operations
        et = ExecType.CP;
    Ternary plusmult = null;
    if (et == ExecType.CP || et == ExecType.SPARK || et == ExecType.GPU) {
        plusmult = new Ternary(HopsOpOp3Lops.get(_op), getInput().get(0).constructLops(), getInput().get(1).constructLops(), getInput().get(2).constructLops(), getDataType(), getValueType(), et);
    } else {
        // MR
        Hop first = getInput().get(0);
        Hop second = getInput().get(1);
        Hop third = getInput().get(2);
        Lop firstLop = first.constructLops();
        if (first.getDataType().isMatrix()) {
            firstLop = new Group(firstLop, Group.OperationTypes.Sort, getDataType(), getValueType());
            setLineNumbers(firstLop);
            setOutputDimensions(firstLop);
        }
        Lop secondLop = second.constructLops();
        if (second.getDataType().isMatrix()) {
            secondLop = new Group(secondLop, Group.OperationTypes.Sort, getDataType(), getValueType());
            setLineNumbers(secondLop);
            setOutputDimensions(secondLop);
        }
        Lop thirdLop = third.constructLops();
        if (third.getDataType().isMatrix()) {
            thirdLop = new Group(thirdLop, Group.OperationTypes.Sort, getDataType(), getValueType());
            setLineNumbers(thirdLop);
            setOutputDimensions(thirdLop);
        }
        plusmult = new Ternary(HopsOpOp3Lops.get(_op), firstLop, secondLop, thirdLop, getDataType(), getValueType(), et);
    }
    setOutputDimensions(plusmult);
    setLineNumbers(plusmult);
    setLops(plusmult);
}
Also used : Group(org.apache.sysml.lops.Group) Ternary(org.apache.sysml.lops.Ternary) CombineTernary(org.apache.sysml.lops.CombineTernary) ExecType(org.apache.sysml.lops.LopProperties.ExecType) Lop(org.apache.sysml.lops.Lop)
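
The MR branch above applies the same pattern three times: construct the input lop and, if the input is a matrix, wrap it in a Group (sort) lop with line numbers and output dimensions set. A hypothetical helper inside TernaryOp that captures this repetition could look like the sketch below; the helper name is an assumption, but every call it makes appears verbatim in the snippet above.

// Hypothetical refactoring sketch: wrap a matrix input in a Group (sort) lop,
// leaving scalar inputs untouched.
private Lop groupIfMatrix(Hop in) {
    Lop lop = in.constructLops();
    if (in.getDataType().isMatrix()) {
        lop = new Group(lop, Group.OperationTypes.Sort, getDataType(), getValueType());
        setLineNumbers(lop);
        setOutputDimensions(lop);
    }
    return lop;
}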

Example 8 with Group

use of org.apache.sysml.lops.Group in project incubator-systemml by apache.

the class UnaryOp method constructLopsMRCumulativeUnary.

/**
 * MR Cumsum is currently based on a multipass algorithm of (1) preaggregation and (2) subsequent offsetting.
 * Note that we currently support one robust physical operator but many alternative
 * realizations are possible for specific scenarios (e.g., when the preaggregated intermediates
 * fit into the map task memory budget) or by creating custom job types.
 *
 * @return low-level operator
 */
private Lop constructLopsMRCumulativeUnary() {
    Hop input = getInput().get(0);
    long rlen = input.getDim1();
    long clen = input.getDim2();
    long brlen = input.getRowsInBlock();
    long bclen = input.getColsInBlock();
    boolean force = !dimsKnown() || _etypeForced == ExecType.MR;
    OperationTypes aggtype = getCumulativeAggType();
    Lop X = input.constructLops();
    Lop TEMP = X;
    ArrayList<Lop> DATA = new ArrayList<>();
    int level = 0;
    // recursive preaggregation until aggregates fit into CP memory budget
    while (((2 * OptimizerUtils.estimateSize(TEMP.getOutputParameters().getNumRows(), clen) + OptimizerUtils.estimateSize(1, clen)) > OptimizerUtils.getLocalMemBudget() && TEMP.getOutputParameters().getNumRows() > 1) || force) {
        DATA.add(TEMP);
        // preaggregation per block
        long rlenAgg = (long) Math.ceil((double) TEMP.getOutputParameters().getNumRows() / brlen);
        Lop preagg = new CumulativePartialAggregate(TEMP, DataType.MATRIX, ValueType.DOUBLE, aggtype, ExecType.MR);
        preagg.getOutputParameters().setDimensions(rlenAgg, clen, brlen, bclen, -1);
        setLineNumbers(preagg);
        Group group = new Group(preagg, Group.OperationTypes.Sort, DataType.MATRIX, ValueType.DOUBLE);
        group.getOutputParameters().setDimensions(rlenAgg, clen, brlen, bclen, -1);
        setLineNumbers(group);
        Aggregate agg = new Aggregate(group, HopsAgg2Lops.get(AggOp.SUM), getDataType(), getValueType(), ExecType.MR);
        agg.getOutputParameters().setDimensions(rlenAgg, clen, brlen, bclen, -1);
        // aggregation uses kahanSum but the inputs do not have correction values
        agg.setupCorrectionLocation(CorrectionLocationType.NONE);
        setLineNumbers(agg);
        TEMP = agg;
        level++;
        // in case of unknowns, generate one level
        force = false;
    }
    // in-memory cum sum (of partial aggregates)
    if (TEMP.getOutputParameters().getNumRows() != 1) {
        int k = OptimizerUtils.getConstrainedNumThreads(_maxNumThreads);
        Unary unary1 = new Unary(TEMP, HopsOpOp1LopsU.get(_op), DataType.MATRIX, ValueType.DOUBLE, ExecType.CP, k);
        unary1.getOutputParameters().setDimensions(TEMP.getOutputParameters().getNumRows(), clen, brlen, bclen, -1);
        setLineNumbers(unary1);
        TEMP = unary1;
    }
    // split, group and mr cumsum
    while (level-- > 0) {
        double init = getCumulativeInitValue();
        CumulativeSplitAggregate split = new CumulativeSplitAggregate(TEMP, DataType.MATRIX, ValueType.DOUBLE, init);
        split.getOutputParameters().setDimensions(rlen, clen, brlen, bclen, -1);
        setLineNumbers(split);
        Group group1 = new Group(DATA.get(level), Group.OperationTypes.Sort, DataType.MATRIX, ValueType.DOUBLE);
        group1.getOutputParameters().setDimensions(rlen, clen, brlen, bclen, -1);
        setLineNumbers(group1);
        Group group2 = new Group(split, Group.OperationTypes.Sort, DataType.MATRIX, ValueType.DOUBLE);
        group2.getOutputParameters().setDimensions(rlen, clen, brlen, bclen, -1);
        setLineNumbers(group2);
        CumulativeOffsetBinary binary = new CumulativeOffsetBinary(group1, group2, DataType.MATRIX, ValueType.DOUBLE, aggtype, ExecType.MR);
        binary.getOutputParameters().setDimensions(rlen, clen, brlen, bclen, -1);
        setLineNumbers(binary);
        TEMP = binary;
    }
    return TEMP;
}
Also used : Group(org.apache.sysml.lops.Group) CumulativeSplitAggregate(org.apache.sysml.lops.CumulativeSplitAggregate) MultiThreadedHop(org.apache.sysml.hops.Hop.MultiThreadedHop) ArrayList(java.util.ArrayList) Lop(org.apache.sysml.lops.Lop) CombineUnary(org.apache.sysml.lops.CombineUnary) Unary(org.apache.sysml.lops.Unary) CumulativePartialAggregate(org.apache.sysml.lops.CumulativePartialAggregate) OperationTypes(org.apache.sysml.lops.Aggregate.OperationTypes) CumulativeOffsetBinary(org.apache.sysml.lops.CumulativeOffsetBinary) PartialAggregate(org.apache.sysml.lops.PartialAggregate) CumulativeSplitAggregate(org.apache.sysml.lops.CumulativeSplitAggregate) Aggregate(org.apache.sysml.lops.Aggregate) CumulativePartialAggregate(org.apache.sysml.lops.CumulativePartialAggregate)
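
To make the multipass scheme from the javadoc concrete, the following in-memory analogue (plain arrays, no lops) performs the same three steps on a single vector: per-block preaggregation, a cumulative sum over the small aggregates, and a per-block offsetting pass. The class name, block size, and example data are illustrative assumptions.

import java.util.Arrays;

// In-memory analogue of the MR cumsum: (1) per-block preaggregation,
// (2) cumulative sum over the (small) block aggregates, (3) per-block offsetting.
public class CumsumSketch {
    public static double[] blockedCumsum(double[] x, int blocksize) {
        int nblocks = (x.length + blocksize - 1) / blocksize;
        // phase 1: preaggregate each block to a single sum
        double[] blockSums = new double[nblocks];
        for (int b = 0; b < nblocks; b++)
            for (int i = b * blocksize; i < Math.min((b + 1) * blocksize, x.length); i++)
                blockSums[b] += x[i];
        // phase 2: cumulative sum over the aggregates yields per-block offsets
        double[] offsets = new double[nblocks];
        for (int b = 1; b < nblocks; b++)
            offsets[b] = offsets[b - 1] + blockSums[b - 1];
        // phase 3: cumulative sum within each block, shifted by its offset
        double[] out = new double[x.length];
        for (int b = 0; b < nblocks; b++) {
            double run = offsets[b];
            for (int i = b * blocksize; i < Math.min((b + 1) * blocksize, x.length); i++)
                out[i] = (run += x[i]);
        }
        return out;
    }

    public static void main(String[] args) {
        // prints [1.0, 3.0, 6.0, 10.0, 15.0]
        System.out.println(Arrays.toString(blockedCumsum(new double[]{1, 2, 3, 4, 5}, 2)));
    }
}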

Example 9 with Group

use of org.apache.sysml.lops.Group in project incubator-systemml by apache.

the class AggBinaryOp method constructMRLopsCPMM.

private void constructMRLopsCPMM() {
    if (isLeftTransposeRewriteApplicable(false, false)) {
        setLops(constructMRLopsCPMMWithLeftTransposeRewrite());
    } else { // general case
        Hop X = getInput().get(0);
        Hop Y = getInput().get(1);
        MMCJType type = getMMCJAggregationType(X, Y);
        MMCJ mmcj = new MMCJ(X.constructLops(), Y.constructLops(), getDataType(), getValueType(), type, ExecType.MR);
        setOutputDimensions(mmcj);
        setLineNumbers(mmcj);
        Group grp = new Group(mmcj, Group.OperationTypes.Sort, getDataType(), getValueType());
        setOutputDimensions(grp);
        setLineNumbers(grp);
        Aggregate agg1 = new Aggregate(grp, HopsAgg2Lops.get(outerOp), getDataType(), getValueType(), ExecType.MR);
        setOutputDimensions(agg1);
        setLineNumbers(agg1);
        // aggregation uses kahanSum but the inputs do not have correction values
        agg1.setupCorrectionLocation(CorrectionLocationType.NONE);
        setLops(agg1);
    }
}
Also used : Group(org.apache.sysml.lops.Group) MMCJ(org.apache.sysml.lops.MMCJ) MultiThreadedHop(org.apache.sysml.hops.Hop.MultiThreadedHop) MMCJType(org.apache.sysml.lops.MMCJ.MMCJType) Aggregate(org.apache.sysml.lops.Aggregate)
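
CPMM splits the multiplication along the common dimension: MMCJ emits one partial product per common-dimension block, and Group plus Aggregate then sum the partial results element-wise. A hedged in-memory analogue of that split-and-sum, with made-up dimensions and block size:

// In-memory analogue of CPMM: partial products over common-dimension blocks,
// followed by an element-wise sum (the role of the Group + Aggregate lops above).
public class CpmmSketch {
    public static double[][] multiply(double[][] a, double[][] b, int blocksize) {
        int m = a.length, k = b.length, n = b[0].length;
        double[][] c = new double[m][n];
        for (int k0 = 0; k0 < k; k0 += blocksize) {      // one partial product per block
            int k1 = Math.min(k0 + blocksize, k);
            for (int i = 0; i < m; i++)
                for (int j = 0; j < n; j++)
                    for (int kk = k0; kk < k1; kk++)
                        c[i][j] += a[i][kk] * b[kk][j];  // summed, as Aggregate does
        }
        return c;
    }

    public static void main(String[] args) {
        double[][] a = {{1, 2}, {3, 4}};
        double[][] b = {{5, 6}, {7, 8}};
        // prints [[19.0, 22.0], [43.0, 50.0]]
        System.out.println(java.util.Arrays.deepToString(multiply(a, b, 1)));
    }
}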

Example 10 with Group

use of org.apache.sysml.lops.Group in project incubator-systemml by apache.

the class AggBinaryOp method constructMRLopsMapMMChain.

private void constructMRLopsMapMMChain(ChainType chainType) {
    Lop mapmult = null;
    if (chainType == ChainType.XtXv) {
        // v never needs partitioning because always single block
        Hop hX = getInput().get(0).getInput().get(0);
        Hop hv = getInput().get(1).getInput().get(1);
        // core matrix mult
        mapmult = new MapMultChain(hX.constructLops(), hv.constructLops(), getDataType(), getValueType(), ExecType.MR);
        mapmult.getOutputParameters().setDimensions(getDim1(), getDim2(), getRowsInBlock(), getColsInBlock(), getNnz());
        setLineNumbers(mapmult);
    } else { // ChainType.XtwXv / ChainType.XtXvy
        // v never needs partitioning because always single block
        int wix = (chainType == ChainType.XtwXv) ? 0 : 1;
        int vix = (chainType == ChainType.XtwXv) ? 1 : 0;
        Hop hX = getInput().get(0).getInput().get(0);
        Hop hw = getInput().get(1).getInput().get(wix);
        Hop hv = getInput().get(1).getInput().get(vix).getInput().get(1);
        double mestW = OptimizerUtils.estimateSize(hw.getDim1(), hw.getDim2());
        boolean needPart = !hw.dimsKnown() || hw.getDim1() * hw.getDim2() > DistributedCacheInput.PARTITION_SIZE;
        Lop X = hX.constructLops(), v = hv.constructLops(), w = null;
        if (needPart) {
            // requires partitioning
            w = new DataPartition(hw.constructLops(), DataType.MATRIX, ValueType.DOUBLE, (mestW > OptimizerUtils.getLocalMemBudget()) ? ExecType.MR : ExecType.CP, PDataPartitionFormat.ROW_BLOCK_WISE_N);
            w.getOutputParameters().setDimensions(hw.getDim1(), hw.getDim2(), getRowsInBlock(), getColsInBlock(), hw.getNnz());
            setLineNumbers(w);
        } else
            w = hw.constructLops();
        // core matrix mult
        mapmult = new MapMultChain(X, v, w, chainType, getDataType(), getValueType(), ExecType.MR);
        mapmult.getOutputParameters().setDimensions(getDim1(), getDim2(), getRowsInBlock(), getColsInBlock(), getNnz());
        setLineNumbers(mapmult);
    }
    // post aggregation
    Group grp = new Group(mapmult, Group.OperationTypes.Sort, getDataType(), getValueType());
    grp.getOutputParameters().setDimensions(getDim1(), getDim2(), getRowsInBlock(), getColsInBlock(), getNnz());
    Aggregate agg1 = new Aggregate(grp, HopsAgg2Lops.get(outerOp), getDataType(), getValueType(), ExecType.MR);
    agg1.getOutputParameters().setDimensions(getDim1(), getDim2(), getRowsInBlock(), getColsInBlock(), getNnz());
    // aggregation uses kahanSum but the inputs do not have correction values
    agg1.setupCorrectionLocation(CorrectionLocationType.NONE);
    setLineNumbers(agg1);
    setLops(agg1);
}
Also used : Group(org.apache.sysml.lops.Group) MultiThreadedHop(org.apache.sysml.hops.Hop.MultiThreadedHop) Lop(org.apache.sysml.lops.Lop) Aggregate(org.apache.sysml.lops.Aggregate) DataPartition(org.apache.sysml.lops.DataPartition) MapMultChain(org.apache.sysml.lops.MapMultChain)
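
The MapMultChain lop evaluates t(X) %*% (w * (X %*% v)) in a single map-side pass over X, which is why only the weight matrix w may need partitioning while v always fits into a single block. A hedged row-wise analogue for the XtwXv case, with dense arrays and example data chosen purely for illustration:

// Row-wise analogue of the fused map-mult chain t(X) %*% (w * (X %*% v)):
// each row x_i contributes w[i] * (x_i . v) * x_i to the result,
// so X can be streamed while v and w stay in memory.
public class MapMultChainSketch {
    public static double[] xtwxv(double[][] X, double[] w, double[] v) {
        double[] out = new double[v.length];
        for (int i = 0; i < X.length; i++) {
            double dot = 0;
            for (int j = 0; j < v.length; j++)
                dot += X[i][j] * v[j];         // (X %*% v)[i]
            double scale = w[i] * dot;         // element-wise weighting by w
            for (int j = 0; j < v.length; j++)
                out[j] += scale * X[i][j];     // t(X) row contribution
        }
        return out;
    }

    public static void main(String[] args) {
        double[][] X = {{1, 0}, {0, 2}};
        double[] w = {1, 1};
        double[] v = {3, 4};
        // prints [3.0, 16.0]
        System.out.println(java.util.Arrays.toString(xtwxv(X, w, v)));
    }
}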

Aggregations

Group (org.apache.sysml.lops.Group) 55
Lop (org.apache.sysml.lops.Lop) 45
Aggregate (org.apache.sysml.lops.Aggregate) 38
MultiThreadedHop (org.apache.sysml.hops.Hop.MultiThreadedHop) 32
DataPartition (org.apache.sysml.lops.DataPartition) 28
ExecType (org.apache.sysml.lops.LopProperties.ExecType) 25
UnaryCP (org.apache.sysml.lops.UnaryCP) 14
RepMat (org.apache.sysml.lops.RepMat) 11
PartialAggregate (org.apache.sysml.lops.PartialAggregate) 10
Unary (org.apache.sysml.lops.Unary) 10
CombineUnary (org.apache.sysml.lops.CombineUnary) 8
Transform (org.apache.sysml.lops.Transform) 8
AppendR (org.apache.sysml.lops.AppendR) 6
Data (org.apache.sysml.lops.Data) 6
GroupedAggregate (org.apache.sysml.lops.GroupedAggregate) 6
SortKeys (org.apache.sysml.lops.SortKeys) 6
ArrayList (java.util.ArrayList) 4
SparkAggType (org.apache.sysml.hops.AggBinaryOp.SparkAggType) 4
OperationTypes (org.apache.sysml.lops.Aggregate.OperationTypes) 4
Binary (org.apache.sysml.lops.Binary) 4