Search in sources :

Example 46 with Group

use of org.apache.sysml.lops.Group in project systemml by apache.

the class QuaternaryOp method constructMRLopsWeightedDivMM.

/**
 * Builds the MR lop plan for weighted divide matrix multiplication (wdivmm) and
 * installs the final sum aggregate as this hop's lop.
 *
 * <p>The inputs are read positionally as W, U, V and X. If replication is not forced
 * and the map-side condition holds, U and V are handed (partitioned where required) to
 * the map-side {@code WeightedDivMM}; otherwise the reduce-side {@code WeightedDivMMR}
 * is built on grouped inputs. In both cases the operator output is subsequently grouped
 * and sum-aggregated.
 *
 * @param wtype wdivmm variant; consulted for operator selection and forwarded to the lop
 */
private void constructMRLopsWeightedDivMM(WDivMMType wtype) {
    // NOTE: the common case for wdivmm are factors U/V with a rank of 10s to 100s; the current runtime only
    // supports single block outer products (U/V rank <= blocksize, i.e., 1000 by default); we enforce this
    // by applying the hop rewrite for Weighted DivMM only if this constraint holds.
    final Hop W = getInput().get(0);
    final Hop U = getInput().get(1);
    final Hop V = getInput().get(2);
    final Hop X = getInput().get(3);

    // MR operator selection, part1: estimated sizes of the factors U and V
    final double sizeU = OptimizerUtils.estimateSize(U.getDim1(), U.getDim2());
    final double sizeV = OptimizerUtils.estimateSize(V.getDim1(), V.getDim2());
    final boolean mapSideCandidate = (!wtype.hasFourInputs() || wtype.hasScalar())
            && sizeU + sizeV < OptimizerUtils.getRemoteMemBudgetMap(true);

    final Lop wdivmm;
    if (!FORCE_REPLICATION && mapSideCandidate) {
        // broadcast: partition U when its dims are unknown or dim1*dim2 exceeds the partition size
        final boolean partitionU = !U.dimsKnown()
                || U.getDim1() * U.getDim2() > DistributedCacheInput.PARTITION_SIZE;
        Lop lU = U.constructLops();
        if (partitionU) {
            // partition in MR if U exceeds the local memory budget, in CP otherwise
            final ExecType partExecU = (sizeU > OptimizerUtils.getLocalMemBudget()) ? ExecType.MR : ExecType.CP;
            lU = new DataPartition(lU, DataType.MATRIX, ValueType.DOUBLE, partExecU, PDataPartitionFormat.ROW_BLOCK_WISE_N);
            lU.getOutputParameters().setDimensions(U.getDim1(), U.getDim2(), getRowsInBlock(), getColsInBlock(), U.getNnz());
            setLineNumbers(lU);
        }

        // same partitioning decision for V
        final boolean partitionV = !V.dimsKnown()
                || V.getDim1() * V.getDim2() > DistributedCacheInput.PARTITION_SIZE;
        Lop lV = V.constructLops();
        if (partitionV) {
            final ExecType partExecV = (sizeV > OptimizerUtils.getLocalMemBudget()) ? ExecType.MR : ExecType.CP;
            lV = new DataPartition(lV, DataType.MATRIX, ValueType.DOUBLE, partExecV, PDataPartitionFormat.ROW_BLOCK_WISE_N);
            lV.getOutputParameters().setDimensions(V.getDim1(), V.getDim2(), getRowsInBlock(), getColsInBlock(), V.getNnz());
            setLineNumbers(lV);
        }

        // map-side wdivmm always with broadcast
        final Lop lW = W.constructLops();
        final Lop lX = X.constructLops();
        wdivmm = new WeightedDivMM(lW, lU, lV, lX, DataType.MATRIX, ValueType.DOUBLE, wtype, ExecType.MR);
    } else {
        // general case
        // MR operator selection part 2 (both cannot happen for wdivmm, otherwise mapwdivmm):
        // V is cached only if it fits next to an already cached U, or on its own otherwise
        final boolean cacheU = !FORCE_REPLICATION && sizeU < OptimizerUtils.getRemoteMemBudgetReduce();
        final boolean cacheV = !FORCE_REPLICATION
                && (cacheU ? sizeU + sizeV : sizeV) < OptimizerUtils.getRemoteMemBudgetReduce();

        final Group grpW = new Group(W.constructLops(), Group.OperationTypes.Sort, DataType.MATRIX, ValueType.DOUBLE);
        grpW.getOutputParameters().setDimensions(W.getDim1(), W.getDim2(), W.getRowsInBlock(), W.getColsInBlock(), W.getNnz());
        setLineNumbers(grpW);

        // X is only wrapped in a group for the four-input, non-scalar case; the dimension
        // and line-number updates below are applied to whichever lop ends up in grpX
        Lop grpX = X.constructLops();
        if (wtype.hasFourInputs() && (X.getDataType() != DataType.SCALAR)) {
            grpX = new Group(grpX, Group.OperationTypes.Sort, DataType.MATRIX, ValueType.DOUBLE);
        }
        grpX.getOutputParameters().setDimensions(X.getDim1(), X.getDim2(), X.getRowsInBlock(), X.getColsInBlock(), X.getNnz());
        setLineNumbers(grpX);

        final Lop lU = constructLeftFactorMRLop(U, V, cacheU, sizeU);
        final Lop lV = constructRightFactorMRLop(U, V, cacheV, sizeV);

        // reduce-side wdivmm w/ or without broadcast
        wdivmm = new WeightedDivMMR(grpW, lU, lV, grpX, DataType.MATRIX, ValueType.DOUBLE, wtype, cacheU, cacheV, ExecType.MR);
    }
    setOutputDimensions(wdivmm);
    setLineNumbers(wdivmm);
    setLops(wdivmm);

    // in contrast to wsloss/wsigmoid, wdivmm requires partial aggregation (for the final mm)
    final Group grp = new Group(getLops(), Group.OperationTypes.Sort, getDataType(), getValueType());
    setOutputDimensions(grp);
    setLineNumbers(grp);

    final Aggregate agg1 = new Aggregate(grp, HopsAgg2Lops.get(AggOp.SUM), getDataType(), getValueType(), ExecType.MR);
    // aggregation uses kahanSum but the inputs do not have correction values
    agg1.setupCorrectionLocation(CorrectionLocationType.NONE);
    setOutputDimensions(agg1);
    setLineNumbers(agg1);
    setLops(agg1);
}
Also used : Group(org.apache.sysml.lops.Group) MultiThreadedHop(org.apache.sysml.hops.Hop.MultiThreadedHop) Lop(org.apache.sysml.lops.Lop) WeightedDivMM(org.apache.sysml.lops.WeightedDivMM) Aggregate(org.apache.sysml.lops.Aggregate) DataPartition(org.apache.sysml.lops.DataPartition) WeightedDivMMR(org.apache.sysml.lops.WeightedDivMMR)

Example 47 with Group

use of org.apache.sysml.lops.Group in project systemml by apache.

the class QuaternaryOp method constructMRLopsWeightedCeMM.

/**
 * Builds the MR lop plan for weighted cross entropy (wcemm) and installs the final
 * cast-to-scalar lop as this hop's lop.
 *
 * <p>The inputs are read positionally as X, U, V and eps. Depending on the operator
 * selection either the map-side {@code WeightedCrossEntropy} or the reduce-side
 * {@code WeightedCrossEntropyR} is created; its 1x1 output is then grouped,
 * sum-aggregated and cast to a scalar.
 *
 * @param wtype wcemm variant forwarded to the created lop
 */
private void constructMRLopsWeightedCeMM(WCeMMType wtype) {
    // NOTE: the common case for wcemm are factors U/V with a rank of 10s to 100s; the current runtime only
    // supports single block outer products (U/V rank <= blocksize, i.e., 1000 by default); we enforce this
    // by applying the hop rewrite for Weighted Cross Entropy only if this constraint holds.
    final Hop X = getInput().get(0);
    final Hop U = getInput().get(1);
    final Hop V = getInput().get(2);
    final Hop eps = getInput().get(3);

    // MR operator selection, part1: estimated sizes of the factors U and V
    final double sizeU = OptimizerUtils.estimateSize(U.getDim1(), U.getDim2());
    final double sizeV = OptimizerUtils.estimateSize(V.getDim1(), V.getDim2());
    final boolean mapSideCandidate = sizeU + sizeV < OptimizerUtils.getRemoteMemBudgetMap(true);

    final Lop wcemm;
    if (!FORCE_REPLICATION && mapSideCandidate) {
        // broadcast: partition U when its dims are unknown or dim1*dim2 exceeds the partition size
        final boolean partitionU = !U.dimsKnown()
                || U.getDim1() * U.getDim2() > DistributedCacheInput.PARTITION_SIZE;
        Lop lU = U.constructLops();
        if (partitionU) {
            // partition in MR if U exceeds the local memory budget, in CP otherwise
            final ExecType partExecU = (sizeU > OptimizerUtils.getLocalMemBudget()) ? ExecType.MR : ExecType.CP;
            lU = new DataPartition(lU, DataType.MATRIX, ValueType.DOUBLE, partExecU, PDataPartitionFormat.ROW_BLOCK_WISE_N);
            lU.getOutputParameters().setDimensions(U.getDim1(), U.getDim2(), getRowsInBlock(), getColsInBlock(), U.getNnz());
            setLineNumbers(lU);
        }

        // same partitioning decision for V
        final boolean partitionV = !V.dimsKnown()
                || V.getDim1() * V.getDim2() > DistributedCacheInput.PARTITION_SIZE;
        Lop lV = V.constructLops();
        if (partitionV) {
            final ExecType partExecV = (sizeV > OptimizerUtils.getLocalMemBudget()) ? ExecType.MR : ExecType.CP;
            lV = new DataPartition(lV, DataType.MATRIX, ValueType.DOUBLE, partExecV, PDataPartitionFormat.ROW_BLOCK_WISE_N);
            lV.getOutputParameters().setDimensions(V.getDim1(), V.getDim2(), getRowsInBlock(), getColsInBlock(), V.getNnz());
            setLineNumbers(lV);
        }

        // map-side wcemm always with broadcast
        final Lop lX = X.constructLops();
        final Lop lEps = eps.constructLops();
        wcemm = new WeightedCrossEntropy(lX, lU, lV, lEps, DataType.MATRIX, ValueType.DOUBLE, wtype, ExecType.MR);
    } else {
        // general case
        // MR operator selection part 2: V is cached only if it fits next to an already
        // cached U, or on its own otherwise
        final boolean cacheU = !FORCE_REPLICATION && sizeU < OptimizerUtils.getRemoteMemBudgetReduce();
        final boolean cacheV = !FORCE_REPLICATION
                && (cacheU ? sizeU + sizeV : sizeV) < OptimizerUtils.getRemoteMemBudgetReduce();

        final Group grpX = new Group(X.constructLops(), Group.OperationTypes.Sort, DataType.MATRIX, ValueType.DOUBLE);
        grpX.getOutputParameters().setDimensions(X.getDim1(), X.getDim2(), X.getRowsInBlock(), X.getColsInBlock(), -1);
        setLineNumbers(grpX);

        final Lop lU = constructLeftFactorMRLop(U, V, cacheU, sizeU);
        final Lop lV = constructRightFactorMRLop(U, V, cacheV, sizeV);

        // reduce-side wcemm w/ or without broadcast
        final Lop lEps = eps.constructLops();
        wcemm = new WeightedCrossEntropyR(grpX, lU, lV, lEps, DataType.MATRIX, ValueType.DOUBLE, wtype, cacheU, cacheV, ExecType.MR);
    }

    // shared tail for both operator variants: 1x1 output -> group -> sum aggregate -> scalar cast
    wcemm.getOutputParameters().setDimensions(1, 1, X.getRowsInBlock(), X.getColsInBlock(), -1);
    setLineNumbers(wcemm);

    final Group grp = new Group(wcemm, Group.OperationTypes.Sort, DataType.MATRIX, ValueType.DOUBLE);
    grp.getOutputParameters().setDimensions(1, 1, X.getRowsInBlock(), X.getColsInBlock(), -1);
    setLineNumbers(grp);

    final Aggregate agg1 = new Aggregate(grp, HopsAgg2Lops.get(AggOp.SUM), DataType.MATRIX, ValueType.DOUBLE, ExecType.MR);
    // aggregation uses kahanSum
    agg1.setupCorrectionLocation(CorrectionLocationType.NONE);
    agg1.getOutputParameters().setDimensions(1, 1, X.getRowsInBlock(), X.getColsInBlock(), -1);
    setLineNumbers(agg1);

    final UnaryCP unary1 = new UnaryCP(agg1, HopsOpOp1LopsUS.get(OpOp1.CAST_AS_SCALAR), getDataType(), getValueType());
    unary1.getOutputParameters().setDimensions(0, 0, 0, 0, -1);
    setLineNumbers(unary1);
    setLops(unary1);
}
Also used : Group(org.apache.sysml.lops.Group) WeightedCrossEntropyR(org.apache.sysml.lops.WeightedCrossEntropyR) MultiThreadedHop(org.apache.sysml.hops.Hop.MultiThreadedHop) WeightedCrossEntropy(org.apache.sysml.lops.WeightedCrossEntropy) Lop(org.apache.sysml.lops.Lop) Aggregate(org.apache.sysml.lops.Aggregate) DataPartition(org.apache.sysml.lops.DataPartition) UnaryCP(org.apache.sysml.lops.UnaryCP)

Example 48 with Group

use of org.apache.sysml.lops.Group in project systemml by apache.

the class QuaternaryOp method constructMRLopsWeightedUMM.

/**
 * Builds the MR lop for weighted unary matrix multiplication (wumm) and installs it as
 * this hop's lop.
 *
 * <p>The inputs are read positionally as X, U and V. The unary operation is taken from
 * {@code _uop} if set; otherwise it is derived from {@code _sop} (POW2 for
 * {@code OpOp2.POW}, MULTIPLY2 for anything else). Depending on the operator selection
 * either the map-side {@code WeightedUnaryMM} or the reduce-side
 * {@code WeightedUnaryMMR} is created.
 *
 * @param wtype wumm variant forwarded to the created lop
 */
private void constructMRLopsWeightedUMM(WUMMType wtype) {
    // NOTE: the common case for wumm are factors U/V with a rank of 10s to 100s; the current runtime only
    // supports single block outer products (U/V rank <= blocksize, i.e., 1000 by default); we enforce this
    // by applying the hop rewrite for Weighted UnaryMM  only if this constraint holds.
    final Unary.OperationTypes uop;
    if (_uop != null) {
        uop = HopsOpOp1LopsU.get(_uop);
    } else if (_sop == OpOp2.POW) {
        uop = Unary.OperationTypes.POW2;
    } else {
        uop = Unary.OperationTypes.MULTIPLY2;
    }

    final Hop X = getInput().get(0);
    final Hop U = getInput().get(1);
    final Hop V = getInput().get(2);

    // MR operator selection, part1: estimated sizes of the factors U and V
    final double sizeU = OptimizerUtils.estimateSize(U.getDim1(), U.getDim2());
    final double sizeV = OptimizerUtils.estimateSize(V.getDim1(), V.getDim2());
    final boolean mapSideCandidate = sizeU + sizeV < OptimizerUtils.getRemoteMemBudgetMap(true);

    final Lop wumm;
    if (!FORCE_REPLICATION && mapSideCandidate) {
        // broadcast: partition U when its dims are unknown or dim1*dim2 exceeds the partition size
        final boolean partitionU = !U.dimsKnown()
                || U.getDim1() * U.getDim2() > DistributedCacheInput.PARTITION_SIZE;
        Lop lU = U.constructLops();
        if (partitionU) {
            // partition in MR if U exceeds the local memory budget, in CP otherwise
            final ExecType partExecU = (sizeU > OptimizerUtils.getLocalMemBudget()) ? ExecType.MR : ExecType.CP;
            lU = new DataPartition(lU, DataType.MATRIX, ValueType.DOUBLE, partExecU, PDataPartitionFormat.ROW_BLOCK_WISE_N);
            lU.getOutputParameters().setDimensions(U.getDim1(), U.getDim2(), getRowsInBlock(), getColsInBlock(), U.getNnz());
            setLineNumbers(lU);
        }

        // same partitioning decision for V
        final boolean partitionV = !V.dimsKnown()
                || V.getDim1() * V.getDim2() > DistributedCacheInput.PARTITION_SIZE;
        Lop lV = V.constructLops();
        if (partitionV) {
            final ExecType partExecV = (sizeV > OptimizerUtils.getLocalMemBudget()) ? ExecType.MR : ExecType.CP;
            lV = new DataPartition(lV, DataType.MATRIX, ValueType.DOUBLE, partExecV, PDataPartitionFormat.ROW_BLOCK_WISE_N);
            lV.getOutputParameters().setDimensions(V.getDim1(), V.getDim2(), getRowsInBlock(), getColsInBlock(), V.getNnz());
            setLineNumbers(lV);
        }

        // map-side wumm always with broadcast
        final Lop lX = X.constructLops();
        wumm = new WeightedUnaryMM(lX, lU, lV, DataType.MATRIX, ValueType.DOUBLE, wtype, uop, ExecType.MR);
    } else {
        // general case
        // MR operator selection part 2: V is cached only if it fits next to an already
        // cached U, or on its own otherwise
        final boolean cacheU = !FORCE_REPLICATION && sizeU < OptimizerUtils.getRemoteMemBudgetReduce();
        final boolean cacheV = !FORCE_REPLICATION
                && (cacheU ? sizeU + sizeV : sizeV) < OptimizerUtils.getRemoteMemBudgetReduce();

        final Group grpX = new Group(X.constructLops(), Group.OperationTypes.Sort, DataType.MATRIX, ValueType.DOUBLE);
        grpX.getOutputParameters().setDimensions(X.getDim1(), X.getDim2(), X.getRowsInBlock(), X.getColsInBlock(), X.getNnz());
        setLineNumbers(grpX);

        final Lop lU = constructLeftFactorMRLop(U, V, cacheU, sizeU);
        final Lop lV = constructRightFactorMRLop(U, V, cacheV, sizeV);

        // reduce-side wumm w/ or without broadcast
        wumm = new WeightedUnaryMMR(grpX, lU, lV, DataType.MATRIX, ValueType.DOUBLE, wtype, uop, cacheU, cacheV, ExecType.MR);
    }

    // in contrast to wsloss no aggregation required: the operator itself is the final lop
    setOutputDimensions(wumm);
    setLineNumbers(wumm);
    setLops(wumm);
}
Also used : Group(org.apache.sysml.lops.Group) WeightedUnaryMMR(org.apache.sysml.lops.WeightedUnaryMMR) MultiThreadedHop(org.apache.sysml.hops.Hop.MultiThreadedHop) WeightedUnaryMM(org.apache.sysml.lops.WeightedUnaryMM) Lop(org.apache.sysml.lops.Lop) DataPartition(org.apache.sysml.lops.DataPartition) Unary(org.apache.sysml.lops.Unary)

Example 49 with Group

use of org.apache.sysml.lops.Group in project systemml by apache.

the class QuaternaryOp method constructRightFactorMRLop.

/**
 * Constructs the lop that provides the right factor V to a reduce-side MR operator.
 *
 * <p>With {@code cacheV} set, V's own lop is returned, wrapped in a {@code DataPartition}
 * when partitioning is required. Otherwise t(V) is built, replicated via {@code RepMat}
 * using an offset lop created from U, and grouped.
 *
 * @param U left factor hop; only used to create the offset lop for replication
 * @param V right factor hop
 * @param cacheV whether V is read through the distributed cache instead of being replicated
 * @param m2Size size estimate of V, compared against the local memory budget to pick the
 *        execution type of the partitioning
 * @return the lop providing V
 */
private Lop constructRightFactorMRLop(Hop U, Hop V, boolean cacheV, double m2Size) {
    if (cacheV) {
        // partitioning of V for read through distributed cache
        final boolean partitionV = !V.dimsKnown()
                || V.getDim1() * V.getDim2() > DistributedCacheInput.PARTITION_SIZE;
        final Lop plainV = V.constructLops();
        if (!partitionV) {
            return plainV;
        }

        // requires partitioning: in MR if V exceeds the local memory budget, in CP otherwise
        final ExecType partExec = (m2Size > OptimizerUtils.getLocalMemBudget()) ? ExecType.MR : ExecType.CP;
        final Lop partitionedV = new DataPartition(plainV, DataType.MATRIX, ValueType.DOUBLE, partExec, PDataPartitionFormat.ROW_BLOCK_WISE_N);
        partitionedV.getOutputParameters().setDimensions(V.getDim1(), V.getDim2(), getRowsInBlock(), getColsInBlock(), V.getNnz());
        setLineNumbers(partitionedV);
        return partitionedV;
    }

    // replication of t(V) for shuffle to target block
    final Transform ltV = new Transform(V.constructLops(), HopsTransf2Lops.get(ReOrgOp.TRANSPOSE), getDataType(), getValueType(), ExecType.MR);
    ltV.getOutputParameters().setDimensions(V.getDim2(), V.getDim1(), V.getColsInBlock(), V.getRowsInBlock(), V.getNnz());
    setLineNumbers(ltV);

    // nrow of U determines num replicates
    final Lop offset = createOffsetLop(U, false);
    final Lop replicatedV = new RepMat(ltV, offset, false, V.getDataType(), V.getValueType());
    replicatedV.getOutputParameters().setDimensions(V.getDim2(), V.getDim1(), V.getColsInBlock(), V.getRowsInBlock(), V.getNnz());
    setLineNumbers(replicatedV);

    // group the replicated blocks; the nnz of the grouped output is passed as -1
    final Group grpV = new Group(replicatedV, Group.OperationTypes.Sort, DataType.MATRIX, ValueType.DOUBLE);
    grpV.getOutputParameters().setDimensions(V.getDim2(), V.getDim1(), V.getColsInBlock(), V.getRowsInBlock(), -1);
    setLineNumbers(grpV);
    return grpV;
}
Also used : Group(org.apache.sysml.lops.Group) RepMat(org.apache.sysml.lops.RepMat) Lop(org.apache.sysml.lops.Lop) Transform(org.apache.sysml.lops.Transform) DataPartition(org.apache.sysml.lops.DataPartition)

Example 50 with Group

use of org.apache.sysml.lops.Group in project systemml by apache.

the class TernaryOp method constructLopsTernaryDefault.

/**
 * Constructs the {@code Ternary} lop for this hop's operation over its first three
 * inputs and installs it as this hop's lop.
 *
 * <p>The execution type comes from {@code optFindExecType()}, overridden to CP when all
 * inputs are scalars. For CP, SPARK and GPU the input lops are used directly; for any
 * other execution type every matrix input is first wrapped in a sort {@code Group}.
 */
private void constructLopsTernaryDefault() {
    ExecType et = optFindExecType();
    if (getInput().stream().allMatch(h -> h.getDataType().isScalar())) {
        // always CP for pure scalar operations
        et = ExecType.CP;
    }

    final boolean usesInputsDirectly = et == ExecType.CP || et == ExecType.SPARK || et == ExecType.GPU;
    final Ternary plusmult;
    if (usesInputsDirectly) {
        plusmult = new Ternary(HopsOpOp3Lops.get(_op),
                getInput().get(0).constructLops(),
                getInput().get(1).constructLops(),
                getInput().get(2).constructLops(),
                getDataType(), getValueType(), et);
    } else {
        // MR: construct the three operand lops in input order, grouping matrix inputs
        final Lop[] operands = new Lop[3];
        for (int pos = 0; pos < operands.length; pos++) {
            final Hop in = getInput().get(pos);
            Lop operand = in.constructLops();
            if (in.getDataType().isMatrix()) {
                // the group is created with this hop's data/value type and configured
                // through setOutputDimensions, exactly as for the final lop
                operand = new Group(operand, Group.OperationTypes.Sort, getDataType(), getValueType());
                setLineNumbers(operand);
                setOutputDimensions(operand);
            }
            operands[pos] = operand;
        }
        plusmult = new Ternary(HopsOpOp3Lops.get(_op), operands[0], operands[1], operands[2], getDataType(), getValueType(), et);
    }

    setOutputDimensions(plusmult);
    setLineNumbers(plusmult);
    setLops(plusmult);
}
Also used : Group(org.apache.sysml.lops.Group) Ternary(org.apache.sysml.lops.Ternary) CombineTernary(org.apache.sysml.lops.CombineTernary) ExecType(org.apache.sysml.lops.LopProperties.ExecType) Lop(org.apache.sysml.lops.Lop)

Aggregations

Group (org.apache.sysml.lops.Group)55 Lop (org.apache.sysml.lops.Lop)45 Aggregate (org.apache.sysml.lops.Aggregate)38 MultiThreadedHop (org.apache.sysml.hops.Hop.MultiThreadedHop)32 DataPartition (org.apache.sysml.lops.DataPartition)28 ExecType (org.apache.sysml.lops.LopProperties.ExecType)25 UnaryCP (org.apache.sysml.lops.UnaryCP)14 RepMat (org.apache.sysml.lops.RepMat)11 PartialAggregate (org.apache.sysml.lops.PartialAggregate)10 Unary (org.apache.sysml.lops.Unary)10 CombineUnary (org.apache.sysml.lops.CombineUnary)8 Transform (org.apache.sysml.lops.Transform)8 AppendR (org.apache.sysml.lops.AppendR)6 Data (org.apache.sysml.lops.Data)6 GroupedAggregate (org.apache.sysml.lops.GroupedAggregate)6 SortKeys (org.apache.sysml.lops.SortKeys)6 ArrayList (java.util.ArrayList)4 SparkAggType (org.apache.sysml.hops.AggBinaryOp.SparkAggType)4 OperationTypes (org.apache.sysml.lops.Aggregate.OperationTypes)4 Binary (org.apache.sysml.lops.Binary)4