Search in sources :

Example 36 with Group

use of org.apache.sysml.lops.Group in project incubator-systemml by apache.

the class ParameterizedBuiltinOp method constructLopsRExpand.

private void constructLopsRExpand(HashMap<String, Lop> inputlops, ExecType et) {
    if (et == ExecType.CP || et == ExecType.SPARK) {
        int k = OptimizerUtils.getConstrainedNumThreads(_maxNumThreads);
        ParameterizedBuiltin pbilop = new ParameterizedBuiltin(inputlops, HopsParameterizedBuiltinLops.get(_op), getDataType(), getValueType(), et, k);
        setOutputDimensions(pbilop);
        setLineNumbers(pbilop);
        setLops(pbilop);
    } else if (et == ExecType.MR) {
        ParameterizedBuiltin pbilop = new ParameterizedBuiltin(inputlops, HopsParameterizedBuiltinLops.get(_op), getDataType(), getValueType(), et);
        setOutputDimensions(pbilop);
        setLineNumbers(pbilop);
        Group group1 = new Group(pbilop, Group.OperationTypes.Sort, getDataType(), getValueType());
        setOutputDimensions(group1);
        setLineNumbers(group1);
        Aggregate finalagg = new Aggregate(group1, Aggregate.OperationTypes.Sum, DataType.MATRIX, getValueType(), ExecType.MR);
        setOutputDimensions(finalagg);
        setLineNumbers(finalagg);
        setLops(finalagg);
    }
}
Also used : Group(org.apache.sysml.lops.Group) ParameterizedBuiltin(org.apache.sysml.lops.ParameterizedBuiltin) GroupedAggregate(org.apache.sysml.lops.GroupedAggregate) Aggregate(org.apache.sysml.lops.Aggregate)

Example 37 with Group

use of org.apache.sysml.lops.Group in project incubator-systemml by apache.

the class QuaternaryOp method constructMRLopsWeightedSquaredLoss.

private void constructMRLopsWeightedSquaredLoss(WeightsType wtype) {
    // NOTE: the common case for wsloss are factors U/V with a rank of 10s to 100s; the current runtime only
    // supports single block outer products (U/V rank <= blocksize, i.e., 1000 by default); we enforce this
    // by applying the hop rewrite for Weighted Squared Loss only if this constraint holds.
    Hop X = getInput().get(0);
    Hop U = getInput().get(1);
    Hop V = getInput().get(2);
    Hop W = getInput().get(3);
    // MR operator selection, part1
    // size U
    double m1Size = OptimizerUtils.estimateSize(U.getDim1(), U.getDim2());
    // size V
    double m2Size = OptimizerUtils.estimateSize(V.getDim1(), V.getDim2());
    boolean isMapWsloss = (!wtype.hasFourInputs() && m1Size + m2Size < OptimizerUtils.getRemoteMemBudgetMap(true));
    if (// broadcast
    !FORCE_REPLICATION && isMapWsloss) {
        // partitioning of U
        boolean needPartU = !U.dimsKnown() || U.getDim1() * U.getDim2() > DistributedCacheInput.PARTITION_SIZE;
        Lop lU = U.constructLops();
        if (needPartU) {
            // requires partitioning
            lU = new DataPartition(lU, DataType.MATRIX, ValueType.DOUBLE, (m1Size > OptimizerUtils.getLocalMemBudget()) ? ExecType.MR : ExecType.CP, PDataPartitionFormat.ROW_BLOCK_WISE_N);
            lU.getOutputParameters().setDimensions(U.getDim1(), U.getDim2(), getRowsInBlock(), getColsInBlock(), U.getNnz());
            setLineNumbers(lU);
        }
        // partitioning of V
        boolean needPartV = !V.dimsKnown() || V.getDim1() * V.getDim2() > DistributedCacheInput.PARTITION_SIZE;
        Lop lV = V.constructLops();
        if (needPartV) {
            // requires partitioning
            lV = new DataPartition(lV, DataType.MATRIX, ValueType.DOUBLE, (m2Size > OptimizerUtils.getLocalMemBudget()) ? ExecType.MR : ExecType.CP, PDataPartitionFormat.ROW_BLOCK_WISE_N);
            lV.getOutputParameters().setDimensions(V.getDim1(), V.getDim2(), getRowsInBlock(), getColsInBlock(), V.getNnz());
            setLineNumbers(lV);
        }
        // map-side wsloss always with broadcast
        Lop wsloss = new WeightedSquaredLoss(X.constructLops(), lU, lV, W.constructLops(), DataType.MATRIX, ValueType.DOUBLE, wtype, ExecType.MR);
        wsloss.getOutputParameters().setDimensions(1, 1, X.getRowsInBlock(), X.getColsInBlock(), -1);
        setLineNumbers(wsloss);
        Group grp = new Group(wsloss, Group.OperationTypes.Sort, DataType.MATRIX, ValueType.DOUBLE);
        grp.getOutputParameters().setDimensions(1, 1, X.getRowsInBlock(), X.getColsInBlock(), -1);
        setLineNumbers(grp);
        Aggregate agg1 = new Aggregate(grp, HopsAgg2Lops.get(AggOp.SUM), DataType.MATRIX, ValueType.DOUBLE, ExecType.MR);
        // aggregation uses kahanSum
        agg1.setupCorrectionLocation(CorrectionLocationType.NONE);
        agg1.getOutputParameters().setDimensions(1, 1, X.getRowsInBlock(), X.getColsInBlock(), -1);
        setLineNumbers(agg1);
        UnaryCP unary1 = new UnaryCP(agg1, HopsOpOp1LopsUS.get(OpOp1.CAST_AS_SCALAR), getDataType(), getValueType());
        unary1.getOutputParameters().setDimensions(0, 0, 0, 0, -1);
        setLineNumbers(unary1);
        setLops(unary1);
    } else // general case
    {
        // MR operator selection part 2
        boolean cacheU = !FORCE_REPLICATION && (m1Size < OptimizerUtils.getRemoteMemBudgetReduce());
        boolean cacheV = !FORCE_REPLICATION && ((!cacheU && m2Size < OptimizerUtils.getRemoteMemBudgetReduce()) || (cacheU && m1Size + m2Size < OptimizerUtils.getRemoteMemBudgetReduce()));
        Group grpX = new Group(X.constructLops(), Group.OperationTypes.Sort, DataType.MATRIX, ValueType.DOUBLE);
        grpX.getOutputParameters().setDimensions(X.getDim1(), X.getDim2(), X.getRowsInBlock(), X.getColsInBlock(), -1);
        setLineNumbers(grpX);
        Lop grpW = W.constructLops();
        if (grpW.getDataType() == DataType.MATRIX) {
            grpW = new Group(W.constructLops(), Group.OperationTypes.Sort, DataType.MATRIX, ValueType.DOUBLE);
            grpW.getOutputParameters().setDimensions(W.getDim1(), W.getDim2(), W.getRowsInBlock(), W.getColsInBlock(), -1);
            setLineNumbers(grpW);
        }
        Lop lU = constructLeftFactorMRLop(U, V, cacheU, m1Size);
        Lop lV = constructRightFactorMRLop(U, V, cacheV, m2Size);
        // reduce-side wsloss w/ or without broadcast
        Lop wsloss = new WeightedSquaredLossR(grpX, lU, lV, grpW, DataType.MATRIX, ValueType.DOUBLE, wtype, cacheU, cacheV, ExecType.MR);
        wsloss.getOutputParameters().setDimensions(1, 1, X.getRowsInBlock(), X.getColsInBlock(), -1);
        setLineNumbers(wsloss);
        Group grp = new Group(wsloss, Group.OperationTypes.Sort, DataType.MATRIX, ValueType.DOUBLE);
        grp.getOutputParameters().setDimensions(1, 1, X.getRowsInBlock(), X.getColsInBlock(), -1);
        setLineNumbers(grp);
        Aggregate agg1 = new Aggregate(grp, HopsAgg2Lops.get(AggOp.SUM), DataType.MATRIX, ValueType.DOUBLE, ExecType.MR);
        // aggregation uses kahanSum
        agg1.setupCorrectionLocation(CorrectionLocationType.NONE);
        agg1.getOutputParameters().setDimensions(1, 1, X.getRowsInBlock(), X.getColsInBlock(), -1);
        setLineNumbers(agg1);
        UnaryCP unary1 = new UnaryCP(agg1, HopsOpOp1LopsUS.get(OpOp1.CAST_AS_SCALAR), getDataType(), getValueType());
        unary1.getOutputParameters().setDimensions(0, 0, 0, 0, -1);
        setLineNumbers(unary1);
        setLops(unary1);
    }
}
Also used : Group(org.apache.sysml.lops.Group) MultiThreadedHop(org.apache.sysml.hops.Hop.MultiThreadedHop) WeightedSquaredLoss(org.apache.sysml.lops.WeightedSquaredLoss) Lop(org.apache.sysml.lops.Lop) Aggregate(org.apache.sysml.lops.Aggregate) DataPartition(org.apache.sysml.lops.DataPartition) WeightedSquaredLossR(org.apache.sysml.lops.WeightedSquaredLossR) UnaryCP(org.apache.sysml.lops.UnaryCP)

Example 38 with Group

use of org.apache.sysml.lops.Group in project incubator-systemml by apache.

the class QuaternaryOp method constructMRLopsWeightedSigmoid.

private void constructMRLopsWeightedSigmoid(WSigmoidType wtype) {
    // NOTE: the common case for wsigmoid are factors U/V with a rank of 10s to 100s; the current runtime only
    // supports single block outer products (U/V rank <= blocksize, i.e., 1000 by default); we enforce this
    // by applying the hop rewrite for Weighted Sigmoid only if this constraint holds.
    Hop X = getInput().get(0);
    Hop U = getInput().get(1);
    Hop V = getInput().get(2);
    // MR operator selection, part1
    // size U
    double m1Size = OptimizerUtils.estimateSize(U.getDim1(), U.getDim2());
    // size V
    double m2Size = OptimizerUtils.estimateSize(V.getDim1(), V.getDim2());
    boolean isMapWsig = (m1Size + m2Size < OptimizerUtils.getRemoteMemBudgetMap(true));
    if (// broadcast
    !FORCE_REPLICATION && isMapWsig) {
        // partitioning of U
        boolean needPartU = !U.dimsKnown() || U.getDim1() * U.getDim2() > DistributedCacheInput.PARTITION_SIZE;
        Lop lU = U.constructLops();
        if (needPartU) {
            // requires partitioning
            lU = new DataPartition(lU, DataType.MATRIX, ValueType.DOUBLE, (m1Size > OptimizerUtils.getLocalMemBudget()) ? ExecType.MR : ExecType.CP, PDataPartitionFormat.ROW_BLOCK_WISE_N);
            lU.getOutputParameters().setDimensions(U.getDim1(), U.getDim2(), getRowsInBlock(), getColsInBlock(), U.getNnz());
            setLineNumbers(lU);
        }
        // partitioning of V
        boolean needPartV = !V.dimsKnown() || V.getDim1() * V.getDim2() > DistributedCacheInput.PARTITION_SIZE;
        Lop lV = V.constructLops();
        if (needPartV) {
            // requires partitioning
            lV = new DataPartition(lV, DataType.MATRIX, ValueType.DOUBLE, (m2Size > OptimizerUtils.getLocalMemBudget()) ? ExecType.MR : ExecType.CP, PDataPartitionFormat.ROW_BLOCK_WISE_N);
            lV.getOutputParameters().setDimensions(V.getDim1(), V.getDim2(), getRowsInBlock(), getColsInBlock(), V.getNnz());
            setLineNumbers(lV);
        }
        // map-side wsig always with broadcast
        Lop wsigmoid = new WeightedSigmoid(X.constructLops(), lU, lV, DataType.MATRIX, ValueType.DOUBLE, wtype, ExecType.MR);
        setOutputDimensions(wsigmoid);
        setLineNumbers(wsigmoid);
        setLops(wsigmoid);
    // in contrast to wsloss no aggregation required
    } else // general case
    {
        // MR operator selection part 2
        boolean cacheU = !FORCE_REPLICATION && (m1Size < OptimizerUtils.getRemoteMemBudgetReduce());
        boolean cacheV = !FORCE_REPLICATION && ((!cacheU && m2Size < OptimizerUtils.getRemoteMemBudgetReduce()) || (cacheU && m1Size + m2Size < OptimizerUtils.getRemoteMemBudgetReduce()));
        Group grpX = new Group(X.constructLops(), Group.OperationTypes.Sort, DataType.MATRIX, ValueType.DOUBLE);
        grpX.getOutputParameters().setDimensions(X.getDim1(), X.getDim2(), X.getRowsInBlock(), X.getColsInBlock(), X.getNnz());
        setLineNumbers(grpX);
        Lop lU = constructLeftFactorMRLop(U, V, cacheU, m1Size);
        Lop lV = constructRightFactorMRLop(U, V, cacheV, m2Size);
        // reduce-side wsig w/ or without broadcast
        Lop wsigmoid = new WeightedSigmoidR(grpX, lU, lV, DataType.MATRIX, ValueType.DOUBLE, wtype, cacheU, cacheV, ExecType.MR);
        setOutputDimensions(wsigmoid);
        setLineNumbers(wsigmoid);
        setLops(wsigmoid);
    // in contrast to wsloss no aggregation required
    }
}
Also used : Group(org.apache.sysml.lops.Group) WeightedSigmoidR(org.apache.sysml.lops.WeightedSigmoidR) MultiThreadedHop(org.apache.sysml.hops.Hop.MultiThreadedHop) WeightedSigmoid(org.apache.sysml.lops.WeightedSigmoid) Lop(org.apache.sysml.lops.Lop) DataPartition(org.apache.sysml.lops.DataPartition)

Example 39 with Group

use of org.apache.sysml.lops.Group in project incubator-systemml by apache.

the class QuaternaryOp method constructMRLopsWeightedUMM.

private void constructMRLopsWeightedUMM(WUMMType wtype) {
    // NOTE: the common case for wumm are factors U/V with a rank of 10s to 100s; the current runtime only
    // supports single block outer products (U/V rank <= blocksize, i.e., 1000 by default); we enforce this
    // by applying the hop rewrite for Weighted UnaryMM  only if this constraint holds.
    Unary.OperationTypes uop = _uop != null ? HopsOpOp1LopsU.get(_uop) : _sop == OpOp2.POW ? Unary.OperationTypes.POW2 : Unary.OperationTypes.MULTIPLY2;
    Hop X = getInput().get(0);
    Hop U = getInput().get(1);
    Hop V = getInput().get(2);
    // MR operator selection, part1
    // size U
    double m1Size = OptimizerUtils.estimateSize(U.getDim1(), U.getDim2());
    // size V
    double m2Size = OptimizerUtils.estimateSize(V.getDim1(), V.getDim2());
    boolean isMapWumm = (m1Size + m2Size < OptimizerUtils.getRemoteMemBudgetMap(true));
    if (// broadcast
    !FORCE_REPLICATION && isMapWumm) {
        // partitioning of U
        boolean needPartU = !U.dimsKnown() || U.getDim1() * U.getDim2() > DistributedCacheInput.PARTITION_SIZE;
        Lop lU = U.constructLops();
        if (needPartU) {
            // requires partitioning
            lU = new DataPartition(lU, DataType.MATRIX, ValueType.DOUBLE, (m1Size > OptimizerUtils.getLocalMemBudget()) ? ExecType.MR : ExecType.CP, PDataPartitionFormat.ROW_BLOCK_WISE_N);
            lU.getOutputParameters().setDimensions(U.getDim1(), U.getDim2(), getRowsInBlock(), getColsInBlock(), U.getNnz());
            setLineNumbers(lU);
        }
        // partitioning of V
        boolean needPartV = !V.dimsKnown() || V.getDim1() * V.getDim2() > DistributedCacheInput.PARTITION_SIZE;
        Lop lV = V.constructLops();
        if (needPartV) {
            // requires partitioning
            lV = new DataPartition(lV, DataType.MATRIX, ValueType.DOUBLE, (m2Size > OptimizerUtils.getLocalMemBudget()) ? ExecType.MR : ExecType.CP, PDataPartitionFormat.ROW_BLOCK_WISE_N);
            lV.getOutputParameters().setDimensions(V.getDim1(), V.getDim2(), getRowsInBlock(), getColsInBlock(), V.getNnz());
            setLineNumbers(lV);
        }
        // map-side wumm always with broadcast
        Lop wumm = new WeightedUnaryMM(X.constructLops(), lU, lV, DataType.MATRIX, ValueType.DOUBLE, wtype, uop, ExecType.MR);
        setOutputDimensions(wumm);
        setLineNumbers(wumm);
        setLops(wumm);
    // in contrast to wsloss no aggregation required
    } else // general case
    {
        // MR operator selection part 2
        boolean cacheU = !FORCE_REPLICATION && (m1Size < OptimizerUtils.getRemoteMemBudgetReduce());
        boolean cacheV = !FORCE_REPLICATION && ((!cacheU && m2Size < OptimizerUtils.getRemoteMemBudgetReduce()) || (cacheU && m1Size + m2Size < OptimizerUtils.getRemoteMemBudgetReduce()));
        Group grpX = new Group(X.constructLops(), Group.OperationTypes.Sort, DataType.MATRIX, ValueType.DOUBLE);
        grpX.getOutputParameters().setDimensions(X.getDim1(), X.getDim2(), X.getRowsInBlock(), X.getColsInBlock(), X.getNnz());
        setLineNumbers(grpX);
        Lop lU = constructLeftFactorMRLop(U, V, cacheU, m1Size);
        Lop lV = constructRightFactorMRLop(U, V, cacheV, m2Size);
        // reduce-side wumm w/ or without broadcast
        Lop wumm = new WeightedUnaryMMR(grpX, lU, lV, DataType.MATRIX, ValueType.DOUBLE, wtype, uop, cacheU, cacheV, ExecType.MR);
        setOutputDimensions(wumm);
        setLineNumbers(wumm);
        setLops(wumm);
    // in contrast to wsloss no aggregation required
    }
}
Also used : Group(org.apache.sysml.lops.Group) WeightedUnaryMMR(org.apache.sysml.lops.WeightedUnaryMMR) MultiThreadedHop(org.apache.sysml.hops.Hop.MultiThreadedHop) WeightedUnaryMM(org.apache.sysml.lops.WeightedUnaryMM) Lop(org.apache.sysml.lops.Lop) DataPartition(org.apache.sysml.lops.DataPartition) Unary(org.apache.sysml.lops.Unary)

Example 40 with Group

use of org.apache.sysml.lops.Group in project incubator-systemml by apache.

the class QuaternaryOp method constructLeftFactorMRLop.

private Lop constructLeftFactorMRLop(Hop U, Hop V, boolean cacheU, double m1Size) {
    Lop lU = null;
    if (cacheU) {
        // partitioning of U for read through distributed cache
        boolean needPartU = !U.dimsKnown() || U.getDim1() * U.getDim2() > DistributedCacheInput.PARTITION_SIZE;
        lU = U.constructLops();
        if (needPartU) {
            // requires partitioning
            lU = new DataPartition(lU, DataType.MATRIX, ValueType.DOUBLE, (m1Size > OptimizerUtils.getLocalMemBudget()) ? ExecType.MR : ExecType.CP, PDataPartitionFormat.ROW_BLOCK_WISE_N);
            lU.getOutputParameters().setDimensions(U.getDim1(), U.getDim2(), getRowsInBlock(), getColsInBlock(), U.getNnz());
            setLineNumbers(lU);
        }
    } else {
        // replication of U for shuffle to target block
        // ncol of t(V) -> nrow of V determines num replicates
        Lop offset = createOffsetLop(V, false);
        lU = new RepMat(U.constructLops(), offset, true, V.getDataType(), V.getValueType());
        lU.getOutputParameters().setDimensions(U.getDim1(), U.getDim2(), U.getRowsInBlock(), U.getColsInBlock(), U.getNnz());
        setLineNumbers(lU);
        Group grpU = new Group(lU, Group.OperationTypes.Sort, DataType.MATRIX, ValueType.DOUBLE);
        grpU.getOutputParameters().setDimensions(U.getDim1(), U.getDim2(), U.getRowsInBlock(), U.getColsInBlock(), -1);
        setLineNumbers(grpU);
        lU = grpU;
    }
    return lU;
}
Also used : Group(org.apache.sysml.lops.Group) RepMat(org.apache.sysml.lops.RepMat) Lop(org.apache.sysml.lops.Lop) DataPartition(org.apache.sysml.lops.DataPartition)

Aggregations

Group (org.apache.sysml.lops.Group)55 Lop (org.apache.sysml.lops.Lop)45 Aggregate (org.apache.sysml.lops.Aggregate)38 MultiThreadedHop (org.apache.sysml.hops.Hop.MultiThreadedHop)32 DataPartition (org.apache.sysml.lops.DataPartition)28 ExecType (org.apache.sysml.lops.LopProperties.ExecType)25 UnaryCP (org.apache.sysml.lops.UnaryCP)14 RepMat (org.apache.sysml.lops.RepMat)11 PartialAggregate (org.apache.sysml.lops.PartialAggregate)10 Unary (org.apache.sysml.lops.Unary)10 CombineUnary (org.apache.sysml.lops.CombineUnary)8 Transform (org.apache.sysml.lops.Transform)8 AppendR (org.apache.sysml.lops.AppendR)6 Data (org.apache.sysml.lops.Data)6 GroupedAggregate (org.apache.sysml.lops.GroupedAggregate)6 SortKeys (org.apache.sysml.lops.SortKeys)6 ArrayList (java.util.ArrayList)4 SparkAggType (org.apache.sysml.hops.AggBinaryOp.SparkAggType)4 OperationTypes (org.apache.sysml.lops.Aggregate.OperationTypes)4 Binary (org.apache.sysml.lops.Binary)4