Search in sources :

Example 1 with Transform

use of org.apache.sysml.lops.Transform in project incubator-systemml by apache.

the class QuaternaryOp method constructMRLopsWeightedDivMM.

/**
 * Builds the MR lop chain for the weighted divide matrix-multiply (wdivmm)
 * operator and registers the final lop via {@code setLops}.
 *
 * <p>The four inputs are read positionally as W (0), U (1), V (2) and X (3).
 * Two plans are possible:
 * <ul>
 *   <li>map-side {@code WeightedDivMM}: chosen when replication is not forced,
 *       the type either has no four inputs or has a scalar, and the estimated
 *       sizes of U and V together are below the remote map memory budget;</li>
 *   <li>reduce-side {@code WeightedDivMMR}: otherwise; U and V are each either
 *       passed as cached inputs (optionally partitioned) or replicated via
 *       {@code RepMat} and grouped.</li>
 * </ul>
 * In both cases the wdivmm lop is followed by a {@code Group} and a SUM
 * {@code Aggregate}, which becomes the lop of this hop.
 *
 * @param wtype the wdivmm variant; queried for {@code hasFourInputs()} /
 *              {@code hasScalar()} and forwarded to the created lop
 * @throws HopsException declared by this method (NOTE(review): presumably raised by the nested constructLops calls - confirm)
 * @throws LopsException declared by this method (NOTE(review): presumably raised during lop construction - confirm)
 */
private void constructMRLopsWeightedDivMM(WDivMMType wtype) throws HopsException, LopsException {
    //NOTE: the common case for wdivmm are factors U/V with a rank of 10s to 100s; the current runtime only
    //supports single block outer products (U/V rank <= blocksize, i.e., 1000 by default); we enforce this
    //by applying the hop rewrite for Weighted DivMM only if this constraint holds.
    Hop W = getInput().get(0);
    Hop U = getInput().get(1);
    Hop V = getInput().get(2);
    Hop X = getInput().get(3);
    //MR operator selection, part1
    //estimated size of U
    double m1Size = OptimizerUtils.estimateSize(U.getDim1(), U.getDim2());
    //estimated size of V
    double m2Size = OptimizerUtils.estimateSize(V.getDim1(), V.getDim2());
    //map-side plan requires (no four inputs or a scalar input) and U+V below the remote map budget
    boolean isMapWdivmm = ((!wtype.hasFourInputs() || wtype.hasScalar()) && m1Size + m2Size < OptimizerUtils.getRemoteMemBudgetMap(true));
    if (//broadcast
    !FORCE_REPLICATION && isMapWdivmm) {
        //partitioning of U: needed if dims are unknown or the cell count exceeds the partition size
        boolean needPartU = !U.dimsKnown() || U.getDim1() * U.getDim2() > DistributedCacheInput.PARTITION_SIZE;
        Lop lU = U.constructLops();
        if (needPartU) {
            //requires partitioning; runs in MR only if U exceeds the local memory budget, else in CP
            lU = new DataPartition(lU, DataType.MATRIX, ValueType.DOUBLE, (m1Size > OptimizerUtils.getLocalMemBudget()) ? ExecType.MR : ExecType.CP, PDataPartitionFormat.ROW_BLOCK_WISE_N);
            lU.getOutputParameters().setDimensions(U.getDim1(), U.getDim2(), getRowsInBlock(), getColsInBlock(), U.getNnz());
            setLineNumbers(lU);
        }
        //partitioning of V: same criteria as for U
        boolean needPartV = !V.dimsKnown() || V.getDim1() * V.getDim2() > DistributedCacheInput.PARTITION_SIZE;
        Lop lV = V.constructLops();
        if (needPartV) {
            //requires partitioning; runs in MR only if V exceeds the local memory budget, else in CP
            lV = new DataPartition(lV, DataType.MATRIX, ValueType.DOUBLE, (m2Size > OptimizerUtils.getLocalMemBudget()) ? ExecType.MR : ExecType.CP, PDataPartitionFormat.ROW_BLOCK_WISE_N);
            lV.getOutputParameters().setDimensions(V.getDim1(), V.getDim2(), getRowsInBlock(), getColsInBlock(), V.getNnz());
            setLineNumbers(lV);
        }
        //map-side wdivmm always with broadcast
        Lop wdivmm = new WeightedDivMM(W.constructLops(), lU, lV, X.constructLops(), DataType.MATRIX, ValueType.DOUBLE, wtype, ExecType.MR);
        setOutputDimensions(wdivmm);
        setLineNumbers(wdivmm);
        setLops(wdivmm);
    } else //general case
    {
        //MR operator selection part 2 (both cannot happen for wdivmm, otherwise mapwdivmm)
        //U is cached if it alone is below the remote reduce budget
        boolean cacheU = !FORCE_REPLICATION && (m1Size < OptimizerUtils.getRemoteMemBudgetReduce());
        //V is cached if it fits alone (U not cached) or together with U (U cached)
        boolean cacheV = !FORCE_REPLICATION && ((!cacheU && m2Size < OptimizerUtils.getRemoteMemBudgetReduce()) || (cacheU && m1Size + m2Size < OptimizerUtils.getRemoteMemBudgetReduce()));
        //group W
        Group grpW = new Group(W.constructLops(), Group.OperationTypes.Sort, DataType.MATRIX, ValueType.DOUBLE);
        grpW.getOutputParameters().setDimensions(W.getDim1(), W.getDim2(), W.getRowsInBlock(), W.getColsInBlock(), W.getNnz());
        setLineNumbers(grpW);
        //X is only wrapped in a Group for four-input types with a non-scalar X;
        //note that dimensions and line numbers are set on grpX in either case,
        //i.e., also on X's own lop when no Group is created
        Lop grpX = X.constructLops();
        if (wtype.hasFourInputs() && (X.getDataType() != DataType.SCALAR))
            grpX = new Group(grpX, Group.OperationTypes.Sort, DataType.MATRIX, ValueType.DOUBLE);
        grpX.getOutputParameters().setDimensions(X.getDim1(), X.getDim2(), X.getRowsInBlock(), X.getColsInBlock(), X.getNnz());
        setLineNumbers(grpX);
        Lop lU = null;
        if (cacheU) {
            //partitioning of U for read through distributed cache
            boolean needPartU = !U.dimsKnown() || U.getDim1() * U.getDim2() > DistributedCacheInput.PARTITION_SIZE;
            lU = U.constructLops();
            if (needPartU) {
                //requires partitioning
                lU = new DataPartition(lU, DataType.MATRIX, ValueType.DOUBLE, (m1Size > OptimizerUtils.getLocalMemBudget()) ? ExecType.MR : ExecType.CP, PDataPartitionFormat.ROW_BLOCK_WISE_N);
                lU.getOutputParameters().setDimensions(U.getDim1(), U.getDim2(), getRowsInBlock(), getColsInBlock(), U.getNnz());
                setLineNumbers(lU);
            }
        } else {
            //replication of U for shuffle to target block
            //ncol of t(V) -> nrow of V determines num replicates
            Lop offset = createOffsetLop(V, false);
            lU = new RepMat(U.constructLops(), offset, true, V.getDataType(), V.getValueType());
            lU.getOutputParameters().setDimensions(U.getDim1(), U.getDim2(), U.getRowsInBlock(), U.getColsInBlock(), U.getNnz());
            setLineNumbers(lU);
            //group the replicated U; nnz is passed as -1 here
            Group grpU = new Group(lU, Group.OperationTypes.Sort, DataType.MATRIX, ValueType.DOUBLE);
            grpU.getOutputParameters().setDimensions(U.getDim1(), U.getDim2(), U.getRowsInBlock(), U.getColsInBlock(), -1);
            setLineNumbers(grpU);
            lU = grpU;
        }
        Lop lV = null;
        if (cacheV) {
            //partitioning of V for read through distributed cache
            boolean needPartV = !V.dimsKnown() || V.getDim1() * V.getDim2() > DistributedCacheInput.PARTITION_SIZE;
            lV = V.constructLops();
            if (needPartV) {
                //requires partitioning
                lV = new DataPartition(lV, DataType.MATRIX, ValueType.DOUBLE, (m2Size > OptimizerUtils.getLocalMemBudget()) ? ExecType.MR : ExecType.CP, PDataPartitionFormat.ROW_BLOCK_WISE_N);
                lV.getOutputParameters().setDimensions(V.getDim1(), V.getDim2(), getRowsInBlock(), getColsInBlock(), V.getNnz());
                setLineNumbers(lV);
            }
        } else {
            //replication of t(V) for shuffle to target block
            //(the transpose lop swaps V's dimensions and block sizes in its output parameters)
            Transform ltV = new Transform(V.constructLops(), HopsTransf2Lops.get(ReOrgOp.TRANSPOSE), getDataType(), getValueType(), ExecType.MR);
            ltV.getOutputParameters().setDimensions(V.getDim2(), V.getDim1(), V.getColsInBlock(), V.getRowsInBlock(), V.getNnz());
            setLineNumbers(ltV);
            //nrow of U determines num replicates
            Lop offset = createOffsetLop(U, false);
            lV = new RepMat(ltV, offset, false, V.getDataType(), V.getValueType());
            lV.getOutputParameters().setDimensions(V.getDim2(), V.getDim1(), V.getColsInBlock(), V.getRowsInBlock(), V.getNnz());
            setLineNumbers(lV);
            //group the replicated t(V); nnz is passed as -1 here
            Group grpV = new Group(lV, Group.OperationTypes.Sort, DataType.MATRIX, ValueType.DOUBLE);
            grpV.getOutputParameters().setDimensions(V.getDim2(), V.getDim1(), V.getColsInBlock(), V.getRowsInBlock(), -1);
            setLineNumbers(grpV);
            lV = grpV;
        }
        //reduce-side wdivmm w/ or without broadcast
        Lop wdivmm = new WeightedDivMMR(grpW, lU, lV, grpX, DataType.MATRIX, ValueType.DOUBLE, wtype, cacheU, cacheV, ExecType.MR);
        setOutputDimensions(wdivmm);
        setLineNumbers(wdivmm);
        setLops(wdivmm);
    }
    //in contrast to wsloss/wsigmoid, wdivmm requires partial aggregation (for the final mm);
    //the wdivmm lop registered above is read back via getLops() and wrapped in group + sum aggregate
    Group grp = new Group(getLops(), Group.OperationTypes.Sort, getDataType(), getValueType());
    setOutputDimensions(grp);
    setLineNumbers(grp);
    Aggregate agg1 = new Aggregate(grp, HopsAgg2Lops.get(AggOp.SUM), getDataType(), getValueType(), ExecType.MR);
    // aggregation uses kahanSum but the inputs do not have correction values
    agg1.setupCorrectionLocation(CorrectionLocationType.NONE);
    setOutputDimensions(agg1);
    setLineNumbers(agg1);
    //the aggregate replaces the wdivmm lop as the final lop of this hop
    setLops(agg1);
}
Also used : Group(org.apache.sysml.lops.Group) RepMat(org.apache.sysml.lops.RepMat) MultiThreadedHop(org.apache.sysml.hops.Hop.MultiThreadedHop) Lop(org.apache.sysml.lops.Lop) WeightedDivMM(org.apache.sysml.lops.WeightedDivMM) Transform(org.apache.sysml.lops.Transform) Aggregate(org.apache.sysml.lops.Aggregate) DataPartition(org.apache.sysml.lops.DataPartition) WeightedDivMMR(org.apache.sysml.lops.WeightedDivMMR)

Example 2 with Transform

use of org.apache.sysml.lops.Transform in project incubator-systemml by apache.

the class QuaternaryOp method constructMRLopsWeightedCeMM.

/**
 * Builds the MR lop chain for the weighted cross entropy (wcemm) operator and
 * registers the final lop via {@code setLops}.
 *
 * <p>The four inputs are read positionally as X (0), U (1), V (2) and eps (3).
 * Two plans are possible:
 * <ul>
 *   <li>map-side {@code WeightedCrossEntropy}: chosen when replication is not
 *       forced and the estimated sizes of U and V together are below the
 *       remote map memory budget;</li>
 *   <li>reduce-side {@code WeightedCrossEntropyR}: otherwise; U and V are each
 *       either passed as cached inputs (optionally partitioned) or replicated
 *       via {@code RepMat} and grouped.</li>
 * </ul>
 * In both plans the wcemm lop is declared with 1x1 output dimensions, followed
 * by a {@code Group}, a SUM {@code Aggregate} and a {@code UnaryCP} cast to
 * scalar, which becomes the lop of this hop.
 *
 * @param wtype the wcemm variant, forwarded to the created lop
 * @throws HopsException declared by this method (NOTE(review): presumably raised by the nested constructLops calls - confirm)
 * @throws LopsException declared by this method (NOTE(review): presumably raised during lop construction - confirm)
 */
private void constructMRLopsWeightedCeMM(WCeMMType wtype) throws HopsException, LopsException {
    //NOTE: the common case for wcemm are factors U/V with a rank of 10s to 100s; the current runtime only
    //supports single block outer products (U/V rank <= blocksize, i.e., 1000 by default); we enforce this
    //by applying the hop rewrite for Weighted Cross Entropy only if this constraint holds.
    Hop X = getInput().get(0);
    Hop U = getInput().get(1);
    Hop V = getInput().get(2);
    Hop eps = getInput().get(3);
    //MR operator selection, part1
    //estimated size of U
    double m1Size = OptimizerUtils.estimateSize(U.getDim1(), U.getDim2());
    //estimated size of V
    double m2Size = OptimizerUtils.estimateSize(V.getDim1(), V.getDim2());
    //map-side plan requires U+V below the remote map budget
    boolean isMapWcemm = (m1Size + m2Size < OptimizerUtils.getRemoteMemBudgetMap(true));
    if (//broadcast
    !FORCE_REPLICATION && isMapWcemm) {
        //partitioning of U: needed if dims are unknown or the cell count exceeds the partition size
        boolean needPartU = !U.dimsKnown() || U.getDim1() * U.getDim2() > DistributedCacheInput.PARTITION_SIZE;
        Lop lU = U.constructLops();
        if (needPartU) {
            //requires partitioning; runs in MR only if U exceeds the local memory budget, else in CP
            lU = new DataPartition(lU, DataType.MATRIX, ValueType.DOUBLE, (m1Size > OptimizerUtils.getLocalMemBudget()) ? ExecType.MR : ExecType.CP, PDataPartitionFormat.ROW_BLOCK_WISE_N);
            lU.getOutputParameters().setDimensions(U.getDim1(), U.getDim2(), getRowsInBlock(), getColsInBlock(), U.getNnz());
            setLineNumbers(lU);
        }
        //partitioning of V: same criteria as for U
        boolean needPartV = !V.dimsKnown() || V.getDim1() * V.getDim2() > DistributedCacheInput.PARTITION_SIZE;
        Lop lV = V.constructLops();
        if (needPartV) {
            //requires partitioning; runs in MR only if V exceeds the local memory budget, else in CP
            lV = new DataPartition(lV, DataType.MATRIX, ValueType.DOUBLE, (m2Size > OptimizerUtils.getLocalMemBudget()) ? ExecType.MR : ExecType.CP, PDataPartitionFormat.ROW_BLOCK_WISE_N);
            lV.getOutputParameters().setDimensions(V.getDim1(), V.getDim2(), getRowsInBlock(), getColsInBlock(), V.getNnz());
            setLineNumbers(lV);
        }
        //map-side wcemm always with broadcast
        Lop wcemm = new WeightedCrossEntropy(X.constructLops(), lU, lV, eps.constructLops(), DataType.MATRIX, ValueType.DOUBLE, wtype, ExecType.MR);
        //output is declared as 1x1 with X's block sizes and nnz passed as -1
        wcemm.getOutputParameters().setDimensions(1, 1, X.getRowsInBlock(), X.getColsInBlock(), -1);
        setLineNumbers(wcemm);
        //group + sum aggregate over the 1x1 outputs
        Group grp = new Group(wcemm, Group.OperationTypes.Sort, DataType.MATRIX, ValueType.DOUBLE);
        grp.getOutputParameters().setDimensions(1, 1, X.getRowsInBlock(), X.getColsInBlock(), -1);
        setLineNumbers(grp);
        Aggregate agg1 = new Aggregate(grp, HopsAgg2Lops.get(AggOp.SUM), DataType.MATRIX, ValueType.DOUBLE, ExecType.MR);
        // aggregation uses kahanSum; no correction location is configured
        agg1.setupCorrectionLocation(CorrectionLocationType.NONE);
        agg1.getOutputParameters().setDimensions(1, 1, X.getRowsInBlock(), X.getColsInBlock(), -1);
        setLineNumbers(agg1);
        //cast the 1x1 aggregate to a scalar; output dimensions are set to all zeros
        UnaryCP unary1 = new UnaryCP(agg1, HopsOpOp1LopsUS.get(OpOp1.CAST_AS_SCALAR), getDataType(), getValueType());
        unary1.getOutputParameters().setDimensions(0, 0, 0, 0, -1);
        setLineNumbers(unary1);
        setLops(unary1);
    } else //general case
    {
        //MR operator selection part 2
        //U is cached if it alone is below the remote reduce budget
        boolean cacheU = !FORCE_REPLICATION && (m1Size < OptimizerUtils.getRemoteMemBudgetReduce());
        //V is cached if it fits alone (U not cached) or together with U (U cached)
        boolean cacheV = !FORCE_REPLICATION && ((!cacheU && m2Size < OptimizerUtils.getRemoteMemBudgetReduce()) || (cacheU && m1Size + m2Size < OptimizerUtils.getRemoteMemBudgetReduce()));
        //group X; nnz is passed as -1 here
        Group grpX = new Group(X.constructLops(), Group.OperationTypes.Sort, DataType.MATRIX, ValueType.DOUBLE);
        grpX.getOutputParameters().setDimensions(X.getDim1(), X.getDim2(), X.getRowsInBlock(), X.getColsInBlock(), -1);
        setLineNumbers(grpX);
        Lop lU = null;
        if (cacheU) {
            //partitioning of U for read through distributed cache
            boolean needPartU = !U.dimsKnown() || U.getDim1() * U.getDim2() > DistributedCacheInput.PARTITION_SIZE;
            lU = U.constructLops();
            if (needPartU) {
                //requires partitioning
                lU = new DataPartition(lU, DataType.MATRIX, ValueType.DOUBLE, (m1Size > OptimizerUtils.getLocalMemBudget()) ? ExecType.MR : ExecType.CP, PDataPartitionFormat.ROW_BLOCK_WISE_N);
                lU.getOutputParameters().setDimensions(U.getDim1(), U.getDim2(), getRowsInBlock(), getColsInBlock(), U.getNnz());
                setLineNumbers(lU);
            }
        } else {
            //replication of U for shuffle to target block
            //ncol of t(V) -> nrow of V determines num replicates
            Lop offset = createOffsetLop(V, false);
            lU = new RepMat(U.constructLops(), offset, true, V.getDataType(), V.getValueType());
            lU.getOutputParameters().setDimensions(U.getDim1(), U.getDim2(), U.getRowsInBlock(), U.getColsInBlock(), U.getNnz());
            setLineNumbers(lU);
            //group the replicated U; nnz is passed as -1 here
            Group grpU = new Group(lU, Group.OperationTypes.Sort, DataType.MATRIX, ValueType.DOUBLE);
            grpU.getOutputParameters().setDimensions(U.getDim1(), U.getDim2(), U.getRowsInBlock(), U.getColsInBlock(), -1);
            setLineNumbers(grpU);
            lU = grpU;
        }
        Lop lV = null;
        if (cacheV) {
            //partitioning of V for read through distributed cache
            boolean needPartV = !V.dimsKnown() || V.getDim1() * V.getDim2() > DistributedCacheInput.PARTITION_SIZE;
            lV = V.constructLops();
            if (needPartV) {
                //requires partitioning
                lV = new DataPartition(lV, DataType.MATRIX, ValueType.DOUBLE, (m2Size > OptimizerUtils.getLocalMemBudget()) ? ExecType.MR : ExecType.CP, PDataPartitionFormat.ROW_BLOCK_WISE_N);
                lV.getOutputParameters().setDimensions(V.getDim1(), V.getDim2(), getRowsInBlock(), getColsInBlock(), V.getNnz());
                setLineNumbers(lV);
            }
        } else {
            //replication of t(V) for shuffle to target block
            //(the transpose lop swaps V's dimensions and block sizes in its output parameters)
            Transform ltV = new Transform(V.constructLops(), HopsTransf2Lops.get(ReOrgOp.TRANSPOSE), getDataType(), getValueType(), ExecType.MR);
            ltV.getOutputParameters().setDimensions(V.getDim2(), V.getDim1(), V.getColsInBlock(), V.getRowsInBlock(), V.getNnz());
            setLineNumbers(ltV);
            //nrow of U determines num replicates
            Lop offset = createOffsetLop(U, false);
            lV = new RepMat(ltV, offset, false, V.getDataType(), V.getValueType());
            lV.getOutputParameters().setDimensions(V.getDim2(), V.getDim1(), V.getColsInBlock(), V.getRowsInBlock(), V.getNnz());
            setLineNumbers(lV);
            //group the replicated t(V); nnz is passed as -1 here
            Group grpV = new Group(lV, Group.OperationTypes.Sort, DataType.MATRIX, ValueType.DOUBLE);
            grpV.getOutputParameters().setDimensions(V.getDim2(), V.getDim1(), V.getColsInBlock(), V.getRowsInBlock(), -1);
            setLineNumbers(grpV);
            lV = grpV;
        }
        //reduce-side wcemm w/ or without broadcast
        Lop wcemm = new WeightedCrossEntropyR(grpX, lU, lV, eps.constructLops(), DataType.MATRIX, ValueType.DOUBLE, wtype, cacheU, cacheV, ExecType.MR);
        //output is declared as 1x1 with X's block sizes and nnz passed as -1
        wcemm.getOutputParameters().setDimensions(1, 1, X.getRowsInBlock(), X.getColsInBlock(), -1);
        setLineNumbers(wcemm);
        //group + sum aggregate over the 1x1 outputs
        Group grp = new Group(wcemm, Group.OperationTypes.Sort, DataType.MATRIX, ValueType.DOUBLE);
        grp.getOutputParameters().setDimensions(1, 1, X.getRowsInBlock(), X.getColsInBlock(), -1);
        setLineNumbers(grp);
        Aggregate agg1 = new Aggregate(grp, HopsAgg2Lops.get(AggOp.SUM), DataType.MATRIX, ValueType.DOUBLE, ExecType.MR);
        // aggregation uses kahanSum; no correction location is configured
        agg1.setupCorrectionLocation(CorrectionLocationType.NONE);
        agg1.getOutputParameters().setDimensions(1, 1, X.getRowsInBlock(), X.getColsInBlock(), -1);
        setLineNumbers(agg1);
        //cast the 1x1 aggregate to a scalar; output dimensions are set to all zeros
        UnaryCP unary1 = new UnaryCP(agg1, HopsOpOp1LopsUS.get(OpOp1.CAST_AS_SCALAR), getDataType(), getValueType());
        unary1.getOutputParameters().setDimensions(0, 0, 0, 0, -1);
        setLineNumbers(unary1);
        setLops(unary1);
    }
}
Also used : Group(org.apache.sysml.lops.Group) RepMat(org.apache.sysml.lops.RepMat) WeightedCrossEntropyR(org.apache.sysml.lops.WeightedCrossEntropyR) MultiThreadedHop(org.apache.sysml.hops.Hop.MultiThreadedHop) WeightedCrossEntropy(org.apache.sysml.lops.WeightedCrossEntropy) Lop(org.apache.sysml.lops.Lop) Aggregate(org.apache.sysml.lops.Aggregate) Transform(org.apache.sysml.lops.Transform) DataPartition(org.apache.sysml.lops.DataPartition) UnaryCP(org.apache.sysml.lops.UnaryCP)

Example 3 with Transform

use of org.apache.sysml.lops.Transform in project incubator-systemml by apache.

the class AggBinaryOp method constructSparkLopsMapMMWithLeftTransposeRewrite.

/**
 * Builds the lop chain {@code t( t(Y) %*% X )} for the Spark map-side matrix
 * multiply with left-transpose rewrite: a CP transpose of the right input, a
 * {@code MapMult} of that transpose with the grandchild of the left input,
 * and a final CP transpose of the product.
 *
 * <p>As a side effect, {@code _outputEmptyBlocks} is updated before the
 * {@code MapMult} is created.
 *
 * @return the final transpose lop; its output dimensions and line numbers
 *         are not set here
 * @throws HopsException declared by this method
 * @throws LopsException declared by this method
 */
private Lop constructSparkLopsMapMMWithLeftTransposeRewrite() throws HopsException, LopsException {
    // first input of the first input (guaranteed to exist) and the second input
    final Hop leftGrandChild = getInput().get(0).getInput().get(0);
    final Hop rightInput = getInput().get(1);

    // CP transpose of the right input, with swapped dimensions
    final Lop rightTransposed = new Transform(rightInput.constructLops(), OperationTypes.Transpose, getDataType(), getValueType(), ExecType.CP);
    rightTransposed.getOutputParameters().setDimensions(
        rightInput.getDim2(), rightInput.getDim1(), getRowsInBlock(), getColsInBlock(), rightInput.getNnz());
    setLineNumbers(rightTransposed);

    // aggregation type for the spark matrix multiply
    final SparkAggType sparkAgg = getSparkMMAggregationType(requiresAggregation(MMultMethod.MAPMM_R));
    _outputEmptyBlocks = !OptimizerUtils.allowsToFilterEmptyBlockOutputs(this);

    // spark map-side multiply: t(Y) %*% X
    final Lop product = new MapMult(rightTransposed, leftGrandChild.constructLops(), getDataType(), getValueType(), false, false, _outputEmptyBlocks, sparkAgg);
    product.getOutputParameters().setDimensions(
        rightInput.getDim2(), leftGrandChild.getDim2(), getRowsInBlock(), getColsInBlock(), getNnz());
    setLineNumbers(product);

    // transpose of the result (dimensions set outside)
    return new Transform(product, OperationTypes.Transpose, getDataType(), getValueType(), ExecType.CP);
}
Also used : MultiThreadedHop(org.apache.sysml.hops.Hop.MultiThreadedHop) PMapMult(org.apache.sysml.lops.PMapMult) MapMult(org.apache.sysml.lops.MapMult) Lop(org.apache.sysml.lops.Lop) Transform(org.apache.sysml.lops.Transform)

Example 4 with Transform

use of org.apache.sysml.lops.Transform in project incubator-systemml by apache.

the class AggBinaryOp method constructMRLopsMapMMWithLeftTransposeRewrite.

/**
 * Builds the lop chain {@code t( t(Y) %*% X )} for the MR map-side matrix
 * multiply with left-transpose rewrite: a CP transpose of the right input,
 * an optional {@code DataPartition} of that transpose, a {@code MapMult},
 * an optional group + aggregate, and a final CP transpose.
 *
 * @return the final transpose lop with its output dimensions set (line
 *         numbers are not set on it here)
 * @throws HopsException declared by this method
 * @throws LopsException declared by this method
 */
private Lop constructMRLopsMapMMWithLeftTransposeRewrite() throws HopsException, LopsException {
    // first input of the first input (guaranteed to exist) and the second input
    final Hop leftGrandChild = getInput().get(0).getInput().get(0);
    final Hop rightInput = getInput().get(1);

    // CP transpose of the right input, with swapped dimensions
    final Lop rightTransposed = new Transform(rightInput.constructLops(), OperationTypes.Transpose, getDataType(), getValueType(), ExecType.CP);
    rightTransposed.getOutputParameters().setDimensions(
        rightInput.getDim2(), rightInput.getDim1(), getRowsInBlock(), getColsInBlock(), rightInput.getNnz());
    setLineNumbers(rightTransposed);

    // explicit aggregation can be skipped only if the row count of X is known
    // and does not exceed its row block size
    final boolean aggregate = !(leftGrandChild.getDim1() > 0 && leftGrandChild.getDim1() <= leftGrandChild.getRowsInBlock());
    // partitioning decision for method R, disregarding the transpose rewrite
    final boolean partition = requiresPartitioning(MMultMethod.MAPMM_R, true);

    // optional pre-partitioning of the transposed right input
    Lop cachedInput = rightTransposed;
    if (partition) {
        // operator selection: CP if the estimated size of t(Y) is below the local budget, MR otherwise
        final ExecType partitionExec;
        if (OptimizerUtils.estimateSizeExactSparsity(rightInput.getDim2(), rightInput.getDim1(), OptimizerUtils.getSparsity(rightInput.getDim2(), rightInput.getDim1(), rightInput.getNnz())) < OptimizerUtils.getLocalMemBudget()) {
            partitionExec = ExecType.CP;
        } else {
            partitionExec = ExecType.MR;
        }
        cachedInput = new DataPartition(rightTransposed, DataType.MATRIX, ValueType.DOUBLE, partitionExec, PDataPartitionFormat.COLUMN_BLOCK_WISE_N);
        cachedInput.getOutputParameters().setDimensions(
            rightInput.getDim2(), rightInput.getDim1(), getRowsInBlock(), getColsInBlock(), rightInput.getNnz());
        setLineNumbers(cachedInput);
    }

    // map-side multiply: t(Y) %*% X
    final MapMult mapSideProduct = new MapMult(cachedInput, leftGrandChild.constructLops(), getDataType(), getValueType(), false, partition, false);
    mapSideProduct.getOutputParameters().setDimensions(
        rightInput.getDim2(), leftGrandChild.getDim2(), getRowsInBlock(), getColsInBlock(), getNnz());
    setLineNumbers(mapSideProduct);

    // optional post-aggregation via group + aggregate
    Lop product = mapSideProduct;
    if (aggregate) {
        final Group grouped = new Group(mapSideProduct, Group.OperationTypes.Sort, getDataType(), getValueType());
        grouped.getOutputParameters().setDimensions(
            rightInput.getDim2(), leftGrandChild.getDim2(), getRowsInBlock(), getColsInBlock(), getNnz());
        setLineNumbers(grouped);

        final Aggregate aggregated = new Aggregate(grouped, HopsAgg2Lops.get(outerOp), getDataType(), getValueType(), ExecType.MR);
        aggregated.getOutputParameters().setDimensions(
            rightInput.getDim2(), leftGrandChild.getDim2(), getRowsInBlock(), getColsInBlock(), getNnz());
        setLineNumbers(aggregated);
        aggregated.setupCorrectionLocation(CorrectionLocationType.NONE);
        product = aggregated;
    }

    // CP transpose of the result, with dimensions swapped back
    final Lop result = new Transform(product, OperationTypes.Transpose, getDataType(), getValueType(), ExecType.CP);
    result.getOutputParameters().setDimensions(
        leftGrandChild.getDim2(), rightInput.getDim2(), getRowsInBlock(), getColsInBlock(), getNnz());
    return result;
}
Also used : Group(org.apache.sysml.lops.Group) MultiThreadedHop(org.apache.sysml.hops.Hop.MultiThreadedHop) PMapMult(org.apache.sysml.lops.PMapMult) MapMult(org.apache.sysml.lops.MapMult) ExecType(org.apache.sysml.lops.LopProperties.ExecType) Lop(org.apache.sysml.lops.Lop) Transform(org.apache.sysml.lops.Transform) Aggregate(org.apache.sysml.lops.Aggregate) DataPartition(org.apache.sysml.lops.DataPartition)

Example 5 with Transform

use of org.apache.sysml.lops.Transform in project incubator-systemml by apache.

the class QuaternaryOp method constructMRLopsWeightedUMM.

/**
 * Builds the MR lop for the weighted unary matrix-multiply (wumm) operator
 * and registers it via {@code setLops}.
 *
 * <p>The three inputs are read positionally as X (0), U (1) and V (2). Two
 * plans are possible:
 * <ul>
 *   <li>map-side {@code WeightedUnaryMM}: chosen when replication is not
 *       forced and the estimated sizes of U and V together are below the
 *       remote map memory budget;</li>
 *   <li>reduce-side {@code WeightedUnaryMMR}: otherwise; U and V are each
 *       either passed as cached inputs (optionally partitioned) or replicated
 *       via {@code RepMat} and grouped.</li>
 * </ul>
 * No group/aggregate is appended after the wumm lop in either plan.
 *
 * @param wtype the wumm variant, forwarded to the created lop
 * @throws HopsException declared by this method (NOTE(review): presumably raised by the nested constructLops calls - confirm)
 * @throws LopsException declared by this method (NOTE(review): presumably raised during lop construction - confirm)
 */
private void constructMRLopsWeightedUMM(WUMMType wtype) throws HopsException, LopsException {
    //NOTE: the common case for wumm are factors U/V with a rank of 10s to 100s; the current runtime only
    //supports single block outer products (U/V rank <= blocksize, i.e., 1000 by default); we enforce this
    //by applying the hop rewrite for Weighted UnaryMM only if this constraint holds.
    //unary operation: mapped from _uop if set; otherwise POW2 if _sop is POW, else MULTIPLY2
    Unary.OperationTypes uop = _uop != null ? HopsOpOp1LopsU.get(_uop) : _sop == OpOp2.POW ? Unary.OperationTypes.POW2 : Unary.OperationTypes.MULTIPLY2;
    Hop X = getInput().get(0);
    Hop U = getInput().get(1);
    Hop V = getInput().get(2);
    //MR operator selection, part1
    //estimated size of U
    double m1Size = OptimizerUtils.estimateSize(U.getDim1(), U.getDim2());
    //estimated size of V
    double m2Size = OptimizerUtils.estimateSize(V.getDim1(), V.getDim2());
    //map-side plan requires U+V below the remote map budget
    boolean isMapWumm = (m1Size + m2Size < OptimizerUtils.getRemoteMemBudgetMap(true));
    if (//broadcast
    !FORCE_REPLICATION && isMapWumm) {
        //partitioning of U: needed if dims are unknown or the cell count exceeds the partition size
        boolean needPartU = !U.dimsKnown() || U.getDim1() * U.getDim2() > DistributedCacheInput.PARTITION_SIZE;
        Lop lU = U.constructLops();
        if (needPartU) {
            //requires partitioning; runs in MR only if U exceeds the local memory budget, else in CP
            lU = new DataPartition(lU, DataType.MATRIX, ValueType.DOUBLE, (m1Size > OptimizerUtils.getLocalMemBudget()) ? ExecType.MR : ExecType.CP, PDataPartitionFormat.ROW_BLOCK_WISE_N);
            lU.getOutputParameters().setDimensions(U.getDim1(), U.getDim2(), getRowsInBlock(), getColsInBlock(), U.getNnz());
            setLineNumbers(lU);
        }
        //partitioning of V: same criteria as for U
        boolean needPartV = !V.dimsKnown() || V.getDim1() * V.getDim2() > DistributedCacheInput.PARTITION_SIZE;
        Lop lV = V.constructLops();
        if (needPartV) {
            //requires partitioning; runs in MR only if V exceeds the local memory budget, else in CP
            lV = new DataPartition(lV, DataType.MATRIX, ValueType.DOUBLE, (m2Size > OptimizerUtils.getLocalMemBudget()) ? ExecType.MR : ExecType.CP, PDataPartitionFormat.ROW_BLOCK_WISE_N);
            lV.getOutputParameters().setDimensions(V.getDim1(), V.getDim2(), getRowsInBlock(), getColsInBlock(), V.getNnz());
            setLineNumbers(lV);
        }
        //map-side wumm always with broadcast
        Lop wumm = new WeightedUnaryMM(X.constructLops(), lU, lV, DataType.MATRIX, ValueType.DOUBLE, wtype, uop, ExecType.MR);
        setOutputDimensions(wumm);
        setLineNumbers(wumm);
        setLops(wumm);
    //in contrast to wsloss no aggregation required
    } else //general case
    {
        //MR operator selection part 2
        //U is cached if it alone is below the remote reduce budget
        boolean cacheU = !FORCE_REPLICATION && (m1Size < OptimizerUtils.getRemoteMemBudgetReduce());
        //V is cached if it fits alone (U not cached) or together with U (U cached)
        boolean cacheV = !FORCE_REPLICATION && ((!cacheU && m2Size < OptimizerUtils.getRemoteMemBudgetReduce()) || (cacheU && m1Size + m2Size < OptimizerUtils.getRemoteMemBudgetReduce()));
        //group X
        Group grpX = new Group(X.constructLops(), Group.OperationTypes.Sort, DataType.MATRIX, ValueType.DOUBLE);
        grpX.getOutputParameters().setDimensions(X.getDim1(), X.getDim2(), X.getRowsInBlock(), X.getColsInBlock(), X.getNnz());
        setLineNumbers(grpX);
        Lop lU = null;
        if (cacheU) {
            //partitioning of U for read through distributed cache
            boolean needPartU = !U.dimsKnown() || U.getDim1() * U.getDim2() > DistributedCacheInput.PARTITION_SIZE;
            lU = U.constructLops();
            if (needPartU) {
                //requires partitioning
                lU = new DataPartition(lU, DataType.MATRIX, ValueType.DOUBLE, (m1Size > OptimizerUtils.getLocalMemBudget()) ? ExecType.MR : ExecType.CP, PDataPartitionFormat.ROW_BLOCK_WISE_N);
                lU.getOutputParameters().setDimensions(U.getDim1(), U.getDim2(), getRowsInBlock(), getColsInBlock(), U.getNnz());
                setLineNumbers(lU);
            }
        } else {
            //replication of U for shuffle to target block
            //ncol of t(V) -> nrow of V determines num replicates
            Lop offset = createOffsetLop(V, false);
            lU = new RepMat(U.constructLops(), offset, true, V.getDataType(), V.getValueType());
            lU.getOutputParameters().setDimensions(U.getDim1(), U.getDim2(), U.getRowsInBlock(), U.getColsInBlock(), U.getNnz());
            setLineNumbers(lU);
            //group the replicated U; nnz is passed as -1 here
            Group grpU = new Group(lU, Group.OperationTypes.Sort, DataType.MATRIX, ValueType.DOUBLE);
            grpU.getOutputParameters().setDimensions(U.getDim1(), U.getDim2(), U.getRowsInBlock(), U.getColsInBlock(), -1);
            setLineNumbers(grpU);
            lU = grpU;
        }
        Lop lV = null;
        if (cacheV) {
            //partitioning of V for read through distributed cache
            boolean needPartV = !V.dimsKnown() || V.getDim1() * V.getDim2() > DistributedCacheInput.PARTITION_SIZE;
            lV = V.constructLops();
            if (needPartV) {
                //requires partitioning
                lV = new DataPartition(lV, DataType.MATRIX, ValueType.DOUBLE, (m2Size > OptimizerUtils.getLocalMemBudget()) ? ExecType.MR : ExecType.CP, PDataPartitionFormat.ROW_BLOCK_WISE_N);
                lV.getOutputParameters().setDimensions(V.getDim1(), V.getDim2(), getRowsInBlock(), getColsInBlock(), V.getNnz());
                setLineNumbers(lV);
            }
        } else {
            //replication of t(V) for shuffle to target block
            //(the transpose lop swaps V's dimensions and block sizes in its output parameters)
            Transform ltV = new Transform(V.constructLops(), HopsTransf2Lops.get(ReOrgOp.TRANSPOSE), getDataType(), getValueType(), ExecType.MR);
            ltV.getOutputParameters().setDimensions(V.getDim2(), V.getDim1(), V.getColsInBlock(), V.getRowsInBlock(), V.getNnz());
            setLineNumbers(ltV);
            //nrow of U determines num replicates
            Lop offset = createOffsetLop(U, false);
            lV = new RepMat(ltV, offset, false, V.getDataType(), V.getValueType());
            lV.getOutputParameters().setDimensions(V.getDim2(), V.getDim1(), V.getColsInBlock(), V.getRowsInBlock(), V.getNnz());
            setLineNumbers(lV);
            //group the replicated t(V); nnz is passed as -1 here
            Group grpV = new Group(lV, Group.OperationTypes.Sort, DataType.MATRIX, ValueType.DOUBLE);
            grpV.getOutputParameters().setDimensions(V.getDim2(), V.getDim1(), V.getColsInBlock(), V.getRowsInBlock(), -1);
            setLineNumbers(grpV);
            lV = grpV;
        }
        //reduce-side wumm w/ or without broadcast
        Lop wumm = new WeightedUnaryMMR(grpX, lU, lV, DataType.MATRIX, ValueType.DOUBLE, wtype, uop, cacheU, cacheV, ExecType.MR);
        setOutputDimensions(wumm);
        setLineNumbers(wumm);
        setLops(wumm);
    //in contrast to wsloss no aggregation required
    }
}
Also used : Group(org.apache.sysml.lops.Group) RepMat(org.apache.sysml.lops.RepMat) WeightedUnaryMMR(org.apache.sysml.lops.WeightedUnaryMMR) MultiThreadedHop(org.apache.sysml.hops.Hop.MultiThreadedHop) WeightedUnaryMM(org.apache.sysml.lops.WeightedUnaryMM) Lop(org.apache.sysml.lops.Lop) Transform(org.apache.sysml.lops.Transform) DataPartition(org.apache.sysml.lops.DataPartition) Unary(org.apache.sysml.lops.Unary)

Aggregations

MultiThreadedHop (org.apache.sysml.hops.Hop.MultiThreadedHop)12 Lop (org.apache.sysml.lops.Lop)12 Transform (org.apache.sysml.lops.Transform)12 Group (org.apache.sysml.lops.Group)8 Aggregate (org.apache.sysml.lops.Aggregate)6 DataPartition (org.apache.sysml.lops.DataPartition)6 RepMat (org.apache.sysml.lops.RepMat)5 ExecType (org.apache.sysml.lops.LopProperties.ExecType)2 MMCJ (org.apache.sysml.lops.MMCJ)2 MapMult (org.apache.sysml.lops.MapMult)2 PMapMult (org.apache.sysml.lops.PMapMult)2 UnaryCP (org.apache.sysml.lops.UnaryCP)2 ArrayList (java.util.ArrayList)1 Binary (org.apache.sysml.lops.Binary)1 MMCJType (org.apache.sysml.lops.MMCJ.MMCJType)1 SortKeys (org.apache.sysml.lops.SortKeys)1 Unary (org.apache.sysml.lops.Unary)1 WeightedCrossEntropy (org.apache.sysml.lops.WeightedCrossEntropy)1 WeightedCrossEntropyR (org.apache.sysml.lops.WeightedCrossEntropyR)1 WeightedDivMM (org.apache.sysml.lops.WeightedDivMM)1