
Example 1 with WeightedSquaredLossR

Use of org.apache.sysml.lops.WeightedSquaredLossR in project incubator-systemml by apache.

In class QuaternaryOp, method constructSparkLopsWeightedSquaredLoss:

private void constructSparkLopsWeightedSquaredLoss(WeightsType wtype) throws HopsException, LopsException {
    //NOTE: the common case for wsloss is factors U/V with ranks of 10s to 100s; the current runtime only
    //supports single-block outer products (U/V rank <= blocksize, i.e., 1000 by default); we enforce this
    //by applying the hop rewrite for Weighted Squared Loss only if this constraint holds.
    //Notes: Any broadcast needs to fit twice in local memory because we partition the input in cp,
    //and needs to fit once in executor broadcast memory. The 2GB broadcast constraint is no longer
    //required because the max_int byte buffer constraint has been fixed in Spark 1.4 
    double memBudgetExec = SparkExecutionContext.getBroadcastMemoryBudget();
    double memBudgetLocal = OptimizerUtils.getLocalMemBudget();
    Hop X = getInput().get(0);
    Hop U = getInput().get(1);
    Hop V = getInput().get(2);
    Hop W = getInput().get(3);
    //MR operator selection, part1
    //size U
    double m1Size = OptimizerUtils.estimateSize(U.getDim1(), U.getDim2());
    //size V
    double m2Size = OptimizerUtils.estimateSize(V.getDim1(), V.getDim2());
    boolean isMapWsloss = (!wtype.hasFourInputs() && m1Size + m2Size < memBudgetExec && 2 * m1Size < memBudgetLocal && 2 * m2Size < memBudgetLocal);
    if (!FORCE_REPLICATION && isMapWsloss) { //broadcast
        //map-side wsloss always with broadcast
        Lop wsloss = new WeightedSquaredLoss(X.constructLops(), U.constructLops(), V.constructLops(), W.constructLops(), DataType.SCALAR, ValueType.DOUBLE, wtype, ExecType.SPARK);
        setOutputDimensions(wsloss);
        setLineNumbers(wsloss);
        setLops(wsloss);
    } else { //general case
        //MR operator selection part 2
        boolean cacheU = !FORCE_REPLICATION && (m1Size < memBudgetExec && 2 * m1Size < memBudgetLocal);
        boolean cacheV = !FORCE_REPLICATION && ((!cacheU && m2Size < memBudgetExec) || (cacheU && m1Size + m2Size < memBudgetExec)) && 2 * m2Size < memBudgetLocal;
        //reduce-side wsloss w/ or without broadcast
        Lop wsloss = new WeightedSquaredLossR(X.constructLops(), U.constructLops(), V.constructLops(), W.constructLops(), DataType.SCALAR, ValueType.DOUBLE, wtype, cacheU, cacheV, ExecType.SPARK);
        setOutputDimensions(wsloss);
        setLineNumbers(wsloss);
        setLops(wsloss);
    }
}
Also used : MultiThreadedHop(org.apache.sysml.hops.Hop.MultiThreadedHop) WeightedSquaredLoss(org.apache.sysml.lops.WeightedSquaredLoss) Lop(org.apache.sysml.lops.Lop) WeightedSquaredLossR(org.apache.sysml.lops.WeightedSquaredLossR)
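
For reference, the wsloss quaternary pattern these lops implement corresponds, in the post-weighting case, to sum(W * (X - U %*% t(V))^2). Below is a minimal, self-contained plain-Java sketch of that computation on dense arrays; the class and method names are illustrative only and not part of SystemML.

// Minimal dense reference for the post-weighted squared loss
// sum(W * (X - U %*% t(V))^2); illustrative only, not SystemML code.
public class WslossReference {

    // X, W are m x n; U is m x k; V is n x k, so U %*% t(V) is m x n.
    public static double wslossPost(double[][] X, double[][] U, double[][] V, double[][] W) {
        int m = X.length, n = X[0].length, k = U[0].length;
        double loss = 0;
        for (int i = 0; i < m; i++) {
            for (int j = 0; j < n; j++) {
                double uv = 0;
                for (int l = 0; l < k; l++)
                    uv += U[i][l] * V[j][l];  // (U %*% t(V))[i,j]
                double d = X[i][j] - uv;
                loss += W[i][j] * d * d;      // weight applied after squaring the residual
            }
        }
        return loss;
    }

    public static void main(String[] args) {
        double[][] X = { {1, 0}, {0, 1} };
        double[][] W = { {1, 0}, {0, 1} };   // e.g., weights marking observed entries
        double[][] U = { {0.5}, {0.5} };     // m x k with k = 1
        double[][] V = { {1.0}, {1.0} };     // n x k with k = 1
        System.out.println(wslossPost(X, U, V, W));  // expected: (1-0.5)^2 + (1-0.5)^2 = 0.5
    }
}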

Example 2 with WeightedSquaredLossR

Use of org.apache.sysml.lops.WeightedSquaredLossR in project incubator-systemml by apache.

In class QuaternaryOp, method constructMRLopsWeightedSquaredLoss:

private void constructMRLopsWeightedSquaredLoss(WeightsType wtype) throws HopsException, LopsException {
    //NOTE: the common case for wsloss is factors U/V with ranks of 10s to 100s; the current runtime only
    //supports single-block outer products (U/V rank <= blocksize, i.e., 1000 by default); we enforce this
    //by applying the hop rewrite for Weighted Squared Loss only if this constraint holds.
    Hop X = getInput().get(0);
    Hop U = getInput().get(1);
    Hop V = getInput().get(2);
    Hop W = getInput().get(3);
    //MR operator selection, part1
    //size U
    double m1Size = OptimizerUtils.estimateSize(U.getDim1(), U.getDim2());
    //size V
    double m2Size = OptimizerUtils.estimateSize(V.getDim1(), V.getDim2());
    boolean isMapWsloss = (!wtype.hasFourInputs() && m1Size + m2Size < OptimizerUtils.getRemoteMemBudgetMap(true));
    if (!FORCE_REPLICATION && isMapWsloss) { //broadcast
        //partitioning of U
        boolean needPartU = !U.dimsKnown() || U.getDim1() * U.getDim2() > DistributedCacheInput.PARTITION_SIZE;
        Lop lU = U.constructLops();
        if (needPartU) {
            //requires partitioning
            lU = new DataPartition(lU, DataType.MATRIX, ValueType.DOUBLE, (m1Size > OptimizerUtils.getLocalMemBudget()) ? ExecType.MR : ExecType.CP, PDataPartitionFormat.ROW_BLOCK_WISE_N);
            lU.getOutputParameters().setDimensions(U.getDim1(), U.getDim2(), getRowsInBlock(), getColsInBlock(), U.getNnz());
            setLineNumbers(lU);
        }
        //partitioning of V
        boolean needPartV = !V.dimsKnown() || V.getDim1() * V.getDim2() > DistributedCacheInput.PARTITION_SIZE;
        Lop lV = V.constructLops();
        if (needPartV) {
            //requires partitioning
            lV = new DataPartition(lV, DataType.MATRIX, ValueType.DOUBLE, (m2Size > OptimizerUtils.getLocalMemBudget()) ? ExecType.MR : ExecType.CP, PDataPartitionFormat.ROW_BLOCK_WISE_N);
            lV.getOutputParameters().setDimensions(V.getDim1(), V.getDim2(), getRowsInBlock(), getColsInBlock(), V.getNnz());
            setLineNumbers(lV);
        }
        //map-side wsloss always with broadcast
        Lop wsloss = new WeightedSquaredLoss(X.constructLops(), lU, lV, W.constructLops(), DataType.MATRIX, ValueType.DOUBLE, wtype, ExecType.MR);
        wsloss.getOutputParameters().setDimensions(1, 1, X.getRowsInBlock(), X.getColsInBlock(), -1);
        setLineNumbers(wsloss);
        Group grp = new Group(wsloss, Group.OperationTypes.Sort, DataType.MATRIX, ValueType.DOUBLE);
        grp.getOutputParameters().setDimensions(1, 1, X.getRowsInBlock(), X.getColsInBlock(), -1);
        setLineNumbers(grp);
        Aggregate agg1 = new Aggregate(grp, HopsAgg2Lops.get(AggOp.SUM), DataType.MATRIX, ValueType.DOUBLE, ExecType.MR);
        // aggregation uses kahanSum 
        agg1.setupCorrectionLocation(CorrectionLocationType.NONE);
        agg1.getOutputParameters().setDimensions(1, 1, X.getRowsInBlock(), X.getColsInBlock(), -1);
        setLineNumbers(agg1);
        UnaryCP unary1 = new UnaryCP(agg1, HopsOpOp1LopsUS.get(OpOp1.CAST_AS_SCALAR), getDataType(), getValueType());
        unary1.getOutputParameters().setDimensions(0, 0, 0, 0, -1);
        setLineNumbers(unary1);
        setLops(unary1);
    } else { //general case
        //MR operator selection part 2
        boolean cacheU = !FORCE_REPLICATION && (m1Size < OptimizerUtils.getRemoteMemBudgetReduce());
        boolean cacheV = !FORCE_REPLICATION && ((!cacheU && m2Size < OptimizerUtils.getRemoteMemBudgetReduce()) || (cacheU && m1Size + m2Size < OptimizerUtils.getRemoteMemBudgetReduce()));
        Group grpX = new Group(X.constructLops(), Group.OperationTypes.Sort, DataType.MATRIX, ValueType.DOUBLE);
        grpX.getOutputParameters().setDimensions(X.getDim1(), X.getDim2(), X.getRowsInBlock(), X.getColsInBlock(), -1);
        setLineNumbers(grpX);
        Lop grpW = W.constructLops();
        if (grpW.getDataType() == DataType.MATRIX) {
            grpW = new Group(W.constructLops(), Group.OperationTypes.Sort, DataType.MATRIX, ValueType.DOUBLE);
            grpW.getOutputParameters().setDimensions(W.getDim1(), W.getDim2(), W.getRowsInBlock(), W.getColsInBlock(), -1);
            setLineNumbers(grpW);
        }
        Lop lU = null;
        if (cacheU) {
            //partitioning of U for read through distributed cache
            boolean needPartU = !U.dimsKnown() || U.getDim1() * U.getDim2() > DistributedCacheInput.PARTITION_SIZE;
            lU = U.constructLops();
            if (needPartU) {
                //requires partitioning
                lU = new DataPartition(lU, DataType.MATRIX, ValueType.DOUBLE, (m1Size > OptimizerUtils.getLocalMemBudget()) ? ExecType.MR : ExecType.CP, PDataPartitionFormat.ROW_BLOCK_WISE_N);
                lU.getOutputParameters().setDimensions(U.getDim1(), U.getDim2(), getRowsInBlock(), getColsInBlock(), U.getNnz());
                setLineNumbers(lU);
            }
        } else {
            //replication of U for shuffle to target block
            //ncol of t(V) -> nrow of V determines num replicates
            Lop offset = createOffsetLop(V, false);
            lU = new RepMat(U.constructLops(), offset, true, V.getDataType(), V.getValueType());
            lU.getOutputParameters().setDimensions(U.getDim1(), U.getDim2(), U.getRowsInBlock(), U.getColsInBlock(), U.getNnz());
            setLineNumbers(lU);
            Group grpU = new Group(lU, Group.OperationTypes.Sort, DataType.MATRIX, ValueType.DOUBLE);
            grpU.getOutputParameters().setDimensions(U.getDim1(), U.getDim2(), U.getRowsInBlock(), U.getColsInBlock(), -1);
            setLineNumbers(grpU);
            lU = grpU;
        }
        Lop lV = null;
        if (cacheV) {
            //partitioning of V for read through distributed cache
            boolean needPartV = !V.dimsKnown() || V.getDim1() * V.getDim2() > DistributedCacheInput.PARTITION_SIZE;
            lV = V.constructLops();
            if (needPartV) {
                //requires partitioning
                lV = new DataPartition(lV, DataType.MATRIX, ValueType.DOUBLE, (m2Size > OptimizerUtils.getLocalMemBudget()) ? ExecType.MR : ExecType.CP, PDataPartitionFormat.ROW_BLOCK_WISE_N);
                lV.getOutputParameters().setDimensions(V.getDim1(), V.getDim2(), getRowsInBlock(), getColsInBlock(), V.getNnz());
                setLineNumbers(lV);
            }
        } else {
            //replication of t(V) for shuffle to target block
            Transform ltV = new Transform(V.constructLops(), HopsTransf2Lops.get(ReOrgOp.TRANSPOSE), getDataType(), getValueType(), ExecType.MR);
            ltV.getOutputParameters().setDimensions(V.getDim2(), V.getDim1(), V.getColsInBlock(), V.getRowsInBlock(), V.getNnz());
            setLineNumbers(ltV);
            //nrow of U determines num replicates
            Lop offset = createOffsetLop(U, false);
            lV = new RepMat(ltV, offset, false, V.getDataType(), V.getValueType());
            lV.getOutputParameters().setDimensions(V.getDim2(), V.getDim1(), V.getColsInBlock(), V.getRowsInBlock(), V.getNnz());
            setLineNumbers(lV);
            Group grpV = new Group(lV, Group.OperationTypes.Sort, DataType.MATRIX, ValueType.DOUBLE);
            grpV.getOutputParameters().setDimensions(V.getDim2(), V.getDim1(), V.getColsInBlock(), V.getRowsInBlock(), -1);
            setLineNumbers(grpV);
            lV = grpV;
        }
        //reduce-side wsloss w/ or without broadcast
        Lop wsloss = new WeightedSquaredLossR(grpX, lU, lV, grpW, DataType.MATRIX, ValueType.DOUBLE, wtype, cacheU, cacheV, ExecType.MR);
        wsloss.getOutputParameters().setDimensions(1, 1, X.getRowsInBlock(), X.getColsInBlock(), -1);
        setLineNumbers(wsloss);
        Group grp = new Group(wsloss, Group.OperationTypes.Sort, DataType.MATRIX, ValueType.DOUBLE);
        grp.getOutputParameters().setDimensions(1, 1, X.getRowsInBlock(), X.getColsInBlock(), -1);
        setLineNumbers(grp);
        Aggregate agg1 = new Aggregate(grp, HopsAgg2Lops.get(AggOp.SUM), DataType.MATRIX, ValueType.DOUBLE, ExecType.MR);
        // aggregation uses kahanSum 
        agg1.setupCorrectionLocation(CorrectionLocationType.NONE);
        agg1.getOutputParameters().setDimensions(1, 1, X.getRowsInBlock(), X.getColsInBlock(), -1);
        setLineNumbers(agg1);
        UnaryCP unary1 = new UnaryCP(agg1, HopsOpOp1LopsUS.get(OpOp1.CAST_AS_SCALAR), getDataType(), getValueType());
        unary1.getOutputParameters().setDimensions(0, 0, 0, 0, -1);
        setLineNumbers(unary1);
        setLops(unary1);
    }
}
Also used : Group(org.apache.sysml.lops.Group) RepMat(org.apache.sysml.lops.RepMat) MultiThreadedHop(org.apache.sysml.hops.Hop.MultiThreadedHop) WeightedSquaredLoss(org.apache.sysml.lops.WeightedSquaredLoss) Lop(org.apache.sysml.lops.Lop) Aggregate(org.apache.sysml.lops.Aggregate) Transform(org.apache.sysml.lops.Transform) DataPartition(org.apache.sysml.lops.DataPartition) WeightedSquaredLossR(org.apache.sysml.lops.WeightedSquaredLossR) UnaryCP(org.apache.sysml.lops.UnaryCP)
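
Both constructors share the same selection idea: a factor is broadcast or put into the distributed cache only if its estimated size fits the executor (or remote reduce) budget and, on Spark, twice the local budget (because the input is partitioned in CP); otherwise the factor is replicated and shuffled to the target blocks. The following is a minimal sketch of that predicate with hypothetical budget and size values; the variable names mirror the locals in the methods above but the numbers are illustrative, not SystemML API calls.

// Illustrative sketch of the map-side vs. cacheU/cacheV selection used above;
// budgets and sizes are hypothetical values in bytes, not SystemML API calls.
public class WslossOperatorSelection {
    public static void main(String[] args) {
        boolean FORCE_REPLICATION = false;
        double memBudgetExec  = 512 * 1024 * 1024;   // e.g., executor broadcast budget
        double memBudgetLocal = 1024L * 1024 * 1024; // e.g., local (CP) memory budget
        double m1Size = 200 * 1024 * 1024;           // estimated size of U
        double m2Size = 400 * 1024 * 1024;           // estimated size of V
        boolean hasFourInputs = false;               // stands in for wtype.hasFourInputs()

        // map-side wsloss: both factors broadcast together, each fitting twice locally
        boolean isMapWsloss = !hasFourInputs
            && m1Size + m2Size < memBudgetExec
            && 2 * m1Size < memBudgetLocal
            && 2 * m2Size < memBudgetLocal;

        // reduce-side fallback: cache U and/or V individually if they fit
        boolean cacheU = !FORCE_REPLICATION
            && m1Size < memBudgetExec && 2 * m1Size < memBudgetLocal;
        boolean cacheV = !FORCE_REPLICATION
            && ((!cacheU && m2Size < memBudgetExec)
                || (cacheU && m1Size + m2Size < memBudgetExec))
            && 2 * m2Size < memBudgetLocal;

        // with these values: map-side=false, cacheU=true, cacheV=false
        System.out.println("map-side=" + (!FORCE_REPLICATION && isMapWsloss)
            + ", cacheU=" + cacheU + ", cacheV=" + cacheV);
    }
}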

Aggregations

MultiThreadedHop (org.apache.sysml.hops.Hop.MultiThreadedHop)2 Lop (org.apache.sysml.lops.Lop)2 WeightedSquaredLoss (org.apache.sysml.lops.WeightedSquaredLoss)2 WeightedSquaredLossR (org.apache.sysml.lops.WeightedSquaredLossR)2 Aggregate (org.apache.sysml.lops.Aggregate)1 DataPartition (org.apache.sysml.lops.DataPartition)1 Group (org.apache.sysml.lops.Group)1 RepMat (org.apache.sysml.lops.RepMat)1 Transform (org.apache.sysml.lops.Transform)1 UnaryCP (org.apache.sysml.lops.UnaryCP)1