Search in sources :

Example 11 with Lop

use of org.apache.sysml.lops.Lop in project incubator-systemml by apache.

the class ParameterizedBuiltinOp method constructLopsGroupedAggregate.

private void constructLopsGroupedAggregate(HashMap<String, Lop> inputlops, ExecType et) {
    // reset reblock requirement (see MR aggregate / construct lops)
    setRequiresReblock(false);
    // determine output dimensions
    long outputDim1 = -1, outputDim2 = -1;
    Lop numGroups = inputlops.get(Statement.GAGG_NUM_GROUPS);
    if (!dimsKnown() && numGroups != null && numGroups instanceof Data && ((Data) numGroups).isLiteral()) {
        long ngroups = ((Data) numGroups).getLongValue();
        Lop input = inputlops.get(GroupedAggregate.COMBINEDINPUT);
        long inDim1 = input.getOutputParameters().getNumRows();
        long inDim2 = input.getOutputParameters().getNumCols();
        boolean rowwise = (inDim1 == 1 && inDim2 > 1);
        if (rowwise) {
            // vector
            outputDim1 = ngroups;
            outputDim2 = 1;
        } else {
            // vector or matrix
            outputDim1 = inDim2;
            outputDim2 = ngroups;
        }
    }
    // construct lops
    if (et == ExecType.MR) {
        Lop grp_agg = null;
        // construct necessary lops: combineBinary/combineTertiary and groupedAgg
        boolean isWeighted = (_paramIndexMap.get(Statement.GAGG_WEIGHTS) != null);
        if (isWeighted) {
            Lop append = BinaryOp.constructAppendLopChain(getInput().get(_paramIndexMap.get(Statement.GAGG_TARGET)), getInput().get(_paramIndexMap.get(Statement.GAGG_GROUPS)), getInput().get(_paramIndexMap.get(Statement.GAGG_WEIGHTS)), DataType.MATRIX, getValueType(), true, getInput().get(_paramIndexMap.get(Statement.GAGG_TARGET)));
            // add the combine lop to parameter list, with a new name "combinedinput"
            inputlops.put(GroupedAggregate.COMBINEDINPUT, append);
            inputlops.remove(Statement.GAGG_TARGET);
            inputlops.remove(Statement.GAGG_GROUPS);
            inputlops.remove(Statement.GAGG_WEIGHTS);
            grp_agg = new GroupedAggregate(inputlops, isWeighted, getDataType(), getValueType());
            grp_agg.getOutputParameters().setDimensions(outputDim1, outputDim2, getRowsInBlock(), getColsInBlock(), -1);
            setRequiresReblock(true);
        } else {
            Hop target = getInput().get(_paramIndexMap.get(Statement.GAGG_TARGET));
            Hop groups = getInput().get(_paramIndexMap.get(Statement.GAGG_GROUPS));
            Lop append = null;
            // physical operator selection
            double groupsSizeP = OptimizerUtils.estimatePartitionedSizeExactSparsity(groups.getDim1(), groups.getDim2(), groups.getRowsInBlock(), groups.getColsInBlock(), groups.getNnz());
            if (// mapgroupedagg
            groupsSizeP < OptimizerUtils.getRemoteMemBudgetMap(true) && getParameterHop(Statement.GAGG_FN) instanceof LiteralOp && ((LiteralOp) getParameterHop(Statement.GAGG_FN)).getStringValue().equals("sum") && inputlops.get(Statement.GAGG_NUM_GROUPS) != null) {
                // pre partitioning
                boolean needPart = (groups.dimsKnown() && groups.getDim1() * groups.getDim2() > DistributedCacheInput.PARTITION_SIZE);
                if (needPart) {
                    ExecType etPart = (OptimizerUtils.estimateSizeExactSparsity(groups.getDim1(), groups.getDim2(), 1.0) < OptimizerUtils.getLocalMemBudget()) ? ExecType.CP : // operator selection
                    ExecType.MR;
                    Lop dcinput = new DataPartition(groups.constructLops(), DataType.MATRIX, ValueType.DOUBLE, etPart, PDataPartitionFormat.ROW_BLOCK_WISE_N);
                    dcinput.getOutputParameters().setDimensions(groups.getDim1(), groups.getDim2(), target.getRowsInBlock(), target.getColsInBlock(), groups.getNnz());
                    setLineNumbers(dcinput);
                    inputlops.put(Statement.GAGG_GROUPS, dcinput);
                }
                Lop grp_agg_m = new GroupedAggregateM(inputlops, getDataType(), getValueType(), needPart, ExecType.MR);
                grp_agg_m.getOutputParameters().setDimensions(outputDim1, outputDim2, target.getRowsInBlock(), target.getColsInBlock(), -1);
                setLineNumbers(grp_agg_m);
                // post aggregation
                Group grp = new Group(grp_agg_m, Group.OperationTypes.Sort, getDataType(), getValueType());
                grp.getOutputParameters().setDimensions(outputDim1, outputDim2, target.getRowsInBlock(), target.getColsInBlock(), -1);
                setLineNumbers(grp);
                Aggregate agg1 = new Aggregate(grp, HopsAgg2Lops.get(AggOp.SUM), getDataType(), getValueType(), ExecType.MR);
                agg1.setupCorrectionLocation(CorrectionLocationType.NONE);
                agg1.getOutputParameters().setDimensions(outputDim1, outputDim2, target.getRowsInBlock(), target.getColsInBlock(), -1);
                grp_agg = agg1;
            // note: no reblock required
            } else // general case: groupedagg
            {
                if (// multi-column-block result matrix
                target.getDim2() >= target.getColsInBlock() || // unkown
                target.getDim2() <= 0) {
                    long m1_dim1 = target.getDim1();
                    long m1_dim2 = target.getDim2();
                    long m2_dim1 = groups.getDim1();
                    long m2_dim2 = groups.getDim2();
                    long m3_dim1 = m1_dim1;
                    long m3_dim2 = ((m1_dim2 >= 0 && m2_dim2 >= 0) ? (m1_dim2 + m2_dim2) : -1);
                    long m3_nnz = (target.getNnz() > 0 && groups.getNnz() > 0) ? (target.getNnz() + groups.getNnz()) : -1;
                    long brlen = target.getRowsInBlock();
                    long bclen = target.getColsInBlock();
                    Lop offset = createOffsetLop(target, true);
                    Lop rep = new RepMat(groups.constructLops(), offset, true, groups.getDataType(), groups.getValueType());
                    setOutputDimensions(rep);
                    setLineNumbers(rep);
                    Group group1 = new Group(target.constructLops(), Group.OperationTypes.Sort, DataType.MATRIX, target.getValueType());
                    group1.getOutputParameters().setDimensions(m1_dim1, m1_dim2, brlen, bclen, target.getNnz());
                    setLineNumbers(group1);
                    Group group2 = new Group(rep, Group.OperationTypes.Sort, DataType.MATRIX, groups.getValueType());
                    group1.getOutputParameters().setDimensions(m2_dim1, m2_dim2, brlen, bclen, groups.getNnz());
                    setLineNumbers(group2);
                    append = new AppendR(group1, group2, DataType.MATRIX, ValueType.DOUBLE, true, ExecType.MR);
                    append.getOutputParameters().setDimensions(m3_dim1, m3_dim2, brlen, bclen, m3_nnz);
                    setLineNumbers(append);
                } else // single-column-block vector or matrix
                {
                    append = BinaryOp.constructMRAppendLop(target, groups, DataType.MATRIX, getValueType(), true, target);
                }
                // add the combine lop to parameter list, with a new name "combinedinput"
                inputlops.put(GroupedAggregate.COMBINEDINPUT, append);
                inputlops.remove(Statement.GAGG_TARGET);
                inputlops.remove(Statement.GAGG_GROUPS);
                grp_agg = new GroupedAggregate(inputlops, isWeighted, getDataType(), getValueType());
                grp_agg.getOutputParameters().setDimensions(outputDim1, outputDim2, getRowsInBlock(), getColsInBlock(), -1);
                setRequiresReblock(true);
            }
        }
        setLineNumbers(grp_agg);
        setLops(grp_agg);
    } else // CP/Spark
    {
        Lop grp_agg = null;
        if (et == ExecType.CP) {
            int k = OptimizerUtils.getConstrainedNumThreads(_maxNumThreads);
            grp_agg = new GroupedAggregate(inputlops, getDataType(), getValueType(), et, k);
            grp_agg.getOutputParameters().setDimensions(outputDim1, outputDim2, getRowsInBlock(), getColsInBlock(), -1);
        } else if (et == ExecType.SPARK) {
            // physical operator selection
            Hop groups = getParameterHop(Statement.GAGG_GROUPS);
            boolean broadcastGroups = (_paramIndexMap.get(Statement.GAGG_WEIGHTS) == null && OptimizerUtils.checkSparkBroadcastMemoryBudget(groups.getDim1(), groups.getDim2(), groups.getRowsInBlock(), groups.getColsInBlock(), groups.getNnz()));
            if (// mapgroupedagg
            broadcastGroups && getParameterHop(Statement.GAGG_FN) instanceof LiteralOp && ((LiteralOp) getParameterHop(Statement.GAGG_FN)).getStringValue().equals("sum") && inputlops.get(Statement.GAGG_NUM_GROUPS) != null) {
                Hop target = getTargetHop();
                grp_agg = new GroupedAggregateM(inputlops, getDataType(), getValueType(), true, ExecType.SPARK);
                grp_agg.getOutputParameters().setDimensions(outputDim1, outputDim2, target.getRowsInBlock(), target.getColsInBlock(), -1);
            // no reblock required (directly output binary block)
            } else // groupedagg (w/ or w/o broadcast)
            {
                grp_agg = new GroupedAggregate(inputlops, getDataType(), getValueType(), et, broadcastGroups);
                grp_agg.getOutputParameters().setDimensions(outputDim1, outputDim2, -1, -1, -1);
                setRequiresReblock(true);
            }
        }
        setLineNumbers(grp_agg);
        setLops(grp_agg);
    }
}
Also used : Group(org.apache.sysml.lops.Group) MultiThreadedHop(org.apache.sysml.hops.Hop.MultiThreadedHop) Data(org.apache.sysml.lops.Data) Lop(org.apache.sysml.lops.Lop) RepMat(org.apache.sysml.lops.RepMat) AppendR(org.apache.sysml.lops.AppendR) ExecType(org.apache.sysml.lops.LopProperties.ExecType) GroupedAggregate(org.apache.sysml.lops.GroupedAggregate) Aggregate(org.apache.sysml.lops.Aggregate) GroupedAggregate(org.apache.sysml.lops.GroupedAggregate) DataPartition(org.apache.sysml.lops.DataPartition) GroupedAggregateM(org.apache.sysml.lops.GroupedAggregateM)

Example 12 with Lop

use of org.apache.sysml.lops.Lop in project incubator-systemml by apache.

the class QuaternaryOp method constructMRLopsWeightedDivMM.

private void constructMRLopsWeightedDivMM(WDivMMType wtype) {
    // NOTE: the common case for wdivmm are factors U/V with a rank of 10s to 100s; the current runtime only
    // supports single block outer products (U/V rank <= blocksize, i.e., 1000 by default); we enforce this
    // by applying the hop rewrite for Weighted DivMM only if this constraint holds.
    Hop W = getInput().get(0);
    Hop U = getInput().get(1);
    Hop V = getInput().get(2);
    Hop X = getInput().get(3);
    // MR operator selection, part1
    // size U
    double m1Size = OptimizerUtils.estimateSize(U.getDim1(), U.getDim2());
    // size V
    double m2Size = OptimizerUtils.estimateSize(V.getDim1(), V.getDim2());
    boolean isMapWdivmm = ((!wtype.hasFourInputs() || wtype.hasScalar()) && m1Size + m2Size < OptimizerUtils.getRemoteMemBudgetMap(true));
    if (// broadcast
    !FORCE_REPLICATION && isMapWdivmm) {
        // partitioning of U
        boolean needPartU = !U.dimsKnown() || U.getDim1() * U.getDim2() > DistributedCacheInput.PARTITION_SIZE;
        Lop lU = U.constructLops();
        if (needPartU) {
            // requires partitioning
            lU = new DataPartition(lU, DataType.MATRIX, ValueType.DOUBLE, (m1Size > OptimizerUtils.getLocalMemBudget()) ? ExecType.MR : ExecType.CP, PDataPartitionFormat.ROW_BLOCK_WISE_N);
            lU.getOutputParameters().setDimensions(U.getDim1(), U.getDim2(), getRowsInBlock(), getColsInBlock(), U.getNnz());
            setLineNumbers(lU);
        }
        // partitioning of V
        boolean needPartV = !V.dimsKnown() || V.getDim1() * V.getDim2() > DistributedCacheInput.PARTITION_SIZE;
        Lop lV = V.constructLops();
        if (needPartV) {
            // requires partitioning
            lV = new DataPartition(lV, DataType.MATRIX, ValueType.DOUBLE, (m2Size > OptimizerUtils.getLocalMemBudget()) ? ExecType.MR : ExecType.CP, PDataPartitionFormat.ROW_BLOCK_WISE_N);
            lV.getOutputParameters().setDimensions(V.getDim1(), V.getDim2(), getRowsInBlock(), getColsInBlock(), V.getNnz());
            setLineNumbers(lV);
        }
        // map-side wdivmm always with broadcast
        Lop wdivmm = new WeightedDivMM(W.constructLops(), lU, lV, X.constructLops(), DataType.MATRIX, ValueType.DOUBLE, wtype, ExecType.MR);
        setOutputDimensions(wdivmm);
        setLineNumbers(wdivmm);
        setLops(wdivmm);
    } else // general case
    {
        // MR operator selection part 2 (both cannot happen for wdivmm, otherwise mapwdivmm)
        boolean cacheU = !FORCE_REPLICATION && (m1Size < OptimizerUtils.getRemoteMemBudgetReduce());
        boolean cacheV = !FORCE_REPLICATION && ((!cacheU && m2Size < OptimizerUtils.getRemoteMemBudgetReduce()) || (cacheU && m1Size + m2Size < OptimizerUtils.getRemoteMemBudgetReduce()));
        Group grpW = new Group(W.constructLops(), Group.OperationTypes.Sort, DataType.MATRIX, ValueType.DOUBLE);
        grpW.getOutputParameters().setDimensions(W.getDim1(), W.getDim2(), W.getRowsInBlock(), W.getColsInBlock(), W.getNnz());
        setLineNumbers(grpW);
        Lop grpX = X.constructLops();
        if (wtype.hasFourInputs() && (X.getDataType() != DataType.SCALAR))
            grpX = new Group(grpX, Group.OperationTypes.Sort, DataType.MATRIX, ValueType.DOUBLE);
        grpX.getOutputParameters().setDimensions(X.getDim1(), X.getDim2(), X.getRowsInBlock(), X.getColsInBlock(), X.getNnz());
        setLineNumbers(grpX);
        Lop lU = constructLeftFactorMRLop(U, V, cacheU, m1Size);
        Lop lV = constructRightFactorMRLop(U, V, cacheV, m2Size);
        // reduce-side wdivmm w/ or without broadcast
        Lop wdivmm = new WeightedDivMMR(grpW, lU, lV, grpX, DataType.MATRIX, ValueType.DOUBLE, wtype, cacheU, cacheV, ExecType.MR);
        setOutputDimensions(wdivmm);
        setLineNumbers(wdivmm);
        setLops(wdivmm);
    }
    // in contrast to to wsloss/wsigmoid, wdivmm requires partial aggregation (for the final mm)
    Group grp = new Group(getLops(), Group.OperationTypes.Sort, getDataType(), getValueType());
    setOutputDimensions(grp);
    setLineNumbers(grp);
    Aggregate agg1 = new Aggregate(grp, HopsAgg2Lops.get(AggOp.SUM), getDataType(), getValueType(), ExecType.MR);
    // aggregation uses kahanSum but the inputs do not have correction values
    agg1.setupCorrectionLocation(CorrectionLocationType.NONE);
    setOutputDimensions(agg1);
    setLineNumbers(agg1);
    setLops(agg1);
}
Also used : Group(org.apache.sysml.lops.Group) MultiThreadedHop(org.apache.sysml.hops.Hop.MultiThreadedHop) Lop(org.apache.sysml.lops.Lop) WeightedDivMM(org.apache.sysml.lops.WeightedDivMM) Aggregate(org.apache.sysml.lops.Aggregate) DataPartition(org.apache.sysml.lops.DataPartition) WeightedDivMMR(org.apache.sysml.lops.WeightedDivMMR)

Example 13 with Lop

use of org.apache.sysml.lops.Lop in project incubator-systemml by apache.

the class QuaternaryOp method constructSparkLopsWeightedSigmoid.

private void constructSparkLopsWeightedSigmoid(WSigmoidType wtype) {
    // NOTE: the common case for wsigmoid are factors U/V with a rank of 10s to 100s; the current runtime only
    // supports single block outer products (U/V rank <= blocksize, i.e., 1000 by default); we enforce this
    // by applying the hop rewrite for Weighted Sigmoid only if this constraint holds.
    // Notes: Any broadcast needs to fit twice in local memory because we partition the input in cp,
    // and needs to fit once in executor broadcast memory. The 2GB broadcast constraint is no longer
    // required because the max_int byte buffer constraint has been fixed in Spark 1.4
    double memBudgetExec = SparkExecutionContext.getBroadcastMemoryBudget();
    double memBudgetLocal = OptimizerUtils.getLocalMemBudget();
    Hop X = getInput().get(0);
    Hop U = getInput().get(1);
    Hop V = getInput().get(2);
    // MR operator selection, part1
    // size U
    double m1Size = OptimizerUtils.estimateSize(U.getDim1(), U.getDim2());
    // size V
    double m2Size = OptimizerUtils.estimateSize(V.getDim1(), V.getDim2());
    boolean isMapWsig = (m1Size + m2Size < memBudgetExec && 2 * m1Size < memBudgetLocal && 2 * m2Size < memBudgetLocal);
    if (// broadcast
    !FORCE_REPLICATION && isMapWsig) {
        // map-side wsig always with broadcast
        Lop wsigmoid = new WeightedSigmoid(X.constructLops(), U.constructLops(), V.constructLops(), DataType.MATRIX, ValueType.DOUBLE, wtype, ExecType.SPARK);
        setOutputDimensions(wsigmoid);
        setLineNumbers(wsigmoid);
        setLops(wsigmoid);
    } else // general case
    {
        // MR operator selection part 2
        boolean cacheU = !FORCE_REPLICATION && (m1Size < memBudgetExec && 2 * m1Size < memBudgetLocal);
        boolean cacheV = !FORCE_REPLICATION && ((!cacheU && m2Size < memBudgetExec) || (cacheU && m1Size + m2Size < memBudgetExec)) && 2 * m2Size < memBudgetLocal;
        // reduce-side wsig w/ or without broadcast
        Lop wsigmoid = new WeightedSigmoidR(X.constructLops(), U.constructLops(), V.constructLops(), DataType.MATRIX, ValueType.DOUBLE, wtype, cacheU, cacheV, ExecType.SPARK);
        setOutputDimensions(wsigmoid);
        setLineNumbers(wsigmoid);
        setLops(wsigmoid);
    }
}
Also used : WeightedSigmoidR(org.apache.sysml.lops.WeightedSigmoidR) MultiThreadedHop(org.apache.sysml.hops.Hop.MultiThreadedHop) WeightedSigmoid(org.apache.sysml.lops.WeightedSigmoid) Lop(org.apache.sysml.lops.Lop)

Example 14 with Lop

use of org.apache.sysml.lops.Lop in project incubator-systemml by apache.

the class QuaternaryOp method constructRightFactorMRLop.

private Lop constructRightFactorMRLop(Hop U, Hop V, boolean cacheV, double m2Size) {
    Lop lV = null;
    if (cacheV) {
        // partitioning of V for read through distributed cache
        boolean needPartV = !V.dimsKnown() || V.getDim1() * V.getDim2() > DistributedCacheInput.PARTITION_SIZE;
        lV = V.constructLops();
        if (needPartV) {
            // requires partitioning
            lV = new DataPartition(lV, DataType.MATRIX, ValueType.DOUBLE, (m2Size > OptimizerUtils.getLocalMemBudget()) ? ExecType.MR : ExecType.CP, PDataPartitionFormat.ROW_BLOCK_WISE_N);
            lV.getOutputParameters().setDimensions(V.getDim1(), V.getDim2(), getRowsInBlock(), getColsInBlock(), V.getNnz());
            setLineNumbers(lV);
        }
    } else {
        // replication of t(V) for shuffle to target block
        Transform ltV = new Transform(V.constructLops(), HopsTransf2Lops.get(ReOrgOp.TRANSPOSE), getDataType(), getValueType(), ExecType.MR);
        ltV.getOutputParameters().setDimensions(V.getDim2(), V.getDim1(), V.getColsInBlock(), V.getRowsInBlock(), V.getNnz());
        setLineNumbers(ltV);
        // nrow of U determines num replicates
        Lop offset = createOffsetLop(U, false);
        lV = new RepMat(ltV, offset, false, V.getDataType(), V.getValueType());
        lV.getOutputParameters().setDimensions(V.getDim2(), V.getDim1(), V.getColsInBlock(), V.getRowsInBlock(), V.getNnz());
        setLineNumbers(lV);
        Group grpV = new Group(lV, Group.OperationTypes.Sort, DataType.MATRIX, ValueType.DOUBLE);
        grpV.getOutputParameters().setDimensions(V.getDim2(), V.getDim1(), V.getColsInBlock(), V.getRowsInBlock(), -1);
        setLineNumbers(grpV);
        lV = grpV;
    }
    return lV;
}
Also used : Group(org.apache.sysml.lops.Group) RepMat(org.apache.sysml.lops.RepMat) Lop(org.apache.sysml.lops.Lop) Transform(org.apache.sysml.lops.Transform) DataPartition(org.apache.sysml.lops.DataPartition)

Example 15 with Lop

use of org.apache.sysml.lops.Lop in project incubator-systemml by apache.

the class QuaternaryOp method constructMRLopsWeightedCeMM.

private void constructMRLopsWeightedCeMM(WCeMMType wtype) {
    // NOTE: the common case for wcemm are factors U/V with a rank of 10s to 100s; the current runtime only
    // supports single block outer products (U/V rank <= blocksize, i.e., 1000 by default); we enforce this
    // by applying the hop rewrite for Weighted Cross Entropy only if this constraint holds.
    Hop X = getInput().get(0);
    Hop U = getInput().get(1);
    Hop V = getInput().get(2);
    Hop eps = getInput().get(3);
    // MR operator selection, part1
    // size U
    double m1Size = OptimizerUtils.estimateSize(U.getDim1(), U.getDim2());
    // size V
    double m2Size = OptimizerUtils.estimateSize(V.getDim1(), V.getDim2());
    boolean isMapWcemm = (m1Size + m2Size < OptimizerUtils.getRemoteMemBudgetMap(true));
    if (// broadcast
    !FORCE_REPLICATION && isMapWcemm) {
        // partitioning of U
        boolean needPartU = !U.dimsKnown() || U.getDim1() * U.getDim2() > DistributedCacheInput.PARTITION_SIZE;
        Lop lU = U.constructLops();
        if (needPartU) {
            // requires partitioning
            lU = new DataPartition(lU, DataType.MATRIX, ValueType.DOUBLE, (m1Size > OptimizerUtils.getLocalMemBudget()) ? ExecType.MR : ExecType.CP, PDataPartitionFormat.ROW_BLOCK_WISE_N);
            lU.getOutputParameters().setDimensions(U.getDim1(), U.getDim2(), getRowsInBlock(), getColsInBlock(), U.getNnz());
            setLineNumbers(lU);
        }
        // partitioning of V
        boolean needPartV = !V.dimsKnown() || V.getDim1() * V.getDim2() > DistributedCacheInput.PARTITION_SIZE;
        Lop lV = V.constructLops();
        if (needPartV) {
            // requires partitioning
            lV = new DataPartition(lV, DataType.MATRIX, ValueType.DOUBLE, (m2Size > OptimizerUtils.getLocalMemBudget()) ? ExecType.MR : ExecType.CP, PDataPartitionFormat.ROW_BLOCK_WISE_N);
            lV.getOutputParameters().setDimensions(V.getDim1(), V.getDim2(), getRowsInBlock(), getColsInBlock(), V.getNnz());
            setLineNumbers(lV);
        }
        // map-side wcemm always with broadcast
        Lop wcemm = new WeightedCrossEntropy(X.constructLops(), lU, lV, eps.constructLops(), DataType.MATRIX, ValueType.DOUBLE, wtype, ExecType.MR);
        wcemm.getOutputParameters().setDimensions(1, 1, X.getRowsInBlock(), X.getColsInBlock(), -1);
        setLineNumbers(wcemm);
        Group grp = new Group(wcemm, Group.OperationTypes.Sort, DataType.MATRIX, ValueType.DOUBLE);
        grp.getOutputParameters().setDimensions(1, 1, X.getRowsInBlock(), X.getColsInBlock(), -1);
        setLineNumbers(grp);
        Aggregate agg1 = new Aggregate(grp, HopsAgg2Lops.get(AggOp.SUM), DataType.MATRIX, ValueType.DOUBLE, ExecType.MR);
        // aggregation uses kahanSum
        agg1.setupCorrectionLocation(CorrectionLocationType.NONE);
        agg1.getOutputParameters().setDimensions(1, 1, X.getRowsInBlock(), X.getColsInBlock(), -1);
        setLineNumbers(agg1);
        UnaryCP unary1 = new UnaryCP(agg1, HopsOpOp1LopsUS.get(OpOp1.CAST_AS_SCALAR), getDataType(), getValueType());
        unary1.getOutputParameters().setDimensions(0, 0, 0, 0, -1);
        setLineNumbers(unary1);
        setLops(unary1);
    } else // general case
    {
        // MR operator selection part 2
        boolean cacheU = !FORCE_REPLICATION && (m1Size < OptimizerUtils.getRemoteMemBudgetReduce());
        boolean cacheV = !FORCE_REPLICATION && ((!cacheU && m2Size < OptimizerUtils.getRemoteMemBudgetReduce()) || (cacheU && m1Size + m2Size < OptimizerUtils.getRemoteMemBudgetReduce()));
        Group grpX = new Group(X.constructLops(), Group.OperationTypes.Sort, DataType.MATRIX, ValueType.DOUBLE);
        grpX.getOutputParameters().setDimensions(X.getDim1(), X.getDim2(), X.getRowsInBlock(), X.getColsInBlock(), -1);
        setLineNumbers(grpX);
        Lop lU = constructLeftFactorMRLop(U, V, cacheU, m1Size);
        Lop lV = constructRightFactorMRLop(U, V, cacheV, m2Size);
        // reduce-side wcemm w/ or without broadcast
        Lop wcemm = new WeightedCrossEntropyR(grpX, lU, lV, eps.constructLops(), DataType.MATRIX, ValueType.DOUBLE, wtype, cacheU, cacheV, ExecType.MR);
        wcemm.getOutputParameters().setDimensions(1, 1, X.getRowsInBlock(), X.getColsInBlock(), -1);
        setLineNumbers(wcemm);
        Group grp = new Group(wcemm, Group.OperationTypes.Sort, DataType.MATRIX, ValueType.DOUBLE);
        grp.getOutputParameters().setDimensions(1, 1, X.getRowsInBlock(), X.getColsInBlock(), -1);
        setLineNumbers(grp);
        Aggregate agg1 = new Aggregate(grp, HopsAgg2Lops.get(AggOp.SUM), DataType.MATRIX, ValueType.DOUBLE, ExecType.MR);
        // aggregation uses kahanSum
        agg1.setupCorrectionLocation(CorrectionLocationType.NONE);
        agg1.getOutputParameters().setDimensions(1, 1, X.getRowsInBlock(), X.getColsInBlock(), -1);
        setLineNumbers(agg1);
        UnaryCP unary1 = new UnaryCP(agg1, HopsOpOp1LopsUS.get(OpOp1.CAST_AS_SCALAR), getDataType(), getValueType());
        unary1.getOutputParameters().setDimensions(0, 0, 0, 0, -1);
        setLineNumbers(unary1);
        setLops(unary1);
    }
}
Also used : Group(org.apache.sysml.lops.Group) WeightedCrossEntropyR(org.apache.sysml.lops.WeightedCrossEntropyR) MultiThreadedHop(org.apache.sysml.hops.Hop.MultiThreadedHop) WeightedCrossEntropy(org.apache.sysml.lops.WeightedCrossEntropy) Lop(org.apache.sysml.lops.Lop) Aggregate(org.apache.sysml.lops.Aggregate) DataPartition(org.apache.sysml.lops.DataPartition) UnaryCP(org.apache.sysml.lops.UnaryCP)

Aggregations

Lop (org.apache.sysml.lops.Lop)171 MultiThreadedHop (org.apache.sysml.hops.Hop.MultiThreadedHop)66 ExecType (org.apache.sysml.lops.LopProperties.ExecType)52 Group (org.apache.sysml.lops.Group)45 ArrayList (java.util.ArrayList)35 Aggregate (org.apache.sysml.lops.Aggregate)32 DataPartition (org.apache.sysml.lops.DataPartition)30 LopsException (org.apache.sysml.lops.LopsException)30 Data (org.apache.sysml.lops.Data)24 Instruction (org.apache.sysml.runtime.instructions.Instruction)23 MRJobInstruction (org.apache.sysml.runtime.instructions.MRJobInstruction)18 Unary (org.apache.sysml.lops.Unary)16 Transform (org.apache.sysml.lops.Transform)15 HashMap (java.util.HashMap)14 UnaryCP (org.apache.sysml.lops.UnaryCP)14 Dag (org.apache.sysml.lops.compile.Dag)13 Hop (org.apache.sysml.hops.Hop)11 RepMat (org.apache.sysml.lops.RepMat)11 Binary (org.apache.sysml.lops.Binary)9 CPInstruction (org.apache.sysml.runtime.instructions.cp.CPInstruction)9