Search in sources :

Example 1 with GroupedAggregate

use of org.apache.sysml.lops.GroupedAggregate in project incubator-systemml by apache.

the class ParameterizedBuiltinOp method constructLopsGroupedAggregate.

private void constructLopsGroupedAggregate(HashMap<String, Lop> inputlops, ExecType et) {
    // reset reblock requirement (see MR aggregate / construct lops)
    setRequiresReblock(false);
    // determine output dimensions
    long outputDim1 = -1, outputDim2 = -1;
    Lop numGroups = inputlops.get(Statement.GAGG_NUM_GROUPS);
    if (!dimsKnown() && numGroups != null && numGroups instanceof Data && ((Data) numGroups).isLiteral()) {
        long ngroups = ((Data) numGroups).getLongValue();
        Lop input = inputlops.get(GroupedAggregate.COMBINEDINPUT);
        long inDim1 = input.getOutputParameters().getNumRows();
        long inDim2 = input.getOutputParameters().getNumCols();
        boolean rowwise = (inDim1 == 1 && inDim2 > 1);
        if (rowwise) {
            // vector
            outputDim1 = ngroups;
            outputDim2 = 1;
        } else {
            // vector or matrix
            outputDim1 = inDim2;
            outputDim2 = ngroups;
        }
    }
    // construct lops
    if (et == ExecType.MR) {
        Lop grp_agg = null;
        // construct necessary lops: combineBinary/combineTertiary and groupedAgg
        boolean isWeighted = (_paramIndexMap.get(Statement.GAGG_WEIGHTS) != null);
        if (isWeighted) {
            Lop append = BinaryOp.constructAppendLopChain(getInput().get(_paramIndexMap.get(Statement.GAGG_TARGET)), getInput().get(_paramIndexMap.get(Statement.GAGG_GROUPS)), getInput().get(_paramIndexMap.get(Statement.GAGG_WEIGHTS)), DataType.MATRIX, getValueType(), true, getInput().get(_paramIndexMap.get(Statement.GAGG_TARGET)));
            // add the combine lop to parameter list, with a new name "combinedinput"
            inputlops.put(GroupedAggregate.COMBINEDINPUT, append);
            inputlops.remove(Statement.GAGG_TARGET);
            inputlops.remove(Statement.GAGG_GROUPS);
            inputlops.remove(Statement.GAGG_WEIGHTS);
            grp_agg = new GroupedAggregate(inputlops, isWeighted, getDataType(), getValueType());
            grp_agg.getOutputParameters().setDimensions(outputDim1, outputDim2, getRowsInBlock(), getColsInBlock(), -1);
            setRequiresReblock(true);
        } else {
            Hop target = getInput().get(_paramIndexMap.get(Statement.GAGG_TARGET));
            Hop groups = getInput().get(_paramIndexMap.get(Statement.GAGG_GROUPS));
            Lop append = null;
            // physical operator selection
            double groupsSizeP = OptimizerUtils.estimatePartitionedSizeExactSparsity(groups.getDim1(), groups.getDim2(), groups.getRowsInBlock(), groups.getColsInBlock(), groups.getNnz());
            if (// mapgroupedagg
            groupsSizeP < OptimizerUtils.getRemoteMemBudgetMap(true) && getParameterHop(Statement.GAGG_FN) instanceof LiteralOp && ((LiteralOp) getParameterHop(Statement.GAGG_FN)).getStringValue().equals("sum") && inputlops.get(Statement.GAGG_NUM_GROUPS) != null) {
                // pre partitioning
                boolean needPart = (groups.dimsKnown() && groups.getDim1() * groups.getDim2() > DistributedCacheInput.PARTITION_SIZE);
                if (needPart) {
                    ExecType etPart = (OptimizerUtils.estimateSizeExactSparsity(groups.getDim1(), groups.getDim2(), 1.0) < OptimizerUtils.getLocalMemBudget()) ? ExecType.CP : // operator selection
                    ExecType.MR;
                    Lop dcinput = new DataPartition(groups.constructLops(), DataType.MATRIX, ValueType.DOUBLE, etPart, PDataPartitionFormat.ROW_BLOCK_WISE_N);
                    dcinput.getOutputParameters().setDimensions(groups.getDim1(), groups.getDim2(), target.getRowsInBlock(), target.getColsInBlock(), groups.getNnz());
                    setLineNumbers(dcinput);
                    inputlops.put(Statement.GAGG_GROUPS, dcinput);
                }
                Lop grp_agg_m = new GroupedAggregateM(inputlops, getDataType(), getValueType(), needPart, ExecType.MR);
                grp_agg_m.getOutputParameters().setDimensions(outputDim1, outputDim2, target.getRowsInBlock(), target.getColsInBlock(), -1);
                setLineNumbers(grp_agg_m);
                // post aggregation
                Group grp = new Group(grp_agg_m, Group.OperationTypes.Sort, getDataType(), getValueType());
                grp.getOutputParameters().setDimensions(outputDim1, outputDim2, target.getRowsInBlock(), target.getColsInBlock(), -1);
                setLineNumbers(grp);
                Aggregate agg1 = new Aggregate(grp, HopsAgg2Lops.get(AggOp.SUM), getDataType(), getValueType(), ExecType.MR);
                agg1.setupCorrectionLocation(CorrectionLocationType.NONE);
                agg1.getOutputParameters().setDimensions(outputDim1, outputDim2, target.getRowsInBlock(), target.getColsInBlock(), -1);
                grp_agg = agg1;
            // note: no reblock required
            } else // general case: groupedagg
            {
                if (// multi-column-block result matrix
                target.getDim2() >= target.getColsInBlock() || // unkown
                target.getDim2() <= 0) {
                    long m1_dim1 = target.getDim1();
                    long m1_dim2 = target.getDim2();
                    long m2_dim1 = groups.getDim1();
                    long m2_dim2 = groups.getDim2();
                    long m3_dim1 = m1_dim1;
                    long m3_dim2 = ((m1_dim2 >= 0 && m2_dim2 >= 0) ? (m1_dim2 + m2_dim2) : -1);
                    long m3_nnz = (target.getNnz() > 0 && groups.getNnz() > 0) ? (target.getNnz() + groups.getNnz()) : -1;
                    long brlen = target.getRowsInBlock();
                    long bclen = target.getColsInBlock();
                    Lop offset = createOffsetLop(target, true);
                    Lop rep = new RepMat(groups.constructLops(), offset, true, groups.getDataType(), groups.getValueType());
                    setOutputDimensions(rep);
                    setLineNumbers(rep);
                    Group group1 = new Group(target.constructLops(), Group.OperationTypes.Sort, DataType.MATRIX, target.getValueType());
                    group1.getOutputParameters().setDimensions(m1_dim1, m1_dim2, brlen, bclen, target.getNnz());
                    setLineNumbers(group1);
                    Group group2 = new Group(rep, Group.OperationTypes.Sort, DataType.MATRIX, groups.getValueType());
                    group1.getOutputParameters().setDimensions(m2_dim1, m2_dim2, brlen, bclen, groups.getNnz());
                    setLineNumbers(group2);
                    append = new AppendR(group1, group2, DataType.MATRIX, ValueType.DOUBLE, true, ExecType.MR);
                    append.getOutputParameters().setDimensions(m3_dim1, m3_dim2, brlen, bclen, m3_nnz);
                    setLineNumbers(append);
                } else // single-column-block vector or matrix
                {
                    append = BinaryOp.constructMRAppendLop(target, groups, DataType.MATRIX, getValueType(), true, target);
                }
                // add the combine lop to parameter list, with a new name "combinedinput"
                inputlops.put(GroupedAggregate.COMBINEDINPUT, append);
                inputlops.remove(Statement.GAGG_TARGET);
                inputlops.remove(Statement.GAGG_GROUPS);
                grp_agg = new GroupedAggregate(inputlops, isWeighted, getDataType(), getValueType());
                grp_agg.getOutputParameters().setDimensions(outputDim1, outputDim2, getRowsInBlock(), getColsInBlock(), -1);
                setRequiresReblock(true);
            }
        }
        setLineNumbers(grp_agg);
        setLops(grp_agg);
    } else // CP/Spark
    {
        Lop grp_agg = null;
        if (et == ExecType.CP) {
            int k = OptimizerUtils.getConstrainedNumThreads(_maxNumThreads);
            grp_agg = new GroupedAggregate(inputlops, getDataType(), getValueType(), et, k);
            grp_agg.getOutputParameters().setDimensions(outputDim1, outputDim2, getRowsInBlock(), getColsInBlock(), -1);
        } else if (et == ExecType.SPARK) {
            // physical operator selection
            Hop groups = getParameterHop(Statement.GAGG_GROUPS);
            boolean broadcastGroups = (_paramIndexMap.get(Statement.GAGG_WEIGHTS) == null && OptimizerUtils.checkSparkBroadcastMemoryBudget(groups.getDim1(), groups.getDim2(), groups.getRowsInBlock(), groups.getColsInBlock(), groups.getNnz()));
            if (// mapgroupedagg
            broadcastGroups && getParameterHop(Statement.GAGG_FN) instanceof LiteralOp && ((LiteralOp) getParameterHop(Statement.GAGG_FN)).getStringValue().equals("sum") && inputlops.get(Statement.GAGG_NUM_GROUPS) != null) {
                Hop target = getTargetHop();
                grp_agg = new GroupedAggregateM(inputlops, getDataType(), getValueType(), true, ExecType.SPARK);
                grp_agg.getOutputParameters().setDimensions(outputDim1, outputDim2, target.getRowsInBlock(), target.getColsInBlock(), -1);
            // no reblock required (directly output binary block)
            } else // groupedagg (w/ or w/o broadcast)
            {
                grp_agg = new GroupedAggregate(inputlops, getDataType(), getValueType(), et, broadcastGroups);
                grp_agg.getOutputParameters().setDimensions(outputDim1, outputDim2, -1, -1, -1);
                setRequiresReblock(true);
            }
        }
        setLineNumbers(grp_agg);
        setLops(grp_agg);
    }
}
Also used : Group(org.apache.sysml.lops.Group) MultiThreadedHop(org.apache.sysml.hops.Hop.MultiThreadedHop) Data(org.apache.sysml.lops.Data) Lop(org.apache.sysml.lops.Lop) RepMat(org.apache.sysml.lops.RepMat) AppendR(org.apache.sysml.lops.AppendR) ExecType(org.apache.sysml.lops.LopProperties.ExecType) GroupedAggregate(org.apache.sysml.lops.GroupedAggregate) Aggregate(org.apache.sysml.lops.Aggregate) GroupedAggregate(org.apache.sysml.lops.GroupedAggregate) DataPartition(org.apache.sysml.lops.DataPartition) GroupedAggregateM(org.apache.sysml.lops.GroupedAggregateM)

Example 2 with GroupedAggregate

use of org.apache.sysml.lops.GroupedAggregate in project systemml by apache.

the class ParameterizedBuiltinOp method constructLopsGroupedAggregate.

private void constructLopsGroupedAggregate(HashMap<String, Lop> inputlops, ExecType et) {
    // reset reblock requirement (see MR aggregate / construct lops)
    setRequiresReblock(false);
    // determine output dimensions
    long outputDim1 = -1, outputDim2 = -1;
    Lop numGroups = inputlops.get(Statement.GAGG_NUM_GROUPS);
    if (!dimsKnown() && numGroups != null && numGroups instanceof Data && ((Data) numGroups).isLiteral()) {
        long ngroups = ((Data) numGroups).getLongValue();
        Lop input = inputlops.get(GroupedAggregate.COMBINEDINPUT);
        long inDim1 = input.getOutputParameters().getNumRows();
        long inDim2 = input.getOutputParameters().getNumCols();
        boolean rowwise = (inDim1 == 1 && inDim2 > 1);
        if (rowwise) {
            // vector
            outputDim1 = ngroups;
            outputDim2 = 1;
        } else {
            // vector or matrix
            outputDim1 = inDim2;
            outputDim2 = ngroups;
        }
    }
    // construct lops
    if (et == ExecType.MR) {
        Lop grp_agg = null;
        // construct necessary lops: combineBinary/combineTertiary and groupedAgg
        boolean isWeighted = (_paramIndexMap.get(Statement.GAGG_WEIGHTS) != null);
        if (isWeighted) {
            Lop append = BinaryOp.constructAppendLopChain(getInput().get(_paramIndexMap.get(Statement.GAGG_TARGET)), getInput().get(_paramIndexMap.get(Statement.GAGG_GROUPS)), getInput().get(_paramIndexMap.get(Statement.GAGG_WEIGHTS)), DataType.MATRIX, getValueType(), true, getInput().get(_paramIndexMap.get(Statement.GAGG_TARGET)));
            // add the combine lop to parameter list, with a new name "combinedinput"
            inputlops.put(GroupedAggregate.COMBINEDINPUT, append);
            inputlops.remove(Statement.GAGG_TARGET);
            inputlops.remove(Statement.GAGG_GROUPS);
            inputlops.remove(Statement.GAGG_WEIGHTS);
            grp_agg = new GroupedAggregate(inputlops, isWeighted, getDataType(), getValueType());
            grp_agg.getOutputParameters().setDimensions(outputDim1, outputDim2, getRowsInBlock(), getColsInBlock(), -1);
            setRequiresReblock(true);
        } else {
            Hop target = getInput().get(_paramIndexMap.get(Statement.GAGG_TARGET));
            Hop groups = getInput().get(_paramIndexMap.get(Statement.GAGG_GROUPS));
            Lop append = null;
            // physical operator selection
            double groupsSizeP = OptimizerUtils.estimatePartitionedSizeExactSparsity(groups.getDim1(), groups.getDim2(), groups.getRowsInBlock(), groups.getColsInBlock(), groups.getNnz());
            if (// mapgroupedagg
            groupsSizeP < OptimizerUtils.getRemoteMemBudgetMap(true) && getParameterHop(Statement.GAGG_FN) instanceof LiteralOp && ((LiteralOp) getParameterHop(Statement.GAGG_FN)).getStringValue().equals("sum") && inputlops.get(Statement.GAGG_NUM_GROUPS) != null) {
                // pre partitioning
                boolean needPart = (groups.dimsKnown() && groups.getDim1() * groups.getDim2() > DistributedCacheInput.PARTITION_SIZE);
                if (needPart) {
                    ExecType etPart = (OptimizerUtils.estimateSizeExactSparsity(groups.getDim1(), groups.getDim2(), 1.0) < OptimizerUtils.getLocalMemBudget()) ? ExecType.CP : // operator selection
                    ExecType.MR;
                    Lop dcinput = new DataPartition(groups.constructLops(), DataType.MATRIX, ValueType.DOUBLE, etPart, PDataPartitionFormat.ROW_BLOCK_WISE_N);
                    dcinput.getOutputParameters().setDimensions(groups.getDim1(), groups.getDim2(), target.getRowsInBlock(), target.getColsInBlock(), groups.getNnz());
                    setLineNumbers(dcinput);
                    inputlops.put(Statement.GAGG_GROUPS, dcinput);
                }
                Lop grp_agg_m = new GroupedAggregateM(inputlops, getDataType(), getValueType(), needPart, ExecType.MR);
                grp_agg_m.getOutputParameters().setDimensions(outputDim1, outputDim2, target.getRowsInBlock(), target.getColsInBlock(), -1);
                setLineNumbers(grp_agg_m);
                // post aggregation
                Group grp = new Group(grp_agg_m, Group.OperationTypes.Sort, getDataType(), getValueType());
                grp.getOutputParameters().setDimensions(outputDim1, outputDim2, target.getRowsInBlock(), target.getColsInBlock(), -1);
                setLineNumbers(grp);
                Aggregate agg1 = new Aggregate(grp, HopsAgg2Lops.get(AggOp.SUM), getDataType(), getValueType(), ExecType.MR);
                agg1.setupCorrectionLocation(CorrectionLocationType.NONE);
                agg1.getOutputParameters().setDimensions(outputDim1, outputDim2, target.getRowsInBlock(), target.getColsInBlock(), -1);
                grp_agg = agg1;
            // note: no reblock required
            } else // general case: groupedagg
            {
                if (// multi-column-block result matrix
                target.getDim2() >= target.getColsInBlock() || // unkown
                target.getDim2() <= 0) {
                    long m1_dim1 = target.getDim1();
                    long m1_dim2 = target.getDim2();
                    long m2_dim1 = groups.getDim1();
                    long m2_dim2 = groups.getDim2();
                    long m3_dim1 = m1_dim1;
                    long m3_dim2 = ((m1_dim2 >= 0 && m2_dim2 >= 0) ? (m1_dim2 + m2_dim2) : -1);
                    long m3_nnz = (target.getNnz() > 0 && groups.getNnz() > 0) ? (target.getNnz() + groups.getNnz()) : -1;
                    long brlen = target.getRowsInBlock();
                    long bclen = target.getColsInBlock();
                    Lop offset = createOffsetLop(target, true);
                    Lop rep = new RepMat(groups.constructLops(), offset, true, groups.getDataType(), groups.getValueType());
                    setOutputDimensions(rep);
                    setLineNumbers(rep);
                    Group group1 = new Group(target.constructLops(), Group.OperationTypes.Sort, DataType.MATRIX, target.getValueType());
                    group1.getOutputParameters().setDimensions(m1_dim1, m1_dim2, brlen, bclen, target.getNnz());
                    setLineNumbers(group1);
                    Group group2 = new Group(rep, Group.OperationTypes.Sort, DataType.MATRIX, groups.getValueType());
                    group1.getOutputParameters().setDimensions(m2_dim1, m2_dim2, brlen, bclen, groups.getNnz());
                    setLineNumbers(group2);
                    append = new AppendR(group1, group2, DataType.MATRIX, ValueType.DOUBLE, true, ExecType.MR);
                    append.getOutputParameters().setDimensions(m3_dim1, m3_dim2, brlen, bclen, m3_nnz);
                    setLineNumbers(append);
                } else // single-column-block vector or matrix
                {
                    append = BinaryOp.constructMRAppendLop(target, groups, DataType.MATRIX, getValueType(), true, target);
                }
                // add the combine lop to parameter list, with a new name "combinedinput"
                inputlops.put(GroupedAggregate.COMBINEDINPUT, append);
                inputlops.remove(Statement.GAGG_TARGET);
                inputlops.remove(Statement.GAGG_GROUPS);
                grp_agg = new GroupedAggregate(inputlops, isWeighted, getDataType(), getValueType());
                grp_agg.getOutputParameters().setDimensions(outputDim1, outputDim2, getRowsInBlock(), getColsInBlock(), -1);
                setRequiresReblock(true);
            }
        }
        setLineNumbers(grp_agg);
        setLops(grp_agg);
    } else // CP/Spark
    {
        Lop grp_agg = null;
        if (et == ExecType.CP) {
            int k = OptimizerUtils.getConstrainedNumThreads(_maxNumThreads);
            grp_agg = new GroupedAggregate(inputlops, getDataType(), getValueType(), et, k);
            grp_agg.getOutputParameters().setDimensions(outputDim1, outputDim2, getRowsInBlock(), getColsInBlock(), -1);
        } else if (et == ExecType.SPARK) {
            // physical operator selection
            Hop groups = getParameterHop(Statement.GAGG_GROUPS);
            boolean broadcastGroups = (_paramIndexMap.get(Statement.GAGG_WEIGHTS) == null && OptimizerUtils.checkSparkBroadcastMemoryBudget(groups.getDim1(), groups.getDim2(), groups.getRowsInBlock(), groups.getColsInBlock(), groups.getNnz()));
            if (// mapgroupedagg
            broadcastGroups && getParameterHop(Statement.GAGG_FN) instanceof LiteralOp && ((LiteralOp) getParameterHop(Statement.GAGG_FN)).getStringValue().equals("sum") && inputlops.get(Statement.GAGG_NUM_GROUPS) != null) {
                Hop target = getTargetHop();
                grp_agg = new GroupedAggregateM(inputlops, getDataType(), getValueType(), true, ExecType.SPARK);
                grp_agg.getOutputParameters().setDimensions(outputDim1, outputDim2, target.getRowsInBlock(), target.getColsInBlock(), -1);
            // no reblock required (directly output binary block)
            } else // groupedagg (w/ or w/o broadcast)
            {
                grp_agg = new GroupedAggregate(inputlops, getDataType(), getValueType(), et, broadcastGroups);
                grp_agg.getOutputParameters().setDimensions(outputDim1, outputDim2, -1, -1, -1);
                setRequiresReblock(true);
            }
        }
        setLineNumbers(grp_agg);
        setLops(grp_agg);
    }
}
Also used : Group(org.apache.sysml.lops.Group) MultiThreadedHop(org.apache.sysml.hops.Hop.MultiThreadedHop) Data(org.apache.sysml.lops.Data) Lop(org.apache.sysml.lops.Lop) RepMat(org.apache.sysml.lops.RepMat) AppendR(org.apache.sysml.lops.AppendR) ExecType(org.apache.sysml.lops.LopProperties.ExecType) GroupedAggregate(org.apache.sysml.lops.GroupedAggregate) Aggregate(org.apache.sysml.lops.Aggregate) GroupedAggregate(org.apache.sysml.lops.GroupedAggregate) DataPartition(org.apache.sysml.lops.DataPartition) GroupedAggregateM(org.apache.sysml.lops.GroupedAggregateM)

Aggregations

MultiThreadedHop (org.apache.sysml.hops.Hop.MultiThreadedHop)2 Aggregate (org.apache.sysml.lops.Aggregate)2 AppendR (org.apache.sysml.lops.AppendR)2 Data (org.apache.sysml.lops.Data)2 DataPartition (org.apache.sysml.lops.DataPartition)2 Group (org.apache.sysml.lops.Group)2 GroupedAggregate (org.apache.sysml.lops.GroupedAggregate)2 GroupedAggregateM (org.apache.sysml.lops.GroupedAggregateM)2 Lop (org.apache.sysml.lops.Lop)2 ExecType (org.apache.sysml.lops.LopProperties.ExecType)2 RepMat (org.apache.sysml.lops.RepMat)2