
Example 1 with Aggregate

Use of org.apache.sysml.lops.Aggregate in project incubator-systemml by apache.

The class ParameterizedBuiltinOp, method constructLopsRemoveEmpty.

private void constructLopsRemoveEmpty(HashMap<String, Lop> inputlops, ExecType et) {
    Hop targetHop = getTargetHop();
    Hop marginHop = getParameterHop("margin");
    Hop selectHop = getParameterHop("select");
    Hop emptyRet = getParameterHop("empty.return");
    if (et == ExecType.CP || et == ExecType.CP_FILE) {
        ParameterizedBuiltin pbilop = new ParameterizedBuiltin(inputlops, HopsParameterizedBuiltinLops.get(_op), getDataType(), getValueType(), et);
        setOutputDimensions(pbilop);
        setLineNumbers(pbilop);
        setLops(pbilop);
        /* DISABLED CP PMM (see for example, MDA Bivar test, requires size propagation on recompile)
        if (et == ExecType.CP && isTargetDiagInput() && marginHop instanceof LiteralOp
                && ((LiteralOp) marginHop).getStringValue().equals("rows")
                && _outputPermutationMatrix) // SPECIAL CASE SELECTION VECTOR
        {
            // TODO this special case could be taken into account for memory estimates in order
            // to reduce the estimates for the input diag and subsequent matrix multiply

            // get input vector (without materializing diag())
            Hop input = targetHop.getInput().get(0);
            long brlen = input.getRowsInBlock();
            long bclen = input.getColsInBlock();
            MemoTable memo = new MemoTable();

            boolean isPPredInput = (input instanceof BinaryOp && ((BinaryOp) input).isPPredOperation());

            // step 1: compute index vectors
            Hop ppred0 = input;
            if (!isPPredInput) { // ppred only if required
                ppred0 = new BinaryOp("tmp1", DataType.MATRIX, ValueType.DOUBLE, OpOp2.NOTEQUAL, input, new LiteralOp("0", 0));
                HopRewriteUtils.setOutputBlocksizes(ppred0, brlen, bclen);
                ppred0.refreshSizeInformation();
                ppred0.computeMemEstimate(memo); // select exec type
                HopRewriteUtils.copyLineNumbers(this, ppred0);
            }

            UnaryOp cumsum = new UnaryOp("tmp2", DataType.MATRIX, ValueType.DOUBLE, OpOp1.CUMSUM, ppred0);
            HopRewriteUtils.setOutputBlocksizes(cumsum, brlen, bclen);
            cumsum.refreshSizeInformation();
            cumsum.computeMemEstimate(memo); // select exec type
            HopRewriteUtils.copyLineNumbers(this, cumsum);

            BinaryOp sel = new BinaryOp("tmp3", DataType.MATRIX, ValueType.DOUBLE, OpOp2.MULT, ppred0, cumsum);
            HopRewriteUtils.setOutputBlocksizes(sel, brlen, bclen);
            sel.refreshSizeInformation();
            sel.computeMemEstimate(memo); // select exec type
            HopRewriteUtils.copyLineNumbers(this, sel);
            Lop loutput = sel.constructLops();

            // step 4: cleanup hops (allow for garbage collection)
            HopRewriteUtils.removeChildReference(ppred0, input);

            setLops(loutput);
        }
        else // GENERAL CASE
        {
            ParameterizedBuiltin pbilop = new ParameterizedBuiltin(et, inputlops,
                    HopsParameterizedBuiltinLops.get(_op), getDataType(), getValueType());

            pbilop.getOutputParameters().setDimensions(getDim1(), getDim2(), getRowsInBlock(), getColsInBlock(), getNnz());
            setLineNumbers(pbilop);
            setLops(pbilop);
        }
        */
    } else if (et == ExecType.MR) {
        // special compile for mr removeEmpty-diag
        if (isTargetDiagInput() && HopRewriteUtils.isLiteralOfValue(marginHop, "rows")) {
            // get input vector (without materializing diag())
            Hop input = targetHop.getInput().get(0);
            int brlen = input.getRowsInBlock();
            int bclen = input.getColsInBlock();
            MemoTable memo = new MemoTable();
            boolean isPPredInput = (input instanceof BinaryOp && ((BinaryOp) input).isPPredOperation());
            // step1: compute index vectors
            Hop ppred0 = input;
            if (!isPPredInput) {
                // ppred only if required
                ppred0 = HopRewriteUtils.createBinary(input, new LiteralOp(0), OpOp2.NOTEQUAL);
                HopRewriteUtils.updateHopCharacteristics(ppred0, brlen, bclen, memo, this);
            }
            UnaryOp cumsum = HopRewriteUtils.createUnary(ppred0, OpOp1.CUMSUM);
            HopRewriteUtils.updateHopCharacteristics(cumsum, brlen, bclen, memo, this);
            Lop loutput = null;
            double mest = AggBinaryOp.getMapmmMemEstimate(input.getDim1(), 1, brlen, bclen, -1, brlen, bclen, brlen, bclen, -1, 1, true);
            double mbudget = OptimizerUtils.getRemoteMemBudgetMap(true);
            // SPECIAL CASE: SELECTION VECTOR
            if (_outputPermutationMatrix && mest < mbudget) {
                BinaryOp sel = HopRewriteUtils.createBinary(ppred0, cumsum, OpOp2.MULT);
                HopRewriteUtils.updateHopCharacteristics(sel, brlen, bclen, memo, this);
                loutput = sel.constructLops();
            } else { // GENERAL CASE: GENERAL PERMUTATION MATRIX
                // max ensures non-zero entries and at least one output row
                BinaryOp max = HopRewriteUtils.createBinary(cumsum, new LiteralOp(1), OpOp2.MAX);
                HopRewriteUtils.updateHopCharacteristics(max, brlen, bclen, memo, this);
                DataGenOp seq = HopRewriteUtils.createSeqDataGenOp(input);
                seq.setName("tmp4");
                HopRewriteUtils.updateHopCharacteristics(seq, brlen, bclen, memo, this);
                // step 2: compute removeEmpty(rows) output via table, seq guarantees right column dimension
                // note: weights always the input (even if isPPredInput) because input also includes 0s
                TernaryOp table = new TernaryOp("tmp5", DataType.MATRIX, ValueType.DOUBLE, OpOp3.CTABLE, max, seq, input);
                table.setOutputBlocksizes(brlen, bclen);
                table.refreshSizeInformation();
                // force MR
                table.setForcedExecType(ExecType.MR);
                HopRewriteUtils.copyLineNumbers(this, table);
                table.setDisjointInputs(true);
                table.setOutputEmptyBlocks(_outputEmptyBlocks);
                loutput = table.constructLops();
                HopRewriteUtils.removeChildReference(table, input);
            }
            // Step 4: cleanup hops (allow for garbage collection)
            HopRewriteUtils.removeChildReference(ppred0, input);
            setLops(loutput);
        } else if (et == ExecType.MR) { // default mr remove empty
            if (!(marginHop instanceof LiteralOp))
                throw new HopsException("Parameter 'margin' must be a literal argument.");
            Hop input = targetHop;
            long rlen = input.getDim1();
            long clen = input.getDim2();
            int brlen = input.getRowsInBlock();
            int bclen = input.getColsInBlock();
            long nnz = input.getNnz();
            boolean rmRows = ((LiteralOp) marginHop).getStringValue().equals("rows");
            // construct lops via new partial hop dag and subsequent lops construction
            // in order to reuse operator selection decisions
            BinaryOp ppred0 = null;
            Hop emptyInd = null;
            if (selectHop == null) {
                // Step1: compute row/col non-empty indicators
                ppred0 = HopRewriteUtils.createBinary(input, new LiteralOp(0), OpOp2.NOTEQUAL);
                // always MR
                ppred0.setForcedExecType(ExecType.MR);
                emptyInd = ppred0;
                if (!((rmRows && clen == 1) || (!rmRows && rlen == 1))) {
                    emptyInd = HopRewriteUtils.createAggUnaryOp(ppred0, AggOp.MAX, rmRows ? Direction.Row : Direction.Col);
                    // always MR
                    emptyInd.setForcedExecType(ExecType.MR);
                    HopRewriteUtils.copyLineNumbers(this, emptyInd);
                }
            } else {
                emptyInd = selectHop;
            }
            // Step 2: compute row offsets for non-empty rows
            Hop cumsumInput = emptyInd;
            if (!rmRows) {
                cumsumInput = HopRewriteUtils.createTranspose(emptyInd);
                HopRewriteUtils.updateHopCharacteristics(cumsumInput, brlen, bclen, this);
            }
            UnaryOp cumsum = HopRewriteUtils.createUnary(cumsumInput, OpOp1.CUMSUM);
            HopRewriteUtils.updateHopCharacteristics(cumsum, brlen, bclen, this);
            Hop cumsumOutput = cumsum;
            if (!rmRows) {
                cumsumOutput = HopRewriteUtils.createTranspose(cumsum);
                HopRewriteUtils.updateHopCharacteristics(cumsumOutput, brlen, bclen, this);
            }
            // alternative: right indexing
            Hop maxDim = HopRewriteUtils.createAggUnaryOp(cumsumOutput, AggOp.MAX, Direction.RowCol);
            HopRewriteUtils.updateHopCharacteristics(maxDim, brlen, bclen, this);
            BinaryOp offsets = HopRewriteUtils.createBinary(cumsumOutput, emptyInd, OpOp2.MULT);
            HopRewriteUtils.updateHopCharacteristics(offsets, brlen, bclen, this);
            // Step 3: gather non-empty rows/cols into final results
            Lop linput = input.constructLops();
            Lop loffset = offsets.constructLops();
            Lop lmaxdim = maxDim.constructLops();
            double mestPM = OptimizerUtils.estimatePartitionedSizeExactSparsity(rlen, 1, brlen, bclen, 1.0);
            Lop rmEmpty = null;
            // a) broadcast-based PMM (permutation matrix mult)
            if (rmRows && rlen >= 0 && mestPM < OptimizerUtils.getRemoteMemBudgetMap() && HopRewriteUtils.isLiteralOfValue(emptyRet, false)) {
                boolean needPart = !offsets.dimsKnown() || offsets.getDim1() > DistributedCacheInput.PARTITION_SIZE;
                if (needPart) {
                    // requires partitioning
                    loffset = new DataPartition(loffset, DataType.MATRIX, ValueType.DOUBLE, (mestPM > OptimizerUtils.getLocalMemBudget()) ? ExecType.MR : ExecType.CP, PDataPartitionFormat.ROW_BLOCK_WISE_N);
                    loffset.getOutputParameters().setDimensions(rlen, 1, brlen, bclen, rlen);
                    setLineNumbers(loffset);
                }
                rmEmpty = new PMMJ(loffset, linput, lmaxdim, getDataType(), getValueType(), needPart, true, ExecType.MR);
                setOutputDimensions(rmEmpty);
                setLineNumbers(rmEmpty);
            } else // b) general case: repartition-based rmempty
            {
                boolean requiresRep = ((clen > bclen || clen <= 0) && rmRows) || ((rlen > brlen || rlen <= 0) && !rmRows);
                if (requiresRep) {
                    // ncol of left input (determines num replicates)
                    Lop pos = createOffsetLop(input, rmRows);
                    loffset = new RepMat(loffset, pos, rmRows, DataType.MATRIX, ValueType.DOUBLE);
                    loffset.getOutputParameters().setDimensions(rlen, clen, brlen, bclen, nnz);
                    setLineNumbers(loffset);
                }
                Group group1 = new Group(linput, Group.OperationTypes.Sort, getDataType(), getValueType());
                setLineNumbers(group1);
                group1.getOutputParameters().setDimensions(rlen, clen, brlen, bclen, nnz);
                Group group2 = new Group(loffset, Group.OperationTypes.Sort, getDataType(), getValueType());
                setLineNumbers(group2);
                group2.getOutputParameters().setDimensions(rlen, clen, brlen, bclen, nnz);
                HashMap<String, Lop> inMap = new HashMap<>();
                inMap.put("target", group1);
                inMap.put("offset", group2);
                inMap.put("maxdim", lmaxdim);
                inMap.put("margin", inputlops.get("margin"));
                inMap.put("empty.return", inputlops.get("empty.return"));
                rmEmpty = new ParameterizedBuiltin(inMap, HopsParameterizedBuiltinLops.get(_op), getDataType(), getValueType(), et);
                setOutputDimensions(rmEmpty);
                setLineNumbers(rmEmpty);
            }
            Group group3 = new Group(rmEmpty, Group.OperationTypes.Sort, getDataType(), getValueType());
            setLineNumbers(group3);
            group3.getOutputParameters().setDimensions(-1, -1, brlen, bclen, -1);
            Aggregate finalagg = new Aggregate(group3, Aggregate.OperationTypes.Sum, DataType.MATRIX, getValueType(), ExecType.MR);
            setOutputDimensions(finalagg);
            setLineNumbers(finalagg);
            // Step 4: cleanup hops (allow for garbage collection)
            if (selectHop == null)
                HopRewriteUtils.removeChildReference(ppred0, input);
            setLops(finalagg);
        }
    } else if (et == ExecType.SPARK) {
        if (!(marginHop instanceof LiteralOp))
            throw new HopsException("Parameter 'margin' must be a literal argument.");
        Hop input = targetHop;
        long rlen = input.getDim1();
        long clen = input.getDim2();
        int brlen = input.getRowsInBlock();
        int bclen = input.getColsInBlock();
        boolean rmRows = ((LiteralOp) marginHop).getStringValue().equals("rows");
        // construct lops via new partial hop dag and subsequent lops construction
        // in order to reuse operator selection decisions
        BinaryOp ppred0 = null;
        Hop emptyInd = null;
        if (selectHop == null) {
            // Step1: compute row/col non-empty indicators
            ppred0 = HopRewriteUtils.createBinary(input, new LiteralOp(0), OpOp2.NOTEQUAL);
            // always Spark
            ppred0.setForcedExecType(ExecType.SPARK);
            emptyInd = ppred0;
            if (!((rmRows && clen == 1) || (!rmRows && rlen == 1))) {
                emptyInd = HopRewriteUtils.createAggUnaryOp(ppred0, AggOp.MAX, rmRows ? Direction.Row : Direction.Col);
                // always Spark
                emptyInd.setForcedExecType(ExecType.SPARK);
            }
        } else {
            emptyInd = selectHop;
        }
        // Step 2: compute row offsets for non-empty rows
        Hop cumsumInput = emptyInd;
        if (!rmRows) {
            cumsumInput = HopRewriteUtils.createTranspose(emptyInd);
            HopRewriteUtils.updateHopCharacteristics(cumsumInput, brlen, bclen, this);
        }
        UnaryOp cumsum = HopRewriteUtils.createUnary(cumsumInput, OpOp1.CUMSUM);
        HopRewriteUtils.updateHopCharacteristics(cumsum, brlen, bclen, this);
        Hop cumsumOutput = cumsum;
        if (!rmRows) {
            cumsumOutput = HopRewriteUtils.createTranspose(cumsum);
            HopRewriteUtils.updateHopCharacteristics(cumsumOutput, brlen, bclen, this);
        }
        // alternative: right indexing
        Hop maxDim = HopRewriteUtils.createAggUnaryOp(cumsumOutput, AggOp.MAX, Direction.RowCol);
        HopRewriteUtils.updateHopCharacteristics(maxDim, brlen, bclen, this);
        BinaryOp offsets = HopRewriteUtils.createBinary(cumsumOutput, emptyInd, OpOp2.MULT);
        HopRewriteUtils.updateHopCharacteristics(offsets, brlen, bclen, this);
        // Step 3: gather non-empty rows/cols into final results
        Lop linput = input.constructLops();
        Lop loffset = offsets.constructLops();
        Lop lmaxdim = maxDim.constructLops();
        HashMap<String, Lop> inMap = new HashMap<>();
        inMap.put("target", linput);
        inMap.put("offset", loffset);
        inMap.put("maxdim", lmaxdim);
        inMap.put("margin", inputlops.get("margin"));
        inMap.put("empty.return", inputlops.get("empty.return"));
        if (!FORCE_DIST_RM_EMPTY && isRemoveEmptyBcSP())
            _bRmEmptyBC = true;
        ParameterizedBuiltin pbilop = new ParameterizedBuiltin(inMap, HopsParameterizedBuiltinLops.get(_op), getDataType(), getValueType(), et, _bRmEmptyBC);
        setOutputDimensions(pbilop);
        setLineNumbers(pbilop);
        // Step 4: cleanup hops (allow for garbage collection)
        if (selectHop == null)
            HopRewriteUtils.removeChildReference(ppred0, input);
        setLops(pbilop);
    // NOTE: in contrast to MR, replication and aggregation are handled instruction-locally
    }
}
Also used: Group(org.apache.sysml.lops.Group) ParameterizedBuiltin(org.apache.sysml.lops.ParameterizedBuiltin) HashMap(java.util.HashMap) MultiThreadedHop(org.apache.sysml.hops.Hop.MultiThreadedHop) Lop(org.apache.sysml.lops.Lop) RepMat(org.apache.sysml.lops.RepMat) GroupedAggregate(org.apache.sysml.lops.GroupedAggregate) Aggregate(org.apache.sysml.lops.Aggregate) DataPartition(org.apache.sysml.lops.DataPartition) PMMJ(org.apache.sysml.lops.PMMJ)
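
The MR branch above compiles removeEmpty via a small auxiliary HOP DAG: a 0/1 non-empty indicator, a cumulative sum that turns the indicator into 1-based target positions, and a gather of the surviving rows or columns. As a reading aid, here is a minimal single-node sketch of that offset logic for margin="rows" (plain Java written for this page, not the SystemML runtime path; the class and method names are made up):

import java.util.Arrays;

public class RemoveEmptyRowsSketch {
    // Mirrors the rewrite: emptyInd = rowMaxs(X != 0); offsets = cumsum(emptyInd) * emptyInd;
    // each non-empty row i is scattered to 1-based position offsets[i].
    static double[][] removeEmptyRows(double[][] X) {
        int m = X.length, n = X[0].length;
        int[] offsets = new int[m];
        int count = 0;
        for (int i = 0; i < m; i++) {
            boolean nonEmpty = false;
            for (int j = 0; j < n; j++)
                nonEmpty |= (X[i][j] != 0);
            // the running count is the cumsum of the 0/1 indicator
            if (nonEmpty)
                offsets[i] = ++count;
        }
        // max(count, 1) ensures at least one output row, as in the general MR case above
        double[][] out = new double[Math.max(count, 1)][n];
        for (int i = 0; i < m; i++)
            if (offsets[i] > 0)
                out[offsets[i] - 1] = Arrays.copyOf(X[i], n);
        return out;
    }

    public static void main(String[] args) {
        double[][] X = { { 0, 0 }, { 1, 2 }, { 0, 0 }, { 3, 0 } };
        // prints [[1.0, 2.0], [3.0, 0.0]]
        System.out.println(Arrays.deepToString(removeEmptyRows(X)));
    }
}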

Example 2 with Aggregate

Use of org.apache.sysml.lops.Aggregate in project incubator-systemml by apache.

The class ParameterizedBuiltinOp, method constructLopsGroupedAggregate.

private void constructLopsGroupedAggregate(HashMap<String, Lop> inputlops, ExecType et) {
    // reset reblock requirement (see MR aggregate / construct lops)
    setRequiresReblock(false);
    // determine output dimensions
    long outputDim1 = -1, outputDim2 = -1;
    Lop numGroups = inputlops.get(Statement.GAGG_NUM_GROUPS);
    if (!dimsKnown() && numGroups != null && numGroups instanceof Data && ((Data) numGroups).isLiteral()) {
        long ngroups = ((Data) numGroups).getLongValue();
        Lop input = inputlops.get(GroupedAggregate.COMBINEDINPUT);
        long inDim1 = input.getOutputParameters().getNumRows();
        long inDim2 = input.getOutputParameters().getNumCols();
        boolean rowwise = (inDim1 == 1 && inDim2 > 1);
        if (rowwise) {
            // vector
            outputDim1 = ngroups;
            outputDim2 = 1;
        } else {
            // vector or matrix
            outputDim1 = inDim2;
            outputDim2 = ngroups;
        }
    }
    // construct lops
    if (et == ExecType.MR) {
        Lop grp_agg = null;
        // construct necessary lops: combineBinary/combineTernary and groupedAgg
        boolean isWeighted = (_paramIndexMap.get(Statement.GAGG_WEIGHTS) != null);
        if (isWeighted) {
            Lop append = BinaryOp.constructAppendLopChain(getInput().get(_paramIndexMap.get(Statement.GAGG_TARGET)), getInput().get(_paramIndexMap.get(Statement.GAGG_GROUPS)), getInput().get(_paramIndexMap.get(Statement.GAGG_WEIGHTS)), DataType.MATRIX, getValueType(), true, getInput().get(_paramIndexMap.get(Statement.GAGG_TARGET)));
            // add the combine lop to parameter list, with a new name "combinedinput"
            inputlops.put(GroupedAggregate.COMBINEDINPUT, append);
            inputlops.remove(Statement.GAGG_TARGET);
            inputlops.remove(Statement.GAGG_GROUPS);
            inputlops.remove(Statement.GAGG_WEIGHTS);
            grp_agg = new GroupedAggregate(inputlops, isWeighted, getDataType(), getValueType());
            grp_agg.getOutputParameters().setDimensions(outputDim1, outputDim2, getRowsInBlock(), getColsInBlock(), -1);
            setRequiresReblock(true);
        } else {
            Hop target = getInput().get(_paramIndexMap.get(Statement.GAGG_TARGET));
            Hop groups = getInput().get(_paramIndexMap.get(Statement.GAGG_GROUPS));
            Lop append = null;
            // physical operator selection
            double groupsSizeP = OptimizerUtils.estimatePartitionedSizeExactSparsity(groups.getDim1(), groups.getDim2(), groups.getRowsInBlock(), groups.getColsInBlock(), groups.getNnz());
            // mapgroupedagg
            if (groupsSizeP < OptimizerUtils.getRemoteMemBudgetMap(true) && getParameterHop(Statement.GAGG_FN) instanceof LiteralOp && ((LiteralOp) getParameterHop(Statement.GAGG_FN)).getStringValue().equals("sum") && inputlops.get(Statement.GAGG_NUM_GROUPS) != null) {
                // pre partitioning
                boolean needPart = (groups.dimsKnown() && groups.getDim1() * groups.getDim2() > DistributedCacheInput.PARTITION_SIZE);
                if (needPart) {
                    // operator selection
                    ExecType etPart = (OptimizerUtils.estimateSizeExactSparsity(groups.getDim1(), groups.getDim2(), 1.0) < OptimizerUtils.getLocalMemBudget()) ? ExecType.CP : ExecType.MR;
                    Lop dcinput = new DataPartition(groups.constructLops(), DataType.MATRIX, ValueType.DOUBLE, etPart, PDataPartitionFormat.ROW_BLOCK_WISE_N);
                    dcinput.getOutputParameters().setDimensions(groups.getDim1(), groups.getDim2(), target.getRowsInBlock(), target.getColsInBlock(), groups.getNnz());
                    setLineNumbers(dcinput);
                    inputlops.put(Statement.GAGG_GROUPS, dcinput);
                }
                Lop grp_agg_m = new GroupedAggregateM(inputlops, getDataType(), getValueType(), needPart, ExecType.MR);
                grp_agg_m.getOutputParameters().setDimensions(outputDim1, outputDim2, target.getRowsInBlock(), target.getColsInBlock(), -1);
                setLineNumbers(grp_agg_m);
                // post aggregation
                Group grp = new Group(grp_agg_m, Group.OperationTypes.Sort, getDataType(), getValueType());
                grp.getOutputParameters().setDimensions(outputDim1, outputDim2, target.getRowsInBlock(), target.getColsInBlock(), -1);
                setLineNumbers(grp);
                Aggregate agg1 = new Aggregate(grp, HopsAgg2Lops.get(AggOp.SUM), getDataType(), getValueType(), ExecType.MR);
                agg1.setupCorrectionLocation(CorrectionLocationType.NONE);
                agg1.getOutputParameters().setDimensions(outputDim1, outputDim2, target.getRowsInBlock(), target.getColsInBlock(), -1);
                grp_agg = agg1;
            // note: no reblock required
            } else { // general case: groupedagg
                // multi-column-block result matrix, or unknown
                if (target.getDim2() >= target.getColsInBlock() || target.getDim2() <= 0) {
                    long m1_dim1 = target.getDim1();
                    long m1_dim2 = target.getDim2();
                    long m2_dim1 = groups.getDim1();
                    long m2_dim2 = groups.getDim2();
                    long m3_dim1 = m1_dim1;
                    long m3_dim2 = ((m1_dim2 >= 0 && m2_dim2 >= 0) ? (m1_dim2 + m2_dim2) : -1);
                    long m3_nnz = (target.getNnz() > 0 && groups.getNnz() > 0) ? (target.getNnz() + groups.getNnz()) : -1;
                    long brlen = target.getRowsInBlock();
                    long bclen = target.getColsInBlock();
                    Lop offset = createOffsetLop(target, true);
                    Lop rep = new RepMat(groups.constructLops(), offset, true, groups.getDataType(), groups.getValueType());
                    setOutputDimensions(rep);
                    setLineNumbers(rep);
                    Group group1 = new Group(target.constructLops(), Group.OperationTypes.Sort, DataType.MATRIX, target.getValueType());
                    group1.getOutputParameters().setDimensions(m1_dim1, m1_dim2, brlen, bclen, target.getNnz());
                    setLineNumbers(group1);
                    Group group2 = new Group(rep, Group.OperationTypes.Sort, DataType.MATRIX, groups.getValueType());
                    group2.getOutputParameters().setDimensions(m2_dim1, m2_dim2, brlen, bclen, groups.getNnz());
                    setLineNumbers(group2);
                    append = new AppendR(group1, group2, DataType.MATRIX, ValueType.DOUBLE, true, ExecType.MR);
                    append.getOutputParameters().setDimensions(m3_dim1, m3_dim2, brlen, bclen, m3_nnz);
                    setLineNumbers(append);
                } else { // single-column-block vector or matrix
                    append = BinaryOp.constructMRAppendLop(target, groups, DataType.MATRIX, getValueType(), true, target);
                }
                // add the combine lop to parameter list, with a new name "combinedinput"
                inputlops.put(GroupedAggregate.COMBINEDINPUT, append);
                inputlops.remove(Statement.GAGG_TARGET);
                inputlops.remove(Statement.GAGG_GROUPS);
                grp_agg = new GroupedAggregate(inputlops, isWeighted, getDataType(), getValueType());
                grp_agg.getOutputParameters().setDimensions(outputDim1, outputDim2, getRowsInBlock(), getColsInBlock(), -1);
                setRequiresReblock(true);
            }
        }
        setLineNumbers(grp_agg);
        setLops(grp_agg);
    } else { // CP/Spark
        Lop grp_agg = null;
        if (et == ExecType.CP) {
            int k = OptimizerUtils.getConstrainedNumThreads(_maxNumThreads);
            grp_agg = new GroupedAggregate(inputlops, getDataType(), getValueType(), et, k);
            grp_agg.getOutputParameters().setDimensions(outputDim1, outputDim2, getRowsInBlock(), getColsInBlock(), -1);
        } else if (et == ExecType.SPARK) {
            // physical operator selection
            Hop groups = getParameterHop(Statement.GAGG_GROUPS);
            boolean broadcastGroups = (_paramIndexMap.get(Statement.GAGG_WEIGHTS) == null && OptimizerUtils.checkSparkBroadcastMemoryBudget(groups.getDim1(), groups.getDim2(), groups.getRowsInBlock(), groups.getColsInBlock(), groups.getNnz()));
            // mapgroupedagg
            if (broadcastGroups && getParameterHop(Statement.GAGG_FN) instanceof LiteralOp && ((LiteralOp) getParameterHop(Statement.GAGG_FN)).getStringValue().equals("sum") && inputlops.get(Statement.GAGG_NUM_GROUPS) != null) {
                Hop target = getTargetHop();
                grp_agg = new GroupedAggregateM(inputlops, getDataType(), getValueType(), true, ExecType.SPARK);
                grp_agg.getOutputParameters().setDimensions(outputDim1, outputDim2, target.getRowsInBlock(), target.getColsInBlock(), -1);
            // no reblock required (directly output binary block)
            } else { // groupedagg (w/ or w/o broadcast)
                grp_agg = new GroupedAggregate(inputlops, getDataType(), getValueType(), et, broadcastGroups);
                grp_agg.getOutputParameters().setDimensions(outputDim1, outputDim2, -1, -1, -1);
                setRequiresReblock(true);
            }
        }
        setLineNumbers(grp_agg);
        setLops(grp_agg);
    }
}
Also used: Group(org.apache.sysml.lops.Group) MultiThreadedHop(org.apache.sysml.hops.Hop.MultiThreadedHop) Data(org.apache.sysml.lops.Data) Lop(org.apache.sysml.lops.Lop) RepMat(org.apache.sysml.lops.RepMat) AppendR(org.apache.sysml.lops.AppendR) ExecType(org.apache.sysml.lops.LopProperties.ExecType) GroupedAggregate(org.apache.sysml.lops.GroupedAggregate) Aggregate(org.apache.sysml.lops.Aggregate) DataPartition(org.apache.sysml.lops.DataPartition) GroupedAggregateM(org.apache.sysml.lops.GroupedAggregateM)
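
All of the physical variants above (GroupedAggregate, map-side GroupedAggregateM plus a sum post-aggregation) compute the same result; for fn="sum" that result is a scatter-add of target values keyed by 1-based group ids. A minimal sketch of those semantics, assuming a dense vector input (a standalone method written for illustration, not the SystemML API):

import java.util.Arrays;

public class GroupedAggSketch {
    // groupedAggregate(target=t, groups=g, fn="sum", ngroups=k):
    // groups holds 1-based group ids; result[j] accumulates all target values with group id j+1
    static double[] groupedSum(double[] target, int[] groups, int ngroups) {
        double[] result = new double[ngroups];
        for (int i = 0; i < target.length; i++)
            result[groups[i] - 1] += target[i];
        return result;
    }

    public static void main(String[] args) {
        double[] t = { 1, 2, 3, 4 };
        int[] g = { 1, 2, 1, 2 };
        // prints [4.0, 6.0]
        System.out.println(Arrays.toString(groupedSum(t, g, 2)));
    }
}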

Example 3 with Aggregate

Use of org.apache.sysml.lops.Aggregate in project incubator-systemml by apache.

The class QuaternaryOp, method constructMRLopsWeightedDivMM.

private void constructMRLopsWeightedDivMM(WDivMMType wtype) {
    // NOTE: the common case for wdivmm are factors U/V with a rank of 10s to 100s; the current runtime only
    // supports single block outer products (U/V rank <= blocksize, i.e., 1000 by default); we enforce this
    // by applying the hop rewrite for Weighted DivMM only if this constraint holds.
    Hop W = getInput().get(0);
    Hop U = getInput().get(1);
    Hop V = getInput().get(2);
    Hop X = getInput().get(3);
    // MR operator selection, part1
    // size U
    double m1Size = OptimizerUtils.estimateSize(U.getDim1(), U.getDim2());
    // size V
    double m2Size = OptimizerUtils.estimateSize(V.getDim1(), V.getDim2());
    boolean isMapWdivmm = ((!wtype.hasFourInputs() || wtype.hasScalar()) && m1Size + m2Size < OptimizerUtils.getRemoteMemBudgetMap(true));
    // broadcast
    if (!FORCE_REPLICATION && isMapWdivmm) {
        // partitioning of U
        boolean needPartU = !U.dimsKnown() || U.getDim1() * U.getDim2() > DistributedCacheInput.PARTITION_SIZE;
        Lop lU = U.constructLops();
        if (needPartU) {
            // requires partitioning
            lU = new DataPartition(lU, DataType.MATRIX, ValueType.DOUBLE, (m1Size > OptimizerUtils.getLocalMemBudget()) ? ExecType.MR : ExecType.CP, PDataPartitionFormat.ROW_BLOCK_WISE_N);
            lU.getOutputParameters().setDimensions(U.getDim1(), U.getDim2(), getRowsInBlock(), getColsInBlock(), U.getNnz());
            setLineNumbers(lU);
        }
        // partitioning of V
        boolean needPartV = !V.dimsKnown() || V.getDim1() * V.getDim2() > DistributedCacheInput.PARTITION_SIZE;
        Lop lV = V.constructLops();
        if (needPartV) {
            // requires partitioning
            lV = new DataPartition(lV, DataType.MATRIX, ValueType.DOUBLE, (m2Size > OptimizerUtils.getLocalMemBudget()) ? ExecType.MR : ExecType.CP, PDataPartitionFormat.ROW_BLOCK_WISE_N);
            lV.getOutputParameters().setDimensions(V.getDim1(), V.getDim2(), getRowsInBlock(), getColsInBlock(), V.getNnz());
            setLineNumbers(lV);
        }
        // map-side wdivmm always with broadcast
        Lop wdivmm = new WeightedDivMM(W.constructLops(), lU, lV, X.constructLops(), DataType.MATRIX, ValueType.DOUBLE, wtype, ExecType.MR);
        setOutputDimensions(wdivmm);
        setLineNumbers(wdivmm);
        setLops(wdivmm);
    } else { // general case
        // MR operator selection, part 2 (both factors cannot fit in the map budget here, otherwise mapwdivmm would have been selected)
        boolean cacheU = !FORCE_REPLICATION && (m1Size < OptimizerUtils.getRemoteMemBudgetReduce());
        boolean cacheV = !FORCE_REPLICATION && ((!cacheU && m2Size < OptimizerUtils.getRemoteMemBudgetReduce()) || (cacheU && m1Size + m2Size < OptimizerUtils.getRemoteMemBudgetReduce()));
        Group grpW = new Group(W.constructLops(), Group.OperationTypes.Sort, DataType.MATRIX, ValueType.DOUBLE);
        grpW.getOutputParameters().setDimensions(W.getDim1(), W.getDim2(), W.getRowsInBlock(), W.getColsInBlock(), W.getNnz());
        setLineNumbers(grpW);
        Lop grpX = X.constructLops();
        if (wtype.hasFourInputs() && (X.getDataType() != DataType.SCALAR))
            grpX = new Group(grpX, Group.OperationTypes.Sort, DataType.MATRIX, ValueType.DOUBLE);
        grpX.getOutputParameters().setDimensions(X.getDim1(), X.getDim2(), X.getRowsInBlock(), X.getColsInBlock(), X.getNnz());
        setLineNumbers(grpX);
        Lop lU = constructLeftFactorMRLop(U, V, cacheU, m1Size);
        Lop lV = constructRightFactorMRLop(U, V, cacheV, m2Size);
        // reduce-side wdivmm w/ or without broadcast
        Lop wdivmm = new WeightedDivMMR(grpW, lU, lV, grpX, DataType.MATRIX, ValueType.DOUBLE, wtype, cacheU, cacheV, ExecType.MR);
        setOutputDimensions(wdivmm);
        setLineNumbers(wdivmm);
        setLops(wdivmm);
    }
    // in contrast to wsloss/wsigmoid, wdivmm requires partial aggregation (for the final mm)
    Group grp = new Group(getLops(), Group.OperationTypes.Sort, getDataType(), getValueType());
    setOutputDimensions(grp);
    setLineNumbers(grp);
    Aggregate agg1 = new Aggregate(grp, HopsAgg2Lops.get(AggOp.SUM), getDataType(), getValueType(), ExecType.MR);
    // aggregation uses kahanSum but the inputs do not have correction values
    agg1.setupCorrectionLocation(CorrectionLocationType.NONE);
    setOutputDimensions(agg1);
    setLineNumbers(agg1);
    setLops(agg1);
}
Also used: Group(org.apache.sysml.lops.Group) MultiThreadedHop(org.apache.sysml.hops.Hop.MultiThreadedHop) Lop(org.apache.sysml.lops.Lop) WeightedDivMM(org.apache.sysml.lops.WeightedDivMM) Aggregate(org.apache.sysml.lops.Aggregate) DataPartition(org.apache.sysml.lops.DataPartition) WeightedDivMMR(org.apache.sysml.lops.WeightedDivMMR)
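
The choice between the two wdivmm operators above is purely memory-driven: the map-side variant needs both factors U and V to fit into the remote map-task budget (for broadcast via distributed cache), otherwise the reduce-side variant caches whichever factor still fits into the reduce budget and replicates the rest. A condensed sketch of that selection logic, with hypothetical budget constants standing in for the OptimizerUtils calls and the wtype conditions omitted:

public class WdivmmSelectionSketch {
    // Hypothetical budgets; in SystemML these come from
    // OptimizerUtils.getRemoteMemBudgetMap(true) / getRemoteMemBudgetReduce().
    static final double MAP_BUDGET = 1e9;
    static final double REDUCE_BUDGET = 1e9;

    static String selectOperator(double m1Size, double m2Size, boolean forceReplication) {
        // map-side wdivmm: both factors must fit for broadcast via distributed cache
        if (!forceReplication && m1Size + m2Size < MAP_BUDGET)
            return "map-side wdivmm (broadcast U and V)";
        // reduce-side wdivmm: cache U if it fits, then V if both still fit together
        boolean cacheU = !forceReplication && m1Size < REDUCE_BUDGET;
        boolean cacheV = !forceReplication && ((!cacheU && m2Size < REDUCE_BUDGET)
                || (cacheU && m1Size + m2Size < REDUCE_BUDGET));
        return "reduce-side wdivmm (cacheU=" + cacheU + ", cacheV=" + cacheV + ")";
    }

    public static void main(String[] args) {
        // factors too large to broadcast together -> reduce-side with one cached factor
        System.out.println(selectOperator(8e8, 8e8, false));
    }
}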

Example 4 with Aggregate

Use of org.apache.sysml.lops.Aggregate in project incubator-systemml by apache.

The class QuaternaryOp, method constructMRLopsWeightedCeMM.

private void constructMRLopsWeightedCeMM(WCeMMType wtype) {
    // NOTE: the common case for wcemm are factors U/V with a rank of 10s to 100s; the current runtime only
    // supports single block outer products (U/V rank <= blocksize, i.e., 1000 by default); we enforce this
    // by applying the hop rewrite for Weighted Cross Entropy only if this constraint holds.
    Hop X = getInput().get(0);
    Hop U = getInput().get(1);
    Hop V = getInput().get(2);
    Hop eps = getInput().get(3);
    // MR operator selection, part1
    // size U
    double m1Size = OptimizerUtils.estimateSize(U.getDim1(), U.getDim2());
    // size V
    double m2Size = OptimizerUtils.estimateSize(V.getDim1(), V.getDim2());
    boolean isMapWcemm = (m1Size + m2Size < OptimizerUtils.getRemoteMemBudgetMap(true));
    // broadcast
    if (!FORCE_REPLICATION && isMapWcemm) {
        // partitioning of U
        boolean needPartU = !U.dimsKnown() || U.getDim1() * U.getDim2() > DistributedCacheInput.PARTITION_SIZE;
        Lop lU = U.constructLops();
        if (needPartU) {
            // requires partitioning
            lU = new DataPartition(lU, DataType.MATRIX, ValueType.DOUBLE, (m1Size > OptimizerUtils.getLocalMemBudget()) ? ExecType.MR : ExecType.CP, PDataPartitionFormat.ROW_BLOCK_WISE_N);
            lU.getOutputParameters().setDimensions(U.getDim1(), U.getDim2(), getRowsInBlock(), getColsInBlock(), U.getNnz());
            setLineNumbers(lU);
        }
        // partitioning of V
        boolean needPartV = !V.dimsKnown() || V.getDim1() * V.getDim2() > DistributedCacheInput.PARTITION_SIZE;
        Lop lV = V.constructLops();
        if (needPartV) {
            // requires partitioning
            lV = new DataPartition(lV, DataType.MATRIX, ValueType.DOUBLE, (m2Size > OptimizerUtils.getLocalMemBudget()) ? ExecType.MR : ExecType.CP, PDataPartitionFormat.ROW_BLOCK_WISE_N);
            lV.getOutputParameters().setDimensions(V.getDim1(), V.getDim2(), getRowsInBlock(), getColsInBlock(), V.getNnz());
            setLineNumbers(lV);
        }
        // map-side wcemm always with broadcast
        Lop wcemm = new WeightedCrossEntropy(X.constructLops(), lU, lV, eps.constructLops(), DataType.MATRIX, ValueType.DOUBLE, wtype, ExecType.MR);
        wcemm.getOutputParameters().setDimensions(1, 1, X.getRowsInBlock(), X.getColsInBlock(), -1);
        setLineNumbers(wcemm);
        Group grp = new Group(wcemm, Group.OperationTypes.Sort, DataType.MATRIX, ValueType.DOUBLE);
        grp.getOutputParameters().setDimensions(1, 1, X.getRowsInBlock(), X.getColsInBlock(), -1);
        setLineNumbers(grp);
        Aggregate agg1 = new Aggregate(grp, HopsAgg2Lops.get(AggOp.SUM), DataType.MATRIX, ValueType.DOUBLE, ExecType.MR);
        // aggregation uses kahanSum
        agg1.setupCorrectionLocation(CorrectionLocationType.NONE);
        agg1.getOutputParameters().setDimensions(1, 1, X.getRowsInBlock(), X.getColsInBlock(), -1);
        setLineNumbers(agg1);
        UnaryCP unary1 = new UnaryCP(agg1, HopsOpOp1LopsUS.get(OpOp1.CAST_AS_SCALAR), getDataType(), getValueType());
        unary1.getOutputParameters().setDimensions(0, 0, 0, 0, -1);
        setLineNumbers(unary1);
        setLops(unary1);
    } else { // general case
        // MR operator selection part 2
        boolean cacheU = !FORCE_REPLICATION && (m1Size < OptimizerUtils.getRemoteMemBudgetReduce());
        boolean cacheV = !FORCE_REPLICATION && ((!cacheU && m2Size < OptimizerUtils.getRemoteMemBudgetReduce()) || (cacheU && m1Size + m2Size < OptimizerUtils.getRemoteMemBudgetReduce()));
        Group grpX = new Group(X.constructLops(), Group.OperationTypes.Sort, DataType.MATRIX, ValueType.DOUBLE);
        grpX.getOutputParameters().setDimensions(X.getDim1(), X.getDim2(), X.getRowsInBlock(), X.getColsInBlock(), -1);
        setLineNumbers(grpX);
        Lop lU = constructLeftFactorMRLop(U, V, cacheU, m1Size);
        Lop lV = constructRightFactorMRLop(U, V, cacheV, m2Size);
        // reduce-side wcemm w/ or without broadcast
        Lop wcemm = new WeightedCrossEntropyR(grpX, lU, lV, eps.constructLops(), DataType.MATRIX, ValueType.DOUBLE, wtype, cacheU, cacheV, ExecType.MR);
        wcemm.getOutputParameters().setDimensions(1, 1, X.getRowsInBlock(), X.getColsInBlock(), -1);
        setLineNumbers(wcemm);
        Group grp = new Group(wcemm, Group.OperationTypes.Sort, DataType.MATRIX, ValueType.DOUBLE);
        grp.getOutputParameters().setDimensions(1, 1, X.getRowsInBlock(), X.getColsInBlock(), -1);
        setLineNumbers(grp);
        Aggregate agg1 = new Aggregate(grp, HopsAgg2Lops.get(AggOp.SUM), DataType.MATRIX, ValueType.DOUBLE, ExecType.MR);
        // aggregation uses kahanSum
        agg1.setupCorrectionLocation(CorrectionLocationType.NONE);
        agg1.getOutputParameters().setDimensions(1, 1, X.getRowsInBlock(), X.getColsInBlock(), -1);
        setLineNumbers(agg1);
        UnaryCP unary1 = new UnaryCP(agg1, HopsOpOp1LopsUS.get(OpOp1.CAST_AS_SCALAR), getDataType(), getValueType());
        unary1.getOutputParameters().setDimensions(0, 0, 0, 0, -1);
        setLineNumbers(unary1);
        setLops(unary1);
    }
}
Also used: Group(org.apache.sysml.lops.Group) WeightedCrossEntropyR(org.apache.sysml.lops.WeightedCrossEntropyR) MultiThreadedHop(org.apache.sysml.hops.Hop.MultiThreadedHop) WeightedCrossEntropy(org.apache.sysml.lops.WeightedCrossEntropy) Lop(org.apache.sysml.lops.Lop) Aggregate(org.apache.sysml.lops.Aggregate) DataPartition(org.apache.sysml.lops.DataPartition) UnaryCP(org.apache.sysml.lops.UnaryCP)
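
Semantically, the pattern compiled here corresponds to sum(X * log(U %*% t(V) + eps)), evaluated only over the non-zero cells of X so that the dense product U %*% t(V) is never materialized. A minimal dense sketch of that computation, assuming row-major factors with U of shape m x r and V of shape n x r (illustrative only, not the block-wise operator):

public class WcemmSketch {
    // wcemm: sum over non-zero X[i][j] of X[i][j] * log(dot(U[i], V[j]) + eps),
    // i.e., sum(X * log(U %*% t(V) + eps)) without materializing U %*% t(V)
    static double wcemm(double[][] X, double[][] U, double[][] V, double eps) {
        double sum = 0;
        for (int i = 0; i < X.length; i++)
            for (int j = 0; j < X[i].length; j++) {
                if (X[i][j] == 0)
                    continue; // sparsity-exploiting: zero cells contribute nothing
                double uv = 0;
                for (int k = 0; k < U[i].length; k++)
                    uv += U[i][k] * V[j][k];
                sum += X[i][j] * Math.log(uv + eps);
            }
        return sum;
    }

    public static void main(String[] args) {
        double[][] X = { { 1, 0 }, { 0, 2 } };
        double[][] U = { { 0.5 }, { 1.0 } }; // m x r factor
        double[][] V = { { 2.0 }, { 3.0 } }; // n x r factor
        // 1*log(1) + 2*log(3) ~= 2.197
        System.out.println(wcemm(X, U, V, 0));
    }
}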

Example 5 with Aggregate

Use of org.apache.sysml.lops.Aggregate in project incubator-systemml by apache.

The class UnaryOp, method constructLopsMRCumulativeUnary.

/**
 * MR Cumsum is currently based on a multipass algorithm of (1) preaggregation and (2) subsequent offsetting.
 * Note that we currently support one robust physical operator but many alternative
 * realizations are possible for specific scenarios (e.g., when the preaggregated intermediates
 * fit into the map task memory budget) or by creating custom job types.
 *
 * @return low-level operator
 */
private Lop constructLopsMRCumulativeUnary() {
    Hop input = getInput().get(0);
    long rlen = input.getDim1();
    long clen = input.getDim2();
    long brlen = input.getRowsInBlock();
    long bclen = input.getColsInBlock();
    boolean force = !dimsKnown() || _etypeForced == ExecType.MR;
    OperationTypes aggtype = getCumulativeAggType();
    Lop X = input.constructLops();
    Lop TEMP = X;
    ArrayList<Lop> DATA = new ArrayList<>();
    int level = 0;
    // recursive preaggregation until aggregates fit into CP memory budget
    while (((2 * OptimizerUtils.estimateSize(TEMP.getOutputParameters().getNumRows(), clen) + OptimizerUtils.estimateSize(1, clen)) > OptimizerUtils.getLocalMemBudget() && TEMP.getOutputParameters().getNumRows() > 1) || force) {
        DATA.add(TEMP);
        // preaggregation per block
        long rlenAgg = (long) Math.ceil((double) TEMP.getOutputParameters().getNumRows() / brlen);
        Lop preagg = new CumulativePartialAggregate(TEMP, DataType.MATRIX, ValueType.DOUBLE, aggtype, ExecType.MR);
        preagg.getOutputParameters().setDimensions(rlenAgg, clen, brlen, bclen, -1);
        setLineNumbers(preagg);
        Group group = new Group(preagg, Group.OperationTypes.Sort, DataType.MATRIX, ValueType.DOUBLE);
        group.getOutputParameters().setDimensions(rlenAgg, clen, brlen, bclen, -1);
        setLineNumbers(group);
        Aggregate agg = new Aggregate(group, HopsAgg2Lops.get(AggOp.SUM), getDataType(), getValueType(), ExecType.MR);
        agg.getOutputParameters().setDimensions(rlenAgg, clen, brlen, bclen, -1);
        // aggregation uses kahanSum but the inputs do not have correction values
        agg.setupCorrectionLocation(CorrectionLocationType.NONE);
        setLineNumbers(agg);
        TEMP = agg;
        level++;
        // in case of unknowns, generate one level
        force = false;
    }
    // in-memory cum sum (of partial aggregates)
    if (TEMP.getOutputParameters().getNumRows() != 1) {
        int k = OptimizerUtils.getConstrainedNumThreads(_maxNumThreads);
        Unary unary1 = new Unary(TEMP, HopsOpOp1LopsU.get(_op), DataType.MATRIX, ValueType.DOUBLE, ExecType.CP, k);
        unary1.getOutputParameters().setDimensions(TEMP.getOutputParameters().getNumRows(), clen, brlen, bclen, -1);
        setLineNumbers(unary1);
        TEMP = unary1;
    }
    // split, group and mr cumsum
    while (level-- > 0) {
        double init = getCumulativeInitValue();
        CumulativeSplitAggregate split = new CumulativeSplitAggregate(TEMP, DataType.MATRIX, ValueType.DOUBLE, init);
        split.getOutputParameters().setDimensions(rlen, clen, brlen, bclen, -1);
        setLineNumbers(split);
        Group group1 = new Group(DATA.get(level), Group.OperationTypes.Sort, DataType.MATRIX, ValueType.DOUBLE);
        group1.getOutputParameters().setDimensions(rlen, clen, brlen, bclen, -1);
        setLineNumbers(group1);
        Group group2 = new Group(split, Group.OperationTypes.Sort, DataType.MATRIX, ValueType.DOUBLE);
        group2.getOutputParameters().setDimensions(rlen, clen, brlen, bclen, -1);
        setLineNumbers(group2);
        CumulativeOffsetBinary binary = new CumulativeOffsetBinary(group1, group2, DataType.MATRIX, ValueType.DOUBLE, aggtype, ExecType.MR);
        binary.getOutputParameters().setDimensions(rlen, clen, brlen, bclen, -1);
        setLineNumbers(binary);
        TEMP = binary;
    }
    return TEMP;
}
Also used: Group(org.apache.sysml.lops.Group) CumulativeSplitAggregate(org.apache.sysml.lops.CumulativeSplitAggregate) MultiThreadedHop(org.apache.sysml.hops.Hop.MultiThreadedHop) ArrayList(java.util.ArrayList) Lop(org.apache.sysml.lops.Lop) CombineUnary(org.apache.sysml.lops.CombineUnary) Unary(org.apache.sysml.lops.Unary) CumulativePartialAggregate(org.apache.sysml.lops.CumulativePartialAggregate) OperationTypes(org.apache.sysml.lops.Aggregate.OperationTypes) CumulativeOffsetBinary(org.apache.sysml.lops.CumulativeOffsetBinary) PartialAggregate(org.apache.sysml.lops.PartialAggregate) Aggregate(org.apache.sysml.lops.Aggregate)
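
The control flow above is easier to follow on a single machine: pass 1 collapses each block to its sum (CumulativePartialAggregate), the now-small vector of block sums is cumsummed in memory, and pass 2 adds each block's incoming offset to a block-local cumsum (CumulativeOffsetBinary). A one-level sketch of this blocked algorithm for a column vector, with a fixed block size standing in for brlen:

import java.util.Arrays;

public class BlockedCumsumSketch {
    // Two-pass blocked cumsum: (1) per-block preaggregation, (2) subsequent offsetting
    static double[] cumsum(double[] x, int blen) {
        int nblocks = (x.length + blen - 1) / blen;
        // pass 1: per-block sums (the CumulativePartialAggregate step)
        double[] blockSums = new double[nblocks];
        for (int i = 0; i < x.length; i++)
            blockSums[i / blen] += x[i];
        // in-memory cumsum over the small aggregate, shifted by one block,
        // yields each block's incoming offset (the in-memory step above)
        double[] offsets = new double[nblocks];
        for (int b = 1; b < nblocks; b++)
            offsets[b] = offsets[b - 1] + blockSums[b - 1];
        // pass 2: block-local cumsum plus incoming offset (CumulativeOffsetBinary)
        double[] out = new double[x.length];
        for (int b = 0; b < nblocks; b++) {
            double run = offsets[b];
            for (int i = b * blen; i < Math.min((b + 1) * blen, x.length); i++)
                out[i] = (run += x[i]);
        }
        return out;
    }

    public static void main(String[] args) {
        // prints [1.0, 3.0, 6.0, 10.0, 15.0]
        System.out.println(Arrays.toString(cumsum(new double[] { 1, 2, 3, 4, 5 }, 2)));
    }
}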

Aggregations

Aggregate (org.apache.sysml.lops.Aggregate): 42 uses
Group (org.apache.sysml.lops.Group): 38 uses
MultiThreadedHop (org.apache.sysml.hops.Hop.MultiThreadedHop): 32 uses
Lop (org.apache.sysml.lops.Lop): 32 uses
DataPartition (org.apache.sysml.lops.DataPartition): 20 uses
ExecType (org.apache.sysml.lops.LopProperties.ExecType): 20 uses
PartialAggregate (org.apache.sysml.lops.PartialAggregate): 10 uses
UnaryCP (org.apache.sysml.lops.UnaryCP): 10 uses
CombineUnary (org.apache.sysml.lops.CombineUnary): 6 uses
Data (org.apache.sysml.lops.Data): 6 uses
GroupedAggregate (org.apache.sysml.lops.GroupedAggregate): 6 uses
SortKeys (org.apache.sysml.lops.SortKeys): 6 uses
Transform (org.apache.sysml.lops.Transform): 6 uses
Unary (org.apache.sysml.lops.Unary): 6 uses
ArrayList (java.util.ArrayList): 4 uses
SparkAggType (org.apache.sysml.hops.AggBinaryOp.SparkAggType): 4 uses
OperationTypes (org.apache.sysml.lops.Aggregate.OperationTypes): 4 uses
AppendR (org.apache.sysml.lops.AppendR): 4 uses
CumulativePartialAggregate (org.apache.sysml.lops.CumulativePartialAggregate): 4 uses
CumulativeSplitAggregate (org.apache.sysml.lops.CumulativeSplitAggregate): 4 uses