
Example 16 with Aggregate

Use of org.apache.sysml.lops.Aggregate in project systemml by apache.

From class ParameterizedBuiltinOp, method constructLopsRemoveEmpty.

private void constructLopsRemoveEmpty(HashMap<String, Lop> inputlops, ExecType et) {
    Hop targetHop = getTargetHop();
    Hop marginHop = getParameterHop("margin");
    Hop selectHop = getParameterHop("select");
    Hop emptyRet = getParameterHop("empty.return");
    if (et == ExecType.CP) {
        ParameterizedBuiltin pbilop = new ParameterizedBuiltin(inputlops, HopsParameterizedBuiltinLops.get(_op), getDataType(), getValueType(), et);
        setOutputDimensions(pbilop);
        setLineNumbers(pbilop);
        setLops(pbilop);
        /* DISABLED CP PMM (see for example, MDA Bivar test, requires size propagation on recompile)
        if( et == ExecType.CP && isTargetDiagInput() && marginHop instanceof LiteralOp
                && ((LiteralOp)marginHop).getStringValue().equals("rows")
                && _outputPermutationMatrix ) //SPECIAL CASE SELECTION VECTOR
        {
            //TODO this special case could be taken into account for memory estimates in order
            // to reduce the estimates for the input diag and subsequent matrix multiply

            //get input vector (without materializing diag())
            Hop input = targetHop.getInput().get(0);
            long brlen = input.getRowsInBlock();
            long bclen = input.getColsInBlock();
            MemoTable memo = new MemoTable();

            boolean isPPredInput = (input instanceof BinaryOp && ((BinaryOp)input).isPPredOperation());

            //step1: compute index vectors
            Hop ppred0 = input;
            if( !isPPredInput ) { //ppred only if required
                ppred0 = new BinaryOp("tmp1", DataType.MATRIX, ValueType.DOUBLE, OpOp2.NOTEQUAL, input, new LiteralOp("0",0));
                HopRewriteUtils.setOutputBlocksizes(ppred0, brlen, bclen);
                ppred0.refreshSizeInformation();
                ppred0.computeMemEstimate(memo); //select exec type
                HopRewriteUtils.copyLineNumbers(this, ppred0);
            }

            UnaryOp cumsum = new UnaryOp("tmp2", DataType.MATRIX, ValueType.DOUBLE, OpOp1.CUMSUM, ppred0);
            HopRewriteUtils.setOutputBlocksizes(cumsum, brlen, bclen);
            cumsum.refreshSizeInformation();
            cumsum.computeMemEstimate(memo); //select exec type
            HopRewriteUtils.copyLineNumbers(this, cumsum);

            BinaryOp sel = new BinaryOp("tmp3", DataType.MATRIX, ValueType.DOUBLE, OpOp2.MULT, ppred0, cumsum);
            HopRewriteUtils.setOutputBlocksizes(sel, brlen, bclen);
            sel.refreshSizeInformation();
            sel.computeMemEstimate(memo); //select exec type
            HopRewriteUtils.copyLineNumbers(this, sel);
            Lop loutput = sel.constructLops();

            //Step 4: cleanup hops (allow for garbage collection)
            HopRewriteUtils.removeChildReference(ppred0, input);

            setLops( loutput );
        }
        else //GENERAL CASE
        {
            ParameterizedBuiltin pbilop = new ParameterizedBuiltin( et, inputlops,
                    HopsParameterizedBuiltinLops.get(_op), getDataType(), getValueType());

            pbilop.getOutputParameters().setDimensions(getDim1(), getDim2(), getRowsInBlock(), getColsInBlock(), getNnz());
            setLineNumbers(pbilop);
            setLops(pbilop);
        }
        */
    } else if (et == ExecType.MR) {
        // special compile for mr removeEmpty-diag
        if (isTargetDiagInput() && HopRewriteUtils.isLiteralOfValue(marginHop, "rows")) {
            // get input vector (without materializing diag())
            Hop input = targetHop.getInput().get(0);
            int brlen = input.getRowsInBlock();
            int bclen = input.getColsInBlock();
            MemoTable memo = new MemoTable();
            boolean isPPredInput = (input instanceof BinaryOp && ((BinaryOp) input).isPPredOperation());
            // step1: compute index vectors
            Hop ppred0 = input;
            if (!isPPredInput) {
                // ppred only if required
                ppred0 = HopRewriteUtils.createBinary(input, new LiteralOp(0), OpOp2.NOTEQUAL);
                HopRewriteUtils.updateHopCharacteristics(ppred0, brlen, bclen, memo, this);
            }
            UnaryOp cumsum = HopRewriteUtils.createUnary(ppred0, OpOp1.CUMSUM);
            HopRewriteUtils.updateHopCharacteristics(cumsum, brlen, bclen, memo, this);
            Lop loutput = null;
            double mest = AggBinaryOp.getMapmmMemEstimate(input.getDim1(), 1, brlen, bclen, -1, brlen, bclen, brlen, bclen, -1, 1, true);
            double mbudget = OptimizerUtils.getRemoteMemBudgetMap(true);
            // SPECIAL CASE: SELECTION VECTOR
            if (_outputPermutationMatrix && mest < mbudget) {
                BinaryOp sel = HopRewriteUtils.createBinary(ppred0, cumsum, OpOp2.MULT);
                HopRewriteUtils.updateHopCharacteristics(sel, brlen, bclen, memo, this);
                loutput = sel.constructLops();
            } else { // GENERAL CASE: GENERAL PERMUTATION MATRIX
                // max ensures non-zero entries and at least one output row
                BinaryOp max = HopRewriteUtils.createBinary(cumsum, new LiteralOp(1), OpOp2.MAX);
                HopRewriteUtils.updateHopCharacteristics(max, brlen, bclen, memo, this);
                DataGenOp seq = HopRewriteUtils.createSeqDataGenOp(input);
                seq.setName("tmp4");
                HopRewriteUtils.updateHopCharacteristics(seq, brlen, bclen, memo, this);
                // step 2: compute removeEmpty(rows) output via table, seq guarantees right column dimension
                // note: weights always the input (even if isPPredInput) because input also includes 0s
                TernaryOp table = new TernaryOp("tmp5", DataType.MATRIX, ValueType.DOUBLE, OpOp3.CTABLE, max, seq, input);
                table.setOutputBlocksizes(brlen, bclen);
                table.refreshSizeInformation();
                // force MR
                table.setForcedExecType(ExecType.MR);
                HopRewriteUtils.copyLineNumbers(this, table);
                table.setDisjointInputs(true);
                table.setOutputEmptyBlocks(_outputEmptyBlocks);
                loutput = table.constructLops();
                HopRewriteUtils.removeChildReference(table, input);
            }
            // Step 4: cleanup hops (allow for garbage collection)
            HopRewriteUtils.removeChildReference(ppred0, input);
            setLops(loutput);
        } else if (et == ExecType.MR) { // default mr remove empty
            if (!(marginHop instanceof LiteralOp))
                throw new HopsException("Parameter 'margin' must be a literal argument.");
            Hop input = targetHop;
            long rlen = input.getDim1();
            long clen = input.getDim2();
            int brlen = input.getRowsInBlock();
            int bclen = input.getColsInBlock();
            long nnz = input.getNnz();
            boolean rmRows = ((LiteralOp) marginHop).getStringValue().equals("rows");
            // construct lops via a new partial hop dag and subsequent lops construction
            // in order to reuse operator selection decisions
            BinaryOp ppred0 = null;
            Hop emptyInd = null;
            if (selectHop == null) {
                // Step1: compute row/col non-empty indicators
                ppred0 = HopRewriteUtils.createBinary(input, new LiteralOp(0), OpOp2.NOTEQUAL);
                // always MR
                ppred0.setForcedExecType(ExecType.MR);
                emptyInd = ppred0;
                if (!((rmRows && clen == 1) || (!rmRows && rlen == 1))) {
                    emptyInd = HopRewriteUtils.createAggUnaryOp(ppred0, AggOp.MAX, rmRows ? Direction.Row : Direction.Col);
                    // always MR
                    emptyInd.setForcedExecType(ExecType.MR);
                    HopRewriteUtils.copyLineNumbers(this, emptyInd);
                }
            } else {
                emptyInd = selectHop;
            }
            // Step 2: compute row offsets for non-empty rows
            Hop cumsumInput = emptyInd;
            if (!rmRows) {
                cumsumInput = HopRewriteUtils.createTranspose(emptyInd);
                HopRewriteUtils.updateHopCharacteristics(cumsumInput, brlen, bclen, this);
            }
            UnaryOp cumsum = HopRewriteUtils.createUnary(cumsumInput, OpOp1.CUMSUM);
            HopRewriteUtils.updateHopCharacteristics(cumsum, brlen, bclen, this);
            Hop cumsumOutput = cumsum;
            if (!rmRows) {
                cumsumOutput = HopRewriteUtils.createTranspose(cumsum);
                HopRewriteUtils.updateHopCharacteristics(cumsumOutput, brlen, bclen, this);
            }
            // alternative: right indexing
            Hop maxDim = HopRewriteUtils.createAggUnaryOp(cumsumOutput, AggOp.MAX, Direction.RowCol);
            HopRewriteUtils.updateHopCharacteristics(maxDim, brlen, bclen, this);
            BinaryOp offsets = HopRewriteUtils.createBinary(cumsumOutput, emptyInd, OpOp2.MULT);
            HopRewriteUtils.updateHopCharacteristics(offsets, brlen, bclen, this);
            // Step 3: gather non-empty rows/cols into final results
            Lop linput = input.constructLops();
            Lop loffset = offsets.constructLops();
            Lop lmaxdim = maxDim.constructLops();
            double mestPM = OptimizerUtils.estimatePartitionedSizeExactSparsity(rlen, 1, brlen, bclen, 1.0);
            Lop rmEmpty = null;
            // a) broadcast-based PMM (permutation matrix mult)
            if (rmRows && rlen >= 0 && mestPM < OptimizerUtils.getRemoteMemBudgetMap() && HopRewriteUtils.isLiteralOfValue(emptyRet, false)) {
                boolean needPart = !offsets.dimsKnown() || offsets.getDim1() > DistributedCacheInput.PARTITION_SIZE;
                if (needPart) {
                    // requires partitioning
                    loffset = new DataPartition(loffset, DataType.MATRIX, ValueType.DOUBLE, (mestPM > OptimizerUtils.getLocalMemBudget()) ? ExecType.MR : ExecType.CP, PDataPartitionFormat.ROW_BLOCK_WISE_N);
                    loffset.getOutputParameters().setDimensions(rlen, 1, brlen, bclen, rlen);
                    setLineNumbers(loffset);
                }
                rmEmpty = new PMMJ(loffset, linput, lmaxdim, getDataType(), getValueType(), needPart, true, ExecType.MR);
                setOutputDimensions(rmEmpty);
                setLineNumbers(rmEmpty);
            } else { // b) general case: repartition-based rmempty
                boolean requiresRep = ((clen > bclen || clen <= 0) && rmRows) || ((rlen > brlen || rlen <= 0) && !rmRows);
                if (requiresRep) {
                    // ncol of left input (determines num replicates)
                    Lop pos = createOffsetLop(input, rmRows);
                    loffset = new RepMat(loffset, pos, rmRows, DataType.MATRIX, ValueType.DOUBLE);
                    loffset.getOutputParameters().setDimensions(rlen, clen, brlen, bclen, nnz);
                    setLineNumbers(loffset);
                }
                Group group1 = new Group(linput, Group.OperationTypes.Sort, getDataType(), getValueType());
                setLineNumbers(group1);
                group1.getOutputParameters().setDimensions(rlen, clen, brlen, bclen, nnz);
                Group group2 = new Group(loffset, Group.OperationTypes.Sort, getDataType(), getValueType());
                setLineNumbers(group2);
                group2.getOutputParameters().setDimensions(rlen, clen, brlen, bclen, nnz);
                HashMap<String, Lop> inMap = new HashMap<>();
                inMap.put("target", group1);
                inMap.put("offset", group2);
                inMap.put("maxdim", lmaxdim);
                inMap.put("margin", inputlops.get("margin"));
                inMap.put("empty.return", inputlops.get("empty.return"));
                rmEmpty = new ParameterizedBuiltin(inMap, HopsParameterizedBuiltinLops.get(_op), getDataType(), getValueType(), et);
                setOutputDimensions(rmEmpty);
                setLineNumbers(rmEmpty);
            }
            Group group3 = new Group(rmEmpty, Group.OperationTypes.Sort, getDataType(), getValueType());
            setLineNumbers(group3);
            group3.getOutputParameters().setDimensions(-1, -1, brlen, bclen, -1);
            Aggregate finalagg = new Aggregate(group3, Aggregate.OperationTypes.Sum, DataType.MATRIX, getValueType(), ExecType.MR);
            setOutputDimensions(finalagg);
            setLineNumbers(finalagg);
            // Step 4: cleanup hops (allow for garbage collection)
            if (selectHop == null)
                HopRewriteUtils.removeChildReference(ppred0, input);
            setLops(finalagg);
        }
    } else if (et == ExecType.SPARK) {
        if (!(marginHop instanceof LiteralOp))
            throw new HopsException("Parameter 'margin' must be a literal argument.");
        Hop input = targetHop;
        long rlen = input.getDim1();
        long clen = input.getDim2();
        int brlen = input.getRowsInBlock();
        int bclen = input.getColsInBlock();
        boolean rmRows = ((LiteralOp) marginHop).getStringValue().equals("rows");
        // construct lops via a new partial hop dag and subsequent lops construction
        // in order to reuse operator selection decisions
        BinaryOp ppred0 = null;
        Hop emptyInd = null;
        if (selectHop == null) {
            // Step1: compute row/col non-empty indicators
            ppred0 = HopRewriteUtils.createBinary(input, new LiteralOp(0), OpOp2.NOTEQUAL);
            // always Spark
            ppred0.setForcedExecType(ExecType.SPARK);
            emptyInd = ppred0;
            if (!((rmRows && clen == 1) || (!rmRows && rlen == 1))) {
                emptyInd = HopRewriteUtils.createAggUnaryOp(ppred0, AggOp.MAX, rmRows ? Direction.Row : Direction.Col);
                // always Spark
                emptyInd.setForcedExecType(ExecType.SPARK);
            }
        } else {
            emptyInd = selectHop;
        }
        // Step 2: compute row offsets for non-empty rows
        Hop cumsumInput = emptyInd;
        if (!rmRows) {
            cumsumInput = HopRewriteUtils.createTranspose(emptyInd);
            HopRewriteUtils.updateHopCharacteristics(cumsumInput, brlen, bclen, this);
        }
        UnaryOp cumsum = HopRewriteUtils.createUnary(cumsumInput, OpOp1.CUMSUM);
        HopRewriteUtils.updateHopCharacteristics(cumsum, brlen, bclen, this);
        Hop cumsumOutput = cumsum;
        if (!rmRows) {
            cumsumOutput = HopRewriteUtils.createTranspose(cumsum);
            HopRewriteUtils.updateHopCharacteristics(cumsumOutput, brlen, bclen, this);
        }
        // alternative: right indexing
        Hop maxDim = HopRewriteUtils.createAggUnaryOp(cumsumOutput, AggOp.MAX, Direction.RowCol);
        HopRewriteUtils.updateHopCharacteristics(maxDim, brlen, bclen, this);
        BinaryOp offsets = HopRewriteUtils.createBinary(cumsumOutput, emptyInd, OpOp2.MULT);
        HopRewriteUtils.updateHopCharacteristics(offsets, brlen, bclen, this);
        // Step 3: gather non-empty rows/cols into final results
        Lop linput = input.constructLops();
        Lop loffset = offsets.constructLops();
        Lop lmaxdim = maxDim.constructLops();
        HashMap<String, Lop> inMap = new HashMap<>();
        inMap.put("target", linput);
        inMap.put("offset", loffset);
        inMap.put("maxdim", lmaxdim);
        inMap.put("margin", inputlops.get("margin"));
        inMap.put("empty.return", inputlops.get("empty.return"));
        if (!FORCE_DIST_RM_EMPTY && isRemoveEmptyBcSP())
            _bRmEmptyBC = true;
        ParameterizedBuiltin pbilop = new ParameterizedBuiltin(inMap, HopsParameterizedBuiltinLops.get(_op), getDataType(), getValueType(), et, _bRmEmptyBC);
        setOutputDimensions(pbilop);
        setLineNumbers(pbilop);
        // Step 4: cleanup hops (allow for garbage collection)
        if (selectHop == null)
            HopRewriteUtils.removeChildReference(ppred0, input);
        setLops(pbilop);
    // NOTE: in contrast to MR, replication and aggregation are handled instruction-locally
    }
}
Also used : Group(org.apache.sysml.lops.Group) ParameterizedBuiltin(org.apache.sysml.lops.ParameterizedBuiltin) HashMap(java.util.HashMap) MultiThreadedHop(org.apache.sysml.hops.Hop.MultiThreadedHop) Lop(org.apache.sysml.lops.Lop) RepMat(org.apache.sysml.lops.RepMat) GroupedAggregate(org.apache.sysml.lops.GroupedAggregate) Aggregate(org.apache.sysml.lops.Aggregate) DataPartition(org.apache.sysml.lops.DataPartition) PMMJ(org.apache.sysml.lops.PMMJ)
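
The MR paths above consistently finish the same way: a Group lop sorts the blocked intermediate by block index, and a summing Aggregate lop merges colliding blocks (group3/finalagg). The minimal sketch below isolates that pattern; the class and method names are hypothetical, while the lop constructors and dimension settings mirror the code above (unknown dimensions passed as -1).

import org.apache.sysml.lops.Aggregate;
import org.apache.sysml.lops.Group;
import org.apache.sysml.lops.Lop;
import org.apache.sysml.lops.LopProperties.ExecType;
import org.apache.sysml.parser.Expression.DataType;
import org.apache.sysml.parser.Expression.ValueType;

// Hypothetical helper (not part of SystemML) sketching the Group -> Aggregate finalization
// used for the MR removeEmpty output above.
class MrBlockAggregationSketch {
    static Lop groupAndSum(Lop in, int brlen, int bclen) {
        // shuffle blocks by index
        Group grp = new Group(in, Group.OperationTypes.Sort, DataType.MATRIX, ValueType.DOUBLE);
        grp.getOutputParameters().setDimensions(-1, -1, brlen, bclen, -1);
        // merge colliding blocks by summation
        Aggregate agg = new Aggregate(grp, Aggregate.OperationTypes.Sum, DataType.MATRIX, ValueType.DOUBLE, ExecType.MR);
        agg.getOutputParameters().setDimensions(-1, -1, brlen, bclen, -1);
        return agg;
    }
}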

Example 17 with Aggregate

Use of org.apache.sysml.lops.Aggregate in project systemml by apache.

From class QuaternaryOp, method constructMRLopsWeightedSquaredLoss.

private void constructMRLopsWeightedSquaredLoss(WeightsType wtype) {
    // NOTE: the common case for wsloss are factors U/V with a rank of 10s to 100s; the current runtime only
    // supports single block outer products (U/V rank <= blocksize, i.e., 1000 by default); we enforce this
    // by applying the hop rewrite for Weighted Squared Loss only if this constraint holds.
    Hop X = getInput().get(0);
    Hop U = getInput().get(1);
    Hop V = getInput().get(2);
    Hop W = getInput().get(3);
    // MR operator selection, part1
    // size U
    double m1Size = OptimizerUtils.estimateSize(U.getDim1(), U.getDim2());
    // size V
    double m2Size = OptimizerUtils.estimateSize(V.getDim1(), V.getDim2());
    boolean isMapWsloss = (!wtype.hasFourInputs() && m1Size + m2Size < OptimizerUtils.getRemoteMemBudgetMap(true));
    // broadcast
    if (!FORCE_REPLICATION && isMapWsloss) {
        // partitioning of U
        boolean needPartU = !U.dimsKnown() || U.getDim1() * U.getDim2() > DistributedCacheInput.PARTITION_SIZE;
        Lop lU = U.constructLops();
        if (needPartU) {
            // requires partitioning
            lU = new DataPartition(lU, DataType.MATRIX, ValueType.DOUBLE, (m1Size > OptimizerUtils.getLocalMemBudget()) ? ExecType.MR : ExecType.CP, PDataPartitionFormat.ROW_BLOCK_WISE_N);
            lU.getOutputParameters().setDimensions(U.getDim1(), U.getDim2(), getRowsInBlock(), getColsInBlock(), U.getNnz());
            setLineNumbers(lU);
        }
        // partitioning of V
        boolean needPartV = !V.dimsKnown() || V.getDim1() * V.getDim2() > DistributedCacheInput.PARTITION_SIZE;
        Lop lV = V.constructLops();
        if (needPartV) {
            // requires partitioning
            lV = new DataPartition(lV, DataType.MATRIX, ValueType.DOUBLE, (m2Size > OptimizerUtils.getLocalMemBudget()) ? ExecType.MR : ExecType.CP, PDataPartitionFormat.ROW_BLOCK_WISE_N);
            lV.getOutputParameters().setDimensions(V.getDim1(), V.getDim2(), getRowsInBlock(), getColsInBlock(), V.getNnz());
            setLineNumbers(lV);
        }
        // map-side wsloss always with broadcast
        Lop wsloss = new WeightedSquaredLoss(X.constructLops(), lU, lV, W.constructLops(), DataType.MATRIX, ValueType.DOUBLE, wtype, ExecType.MR);
        wsloss.getOutputParameters().setDimensions(1, 1, X.getRowsInBlock(), X.getColsInBlock(), -1);
        setLineNumbers(wsloss);
        Group grp = new Group(wsloss, Group.OperationTypes.Sort, DataType.MATRIX, ValueType.DOUBLE);
        grp.getOutputParameters().setDimensions(1, 1, X.getRowsInBlock(), X.getColsInBlock(), -1);
        setLineNumbers(grp);
        Aggregate agg1 = new Aggregate(grp, HopsAgg2Lops.get(AggOp.SUM), DataType.MATRIX, ValueType.DOUBLE, ExecType.MR);
        // aggregation uses kahanSum
        agg1.setupCorrectionLocation(CorrectionLocationType.NONE);
        agg1.getOutputParameters().setDimensions(1, 1, X.getRowsInBlock(), X.getColsInBlock(), -1);
        setLineNumbers(agg1);
        UnaryCP unary1 = new UnaryCP(agg1, HopsOpOp1LopsUS.get(OpOp1.CAST_AS_SCALAR), getDataType(), getValueType());
        unary1.getOutputParameters().setDimensions(0, 0, 0, 0, -1);
        setLineNumbers(unary1);
        setLops(unary1);
    } else { // general case
        // MR operator selection part 2
        boolean cacheU = !FORCE_REPLICATION && (m1Size < OptimizerUtils.getRemoteMemBudgetReduce());
        boolean cacheV = !FORCE_REPLICATION && ((!cacheU && m2Size < OptimizerUtils.getRemoteMemBudgetReduce()) || (cacheU && m1Size + m2Size < OptimizerUtils.getRemoteMemBudgetReduce()));
        Group grpX = new Group(X.constructLops(), Group.OperationTypes.Sort, DataType.MATRIX, ValueType.DOUBLE);
        grpX.getOutputParameters().setDimensions(X.getDim1(), X.getDim2(), X.getRowsInBlock(), X.getColsInBlock(), -1);
        setLineNumbers(grpX);
        Lop grpW = W.constructLops();
        if (grpW.getDataType() == DataType.MATRIX) {
            grpW = new Group(W.constructLops(), Group.OperationTypes.Sort, DataType.MATRIX, ValueType.DOUBLE);
            grpW.getOutputParameters().setDimensions(W.getDim1(), W.getDim2(), W.getRowsInBlock(), W.getColsInBlock(), -1);
            setLineNumbers(grpW);
        }
        Lop lU = constructLeftFactorMRLop(U, V, cacheU, m1Size);
        Lop lV = constructRightFactorMRLop(U, V, cacheV, m2Size);
        // reduce-side wsloss w/ or without broadcast
        Lop wsloss = new WeightedSquaredLossR(grpX, lU, lV, grpW, DataType.MATRIX, ValueType.DOUBLE, wtype, cacheU, cacheV, ExecType.MR);
        wsloss.getOutputParameters().setDimensions(1, 1, X.getRowsInBlock(), X.getColsInBlock(), -1);
        setLineNumbers(wsloss);
        Group grp = new Group(wsloss, Group.OperationTypes.Sort, DataType.MATRIX, ValueType.DOUBLE);
        grp.getOutputParameters().setDimensions(1, 1, X.getRowsInBlock(), X.getColsInBlock(), -1);
        setLineNumbers(grp);
        Aggregate agg1 = new Aggregate(grp, HopsAgg2Lops.get(AggOp.SUM), DataType.MATRIX, ValueType.DOUBLE, ExecType.MR);
        // aggregation uses kahanSum
        agg1.setupCorrectionLocation(CorrectionLocationType.NONE);
        agg1.getOutputParameters().setDimensions(1, 1, X.getRowsInBlock(), X.getColsInBlock(), -1);
        setLineNumbers(agg1);
        UnaryCP unary1 = new UnaryCP(agg1, HopsOpOp1LopsUS.get(OpOp1.CAST_AS_SCALAR), getDataType(), getValueType());
        unary1.getOutputParameters().setDimensions(0, 0, 0, 0, -1);
        setLineNumbers(unary1);
        setLops(unary1);
    }
}
Also used : Group(org.apache.sysml.lops.Group) MultiThreadedHop(org.apache.sysml.hops.Hop.MultiThreadedHop) WeightedSquaredLoss(org.apache.sysml.lops.WeightedSquaredLoss) Lop(org.apache.sysml.lops.Lop) Aggregate(org.apache.sysml.lops.Aggregate) DataPartition(org.apache.sysml.lops.DataPartition) WeightedSquaredLossR(org.apache.sysml.lops.WeightedSquaredLossR) UnaryCP(org.apache.sysml.lops.UnaryCP)
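
The branch taken above hinges on a single memory check ("MR operator selection, part1"): the map-side, broadcast-based wsloss is only chosen when the weighting type has no fourth input and the estimated sizes of U and V together fit the remote map-task budget. The sketch below restates that check; the class and method names are hypothetical, while OptimizerUtils.estimateSize and OptimizerUtils.getRemoteMemBudgetMap are the calls used above.

import org.apache.sysml.hops.Hop;
import org.apache.sysml.hops.OptimizerUtils;

// Hypothetical sketch (not SystemML code) of the map-side vs. reduce-side wsloss selection above.
class WslossSelectionSketch {
    static boolean chooseMapSideWsloss(Hop U, Hop V, boolean hasFourInputs) {
        double m1Size = OptimizerUtils.estimateSize(U.getDim1(), U.getDim2()); // size of U
        double m2Size = OptimizerUtils.estimateSize(V.getDim1(), V.getDim2()); // size of V
        // broadcast both factors only if they jointly fit into the remote map-task memory budget
        return !hasFourInputs && (m1Size + m2Size) < OptimizerUtils.getRemoteMemBudgetMap(true);
    }
}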

Example 18 with Aggregate

Use of org.apache.sysml.lops.Aggregate in project systemml by apache.

From class ReorgOp, method constructLops.

@Override
public Lop constructLops() {
    // return already created lops
    if (getLops() != null)
        return getLops();
    ExecType et = optFindExecType();
    switch(op) {
        case TRANSPOSE:
            {
                Lop lin = getInput().get(0).constructLops();
                if (lin instanceof Transform && ((Transform) lin).getOperationType() == OperationTypes.Transpose)
                    // if input is already a transpose, avoid redundant transpose ops
                    setLops(lin.getInputs().get(0));
                else if (getDim1() == 1 && getDim2() == 1)
                    // if input of size 1x1, avoid unnecessary transpose
                    setLops(lin);
                else {
                    // general case
                    int k = OptimizerUtils.getConstrainedNumThreads(_maxNumThreads);
                    Transform transform1 = new Transform(lin, HopsTransf2Lops.get(op), getDataType(), getValueType(), et, k);
                    setOutputDimensions(transform1);
                    setLineNumbers(transform1);
                    setLops(transform1);
                }
                break;
            }
        case DIAG:
            {
                Transform transform1 = new Transform(getInput().get(0).constructLops(), HopsTransf2Lops.get(op), getDataType(), getValueType(), et);
                setOutputDimensions(transform1);
                setLineNumbers(transform1);
                setLops(transform1);
                break;
            }
        case REV:
            {
                Lop rev = null;
                if (et == ExecType.MR) {
                    Lop tmp = new Transform(getInput().get(0).constructLops(), HopsTransf2Lops.get(op), getDataType(), getValueType(), et);
                    setOutputDimensions(tmp);
                    setLineNumbers(tmp);
                    Group group1 = new Group(tmp, Group.OperationTypes.Sort, DataType.MATRIX, getValueType());
                    setOutputDimensions(group1);
                    setLineNumbers(group1);
                    rev = new Aggregate(group1, Aggregate.OperationTypes.Sum, DataType.MATRIX, getValueType(), et);
                } else {
                    // CP/SPARK
                    rev = new Transform(getInput().get(0).constructLops(), HopsTransf2Lops.get(op), getDataType(), getValueType(), et);
                }
                setOutputDimensions(rev);
                setLineNumbers(rev);
                setLops(rev);
                break;
            }
        case RESHAPE:
            {
                // main, rows, cols, byrow
                Lop[] linputs = new Lop[4];
                for (int i = 0; i < 4; i++) linputs[i] = getInput().get(i).constructLops();
                if (et == ExecType.MR) {
                    Transform transform1 = new Transform(linputs, HopsTransf2Lops.get(op), getDataType(), getValueType(), true, et);
                    setOutputDimensions(transform1);
                    setLineNumbers(transform1);
                    Group group1 = new Group(transform1, Group.OperationTypes.Sort, DataType.MATRIX, getValueType());
                    setOutputDimensions(group1);
                    setLineNumbers(group1);
                    Aggregate agg1 = new Aggregate(group1, Aggregate.OperationTypes.Sum, DataType.MATRIX, getValueType(), et);
                    setOutputDimensions(agg1);
                    setLineNumbers(agg1);
                    setLops(agg1);
                } else { // CP/SPARK
                    _outputEmptyBlocks = (et == ExecType.SPARK && !OptimizerUtils.allowsToFilterEmptyBlockOutputs(this));
                    Transform transform1 = new Transform(linputs, HopsTransf2Lops.get(op), getDataType(), getValueType(), _outputEmptyBlocks, et);
                    setOutputDimensions(transform1);
                    setLineNumbers(transform1);
                    setLops(transform1);
                }
                break;
            }
        case SORT:
            {
                Hop input = getInput().get(0);
                Hop by = getInput().get(1);
                Hop desc = getInput().get(2);
                Hop ixret = getInput().get(3);
                if (et == ExecType.MR) {
                    if (!(desc instanceof LiteralOp && ixret instanceof LiteralOp)) {
                        LOG.warn("Unsupported non-constant ordering parameters, using defaults and mark for recompilation.");
                        setRequiresRecompile();
                        desc = new LiteralOp(false);
                        ixret = new LiteralOp(false);
                    }
                    // Step 1: extraction (if unknown ncol or multiple columns)
                    Hop vinput = input;
                    if (input.getDim2() != 1) {
                        vinput = new IndexingOp("tmp1", getDataType(), getValueType(), input, new LiteralOp(1L), HopRewriteUtils.createValueHop(input, true), by, by, false, true);
                        vinput.refreshSizeInformation();
                        vinput.setOutputBlocksizes(getRowsInBlock(), getColsInBlock());
                        HopRewriteUtils.copyLineNumbers(this, vinput);
                    }
                    // Step 2: Index vector sort
                    Hop voutput = null;
                    if (2 * OptimizerUtils.estimateSize(vinput.getDim1(), vinput.getDim2()) > OptimizerUtils.getLocalMemBudget() || FORCE_DIST_SORT_INDEXES) {
                        // large vector, fallback to MR sort
                        // sort indexes according to given values
                        SortKeys sort = new SortKeys(vinput.constructLops(), HopRewriteUtils.getBooleanValueSafe((LiteralOp) desc), SortKeys.OperationTypes.Indexes, vinput.getDataType(), vinput.getValueType(), ExecType.MR);
                        sort.getOutputParameters().setDimensions(vinput.getDim1(), 1, vinput.getRowsInBlock(), vinput.getColsInBlock(), vinput.getNnz());
                        setLineNumbers(sort);
                        // note: this sortindexes includes also the shift by offsets and
                        // final aggregate because sideways passing of offsets would
                        // not nicely fit the current instruction model
                        setLops(sort);
                        voutput = this;
                    } else {
                        // small vector, use in-memory sort
                        ArrayList<Hop> sinputs = new ArrayList<>();
                        sinputs.add(vinput);
                        // by (always vector)
                        sinputs.add(new LiteralOp(1));
                        sinputs.add(desc);
                        // indexreturn (always indexes)
                        sinputs.add(new LiteralOp(true));
                        voutput = new ReorgOp("tmp3", getDataType(), getValueType(), ReOrgOp.SORT, sinputs);
                        HopRewriteUtils.copyLineNumbers(this, voutput);
                        // explicitly construct CP lop; otherwise there is danger of infinite recursion if forced runtime platform.
                        voutput.setLops(constructCPOrSparkSortLop(vinput, sinputs.get(1), sinputs.get(2), sinputs.get(3), ExecType.CP, false));
                        voutput.getLops().getOutputParameters().setDimensions(vinput.getDim1(), vinput.getDim2(), vinput.getRowsInBlock(), vinput.getColsInBlock(), vinput.getNnz());
                        setLops(voutput.constructLops());
                    }
                    // Step 3: data permutation (only required if sorted data, not indexes, is returned)
                    // -- done via X' = table(seq(), IX') %*% X;
                    if (!HopRewriteUtils.getBooleanValueSafe((LiteralOp) ixret)) {
                        // generate seq
                        DataGenOp seq = HopRewriteUtils.createSeqDataGenOp(voutput);
                        seq.setName("tmp4");
                        seq.refreshSizeInformation();
                        // select exec type
                        seq.computeMemEstimate(new MemoTable());
                        HopRewriteUtils.copyLineNumbers(this, seq);
                        // generate table
                        TernaryOp table = new TernaryOp("tmp5", DataType.MATRIX, ValueType.DOUBLE, OpOp3.CTABLE, seq, voutput, new LiteralOp(1L));
                        table.setOutputBlocksizes(getRowsInBlock(), getColsInBlock());
                        table.refreshSizeInformation();
                        // force MR
                        table.setForcedExecType(ExecType.MR);
                        HopRewriteUtils.copyLineNumbers(this, table);
                        table.setDisjointInputs(true);
                        table.setOutputEmptyBlocks(false);
                        // generate matrix mult
                        AggBinaryOp mmult = HopRewriteUtils.createMatrixMultiply(table, input);
                        // force MR
                        mmult.setForcedExecType(ExecType.MR);
                        setLops(mmult.constructLops());
                        // cleanups
                        HopRewriteUtils.removeChildReference(table, input);
                    }
                } else if (et == ExecType.SPARK) {
                    boolean sortRewrite = !FORCE_DIST_SORT_INDEXES && isSortSPRewriteApplicable() && by.getDataType().isScalar();
                    Lop transform1 = constructCPOrSparkSortLop(input, by, desc, ixret, et, sortRewrite);
                    setOutputDimensions(transform1);
                    setLineNumbers(transform1);
                    setLops(transform1);
                } else { // CP
                    Lop transform1 = constructCPOrSparkSortLop(input, by, desc, ixret, et, false);
                    setOutputDimensions(transform1);
                    setLineNumbers(transform1);
                    setLops(transform1);
                }
                break;
            }
        default:
            throw new HopsException("Unsupported lops construction for operation type '" + op + "'.");
    }
    // add reblock/checkpoint lops if necessary
    constructAndSetLopsDataFlowProperties();
    return getLops();
}
Also used : Group(org.apache.sysml.lops.Group) MultiThreadedHop(org.apache.sysml.hops.Hop.MultiThreadedHop) ArrayList(java.util.ArrayList) Lop(org.apache.sysml.lops.Lop) SortKeys(org.apache.sysml.lops.SortKeys) ExecType(org.apache.sysml.lops.LopProperties.ExecType) Transform(org.apache.sysml.lops.Transform) Aggregate(org.apache.sysml.lops.Aggregate)
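
The TRANSPOSE case above applies two algebraic shortcuts before building a Transform lop: a transpose whose input is itself a transpose collapses to that input, and a 1x1 matrix is returned unchanged. The sketch below isolates those checks; the class and method are hypothetical, while the Transform/getOperationType usage mirrors the code above.

import org.apache.sysml.lops.Lop;
import org.apache.sysml.lops.Transform;

// Hypothetical sketch (not SystemML code) of the transpose shortcuts in the TRANSPOSE case above.
class TransposeShortcutSketch {
    // Returns a simplified lop if a shortcut applies, otherwise null (caller builds a Transform lop).
    static Lop simplifyTranspose(Lop lin, long dim1, long dim2) {
        if (lin instanceof Transform && ((Transform) lin).getOperationType() == Transform.OperationTypes.Transpose)
            return lin.getInputs().get(0); // t(t(X)) -> X: reuse the transpose's input
        if (dim1 == 1 && dim2 == 1)
            return lin; // transpose of a 1x1 matrix is the matrix itself
        return null;
    }
}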

Example 19 with Aggregate

Use of org.apache.sysml.lops.Aggregate in project systemml by apache.

From class TernaryOp, method constructLopsCtable.

/**
 * Method to construct LOPs when op = CTABLE.
 */
private void constructLopsCtable() {
    if (_op != OpOp3.CTABLE)
        throw new HopsException("Unexpected operation: " + _op + ", expecting " + OpOp3.CTABLE);
    /*
		 * We must handle three different cases: case1 : all three
		 * inputs are vectors (e.g., F=ctable(A,B,W)) case2 : two
		 * vectors and one scalar (e.g., F=ctable(A,B)) case3 : one
		 * vector and two scalars (e.g., F=ctable(A))
		 */
    // identify the particular case
    // F=ctable(A,B,W)
    DataType dt1 = getInput().get(0).getDataType();
    DataType dt2 = getInput().get(1).getDataType();
    DataType dt3 = getInput().get(2).getDataType();
    Ctable.OperationTypes ternaryOpOrig = Ctable.findCtableOperationByInputDataTypes(dt1, dt2, dt3);
    // Compute lops for all inputs
    Lop[] inputLops = new Lop[getInput().size()];
    for (int i = 0; i < getInput().size(); i++) {
        inputLops[i] = getInput().get(i).constructLops();
    }
    ExecType et = optFindExecType();
    // reset reblock requirement (see MR ctable / construct lops)
    setRequiresReblock(false);
    if (et == ExecType.CP || et == ExecType.SPARK) {
        // for CP we support only ctable expand left
        Ctable.OperationTypes ternaryOp = isSequenceRewriteApplicable(true) ? Ctable.OperationTypes.CTABLE_EXPAND_SCALAR_WEIGHT : ternaryOpOrig;
        boolean ignoreZeros = false;
        if (isMatrixIgnoreZeroRewriteApplicable()) {
            // table - rmempty - rshape
            ignoreZeros = true;
            inputLops[0] = ((ParameterizedBuiltinOp) getInput().get(0)).getTargetHop().getInput().get(0).constructLops();
            inputLops[1] = ((ParameterizedBuiltinOp) getInput().get(1)).getTargetHop().getInput().get(0).constructLops();
        }
        Ctable ternary = new Ctable(inputLops, ternaryOp, getDataType(), getValueType(), ignoreZeros, et);
        ternary.getOutputParameters().setDimensions(_dim1, _dim2, getRowsInBlock(), getColsInBlock(), -1);
        setLineNumbers(ternary);
        // force blocked output in CP (see below), otherwise binarycell
        if (et == ExecType.SPARK) {
            ternary.getOutputParameters().setDimensions(_dim1, _dim2, -1, -1, -1);
            setRequiresReblock(true);
        } else
            ternary.getOutputParameters().setDimensions(_dim1, _dim2, getRowsInBlock(), getColsInBlock(), -1);
        // ternary opt, w/o reblock in CP
        setLops(ternary);
    } else { // MR
        // for MR we support both ctable expand left and right
        Ctable.OperationTypes ternaryOp = isSequenceRewriteApplicable() ? Ctable.OperationTypes.CTABLE_EXPAND_SCALAR_WEIGHT : ternaryOpOrig;
        Group group1 = null, group2 = null, group3 = null, group4 = null;
        group1 = new Group(inputLops[0], Group.OperationTypes.Sort, getDataType(), getValueType());
        group1.getOutputParameters().setDimensions(getDim1(), getDim2(), getRowsInBlock(), getColsInBlock(), getNnz());
        setLineNumbers(group1);
        Ctable ternary = null;
        // create "group" lops for MATRIX inputs
        switch(ternaryOp) {
            case CTABLE_TRANSFORM:
                // F = ctable(A,B,W)
                group2 = new Group(inputLops[1], Group.OperationTypes.Sort, getDataType(), getValueType());
                group2.getOutputParameters().setDimensions(getDim1(), getDim2(), getRowsInBlock(), getColsInBlock(), getNnz());
                setLineNumbers(group2);
                group3 = new Group(inputLops[2], Group.OperationTypes.Sort, getDataType(), getValueType());
                group3.getOutputParameters().setDimensions(getDim1(), getDim2(), getRowsInBlock(), getColsInBlock(), getNnz());
                setLineNumbers(group3);
                if (inputLops.length == 3)
                    ternary = new Ctable(new Lop[] { group1, group2, group3 }, ternaryOp, getDataType(), getValueType(), et);
                else
                    // output dimensions are given
                    ternary = new Ctable(new Lop[] { group1, group2, group3, inputLops[3], inputLops[4] }, ternaryOp, getDataType(), getValueType(), et);
                break;
            case CTABLE_TRANSFORM_SCALAR_WEIGHT:
                // F = ctable(A,B) or F = ctable(A,B,1)
                group2 = new Group(inputLops[1], Group.OperationTypes.Sort, getDataType(), getValueType());
                group2.getOutputParameters().setDimensions(getDim1(), getDim2(), getRowsInBlock(), getColsInBlock(), getNnz());
                setLineNumbers(group2);
                if (inputLops.length == 3)
                    ternary = new Ctable(new Lop[] { group1, group2, inputLops[2] }, ternaryOp, getDataType(), getValueType(), et);
                else
                    ternary = new Ctable(new Lop[] { group1, group2, inputLops[2], inputLops[3], inputLops[4] }, ternaryOp, getDataType(), getValueType(), et);
                break;
            case CTABLE_EXPAND_SCALAR_WEIGHT:
                // F=ctable(seq(1,N),A) or F = ctable(seq,A,1)
                // left 1, right 0 (index of input data)
                int left = isSequenceRewriteApplicable(true) ? 1 : 0;
                Group group = new Group(getInput().get(left).constructLops(), Group.OperationTypes.Sort, getDataType(), getValueType());
                group.getOutputParameters().setDimensions(getDim1(), getDim2(), getRowsInBlock(), getColsInBlock(), getNnz());
                if (inputLops.length == 3)
                    ternary = new Ctable(new Lop[] {
                        group, // matrix
                        getInput().get(2).constructLops(), // weight
                        new LiteralOp(left).constructLops() // left
                    }, ternaryOp, getDataType(), getValueType(), et);
                else
                    ternary = new Ctable(new Lop[] {
                        group, // matrix
                        getInput().get(2).constructLops(), // weight
                        new LiteralOp(left).constructLops(), // left
                        inputLops[3], inputLops[4]
                    }, ternaryOp, getDataType(), getValueType(), et);
                break;
            case CTABLE_TRANSFORM_HISTOGRAM:
                // F=ctable(A,1) or F = ctable(A,1,1)
                if (inputLops.length == 3)
                    ternary = new Ctable(new Lop[] { group1, getInput().get(1).constructLops(), getInput().get(2).constructLops() }, ternaryOp, getDataType(), getValueType(), et);
                else
                    ternary = new Ctable(new Lop[] { group1, getInput().get(1).constructLops(), getInput().get(2).constructLops(), inputLops[3], inputLops[4] }, ternaryOp, getDataType(), getValueType(), et);
                break;
            case CTABLE_TRANSFORM_WEIGHTED_HISTOGRAM:
                // F=ctable(A,1,W)
                group3 = new Group(getInput().get(2).constructLops(), Group.OperationTypes.Sort, getDataType(), getValueType());
                group3.getOutputParameters().setDimensions(getDim1(), getDim2(), getRowsInBlock(), getColsInBlock(), getNnz());
                setLineNumbers(group3);
                if (inputLops.length == 3)
                    ternary = new Ctable(new Lop[] { group1, getInput().get(1).constructLops(), group3 }, ternaryOp, getDataType(), getValueType(), et);
                else
                    ternary = new Ctable(new Lop[] { group1, getInput().get(1).constructLops(), group3, inputLops[3], inputLops[4] }, ternaryOp, getDataType(), getValueType(), et);
                break;
            default:
                throw new HopsException("Invalid ternary operator type: " + _op);
        }
        // output dimensions are not known at compilation time
        ternary.getOutputParameters().setDimensions(_dim1, _dim2, (_dimInputsPresent ? getRowsInBlock() : -1), (_dimInputsPresent ? getColsInBlock() : -1), -1);
        setLineNumbers(ternary);
        Lop lctable = ternary;
        if (!(_disjointInputs || ternaryOp == Ctable.OperationTypes.CTABLE_EXPAND_SCALAR_WEIGHT)) {
            // no need for aggregation if (1) inputs are indexed disjoint or (2) one side is a sequence w/ 1 increment
            group4 = new Group(ternary, Group.OperationTypes.Sort, getDataType(), getValueType());
            group4.getOutputParameters().setDimensions(_dim1, _dim2, (_dimInputsPresent ? getRowsInBlock() : -1), (_dimInputsPresent ? getColsInBlock() : -1), -1);
            setLineNumbers(group4);
            Aggregate agg1 = new Aggregate(group4, HopsAgg2Lops.get(AggOp.SUM), getDataType(), getValueType(), ExecType.MR);
            agg1.getOutputParameters().setDimensions(_dim1, _dim2, (_dimInputsPresent ? getRowsInBlock() : -1), (_dimInputsPresent ? getColsInBlock() : -1), -1);
            setLineNumbers(agg1);
            // kahanSum is used for aggregation, but the inputs do not have correction values
            agg1.setupCorrectionLocation(CorrectionLocationType.NONE);
            lctable = agg1;
        }
        setLops(lctable);
        // if dimensions are unknown at compile time, a reblock is required,
        // since ctable itself outputs in blocked format only if dims are known.
        if (!dimsKnown() && !_dimInputsPresent) {
            setRequiresReblock(true);
        }
    }
}
Also used : Group(org.apache.sysml.lops.Group) Lop(org.apache.sysml.lops.Lop) Ctable(org.apache.sysml.lops.Ctable) DataType(org.apache.sysml.parser.Expression.DataType) ExecType(org.apache.sysml.lops.LopProperties.ExecType) Aggregate(org.apache.sysml.lops.Aggregate)
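
The case identification at the top of the method is driven purely by the matrix/scalar pattern of the three ctable inputs, via Ctable.findCtableOperationByInputDataTypes. The demo below is a hypothetical, standalone illustration; the expected results in the comments are inferred from the case comments in the switch above (e.g., CTABLE_TRANSFORM for F=ctable(A,B,W)).

import org.apache.sysml.lops.Ctable;
import org.apache.sysml.parser.Expression.DataType;

// Hypothetical demo of the ctable case identification used above.
class CtableCaseDemo {
    public static void main(String[] args) {
        // F = ctable(A,B,W): three matrix inputs
        System.out.println(Ctable.findCtableOperationByInputDataTypes(
            DataType.MATRIX, DataType.MATRIX, DataType.MATRIX)); // expected: CTABLE_TRANSFORM
        // F = ctable(A,B): scalar weight
        System.out.println(Ctable.findCtableOperationByInputDataTypes(
            DataType.MATRIX, DataType.MATRIX, DataType.SCALAR)); // expected: CTABLE_TRANSFORM_SCALAR_WEIGHT
        // F = ctable(A,1,W): histogram with matrix weights
        System.out.println(Ctable.findCtableOperationByInputDataTypes(
            DataType.MATRIX, DataType.SCALAR, DataType.MATRIX)); // expected: CTABLE_TRANSFORM_WEIGHTED_HISTOGRAM
    }
}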

Example 20 with Aggregate

Use of org.apache.sysml.lops.Aggregate in project systemml by apache.

From class AggBinaryOp, method constructMRLopsCPMMWithLeftTransposeRewrite.

private Lop constructMRLopsCPMMWithLeftTransposeRewrite() {
    // guaranteed to exists
    Hop X = getInput().get(0).getInput().get(0);
    Hop Y = getInput().get(1);
    // right vector transpose CP
    Lop tY = new Transform(Y.constructLops(), OperationTypes.Transpose, getDataType(), getValueType(), ExecType.CP);
    tY.getOutputParameters().setDimensions(Y.getDim2(), Y.getDim1(), getRowsInBlock(), getColsInBlock(), Y.getNnz());
    setLineNumbers(tY);
    // matrix multiply
    MMCJType type = getMMCJAggregationType(X, Y);
    MMCJ mmcj = new MMCJ(tY, X.constructLops(), getDataType(), getValueType(), type, ExecType.MR);
    setOutputDimensions(mmcj);
    setLineNumbers(mmcj);
    Group grp = new Group(mmcj, Group.OperationTypes.Sort, getDataType(), getValueType());
    setOutputDimensions(grp);
    setLineNumbers(grp);
    Aggregate agg1 = new Aggregate(grp, HopsAgg2Lops.get(outerOp), getDataType(), getValueType(), ExecType.MR);
    setOutputDimensions(agg1);
    setLineNumbers(agg1);
    // aggregation uses kahanSum but the inputs do not have correction values
    agg1.setupCorrectionLocation(CorrectionLocationType.NONE);
    // result transpose CP
    Lop out = new Transform(agg1, OperationTypes.Transpose, getDataType(), getValueType(), ExecType.CP);
    out.getOutputParameters().setDimensions(X.getDim2(), Y.getDim2(), getRowsInBlock(), getColsInBlock(), getNnz());
    return out;
}
Also used : Group(org.apache.sysml.lops.Group) MMCJ(org.apache.sysml.lops.MMCJ) MultiThreadedHop(org.apache.sysml.hops.Hop.MultiThreadedHop) MMCJType(org.apache.sysml.lops.MMCJ.MMCJType) Lop(org.apache.sysml.lops.Lop) Transform(org.apache.sysml.lops.Transform) Aggregate(org.apache.sysml.lops.Aggregate)
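
The rewrite above exploits the identity t(X) %*% Y = t( t(Y) %*% X ): only the narrow right-hand side Y is transposed in CP, the MMCJ/Group/Aggregate chain computes t(Y) %*% X in MR, and a final cheap CP transpose restores the result. The plain-Java check below (standalone, no SystemML dependencies, all names illustrative) verifies the identity on a small example.

import java.util.Arrays;

// Standalone check of the left-transpose rewrite identity: t(X) %*% Y == t( t(Y) %*% X ).
class LeftTransposeRewriteCheck {
    static double[][] t(double[][] a) {
        double[][] r = new double[a[0].length][a.length];
        for (int i = 0; i < a.length; i++)
            for (int j = 0; j < a[0].length; j++)
                r[j][i] = a[i][j];
        return r;
    }
    static double[][] mm(double[][] a, double[][] b) {
        double[][] r = new double[a.length][b[0].length];
        for (int i = 0; i < a.length; i++)
            for (int k = 0; k < a[0].length; k++)
                for (int j = 0; j < b[0].length; j++)
                    r[i][j] += a[i][k] * b[k][j];
        return r;
    }
    public static void main(String[] args) {
        double[][] X = { { 1, 2 }, { 3, 4 }, { 5, 6 } }; // 3x2
        double[][] Y = { { 1, 0 }, { 0, 1 }, { 2, 3 } }; // 3x2
        double[][] lhs = mm(t(X), Y);    // t(X) %*% Y
        double[][] rhs = t(mm(t(Y), X)); // t( t(Y) %*% X )
        System.out.println(Arrays.deepEquals(lhs, rhs)); // prints true
    }
}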

Aggregations

Aggregate (org.apache.sysml.lops.Aggregate): 42
Group (org.apache.sysml.lops.Group): 38
MultiThreadedHop (org.apache.sysml.hops.Hop.MultiThreadedHop): 32
Lop (org.apache.sysml.lops.Lop): 32
DataPartition (org.apache.sysml.lops.DataPartition): 20
ExecType (org.apache.sysml.lops.LopProperties.ExecType): 20
PartialAggregate (org.apache.sysml.lops.PartialAggregate): 10
UnaryCP (org.apache.sysml.lops.UnaryCP): 10
CombineUnary (org.apache.sysml.lops.CombineUnary): 6
Data (org.apache.sysml.lops.Data): 6
GroupedAggregate (org.apache.sysml.lops.GroupedAggregate): 6
SortKeys (org.apache.sysml.lops.SortKeys): 6
Transform (org.apache.sysml.lops.Transform): 6
Unary (org.apache.sysml.lops.Unary): 6
ArrayList (java.util.ArrayList): 4
SparkAggType (org.apache.sysml.hops.AggBinaryOp.SparkAggType): 4
OperationTypes (org.apache.sysml.lops.Aggregate.OperationTypes): 4
AppendR (org.apache.sysml.lops.AppendR): 4
CumulativePartialAggregate (org.apache.sysml.lops.CumulativePartialAggregate): 4
CumulativeSplitAggregate (org.apache.sysml.lops.CumulativeSplitAggregate): 4