Search in sources :

Example 36 with AggregateOperator

use of org.apache.sysml.runtime.matrix.operators.AggregateOperator in project incubator-systemml by apache.

the class SpoofSPInstruction method processInstruction.

/**
 * Executes a generated (codegen) operator on Spark, dispatching on the
 * superclass of the generated class {@code _class}.
 *
 * <p>Steps visible in this method: decide which inputs are broadcast, build the
 * joined input RDD, collect broadcast matrices and scalar inputs, then run one of
 * four operator templates (cellwise, multi-aggregate, outer-product, rowwise) and
 * bind the result to the output variable {@code _out}.
 *
 * @param ec execution context; cast to {@link SparkExecutionContext} without a type check
 * @throws DMLRuntimeException if the superclass of {@code _class} is none of the four
 *         handled templates, or if a rowwise operator is given an input whose column
 *         count exceeds the columns per block
 */
@Override
public void processInstruction(ExecutionContext ec) {
    SparkExecutionContext sec = (SparkExecutionContext) ec;
    // decide upon broadcast side inputs
    boolean[] bcVect = determineBroadcastInputs(sec, _in);
    boolean[] bcVect2 = getMatrixBroadcastVector(sec, _in, bcVect);
    // index into _in of the input whose characteristics drive the decisions below
    int main = getMainInputIndex(_in, bcVect);
    // create joined input rdd w/ replication if needed
    MatrixCharacteristics mcIn = sec.getMatrixCharacteristics(_in[main].getName());
    // last argument is true only for the outer-product template
    JavaPairRDD<MatrixIndexes, MatrixBlock[]> in = createJoinedInputRDD(sec, _in, bcVect, (_class.getSuperclass() == SpoofOuterProduct.class));
    JavaPairRDD<MatrixIndexes, MatrixBlock> out = null;
    // create lists of input broadcasts and scalars
    ArrayList<PartitionedBroadcast<MatrixBlock>> bcMatrices = new ArrayList<>();
    ArrayList<ScalarObject> scalars = new ArrayList<>();
    for (int i = 0; i < _in.length; i++) {
        // matrix inputs that are not flagged for broadcast are skipped here;
        // presumably they are already part of the joined RDD 'in' - confirm in createJoinedInputRDD
        if (_in[i].getDataType() == DataType.MATRIX && bcVect[i]) {
            bcMatrices.add(sec.getBroadcastForVariable(_in[i].getName()));
        } else if (_in[i].getDataType() == DataType.SCALAR) {
            // note: even if literal, it might be compiled as scalar placeholder
            scalars.add(sec.getScalarInput(_in[i].getName(), _in[i].getValueType(), _in[i].isLiteral()));
        }
    }
    // execute generated operator
    if (// CELL
    _class.getSuperclass() == SpoofCellwise.class) {
        SpoofCellwise op = (SpoofCellwise) CodegenUtils.createInstance(_class);
        AggregateOperator aggop = getAggregateOperator(op.getAggOp());
        if (_out.getDataType() == DataType.MATRIX) {
            // execute codegen block operation
            // (second argument is Spark's preservesPartitioning flag)
            out = in.mapPartitionsToPair(new CellwiseFunction(_class.getName(), _classBytes, bcVect2, bcMatrices, scalars), true);
            // a row/column aggregate needs a second aggregation by key only when the
            // aggregated dimension exceeds one block
            if ((op.getCellType() == CellType.ROW_AGG && mcIn.getCols() > mcIn.getColsPerBlock()) || (op.getCellType() == CellType.COL_AGG && mcIn.getRows() > mcIn.getRowsPerBlock())) {
                // number of distinct output keys, used to cap the number of partitions
                long numBlocks = (op.getCellType() == CellType.ROW_AGG) ? mcIn.getNumRowBlocks() : mcIn.getNumColBlocks();
                out = RDDAggregateUtils.aggByKeyStable(out, aggop, (int) Math.min(out.getNumPartitions(), numBlocks), false);
            }
            sec.setRDDHandleForVariable(_out.getName(), out);
            // maintain lineage info and output characteristics
            maintainLineageInfo(sec, _in, bcVect, _out);
            updateOutputMatrixCharacteristics(sec, op);
        } else {
            // SCALAR
            out = in.mapPartitionsToPair(new CellwiseFunction(_class.getName(), _classBytes, bcVect2, bcMatrices, scalars), true);
            // aggregate all blocks into one and read the scalar result from cell (0,0)
            MatrixBlock tmpMB = RDDAggregateUtils.aggStable(out, aggop);
            sec.setVariable(_out.getName(), new DoubleObject(tmpMB.getValue(0, 0)));
        }
    } else if (// MAGG
    _class.getSuperclass() == SpoofMultiAggregate.class) {
        SpoofMultiAggregate op = (SpoofMultiAggregate) CodegenUtils.createInstance(_class);
        AggOp[] aggOps = op.getAggOps();
        // map each joined block to a partial result, then fold all partials into one
        // block, starting from an empty MatrixBlock
        MatrixBlock tmpMB = in.mapToPair(new MultiAggregateFunction(_class.getName(), _classBytes, bcVect2, bcMatrices, scalars)).values().fold(new MatrixBlock(), new MultiAggAggregateFunction(aggOps));
        sec.setMatrixOutput(_out.getName(), tmpMB, getExtendedOpcode());
    } else if (// OUTER
    _class.getSuperclass() == SpoofOuterProduct.class) {
        if (_out.getDataType() == DataType.MATRIX) {
            SpoofOperator op = (SpoofOperator) CodegenUtils.createInstance(_class);
            OutProdType type = ((SpoofOuterProduct) op).getOuterProdType();
            // update matrix characteristics
            // (done before the read of mcOut on the next statement)
            updateOutputMatrixCharacteristics(sec, op);
            MatrixCharacteristics mcOut = sec.getMatrixCharacteristics(_out.getName());
            out = in.mapPartitionsToPair(new OuterProductFunction(_class.getName(), _classBytes, bcVect2, bcMatrices, scalars), true);
            // only the left/right outer-product types are summed by key afterwards
            if (type == OutProdType.LEFT_OUTER_PRODUCT || type == OutProdType.RIGHT_OUTER_PRODUCT) {
                long numBlocks = mcOut.getNumRowBlocks() * mcOut.getNumColBlocks();
                out = RDDAggregateUtils.sumByKeyStable(out, (int) Math.min(out.getNumPartitions(), numBlocks), false);
            }
            sec.setRDDHandleForVariable(_out.getName(), out);
            // maintain lineage info and output characteristics
            maintainLineageInfo(sec, _in, bcVect, _out);
        } else {
            // scalar output: sum all blocks and read the result from cell (0,0)
            out = in.mapPartitionsToPair(new OuterProductFunction(_class.getName(), _classBytes, bcVect2, bcMatrices, scalars), true);
            MatrixBlock tmp = RDDAggregateUtils.sumStable(out);
            sec.setVariable(_out.getName(), new DoubleObject(tmp.getValue(0, 0)));
        }
    } else if (_class.getSuperclass() == SpoofRowwise.class) {
        // ROW
        // rowwise operators are rejected when the input has more columns than one block holds
        if (mcIn.getCols() > mcIn.getColsPerBlock()) {
            throw new DMLRuntimeException("Invalid spark rowwise operator w/ ncol=" + mcIn.getCols() + ", ncolpb=" + mcIn.getColsPerBlock() + ".");
        }
        SpoofRowwise op = (SpoofRowwise) CodegenUtils.createInstance(_class);
        // second column dimension: the operator's constant if the row type uses one,
        // else the column count of the second input for B1 row types, else -1
        long clen2 = op.getRowType().isConstDim2(op.getConstDim2()) ? op.getConstDim2() : op.getRowType().isRowTypeB1() ? sec.getMatrixCharacteristics(_in[1].getName()).getCols() : -1;
        RowwiseFunction fmmc = new RowwiseFunction(_class.getName(), _classBytes, bcVect2, bcMatrices, scalars, (int) mcIn.getCols(), (int) clen2);
        // the boolean (Spark's preservesPartitioning) is true only for ROW_AGG and NO_AGG
        out = in.mapPartitionsToPair(fmmc, op.getRowType() == RowType.ROW_AGG || op.getRowType() == RowType.NO_AGG);
        if (op.getRowType().isColumnAgg() || op.getRowType() == RowType.FULL_AGG) {
            // column and full aggregates collapse to a single block
            MatrixBlock tmpMB = RDDAggregateUtils.sumStable(out);
            if (op.getRowType().isColumnAgg())
                sec.setMatrixOutput(_out.getName(), tmpMB, getExtendedOpcode());
            else
                sec.setScalarOutput(_out.getName(), new DoubleObject(tmpMB.quickGetValue(0, 0)));
        } else // row-agg or no-agg
        {
            // NOTE(review): the guard at the top of this branch already throws when
            // getCols() > getColsPerBlock(), so this condition looks unreachable - confirm
            if (op.getRowType() == RowType.ROW_AGG && mcIn.getCols() > mcIn.getColsPerBlock()) {
                out = RDDAggregateUtils.sumByKeyStable(out, (int) Math.min(out.getNumPartitions(), mcIn.getNumRowBlocks()), false);
            }
            sec.setRDDHandleForVariable(_out.getName(), out);
            // maintain lineage info and output characteristics
            maintainLineageInfo(sec, _in, bcVect, _out);
            updateOutputMatrixCharacteristics(sec, op);
        }
    } else {
        throw new DMLRuntimeException("Operator " + _class.getSuperclass() + " is not supported on Spark");
    }
}
Also used : MatrixBlock(org.apache.sysml.runtime.matrix.data.MatrixBlock) SpoofRowwise(org.apache.sysml.runtime.codegen.SpoofRowwise) DoubleObject(org.apache.sysml.runtime.instructions.cp.DoubleObject) ArrayList(java.util.ArrayList) SpoofOperator(org.apache.sysml.runtime.codegen.SpoofOperator) ScalarObject(org.apache.sysml.runtime.instructions.cp.ScalarObject) PartitionedBroadcast(org.apache.sysml.runtime.instructions.spark.data.PartitionedBroadcast) AggregateOperator(org.apache.sysml.runtime.matrix.operators.AggregateOperator) SparkExecutionContext(org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext) SpoofMultiAggregate(org.apache.sysml.runtime.codegen.SpoofMultiAggregate) OutProdType(org.apache.sysml.runtime.codegen.SpoofOuterProduct.OutProdType) MatrixIndexes(org.apache.sysml.runtime.matrix.data.MatrixIndexes) SpoofOuterProduct(org.apache.sysml.runtime.codegen.SpoofOuterProduct) MatrixCharacteristics(org.apache.sysml.runtime.matrix.MatrixCharacteristics) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException) SpoofCellwise(org.apache.sysml.runtime.codegen.SpoofCellwise)

Example 37 with AggregateOperator

use of org.apache.sysml.runtime.matrix.operators.AggregateOperator in project incubator-systemml by apache.

the class ExtractGroup method execute.

/**
 * Converts one pair of row-aligned group/target blocks into weighted cells.
 *
 * <p>Two paths exist. If {@code _op} is an {@link AggregateOperator}, {@code _ngroups}
 * is positive and the (ngroups x target columns) shape passes
 * {@code OptimizerUtils.isValidCPDimensions}, the block is pre-aggregated locally and
 * only non-zero aggregated cells are emitted. Otherwise one cell is emitted per
 * (row, column) of {@code target}, keyed by the group value found in column 0 of
 * {@code group}. Every emitted cell has weight 1.
 *
 * @param ix block index of the input; its column index and {@code _bclen} give the column offset
 * @param group block holding the group value of each row in column 0
 * @param target block holding the values to be grouped
 * @return emitted pairs of (output index, weighted cell)
 * @throws Exception if the two blocks differ in row count, or (general path only)
 *         a group value is smaller than 1
 */
protected Iterable<Tuple2<MatrixIndexes, WeightedCell>> execute(MatrixIndexes ix, MatrixBlock group, MatrixBlock target) throws Exception {
    // both blocks must describe the same rows
    if (group.getNumRows() != target.getNumRows()) {
        throw new Exception("The blocksize for group and target blocks are mismatched: " + group.getNumRows() + " != " + target.getNumRows());
    }

    final ArrayList<Tuple2<MatrixIndexes, WeightedCell>> cells = new ArrayList<>();
    final long colOffset = (ix.getColumnIndex() - 1) * _bclen;

    // local pre-aggregation is only possible for sum w/ known output dimensions
    final boolean preAggregate = _op instanceof AggregateOperator
        && _ngroups > 0
        && OptimizerUtils.isValidCPDimensions(_ngroups, target.getNumColumns());

    if (!preAggregate) {
        // general case: forward every target cell under its row's group value
        for (int r = 0; r < group.getNumRows(); r++) {
            final long groupId = UtilFunctions.toLong(group.quickGetValue(r, 0));
            if (groupId < 1) {
                throw new Exception("Expected group values to be greater than equal to 1 but found " + groupId);
            }
            for (int c = 0; c < target.getNumColumns(); c++) {
                WeightedCell cell = new WeightedCell();
                cell.setValue(target.quickGetValue(r, c));
                cell.setWeight(1);
                cells.add(new Tuple2<>(new MatrixIndexes(groupId, colOffset + c + 1), cell));
            }
        }
        return cells;
    }

    // pre-aggregated case: emit only the non-zero cells of the aggregated block
    final MatrixBlock aggregated = group.groupedAggOperations(target, null, new MatrixBlock(), (int) _ngroups, _op);
    for (int r = 0; r < aggregated.getNumRows(); r++) {
        for (int c = 0; c < aggregated.getNumColumns(); c++) {
            final double value = aggregated.quickGetValue(r, c);
            if (value == 0) {
                continue;
            }
            WeightedCell cell = new WeightedCell();
            cell.setValue(value);
            cell.setWeight(1);
            cells.add(new Tuple2<>(new MatrixIndexes(r + 1, colOffset + c + 1), cell));
        }
    }
    return cells;
}
Also used : WeightedCell(org.apache.sysml.runtime.matrix.data.WeightedCell) MatrixBlock(org.apache.sysml.runtime.matrix.data.MatrixBlock) Tuple2(scala.Tuple2) MatrixIndexes(org.apache.sysml.runtime.matrix.data.MatrixIndexes) ArrayList(java.util.ArrayList) AggregateOperator(org.apache.sysml.runtime.matrix.operators.AggregateOperator)

Example 38 with AggregateOperator

use of org.apache.sysml.runtime.matrix.operators.AggregateOperator in project incubator-systemml by apache.

the class PerformGroupByAggInCombiner method call.

/**
 * Merges two weighted cells into a single partial aggregate.
 *
 * <p>For a {@link CMOperator} flagged as a partial aggregate operator, both
 * (value, weight) pairs are fed into the matching {@link CM} function; the output
 * carries the partial result and the accumulated weight. For an
 * {@link AggregateOperator}, each value is multiplied by its weight and aggregated
 * starting from the operator's initial value, with or without correction depending
 * on {@code correctionExists}; the output weight is then 1.
 *
 * @param value1 first cell to combine
 * @param value2 second cell to combine
 * @return a new cell holding the combined value and weight
 * @throws DMLRuntimeException if {@code _op} is a non-partial {@link CMOperator}
 *         or is neither a {@link CMOperator} nor an {@link AggregateOperator}
 */
@Override
public WeightedCell call(WeightedCell value1, WeightedCell value2) throws Exception {
    WeightedCell outCell = new WeightedCell();
    if (_op instanceof CMOperator) {
        // everything except sum
        CMOperator cmop = (CMOperator) _op;
        if (!cmop.isPartialAggregateOperator()) {
            // non-partial operators need all tuples of a group at once
            throw new DMLRuntimeException("Incorrect usage, should have used PerformGroupByAggInReducer");
        }
        // allocate the accumulator only on the path that uses it
        CM_COV_Object cmObj = new CM_COV_Object();
        cmObj.reset();
        CM lcmFn = CM.getCMFnObject(cmop.aggOpType);
        // partial aggregate cm operator
        lcmFn.execute(cmObj, value1.getValue(), value1.getWeight());
        lcmFn.execute(cmObj, value2.getValue(), value2.getWeight());
        outCell.setValue(cmObj.getRequiredPartialResult(_op));
        outCell.setWeight(cmObj.getWeight());
    } else if (_op instanceof AggregateOperator) {
        // sum
        AggregateOperator aggop = (AggregateOperator) _op;
        if (aggop.correctionExists) {
            // partial aggregate with correction
            KahanObject buffer = new KahanObject(aggop.initialValue, 0);
            aggop.increOp.fn.execute(buffer, value1.getValue() * value1.getWeight());
            aggop.increOp.fn.execute(buffer, value2.getValue() * value2.getWeight());
            outCell.setValue(buffer._sum);
        } else {
            // partial aggregate without correction
            double v = aggop.initialValue;
            v = aggop.increOp.fn.execute(v, value1.getValue() * value1.getWeight());
            v = aggop.increOp.fn.execute(v, value2.getValue() * value2.getWeight());
            outCell.setValue(v);
        }
        // weights were already multiplied into the values above
        outCell.setWeight(1);
    } else {
        throw new DMLRuntimeException("Unsupported operator in grouped aggregate instruction:" + _op);
    }
    return outCell;
}
Also used : WeightedCell(org.apache.sysml.runtime.matrix.data.WeightedCell) CM_COV_Object(org.apache.sysml.runtime.instructions.cp.CM_COV_Object) AggregateOperator(org.apache.sysml.runtime.matrix.operators.AggregateOperator) KahanObject(org.apache.sysml.runtime.instructions.cp.KahanObject) CM(org.apache.sysml.runtime.functionobjects.CM) CMOperator(org.apache.sysml.runtime.matrix.operators.CMOperator) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException)

Example 39 with AggregateOperator

use of org.apache.sysml.runtime.matrix.operators.AggregateOperator in project incubator-systemml by apache.

the class PerformGroupByAggInReducer method call.

/**
 * Reduces all weighted cells of one key to a single result cell.
 *
 * <p>For a {@link CMOperator} that is not a partial aggregate operator, every
 * (value, weight) pair is fed into the matching {@link CM} function and the
 * operator's required result is returned. For an {@link AggregateOperator}, each
 * value is multiplied by its weight and aggregated starting from the operator's
 * initial value, with or without correction depending on {@code correctionExists}.
 * The output weight is always 1.
 *
 * @param kv the cells that share a key
 * @return a new cell holding the aggregated value with weight 1
 * @throws DMLRuntimeException if {@code op} is a partial-aggregate {@link CMOperator}
 *         or is neither a {@link CMOperator} nor an {@link AggregateOperator}
 */
@Override
public WeightedCell call(Iterable<WeightedCell> kv) throws Exception {
    WeightedCell outCell = new WeightedCell();
    if (op instanceof CMOperator) {
        // everything except sum
        CMOperator cmop = (CMOperator) op;
        CM lcmFn = CM.getCMFnObject(cmop.aggOpType);
        if (cmop.isPartialAggregateOperator()) {
            throw new DMLRuntimeException("Incorrect usage, should have used PerformGroupByAggInCombiner");
        }
        // allocate the accumulator only on the path that uses it
        CM_COV_Object cmObj = new CM_COV_Object();
        cmObj.reset();
        for (WeightedCell value : kv) {
            lcmFn.execute(cmObj, value.getValue(), value.getWeight());
        }
        outCell.setValue(cmObj.getRequiredResult(op));
    } else if (op instanceof AggregateOperator) {
        // sum
        AggregateOperator aggop = (AggregateOperator) op;
        if (aggop.correctionExists) {
            // aggregate with correction
            KahanObject buffer = new KahanObject(aggop.initialValue, 0);
            for (WeightedCell value : kv) {
                aggop.increOp.fn.execute(buffer, value.getValue() * value.getWeight());
            }
            outCell.setValue(buffer._sum);
        } else {
            // aggregate without correction
            double v = aggop.initialValue;
            for (WeightedCell value : kv) {
                v = aggop.increOp.fn.execute(v, value.getValue() * value.getWeight());
            }
            outCell.setValue(v);
        }
    } else {
        throw new DMLRuntimeException("Unsupported operator in grouped aggregate instruction:" + op);
    }
    // every successful path yields a fully aggregated cell of weight 1
    outCell.setWeight(1);
    return outCell;
}
Also used : WeightedCell(org.apache.sysml.runtime.matrix.data.WeightedCell) CM_COV_Object(org.apache.sysml.runtime.instructions.cp.CM_COV_Object) AggregateOperator(org.apache.sysml.runtime.matrix.operators.AggregateOperator) KahanObject(org.apache.sysml.runtime.instructions.cp.KahanObject) CM(org.apache.sysml.runtime.functionobjects.CM) CMOperator(org.apache.sysml.runtime.matrix.operators.CMOperator) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException)

Example 40 with AggregateOperator

use of org.apache.sysml.runtime.matrix.operators.AggregateOperator in project incubator-systemml by apache.

the class MatrixBlock method max.

/**
 * Wrapper method for reduceall-max of a matrix.
 *
 * <p>Builds a "max" aggregate with initial value {@code Double.NEGATIVE_INFINITY},
 * applies it over the whole block via {@code LibMatrixAgg.aggregateUnaryMatrix},
 * and reads the result from a 1x1 output block.
 *
 * @return the value of cell (0,0) of the 1x1 aggregation result, i.e. the
 *         reduce-all maximum of this block
 */
public double max() {
    // 1x1 output block that receives the aggregate
    MatrixBlock result = new MatrixBlock(1, 1, false);
    // reduce-all operator wrapping a max aggregate seeded with -infinity
    AggregateUnaryOperator reduceAllMax = new AggregateUnaryOperator(
        new AggregateOperator(Double.NEGATIVE_INFINITY, Builtin.getBuiltinFnObject("max")),
        ReduceAll.getReduceAllFnObject());
    LibMatrixAgg.aggregateUnaryMatrix(this, result, reduceAllMax);
    return result.quickGetValue(0, 0);
}
Also used : AggregateUnaryOperator(org.apache.sysml.runtime.matrix.operators.AggregateUnaryOperator) AggregateOperator(org.apache.sysml.runtime.matrix.operators.AggregateOperator)

Aggregations

AggregateOperator (org.apache.sysml.runtime.matrix.operators.AggregateOperator): 42; DMLRuntimeException (org.apache.sysml.runtime.DMLRuntimeException): 17; AggregateBinaryOperator (org.apache.sysml.runtime.matrix.operators.AggregateBinaryOperator): 16; MatrixBlock (org.apache.sysml.runtime.matrix.data.MatrixBlock): 12; AggregateUnaryOperator (org.apache.sysml.runtime.matrix.operators.AggregateUnaryOperator): 11; CPOperand (org.apache.sysml.runtime.instructions.cp.CPOperand): 10; CorrectionLocationType (org.apache.sysml.lops.PartialAggregate.CorrectionLocationType): 9; CompressedMatrixBlock (org.apache.sysml.runtime.compress.CompressedMatrixBlock): 8; CM (org.apache.sysml.runtime.functionobjects.CM): 8; CMOperator (org.apache.sysml.runtime.matrix.operators.CMOperator): 7; KahanObject (org.apache.sysml.runtime.instructions.cp.KahanObject): 5; WeightedCell (org.apache.sysml.runtime.matrix.data.WeightedCell): 5; BinaryOperator (org.apache.sysml.runtime.matrix.operators.BinaryOperator): 4; Operator (org.apache.sysml.runtime.matrix.operators.Operator): 4; ArrayList (java.util.ArrayList): 3; SparkAggType (org.apache.sysml.hops.AggBinaryOp.SparkAggType): 3; SparkExecutionContext (org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext): 3; CM_COV_Object (org.apache.sysml.runtime.instructions.cp.CM_COV_Object): 3; MatrixIndexes (org.apache.sysml.runtime.matrix.data.MatrixIndexes): 3; IOException (java.io.IOException): 2