Search in sources :

Example 1 with AggregateDropCorrectionFunction

use of org.apache.sysml.runtime.instructions.spark.functions.AggregateDropCorrectionFunction in project incubator-systemml by apache.

the class AggregateTernarySPInstruction method processInstruction.

/**
 * Runs the aggregate ternary operation on Spark. The block RDDs of the first
 * two inputs (and of the third input unless it is a literal) are joined and
 * mapped through the ternary aggregate function; the partial results are then
 * combined. Depending on the operator's index function and the dimensions of
 * the first input, the result is stored as a scalar, as a single matrix block,
 * or as an RDD handle with lineage to the inputs.
 *
 * @param ec execution context; cast to {@link SparkExecutionContext}
 */
@Override
public void processInstruction(ExecutionContext ec) {
    SparkExecutionContext sec = (SparkExecutionContext) ec;

    // characteristics of the first input and block RDD handles of the operands
    MatrixCharacteristics mcIn = sec.getMatrixCharacteristics(input1.getName());
    JavaPairRDD<MatrixIndexes, MatrixBlock> first = sec.getBinaryBlockRDDHandleForVariable(input1.getName());
    JavaPairRDD<MatrixIndexes, MatrixBlock> second = sec.getBinaryBlockRDDHandleForVariable(input2.getName());
    // third operand is a matrix or literal 1; the literal has no RDD handle
    JavaPairRDD<MatrixIndexes, MatrixBlock> third = null;
    if (!input3.isLiteral()) {
        third = sec.getBinaryBlockRDDHandleForVariable(input3.getName());
    }

    // execute aggregate ternary operation per joined block
    AggregateTernaryOperator aggop = (AggregateTernaryOperator) _optr;
    JavaPairRDD<MatrixIndexes, MatrixBlock> partials;
    if (third == null) {
        // 2 inputs (third is literal 1)
        partials = first.join(second).mapToPair(new RDDAggregateTernaryFunction2(aggop));
    } else {
        // 3 inputs
        partials = first.join(second).join(third).mapToPair(new RDDAggregateTernaryFunction(aggop));
    }

    // tak+*: aggregate and create scalar output (no lineage because scalar)
    if (aggop.indexFn instanceof ReduceAll) {
        MatrixBlock total = RDDAggregateUtils.sumStable(partials.values());
        DoubleObject scalar = new DoubleObject(total.getValue(0, 0));
        sec.setVariable(output.getName(), scalar);
        return;
    }

    // tack+* single block: aggregate into one block and drop the correction
    if (mcIn.dimsKnown() && mcIn.getCols() <= mcIn.getColsPerBlock()) {
        MatrixBlock block = RDDAggregateUtils.aggStable(partials, aggop.aggOp);
        block.dropLastRowsOrColumns(aggop.aggOp.correctionLocation);
        // put output block into symbol table (no lineage because single block);
        // this also includes implicit maintenance of matrix characteristics
        sec.setMatrixOutput(output.getName(), block, getExtendedOpcode());
        return;
    }

    // tack+* multi block: aggregate by key and drop the correction per block
    JavaPairRDD<MatrixIndexes, MatrixBlock> aggregated = RDDAggregateUtils.aggByKeyStable(partials, aggop.aggOp, false);
    JavaPairRDD<MatrixIndexes, MatrixBlock> result = aggregated.mapValues(new AggregateDropCorrectionFunction(aggop.aggOp));

    // put output RDD handle into symbol table and register lineage
    updateUnaryAggOutputMatrixCharacteristics(sec, aggop.indexFn);
    sec.setRDDHandleForVariable(output.getName(), result);
    sec.addLineageRDD(output.getName(), input1.getName());
    sec.addLineageRDD(output.getName(), input2.getName());
    if (third != null) {
        sec.addLineageRDD(output.getName(), input3.getName());
    }
}
Also used : ReduceAll(org.apache.sysml.runtime.functionobjects.ReduceAll) MatrixBlock(org.apache.sysml.runtime.matrix.data.MatrixBlock) MatrixIndexes(org.apache.sysml.runtime.matrix.data.MatrixIndexes) DoubleObject(org.apache.sysml.runtime.instructions.cp.DoubleObject) SparkExecutionContext(org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext) AggregateDropCorrectionFunction(org.apache.sysml.runtime.instructions.spark.functions.AggregateDropCorrectionFunction) AggregateTernaryOperator(org.apache.sysml.runtime.matrix.operators.AggregateTernaryOperator) MatrixCharacteristics(org.apache.sysml.runtime.matrix.MatrixCharacteristics)

Example 2 with AggregateDropCorrectionFunction

use of org.apache.sysml.runtime.instructions.spark.functions.AggregateDropCorrectionFunction in project systemml by apache.

the class AggregateTernarySPInstruction method processInstruction.

/**
 * Runs the aggregate ternary operation on Spark. The block RDDs of the first
 * two inputs (and of the third input unless it is a literal) are joined and
 * mapped through the ternary aggregate function; the partial results are then
 * combined. Depending on the operator's index function and the dimensions of
 * the first input, the result is stored as a scalar, as a single matrix block,
 * or as an RDD handle with lineage to the inputs.
 *
 * @param ec execution context; cast to {@link SparkExecutionContext}
 */
@Override
public void processInstruction(ExecutionContext ec) {
    SparkExecutionContext sec = (SparkExecutionContext) ec;

    // characteristics of the first input and block RDD handles of the operands
    MatrixCharacteristics mcIn = sec.getMatrixCharacteristics(input1.getName());
    JavaPairRDD<MatrixIndexes, MatrixBlock> first = sec.getBinaryBlockRDDHandleForVariable(input1.getName());
    JavaPairRDD<MatrixIndexes, MatrixBlock> second = sec.getBinaryBlockRDDHandleForVariable(input2.getName());
    // third operand is a matrix or literal 1; the literal has no RDD handle
    JavaPairRDD<MatrixIndexes, MatrixBlock> third = null;
    if (!input3.isLiteral()) {
        third = sec.getBinaryBlockRDDHandleForVariable(input3.getName());
    }

    // execute aggregate ternary operation per joined block
    AggregateTernaryOperator aggop = (AggregateTernaryOperator) _optr;
    JavaPairRDD<MatrixIndexes, MatrixBlock> partials;
    if (third == null) {
        // 2 inputs (third is literal 1)
        partials = first.join(second).mapToPair(new RDDAggregateTernaryFunction2(aggop));
    } else {
        // 3 inputs
        partials = first.join(second).join(third).mapToPair(new RDDAggregateTernaryFunction(aggop));
    }

    // tak+*: aggregate and create scalar output (no lineage because scalar)
    if (aggop.indexFn instanceof ReduceAll) {
        MatrixBlock total = RDDAggregateUtils.sumStable(partials.values());
        DoubleObject scalar = new DoubleObject(total.getValue(0, 0));
        sec.setVariable(output.getName(), scalar);
        return;
    }

    // tack+* single block: aggregate into one block and drop the correction
    if (mcIn.dimsKnown() && mcIn.getCols() <= mcIn.getColsPerBlock()) {
        MatrixBlock block = RDDAggregateUtils.aggStable(partials, aggop.aggOp);
        block.dropLastRowsOrColumns(aggop.aggOp.correctionLocation);
        // put output block into symbol table (no lineage because single block);
        // this also includes implicit maintenance of matrix characteristics
        sec.setMatrixOutput(output.getName(), block, getExtendedOpcode());
        return;
    }

    // tack+* multi block: aggregate by key and drop the correction per block
    JavaPairRDD<MatrixIndexes, MatrixBlock> aggregated = RDDAggregateUtils.aggByKeyStable(partials, aggop.aggOp, false);
    JavaPairRDD<MatrixIndexes, MatrixBlock> result = aggregated.mapValues(new AggregateDropCorrectionFunction(aggop.aggOp));

    // put output RDD handle into symbol table and register lineage
    updateUnaryAggOutputMatrixCharacteristics(sec, aggop.indexFn);
    sec.setRDDHandleForVariable(output.getName(), result);
    sec.addLineageRDD(output.getName(), input1.getName());
    sec.addLineageRDD(output.getName(), input2.getName());
    if (third != null) {
        sec.addLineageRDD(output.getName(), input3.getName());
    }
}
Also used : ReduceAll(org.apache.sysml.runtime.functionobjects.ReduceAll) MatrixBlock(org.apache.sysml.runtime.matrix.data.MatrixBlock) MatrixIndexes(org.apache.sysml.runtime.matrix.data.MatrixIndexes) DoubleObject(org.apache.sysml.runtime.instructions.cp.DoubleObject) SparkExecutionContext(org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext) AggregateDropCorrectionFunction(org.apache.sysml.runtime.instructions.spark.functions.AggregateDropCorrectionFunction) AggregateTernaryOperator(org.apache.sysml.runtime.matrix.operators.AggregateTernaryOperator) MatrixCharacteristics(org.apache.sysml.runtime.matrix.MatrixCharacteristics)

Example 3 with AggregateDropCorrectionFunction

use of org.apache.sysml.runtime.instructions.spark.functions.AggregateDropCorrectionFunction in project systemml by apache.

the class AggregateUnarySPInstruction method processInstruction.

/**
 * Runs the unary aggregate operation on Spark. The input block RDD is
 * optionally filtered (for the "uaktrace" opcode), then processed according
 * to the configured aggregation type: SINGLE_BLOCK aggregates everything into
 * one matrix block, NONE maps each block's value in place, and MULTI_BLOCK
 * aggregates blocks by key. The non-single-block results are registered as an
 * RDD handle with lineage to the input.
 *
 * @param ec execution context; cast to {@link SparkExecutionContext}
 */
@Override
public void processInstruction(ExecutionContext ec) {
    SparkExecutionContext sec = (SparkExecutionContext) ec;
    MatrixCharacteristics mc = sec.getMatrixCharacteristics(input1.getName());

    // get input; for trace, keep only the blocks passing the diagonal-block filter
    JavaPairRDD<MatrixIndexes, MatrixBlock> blocks = sec.getBinaryBlockRDDHandleForVariable(input1.getName());
    if (getOpcode().equalsIgnoreCase("uaktrace")) {
        blocks = blocks.filter(new FilterDiagBlocksFunction());
    }

    // operators for the unary aggregate and the final aggregation
    AggregateUnaryOperator auop = (AggregateUnaryOperator) _optr;
    AggregateOperator finalAgg = _aop;

    if (_aggtype == SparkAggType.SINGLE_BLOCK) {
        JavaRDD<MatrixBlock> partials = blocks.map(new RDDUAggFunction2(auop, mc.getRowsPerBlock(), mc.getColsPerBlock()));
        MatrixBlock merged = RDDAggregateUtils.aggStable(partials, finalAgg);
        // drop correction after aggregation
        merged.dropLastRowsOrColumns(finalAgg.correctionLocation);
        // put output block into symbol table (no lineage because single block);
        // this also includes implicit maintenance of matrix characteristics
        sec.setMatrixOutput(output.getName(), merged, getExtendedOpcode());
        return;
    }

    // MULTI_BLOCK or NONE
    if (_aggtype == SparkAggType.NONE) {
        // in case of no block aggregation, we always drop the correction as well as
        // use a partitioning-preserving mapvalues
        blocks = blocks.mapValues(new RDDUAggValueFunction(auop, mc.getRowsPerBlock(), mc.getColsPerBlock()));
    } else if (_aggtype == SparkAggType.MULTI_BLOCK) {
        // in case of multi-block aggregation, we always keep the correction
        blocks = blocks.mapToPair(new RDDUAggFunction(auop, mc.getRowsPerBlock(), mc.getColsPerBlock()));
        blocks = RDDAggregateUtils.aggByKeyStable(blocks, finalAgg, false);
        // drop the correction afterwards via a partitioning-preserving mapvalues
        if (auop.aggOp.correctionExists) {
            blocks = blocks.mapValues(new AggregateDropCorrectionFunction(finalAgg));
        }
    }

    // put output RDD handle into symbol table and register lineage
    updateUnaryAggOutputMatrixCharacteristics(sec, auop.indexFn);
    sec.setRDDHandleForVariable(output.getName(), blocks);
    sec.addLineageRDD(output.getName(), input1.getName());
}
Also used : MatrixBlock(org.apache.sysml.runtime.matrix.data.MatrixBlock) MatrixIndexes(org.apache.sysml.runtime.matrix.data.MatrixIndexes) AggregateDropCorrectionFunction(org.apache.sysml.runtime.instructions.spark.functions.AggregateDropCorrectionFunction) MatrixCharacteristics(org.apache.sysml.runtime.matrix.MatrixCharacteristics) FilterDiagBlocksFunction(org.apache.sysml.runtime.instructions.spark.functions.FilterDiagBlocksFunction) AggregateUnaryOperator(org.apache.sysml.runtime.matrix.operators.AggregateUnaryOperator) AggregateOperator(org.apache.sysml.runtime.matrix.operators.AggregateOperator) SparkExecutionContext(org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext)

Example 4 with AggregateDropCorrectionFunction

use of org.apache.sysml.runtime.instructions.spark.functions.AggregateDropCorrectionFunction in project incubator-systemml by apache.

the class AggregateUnarySPInstruction method processInstruction.

/**
 * Runs the unary aggregate operation on Spark. The input block RDD is
 * optionally filtered (for the "uaktrace" opcode), then processed according
 * to the configured aggregation type: SINGLE_BLOCK aggregates everything into
 * one matrix block, NONE maps each block's value in place, and MULTI_BLOCK
 * aggregates blocks by key. The non-single-block results are registered as an
 * RDD handle with lineage to the input.
 *
 * @param ec execution context; cast to {@link SparkExecutionContext}
 */
@Override
public void processInstruction(ExecutionContext ec) {
    SparkExecutionContext sec = (SparkExecutionContext) ec;
    MatrixCharacteristics mc = sec.getMatrixCharacteristics(input1.getName());

    // get input; for trace, keep only the blocks passing the diagonal-block filter
    JavaPairRDD<MatrixIndexes, MatrixBlock> blocks = sec.getBinaryBlockRDDHandleForVariable(input1.getName());
    if (getOpcode().equalsIgnoreCase("uaktrace")) {
        blocks = blocks.filter(new FilterDiagBlocksFunction());
    }

    // operators for the unary aggregate and the final aggregation
    AggregateUnaryOperator auop = (AggregateUnaryOperator) _optr;
    AggregateOperator finalAgg = _aop;

    if (_aggtype == SparkAggType.SINGLE_BLOCK) {
        JavaRDD<MatrixBlock> partials = blocks.map(new RDDUAggFunction2(auop, mc.getRowsPerBlock(), mc.getColsPerBlock()));
        MatrixBlock merged = RDDAggregateUtils.aggStable(partials, finalAgg);
        // drop correction after aggregation
        merged.dropLastRowsOrColumns(finalAgg.correctionLocation);
        // put output block into symbol table (no lineage because single block);
        // this also includes implicit maintenance of matrix characteristics
        sec.setMatrixOutput(output.getName(), merged, getExtendedOpcode());
        return;
    }

    // MULTI_BLOCK or NONE
    if (_aggtype == SparkAggType.NONE) {
        // in case of no block aggregation, we always drop the correction as well as
        // use a partitioning-preserving mapvalues
        blocks = blocks.mapValues(new RDDUAggValueFunction(auop, mc.getRowsPerBlock(), mc.getColsPerBlock()));
    } else if (_aggtype == SparkAggType.MULTI_BLOCK) {
        // in case of multi-block aggregation, we always keep the correction
        blocks = blocks.mapToPair(new RDDUAggFunction(auop, mc.getRowsPerBlock(), mc.getColsPerBlock()));
        blocks = RDDAggregateUtils.aggByKeyStable(blocks, finalAgg, false);
        // drop the correction afterwards via a partitioning-preserving mapvalues
        if (auop.aggOp.correctionExists) {
            blocks = blocks.mapValues(new AggregateDropCorrectionFunction(finalAgg));
        }
    }

    // put output RDD handle into symbol table and register lineage
    updateUnaryAggOutputMatrixCharacteristics(sec, auop.indexFn);
    sec.setRDDHandleForVariable(output.getName(), blocks);
    sec.addLineageRDD(output.getName(), input1.getName());
}
Also used : MatrixBlock(org.apache.sysml.runtime.matrix.data.MatrixBlock) MatrixIndexes(org.apache.sysml.runtime.matrix.data.MatrixIndexes) AggregateDropCorrectionFunction(org.apache.sysml.runtime.instructions.spark.functions.AggregateDropCorrectionFunction) MatrixCharacteristics(org.apache.sysml.runtime.matrix.MatrixCharacteristics) FilterDiagBlocksFunction(org.apache.sysml.runtime.instructions.spark.functions.FilterDiagBlocksFunction) AggregateUnaryOperator(org.apache.sysml.runtime.matrix.operators.AggregateUnaryOperator) AggregateOperator(org.apache.sysml.runtime.matrix.operators.AggregateOperator) SparkExecutionContext(org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext)

Example 5 with AggregateDropCorrectionFunction

use of org.apache.sysml.runtime.instructions.spark.functions.AggregateDropCorrectionFunction in project incubator-systemml by apache.

the class UaggOuterChainSPInstruction method processInstruction.

/**
 * Runs the unary-aggregate outer-chain operation on Spark. One input is read
 * as a block RDD and the other is broadcast; which is which depends on the
 * aggregate's index function and on whether {@code LibMatrixOuterAgg}
 * supports the operator combination. The per-partition results are either
 * aggregated into a single matrix block (for {@code ReduceAll}) or registered
 * as an RDD handle with lineage.
 *
 * @param ec execution context; cast to {@link SparkExecutionContext}
 */
@Override
public void processInstruction(ExecutionContext ec) {
    SparkExecutionContext sec = (SparkExecutionContext) ec;

    // decide which input is the RDD side and which is the broadcast side
    boolean rightCached = _uaggOp.indexFn instanceof ReduceCol
        || _uaggOp.indexFn instanceof ReduceAll
        || !LibMatrixOuterAgg.isSupportedUaggOp(_uaggOp, _bOp);
    String rddVar;
    String bcastVar;
    if (rightCached) {
        rddVar = input1.getName();
        bcastVar = input2.getName();
    } else {
        rddVar = input2.getName();
        bcastVar = input1.getName();
    }

    // get rdd input
    JavaPairRDD<MatrixIndexes, MatrixBlock> in1 = sec.getBinaryBlockRDDHandleForVariable(rddVar);
    MatrixCharacteristics mcIn = sec.getMatrixCharacteristics(rddVar);
    boolean noKeyChange = preservesPartitioning(mcIn, _uaggOp.indexFn);

    // execute UAggOuterChain instruction
    JavaPairRDD<MatrixIndexes, MatrixBlock> out;
    if (!LibMatrixOuterAgg.isSupportedUaggOp(_uaggOp, _bOp)) {
        // general case: use the partitioned broadcast of the other input
        PartitionedBroadcast<MatrixBlock> bv = sec.getBroadcastForVariable(bcastVar);
        // partitioning-preserving map-to-pair (under constraints)
        out = in1.mapPartitionsToPair(new RDDMapGenUAggOuterChainFunction(bv, _uaggOp, _aggOp, _bOp, mcIn), noKeyChange);
    } else {
        // supported case: broadcast the other input as a (sorted) double vector
        MatrixBlock mb = sec.getMatrixInput(bcastVar, getExtendedOpcode());
        sec.releaseMatrixInput(bcastVar, getExtendedOpcode());
        // prevent lineage tracking
        bcastVar = null;
        double[] vmb = DataConverter.convertToDoubleVector(mb);
        Broadcast<int[]> bvi = null;
        if (_uaggOp.aggOp.increOp.fn instanceof Builtin) {
            // builtin aggregates additionally broadcast prepared row indices
            int[] vix = LibMatrixOuterAgg.prepareRowIndices(mb.getNumColumns(), vmb, _bOp, _uaggOp);
            bvi = sec.getSparkContext().broadcast(vix);
        } else {
            Arrays.sort(vmb);
        }
        Broadcast<double[]> bv = sec.getSparkContext().broadcast(vmb);
        // partitioning-preserving map-to-pair (under constraints)
        out = in1.mapPartitionsToPair(new RDDMapUAggOuterChainFunction(bv, bvi, _bOp, _uaggOp), noKeyChange);
    }

    // final aggregation if required: RC AGG (output is scalar)
    if (_uaggOp.indexFn instanceof ReduceAll) {
        MatrixBlock tmp = RDDAggregateUtils.aggStable(out, _aggOp);
        // drop correction after aggregation
        tmp.dropLastRowsOrColumns(_aggOp.correctionLocation);
        // put output block into symbol table (no lineage because single block)
        sec.setMatrixOutput(output.getName(), tmp, getExtendedOpcode());
        return;
    }

    // R/C AGG (output is rdd): put output RDD handle into symbol table
    updateUnaryAggOutputMatrixCharacteristics(sec);
    if (_uaggOp.aggOp.correctionExists) {
        out = out.mapValues(new AggregateDropCorrectionFunction(_uaggOp.aggOp));
    }
    sec.setRDDHandleForVariable(output.getName(), out);
    sec.addLineageRDD(output.getName(), rddVar);
    // broadcast lineage is only tracked when the variable name was not cleared above
    if (bcastVar != null) {
        sec.addLineageBroadcast(output.getName(), bcastVar);
    }
}
Also used : ReduceCol(org.apache.sysml.runtime.functionobjects.ReduceCol) ReduceAll(org.apache.sysml.runtime.functionobjects.ReduceAll) MatrixBlock(org.apache.sysml.runtime.matrix.data.MatrixBlock) MatrixIndexes(org.apache.sysml.runtime.matrix.data.MatrixIndexes) AggregateDropCorrectionFunction(org.apache.sysml.runtime.instructions.spark.functions.AggregateDropCorrectionFunction) MatrixCharacteristics(org.apache.sysml.runtime.matrix.MatrixCharacteristics) SparkExecutionContext(org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext) Builtin(org.apache.sysml.runtime.functionobjects.Builtin)

Aggregations

SparkExecutionContext (org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext)6 AggregateDropCorrectionFunction (org.apache.sysml.runtime.instructions.spark.functions.AggregateDropCorrectionFunction)6 MatrixCharacteristics (org.apache.sysml.runtime.matrix.MatrixCharacteristics)6 MatrixBlock (org.apache.sysml.runtime.matrix.data.MatrixBlock)6 MatrixIndexes (org.apache.sysml.runtime.matrix.data.MatrixIndexes)6 ReduceAll (org.apache.sysml.runtime.functionobjects.ReduceAll)4 Builtin (org.apache.sysml.runtime.functionobjects.Builtin)2 ReduceCol (org.apache.sysml.runtime.functionobjects.ReduceCol)2 DoubleObject (org.apache.sysml.runtime.instructions.cp.DoubleObject)2 FilterDiagBlocksFunction (org.apache.sysml.runtime.instructions.spark.functions.FilterDiagBlocksFunction)2 AggregateOperator (org.apache.sysml.runtime.matrix.operators.AggregateOperator)2 AggregateTernaryOperator (org.apache.sysml.runtime.matrix.operators.AggregateTernaryOperator)2 AggregateUnaryOperator (org.apache.sysml.runtime.matrix.operators.AggregateUnaryOperator)2