Search in sources :

Example 1 with FilterDiagBlocksFunction

Use of org.apache.sysml.runtime.instructions.spark.functions.FilterDiagBlocksFunction in the project incubator-systemml by Apache.

In the class ReorgSPInstruction, method processInstruction:

/**
 * Runs this reorg instruction on the binary-block RDD of {@code input1} and registers the
 * resulting RDD under the name of {@code output}.
 *
 * <p>The opcode is matched case-insensitively against {@code r'} (transpose), {@code rev}
 * (reverse), {@code rdiag} (diag) and {@code rsort} (order).
 *
 * @param ec execution context; it is cast to {@link SparkExecutionContext}
 * @throws DMLRuntimeException if the opcode is none of the four listed above
 */
@Override
public void processInstruction(ExecutionContext ec) throws DMLRuntimeException {
    SparkExecutionContext sec = (SparkExecutionContext) ec;
    String opcode = getOpcode();

    // Input RDD handle and the characteristics of the input matrix.
    JavaPairRDD<MatrixIndexes, MatrixBlock> source = sec.getBinaryBlockRDDHandleForVariable(input1.getName());
    MatrixCharacteristics inChars = sec.getMatrixCharacteristics(input1.getName());

    JavaPairRDD<MatrixIndexes, MatrixBlock> result;
    if (opcode.equalsIgnoreCase("r'")) {
        // TRANSPOSE: one output block per input block.
        result = source.mapToPair(new ReorgMapFunction(opcode));
    } else if (opcode.equalsIgnoreCase("rev")) {
        // REVERSE: a merge by key is added only when the row count is not a multiple
        // of the block row size.
        result = source.flatMapToPair(new RDDRevFunction(inChars));
        boolean rowsFillBlocks = (inChars.getRows() % inChars.getRowsPerBlock() == 0);
        if (!rowsFillBlocks) {
            result = RDDAggregateUtils.mergeByKey(result, false);
        }
    } else if (opcode.equalsIgnoreCase("rdiag")) {
        // DIAG: a single-column input takes the vector-to-matrix path (diagV2M),
        // anything else the matrix-to-vector path (diagM2V).
        boolean matrixToVector = (inChars.getCols() != 1);
        if (matrixToVector) {
            JavaPairRDD<MatrixIndexes, MatrixBlock> diagBlocks = source.filter(new FilterDiagBlocksFunction());
            result = diagBlocks.mapToPair(new ReorgMapFunction(opcode));
        } else {
            result = source.flatMapToPair(new RDDDiagV2MFunction(inChars));
        }
    } else if (opcode.equalsIgnoreCase("rsort")) {
        // ORDER: sort by column 'col', ascending or descending, returning indexes or values.
        long col = ec.getScalarInput(_col.getName(), _col.getValueType(), _col.isLiteral()).getLongValue();
        boolean desc = ec.getScalarInput(_desc.getName(), _desc.getValueType(), _desc.isLiteral()).getBooleanValue();
        boolean ixret = ec.getScalarInput(_ixret.getName(), _ixret.getValueType(), _ixret.isLiteral()).getBooleanValue();
        boolean singleCol = (inChars.getCols() == 1);
        boolean ascending = !desc;

        // For multi-column inputs, narrow the RDD down to the sort column first.
        JavaPairRDD<MatrixIndexes, MatrixBlock> sortColumn = source;
        if (!singleCol) {
            JavaPairRDD<MatrixIndexes, MatrixBlock> columnBlocks =
                source.filter(new IsBlockInRange(1, inChars.getRows(), col, col, inChars));
            int colInBlock = (int) UtilFunctions.computeCellInBlock(col, inChars.getColsPerBlock());
            sortColumn = columnBlocks.mapValues(new ExtractColumn(colInBlock));
        }

        // Pick the sort variant: index sort, single-column ascending value sort,
        // or a full data sort (in-memory variant when _bSortIndInMem is set).
        if (ixret) {
            result = RDDSortUtils.sortIndexesByVal(sortColumn, ascending, inChars.getRows(), inChars.getRowsPerBlock());
        } else if (singleCol && ascending) {
            result = RDDSortUtils.sortByVal(sortColumn, inChars.getRows(), inChars.getRowsPerBlock());
        } else if (_bSortIndInMem) {
            result = RDDSortUtils.sortDataByValMemSort(sortColumn, source, ascending, inChars.getRows(), inChars.getCols(), inChars.getRowsPerBlock(), inChars.getColsPerBlock(), sec, (ReorgOperator) _optr);
        } else {
            result = RDDSortUtils.sortDataByVal(sortColumn, source, ascending, inChars.getRows(), inChars.getCols(), inChars.getRowsPerBlock(), inChars.getColsPerBlock());
        }
    } else {
        throw new DMLRuntimeException("Error: Incorrect opcode in ReorgSPInstruction:" + opcode);
    }

    // Publish the output: update its characteristics, bind the RDD handle, record lineage.
    updateReorgMatrixCharacteristics(sec);
    sec.setRDDHandleForVariable(output.getName(), result);
    sec.addLineageRDD(output.getName(), input1.getName());
}
Also used : MatrixBlock(org.apache.sysml.runtime.matrix.data.MatrixBlock) MatrixIndexes(org.apache.sysml.runtime.matrix.data.MatrixIndexes) IsBlockInRange(org.apache.sysml.runtime.instructions.spark.functions.IsBlockInRange) MatrixCharacteristics(org.apache.sysml.runtime.matrix.MatrixCharacteristics) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException) FilterDiagBlocksFunction(org.apache.sysml.runtime.instructions.spark.functions.FilterDiagBlocksFunction) SparkExecutionContext(org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext) ReorgMapFunction(org.apache.sysml.runtime.instructions.spark.functions.ReorgMapFunction)

Example 2 with FilterDiagBlocksFunction

Use of org.apache.sysml.runtime.instructions.spark.functions.FilterDiagBlocksFunction in the project incubator-systemml by Apache.

In the class AggregateUnarySPInstruction, method processInstruction:

/**
 * Runs this unary aggregate instruction on the binary-block RDD of {@code input1}.
 *
 * <p>For {@code SparkAggType.SINGLE_BLOCK} the aggregate is reduced to one
 * {@code MatrixBlock} and stored as a matrix output. Otherwise the resulting RDD handle is
 * registered under the name of {@code output}, together with its lineage.
 *
 * @param ec execution context; it is cast to {@link SparkExecutionContext}
 * @throws DMLRuntimeException declared by the instruction contract
 */
@Override
public void processInstruction(ExecutionContext ec) throws DMLRuntimeException {
    SparkExecutionContext sec = (SparkExecutionContext) ec;
    MatrixCharacteristics inChars = sec.getMatrixCharacteristics(input1.getName());

    // Fetch the input blocks; for trace, keep only those passing FilterDiagBlocksFunction.
    JavaPairRDD<MatrixIndexes, MatrixBlock> blocks = sec.getBinaryBlockRDDHandleForVariable(input1.getName());
    if (getOpcode().equalsIgnoreCase("uaktrace")) {
        blocks = blocks.filter(new FilterDiagBlocksFunction());
    }

    AggregateUnaryOperator unaryOp = (AggregateUnaryOperator) _optr;
    AggregateOperator aggregator = _aop;

    if (_aggtype == SparkAggType.SINGLE_BLOCK) {
        // Reduce everything to one block and drop the correction afterwards.
        JavaRDD<MatrixBlock> partials = blocks.map(new RDDUAggFunction2(unaryOp, inChars.getRowsPerBlock(), inChars.getColsPerBlock()));
        MatrixBlock total = RDDAggregateUtils.aggStable(partials, aggregator);
        total.dropLastRowsOrColums(aggregator.correctionLocation);

        // Single block: stored directly in the symbol table, no lineage is recorded;
        // this also maintains the matrix characteristics implicitly.
        sec.setMatrixOutput(output.getName(), total);
        return;
    }

    // MULTI_BLOCK or NONE
    if (_aggtype == SparkAggType.NONE) {
        // No block aggregation: the correction is always dropped, using a
        // partitioning-preserving mapValues.
        blocks = blocks.mapValues(new RDDUAggValueFunction(unaryOp, inChars.getRowsPerBlock(), inChars.getColsPerBlock()));
    } else if (_aggtype == SparkAggType.MULTI_BLOCK) {
        // Multi-block aggregation: the correction is kept during aggregation by key ...
        JavaPairRDD<MatrixIndexes, MatrixBlock> keyed = blocks.mapToPair(new RDDUAggFunction(unaryOp, inChars.getRowsPerBlock(), inChars.getColsPerBlock()));
        blocks = RDDAggregateUtils.aggByKeyStable(keyed, aggregator, false);
        // ... and dropped afterwards, if one exists, via a partitioning-preserving mapValues.
        if (unaryOp.aggOp.correctionExists) {
            blocks = blocks.mapValues(new AggregateDropCorrectionFunction(aggregator));
        }
    }

    // Publish the output: update its characteristics, bind the RDD handle, record lineage.
    updateUnaryAggOutputMatrixCharacteristics(sec, unaryOp.indexFn);
    sec.setRDDHandleForVariable(output.getName(), blocks);
    sec.addLineageRDD(output.getName(), input1.getName());
}
Also used : MatrixBlock(org.apache.sysml.runtime.matrix.data.MatrixBlock) MatrixIndexes(org.apache.sysml.runtime.matrix.data.MatrixIndexes) AggregateDropCorrectionFunction(org.apache.sysml.runtime.instructions.spark.functions.AggregateDropCorrectionFunction) MatrixCharacteristics(org.apache.sysml.runtime.matrix.MatrixCharacteristics) FilterDiagBlocksFunction(org.apache.sysml.runtime.instructions.spark.functions.FilterDiagBlocksFunction) AggregateUnaryOperator(org.apache.sysml.runtime.matrix.operators.AggregateUnaryOperator) AggregateOperator(org.apache.sysml.runtime.matrix.operators.AggregateOperator) SparkExecutionContext(org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext)

Aggregations

SparkExecutionContext (org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext)2 FilterDiagBlocksFunction (org.apache.sysml.runtime.instructions.spark.functions.FilterDiagBlocksFunction)2 MatrixCharacteristics (org.apache.sysml.runtime.matrix.MatrixCharacteristics)2 MatrixBlock (org.apache.sysml.runtime.matrix.data.MatrixBlock)2 MatrixIndexes (org.apache.sysml.runtime.matrix.data.MatrixIndexes)2 DMLRuntimeException (org.apache.sysml.runtime.DMLRuntimeException)1 AggregateDropCorrectionFunction (org.apache.sysml.runtime.instructions.spark.functions.AggregateDropCorrectionFunction)1 IsBlockInRange (org.apache.sysml.runtime.instructions.spark.functions.IsBlockInRange)1 ReorgMapFunction (org.apache.sysml.runtime.instructions.spark.functions.ReorgMapFunction)1 AggregateOperator (org.apache.sysml.runtime.matrix.operators.AggregateOperator)1 AggregateUnaryOperator (org.apache.sysml.runtime.matrix.operators.AggregateUnaryOperator)1