Search in sources :

Example 16 with SparkExecutionContext

use of org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext in project incubator-systemml by apache.

the class ReorgSPInstruction method processInstruction.

/**
 * Runs a Spark reorg instruction on the binary-block RDD of the first input
 * and registers the resulting RDD (plus lineage to the input) under the
 * output variable.
 *
 * <p>The opcode (compared case-insensitively) selects the operation:
 * <ul>
 *   <li>{@code r'} - transpose via {@code ReorgMapFunction}</li>
 *   <li>{@code rev} - reverse via {@code RDDRevFunction}, followed by a
 *       merge-by-key when the row count is not a multiple of the row block size</li>
 *   <li>{@code rdiag} - vector-to-matrix diag when the input has exactly one
 *       column, otherwise matrix-to-vector diag over the filtered diagonal blocks</li>
 *   <li>{@code rsort} - order by one or more columns, returning either the
 *       sorted data or the sort indexes</li>
 * </ul>
 *
 * @param ec execution context; it is cast to {@code SparkExecutionContext}
 * @throws DMLRuntimeException if the opcode is none of the four listed above
 */
@Override
public void processInstruction(ExecutionContext ec) {
    final SparkExecutionContext sparkEc = (SparkExecutionContext) ec;
    final String op = getOpcode();
    final boolean isSort = op.equalsIgnoreCase("rsort");

    // input RDD handle and the matrix characteristics that describe it
    final JavaPairRDD<MatrixIndexes, MatrixBlock> source = sparkEc.getBinaryBlockRDDHandleForVariable(input1.getName());
    final MatrixCharacteristics inMc = sparkEc.getMatrixCharacteristics(input1.getName());
    JavaPairRDD<MatrixIndexes, MatrixBlock> result;

    if (op.equalsIgnoreCase("r'")) {
        // TRANSPOSE: block-wise map
        result = source.mapToPair(new ReorgMapFunction(op));
    } else if (op.equalsIgnoreCase("rev")) {
        // REVERSE: blocks are re-keyed; a merge is only applied when the
        // number of rows is not a multiple of the row block size
        result = source.flatMapToPair(new RDDRevFunction(inMc));
        if (inMc.getRows() % inMc.getRowsPerBlock() != 0) {
            result = RDDAggregateUtils.mergeByKey(result, false);
        }
    } else if (op.equalsIgnoreCase("rdiag")) {
        // DIAG: direction depends on whether the input is a single column
        if (inMc.getCols() == 1) {
            // diagV2M
            result = source.flatMapToPair(new RDDDiagV2MFunction(inMc));
        } else {
            // diagM2V: keep the diagonal blocks only, then map them
            result = source.filter(new FilterDiagBlocksFunction()).mapToPair(new ReorgMapFunction(op));
        }
    } else if (isSort) {
        // ORDER: sort by the given column(s), ascending or descending,
        // returning either indexes or values

        // order-by columns come from a matrix operand or a single scalar
        long[] orderBy;
        if (_col.getDataType().isMatrix()) {
            orderBy = DataConverter.convertToLongVector(ec.getMatrixInput(_col.getName()));
        } else {
            orderBy = new long[] { ec.getScalarInput(_col.getName(), _col.getValueType(), _col.isLiteral()).getLongValue() };
        }
        final boolean descending = ec.getScalarInput(_desc.getName(), _desc.getValueType(), _desc.isLiteral()).getBooleanValue();
        final boolean returnIndexes = ec.getScalarInput(_ixret.getName(), _ixret.getValueType(), _ixret.isLiteral()).getBooleanValue();
        final boolean ascending = !descending;
        final boolean columnVector = (inMc.getCols() == 1);

        result = source;
        // only a warning is logged here; execution continues regardless
        if (orderBy.length > inMc.getColsPerBlock()) {
            LOG.warn("Unsupported sort with number of order-by columns large than blocksize: " + orderBy.length);
        }

        if (columnVector || orderBy.length == 1) {
            // single order-by column: extract it first unless the input
            // already consists of one column
            if (!columnVector) {
                result = result
                    .filter(new IsBlockInRange(1, inMc.getRows(), orderBy[0], orderBy[0], inMc))
                    .mapValues(new ExtractColumn((int) UtilFunctions.computeCellInBlock(orderBy[0], inMc.getColsPerBlock())));
            }

            // actual index/data sort operation
            if (returnIndexes) {
                // sort indexes
                result = RDDSortUtils.sortIndexesByVal(result, ascending, inMc.getRows(), inMc.getRowsPerBlock());
            } else if (columnVector && ascending) {
                // sort single-column matrix
                result = RDDSortUtils.sortByVal(result, inMc.getRows(), inMc.getRowsPerBlock());
            } else if (!_bSortIndInMem) {
                // sort multi-column matrix w/ rewrite
                result = RDDSortUtils.sortDataByVal(result, source, ascending, inMc.getRows(), inMc.getCols(), inMc.getRowsPerBlock(), inMc.getColsPerBlock());
            } else {
                // sort multi-column matrix
                result = RDDSortUtils.sortDataByValMemSort(result, source, ascending, inMc.getRows(), inMc.getCols(), inMc.getRowsPerBlock(), inMc.getColsPerBlock(), sparkEc, (ReorgOperator) _optr);
            }
        } else {
            // multiple order-by columns: extract them (if necessary)
            if (orderBy.length < inMc.getCols()) {
                result = result.filter(new IsBlockInList(orderBy, inMc)).mapToPair(new ExtractColumns(orderBy, inMc));
            }
            // append extracted columns (if the input spans several column blocks)
            if (inMc.getCols() > inMc.getColsPerBlock()) {
                result = RDDAggregateUtils.mergeByKey(result);
            }

            // actual index/data sort operation
            if (returnIndexes) {
                // sort indexes
                result = RDDSortUtils.sortIndexesByVals(result, ascending, inMc.getRows(), (long) orderBy.length, inMc.getRowsPerBlock());
            } else if (orderBy.length == inMc.getCols() && ascending) {
                // all columns are order-by columns, ascending
                result = RDDSortUtils.sortByVals(result, inMc.getRows(), orderBy.length, inMc.getRowsPerBlock());
            } else {
                // sort multi-column matrix
                result = RDDSortUtils.sortDataByVals(result, source, ascending, inMc.getRows(), inMc.getCols(), orderBy.length, inMc.getRowsPerBlock(), inMc.getColsPerBlock());
            }
        }
    } else {
        throw new DMLRuntimeException("Error: Incorrect opcode in ReorgSPInstruction:" + op);
    }

    // release the order-by matrix that the sort branch obtained via getMatrixInput
    if (isSort && _col.getDataType().isMatrix()) {
        sparkEc.releaseMatrixInput(_col.getName());
    }

    // update output characteristics, store the output RDD handle, record lineage
    updateReorgMatrixCharacteristics(sparkEc);
    sparkEc.setRDDHandleForVariable(output.getName(), result);
    sparkEc.addLineageRDD(output.getName(), input1.getName());
}
Also used : MatrixBlock(org.apache.sysml.runtime.matrix.data.MatrixBlock) MatrixIndexes(org.apache.sysml.runtime.matrix.data.MatrixIndexes) IsBlockInRange(org.apache.sysml.runtime.instructions.spark.functions.IsBlockInRange) ReorgOperator(org.apache.sysml.runtime.matrix.operators.ReorgOperator) MatrixCharacteristics(org.apache.sysml.runtime.matrix.MatrixCharacteristics) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException) FilterDiagBlocksFunction(org.apache.sysml.runtime.instructions.spark.functions.FilterDiagBlocksFunction) SparkExecutionContext(org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext) ReorgMapFunction(org.apache.sysml.runtime.instructions.spark.functions.ReorgMapFunction) IsBlockInList(org.apache.sysml.runtime.instructions.spark.functions.IsBlockInList)

Example 17 with SparkExecutionContext

use of org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext in project incubator-systemml by apache.

the class RmmSPInstruction method processInstruction.

/**
 * Executes the Spark RMM (replication-based matrix multiply) instruction:
 * both inputs are replicated onto triple-index join keys, joined, multiplied
 * per joined pair, and the partial products are summed per result block.
 * The resulting RDD is registered under the output variable with lineage
 * to both inputs.
 *
 * @param ec execution context; it is cast to {@code SparkExecutionContext}
 */
@Override
public void processInstruction(ExecutionContext ec) {
    final SparkExecutionContext sparkEc = (SparkExecutionContext) ec;

    // characteristics and RDD handles of both operands
    final MatrixCharacteristics leftMc = sparkEc.getMatrixCharacteristics(input1.getName());
    final MatrixCharacteristics rightMc = sparkEc.getMatrixCharacteristics(input2.getName());
    final JavaPairRDD<MatrixIndexes, MatrixBlock> left = sparkEc.getBinaryBlockRDDHandleForVariable(input1.getName());
    final JavaPairRDD<MatrixIndexes, MatrixBlock> right = sparkEc.getBinaryBlockRDDHandleForVariable(input2.getName());
    final MatrixCharacteristics outMc = updateBinaryMMOutputMatrixCharacteristics(sparkEc, true);

    // step 1: prepare join keys (w/ shallow replication), i/j/k;
    // the left input is replicated along the columns of the right input,
    // the right input along the rows of the left input
    final JavaPairRDD<TripleIndexes, MatrixBlock> leftKeyed =
        left.flatMapToPair(new RmmReplicateFunction(rightMc.getCols(), rightMc.getColsPerBlock(), true));
    final JavaPairRDD<TripleIndexes, MatrixBlock> rightKeyed =
        right.flatMapToPair(new RmmReplicateFunction(leftMc.getRows(), leftMc.getRowsPerBlock(), false));

    // step 2: join prepared datasets, multiply, and aggregate;
    // the join uses at least the default parallelism as its partition count
    final int estimatedJoinParts = getNumJoinPartitions(leftMc, rightMc);
    final int defaultParallelism = SparkExecutionContext.getDefaultParallelism(true);
    final int joinPartitions = Math.max(estimatedJoinParts, defaultParallelism);
    final int outPartitions = SparkUtils.getNumPreferredPartitions(outMc);

    // join by result block, then do the matrix multiplication per joined pair
    final JavaPairRDD<MatrixIndexes, MatrixBlock> products = leftKeyed
        .join(rightKeyed, joinPartitions)
        .mapToPair(new RmmMultiplyFunction());

    // aggregation per result block
    final JavaPairRDD<MatrixIndexes, MatrixBlock> summed =
        RDDAggregateUtils.sumByKeyStable(products, outPartitions, false);

    // put output RDD handle into symbol table, with lineage to both inputs
    sparkEc.setRDDHandleForVariable(output.getName(), summed);
    sparkEc.addLineageRDD(output.getName(), input1.getName());
    sparkEc.addLineageRDD(output.getName(), input2.getName());
}
Also used : MatrixBlock(org.apache.sysml.runtime.matrix.data.MatrixBlock) MatrixIndexes(org.apache.sysml.runtime.matrix.data.MatrixIndexes) TripleIndexes(org.apache.sysml.runtime.matrix.data.TripleIndexes) SparkExecutionContext(org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext) MatrixCharacteristics(org.apache.sysml.runtime.matrix.MatrixCharacteristics)

Example 18 with SparkExecutionContext

use of org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext in project incubator-systemml by apache.

the class TernarySPInstruction method processInstruction.

/**
 * Executes a ternary Spark instruction whose three operands may each be a
 * matrix or a scalar. Matrix operands are read as binary-block RDDs and
 * joined with each other; scalar operands are wrapped into a
 * {@code MatrixBlock} and handed to the per-block function. The result RDD
 * is registered under the output variable with lineage to every matrix
 * operand.
 *
 * <p>NOTE(review): as in the original if/else chain, the fallback case is
 * also reached when none of the operands is a matrix; {@code in1} is
 * {@code null} in that situation.
 *
 * @param ec execution context; it is cast to {@code SparkExecutionContext}
 */
@Override
public void processInstruction(ExecutionContext ec) {
    final SparkExecutionContext sparkEc = (SparkExecutionContext) ec;

    final boolean isMat1 = input1.isMatrix();
    final boolean isMat2 = input2.isMatrix();
    final boolean isMat3 = input3.isMatrix();

    // RDD handles exist only for matrix operands ...
    final JavaPairRDD<MatrixIndexes, MatrixBlock> rdd1 = isMat1 ? sparkEc.getBinaryBlockRDDHandleForVariable(input1.getName()) : null;
    final JavaPairRDD<MatrixIndexes, MatrixBlock> rdd2 = isMat2 ? sparkEc.getBinaryBlockRDDHandleForVariable(input2.getName()) : null;
    final JavaPairRDD<MatrixIndexes, MatrixBlock> rdd3 = isMat3 ? sparkEc.getBinaryBlockRDDHandleForVariable(input3.getName()) : null;

    // ... and scalar blocks only for scalar operands
    final MatrixBlock blk1 = isMat1 ? null : new MatrixBlock(ec.getScalarInput(input1).getDoubleValue());
    final MatrixBlock blk2 = isMat2 ? null : new MatrixBlock(ec.getScalarInput(input2).getDoubleValue());
    final MatrixBlock blk3 = isMat3 ? null : new MatrixBlock(ec.getScalarInput(input3).getDoubleValue());

    final TernaryOperator ternaryOp = (TernaryOperator) _optr;

    // encode the matrix/scalar pattern as three bits: input1=4, input2=2, input3=1
    final int pattern = (isMat1 ? 4 : 0) | (isMat2 ? 2 : 0) | (isMat3 ? 1 : 0);
    JavaPairRDD<MatrixIndexes, MatrixBlock> result;
    switch (pattern) {
        case 4: // matrix, scalar, scalar
            result = rdd1.mapValues(new TernaryFunctionMSS(ternaryOp, blk1, blk2, blk3));
            break;
        case 2: // scalar, matrix, scalar
            result = rdd2.mapValues(new TernaryFunctionSMS(ternaryOp, blk1, blk2, blk3));
            break;
        case 1: // scalar, scalar, matrix
            result = rdd3.mapValues(new TernaryFunctionSSM(ternaryOp, blk1, blk2, blk3));
            break;
        case 6: // matrix, matrix, scalar
            result = rdd1.join(rdd2).mapValues(new TernaryFunctionMMS(ternaryOp, blk1, blk2, blk3));
            break;
        case 5: // matrix, scalar, matrix
            result = rdd1.join(rdd3).mapValues(new TernaryFunctionMSM(ternaryOp, blk1, blk2, blk3));
            break;
        case 3: // scalar, matrix, matrix
            result = rdd2.join(rdd3).mapValues(new TernaryFunctionSMM(ternaryOp, blk1, blk2, blk3));
            break;
        default: // all matrices (and every remaining pattern)
            result = rdd1.join(rdd2).join(rdd3).mapValues(new TernaryFunctionMMM(ternaryOp, blk1, blk2, blk3));
            break;
    }

    // set output RDD and record lineage for each matrix operand
    updateTernaryOutputMatrixCharacteristics(sparkEc);
    sparkEc.setRDDHandleForVariable(output.getName(), result);
    if (isMat1) {
        sparkEc.addLineageRDD(output.getName(), input1.getName());
    }
    if (isMat2) {
        sparkEc.addLineageRDD(output.getName(), input2.getName());
    }
    if (isMat3) {
        sparkEc.addLineageRDD(output.getName(), input3.getName());
    }
}
Also used : MatrixBlock(org.apache.sysml.runtime.matrix.data.MatrixBlock) MatrixIndexes(org.apache.sysml.runtime.matrix.data.MatrixIndexes) TernaryOperator(org.apache.sysml.runtime.matrix.operators.TernaryOperator) SparkExecutionContext(org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext)

Example 19 with SparkExecutionContext

use of org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext in project incubator-systemml by apache.

the class Tsmm2SPInstruction method processInstruction.

/**
 * Executes the Spark tsmm2 instruction in two passes over the input:
 * the first pass filters the blocks outside the first block column (left
 * type) or first block row (otherwise), collects them into a partitioned
 * block and broadcasts it; the second pass combines every input block with
 * that broadcast and aggregates the partial results.
 *
 * <p>If the estimated size of the square output is at most 32MB, the result
 * is reduced to a single {@code MatrixBlock} and stored as matrix output;
 * otherwise the result stays an RDD aggregated by key and is registered with
 * lineage to the input.
 *
 * @param ec execution context; it is cast to {@code SparkExecutionContext}
 */
@Override
public void processInstruction(ExecutionContext ec) {
    final SparkExecutionContext sparkEc = (SparkExecutionContext) ec;

    // get input
    final JavaPairRDD<MatrixIndexes, MatrixBlock> blocks = sparkEc.getBinaryBlockRDDHandleForVariable(input1.getName());
    final MatrixCharacteristics mc = sparkEc.getMatrixCharacteristics(input1.getName());
    final boolean left = _type.isLeft();

    // step 1: first pass of X, filter-collect-broadcast excess blocks
    final JavaPairRDD<MatrixIndexes, MatrixBlock> excess = blocks
        .filter(new IsBlockInRange(
            left ? 1 : mc.getRowsPerBlock() + 1, mc.getRows(),
            left ? mc.getColsPerBlock() + 1 : 1, mc.getCols(), mc))
        .mapToPair(new ShiftTSMMIndexesFunction(_type));
    // dimensions of the excess part: the input minus one block column (left)
    // or minus one block row (otherwise)
    final int excessRows = (int) (left ? mc.getRows() : mc.getRows() - mc.getRowsPerBlock());
    final int excessCols = (int) (left ? mc.getCols() - mc.getColsPerBlock() : mc.getCols());
    final PartitionedBlock<MatrixBlock> excessBlock = SparkExecutionContext.toPartitionedMatrixBlock(
        excess, excessRows, excessCols, mc.getRowsPerBlock(), mc.getColsPerBlock(), -1L);
    final Broadcast<PartitionedBlock<MatrixBlock>> excessBc = sparkEc.getSparkContext().broadcast(excessBlock);

    // step 2: second pass of X, compute tsmm/mapmm and aggregate result blocks
    final int outputDim = (int) (left ? mc.getCols() : mc.getRows());

    if (OptimizerUtils.estimateSize(outputDim, outputDim) <= 32 * 1024 * 1024) {
        // default: <=32MB
        // output large blocks and reduceAll to avoid skew on combineByKey
        final JavaRDD<MatrixBlock> partials = blocks.map(
            new RDDTSMM2ExtFunction(excessBc, _type, outputDim, (int) mc.getRowsPerBlock()));
        final MatrixBlock total = RDDAggregateUtils.sumStable(partials);

        // put output block into symbol table (no lineage because single block)
        // this also includes implicit maintenance of matrix characteristics
        sparkEc.setMatrixOutput(output.getName(), total, getExtendedOpcode());
        return;
    }

    // large output: emit individual output blocks and aggregate by key (no action)
    final JavaPairRDD<MatrixIndexes, MatrixBlock> partialBlocks = blocks.flatMapToPair(new RDDTSMM2Function(excessBc, _type));
    final JavaPairRDD<MatrixIndexes, MatrixBlock> aggregated = RDDAggregateUtils.sumByKeyStable(partialBlocks, false);

    // put output RDD handle into symbol table; the output is
    // outputDim x outputDim with the block sizes of the input
    sparkEc.getMatrixCharacteristics(output.getName()).set(outputDim, outputDim, mc.getRowsPerBlock(), mc.getColsPerBlock());
    sparkEc.setRDDHandleForVariable(output.getName(), aggregated);
    sparkEc.addLineageRDD(output.getName(), input1.getName());
}
Also used : MatrixBlock(org.apache.sysml.runtime.matrix.data.MatrixBlock) MatrixIndexes(org.apache.sysml.runtime.matrix.data.MatrixIndexes) IsBlockInRange(org.apache.sysml.runtime.instructions.spark.functions.IsBlockInRange) MatrixCharacteristics(org.apache.sysml.runtime.matrix.MatrixCharacteristics) PartitionedBlock(org.apache.sysml.runtime.instructions.spark.data.PartitionedBlock) SparkExecutionContext(org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext)

Example 20 with SparkExecutionContext

use of org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext in project incubator-systemml by apache.

the class TsmmSPInstruction method processInstruction.

/**
 * Executes the Spark tsmm instruction: every input block is mapped through
 * {@code RDDTSMMFunction}, the mapped blocks are summed into one
 * {@code MatrixBlock}, and that block is stored as the matrix output.
 *
 * @param ec execution context; it is cast to {@code SparkExecutionContext}
 */
@Override
public void processInstruction(ExecutionContext ec) {
    final SparkExecutionContext sparkEc = (SparkExecutionContext) ec;

    // get input
    final JavaPairRDD<MatrixIndexes, MatrixBlock> blocks =
        sparkEc.getBinaryBlockRDDHandleForVariable(input1.getName());

    // execute tsmm instruction (always produce exactly one output block)
    // NOTE(review): the original comment states that this formulation
    // requires --conf spark.driver.maxResultSize=0 - confirm it still applies
    final JavaRDD<MatrixBlock> partials = blocks.map(new RDDTSMMFunction(_type));
    final MatrixBlock total = RDDAggregateUtils.sumStable(partials);

    // put output block into symbol table (no lineage because single block)
    // this also includes implicit maintenance of matrix characteristics
    sparkEc.setMatrixOutput(output.getName(), total, getExtendedOpcode());
}
Also used : MatrixBlock(org.apache.sysml.runtime.matrix.data.MatrixBlock) MatrixIndexes(org.apache.sysml.runtime.matrix.data.MatrixIndexes) SparkExecutionContext(org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext)

Aggregations

SparkExecutionContext (org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext): 112
MatrixBlock (org.apache.sysml.runtime.matrix.data.MatrixBlock): 92
MatrixIndexes (org.apache.sysml.runtime.matrix.data.MatrixIndexes): 92
MatrixCharacteristics (org.apache.sysml.runtime.matrix.MatrixCharacteristics): 71
DMLRuntimeException (org.apache.sysml.runtime.DMLRuntimeException): 39
JavaPairRDD (org.apache.spark.api.java.JavaPairRDD): 22
FrameBlock (org.apache.sysml.runtime.matrix.data.FrameBlock): 14
DoubleObject (org.apache.sysml.runtime.instructions.cp.DoubleObject): 12
ScalarObject (org.apache.sysml.runtime.instructions.cp.ScalarObject): 9
PartitionedBroadcast (org.apache.sysml.runtime.instructions.spark.data.PartitionedBroadcast): 8
FilterNonEmptyBlocksFunction (org.apache.sysml.runtime.instructions.spark.functions.FilterNonEmptyBlocksFunction): 7
InputInfo (org.apache.sysml.runtime.matrix.data.InputInfo): 7
ArrayList (java.util.ArrayList): 6
CPOperand (org.apache.sysml.runtime.instructions.cp.CPOperand): 6
RDDObject (org.apache.sysml.runtime.instructions.spark.data.RDDObject): 6
AggregateDropCorrectionFunction (org.apache.sysml.runtime.instructions.spark.functions.AggregateDropCorrectionFunction): 6
AggregateOperator (org.apache.sysml.runtime.matrix.operators.AggregateOperator): 6
JavaSparkContext (org.apache.spark.api.java.JavaSparkContext): 5
FrameObject (org.apache.sysml.runtime.controlprogram.caching.FrameObject): 5
ValueType (org.apache.sysml.parser.Expression.ValueType): 4