Search in sources :

Example 31 with MatrixBlock

use of org.apache.sysml.runtime.matrix.data.MatrixBlock in project incubator-systemml by apache.

the class MatrixAppendMSPInstruction method processInstruction.

@Override
public void processInstruction(ExecutionContext ec) {
    // map-only append (rhs must be vector and fit in mapper mem)
    SparkExecutionContext sec = (SparkExecutionContext) ec;
    checkBinaryAppendInputCharacteristics(sec, _cbind, false, false);
    MatrixCharacteristics mc1 = sec.getMatrixCharacteristics(input1.getName());
    MatrixCharacteristics mc2 = sec.getMatrixCharacteristics(input2.getName());
    int brlen = mc1.getRowsPerBlock();
    int bclen = mc1.getColsPerBlock();
    JavaPairRDD<MatrixIndexes, MatrixBlock> in1 = sec.getBinaryBlockRDDHandleForVariable(input1.getName());
    PartitionedBroadcast<MatrixBlock> in2 = sec.getBroadcastForVariable(input2.getName());
    long off = sec.getScalarInput(_offset.getName(), _offset.getValueType(), _offset.isLiteral()).getLongValue();
    // execute map-append operations (partitioning preserving if #in-blocks = #out-blocks)
    JavaPairRDD<MatrixIndexes, MatrixBlock> out = null;
    if (preservesPartitioning(mc1, mc2, _cbind)) {
        out = in1.mapPartitionsToPair(new MapSideAppendPartitionFunction(in2, _cbind, off, brlen, bclen), true);
    } else {
        out = in1.flatMapToPair(new MapSideAppendFunction(in2, _cbind, off, brlen, bclen));
    }
    // put output RDD handle into symbol table
    updateBinaryAppendOutputMatrixCharacteristics(sec, _cbind);
    sec.setRDDHandleForVariable(output.getName(), out);
    sec.addLineageRDD(output.getName(), input1.getName());
    sec.addLineageBroadcast(output.getName(), input2.getName());
}
Also used : MatrixBlock(org.apache.sysml.runtime.matrix.data.MatrixBlock) MatrixIndexes(org.apache.sysml.runtime.matrix.data.MatrixIndexes) SparkExecutionContext(org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext) MatrixCharacteristics(org.apache.sysml.runtime.matrix.MatrixCharacteristics)

Example 32 with MatrixBlock

use of org.apache.sysml.runtime.matrix.data.MatrixBlock in project incubator-systemml by apache.

the class MatrixIndexingSPInstruction method singleBlockIndexing.

private static MatrixBlock singleBlockIndexing(JavaPairRDD<MatrixIndexes, MatrixBlock> in1, MatrixCharacteristics mcIn, MatrixCharacteristics mcOut, IndexRange ixrange) {
    // single block output via lookup (on partitioned inputs, this allows for single partition
    // access to avoid a full scan of the input; note that this is especially important for
    // out-of-core datasets as entire partitions are read, not just keys as in the in-memory setting.
    long rix = UtilFunctions.computeBlockIndex(ixrange.rowStart, mcIn.getRowsPerBlock());
    long cix = UtilFunctions.computeBlockIndex(ixrange.colStart, mcIn.getColsPerBlock());
    List<MatrixBlock> list = in1.lookup(new MatrixIndexes(rix, cix));
    if (list.size() != 1)
        throw new DMLRuntimeException("Block lookup returned " + list.size() + " blocks (expected 1).");
    MatrixBlock tmp = list.get(0);
    MatrixBlock mbout = (tmp.getNumRows() == mcOut.getRows() && tmp.getNumColumns() == mcOut.getCols()) ? tmp : // reference full block or slice out sub-block
    tmp.slice(UtilFunctions.computeCellInBlock(ixrange.rowStart, mcIn.getRowsPerBlock()), UtilFunctions.computeCellInBlock(ixrange.rowEnd, mcIn.getRowsPerBlock()), UtilFunctions.computeCellInBlock(ixrange.colStart, mcIn.getColsPerBlock()), UtilFunctions.computeCellInBlock(ixrange.colEnd, mcIn.getColsPerBlock()), new MatrixBlock());
    mbout.examSparsity();
    return mbout;
}
Also used : MatrixBlock(org.apache.sysml.runtime.matrix.data.MatrixBlock) MatrixIndexes(org.apache.sysml.runtime.matrix.data.MatrixIndexes) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException)

Example 33 with MatrixBlock

use of org.apache.sysml.runtime.matrix.data.MatrixBlock in project incubator-systemml by apache.

the class MatrixIndexingSPInstruction method processInstruction.

@Override
public void processInstruction(ExecutionContext ec) {
    SparkExecutionContext sec = (SparkExecutionContext) ec;
    String opcode = getOpcode();
    // get indexing range
    long rl = ec.getScalarInput(rowLower.getName(), rowLower.getValueType(), rowLower.isLiteral()).getLongValue();
    long ru = ec.getScalarInput(rowUpper.getName(), rowUpper.getValueType(), rowUpper.isLiteral()).getLongValue();
    long cl = ec.getScalarInput(colLower.getName(), colLower.getValueType(), colLower.isLiteral()).getLongValue();
    long cu = ec.getScalarInput(colUpper.getName(), colUpper.getValueType(), colUpper.isLiteral()).getLongValue();
    IndexRange ixrange = new IndexRange(rl, ru, cl, cu);
    // right indexing
    if (opcode.equalsIgnoreCase(RightIndex.OPCODE)) {
        // update and check output dimensions
        MatrixCharacteristics mcIn = sec.getMatrixCharacteristics(input1.getName());
        MatrixCharacteristics mcOut = sec.getMatrixCharacteristics(output.getName());
        mcOut.set(ru - rl + 1, cu - cl + 1, mcIn.getRowsPerBlock(), mcIn.getColsPerBlock());
        mcOut.setNonZerosBound(Math.min(mcOut.getLength(), mcIn.getNonZerosBound()));
        checkValidOutputDimensions(mcOut);
        // execute right indexing operation (partitioning-preserving if possible)
        JavaPairRDD<MatrixIndexes, MatrixBlock> in1 = sec.getBinaryBlockRDDHandleForVariable(input1.getName());
        if (isSingleBlockLookup(mcIn, ixrange)) {
            sec.setMatrixOutput(output.getName(), singleBlockIndexing(in1, mcIn, mcOut, ixrange), getExtendedOpcode());
        } else if (isMultiBlockLookup(in1, mcIn, mcOut, ixrange)) {
            sec.setMatrixOutput(output.getName(), multiBlockIndexing(in1, mcIn, mcOut, ixrange), getExtendedOpcode());
        } else {
            // rdd output for general case
            JavaPairRDD<MatrixIndexes, MatrixBlock> out = generalCaseRightIndexing(in1, mcIn, mcOut, ixrange, _aggType);
            // put output RDD handle into symbol table
            sec.setRDDHandleForVariable(output.getName(), out);
            sec.addLineageRDD(output.getName(), input1.getName());
        }
    } else // left indexing
    if (opcode.equalsIgnoreCase(LeftIndex.OPCODE) || opcode.equalsIgnoreCase("mapLeftIndex")) {
        String rddVar = (_type == LixCacheType.LEFT) ? input2.getName() : input1.getName();
        String bcVar = (_type == LixCacheType.LEFT) ? input1.getName() : input2.getName();
        JavaPairRDD<MatrixIndexes, MatrixBlock> in1 = sec.getBinaryBlockRDDHandleForVariable(rddVar);
        PartitionedBroadcast<MatrixBlock> broadcastIn2 = null;
        JavaPairRDD<MatrixIndexes, MatrixBlock> in2 = null;
        JavaPairRDD<MatrixIndexes, MatrixBlock> out = null;
        // update and check output dimensions
        MatrixCharacteristics mcOut = sec.getMatrixCharacteristics(output.getName());
        MatrixCharacteristics mcLeft = ec.getMatrixCharacteristics(input1.getName());
        mcOut.set(mcLeft.getRows(), mcLeft.getCols(), mcLeft.getRowsPerBlock(), mcLeft.getColsPerBlock());
        checkValidOutputDimensions(mcOut);
        // note: always matrix rhs, scalars are preprocessed via cast to 1x1 matrix
        MatrixCharacteristics mcRight = ec.getMatrixCharacteristics(input2.getName());
        // sanity check matching index range and rhs dimensions
        if (!mcRight.dimsKnown()) {
            throw new DMLRuntimeException("The right input matrix dimensions are not specified for MatrixIndexingSPInstruction");
        }
        if (!(ru - rl + 1 == mcRight.getRows() && cu - cl + 1 == mcRight.getCols())) {
            throw new DMLRuntimeException("Invalid index range of leftindexing: [" + rl + ":" + ru + "," + cl + ":" + cu + "] vs [" + mcRight.getRows() + "x" + mcRight.getCols() + "].");
        }
        if (opcode.equalsIgnoreCase("mapLeftIndex")) {
            broadcastIn2 = sec.getBroadcastForVariable(bcVar);
            // partitioning-preserving mappartitions (key access required for broadcast loopkup)
            out = in1.mapPartitionsToPair(new LeftIndexPartitionFunction(broadcastIn2, ixrange, _type, mcOut), true);
        } else {
            // general case
            // zero-out lhs
            in1 = in1.mapToPair(new ZeroOutLHS(false, ixrange, mcLeft));
            // slice rhs, shift and merge with lhs
            in2 = sec.getBinaryBlockRDDHandleForVariable(input2.getName()).flatMapToPair(new SliceRHSForLeftIndexing(ixrange, mcLeft));
            out = RDDAggregateUtils.mergeByKey(in1.union(in2));
        }
        sec.setRDDHandleForVariable(output.getName(), out);
        sec.addLineageRDD(output.getName(), rddVar);
        if (broadcastIn2 != null)
            sec.addLineageBroadcast(output.getName(), bcVar);
        if (in2 != null)
            sec.addLineageRDD(output.getName(), input2.getName());
    } else
        throw new DMLRuntimeException("Invalid opcode (" + opcode + ") encountered in MatrixIndexingSPInstruction.");
}
Also used : MatrixBlock(org.apache.sysml.runtime.matrix.data.MatrixBlock) MatrixIndexes(org.apache.sysml.runtime.matrix.data.MatrixIndexes) MatrixCharacteristics(org.apache.sysml.runtime.matrix.MatrixCharacteristics) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException) IndexRange(org.apache.sysml.runtime.util.IndexRange) PartitionedBroadcast(org.apache.sysml.runtime.instructions.spark.data.PartitionedBroadcast) JavaPairRDD(org.apache.spark.api.java.JavaPairRDD) SparkExecutionContext(org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext)

Example 34 with MatrixBlock

use of org.apache.sysml.runtime.matrix.data.MatrixBlock in project incubator-systemml by apache.

the class MatrixIndexingSPInstruction method createPartitionPruningRDD.

/**
 * Wraps the input RDD into a PartitionPruningRDD, which acts as a filter
 * of required partitions. The distinct set of required partitions is determined
 * via the partitioner of the input RDD.
 *
 * @param in input matrix as {@code JavaPairRDD<MatrixIndexes,MatrixBlock>}
 * @param filter partition filter
 * @return matrix as {@code JavaPairRDD<MatrixIndexes,MatrixBlock>}
 */
private static JavaPairRDD<MatrixIndexes, MatrixBlock> createPartitionPruningRDD(JavaPairRDD<MatrixIndexes, MatrixBlock> in, List<MatrixIndexes> filter) {
    // build hashset of required partition ids
    HashSet<Integer> flags = new HashSet<>();
    Partitioner partitioner = in.rdd().partitioner().get();
    for (MatrixIndexes key : filter) flags.add(partitioner.getPartition(key));
    // create partition pruning rdd
    Function1<Object, Object> f = new PartitionPruningFunction(flags);
    PartitionPruningRDD<Tuple2<MatrixIndexes, MatrixBlock>> ppRDD = PartitionPruningRDD.create(in.rdd(), f);
    // wrap output into java pair rdd
    return new JavaPairRDD<>(ppRDD, ClassManifestFactory.fromClass(MatrixIndexes.class), ClassManifestFactory.fromClass(MatrixBlock.class));
}
Also used : MatrixBlock(org.apache.sysml.runtime.matrix.data.MatrixBlock) MatrixIndexes(org.apache.sysml.runtime.matrix.data.MatrixIndexes) Tuple2(scala.Tuple2) JavaPairRDD(org.apache.spark.api.java.JavaPairRDD) Partitioner(org.apache.spark.Partitioner) HashSet(java.util.HashSet)

Example 35 with MatrixBlock

use of org.apache.sysml.runtime.matrix.data.MatrixBlock in project incubator-systemml by apache.

the class MatrixReshapeSPInstruction method processInstruction.

@Override
public void processInstruction(ExecutionContext ec) {
    SparkExecutionContext sec = (SparkExecutionContext) ec;
    // get parameters
    // save cast
    long rows = ec.getScalarInput(_opRows.getName(), _opRows.getValueType(), _opRows.isLiteral()).getLongValue();
    // save cast
    long cols = ec.getScalarInput(_opCols.getName(), _opCols.getValueType(), _opCols.isLiteral()).getLongValue();
    boolean byRow = ec.getScalarInput(_opByRow.getName(), ValueType.BOOLEAN, _opByRow.isLiteral()).getBooleanValue();
    // get inputs
    JavaPairRDD<MatrixIndexes, MatrixBlock> in1 = sec.getBinaryBlockRDDHandleForVariable(input1.getName());
    MatrixCharacteristics mcIn = sec.getMatrixCharacteristics(input1.getName());
    MatrixCharacteristics mcOut = sec.getMatrixCharacteristics(output.getName());
    // update output characteristics and sanity check
    mcOut.set(rows, cols, mcIn.getRowsPerBlock(), mcIn.getColsPerBlock(), mcIn.getNonZeros());
    if (!mcIn.nnzKnown())
        mcOut.setNonZerosBound(mcIn.getNonZerosBound());
    if (mcIn.getRows() * mcIn.getCols() != mcOut.getRows() * mcOut.getCols()) {
        throw new DMLRuntimeException("Incompatible matrix characteristics for reshape: " + mcIn.getRows() + "x" + mcIn.getCols() + " vs " + mcOut.getRows() + "x" + mcOut.getCols());
    }
    // execute reshape instruction
    JavaPairRDD<MatrixIndexes, MatrixBlock> out = in1.flatMapToPair(new RDDReshapeFunction(mcIn, mcOut, byRow));
    out = RDDAggregateUtils.mergeByKey(out);
    // put output RDD handle into symbol table
    sec.setRDDHandleForVariable(output.getName(), out);
    sec.addLineageRDD(output.getName(), input1.getName());
}
Also used : MatrixBlock(org.apache.sysml.runtime.matrix.data.MatrixBlock) MatrixIndexes(org.apache.sysml.runtime.matrix.data.MatrixIndexes) SparkExecutionContext(org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext) MatrixCharacteristics(org.apache.sysml.runtime.matrix.MatrixCharacteristics) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException)

Aggregations

MatrixBlock (org.apache.sysml.runtime.matrix.data.MatrixBlock)459 MatrixIndexes (org.apache.sysml.runtime.matrix.data.MatrixIndexes)142 DMLRuntimeException (org.apache.sysml.runtime.DMLRuntimeException)111 MatrixCharacteristics (org.apache.sysml.runtime.matrix.MatrixCharacteristics)102 CompressedMatrixBlock (org.apache.sysml.runtime.compress.CompressedMatrixBlock)48 SparkExecutionContext (org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext)48 IOException (java.io.IOException)44 MatrixObject (org.apache.sysml.runtime.controlprogram.caching.MatrixObject)41 ArrayList (java.util.ArrayList)40 Path (org.apache.hadoop.fs.Path)29 FrameBlock (org.apache.sysml.runtime.matrix.data.FrameBlock)24 FileSystem (org.apache.hadoop.fs.FileSystem)23 JavaPairRDD (org.apache.spark.api.java.JavaPairRDD)23 JobConf (org.apache.hadoop.mapred.JobConf)21 Tuple2 (scala.Tuple2)19 SequenceFile (org.apache.hadoop.io.SequenceFile)17 Row (org.apache.spark.sql.Row)14 SparseBlock (org.apache.sysml.runtime.matrix.data.SparseBlock)14 TestConfiguration (org.apache.sysml.test.integration.TestConfiguration)14 IndexedMatrixValue (org.apache.sysml.runtime.matrix.mapred.IndexedMatrixValue)13