Search in sources :

Example 41 with MatrixIndexes

use of org.apache.sysml.runtime.matrix.data.MatrixIndexes in project incubator-systemml by apache.

Source: class PairWritableCell, method readFields.

@Override
public void readFields(DataInput in) throws IOException {
    // Deserialize in the exact order produced by the corresponding write():
    // the matrix indexes first, followed by the cell value.
    MatrixIndexes tmpIndexes = new MatrixIndexes();
    tmpIndexes.readFields(in);
    indexes = tmpIndexes;
    MatrixCell tmpCell = new MatrixCell();
    tmpCell.readFields(in);
    cell = tmpCell;
}
Also used : MatrixIndexes(org.apache.sysml.runtime.matrix.data.MatrixIndexes) MatrixCell(org.apache.sysml.runtime.matrix.data.MatrixCell)

Example 42 with MatrixIndexes

use of org.apache.sysml.runtime.matrix.data.MatrixIndexes in project incubator-systemml by apache.

Source: class SparkExecutionContext, method createIndexedBlock.

/**
 * Extracts the block at linearized block position {@code ix} from the given
 * source matrix and pairs it with its 1-based matrix indexes.
 */
private static Tuple2<MatrixIndexes, MatrixBlock> createIndexedBlock(MatrixBlock mb, MatrixCharacteristics mc, long ix) {
    try {
        // derive 0-based block row/column from the linearized block index
        long brix = ix / mc.getNumColBlocks();
        long bcix = ix % mc.getNumColBlocks();
        // actual (possibly partial) block dimensions at the matrix border
        int lrlen = UtilFunctions.computeBlockSize(mc.getRows(), brix + 1, mc.getRowsPerBlock());
        int lclen = UtilFunctions.computeBlockSize(mc.getCols(), bcix + 1, mc.getColsPerBlock());
        // slice the sub-matrix out of the source block
        int rl = (int) brix * mc.getRowsPerBlock();
        int cl = (int) bcix * mc.getColsPerBlock();
        MatrixBlock ret = new MatrixBlock(lrlen, lclen, mb.isInSparseFormat());
        ret = mb.slice(rl, rl + lrlen - 1, cl, cl + lclen - 1, ret);
        // pair the block with its 1-based block indexes
        return new Tuple2<>(new MatrixIndexes(brix + 1, bcix + 1), ret);
    } catch (DMLRuntimeException ex) {
        throw new RuntimeException(ex);
    }
}
Also used : MatrixBlock(org.apache.sysml.runtime.matrix.data.MatrixBlock) CompressedMatrixBlock(org.apache.sysml.runtime.compress.CompressedMatrixBlock) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException) Tuple2(scala.Tuple2) MatrixIndexes(org.apache.sysml.runtime.matrix.data.MatrixIndexes) Checkpoint(org.apache.sysml.lops.Checkpoint) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException)

Example 43 with MatrixIndexes

use of org.apache.sysml.runtime.matrix.data.MatrixIndexes in project incubator-systemml by apache.

Source: class SparkExecutionContext, method toMatrixBlock.

/**
 * Utility method for creating a single matrix block out of a binary block RDD.
 * Note that this collect call might trigger execution of any pending transformations.
 *
 * NOTE: This is an unguarded utility function, which requires memory for both the output matrix
 * and its collected, blocked representation.
 *
 * @param rdd JavaPairRDD for matrix block
 * @param rlen number of rows of the output matrix
 * @param clen number of columns of the output matrix
 * @param brlen number of rows in a block
 * @param bclen number of columns in a block
 * @param nnz number of non-zeros (if negative, a dense worst case is assumed)
 * @return matrix block
 */
public static MatrixBlock toMatrixBlock(JavaPairRDD<MatrixIndexes, MatrixBlock> rdd, int rlen, int clen, int brlen, int bclen, long nnz) {
    long t0 = DMLScript.STATISTICS ? System.nanoTime() : 0;
    MatrixBlock out = null;
    if (// SINGLE BLOCK
    rlen <= brlen && clen <= bclen) {
        // special case without copy and nnz maintenance
        List<Tuple2<MatrixIndexes, MatrixBlock>> list = rdd.collect();
        if (list.size() > 1)
            throw new DMLRuntimeException("Expecting no more than one result block.");
        else if (list.size() == 1)
            out = list.get(0)._2();
        else
            // empty (e.g., after ops w/ outputEmpty=false)
            out = new MatrixBlock(rlen, clen, true);
        // pick the most memory-efficient sparse/dense representation
        out.examSparsity();
    } else // MULTIPLE BLOCKS
    {
        // determine target sparse/dense representation
        // (worst-case dense assumption if the number of non-zeros is unknown, i.e., nnz < 0)
        long lnnz = (nnz >= 0) ? nnz : (long) rlen * clen;
        boolean sparse = MatrixBlock.evalSparseFormatInMemory(rlen, clen, lnnz);
        // create output matrix block (w/ lazy allocation)
        out = new MatrixBlock(rlen, clen, sparse, lnnz);
        List<Tuple2<MatrixIndexes, MatrixBlock>> list = rdd.collect();
        // copy blocks one-at-a-time into output matrix block
        long aNnz = 0;
        for (Tuple2<MatrixIndexes, MatrixBlock> keyval : list) {
            // unpack index-block pair
            MatrixIndexes ix = keyval._1();
            MatrixBlock block = keyval._2();
            // compute row/column block offsets (matrix indexes are 1-based)
            int row_offset = (int) (ix.getRowIndex() - 1) * brlen;
            int col_offset = (int) (ix.getColumnIndex() - 1) * bclen;
            int rows = block.getNumRows();
            int cols = block.getNumColumns();
            // handle compressed blocks (decompress for robustness)
            if (block instanceof CompressedMatrixBlock)
                block = ((CompressedMatrixBlock) block).decompress();
            // append block
            if (sparse) {
                // SPARSE OUTPUT
                // append block to sparse target in order to avoid shifting, where
                // we use a shallow row copy in case of MCSR and single column blocks
                // note: this append requires, for multiple column blocks, a final sort
                out.appendToSparse(block, row_offset, col_offset, clen > bclen);
            } else {
                // DENSE OUTPUT
                out.copy(row_offset, row_offset + rows - 1, col_offset, col_offset + cols - 1, block, false);
            }
            // incremental maintenance nnz
            aNnz += block.getNonZeros();
        }
        // post-processing output matrix: a final sort is only required if
        // blocks were appended out of column order (multiple column blocks)
        if (sparse && clen > bclen)
            out.sortSparseRows();
        out.setNonZeros(aNnz);
        out.examSparsity();
    }
    // maintain spark collect statistics
    if (DMLScript.STATISTICS) {
        Statistics.accSparkCollectTime(System.nanoTime() - t0);
        Statistics.incSparkCollectCount(1);
    }
    return out;
}
Also used : CompressedMatrixBlock(org.apache.sysml.runtime.compress.CompressedMatrixBlock) MatrixBlock(org.apache.sysml.runtime.matrix.data.MatrixBlock) CompressedMatrixBlock(org.apache.sysml.runtime.compress.CompressedMatrixBlock) Tuple2(scala.Tuple2) MatrixIndexes(org.apache.sysml.runtime.matrix.data.MatrixIndexes) Checkpoint(org.apache.sysml.lops.Checkpoint) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException)

Example 44 with MatrixIndexes

use of org.apache.sysml.runtime.matrix.data.MatrixIndexes in project incubator-systemml by apache.

Source: class SparkExecutionContext, method toPartitionedMatrixBlock.

/**
 * Collects a binary block RDD into a {@code PartitionedBlock} representation.
 * Note that the collect call might trigger execution of pending transformations.
 */
public static PartitionedBlock<MatrixBlock> toPartitionedMatrixBlock(JavaPairRDD<MatrixIndexes, MatrixBlock> rdd, int rlen, int clen, int brlen, int bclen, long nnz) {
    long start = DMLScript.STATISTICS ? System.nanoTime() : 0;
    PartitionedBlock<MatrixBlock> ret = new PartitionedBlock<>(rlen, clen, brlen, bclen);
    // place each collected block at its block position (1-based indexes)
    for (Tuple2<MatrixIndexes, MatrixBlock> entry : rdd.collect()) {
        MatrixIndexes key = entry._1();
        ret.setBlock((int) key.getRowIndex(), (int) key.getColumnIndex(), entry._2());
    }
    // maintain spark collect statistics
    if (DMLScript.STATISTICS) {
        Statistics.accSparkCollectTime(System.nanoTime() - start);
        Statistics.incSparkCollectCount(1);
    }
    return ret;
}
Also used : PartitionedBlock(org.apache.sysml.runtime.instructions.spark.data.PartitionedBlock) MatrixBlock(org.apache.sysml.runtime.matrix.data.MatrixBlock) CompressedMatrixBlock(org.apache.sysml.runtime.compress.CompressedMatrixBlock) Tuple2(scala.Tuple2) MatrixIndexes(org.apache.sysml.runtime.matrix.data.MatrixIndexes)

Example 45 with MatrixIndexes

use of org.apache.sysml.runtime.matrix.data.MatrixIndexes in project incubator-systemml by apache.

Source: class SparkExecutionContext, method cacheMatrixObject.

@SuppressWarnings("unchecked")
public void cacheMatrixObject(String var) {
    // resolve the matrix object for the given variable name
    MatrixObject mo = getMatrixObject(var);
    // double check size to avoid unnecessary spark context creation:
    // skip caching if the estimated size does not exceed the threshold
    double estSize = (double) OptimizerUtils.estimateSizeExactSparsity(mo.getMatrixCharacteristics());
    if (!OptimizerUtils.exceedsCachingThreshold(mo.getNumColumns(), estSize))
        return;
    // obtain the binary-block rdd handle and persist it
    JavaPairRDD<MatrixIndexes, MatrixBlock> in = (JavaPairRDD<MatrixIndexes, MatrixBlock>) getRDDHandleForMatrixObject(mo, InputInfo.BinaryBlockInputInfo);
    // force rdd caching, if not already cached
    if (!isRDDCached(in.id()))
        in.count(); // trigger caching to prevent contention
}
Also used : MatrixBlock(org.apache.sysml.runtime.matrix.data.MatrixBlock) CompressedMatrixBlock(org.apache.sysml.runtime.compress.CompressedMatrixBlock) MatrixObject(org.apache.sysml.runtime.controlprogram.caching.MatrixObject) MatrixIndexes(org.apache.sysml.runtime.matrix.data.MatrixIndexes) JavaPairRDD(org.apache.spark.api.java.JavaPairRDD)

Aggregations

MatrixIndexes (org.apache.sysml.runtime.matrix.data.MatrixIndexes)165 MatrixBlock (org.apache.sysml.runtime.matrix.data.MatrixBlock)142 MatrixCharacteristics (org.apache.sysml.runtime.matrix.MatrixCharacteristics)70 SparkExecutionContext (org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext)48 DMLRuntimeException (org.apache.sysml.runtime.DMLRuntimeException)41 Path (org.apache.hadoop.fs.Path)24 SequenceFile (org.apache.hadoop.io.SequenceFile)23 JavaPairRDD (org.apache.spark.api.java.JavaPairRDD)22 ArrayList (java.util.ArrayList)21 IOException (java.io.IOException)20 FileSystem (org.apache.hadoop.fs.FileSystem)20 MatrixCell (org.apache.sysml.runtime.matrix.data.MatrixCell)19 Tuple2 (scala.Tuple2)19 IndexedMatrixValue (org.apache.sysml.runtime.matrix.mapred.IndexedMatrixValue)17 JobConf (org.apache.hadoop.mapred.JobConf)14 MatrixValue (org.apache.sysml.runtime.matrix.data.MatrixValue)11 CompressedMatrixBlock (org.apache.sysml.runtime.compress.CompressedMatrixBlock)10 MatrixObject (org.apache.sysml.runtime.controlprogram.caching.MatrixObject)10 File (java.io.File)9 RDDObject (org.apache.sysml.runtime.instructions.spark.data.RDDObject)9