Use of org.apache.sysml.runtime.matrix.data.MatrixIndexes in project incubator-systemml by apache.
The class PairWritableCell, method readFields.
@Override
public void readFields(DataInput in) throws IOException {
    // read index and cell in the same order they were written
    indexes = new MatrixIndexes();
    indexes.readFields(in);
    cell = new MatrixCell();
    cell.readFields(in);
}
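For context, the Hadoop Writable contract requires that readFields consume fields in exactly the order the matching write method emitted them. The following is a minimal sketch of what that counterpart would look like, assuming only the standard Writable interface and the indexes/cell fields visible in the snippet above; it is not copied from the project sources.

// Hypothetical counterpart sketch, assuming the standard Writable contract:
// write() must emit the fields in the order readFields() consumes them above.
@Override
public void write(DataOutput out) throws IOException {
    indexes.write(out); // block indexes first
    cell.write(out);    // then the cell value
}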
Use of org.apache.sysml.runtime.matrix.data.MatrixIndexes in project incubator-systemml by apache.
The class SparkExecutionContext, method createIndexedBlock.
private static Tuple2<MatrixIndexes, MatrixBlock> createIndexedBlock(MatrixBlock mb, MatrixCharacteristics mc, long ix) {
    try {
        // compute block indexes (row-major enumeration of blocks)
        long blockRow = ix / mc.getNumColBlocks();
        long blockCol = ix % mc.getNumColBlocks();
        // compute block sizes (boundary blocks may be smaller than the block size)
        int maxRow = UtilFunctions.computeBlockSize(mc.getRows(), blockRow + 1, mc.getRowsPerBlock());
        int maxCol = UtilFunctions.computeBlockSize(mc.getCols(), blockCol + 1, mc.getColsPerBlock());
        // copy sub-matrix to block
        MatrixBlock block = new MatrixBlock(maxRow, maxCol, mb.isInSparseFormat());
        int row_offset = (int) blockRow * mc.getRowsPerBlock();
        int col_offset = (int) blockCol * mc.getColsPerBlock();
        block = mb.slice(row_offset, row_offset + maxRow - 1, col_offset, col_offset + maxCol - 1, block);
        // create key-value pair (block indexes are 1-based)
        return new Tuple2<>(new MatrixIndexes(blockRow + 1, blockCol + 1), block);
    } catch (DMLRuntimeException ex) {
        throw new RuntimeException(ex);
    }
}
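The index arithmetic above enumerates blocks in row-major order: a linear block index ix maps to blockRow = ix / numColBlocks and blockCol = ix % numColBlocks, and the last block in each dimension holds only the remainder. The following standalone sketch replays that mapping for a 2500 x 1700 matrix with 1000 x 1000 blocks; all names here are hypothetical illustrations, and the local computeBlockSize helper only mirrors the semantics of UtilFunctions.computeBlockSize as used above.

// Standalone illustration of the block enumeration in createIndexedBlock.
public class BlockIndexDemo {
    // boundary-aware block size: the full block size, except for the last
    // block in a dimension, which holds the remainder
    static int computeBlockSize(long len, long blockIx, int blockSize) {
        return (int) Math.min(blockSize, len - (blockIx - 1) * blockSize);
    }

    public static void main(String[] args) {
        long rows = 2500, cols = 1700;
        int brlen = 1000, bclen = 1000;
        long numColBlocks = (cols + bclen - 1) / bclen;                 // ceil(1700/1000) = 2
        long numBlocks = ((rows + brlen - 1) / brlen) * numColBlocks;   // 3 * 2 = 6
        for (long ix = 0; ix < numBlocks; ix++) {
            long blockRow = ix / numColBlocks; // row-major enumeration
            long blockCol = ix % numColBlocks;
            int maxRow = computeBlockSize(rows, blockRow + 1, brlen);
            int maxCol = computeBlockSize(cols, blockCol + 1, bclen);
            System.out.printf("ix=%d -> block (%d,%d), size %dx%d%n",
                ix, blockRow + 1, blockCol + 1, maxRow, maxCol);
        }
    }
}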
Use of org.apache.sysml.runtime.matrix.data.MatrixIndexes in project incubator-systemml by apache.
The class SparkExecutionContext, method toMatrixBlock.
/**
* Utility method for creating a single matrix block out of a binary block RDD.
* Note that this collect call might trigger execution of any pending transformations.
*
* NOTE: This is an unguarded utility function, which requires memory for both the output matrix
* and its collected, blocked representation.
*
* @param rdd JavaPairRDD for matrix block
* @param rlen number of rows
* @param clen number of columns
* @param brlen number of rows in a block
* @param bclen number of columns in a block
* @param nnz number of non-zeros
* @return matrix block
*/
public static MatrixBlock toMatrixBlock(JavaPairRDD<MatrixIndexes, MatrixBlock> rdd, int rlen, int clen, int brlen, int bclen, long nnz) {
    long t0 = DMLScript.STATISTICS ? System.nanoTime() : 0;
    MatrixBlock out = null;
    if (rlen <= brlen && clen <= bclen) { // SINGLE BLOCK
        // special case without copy and nnz maintenance
        List<Tuple2<MatrixIndexes, MatrixBlock>> list = rdd.collect();
        if (list.size() > 1)
            throw new DMLRuntimeException("Expecting no more than one result block.");
        else if (list.size() == 1)
            out = list.get(0)._2();
        else // empty (e.g., after ops w/ outputEmpty=false)
            out = new MatrixBlock(rlen, clen, true);
        out.examSparsity();
    } else { // MULTIPLE BLOCKS
        // determine target sparse/dense representation
        long lnnz = (nnz >= 0) ? nnz : (long) rlen * clen;
        boolean sparse = MatrixBlock.evalSparseFormatInMemory(rlen, clen, lnnz);
        // create output matrix block (w/ lazy allocation)
        out = new MatrixBlock(rlen, clen, sparse, lnnz);
        List<Tuple2<MatrixIndexes, MatrixBlock>> list = rdd.collect();
        // copy blocks one-at-a-time into output matrix block
        long aNnz = 0;
        for (Tuple2<MatrixIndexes, MatrixBlock> keyval : list) {
            // unpack index-block pair
            MatrixIndexes ix = keyval._1();
            MatrixBlock block = keyval._2();
            // compute row/column block offsets (block indexes are 1-based)
            int row_offset = (int) (ix.getRowIndex() - 1) * brlen;
            int col_offset = (int) (ix.getColumnIndex() - 1) * bclen;
            int rows = block.getNumRows();
            int cols = block.getNumColumns();
            // handle compressed blocks (decompress for robustness)
            if (block instanceof CompressedMatrixBlock)
                block = ((CompressedMatrixBlock) block).decompress();
            // append block
            if (sparse) { // SPARSE OUTPUT
                // append block to sparse target in order to avoid shifting, where
                // we use a shallow row copy in case of MCSR and single column blocks;
                // note: this append requires, for multiple column blocks, a final sort
                out.appendToSparse(block, row_offset, col_offset, clen > bclen);
            } else { // DENSE OUTPUT
                out.copy(row_offset, row_offset + rows - 1, col_offset, col_offset + cols - 1, block, false);
            }
            // incremental nnz maintenance
            aNnz += block.getNonZeros();
        }
        // post-processing of output matrix
        if (sparse && clen > bclen)
            out.sortSparseRows();
        out.setNonZeros(aNnz);
        out.examSparsity();
    }
    if (DMLScript.STATISTICS) {
        Statistics.accSparkCollectTime(System.nanoTime() - t0);
        Statistics.incSparkCollectCount(1);
    }
    return out;
}
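The dense branch writes each collected block into the cell range [row_offset, row_offset + rows - 1] x [col_offset, col_offset + cols - 1] of the output, where the 1-based block indexes are converted to 0-based cell offsets. The following is a hypothetical stitching sketch with plain 2D arrays that mirrors only that offset arithmetic; it is not how MatrixBlock.copy is implemented.

// Hypothetical stitching sketch mirroring the dense branch above.
static void copyBlock(double[][] out, double[][] block, long rowIndex, long colIndex, int brlen, int bclen) {
    // 1-based block indexes map to 0-based cell offsets
    int row_offset = (int) (rowIndex - 1) * brlen;
    int col_offset = (int) (colIndex - 1) * bclen;
    for (int i = 0; i < block.length; i++)
        for (int j = 0; j < block[i].length; j++)
            out[row_offset + i][col_offset + j] = block[i][j];
}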
Use of org.apache.sysml.runtime.matrix.data.MatrixIndexes in project incubator-systemml by apache.
The class SparkExecutionContext, method toPartitionedMatrixBlock.
public static PartitionedBlock<MatrixBlock> toPartitionedMatrixBlock(JavaPairRDD<MatrixIndexes, MatrixBlock> rdd, int rlen, int clen, int brlen, int bclen, long nnz) {
    long t0 = DMLScript.STATISTICS ? System.nanoTime() : 0;
    PartitionedBlock<MatrixBlock> out = new PartitionedBlock<>(rlen, clen, brlen, bclen);
    List<Tuple2<MatrixIndexes, MatrixBlock>> list = rdd.collect();
    // copy blocks one-at-a-time into output partitioned block
    for (Tuple2<MatrixIndexes, MatrixBlock> keyval : list) {
        // unpack index-block pair
        MatrixIndexes ix = keyval._1();
        MatrixBlock block = keyval._2();
        out.setBlock((int) ix.getRowIndex(), (int) ix.getColumnIndex(), block);
    }
    if (DMLScript.STATISTICS) {
        Statistics.accSparkCollectTime(System.nanoTime() - t0);
        Statistics.incSparkCollectCount(1);
    }
    return out;
}
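Unlike toMatrixBlock, this variant preserves the blocking: each collected block is stored under its 1-based (rowIndex, columnIndex) pair rather than being stitched into one large block. The following is a minimal hypothetical container illustrating that layout; it is not PartitionedBlock's actual implementation, and all names in it are made up for illustration.

// Hypothetical minimal blocked container keyed by 1-based block indexes.
import java.util.HashMap;
import java.util.Map;

class SimplePartitionedBlock<T> {
    private final Map<Long, T> blocks = new HashMap<>();
    private final long numColBlocks;

    SimplePartitionedBlock(long clen, int bclen) {
        this.numColBlocks = (clen + bclen - 1) / bclen; // ceil(clen / bclen)
    }

    // store a block under its 1-based block indexes, linearized row-major
    void setBlock(int rowIndex, int colIndex, T block) {
        blocks.put((rowIndex - 1) * numColBlocks + (colIndex - 1), block);
    }

    T getBlock(int rowIndex, int colIndex) {
        return blocks.get((rowIndex - 1) * numColBlocks + (colIndex - 1));
    }
}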
Use of org.apache.sysml.runtime.matrix.data.MatrixIndexes in project incubator-systemml by apache.
The class SparkExecutionContext, method cacheMatrixObject.
@SuppressWarnings("unchecked")
public void cacheMatrixObject(String var) {
    // get input rdd and default storage level
    MatrixObject mo = getMatrixObject(var);
    // double check size to avoid unnecessary spark context creation
    if (!OptimizerUtils.exceedsCachingThreshold(mo.getNumColumns(), (double) OptimizerUtils.estimateSizeExactSparsity(mo.getMatrixCharacteristics())))
        return;
    JavaPairRDD<MatrixIndexes, MatrixBlock> in = (JavaPairRDD<MatrixIndexes, MatrixBlock>) getRDDHandleForMatrixObject(mo, InputInfo.BinaryBlockInputInfo);
    // persist rdd (force rdd caching, if not already cached)
    if (!isRDDCached(in.id()))
        in.count(); // trigger caching to prevent contention
}
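The count() call works because Spark caching is lazy: persist only marks an RDD for caching, and the first action over it materializes the cached partitions. A generic, self-contained Spark sketch of that pattern, independent of SystemML's RDD handles (the local context and sample data are assumptions for the demo):

// Generic Spark caching pattern; not SystemML code.
import java.util.Arrays;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.storage.StorageLevel;

public class CacheDemo {
    public static void main(String[] args) {
        JavaSparkContext sc = new JavaSparkContext("local", "CacheDemo");
        JavaRDD<Integer> rdd = sc.parallelize(Arrays.asList(1, 2, 3, 4));
        rdd.persist(StorageLevel.MEMORY_AND_DISK()); // lazy: only marks the RDD for caching
        rdd.count();                                 // first action materializes the cache
        sc.close();
    }
}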