Usage of org.apache.sysml.runtime.matrix.data.MatrixIndexes in the project incubator-systemml by Apache.
From the class RDDSortUtils, method sortDataByValMemSort.
/**
 * Reorders {@code data} according to an order-by column that is collected and sorted
 * in memory; the resulting index vector is broadcast and applied to the data blocks.
 *
 * @param val order-by column as {@code JavaPairRDD<MatrixIndexes, MatrixBlock>}
 * @param data data to be reordered as {@code JavaPairRDD<MatrixIndexes, MatrixBlock>}
 * @param asc if true, sort ascending
 * @param rlen number of rows
 * @param clen number of columns (not read by this method)
 * @param brlen number of rows in a block
 * @param bclen number of columns in a block
 * @param sec spark execution context, used to create the broadcast
 * @param r_op reorg operator (not read by this method)
 * @return reordered data as {@code JavaPairRDD<MatrixIndexes, MatrixBlock>}
 */
public static JavaPairRDD<MatrixIndexes, MatrixBlock> sortDataByValMemSort(JavaPairRDD<MatrixIndexes, MatrixBlock> val, JavaPairRDD<MatrixIndexes, MatrixBlock> data, boolean asc, long rlen, long clen, int brlen, int bclen, SparkExecutionContext sec, ReorgOperator r_op) {
    // materialize the single order-by column as one local block
    MatrixBlock orderByColumn = SparkExecutionContext.toMatrixBlock(val, (int) rlen, 1, brlen, bclen, -1);

    // local sort with index return: position i of the result holds the (1-based)
    // source row that ends up at target position i
    SortIndex sortFunction = new SortIndex(1, !asc, true);
    ReorgOperator localSort = new ReorgOperator(sortFunction);
    MatrixBlock sourceAtTarget = (MatrixBlock) orderByColumn.reorgOperations(localSort, new MatrixBlock(), -1, -1, -1);

    // invert the permutation so that each source row maps to its (1-based) target position
    final int numRows = sourceAtTarget.getNumRows();
    MatrixBlock targetAtSource = new MatrixBlock(numRows, 1, false);
    for (int targetPos = 0; targetPos < numRows; targetPos++) {
        int sourceRow = (int) sourceAtTarget.quickGetValue(targetPos, 0) - 1;
        targetAtSource.quickSetValue(sourceRow, 0, targetPos + 1);
    }

    // ship the inverted index vector to the executors
    PartitionedBlock<MatrixBlock> partitionedIx = new PartitionedBlock<>(targetAtSource, brlen, bclen);
    Broadcast<PartitionedBlock<MatrixBlock>> broadcastIx = sec.getSparkContext().broadcast(partitionedIx);

    // shuffle the data rows using the broadcast index vector and merge them per key
    JavaPairRDD<MatrixIndexes, RowMatrixBlock> shuffledRows = data.mapPartitionsToPair(new ShuffleMatrixBlockRowsInMemFunction(rlen, brlen, broadcastIx));
    return RDDAggregateUtils.mergeRowsByKey(shuffledRows);
}
Usage of org.apache.sysml.runtime.matrix.data.MatrixIndexes in the project incubator-systemml by Apache.
From the class RDDSortUtils, method sortByVals.
/**
 * Sorts the rows extracted from {@code in} in ascending order of the key produced by
 * {@code CreateDoubleKeysFunction} and reassembles them into binary blocks.
 *
 * @param in input as {@code JavaPairRDD<MatrixIndexes, MatrixBlock>}
 * @param rlen number of rows
 * @param clen number of columns
 * @param brlen block size; passed as both block dimensions when sizing partitions
 * @return sorted output as {@code JavaPairRDD<MatrixIndexes, MatrixBlock>}
 */
public static JavaPairRDD<MatrixIndexes, MatrixBlock> sortByVals(JavaPairRDD<MatrixIndexes, MatrixBlock> in, long rlen, long clen, int brlen) {
    // break the input blocks up via ExtractRowsFunction
    JavaRDD<MatrixBlock> extractedRows = in.values().flatMap(new ExtractRowsFunction());

    // ascending sort into the preferred number of partitions
    MatrixCharacteristics characteristics = new MatrixCharacteristics(rlen, clen, brlen, brlen);
    int preferredPartitions = SparkUtils.getNumPreferredPartitions(characteristics, in);
    JavaRDD<MatrixBlock> sortedRows = extractedRows.sortBy(new CreateDoubleKeysFunction(), true, preferredPartitions);

    // attach the global position to every row and rebuild binary blocks from it
    JavaPairRDD<MatrixIndexes, MatrixBlock> blocks = sortedRows
        .zipWithIndex()
        .mapPartitionsToPair(new ConvertToBinaryBlockFunction5(rlen, brlen));
    return RDDAggregateUtils.mergeByKey(blocks, false);
}
Usage of org.apache.sysml.runtime.matrix.data.MatrixIndexes in the project incubator-systemml by Apache.
From the class ReaderBinaryBlock, method readBinaryBlockMatrixBlocksFromHDFS.
/**
 * Reads every block from the sequence file(s) found under {@code path} and adds a copy
 * of each (index, block) pair to {@code dest}. Each block is bounds-checked against the
 * overall matrix dimensions before it is added.
 *
 * @param path file or directory resolved via {@code IOUtilFunctions.getSequenceFilePaths}
 * @param job job configuration used to open the sequence files
 * @param fs file system used to resolve {@code path}
 * @param dest collection receiving the copied blocks
 * @param rlen number of rows of the overall matrix
 * @param clen number of columns of the overall matrix
 * @param brlen number of rows in a block
 * @param bclen number of columns in a block
 * @throws IOException if reading fails or a block lies outside {@code [1:rlen,1:clen]}
 */
private static void readBinaryBlockMatrixBlocksFromHDFS(Path path, JobConf job, FileSystem fs, Collection<IndexedMatrixValue> dest, long rlen, long clen, int brlen, int bclen) throws IOException {
    // key/value are reused as read buffers; copies are what end up in dest
    MatrixIndexes key = new MatrixIndexes();
    MatrixBlock value = new MatrixBlock();

    // set up preferred custom serialization framework for binary block format
    if (MRJobConfiguration.USE_BINARYBLOCK_SERIALIZATION)
        MRJobConfiguration.addBinaryBlockSerializationFramework(job);

    // 1..N files
    for (Path lpath : IOUtilFunctions.getSequenceFilePaths(fs, path)) {
        // directly read from sequence files (individual partfiles)
        SequenceFile.Reader reader = new SequenceFile.Reader(job, SequenceFile.Reader.file(lpath));
        try {
            while (reader.next(key, value)) {
                // Offsets are computed in long: the previous int arithmetic truncated the
                // block index and could overflow in the multiplication, letting a wrapped
                // offset slip through the range check below.
                long rowOffset = (key.getRowIndex() - 1L) * brlen;
                long colOffset = (key.getColumnIndex() - 1L) * bclen;
                long rowEnd = rowOffset + value.getNumRows();
                long colEnd = colOffset + value.getNumColumns();

                // bound check per block (negative offsets come from block indexes < 1)
                boolean rowsOutOfRange = rowOffset < 0 || rowEnd < 0 || rowEnd > rlen;
                boolean colsOutOfRange = colOffset < 0 || colEnd < 0 || colEnd > clen;
                if (rowsOutOfRange || colsOutOfRange) {
                    throw new IOException("Matrix block [" + (rowOffset + 1) + ":" + rowEnd + "," + (colOffset + 1) + ":" + colEnd + "] " + "out of overall matrix range [1:" + rlen + ",1:" + clen + "].");
                }

                // copy block to result
                dest.add(new IndexedMatrixValue(new MatrixIndexes(key), new MatrixBlock(value)));
            }
        } finally {
            IOUtilFunctions.closeSilently(reader);
        }
    }
}
Usage of org.apache.sysml.runtime.matrix.data.MatrixIndexes in the project incubator-systemml by Apache.
From the class TestUtils, method writeBinaryTestMatrixBlocks.
/**
 * <p>
 * Writes a matrix to a file using the binary blocks format.
 * </p>
 * <p>
 * The matrix is cut into blocks of at most {@code rowsInBlock} x {@code colsInBlock}
 * cells; blocks at the lower and right edges are smaller when the dimensions do not
 * divide evenly. Block indexes are 1-based. The column count of each row of blocks is
 * taken from the first row of that block. An {@link IOException} is printed and
 * reported through {@code fail}.
 * </p>
 *
 * @param file
 *            file name
 * @param matrix
 *            matrix
 * @param rowsInBlock
 *            rows in block
 * @param colsInBlock
 *            columns in block
 * @param sparseFormat
 *            sparse format
 */
@SuppressWarnings("deprecation")
public static void writeBinaryTestMatrixBlocks(String file, double[][] matrix, int rowsInBlock, int colsInBlock, boolean sparseFormat) {
    SequenceFile.Writer writer = null;
    try {
        Path path = new Path(file);
        FileSystem fs = IOUtilFunctions.getFileSystem(path, conf);
        writer = new SequenceFile.Writer(fs, conf, path, MatrixIndexes.class, MatrixBlock.class);

        // the index object is reused for every appended block
        MatrixIndexes index = new MatrixIndexes();
        for (int i = 0; i < matrix.length; i += rowsInBlock) {
            int rows = Math.min(rowsInBlock, matrix.length - i);
            int rowLength = matrix[i].length;
            for (int j = 0; j < rowLength; j += colsInBlock) {
                int cols = Math.min(colsInBlock, rowLength - j);
                index.setIndexes((i / rowsInBlock) + 1, (j / colsInBlock) + 1);

                // a fresh block per tile; no placeholder instance is allocated up front
                MatrixBlock value = new MatrixBlock(rows, cols, sparseFormat);
                for (int k = 0; k < rows; k++) {
                    for (int l = 0; l < cols; l++) {
                        value.setValue(k, l, matrix[i + k][j + l]);
                    }
                }
                writer.append(index, value);
            }
        }
    } catch (IOException e) {
        e.printStackTrace();
        fail("unable to write test matrix: " + e.getMessage());
    } finally {
        IOUtilFunctions.closeSilently(writer);
    }
}
Usage of org.apache.sysml.runtime.matrix.data.MatrixIndexes in the project incubator-systemml by Apache.
From the class TestUtils, method writeBinaryTestMatrixCells.
/**
 * <p>
 * Writes a matrix to a file using the binary cells format.
 * </p>
 * <p>
 * Only cells whose value is not equal to zero are written, each under its 1-based
 * (row, column) index. An {@link IOException} is printed and reported through
 * {@code fail}.
 * </p>
 *
 * @param file
 *            file name
 * @param matrix
 *            matrix
 */
@SuppressWarnings("deprecation")
public static void writeBinaryTestMatrixCells(String file, double[][] matrix) {
    try {
        SequenceFile.Writer cellWriter = null;
        try {
            Path target = new Path(file);
            FileSystem fileSystem = IOUtilFunctions.getFileSystem(target, conf);
            cellWriter = new SequenceFile.Writer(fileSystem, conf, target, MatrixIndexes.class, MatrixCell.class);

            // both objects are reused for every appended cell
            MatrixIndexes cellIndex = new MatrixIndexes();
            MatrixCell cell = new MatrixCell();
            for (int r = 0; r < matrix.length; r++) {
                double[] row = matrix[r];
                for (int c = 0; c < row.length; c++) {
                    double cellValue = row[c];
                    if (cellValue == 0) {
                        // zero cells are not stored
                        continue;
                    }
                    cellIndex.setIndexes(r + 1, c + 1);
                    cell.setValue(cellValue);
                    cellWriter.append(cellIndex, cell);
                }
            }
        } finally {
            // the writer is closed before any IOException reaches the handler below
            IOUtilFunctions.closeSilently(cellWriter);
        }
    } catch (IOException e) {
        e.printStackTrace();
        fail("unable to write test matrix: " + e.getMessage());
    }
}
Aggregations