Use of org.apache.sysml.runtime.controlprogram.parfor.util.PairWritableBlock in project incubator-systemml by apache.
The class DataPartitionerRemoteSparkMapper, method call.
@Override
public Iterator<Tuple2<Long, Writable>> call(Tuple2<MatrixIndexes, MatrixBlock> arg0) throws Exception {
    List<Tuple2<Long, Writable>> ret = new LinkedList<>();
    MatrixIndexes key2 = arg0._1();
    MatrixBlock value2 = arg0._2();
    long row_offset = (key2.getRowIndex() - 1) * _brlen;
    long col_offset = (key2.getColumnIndex() - 1) * _bclen;
    long rows = value2.getNumRows();
    long cols = value2.getNumColumns();
    // bound check per block: the block must lie within the overall matrix dimensions
    if (row_offset + rows < 1 || row_offset + rows > _rlen || col_offset + cols < 1 || col_offset + cols > _clen) {
        throw new IOException("Matrix block [" + (row_offset + 1) + ":" + (row_offset + rows) + ","
            + (col_offset + 1) + ":" + (col_offset + cols) + "] "
            + "out of overall matrix range [1:" + _rlen + ",1:" + _clen + "].");
    }
    // partition inputs according to the partitioning scheme
    switch (_dpf) {
        case ROW_WISE: {
            // emit one single-row block per row, keyed by global row index
            MatrixBlock[] blks = DataConverter.convertToMatrixBlockPartitions(value2, false);
            for (int i = 0; i < rows; i++) {
                PairWritableBlock tmp = new PairWritableBlock();
                tmp.indexes = new MatrixIndexes(1, col_offset / _bclen + 1);
                tmp.block = blks[i];
                ret.add(new Tuple2<>(Long.valueOf(row_offset + 1 + i), tmp));
            }
            break;
        }
        case ROW_BLOCK_WISE: {
            // emit the entire block, keyed by global row-block index
            PairWritableBlock tmp = new PairWritableBlock();
            tmp.indexes = new MatrixIndexes(1, col_offset / _bclen + 1);
            tmp.block = new MatrixBlock(value2);
            ret.add(new Tuple2<>(Long.valueOf(row_offset / _brlen + 1), tmp));
            break;
        }
        case ROW_BLOCK_WISE_N: {
            if (_n >= _brlen) {
                // partition spans whole blocks: emit the block with its local block index within the partition
                PairWritableBlock tmp = new PairWritableBlock();
                tmp.indexes = new MatrixIndexes(((row_offset % _n) / _brlen) + 1, col_offset / _bclen + 1);
                tmp.block = new MatrixBlock(value2);
                ret.add(new Tuple2<>(Long.valueOf(row_offset / _n + 1), tmp));
            } else {
                // partition smaller than a block: slice the block into fragments of _n rows
                for (int i = 0; i < rows; i += _n) {
                    PairWritableBlock tmp = new PairWritableBlock();
                    tmp.indexes = new MatrixIndexes(1, col_offset / _bclen + 1);
                    tmp.block = value2.slice(i, Math.min(i + (int) _n - 1, value2.getNumRows() - 1),
                        0, value2.getNumColumns() - 1, new MatrixBlock());
                    ret.add(new Tuple2<>(Long.valueOf((row_offset + i) / _n + 1), tmp));
                }
            }
            break;
        }
        case COLUMN_WISE: {
            // emit one single-column block per column, keyed by global column index
            MatrixBlock[] blks = DataConverter.convertToMatrixBlockPartitions(value2, true);
            for (int i = 0; i < cols; i++) {
                PairWritableBlock tmp = new PairWritableBlock();
                tmp.indexes = new MatrixIndexes(row_offset / _brlen + 1, 1);
                tmp.block = blks[i];
                ret.add(new Tuple2<>(Long.valueOf(col_offset + 1 + i), tmp));
            }
            break;
        }
        case COLUMN_BLOCK_WISE: {
            // emit the entire block, keyed by global column-block index
            PairWritableBlock tmp = new PairWritableBlock();
            tmp.indexes = new MatrixIndexes(row_offset / _brlen + 1, 1);
            tmp.block = new MatrixBlock(value2);
            ret.add(new Tuple2<>(Long.valueOf(col_offset / _bclen + 1), tmp));
            break;
        }
        case COLUMN_BLOCK_WISE_N: {
            if (_n >= _bclen) {
                // partition spans whole blocks: emit the block with its local block index within the partition
                PairWritableBlock tmp = new PairWritableBlock();
                tmp.indexes = new MatrixIndexes(row_offset / _brlen + 1, ((col_offset % _n) / _bclen) + 1);
                tmp.block = new MatrixBlock(value2);
                ret.add(new Tuple2<>(Long.valueOf(col_offset / _n + 1), tmp));
            } else {
                // partition smaller than a block: slice the block into fragments of _n columns
                for (int i = 0; i < cols; i += _n) {
                    PairWritableBlock tmp = new PairWritableBlock();
                    tmp.indexes = new MatrixIndexes(row_offset / _brlen + 1, 1);
                    tmp.block = value2.slice(0, value2.getNumRows() - 1,
                        i, Math.min(i + (int) _n - 1, value2.getNumColumns() - 1), new MatrixBlock());
                    ret.add(new Tuple2<>(Long.valueOf((col_offset + i) / _n + 1), tmp));
                }
            }
            break;
        }
        default:
            throw new DMLRuntimeException("Unsupported partition format: " + _dpf);
    }
    return ret.iterator();
}
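This mapper is one half of the remote data-partitioning job: its (partition ID, PairWritableBlock) output is grouped by partition ID and handed to the reducer shown next. Below is a minimal wiring sketch under the assumption that the mapper and reducer instances are already constructed (their constructors are not shown in these snippets); only the standard Spark JavaPairRDD operations flatMapToPair, groupByKey, and foreach are used.

import org.apache.spark.api.java.JavaPairRDD;
import org.apache.sysml.runtime.controlprogram.parfor.DataPartitionerRemoteSparkMapper;
import org.apache.sysml.runtime.controlprogram.parfor.DataPartitionerRemoteSparkReducer;
import org.apache.sysml.runtime.matrix.data.MatrixBlock;
import org.apache.sysml.runtime.matrix.data.MatrixIndexes;

public class PartitioningPipelineSketch {
    // a minimal sketch, assuming pre-built mapper/reducer instances
    public static void partition(JavaPairRDD<MatrixIndexes, MatrixBlock> in,
            DataPartitionerRemoteSparkMapper mapper, DataPartitionerRemoteSparkReducer reducer) {
        in.flatMapToPair(mapper) // emit (partitionID, PairWritableBlock) fragments
          .groupByKey()          // collect all fragments belonging to one partition
          .foreach(reducer);     // write each partition to its own sequence file
    }
}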
Use of org.apache.sysml.runtime.controlprogram.parfor.util.PairWritableBlock in project incubator-systemml by apache.
The class DataPartitionerRemoteSparkReducer, method call.
@Override
@SuppressWarnings("deprecation")
public void call(Tuple2<Long, Iterable<Writable>> arg0) throws Exception {
    // prepare grouped partition input
    Long key = arg0._1();
    Iterator<Writable> valueList = arg0._2().iterator();
    // write the entire partition to a binary-block sequence file
    SequenceFile.Writer writer = null;
    try {
        // create sequence file writer
        Configuration job = new Configuration(ConfigurationManager.getCachedJobConf());
        Path path = new Path(_fnameNew + File.separator + key);
        FileSystem fs = IOUtilFunctions.getFileSystem(path, job);
        writer = new SequenceFile.Writer(fs, job, path, MatrixIndexes.class, MatrixBlock.class,
            job.getInt(MRConfigurationNames.IO_FILE_BUFFER_SIZE, 4096),
            (short) _replication, fs.getDefaultBlockSize(), null, new SequenceFile.Metadata());
        // write individual blocks unordered to output
        while (valueList.hasNext()) {
            PairWritableBlock pair = (PairWritableBlock) valueList.next();
            writer.append(pair.indexes, pair.block);
        }
    } finally {
        IOUtilFunctions.closeSilently(writer);
    }
}
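A partition written this way can be read back with a plain SequenceFile.Reader. The sketch below uses only standard Hadoop API; the path argument is whatever _fnameNew/key resolved to above.

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.SequenceFile;
import org.apache.sysml.runtime.matrix.data.MatrixBlock;
import org.apache.sysml.runtime.matrix.data.MatrixIndexes;

public class PartitionReadBackSketch {
    // a minimal sketch: iterate over the (MatrixIndexes, MatrixBlock) entries of one partition file
    public static void readPartition(Configuration conf, Path path) throws IOException {
        try (SequenceFile.Reader reader = new SequenceFile.Reader(conf, SequenceFile.Reader.file(path))) {
            MatrixIndexes key = new MatrixIndexes();
            MatrixBlock value = new MatrixBlock();
            while (reader.next(key, value)) {
                // each entry is one block fragment of the partition
                System.out.println(key + " -> " + value.getNumRows() + "x" + value.getNumColumns());
            }
        }
    }
}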
Use of org.apache.sysml.runtime.controlprogram.parfor.util.PairWritableBlock in project incubator-systemml by apache.
The class RemoteDPParForSparkWorker, method collectBinaryBlock.
/**
 * Collects a matrix block partition from a given input iterator over
 * binary blocks.
 *
 * Note that this method reuses the instance attribute _partition; multiple
 * calls will overwrite the result.
 *
 * @param valueList iterable writables
 * @param reuse matrix block partition for reuse
 * @return matrix block
 * @throws IOException if an IOException occurs
 */
private MatrixBlock collectBinaryBlock(Iterable<Writable> valueList, MatrixBlock reuse) throws IOException {
    // fast path: a partition that consists of a single fragment can be returned directly,
    // which avoids unnecessary copies and reduces memory pressure
    if (valueList instanceof Collection && ((Collection<Writable>) valueList).size() == 1) {
        return ((PairWritableBlock) valueList.iterator().next()).block;
    }
    // default: create or reuse target partition and copy individual partition fragments
    // into this target, including nnz maintenance and potential dense-sparse format change
    MatrixBlock partition = reuse;
    try {
        // reset the reuse block, keeping the configured representation
        if (_tSparseCol)
            // transposed sparse column representation (note the swapped dimensions)
            partition = new MatrixBlock(_clen, _rlen, true);
        else if (partition != null)
            partition.reset(_rlen, _clen, false);
        else
            partition = new MatrixBlock(_rlen, _clen, false);
        long lnnz = 0;
        for (Writable val : valueList) {
            PairWritableBlock pval = (PairWritableBlock) val;
            int row_offset = (int) (pval.indexes.getRowIndex() - 1) * _brlen;
            int col_offset = (int) (pval.indexes.getColumnIndex() - 1) * _bclen;
            if (!partition.isInSparseFormat()) // DENSE
                partition.copy(row_offset, row_offset + pval.block.getNumRows() - 1,
                    col_offset, col_offset + pval.block.getNumColumns() - 1, pval.block, false);
            else // SPARSE
                partition.appendToSparse(pval.block, row_offset, col_offset);
            lnnz += pval.block.getNonZeros();
        }
        // post-processing: cleanups if required
        if (partition.isInSparseFormat() && _clen > _bclen)
            partition.sortSparseRows();
        partition.setNonZeros(lnnz);
        partition.examSparsity();
    } catch (DMLRuntimeException ex) {
        throw new IOException(ex);
    }
    return partition;
}
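To make the offset arithmetic concrete, here is a self-contained sketch of the dense merge path: two 1x2 fragments (block sizes brlen=1, bclen=2) are copied into one 1x4 partition. All dimensions and values are invented for illustration.

import org.apache.sysml.runtime.matrix.data.MatrixBlock;
import org.apache.sysml.runtime.matrix.data.MatrixIndexes;

public class CollectPartitionSketch {
    public static void main(String[] args) {
        int brlen = 1, bclen = 2;
        MatrixBlock partition = new MatrixBlock(1, 4, false); // dense 1x4 target
        // fragment at block index (1,1) -> column offset (1-1)*bclen = 0
        MatrixBlock frag1 = new MatrixBlock(1, 2, false);
        frag1.quickSetValue(0, 0, 1.0);
        frag1.quickSetValue(0, 1, 2.0);
        // fragment at block index (1,2) -> column offset (2-1)*bclen = 2
        MatrixBlock frag2 = new MatrixBlock(1, 2, false);
        frag2.quickSetValue(0, 0, 3.0);
        frag2.quickSetValue(0, 1, 4.0);
        MatrixIndexes[] ix = { new MatrixIndexes(1, 1), new MatrixIndexes(1, 2) };
        MatrixBlock[] frags = { frag1, frag2 };
        long lnnz = 0;
        for (int i = 0; i < frags.length; i++) {
            // same index-to-offset mapping as in collectBinaryBlock above
            int rowOff = (int) (ix[i].getRowIndex() - 1) * brlen;
            int colOff = (int) (ix[i].getColumnIndex() - 1) * bclen;
            partition.copy(rowOff, rowOff + frags[i].getNumRows() - 1,
                colOff, colOff + frags[i].getNumColumns() - 1, frags[i], false);
            lnnz += frags[i].getNonZeros();
        }
        partition.setNonZeros(lnnz);
        partition.examSparsity();
        // expected partition content: [1.0 2.0 3.0 4.0]
        System.out.println(partition);
    }
}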
Use of org.apache.sysml.runtime.controlprogram.parfor.util.PairWritableBlock in project incubator-systemml by apache.
The class RemoteDPParWorkerReducer, method collectBinaryBlock.
/**
 * Collects a matrix block partition from a given input iterator over
 * binary blocks.
 *
 * Note that this method reuses the instance attribute _partition; multiple
 * calls will overwrite the result.
 *
 * @param valueList iterable writables
 * @return matrix block
 * @throws IOException if an IOException occurs
 */
private MatrixBlock collectBinaryBlock(Iterator<Writable> valueList) throws IOException {
    try {
        // reset the reuse block, keeping the configured representation
        _partition.reset(_rlen, _clen);
        while (valueList.hasNext()) {
            PairWritableBlock pairValue = (PairWritableBlock) valueList.next();
            int row_offset = (int) (pairValue.indexes.getRowIndex() - 1) * _brlen;
            int col_offset = (int) (pairValue.indexes.getColumnIndex() - 1) * _bclen;
            MatrixBlock block = pairValue.block;
            if (!_partition.isInSparseFormat()) { // DENSE
                _partition.copy(row_offset, row_offset + block.getNumRows() - 1,
                    col_offset, col_offset + block.getNumColumns() - 1, block, false);
            } else { // SPARSE
                _partition.appendToSparse(block, row_offset, col_offset);
            }
        }
        // final partition cleanup
        cleanupCollectedMatrixPartition(_partition.isInSparseFormat());
    } catch (DMLRuntimeException ex) {
        throw new IOException(ex);
    }
    return _partition;
}
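The sparse path behaves differently from the dense one: fragments are appended row-wise via appendToSparse, and when a partition spans multiple column blocks the sparse rows may need sorting afterwards, as in the workers above. A minimal sketch with invented dimensions and values:

import org.apache.sysml.runtime.matrix.data.MatrixBlock;

public class SparseMergeSketch {
    public static void main(String[] args) {
        // sparse 2x4 target partition assembled from two 2x2 fragments (bclen=2)
        MatrixBlock partition = new MatrixBlock(2, 4, true);
        partition.allocateSparseRowsBlock();
        MatrixBlock frag1 = new MatrixBlock(2, 2, true);
        frag1.quickSetValue(0, 0, 1.0);
        MatrixBlock frag2 = new MatrixBlock(2, 2, true);
        frag2.quickSetValue(1, 1, 2.0);
        // append fragments at their column offsets 0 and 2
        partition.appendToSparse(frag1, 0, 0);
        partition.appendToSparse(frag2, 0, 2);
        // partition spans two column blocks, so sparse rows may be unordered
        partition.sortSparseRows();
        // nnz is maintained manually, mirroring the collectBinaryBlock pattern
        partition.setNonZeros(frag1.getNonZeros() + frag2.getNonZeros());
        partition.examSparsity(); // may switch representation depending on density
        System.out.println(partition);
    }
}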