Use of org.apache.sysml.runtime.matrix.data.MatrixIndexes in project incubator-systemml by apache.
The class SparkExecutionContext, method writeRDDtoHDFS:
@SuppressWarnings("unchecked")
public static long writeRDDtoHDFS(RDDObject rdd, String path, OutputInfo oinfo) {
JavaPairRDD<MatrixIndexes, MatrixBlock> lrdd = (JavaPairRDD<MatrixIndexes, MatrixBlock>) rdd.getRDD();
// piggyback nnz maintenance on write
LongAccumulator aNnz = getSparkContextStatic().sc().longAccumulator("nnz");
lrdd = lrdd.mapValues(new ComputeBinaryBlockNnzFunction(aNnz));
// saving the file is an action, which also triggers the nnz maintenance
lrdd.saveAsHadoopFile(path, oinfo.outputKeyClass, oinfo.outputValueClass, oinfo.outputFormatClass);
// return nnz aggregate of all blocks
return aNnz.value();
}
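The pattern here is generic Spark rather than SystemML-specific: register a LongAccumulator, fold its updates into a mapValues pass, and let the subsequent save action compute the statistic for free. Below is a minimal, self-contained sketch of the same piggybacking idea, assuming toy (Integer, double[]) pairs in place of (MatrixIndexes, MatrixBlock) and count() in place of saveAsHadoopFile; the class name and data are illustrative only.
import java.util.Arrays;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.util.LongAccumulator;
import scala.Tuple2;

public class AccumulatorPiggybackSketch {
    public static void main(String[] args) {
        SparkConf conf = new SparkConf().setAppName("nnz-piggyback").setMaster("local[*]");
        try (JavaSparkContext sc = new JavaSparkContext(conf)) {
            // toy blocks: (blockId, values) pairs standing in for (MatrixIndexes, MatrixBlock)
            JavaPairRDD<Integer, double[]> blocks = sc.parallelizePairs(Arrays.asList(
                new Tuple2<>(1, new double[]{0.0, 2.0, 3.0}),
                new Tuple2<>(2, new double[]{0.0, 0.0, 5.0})));
            // lazily registered side computation, analogous to ComputeBinaryBlockNnzFunction
            LongAccumulator aNnz = sc.sc().longAccumulator("nnz");
            JavaPairRDD<Integer, double[]> counted = blocks.mapValues(v -> {
                aNnz.add(Arrays.stream(v).filter(d -> d != 0).count()); // nnz as a side effect
                return v;
            });
            // the action triggers both the data pass and the accumulator updates
            counted.count();
            System.out.println("nnz = " + aNnz.value()); // 3
        }
    }
}
One caveat: Spark guarantees exactly-once accumulator semantics only for updates made inside actions; updates made in transformations, as here, may be re-applied when tasks are retried, so the resulting nnz is best-effort under failures.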
Use of org.apache.sysml.runtime.matrix.data.MatrixIndexes in project incubator-systemml by apache.
The class DataPartitionerLocal, method partitionBinaryBlock:
@SuppressWarnings("deprecation")
private void partitionBinaryBlock(String fname, String fnameStaging, String fnameNew, long rlen, long clen, int brlen, int bclen) {
try {
// create reuse object
_reuseBlk = DataPartitioner.createReuseMatrixBlock(_format, brlen, bclen);
// STEP 1: read matrix from HDFS and write blocks to local staging area
// check and add input path
JobConf job = new JobConf(ConfigurationManager.getCachedJobConf());
Path path = new Path(fname);
FileSystem fs = IOUtilFunctions.getFileSystem(path, job);
// prepare sequence file reader, and write to local staging area
MatrixIndexes key = new MatrixIndexes();
MatrixBlock value = new MatrixBlock();
for (Path lpath : IOUtilFunctions.getSequenceFilePaths(fs, path)) {
SequenceFile.Reader reader = new SequenceFile.Reader(fs, lpath, job);
try {
// for each block
while (reader.next(key, value)) {
long row_offset = (key.getRowIndex() - 1) * brlen;
long col_offset = (key.getColumnIndex() - 1) * bclen;
long rows = value.getNumRows();
long cols = value.getNumColumns();
// bound check per block
if (row_offset + rows < 1 || row_offset + rows > rlen || col_offset + cols < 1 || col_offset + cols > clen) {
throw new IOException("Matrix block [" + (row_offset + 1) + ":" + (row_offset + rows) + "," + (col_offset + 1) + ":" + (col_offset + cols) + "] " + "out of overall matrix range [1:" + rlen + ",1:" + clen + "].");
}
appendBlockToStagingArea(fnameStaging, value, row_offset, col_offset, brlen, bclen);
}
} finally {
IOUtilFunctions.closeSilently(reader);
}
}
// STEP 2: read matrix blocks from staging area and write matrix to HDFS
String[] fnamesPartitions = new File(fnameStaging).list();
if (PARALLEL) {
int len = Math.min(fnamesPartitions.length, _par);
Thread[] threads = new Thread[len];
for (int i = 0; i < len; i++) {
int start = i * (int) Math.ceil(((double) fnamesPartitions.length) / len);
int end = (i + 1) * (int) Math.ceil(((double) fnamesPartitions.length) / len) - 1;
end = Math.min(end, fnamesPartitions.length - 1);
threads[i] = new Thread(new DataPartitionerWorkerBinaryBlock(job, fnameNew, fnameStaging, fnamesPartitions, start, end));
threads[i].start();
}
for (Thread t : threads) t.join();
} else {
for (String pdir : fnamesPartitions) writeBinaryBlockSequenceFileToHDFS(job, fnameNew, fnameStaging + "/" + pdir, false);
}
} catch (Exception e) {
throw new DMLRuntimeException("Unable to partition binary block matrix.", e);
}
}
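The parallel branch above carves the staging directories into contiguous index ranges, one per worker thread, using ceiling division. Since the start/end arithmetic is easy to get wrong by one, here is the same computation isolated into a standalone sketch; the class and method names are illustrative, not part of SystemML.
import java.util.Arrays;

public class RangePartitionSketch {
    // split n items into at most par contiguous [start, end] ranges (inclusive),
    // mirroring the thread-range arithmetic in partitionBinaryBlock above
    public static int[][] ranges(int n, int par) {
        int len = Math.min(n, par);
        int chunk = (int) Math.ceil((double) n / len);
        int[][] out = new int[len][2];
        for (int i = 0; i < len; i++) {
            out[i][0] = i * chunk;                            // inclusive start
            out[i][1] = Math.min((i + 1) * chunk - 1, n - 1); // inclusive end, clamped
        }
        return out;
    }

    public static void main(String[] args) {
        for (int[] r : ranges(10, 4))
            System.out.println(Arrays.toString(r)); // [0, 2] [3, 5] [6, 8] [9, 9]
    }
}
Note that with ceiling-based chunks the trailing ranges can come out empty (start greater than the clamped end) when par does not divide the item count evenly, e.g. ranges(10, 9); a worker handed such a range simply has nothing to do.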
Use of org.apache.sysml.runtime.matrix.data.MatrixIndexes in project incubator-systemml by apache.
The class DataPartitionerRemoteSparkMapper, method call:
@Override
public Iterator<Tuple2<Long, Writable>> call(Tuple2<MatrixIndexes, MatrixBlock> arg0) throws Exception {
List<Tuple2<Long, Writable>> ret = new LinkedList<>();
MatrixIndexes key2 = arg0._1();
MatrixBlock value2 = arg0._2();
long row_offset = (key2.getRowIndex() - 1) * _brlen;
long col_offset = (key2.getColumnIndex() - 1) * _bclen;
long rows = value2.getNumRows();
long cols = value2.getNumColumns();
// bound check per block
if (row_offset + rows < 1 || row_offset + rows > _rlen || col_offset + cols < 1 || col_offset + cols > _clen) {
throw new IOException("Matrix block [" + (row_offset + 1) + ":" + (row_offset + rows) + "," + (col_offset + 1) + ":" + (col_offset + cols) + "] " + "out of overall matrix range [1:" + _rlen + ",1:" + _clen + "].");
}
// partition inputs according to partitioning scheme
switch(_dpf) {
case ROW_WISE:
{
MatrixBlock[] blks = DataConverter.convertToMatrixBlockPartitions(value2, false);
for (int i = 0; i < rows; i++) {
PairWritableBlock tmp = new PairWritableBlock();
tmp.indexes = new MatrixIndexes(1, col_offset / _bclen + 1);
tmp.block = blks[i];
ret.add(new Tuple2<Long, Writable>(new Long(row_offset + 1 + i), tmp));
}
break;
}
case ROW_BLOCK_WISE:
{
PairWritableBlock tmp = new PairWritableBlock();
tmp.indexes = new MatrixIndexes(1, col_offset / _bclen + 1);
tmp.block = new MatrixBlock(value2);
ret.add(new Tuple2<Long, Writable>(new Long(row_offset / _brlen + 1), tmp));
break;
}
case ROW_BLOCK_WISE_N:
{
if (_n >= _brlen) {
PairWritableBlock tmp = new PairWritableBlock();
tmp.indexes = new MatrixIndexes(((row_offset % _n) / _brlen) + 1, col_offset / _bclen + 1);
tmp.block = new MatrixBlock(value2);
ret.add(new Tuple2<Long, Writable>(new Long(row_offset / _n + 1), tmp));
} else {
for (int i = 0; i < rows; i += _n) {
PairWritableBlock tmp = new PairWritableBlock();
tmp.indexes = new MatrixIndexes(1, col_offset / _bclen + 1);
tmp.block = value2.slice(i, Math.min(i + (int) _n - 1, value2.getNumRows() - 1), 0, value2.getNumColumns() - 1, new MatrixBlock());
ret.add(new Tuple2<Long, Writable>(new Long((row_offset + i) / _n + 1), tmp));
}
}
break;
}
case COLUMN_WISE:
{
MatrixBlock[] blks = DataConverter.convertToMatrixBlockPartitions(value2, true);
for (int i = 0; i < cols; i++) {
PairWritableBlock tmp = new PairWritableBlock();
tmp.indexes = new MatrixIndexes(row_offset / _brlen + 1, 1);
tmp.block = blks[i];
ret.add(new Tuple2<Long, Writable>(new Long(col_offset + 1 + i), tmp));
}
break;
}
case COLUMN_BLOCK_WISE:
{
PairWritableBlock tmp = new PairWritableBlock();
tmp.indexes = new MatrixIndexes(row_offset / _brlen + 1, 1);
tmp.block = new MatrixBlock(value2);
ret.add(new Tuple2<Long, Writable>(new Long(col_offset / _bclen + 1), tmp));
break;
}
case COLUMN_BLOCK_WISE_N:
{
if (_n >= _bclen) {
PairWritableBlock tmp = new PairWritableBlock();
tmp.indexes = new MatrixIndexes(row_offset / _brlen + 1, ((col_offset % _n) / _bclen) + 1);
tmp.block = new MatrixBlock(value2);
ret.add(new Tuple2<Long, Writable>(new Long(col_offset / _n + 1), tmp));
} else {
for (int i = 0; i < cols; i += _n) {
PairWritableBlock tmp = new PairWritableBlock();
tmp.indexes = new MatrixIndexes(row_offset / _brlen + 1, 1);
tmp.block = value2.slice(0, value2.getNumRows() - 1, i, Math.min(i + (int) _n - 1, value2.getNumColumns() - 1), new MatrixBlock());
ret.add(new Tuple2<Long, Writable>(new Long((col_offset + i) / _n + 1), tmp));
}
}
break;
}
default:
throw new DMLRuntimeException("Unsupported partition format: " + _dpf);
}
return ret.iterator();
}
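All block coordinates in this mapper are 1-based, and the recurring idiom is the conversion between a block index and a 0-based global cell offset: row_offset = (rowIndex - 1) * brlen, followed by integer division to locate the target partition. The ROW_BLOCK_WISE_N fast path (n >= brlen) is the least obvious case, so here is its index arithmetic isolated into a sketch, assuming n is a multiple of brlen as in the branch above; names are illustrative.
public class RowBlockWiseNSketch {
    // for a source block at 1-based coordinates (bi, bj) with brlen rows per block
    // and n rows per partition (n >= brlen), return {partitionKey, localRowBlock, bj}
    public static long[] mapBlock(long bi, long bj, int brlen, long n) {
        long rowOffset = (bi - 1) * brlen;                // 0-based global row offset
        long partitionKey = rowOffset / n + 1;            // 1-based target partition
        long localRowBlock = (rowOffset % n) / brlen + 1; // 1-based row block within partition
        return new long[]{partitionKey, localRowBlock, bj};
    }

    public static void main(String[] args) {
        // block (3, 2) with brlen = 1000, n = 4000: partition 1, local row block 3
        long[] r = mapBlock(3, 2, 1000, 4000);
        System.out.printf("partition=%d, localRowBlock=%d, colBlock=%d%n", r[0], r[1], r[2]);
    }
}
The slow path (n < brlen) inverts this: it slices the source block into pieces of at most n rows and resets the local row block index to 1, since each target partition then holds at most one such slice.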
Use of org.apache.sysml.runtime.matrix.data.MatrixIndexes in project incubator-systemml by apache.
The class AggregateBinaryInstruction, method processMapMultInstruction:
/**
* Helper function to perform map-side matrix-matrix multiplication.
*
* @param valueClass matrix value class
* @param cachedValues cached value map
* @param in1 indexed matrix value 1
* @param in2 indexed matrix value 2
* @param blockRowFactor number of rows per block
* @param blockColFactor number of columns per block
*/
private void processMapMultInstruction(Class<? extends MatrixValue> valueClass, CachedValueMap cachedValues, IndexedMatrixValue in1, IndexedMatrixValue in2, int blockRowFactor, int blockColFactor) {
boolean removeOutput = true;
if (_cacheType.isRight()) {
DistributedCacheInput dcInput = MRBaseForCommonInstructions.dcValues.get(input2);
long in2_cols = dcInput.getNumCols();
long in2_colBlocks = (long) Math.ceil(((double) in2_cols) / dcInput.getNumColsPerBlock());
for (int bidx = 1; bidx <= in2_colBlocks; bidx++) {
// Matrix multiply A[i,k] %*% B[k,bid]
// Setup input2 block
IndexedMatrixValue in2Block = dcInput.getDataBlock((int) in1.getIndexes().getColumnIndex(), bidx);
MatrixValue in2BlockValue = in2Block.getValue();
MatrixIndexes in2BlockIndex = in2Block.getIndexes();
// allocate space for the output value
IndexedMatrixValue out = cachedValues.holdPlace(output, valueClass);
// process instruction
OperationsOnMatrixValues.performAggregateBinary(in1.getIndexes(), (MatrixBlock) in1.getValue(), in2BlockIndex, (MatrixBlock) in2BlockValue, out.getIndexes(), (MatrixBlock) out.getValue(), ((AggregateBinaryOperator) optr));
removeOutput &= (!_outputEmptyBlocks && out.getValue().isEmpty());
}
} else {
DistributedCacheInput dcInput = MRBaseForCommonInstructions.dcValues.get(input1);
long in1_rows = dcInput.getNumRows();
long in1_rowsBlocks = (long) Math.ceil(((double) in1_rows) / dcInput.getNumRowsPerBlock());
for (int bidx = 1; bidx <= in1_rowsBlocks; bidx++) {
// Matrix multiply A[bid,k] %*% B[k,j]
// Setup input1 block
IndexedMatrixValue in1Block = dcInput.getDataBlock(bidx, (int) in2.getIndexes().getRowIndex());
MatrixValue in1BlockValue = in1Block.getValue();
MatrixIndexes in1BlockIndex = in1Block.getIndexes();
// allocate space for the output value
IndexedMatrixValue out = cachedValues.holdPlace(output, valueClass);
// process instruction
OperationsOnMatrixValues.performAggregateBinary(in1BlockIndex, (MatrixBlock) in1BlockValue, in2.getIndexes(), (MatrixBlock) in2.getValue(), out.getIndexes(), (MatrixBlock) out.getValue(), ((AggregateBinaryOperator) optr));
removeOutput &= (!_outputEmptyBlocks && out.getValue().isEmpty());
}
}
// empty block output filter (enabled by compiler if the consumer operation is in CP)
if (removeOutput)
cachedValues.remove(output);
}
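Both branches run the same blocked product: the single streamed block is multiplied against every co-dimension block of the broadcast (distributed-cache) input, emitting one output block per iteration. Stripped of the MR plumbing, the per-iteration work of the right-cache branch is an ordinary dense block multiply; the following toy sketch uses double[][] in place of MatrixBlock and is illustrative only.
import java.util.Arrays;

public class MapMultSketch {
    // one dense block product C = A %*% B, standing in for performAggregateBinary
    static double[][] multiply(double[][] a, double[][] b) {
        int m = a.length, k = a[0].length, n = b[0].length;
        double[][] c = new double[m][n];
        for (int i = 0; i < m; i++)
            for (int p = 0; p < k; p++)
                for (int j = 0; j < n; j++)
                    c[i][j] += a[i][p] * b[p][j];
        return c;
    }

    public static void main(String[] args) {
        double[][] aBlock = {{1, 2}, {3, 4}};  // streamed block A[i,k]
        double[][][] bColBlocks = {            // cached column blocks B[k,1], B[k,2]
            {{1, 0}, {0, 1}},
            {{2}, {2}}};
        for (int bidx = 1; bidx <= bColBlocks.length; bidx++) {
            double[][] out = multiply(aBlock, bColBlocks[bidx - 1]); // C[i,bidx]
            System.out.println("C[i," + bidx + "] = " + Arrays.deepToString(out));
        }
    }
}
The removeOutput flag then drops the emitted blocks only if empty-block filtering is enabled and every produced block is empty, which is exactly what the &= accumulation across iterations computes.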
Use of org.apache.sysml.runtime.matrix.data.MatrixIndexes in project incubator-systemml by apache.
The class AppendGInstruction, method processInstruction:
@Override
public void processInstruction(Class<? extends MatrixValue> valueClass, CachedValueMap cachedValues, IndexedMatrixValue tempValue, IndexedMatrixValue zeroInput, int brlen, int bclen) {
// setup basic meta data
int blen = _cbind ? bclen : brlen;
// Step 1: handle first input (forward blocks, change dim of last block)
ArrayList<IndexedMatrixValue> blkList1 = cachedValues.get(input1);
if (blkList1 != null)
for (IndexedMatrixValue in1 : blkList1) {
if (in1 == null)
continue;
if (_offset % blen == 0) {
// special case: forward only
cachedValues.add(output, in1);
} else // general case: change dims and forward
{
MatrixIndexes tmpix = in1.getIndexes();
// always block
MatrixBlock tmpval = (MatrixBlock) in1.getValue();
// border block
if (_cbind && _offset / blen + 1 == tmpix.getColumnIndex() || !_cbind && _offset / blen + 1 == tmpix.getRowIndex()) {
IndexedMatrixValue data = cachedValues.holdPlace(output, valueClass);
// always block
MatrixBlock tmpvalNew = (MatrixBlock) data.getValue();
int lrlen = _cbind ? tmpval.getNumRows() : Math.min(blen, (int) (_len - (tmpix.getRowIndex() - 1) * blen));
int lclen = _cbind ? Math.min(blen, (int) (_len - (tmpix.getColumnIndex() - 1) * blen)) : tmpval.getNumColumns();
tmpvalNew.reset(lrlen, lclen);
tmpvalNew.copy(0, tmpval.getNumRows() - 1, 0, tmpval.getNumColumns() - 1, tmpval, true);
data.getIndexes().setIndexes(tmpix);
} else // inner block
{
cachedValues.add(output, in1);
}
}
}
// Step 2: handle second input (split/forward blocks with new index)
ArrayList<IndexedMatrixValue> blkList2 = cachedValues.get(input2);
if (blkList2 != null)
for (IndexedMatrixValue in2 : blkList2) {
if (in2 == null)
continue;
MatrixIndexes tmpix = in2.getIndexes();
// always block
MatrixBlock tmpval = (MatrixBlock) in2.getValue();
// special case: no split
if (_offset % blen == 0) {
IndexedMatrixValue data = cachedValues.holdPlace(output, valueClass);
MatrixIndexes ix1 = data.getIndexes();
long rix = _cbind ? tmpix.getRowIndex() : _offset / blen + tmpix.getRowIndex();
long cix = _cbind ? _offset / blen + tmpix.getColumnIndex() : tmpix.getColumnIndex();
ix1.setIndexes(rix, cix);
data.set(ix1, in2.getValue());
} else // general case: split and forward
{
IndexedMatrixValue data1 = cachedValues.holdPlace(output, valueClass);
MatrixIndexes ix1 = data1.getIndexes();
// always block
MatrixBlock tmpvalNew = (MatrixBlock) data1.getValue();
if (_cbind) {
// first half
int cix1 = (int) (_offset / blen + tmpix.getColumnIndex());
int cols1 = Math.min(blen, (int) (_len - (long) (cix1 - 1) * blen));
ix1.setIndexes(tmpix.getRowIndex(), cix1);
tmpvalNew.reset(tmpval.getNumRows(), cols1);
tmpvalNew.copy(0, tmpval.getNumRows() - 1, (int) ((_offset + 1) % blen) - 1, cols1 - 1, tmpval.slice(0, tmpval.getNumRows() - 1, 0, (int) (cols1 - ((_offset) % blen) - 1), new MatrixBlock()), true);
data1.getIndexes().setIndexes(ix1);
if (cols1 - ((_offset) % blen) < tmpval.getNumColumns()) {
// second half (if required)
IndexedMatrixValue data2 = cachedValues.holdPlace(output, valueClass);
MatrixIndexes ix2 = data2.getIndexes();
// always block
MatrixBlock tmpvalNew2 = (MatrixBlock) data2.getValue();
int cix2 = (int) (_offset / blen + 1 + tmpix.getColumnIndex());
int cols2 = Math.min(blen, (int) (_len - (long) (cix2 - 1) * blen));
ix2.setIndexes(tmpix.getRowIndex(), cix2);
tmpvalNew2.reset(tmpval.getNumRows(), cols2);
tmpvalNew2.copy(0, tmpval.getNumRows() - 1, 0, cols2 - 1, tmpval.slice(0, tmpval.getNumRows() - 1, (int) (cols1 - ((_offset) % blen)), tmpval.getNumColumns() - 1, new MatrixBlock()), true);
data2.getIndexes().setIndexes(ix2);
}
} else // rbind
{
// first half
int rix1 = (int) (_offset / blen + tmpix.getRowIndex());
int rows1 = Math.min(blen, (int) (_len - (long) (rix1 - 1) * blen));
ix1.setIndexes(rix1, tmpix.getColumnIndex());
tmpvalNew.reset(rows1, tmpval.getNumColumns());
tmpvalNew.copy((int) ((_offset + 1) % blen) - 1, rows1 - 1, 0, tmpval.getNumColumns() - 1, tmpval.slice(0, (int) (rows1 - ((_offset) % blen) - 1), 0, tmpval.getNumColumns() - 1, new MatrixBlock()), true);
data1.getIndexes().setIndexes(ix1);
if (rows1 - ((_offset) % blen) < tmpval.getNumRows()) {
// second half (if required)
IndexedMatrixValue data2 = cachedValues.holdPlace(output, valueClass);
MatrixIndexes ix2 = data2.getIndexes();
// always block
MatrixBlock tmpvalNew2 = (MatrixBlock) data2.getValue();
int rix2 = (int) (_offset / blen + 1 + tmpix.getRowIndex());
int rows2 = Math.min(blen, (int) (_len - (long) (rix2 - 1) * blen));
ix2.setIndexes(rix2, tmpix.getColumnIndex());
tmpvalNew2.reset(rows2, tmpval.getNumColumns());
tmpvalNew2.copy(0, rows2 - 1, 0, tmpval.getNumColumns() - 1, tmpval.slice((int) (rows1 - ((_offset) % blen)), tmpval.getNumRows() - 1, 0, tmpval.getNumColumns() - 1, new MatrixBlock()), true);
data2.getIndexes().setIndexes(ix2);
}
}
}
}
}
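The splitting in step 2 reduces to one piece of modular arithmetic: when the append offset is not block-aligned, a source block contributes blen - offset % blen columns (for cbind; rows for rbind) to the border output block and spills the remainder into the following block. Below is a reduced sketch of that computation for the cbind case, ignoring truncation at the overall matrix border (so the target block is assumed a full blen wide); names are illustrative.
public class AppendSplitSketch {
    // for cbind at a global column offset, split a source block of 'cols' columns
    // into the part landing in the border block and the part spilling into the next;
    // the second count is 0 when no spill occurs
    public static int[] splitSizes(long offset, int blen, int cols) {
        int shift = (int) (offset % blen);        // columns already used in the border block
        int first = Math.min(cols, blen - shift); // columns copied into the first output block
        int second = cols - first;                // remainder copied into the next block
        return new int[]{first, second};
    }

    public static void main(String[] args) {
        // offset 2500, block size 1000: a full 1000-column source block splits 500 + 500
        int[] s = splitSizes(2500, 1000, 1000);
        System.out.println(s[0] + " + " + s[1]); // 500 + 500
    }
}
In the instruction itself, the first-half width is additionally capped at the matrix border via cols1 = Math.min(blen, ...), and the second half is materialized only when the remainder is positive, which corresponds to the cols1 - offset % blen < tmpval.getNumColumns() test above.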