use of org.apache.sysml.runtime.matrix.data.MatrixBlock in project incubator-systemml by apache.
the class SparkExecutionContext method writeRDDtoHDFS.
/**
 * Saves the binary-block RDD wrapped by {@code rdd} as a Hadoop file and
 * returns the non-zero count gathered while the blocks were written.
 *
 * @param rdd   RDD object whose underlying RDD is cast to a pair RDD of
 *              matrix indexes and matrix blocks
 * @param path  target path handed to {@code saveAsHadoopFile}
 * @param oinfo output info supplying key, value and output format classes
 * @return value of the "nnz" accumulator after the save action has run
 */
@SuppressWarnings("unchecked")
public static long writeRDDtoHDFS(RDDObject rdd, String path, OutputInfo oinfo) {
    JavaPairRDD<MatrixIndexes, MatrixBlock> blocks =
        (JavaPairRDD<MatrixIndexes, MatrixBlock>) rdd.getRDD();

    // the accumulator is fed by the mapValues function below, so counting
    // happens as a side effect of the write rather than in a separate pass
    LongAccumulator nnzCounter = getSparkContextStatic().sc().longAccumulator("nnz");
    JavaPairRDD<MatrixIndexes, MatrixBlock> countedBlocks =
        blocks.mapValues(new ComputeBinaryBlockNnzFunction(nnzCounter));

    // saving is the action that actually evaluates the mapValues step
    countedBlocks.saveAsHadoopFile(path, oinfo.outputKeyClass, oinfo.outputValueClass, oinfo.outputFormatClass);

    // aggregate over all written blocks
    return nnzCounter.value();
}
use of org.apache.sysml.runtime.matrix.data.MatrixBlock in project incubator-systemml by apache.
the class DataPartitionerLocal method partitionBinaryBlock.
/**
 * Partitions a binary-block matrix in two steps: all blocks of the input
 * sequence files are first appended to a local staging area, then every
 * staged partition directory is written out as a sequence file.
 *
 * @param fname        path of the input matrix (read as sequence files)
 * @param fnameStaging local staging directory; its entries are the staged
 *                     partitions
 * @param fnameNew     target file name passed to the partition writers
 * @param rlen         number of rows of the overall matrix
 * @param clen         number of columns of the overall matrix
 * @param brlen        number of rows per block
 * @param bclen        number of columns per block
 * @throws DMLRuntimeException if reading, staging or writing fails, if a
 *         block lies outside the overall matrix range, if the staging
 *         directory cannot be listed, or if the thread is interrupted
 *         while waiting for the writer threads
 */
@SuppressWarnings("deprecation")
private void partitionBinaryBlock(String fname, String fnameStaging, String fnameNew, long rlen, long clen, int brlen, int bclen) {
    try {
        // create reuse object
        _reuseBlk = DataPartitioner.createReuseMatrixBlock(_format, brlen, bclen);

        // STEP 1: read matrix from HDFS and write blocks to local staging area
        // check and add input path
        JobConf job = new JobConf(ConfigurationManager.getCachedJobConf());
        Path path = new Path(fname);
        FileSystem fs = IOUtilFunctions.getFileSystem(path, job);

        // prepare sequence file reader, and write to local staging area
        MatrixIndexes key = new MatrixIndexes();
        MatrixBlock value = new MatrixBlock();
        for (Path lpath : IOUtilFunctions.getSequenceFilePaths(fs, path)) {
            SequenceFile.Reader reader = new SequenceFile.Reader(fs, lpath, job);
            try {
                // for each block
                while (reader.next(key, value)) {
                    long row_offset = (key.getRowIndex() - 1) * brlen;
                    long col_offset = (key.getColumnIndex() - 1) * bclen;
                    long rows = value.getNumRows();
                    long cols = value.getNumColumns();

                    // bound check per block
                    if (row_offset + rows < 1 || row_offset + rows > rlen || col_offset + cols < 1 || col_offset + cols > clen) {
                        throw new IOException("Matrix block [" + (row_offset + 1) + ":" + (row_offset + rows) + "," + (col_offset + 1) + ":" + (col_offset + cols) + "] " + "out of overall matrix range [1:" + rlen + ",1:" + clen + "].");
                    }
                    appendBlockToStagingArea(fnameStaging, value, row_offset, col_offset, brlen, bclen);
                }
            } finally {
                IOUtilFunctions.closeSilently(reader);
            }
        }

        // STEP 2: read matrix blocks from staging area and write matrix to HDFS
        String[] fnamesPartitions = new File(fnameStaging).list();
        if (fnamesPartitions == null) {
            // File.list() returns null when the path is not a directory or an
            // I/O error occurs; fail with a descriptive cause instead of an NPE
            throw new IOException("Unable to list staging directory '" + fnameStaging + "'.");
        }

        if (PARALLEL) {
            int len = Math.min(fnamesPartitions.length, _par);
            // number of partition directories handled by each worker thread
            // (loop-invariant; evaluates to 0 without error when len is 0)
            int chunk = (int) Math.ceil(((double) fnamesPartitions.length) / len);
            Thread[] threads = new Thread[len];
            for (int i = 0; i < len; i++) {
                int start = i * chunk;
                int end = Math.min((i + 1) * chunk - 1, fnamesPartitions.length - 1);
                threads[i] = new Thread(new DataPartitionerWorkerBinaryBlock(job, fnameNew, fnameStaging, fnamesPartitions, start, end));
                threads[i].start();
            }
            for (Thread t : threads) {
                t.join();
            }
        } else {
            for (String pdir : fnamesPartitions) {
                writeBinaryBlockSequenceFileToHDFS(job, fnameNew, fnameStaging + "/" + pdir, false);
            }
        }
    } catch (InterruptedException e) {
        // keep the interrupt visible to callers further up the stack
        Thread.currentThread().interrupt();
        throw new DMLRuntimeException("Unable to partition binary block matrix.", e);
    } catch (Exception e) {
        throw new DMLRuntimeException("Unable to partition binary block matrix.", e);
    }
}
use of org.apache.sysml.runtime.matrix.data.MatrixBlock in project incubator-systemml by apache.
the class DataPartitionerRemoteSparkMapper method call.
/**
 * Splits one indexed matrix block into the partition entries it contributes
 * to under the configured partition format {@code _dpf}.
 *
 * Each returned tuple pairs a 1-based partition key with a
 * {@code PairWritableBlock} holding the block (or a piece of it) and the
 * block indexes of that piece within its partition.
 *
 * @param arg0 pair of block indexes and matrix block
 * @return iterator over the (partition key, block) entries for this block
 * @throws IOException if the block reaches outside the overall matrix
 *         range {@code [1:_rlen, 1:_clen]}
 * @throws DMLRuntimeException if the partition format is not handled here
 */
@Override
public Iterator<Tuple2<Long, Writable>> call(Tuple2<MatrixIndexes, MatrixBlock> arg0) throws Exception {
    List<Tuple2<Long, Writable>> ret = new LinkedList<>();
    MatrixIndexes key2 = arg0._1();
    MatrixBlock value2 = arg0._2();

    // 0-based cell offsets of the block's upper-left corner
    long row_offset = (key2.getRowIndex() - 1) * _brlen;
    long col_offset = (key2.getColumnIndex() - 1) * _bclen;
    long rows = value2.getNumRows();
    long cols = value2.getNumColumns();

    // bound check per block
    if (row_offset + rows < 1 || row_offset + rows > _rlen || col_offset + cols < 1 || col_offset + cols > _clen) {
        throw new IOException("Matrix block [" + (row_offset + 1) + ":" + (row_offset + rows) + "," + (col_offset + 1) + ":" + (col_offset + cols) + "] " + "out of overall matrix range [1:" + _rlen + ",1:" + _clen + "].");
    }

    // partition inputs according to partitioning scheme
    switch(_dpf) {
        case ROW_WISE:
        {
            // one entry per row of the block, keyed by the global row index
            MatrixBlock[] blks = DataConverter.convertToMatrixBlockPartitions(value2, false);
            for (int i = 0; i < rows; i++) {
                ret.add(createPartitionEntry(row_offset + 1 + i, 1, col_offset / _bclen + 1, blks[i]));
            }
            break;
        }
        case ROW_BLOCK_WISE:
        {
            // whole block, keyed by its row-block index
            ret.add(createPartitionEntry(row_offset / _brlen + 1, 1, col_offset / _bclen + 1, new MatrixBlock(value2)));
            break;
        }
        case ROW_BLOCK_WISE_N:
        {
            if (_n >= _brlen) {
                // partition spans at least one block: forward the block with
                // its row-block index relative to the partition start
                ret.add(createPartitionEntry(row_offset / _n + 1, ((row_offset % _n) / _brlen) + 1, col_offset / _bclen + 1, new MatrixBlock(value2)));
            } else {
                // partition is smaller than a block: slice into pieces of up to _n rows
                for (int i = 0; i < rows; i += _n) {
                    MatrixBlock piece = value2.slice(i, Math.min(i + (int) _n - 1, value2.getNumRows() - 1), 0, value2.getNumColumns() - 1, new MatrixBlock());
                    ret.add(createPartitionEntry((row_offset + i) / _n + 1, 1, col_offset / _bclen + 1, piece));
                }
            }
            break;
        }
        case COLUMN_WISE:
        {
            // one entry per column of the block, keyed by the global column index
            MatrixBlock[] blks = DataConverter.convertToMatrixBlockPartitions(value2, true);
            for (int i = 0; i < cols; i++) {
                ret.add(createPartitionEntry(col_offset + 1 + i, row_offset / _brlen + 1, 1, blks[i]));
            }
            break;
        }
        case COLUMN_BLOCK_WISE:
        {
            // whole block, keyed by its column-block index
            ret.add(createPartitionEntry(col_offset / _bclen + 1, row_offset / _brlen + 1, 1, new MatrixBlock(value2)));
            break;
        }
        case COLUMN_BLOCK_WISE_N:
        {
            if (_n >= _bclen) {
                // partition spans at least one block: forward the block with
                // its column-block index relative to the partition start
                ret.add(createPartitionEntry(col_offset / _n + 1, row_offset / _brlen + 1, ((col_offset % _n) / _bclen) + 1, new MatrixBlock(value2)));
            } else {
                // partition is smaller than a block: slice into pieces of up to _n columns
                for (int i = 0; i < cols; i += _n) {
                    MatrixBlock piece = value2.slice(0, value2.getNumRows() - 1, i, Math.min(i + (int) _n - 1, value2.getNumColumns() - 1), new MatrixBlock());
                    ret.add(createPartitionEntry((col_offset + i) / _n + 1, row_offset / _brlen + 1, 1, piece));
                }
            }
            break;
        }
        default:
            throw new DMLRuntimeException("Unsupported partition format: " + _dpf);
    }
    return ret.iterator();
}

/**
 * Builds one output tuple: a boxed partition key paired with a
 * {@code PairWritableBlock} carrying the given block and block indexes.
 *
 * @param partitionKey 1-based key of the target partition
 * @param rowIx        row-block index of the block within the partition
 * @param colIx        column-block index of the block within the partition
 * @param block        matrix block to emit
 * @return tuple of partition key and writable block
 */
private static Tuple2<Long, Writable> createPartitionEntry(long partitionKey, long rowIx, long colIx, MatrixBlock block) {
    PairWritableBlock tmp = new PairWritableBlock();
    tmp.indexes = new MatrixIndexes(rowIx, colIx);
    tmp.block = block;
    // Long.valueOf replaces the deprecated Long(long) constructor
    return new Tuple2<Long, Writable>(Long.valueOf(partitionKey), tmp);
}
use of org.apache.sysml.runtime.matrix.data.MatrixBlock in project incubator-systemml by apache.
the class AppendGInstruction method processInstruction.
/**
 * Appends the blocks of the second input to those of the first input,
 * column-wise when {@code _cbind} is set and row-wise otherwise, and places
 * the resulting blocks under {@code output} in the cached values.
 *
 * Blocks of the first input are forwarded; only the border block that
 * contains the append offset is resized. Blocks of the second input are
 * re-indexed by the offset and, when the offset is not block-aligned, split
 * into up to two output blocks.
 *
 * @param valueClass   value class used when reserving output slots
 * @param cachedValues per-instruction block cache read from and written to
 * @param tempValue    unused by this instruction
 * @param zeroInput    unused by this instruction
 * @param brlen        number of rows per block
 * @param bclen        number of columns per block
 */
@Override
public void processInstruction(Class<? extends MatrixValue> valueClass, CachedValueMap cachedValues, IndexedMatrixValue tempValue, IndexedMatrixValue zeroInput, int brlen, int bclen) {
    // setup basic meta data: block length along the append dimension
    int blen = _cbind ? bclen : brlen;
    // position of the append offset inside its block; 0 means block-aligned
    final long offsetInBlock = _offset % blen;

    // Step 1: handle first input (forward blocks, change dim of last block)
    ArrayList<IndexedMatrixValue> blkList1 = cachedValues.get(input1);
    if (blkList1 != null) {
        for (IndexedMatrixValue in1 : blkList1) {
            if (in1 == null)
                continue;
            if (offsetInBlock == 0) {
                // special case: forward only
                cachedValues.add(output, in1);
            } else {
                // general case: change dims and forward
                MatrixIndexes tmpix = in1.getIndexes();
                // always block
                MatrixBlock tmpval = (MatrixBlock) in1.getValue();
                boolean borderBlock = _cbind && _offset / blen + 1 == tmpix.getColumnIndex() || !_cbind && _offset / blen + 1 == tmpix.getRowIndex();
                if (borderBlock) {
                    IndexedMatrixValue data = cachedValues.holdPlace(output, valueClass);
                    // always block
                    MatrixBlock tmpvalNew = (MatrixBlock) data.getValue();
                    // enlarge the border block along the append dimension
                    // (_len looks like the output length along that dimension - TODO confirm)
                    int lrlen = _cbind ? tmpval.getNumRows() : Math.min(blen, (int) (_len - (tmpix.getRowIndex() - 1) * blen));
                    int lclen = _cbind ? Math.min(blen, (int) (_len - (tmpix.getColumnIndex() - 1) * blen)) : tmpval.getNumColumns();
                    tmpvalNew.reset(lrlen, lclen);
                    tmpvalNew.copy(0, tmpval.getNumRows() - 1, 0, tmpval.getNumColumns() - 1, tmpval, true);
                    data.getIndexes().setIndexes(tmpix);
                } else {
                    // inner block
                    cachedValues.add(output, in1);
                }
            }
        }
    }

    // Step 2: handle second input (split/forward blocks with new index)
    ArrayList<IndexedMatrixValue> blkList2 = cachedValues.get(input2);
    if (blkList2 != null) {
        for (IndexedMatrixValue in2 : blkList2) {
            if (in2 == null)
                continue;
            MatrixIndexes tmpix = in2.getIndexes();
            // always block
            MatrixBlock tmpval = (MatrixBlock) in2.getValue();
            // alignment is tested against the block length of the append
            // dimension (blen), consistent with step 1; testing bclen would
            // be wrong for rbind whenever brlen != bclen
            if (offsetInBlock == 0) {
                // special case no split
                IndexedMatrixValue data = cachedValues.holdPlace(output, valueClass);
                MatrixIndexes ix1 = data.getIndexes();
                long rix = _cbind ? tmpix.getRowIndex() : _offset / blen + tmpix.getRowIndex();
                long cix = _cbind ? _offset / blen + tmpix.getColumnIndex() : tmpix.getColumnIndex();
                ix1.setIndexes(rix, cix);
                data.set(ix1, in2.getValue());
            } else {
                // general case: split and forward
                IndexedMatrixValue data1 = cachedValues.holdPlace(output, valueClass);
                MatrixIndexes ix1 = data1.getIndexes();
                // always block
                MatrixBlock tmpvalNew = (MatrixBlock) data1.getValue();
                if (_cbind) {
                    // first half
                    int cix1 = (int) (_offset / blen + tmpix.getColumnIndex());
                    int cols1 = Math.min(blen, (int) (_len - (long) (cix1 - 1) * blen));
                    ix1.setIndexes(tmpix.getRowIndex(), cix1);
                    tmpvalNew.reset(tmpval.getNumRows(), cols1);
                    // destination starts at the in-block offset; the former
                    // ((_offset + 1) % blen) - 1 produced -1 when the offset
                    // sat on the last position of a block
                    tmpvalNew.copy(0, tmpval.getNumRows() - 1, (int) offsetInBlock, cols1 - 1, tmpval.slice(0, tmpval.getNumRows() - 1, 0, (int) (cols1 - offsetInBlock - 1), new MatrixBlock()), true);
                    data1.getIndexes().setIndexes(ix1);
                    if (cols1 - offsetInBlock < tmpval.getNumColumns()) {
                        // second half (if required)
                        IndexedMatrixValue data2 = cachedValues.holdPlace(output, valueClass);
                        MatrixIndexes ix2 = data2.getIndexes();
                        // always block
                        MatrixBlock tmpvalNew2 = (MatrixBlock) data2.getValue();
                        int cix2 = (int) (_offset / blen + 1 + tmpix.getColumnIndex());
                        int cols2 = Math.min(blen, (int) (_len - (long) (cix2 - 1) * blen));
                        ix2.setIndexes(tmpix.getRowIndex(), cix2);
                        tmpvalNew2.reset(tmpval.getNumRows(), cols2);
                        tmpvalNew2.copy(0, tmpval.getNumRows() - 1, 0, cols2 - 1, tmpval.slice(0, tmpval.getNumRows() - 1, (int) (cols1 - offsetInBlock), tmpval.getNumColumns() - 1, new MatrixBlock()), true);
                        data2.getIndexes().setIndexes(ix2);
                    }
                } else {
                    // rbind
                    // first half
                    int rix1 = (int) (_offset / blen + tmpix.getRowIndex());
                    int rows1 = Math.min(blen, (int) (_len - (long) (rix1 - 1) * blen));
                    ix1.setIndexes(rix1, tmpix.getColumnIndex());
                    tmpvalNew.reset(rows1, tmpval.getNumColumns());
                    // destination starts at the in-block offset (see cbind case)
                    tmpvalNew.copy((int) offsetInBlock, rows1 - 1, 0, tmpval.getNumColumns() - 1, tmpval.slice(0, (int) (rows1 - offsetInBlock - 1), 0, tmpval.getNumColumns() - 1, new MatrixBlock()), true);
                    data1.getIndexes().setIndexes(ix1);
                    if (rows1 - offsetInBlock < tmpval.getNumRows()) {
                        // second half (if required)
                        IndexedMatrixValue data2 = cachedValues.holdPlace(output, valueClass);
                        MatrixIndexes ix2 = data2.getIndexes();
                        // always block
                        MatrixBlock tmpvalNew2 = (MatrixBlock) data2.getValue();
                        int rix2 = (int) (_offset / blen + 1 + tmpix.getRowIndex());
                        int rows2 = Math.min(blen, (int) (_len - (long) (rix2 - 1) * blen));
                        ix2.setIndexes(rix2, tmpix.getColumnIndex());
                        tmpvalNew2.reset(rows2, tmpval.getNumColumns());
                        tmpvalNew2.copy(0, rows2 - 1, 0, tmpval.getNumColumns() - 1, tmpval.slice((int) (rows1 - offsetInBlock), tmpval.getNumRows() - 1, 0, tmpval.getNumColumns() - 1, new MatrixBlock()), true);
                        data2.getIndexes().setIndexes(ix2);
                    }
                }
            }
        }
    }
}
use of org.apache.sysml.runtime.matrix.data.MatrixBlock in project incubator-systemml by apache.
the class QuaternaryInstruction method processInstruction.
/**
 * Applies the quaternary operator to every cached block of the first input,
 * combining it with the matching U and V blocks and, if present, a weights
 * block or scalar, and stores the result under {@code output}.
 *
 * @param valueClass     value class used when reserving an output slot
 * @param cachedValues   block cache read from and written to
 * @param tempValue      scratch value used as output when the output index
 *                       equals the first input index
 * @param zeroInput      unused by this instruction
 * @param blockRowFactor unused by this instruction
 * @param blockColFactor unused by this instruction
 */
@Override
public void processInstruction(Class<? extends MatrixValue> valueClass, CachedValueMap cachedValues, IndexedMatrixValue tempValue, IndexedMatrixValue zeroInput, int blockRowFactor, int blockColFactor) {
    final QuaternaryOperator qop = (QuaternaryOperator) optr;
    final ArrayList<IndexedMatrixValue> inputBlocks = cachedValues.get(_input1);
    if (inputBlocks == null) {
        return;
    }

    for (IndexedMatrixValue inBlock : inputBlocks) {
        // Step 1: prepare inputs and output
        if (inBlock == null) {
            continue;
        }
        final MatrixIndexes inIx = inBlock.getIndexes();
        final MatrixBlock Xij = (MatrixBlock) inBlock.getValue();

        // reserve the output slot; write into the scratch value when the
        // output would otherwise overwrite the input being iterated
        final IndexedMatrixValue outSlot = (output == _input1)
            ? tempValue
            : cachedValues.holdPlace(output, valueClass);
        final MatrixIndexes outIx = outSlot.getIndexes();
        final MatrixValue outVal = outSlot.getValue();

        // Step 2: get remaining inputs: Wij, Ui, Vj
        // Wij stays null if there is no fourth input block
        MatrixValue Wij = null;
        if (_input4 != -1) {
            IndexedMatrixValue weights = cachedValues.getFirst(_input4);
            if (weights != null) {
                Wij = weights.getValue();
            }
        }
        if (Wij == null && qop.hasFourInputs()) {
            // fourth operand is taken from the instruction string as a 1x1 block
            String[] parts = InstructionUtils.getInstructionParts(instString);
            MatrixBlock scalarBlock = new MatrixBlock(1, 1, false);
            scalarBlock.quickSetValue(0, 0, Double.valueOf(parts[4]));
            Wij = scalarBlock;
        }

        // Ui and Vj come either from the distributed cache or from the cached values
        MatrixValue Ui;
        if (_cacheU) {
            Ui = MRBaseForCommonInstructions.dcValues.get(_input2).getDataBlock((int) inIx.getRowIndex(), 1).getValue();
        } else {
            Ui = cachedValues.getFirst(_input2).getValue();
        }
        MatrixValue Vj;
        if (_cacheV) {
            Vj = MRBaseForCommonInstructions.dcValues.get(_input3).getDataBlock((int) inIx.getColumnIndex(), 1).getValue();
        } else {
            Vj = cachedValues.getFirst(_input3).getValue();
        }

        // handle special input case: V through shuffle -> t(V)
        if (Ui.getNumColumns() != Vj.getNumColumns()) {
            MatrixBlock transposed = new MatrixBlock(Vj.getNumColumns(), Vj.getNumRows(), Vj.isInSparseFormat());
            Vj = LibMatrixReorg.reorg((MatrixBlock) Vj, transposed, new ReorgOperator(SwapIndex.getSwapIndexFnObject()));
        }

        // Step 3: process instruction
        Xij.quaternaryOperations(qop, (MatrixBlock) Ui, (MatrixBlock) Vj, (MatrixBlock) Wij, (MatrixBlock) outVal);

        // output indexes depend on which weights type of the operator is set
        if (qop.wtype1 != null || qop.wtype4 != null) {
            // wsloss
            outIx.setIndexes(1, 1);
        } else if (qop.wtype2 != null || qop.wtype5 != null || (qop.wtype3 != null && qop.wtype3.isBasic())) {
            // wsigmoid/wdivmm-basic
            outIx.setIndexes(inIx);
        } else {
            // wdivmm
            long outRowIx = qop.wtype3.isLeft() ? inIx.getColumnIndex() : inIx.getRowIndex();
            outIx.setIndexes(outRowIx, 1);
        }

        // put the output value in the cache
        if (outSlot == tempValue) {
            cachedValues.add(output, outSlot);
        }
    }
}
Aggregations