use of org.apache.sysml.runtime.matrix.data.MatrixIndexes in project incubator-systemml by apache.
the class WriterBinaryBlock method writePartitionedBinaryBlockMatrixToHDFS.
@SuppressWarnings("deprecation")
public final void writePartitionedBinaryBlockMatrixToHDFS(Path path, JobConf job, MatrixBlock src, long rlen, long clen, int brlen, int bclen, PDataPartitionFormat pformat) throws IOException, DMLRuntimeException {
  boolean sparse = src.isInSparseFormat();
  FileSystem fs = IOUtilFunctions.getFileSystem(path, job);
  //set up preferred custom serialization framework for binary block format
  if (MRJobConfiguration.USE_BINARYBLOCK_SERIALIZATION)
    MRJobConfiguration.addBinaryBlockSerializationFramework(job);
  //initialize blocks for reuse (at most 4 different blocks required)
  MatrixBlock[] blocks = createMatrixBlocksForReuse(rlen, clen, brlen, bclen, sparse, src.getNonZeros());
  switch (pformat) {
    case ROW_BLOCK_WISE_N: {
      long numBlocks = ((rlen - 1) / brlen) + 1;
      long numPartBlocks = (long) Math.ceil(((double) DistributedCacheInput.PARTITION_SIZE) / clen / brlen);
      int count = 0;
      for (int k = 0; k < numBlocks; k += numPartBlocks) {
        //1) create sequence file writer, with right replication factor
        //(config via MRConfigurationNames.DFS_REPLICATION not possible since sequence file internally calls fs.getDefaultReplication())
        Path path2 = new Path(path.toString() + File.separator + (++count));
        SequenceFile.Writer writer = new SequenceFile.Writer(fs, job, path2, MatrixIndexes.class, MatrixBlock.class);
        //2) reblock and write
        try {
          MatrixIndexes indexes = new MatrixIndexes();
          //create and write subblocks of matrix
          for (int blockRow = k; blockRow < Math.min((int) Math.ceil(src.getNumRows() / (double) brlen), k + numPartBlocks); blockRow++) {
            for (int blockCol = 0; blockCol < (int) Math.ceil(src.getNumColumns() / (double) bclen); blockCol++) {
              int maxRow = (blockRow * brlen + brlen < src.getNumRows()) ? brlen : src.getNumRows() - blockRow * brlen;
              int maxCol = (blockCol * bclen + bclen < src.getNumColumns()) ? bclen : src.getNumColumns() - blockCol * bclen;
              int row_offset = blockRow * brlen;
              int col_offset = blockCol * bclen;
              //get reuse matrix block
              MatrixBlock block = getMatrixBlockForReuse(blocks, maxRow, maxCol, brlen, bclen);
              //copy submatrix to block
              src.sliceOperations(row_offset, row_offset + maxRow - 1, col_offset, col_offset + maxCol - 1, block);
              //append block to sequence file
              indexes.setIndexes(blockRow + 1, blockCol + 1);
              writer.append(indexes, block);
              //reset block for later reuse
              block.reset();
            }
          }
        } finally {
          IOUtilFunctions.closeSilently(writer);
        }
      }
      break;
    }
    case COLUMN_BLOCK_WISE_N: {
      long numBlocks = ((clen - 1) / bclen) + 1;
      long numPartBlocks = (long) Math.ceil(((double) DistributedCacheInput.PARTITION_SIZE) / rlen / bclen);
      int count = 0;
      for (int k = 0; k < numBlocks; k += numPartBlocks) {
        //1) create sequence file writer, with right replication factor
        //(config via MRConfigurationNames.DFS_REPLICATION not possible since sequence file internally calls fs.getDefaultReplication())
        Path path2 = new Path(path.toString() + File.separator + (++count));
        SequenceFile.Writer writer = new SequenceFile.Writer(fs, job, path2, MatrixIndexes.class, MatrixBlock.class);
        //2) reblock and write
        try {
          MatrixIndexes indexes = new MatrixIndexes();
          //create and write subblocks of matrix
          for (int blockRow = 0; blockRow < (int) Math.ceil(src.getNumRows() / (double) brlen); blockRow++) {
            for (int blockCol = k; blockCol < Math.min((int) Math.ceil(src.getNumColumns() / (double) bclen), k + numPartBlocks); blockCol++) {
              int maxRow = (blockRow * brlen + brlen < src.getNumRows()) ? brlen : src.getNumRows() - blockRow * brlen;
              int maxCol = (blockCol * bclen + bclen < src.getNumColumns()) ? bclen : src.getNumColumns() - blockCol * bclen;
              int row_offset = blockRow * brlen;
              int col_offset = blockCol * bclen;
              //get reuse matrix block
              MatrixBlock block = getMatrixBlockForReuse(blocks, maxRow, maxCol, brlen, bclen);
              //copy submatrix to block
              src.sliceOperations(row_offset, row_offset + maxRow - 1, col_offset, col_offset + maxCol - 1, block);
              //append block to sequence file
              indexes.setIndexes(blockRow + 1, blockCol + 1);
              writer.append(indexes, block);
              //reset block for later reuse
              block.reset();
            }
          }
        } finally {
          IOUtilFunctions.closeSilently(writer);
        }
      }
      break;
    }
    default:
      throw new DMLRuntimeException("Unsupported partition format for distributed cache input: " + pformat);
  }
}
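As a side note on the sizing logic above, the following standalone sketch shows how many row-blocks are grouped per part file in the ROW_BLOCK_WISE_N case. All dimensions are illustrative, and the concrete value of DistributedCacheInput.PARTITION_SIZE is an assumption here, not taken from the source.

  public class PartitionSizingSketch {
    public static void main(String[] args) {
      long rlen = 10_000, clen = 10_000;  //matrix dimensions (illustrative)
      int brlen = 1000;                   //row-block size (illustrative)
      long partitionSize = 4_000_000;     //assumed cell budget per part file
      //same arithmetic as the ROW_BLOCK_WISE_N case above
      long numBlocks = ((rlen - 1) / brlen) + 1;
      long numPartBlocks = (long) Math.ceil((double) partitionSize / clen / brlen);
      long numFiles = (long) Math.ceil((double) numBlocks / numPartBlocks);
      System.out.println("row-blocks=" + numBlocks + ", row-blocks/file=" + numPartBlocks + ", part files=" + numFiles);
    }
  }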
use of org.apache.sysml.runtime.matrix.data.MatrixIndexes in project incubator-systemml by apache.
the class WriterBinaryCell method writeEmptyMatrixToHDFS.
@Override
@SuppressWarnings("deprecation")
public void writeEmptyMatrixToHDFS(String fname, long rlen, long clen, int brlen, int bclen) throws IOException, DMLRuntimeException {
  JobConf job = new JobConf(ConfigurationManager.getCachedJobConf());
  Path path = new Path(fname);
  FileSystem fs = IOUtilFunctions.getFileSystem(path, job);
  SequenceFile.Writer writer = null;
  try {
    writer = new SequenceFile.Writer(fs, job, path, MatrixIndexes.class, MatrixCell.class);
    MatrixIndexes index = new MatrixIndexes(1, 1);
    MatrixCell cell = new MatrixCell(0);
    writer.append(index, cell);
  } finally {
    IOUtilFunctions.closeSilently(writer);
  }
  IOUtilFunctions.deleteCrcFilesFromLocalFileSystem(fs, path);
}
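Because even an empty matrix is materialized as a single (1,1) zero cell, readers never encounter an empty sequence file. A minimal sketch of reading that sentinel back, assuming an illustrative path and mirroring the deprecated SequenceFile.Reader constructor style used by the writers in this class:

  import org.apache.hadoop.conf.Configuration;
  import org.apache.hadoop.fs.FileSystem;
  import org.apache.hadoop.fs.Path;
  import org.apache.hadoop.io.SequenceFile;
  import org.apache.sysml.runtime.matrix.data.MatrixCell;
  import org.apache.sysml.runtime.matrix.data.MatrixIndexes;

  public class ReadEmptyMatrixSketch {
    public static void main(String[] args) throws Exception {
      Configuration conf = new Configuration();
      Path path = new Path("target/emptyMatrix"); //illustrative path
      FileSystem fs = FileSystem.get(conf);
      try (SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, conf)) {
        MatrixIndexes ix = new MatrixIndexes();
        MatrixCell cell = new MatrixCell();
        //expect exactly one record: indexes (1,1) with value 0
        while (reader.next(ix, cell))
          System.out.println(ix + " -> " + cell);
      }
    }
  }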
use of org.apache.sysml.runtime.matrix.data.MatrixIndexes in project incubator-systemml by apache.
the class WriterBinaryCell method writeBinaryCellMatrixToHDFS.
@SuppressWarnings("deprecation")
protected void writeBinaryCellMatrixToHDFS(Path path, JobConf job, MatrixBlock src, long rlen, long clen, int brlen, int bclen) throws IOException {
  boolean sparse = src.isInSparseFormat();
  boolean entriesWritten = false;
  FileSystem fs = IOUtilFunctions.getFileSystem(path, job);
  SequenceFile.Writer writer = new SequenceFile.Writer(fs, job, path, MatrixIndexes.class, MatrixCell.class);
  MatrixIndexes indexes = new MatrixIndexes();
  MatrixCell cell = new MatrixCell();
  int rows = src.getNumRows();
  int cols = src.getNumColumns();
  try {
    //bound check per block
    if (rows > rlen || cols > clen) {
      throw new IOException("Matrix block [1:" + rows + ",1:" + cols + "] out of overall matrix range [1:" + rlen + ",1:" + clen + "].");
    }
    if (sparse) { //SPARSE
      Iterator<IJV> iter = src.getSparseBlockIterator();
      while (iter.hasNext()) {
        IJV lcell = iter.next();
        indexes.setIndexes(lcell.getI() + 1, lcell.getJ() + 1);
        cell.setValue(lcell.getV());
        writer.append(indexes, cell);
        entriesWritten = true;
      }
    } else { //DENSE
      for (int i = 0; i < rows; i++) {
        for (int j = 0; j < cols; j++) {
          double lvalue = src.getValueDenseUnsafe(i, j);
          if (lvalue != 0) { //for nnz
            indexes.setIndexes(i + 1, j + 1);
            cell.setValue(lvalue);
            writer.append(indexes, cell);
            entriesWritten = true;
          }
        }
      }
    }
    //handle empty result
    if (!entriesWritten) {
      writer.append(new MatrixIndexes(1, 1), new MatrixCell(0));
    }
  } finally {
    IOUtilFunctions.closeSilently(writer);
  }
}
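The SPARSE branch relies on MatrixBlock's IJV iterator. A small self-contained sketch of that pattern, where quickSetValue is assumed to be the standard cell setter on MatrixBlock:

  import java.util.Iterator;
  import org.apache.sysml.runtime.matrix.data.IJV;
  import org.apache.sysml.runtime.matrix.data.MatrixBlock;

  public class SparseIterSketch {
    public static void main(String[] args) {
      MatrixBlock mb = new MatrixBlock(3, 3, true); //3x3, sparse
      mb.quickSetValue(0, 0, 7.0);
      mb.quickSetValue(2, 1, -2.5);
      //same pattern as the SPARSE branch above: visit only non-zeros,
      //shifting 0-based block coordinates to 1-based cell indexes
      Iterator<IJV> iter = mb.getSparseBlockIterator();
      while (iter.hasNext()) {
        IJV c = iter.next();
        System.out.println("(" + (c.getI() + 1) + "," + (c.getJ() + 1) + ") = " + c.getV());
      }
    }
  }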
use of org.apache.sysml.runtime.matrix.data.MatrixIndexes in project incubator-systemml by apache.
the class CSVWriteMapper method map.
@Override
@SuppressWarnings("unchecked")
public void map(Writable rawKey, Writable rawValue, OutputCollector<TaggedFirstSecondIndexes, MatrixBlock> out, Reporter reporter) throws IOException {
  long start = System.currentTimeMillis();
  //for each representative matrix, read the record and apply instructions
  for (int i = 0; i < representativeMatrixes.size(); i++) {
    //convert the record into the right format for the representative matrix
    inputConverter.setBlockSize(brlens[i], bclens[i]);
    inputConverter.convert(rawKey, rawValue);
    byte thisMatrix = representativeMatrixes.get(i);
    //apply unary instructions on the converted indexes and values
    while (inputConverter.hasNext()) {
      Pair<MatrixIndexes, MatrixBlock> pair = inputConverter.next();
      MatrixIndexes indexes = pair.getKey();
      MatrixBlock value = pair.getValue();
      outIndexes.setIndexes(indexes.getRowIndex(), indexes.getColumnIndex());
      ArrayList<Byte> outputs = inputOutputMap.get(thisMatrix);
      for (byte output : outputs) {
        outIndexes.setTag(output);
        out.collect(outIndexes, value);
        //LOG.info("Mapper output: "+outIndexes+", "+value+", tag: "+output);
      }
    }
  }
  reporter.incrCounter(Counters.MAP_TIME, System.currentTimeMillis() - start);
}
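The fan-out over inputOutputMap is the core of this mapper: one input tag can feed several outputs, so each converted block is collected once per output tag. A plain-JDK sketch of that mapping, with illustrative tag values:

  import java.util.ArrayList;
  import java.util.HashMap;
  import java.util.Map;

  public class TagFanOutSketch {
    public static void main(String[] args) {
      //input tag 1 feeds output tags 0 and 2 (illustrative)
      Map<Byte, ArrayList<Byte>> inputOutputMap = new HashMap<>();
      ArrayList<Byte> outs = new ArrayList<>();
      outs.add((byte) 0);
      outs.add((byte) 2);
      inputOutputMap.put((byte) 1, outs);
      byte thisMatrix = 1;
      //one collect per output tag, as in the mapper's inner loop
      for (byte output : inputOutputMap.get(thisMatrix))
        System.out.println("collect block for output tag " + output);
    }
  }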
use of org.apache.sysml.runtime.matrix.data.MatrixIndexes in project incubator-systemml by apache.
the class GMRCtableBuffer method flushBuffer.
@SuppressWarnings("deprecation")
public void flushBuffer(Reporter reporter) throws RuntimeException {
  try {
    if (_mapBuffer != null) {
      MatrixIndexes key = null;
      MatrixCell value = new MatrixCell();
      for (Entry<Byte, CTableMap> ctable : _mapBuffer.entrySet()) {
        ArrayList<Integer> resultIDs = ReduceBase.getOutputIndexes(ctable.getKey(), _resultIndexes);
        CTableMap resultMap = ctable.getValue();
        //maintain result dims and nonzeros
        for (Integer i : resultIDs) {
          _resultNonZeros[i] += resultMap.size();
          if (_resultDimsUnknown[i] == (byte) 1) {
            _resultMaxRowDims[i] = Math.max(resultMap.getMaxRow(), _resultMaxRowDims[i]);
            _resultMaxColDims[i] = Math.max(resultMap.getMaxColumn(), _resultMaxColDims[i]);
          }
        }
        //output result data
        for (LLDoubleEntry e : resultMap.entrySet()) {
          key = new MatrixIndexes(e.key1, e.key2);
          value.setValue(e.value);
          for (Integer i : resultIDs) {
            _collector.collectOutput(key, value, i, reporter);
          }
        }
      }
    } else if (_blockBuffer != null) {
      MatrixIndexes key = new MatrixIndexes(1, 1);
      //DataConverter.writeBinaryBlockMatrixToHDFS(path, job, mat, mc.get_rows(), mc.get_cols(), mc.get_rows_per_block(), mc.get_cols_per_block(), replication);
      for (Entry<Byte, MatrixBlock> ctable : _blockBuffer.entrySet()) {
        ArrayList<Integer> resultIDs = ReduceBase.getOutputIndexes(ctable.getKey(), _resultIndexes);
        MatrixBlock outBlock = ctable.getValue();
        outBlock.recomputeNonZeros();
        //TODO: change hard coding of 1000
        int brlen = 1000, bclen = 1000;
        int rlen = outBlock.getNumRows();
        int clen = outBlock.getNumColumns();
        //final output matrix is smaller than a single block
        if (rlen <= brlen && clen <= bclen) {
          key = new MatrixIndexes(1, 1);
          for (Integer i : resultIDs) {
            _collector.collectOutput(key, outBlock, i, reporter);
            _resultNonZeros[i] += outBlock.getNonZeros();
          }
        } else {
          //following code is similar to that in DataConverter.writeBinaryBlockMatrixToHDFS
          //initialize blocks for reuse (at most 4 different blocks required)
          MatrixBlock[] blocks = MatrixWriter.createMatrixBlocksForReuse(rlen, clen, brlen, bclen, true, outBlock.getNonZeros());
          //create and write subblocks of matrix
          for (int blockRow = 0; blockRow < (int) Math.ceil(rlen / (double) brlen); blockRow++) {
            for (int blockCol = 0; blockCol < (int) Math.ceil(clen / (double) bclen); blockCol++) {
              int maxRow = (blockRow * brlen + brlen < rlen) ? brlen : rlen - blockRow * brlen;
              int maxCol = (blockCol * bclen + bclen < clen) ? bclen : clen - blockCol * bclen;
              int row_offset = blockRow * brlen;
              int col_offset = blockCol * bclen;
              //get reuse matrix block
              MatrixBlock block = MatrixWriter.getMatrixBlockForReuse(blocks, maxRow, maxCol, brlen, bclen);
              //copy submatrix to block
              outBlock.sliceOperations(row_offset, row_offset + maxRow - 1, col_offset, col_offset + maxCol - 1, block);
              //TODO: skip empty "block"
              //append block to sequence file
              key.setIndexes(blockRow + 1, blockCol + 1);
              for (Integer i : resultIDs) {
                _collector.collectOutput(key, block, i, reporter);
                _resultNonZeros[i] += block.getNonZeros();
              }
              //reset block for later reuse
              block.reset();
            }
          }
        }
      }
    } else {
      throw new DMLRuntimeException("Unexpected: both ctable buffers are empty.");
    }
  } catch (Exception ex) {
    throw new RuntimeException("Failed to flush ctable buffer.", ex);
  }
  //remove existing partial ctables
  if (_mapBuffer != null)
    _mapBuffer.clear();
  else
    _blockBuffer.clear();
}
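The block decomposition in the else-branch clips the last row and column of blocks to the matrix boundary. A standalone sketch of the same grid arithmetic, with illustrative dimensions and the same hard-coded block size of 1000:

  public class BlockGridSketch {
    public static void main(String[] args) {
      int rlen = 2500, clen = 1800;   //illustrative result dimensions
      int brlen = 1000, bclen = 1000; //same hard-coded block size as above
      //enumerate the block grid exactly as the flush loop does, printing
      //each block's 1-based key and its (possibly clipped) size
      for (int blockRow = 0; blockRow < (int) Math.ceil(rlen / (double) brlen); blockRow++)
        for (int blockCol = 0; blockCol < (int) Math.ceil(clen / (double) bclen); blockCol++) {
          int maxRow = (blockRow * brlen + brlen < rlen) ? brlen : rlen - blockRow * brlen;
          int maxCol = (blockCol * bclen + bclen < clen) ? bclen : clen - blockCol * bclen;
          System.out.println("block (" + (blockRow + 1) + "," + (blockCol + 1) + "): " + maxRow + "x" + maxCol);
        }
    }
  }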