use of org.apache.sysml.runtime.matrix.data.MatrixCell in project incubator-systemml by apache.
the class ReaderBinaryCell method readBinaryCellMatrixFromHDFS.
@SuppressWarnings("deprecation")
private static void readBinaryCellMatrixFromHDFS(Path path, JobConf job, FileSystem fs, MatrixBlock dest, long rlen, long clen, int brlen, int bclen) throws IOException {
boolean sparse = dest.isInSparseFormat();
MatrixIndexes key = new MatrixIndexes();
MatrixCell value = new MatrixCell();
int row = -1;
int col = -1;
try {
// read 1..N files (individual partfiles of the sequence file directory)
for (Path lpath : IOUtilFunctions.getSequenceFilePaths(fs, path)) {
// directly read from sequence files (individual partfiles)
SequenceFile.Reader reader = new SequenceFile.Reader(fs, lpath, job);
try {
if (sparse) {
// sparse target: append cells in file order; rows are sorted once after all files are read
while (reader.next(key, value)) {
row = (int) key.getRowIndex() - 1;
col = (int) key.getColumnIndex() - 1;
double lvalue = value.getValue();
dest.appendValue(row, col, lvalue);
}
} else {
// dense target: appendValue sets the cell directly in the dense block (zeros are skipped)
while (reader.next(key, value)) {
row = (int) key.getRowIndex() - 1;
col = (int) key.getColumnIndex() - 1;
double lvalue = value.getValue();
dest.appendValue(row, col, lvalue);
}
}
} finally {
IOUtilFunctions.closeSilently(reader);
}
}
if (sparse)
dest.sortSparseRows();
} catch (Exception ex) {
// post-mortem error handling and bounds checking
if (row < 0 || row + 1 > rlen || col < 0 || col + 1 > clen) {
throw new IOException("Matrix cell [" + (row + 1) + "," + (col + 1) + "] " + "out of overall matrix range [1:" + rlen + ",1:" + clen + "].");
} else {
throw new IOException("Unable to read matrix in binary cell format.", ex);
}
}
}
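For reference, the binary-cell format consumed above is simply a SequenceFile of (MatrixIndexes, MatrixCell) pairs with 1-based indexes. Below is a self-contained sketch that writes two cells and reads them back with the same 1-based-to-0-based shift the reader applies; the class name and the /tmp path are illustrative, not from the source.
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.SequenceFile;
import org.apache.sysml.runtime.matrix.data.MatrixCell;
import org.apache.sysml.runtime.matrix.data.MatrixIndexes;

public class BinaryCellRoundTrip {
    @SuppressWarnings("deprecation")
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        // illustrative output location
        Path file = new Path("/tmp/binarycell-example");
        MatrixIndexes key = new MatrixIndexes();
        MatrixCell value = new MatrixCell();
        // write (MatrixIndexes, MatrixCell) pairs; indexes are 1-based on disk
        SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, file, MatrixIndexes.class, MatrixCell.class);
        try {
            key.setIndexes(1, 1);
            value.setValue(3.14);
            writer.append(key, value);
            key.setIndexes(2, 7);
            value.setValue(2.72);
            writer.append(key, value);
        } finally {
            writer.close();
        }
        // read the cells back, shifting to 0-based indexes as the method above does
        SequenceFile.Reader reader = new SequenceFile.Reader(fs, file, conf);
        try {
            while (reader.next(key, value)) {
                int row = (int) key.getRowIndex() - 1;
                int col = (int) key.getColumnIndex() - 1;
                System.out.println("cell (" + row + "," + col + ") = " + value.getValue());
            }
        } finally {
            reader.close();
        }
    }
}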
use of org.apache.sysml.runtime.matrix.data.MatrixCell in project incubator-systemml by apache.
the class GMRCtableBuffer method flushBuffer.
public void flushBuffer(Reporter reporter) throws RuntimeException {
try {
if (_mapBuffer != null) {
MatrixIndexes key = null; // new MatrixIndexes();
MatrixCell value = new MatrixCell();
for (Entry<Byte, CTableMap> ctable : _mapBuffer.entrySet()) {
ArrayList<Integer> resultIDs = ReduceBase.getOutputIndexes(ctable.getKey(), _resultIndexes);
CTableMap resultMap = ctable.getValue();
// maintain result dims and nonzeros
for (Integer i : resultIDs) {
_resultNonZeros[i] += resultMap.size();
if (_resultDimsUnknown[i] == (byte) 1) {
_resultMaxRowDims[i] = Math.max(resultMap.getMaxRow(), _resultMaxRowDims[i]);
_resultMaxColDims[i] = Math.max(resultMap.getMaxColumn(), _resultMaxColDims[i]);
}
}
// output result data
Iterator<ADoubleEntry> iter = resultMap.getIterator();
while (iter.hasNext()) {
ADoubleEntry e = iter.next();
key = new MatrixIndexes(e.getKey1(), e.getKey2());
value.setValue(e.value);
for (Integer i : resultIDs) _collector.collectOutput(key, value, i, reporter);
}
}
} else if (_blockBuffer != null) {
MatrixIndexes key = new MatrixIndexes(1, 1);
// DataConverter.writeBinaryBlockMatrixToHDFS(path, job, mat, mc.get_rows(), mc.get_cols(), mc.get_rows_per_block(), mc.get_cols_per_block(), replication);
for (Entry<Byte, MatrixBlock> ctable : _blockBuffer.entrySet()) {
ArrayList<Integer> resultIDs = ReduceBase.getOutputIndexes(ctable.getKey(), _resultIndexes);
MatrixBlock outBlock = ctable.getValue();
outBlock.recomputeNonZeros();
// TODO: change hard coding of 1000
int brlen = 1000, bclen = 1000;
int rlen = outBlock.getNumRows();
int clen = outBlock.getNumColumns();
// final output matrix is smaller than a single block
if (rlen <= brlen && clen <= bclen) {
key = new MatrixIndexes(1, 1);
for (Integer i : resultIDs) {
_collector.collectOutput(key, outBlock, i, reporter);
_resultNonZeros[i] += outBlock.getNonZeros();
}
} else {
// Following code is similar to that in DataConverter.writeBinaryBlockMatrixToHDFS
// initialize blocks for reuse (at most 4 different blocks required)
MatrixBlock[] blocks = MatrixWriter.createMatrixBlocksForReuse(rlen, clen, brlen, bclen, true, outBlock.getNonZeros());
// create and write subblocks of matrix
for (int blockRow = 0; blockRow < (int) Math.ceil(rlen / (double) brlen); blockRow++) {
for (int blockCol = 0; blockCol < (int) Math.ceil(clen / (double) bclen); blockCol++) {
int maxRow = (blockRow * brlen + brlen < rlen) ? brlen : rlen - blockRow * brlen;
int maxCol = (blockCol * bclen + bclen < clen) ? bclen : clen - blockCol * bclen;
int row_offset = blockRow * brlen;
int col_offset = blockCol * bclen;
// get reuse matrix block
MatrixBlock block = MatrixWriter.getMatrixBlockForReuse(blocks, maxRow, maxCol, brlen, bclen);
// copy submatrix to block
outBlock.slice(row_offset, row_offset + maxRow - 1, col_offset, col_offset + maxCol - 1, block);
// TODO: skip empty "block"
// append block to sequence file
key.setIndexes(blockRow + 1, blockCol + 1);
for (Integer i : resultIDs) {
_collector.collectOutput(key, block, i, reporter);
_resultNonZeros[i] += block.getNonZeros();
}
// reset block for later reuse
block.reset();
}
}
}
}
} else {
throw new DMLRuntimeException("Unexpected: both ctable buffers are empty.");
}
} catch (Exception ex) {
throw new RuntimeException("Failed to flush ctable buffer.", ex);
}
// remove existing partial ctables
if (_mapBuffer != null)
_mapBuffer.clear();
else
_blockBuffer.clear();
}
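The sub-block tiling in the else branch is the core of this method: an rlen x clen result is cut into ceil(rlen/brlen) x ceil(clen/bclen) tiles, and only boundary tiles are smaller than brlen x bclen. A standalone sketch of that arithmetic, with made-up dimensions and a hypothetical class name:
public class BlockTilingDemo {
    public static void main(String[] args) {
        // made-up dimensions: a 2500 x 1300 matrix in 1000 x 1000 blocks
        int rlen = 2500, clen = 1300, brlen = 1000, bclen = 1000;
        int numBlockRows = (int) Math.ceil(rlen / (double) brlen); // 3
        int numBlockCols = (int) Math.ceil(clen / (double) bclen); // 2
        for (int blockRow = 0; blockRow < numBlockRows; blockRow++) {
            for (int blockCol = 0; blockCol < numBlockCols; blockCol++) {
                // boundary blocks cover only the remainder of the matrix
                int maxRow = (blockRow * brlen + brlen < rlen) ? brlen : rlen - blockRow * brlen;
                int maxCol = (blockCol * bclen + bclen < clen) ? bclen : clen - blockCol * bclen;
                // 0-based offset of the block's top-left cell in the full matrix
                int rowOffset = blockRow * brlen;
                int colOffset = blockCol * bclen;
                System.out.printf("block (%d,%d): %d x %d at offset (%d,%d)%n",
                        blockRow + 1, blockCol + 1, maxRow, maxCol, rowOffset, colOffset);
            }
        }
    }
}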
use of org.apache.sysml.runtime.matrix.data.MatrixCell in project incubator-systemml by apache.
the class SamplingSortMRInputFormat method writePartitionFile.
/**
* Use the input splits to take samples of the input and generate sample
* keys. Reads sampleSize keys (default 1,000, configurable via SAMPLE_SIZE)
* from up to 10 locations in the input, sorts them, and picks N-1 keys to
* generate N equally sized partitions.
*
* @param conf the job to sample
* @param partFile where to write the partition file
* @return index of the first split key that is non-negative, or partitions-1 if all split keys are negative
* @throws IOException if something goes wrong
* @throws InstantiationException if InstantiationException occurs
* @throws IllegalAccessException if IllegalAccessException occurs
*/
@SuppressWarnings({ "unchecked", "unused", "deprecation" })
public static int writePartitionFile(JobConf conf, Path partFile) throws IOException, InstantiationException, IllegalAccessException {
SamplingSortMRInputFormat inFormat = new SamplingSortMRInputFormat();
Sampler sampler = new Sampler();
Class<? extends WritableComparable> targetKeyClass;
targetKeyClass = (Class<? extends WritableComparable>) conf.getClass(TARGET_KEY_CLASS, WritableComparable.class);
// get input converter information
int brlen = MRJobConfiguration.getNumRowsPerBlock(conf, (byte) 0);
int bclen = MRJobConfiguration.getNumColumnsPerBlock(conf, (byte) 0);
// indicate whether the matrix value in this mapper is a matrix cell or a matrix block
int partitions = conf.getNumReduceTasks();
long sampleSize = conf.getLong(SAMPLE_SIZE, 1000);
InputSplit[] splits = inFormat.getSplits(conf, conf.getNumMapTasks());
int samples = Math.min(10, splits.length);
long recordsPerSample = sampleSize / samples;
int sampleStep = splits.length / samples;
// take N samples from different parts of the input
int totalcount = 0;
for (int i = 0; i < samples; i++) {
SequenceFileRecordReader reader = (SequenceFileRecordReader) inFormat.getRecordReader(splits[sampleStep * i], conf, null);
int count = 0;
WritableComparable key = (WritableComparable) reader.createKey();
Writable value = (Writable) reader.createValue();
while (reader.next(key, value) && count < recordsPerSample) {
Converter inputConverter = MRJobConfiguration.getInputConverter(conf, (byte) 0);
inputConverter.setBlockSize(brlen, bclen);
inputConverter.convert(key, value);
while (inputConverter.hasNext()) {
Pair pair = inputConverter.next();
if (pair.getKey() instanceof DoubleWritable) {
sampler.addValue(new DoubleWritable(((DoubleWritable) pair.getKey()).get()));
} else if (pair.getValue() instanceof MatrixCell) {
sampler.addValue(new DoubleWritable(((MatrixCell) pair.getValue()).getValue()));
} else
throw new IOException("SamplingSortMRInputFormat unsupported key/value class: " + pair.getKey().getClass() + ":" + pair.getValue().getClass());
count++;
}
key = (WritableComparable) reader.createKey();
value = (Writable) reader.createValue();
}
totalcount += count;
}
if (totalcount == 0) // empty input files
sampler.addValue(new DoubleWritable(0));
FileSystem outFs = partFile.getFileSystem(conf);
if (outFs.exists(partFile)) {
outFs.delete(partFile, false);
}
// note: key value always double/null as expected by partitioner
SequenceFile.Writer writer = null;
int index0 = -1;
try {
writer = SequenceFile.createWriter(outFs, conf, partFile, DoubleWritable.class, NullWritable.class);
NullWritable nullValue = NullWritable.get();
int i = 0;
boolean lessthan0 = true;
for (WritableComparable splitValue : sampler.createPartitions(partitions)) {
writer.append(splitValue, nullValue);
if (lessthan0 && ((DoubleWritable) splitValue).get() >= 0) {
index0 = i;
lessthan0 = false;
}
i++;
}
if (lessthan0)
index0 = partitions - 1;
} finally {
IOUtilFunctions.closeSilently(writer);
}
return index0;
}
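The partition file written here holds up to N-1 ascending split keys; the sort job's partitioner reads them back and routes each record to a reducer by comparing it against the splits. That partitioner is not part of this snippet, so the following is only a hypothetical sketch of the lookup, using upper-bound-inclusive ranges:
import java.util.Arrays;

public class SplitPointPartitioner {
    // N-1 sampled split keys in ascending order, as read from the partition file
    private final double[] splitPoints;

    public SplitPointPartitioner(double[] splitPoints) {
        this.splitPoints = splitPoints;
    }

    // assign a key to one of N = splitPoints.length + 1 partitions
    public int getPartition(double key) {
        int pos = Arrays.binarySearch(splitPoints, key);
        // for an absent key, binarySearch returns -(insertionPoint) - 1
        return (pos >= 0) ? pos : -pos - 1;
    }
}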
use of org.apache.sysml.runtime.matrix.data.MatrixCell in project systemml by apache.
the class PairWritableCell method readFields.
@Override
public void readFields(DataInput in) throws IOException {
indexes = new MatrixIndexes();
indexes.readFields(in);
cell = new MatrixCell();
cell.readFields(in);
}
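The Writable contract pairs readFields with a symmetric write method; in PairWritableCell it presumably just forwards to the two fields. A sketch of the expected counterpart (not copied from the source):
@Override
public void write(DataOutput out) throws IOException {
    indexes.write(out);
    cell.write(out);
}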
use of org.apache.sysml.runtime.matrix.data.MatrixCell in project systemml by apache.
the class SamplingSortMRInputFormat method writePartitionFile.
(The javadoc and method body are identical to the writePartitionFile listing from incubator-systemml above.)