Usage of org.apache.sysml.runtime.controlprogram.parfor.util.Cell in project incubator-systemml (Apache): class DataPartitionerLocal, method partitionBinaryCell.
/**
 * Partitions a binary-cell matrix stored on HDFS: reads all cells from the input
 * sequence files, groups them into block-wise staging files on the local file
 * system, and finally writes one sequence file per partition back to HDFS
 * (in parallel if PARALLEL is enabled).
 *
 * @param fname        HDFS path of the input binary-cell matrix
 * @param fnameStaging local staging directory for intermediate block files
 * @param fnameNew     HDFS output directory for the partitioned matrix
 * @param rlen         number of rows of the overall matrix
 * @param clen         number of columns of the overall matrix
 * @param brlen        block size in rows
 * @param bclen        block size in columns
 */
@SuppressWarnings("deprecation")
private void partitionBinaryCell(String fname, String fnameStaging, String fnameNew, long rlen, long clen, int brlen, int bclen) {
	// track the last cell read so the catch block can report out-of-range cells
	long row = -1;
	long col = -1;
	try {
		// STEP 1: read matrix from HDFS and write blocks to local staging area
		JobConf job = new JobConf(ConfigurationManager.getCachedJobConf());
		Path path = new Path(fname);
		FileSystem fs = IOUtilFunctions.getFileSystem(path, job);
		// key/value objects are reused across all records to avoid re-allocation
		LinkedList<Cell> buffer = new LinkedList<>();
		MatrixIndexes key = new MatrixIndexes();
		MatrixCell value = new MatrixCell();
		for (Path lpath : IOUtilFunctions.getSequenceFilePaths(fs, path)) {
			SequenceFile.Reader reader = new SequenceFile.Reader(fs, lpath, job);
			try {
				while (reader.next(key, value)) {
					row = key.getRowIndex();
					col = key.getColumnIndex();
					buffer.addLast(new Cell(row, col, value.getValue()));
					// periodic flush to bound local memory consumption
					if (buffer.size() > StagingFileUtils.CELL_BUFFER_SIZE) {
						appendCellBufferToStagingArea(fnameStaging, buffer, brlen, bclen);
						buffer.clear();
					}
				}
				// final flush of any remaining cells
				if (!buffer.isEmpty()) {
					appendCellBufferToStagingArea(fnameStaging, buffer, brlen, bclen);
					buffer.clear();
				}
			} finally {
				IOUtilFunctions.closeSilently(reader);
			}
		}
		// STEP 2: read matrix blocks from staging area and write matrix to HDFS
		String[] fnamesPartitions = new File(fnameStaging).list();
		if (PARALLEL) {
			// split the partition directories across at most _par worker threads
			int len = Math.min(fnamesPartitions.length, _par);
			Thread[] threads = new Thread[len];
			for (int i = 0; i < len; i++) {
				int chunk = (int) Math.ceil(((double) fnamesPartitions.length) / len);
				int start = i * chunk;
				int end = Math.min((i + 1) * chunk - 1, fnamesPartitions.length - 1);
				threads[i] = new Thread(new DataPartitionerWorkerBinaryCell(job, fnameNew, fnameStaging, fnamesPartitions, start, end));
				threads[i].start();
			}
			for (Thread t : threads)
				t.join();
		} else {
			// sequential fallback: write each partition directly
			for (String pdir : fnamesPartitions)
				writeBinaryCellSequenceFileToHDFS(job, fnameNew, fnameStaging + "/" + pdir);
		}
	} catch (Exception e) {
		// post-mortem error handling and bounds checking
		if (row < 1 || row > rlen || col < 1 || col > clen) {
			// BUGFIX: chain the triggering exception as the cause (previously dropped)
			throw new DMLRuntimeException("Matrix cell [" + (row) + "," + (col) + "] " + "out of overall matrix range [1:" + rlen + ",1:" + clen + "].", e);
		} else {
			throw new DMLRuntimeException("Unable to partition binary cell matrix.", e);
		}
	}
}
Usage of org.apache.sysml.runtime.controlprogram.parfor.util.Cell in project incubator-systemml (Apache): class DataPartitionerLocal, method writeTextCellFileToHDFS.
/**
 * Merges all staged cell-list files of one partition into a single text-cell
 * file {@code <dir>/<partitionKey>} on HDFS, one "row col value" line per cell.
 *
 * @param job   Hadoop job configuration used to resolve the file system
 * @param dir   HDFS output directory
 * @param lpdir local staging directory of this partition; its name encodes the
 *              partition key (extracted via getKeyFromFilePath)
 * @throws IOException if reading the staged files or writing to HDFS fails
 */
public void writeTextCellFileToHDFS(JobConf job, String dir, String lpdir) throws IOException {
	long key = getKeyFromFilePath(lpdir);
	Path path = new Path(dir + "/" + key);
	FileSystem fs = IOUtilFunctions.getFileSystem(path, job);
	// BUGFIX: write with an explicit UTF-8 charset instead of the platform
	// default, so the HDFS output is identical across JVMs and locales
	BufferedWriter out = new BufferedWriter(new OutputStreamWriter(fs.create(path, true), "UTF-8"));
	try {
		// for obj reuse and preventing repeated buffer re-allocations
		StringBuilder sb = new StringBuilder();
		String[] fnameBlocks = new File(lpdir).list();
		for (String fnameBlock : fnameBlocks) {
			LinkedList<Cell> tmp = StagingFileUtils.readCellListFromLocal(lpdir + "/" + fnameBlock);
			for (Cell c : tmp) {
				sb.append(c.getRow());
				sb.append(' ');
				sb.append(c.getCol());
				sb.append(' ');
				sb.append(c.getValue());
				sb.append('\n');
				out.write(sb.toString());
				sb.setLength(0);
			}
		}
	} finally {
		IOUtilFunctions.closeSilently(out);
	}
}
Usage of org.apache.sysml.runtime.controlprogram.parfor.util.Cell in project incubator-systemml (Apache): class DataPartitionerLocal, method writeBinaryCellSequenceFileToHDFS.
/**
 * Merges all staged cell-list files of one partition into a single binary-cell
 * sequence file {@code <dir>/<partitionKey>} on HDFS.
 *
 * @param job   Hadoop job configuration used to resolve the file system
 * @param dir   HDFS output directory
 * @param lpdir local staging directory of this partition; its name encodes the
 *              partition key (extracted via getKeyFromFilePath)
 * @throws IOException if reading the staged files or writing to HDFS fails
 */
@SuppressWarnings("deprecation")
public void writeBinaryCellSequenceFileToHDFS(JobConf job, String dir, String lpdir) throws IOException {
	long partKey = getKeyFromFilePath(lpdir);
	Path target = new Path(dir + "/" + partKey);
	FileSystem fs = IOUtilFunctions.getFileSystem(target, job);
	// note: constructing the writer costs roughly 50ms
	SequenceFile.Writer writer = new SequenceFile.Writer(fs, job, target, MatrixIndexes.class, MatrixCell.class);
	try {
		// key/value objects reused for every appended record
		MatrixIndexes ix = new MatrixIndexes();
		MatrixCell mc = new MatrixCell();
		for (String blockFile : new File(lpdir).list()) {
			for (Cell c : StagingFileUtils.readCellListFromLocal(lpdir + "/" + blockFile)) {
				ix.setIndexes(c.getRow(), c.getCol());
				mc.setValue(c.getValue());
				writer.append(ix, mc);
			}
		}
	} finally {
		IOUtilFunctions.closeSilently(writer);
	}
}
Usage of org.apache.sysml.runtime.controlprogram.parfor.util.Cell in project incubator-systemml (Apache): class DataPartitionerLocal, method partitionBinaryBlock2BinaryCell.
// Converts a binary-block matrix on HDFS into a partitioned binary-cell matrix:
// every block is exploded into its non-zero cells, the cells are staged
// block-wise on the local file system, and the staged partitions are then
// written back to HDFS (in parallel if PARALLEL is enabled).
@SuppressWarnings("deprecation")
private void partitionBinaryBlock2BinaryCell(String fname, String fnameStaging, String fnameNew, long rlen, long clen, int brlen, int bclen) {
try {
// STEP 1: read matrix from HDFS and write blocks to local staging area
// check and add input path
JobConf job = new JobConf(ConfigurationManager.getCachedJobConf());
Path path = new Path(fname);
FileSystem fs = IOUtilFunctions.getFileSystem(path, job);
// prepare sequence file reader, and write to local staging area
// (key/value objects are reused across all records to avoid re-allocation)
MatrixIndexes key = new MatrixIndexes();
MatrixBlock value = new MatrixBlock();
LinkedList<Cell> buffer = new LinkedList<>();
for (Path lpath : IOUtilFunctions.getSequenceFilePaths(fs, path)) {
SequenceFile.Reader reader = new SequenceFile.Reader(fs, lpath, job);
try {
while (// for each block
reader.next(key, value)) {
// translate 1-based block indexes into 0-based global cell offsets
long row_offset = (key.getRowIndex() - 1) * brlen;
long col_offset = (key.getColumnIndex() - 1) * bclen;
long rows = value.getNumRows();
long cols = value.getNumColumns();
// bound check per block
if (row_offset + rows < 1 || row_offset + rows > rlen || col_offset + cols < 1 || col_offset + cols > clen) {
throw new IOException("Matrix block [" + (row_offset + 1) + ":" + (row_offset + rows) + "," + (col_offset + 1) + ":" + (col_offset + cols) + "] " + "out of overall matrix range [1:" + rlen + ",1:" + clen + "].");
}
boolean sparse = value.isInSparseFormat();
if (// SPARSE
sparse) {
// iterate only over the non-zeros of the sparse block
Iterator<IJV> iter = value.getSparseBlockIterator();
while (iter.hasNext()) {
IJV lcell = iter.next();
Cell tmp = new Cell(row_offset + lcell.getI() + 1, col_offset + lcell.getJ() + 1, lcell.getV());
buffer.addLast(tmp);
}
} else // DENSE
{
// scan the full dense block but materialize only non-zero cells
for (int i = 0; i < rows; i++) for (int j = 0; j < cols; j++) {
double lvalue = value.getValueDenseUnsafe(i, j);
if (// for nnz
lvalue != 0) {
Cell tmp = new Cell(row_offset + i + 1, col_offset + j + 1, lvalue);
buffer.addLast(tmp);
}
}
}
// flush the cell buffer once per block (bounds local memory usage;
// NOTE(review): unlike partitionBinaryCell there is no periodic
// CELL_BUFFER_SIZE flush — a single block's cells fit the buffer)
appendCellBufferToStagingArea(fnameStaging, buffer, brlen, bclen);
buffer.clear();
}
} finally {
IOUtilFunctions.closeSilently(reader);
}
}
// STEP 2: read matrix blocks from staging area and write matrix to HDFS
String[] fnamesPartitions = new File(fnameStaging).list();
if (PARALLEL) {
// split the partition directories across at most _par worker threads
int len = Math.min(fnamesPartitions.length, _par);
Thread[] threads = new Thread[len];
for (int i = 0; i < len; i++) {
int start = i * (int) Math.ceil(((double) fnamesPartitions.length) / len);
int end = (i + 1) * (int) Math.ceil(((double) fnamesPartitions.length) / len) - 1;
end = Math.min(end, fnamesPartitions.length - 1);
threads[i] = new Thread(new DataPartitionerWorkerBinaryCell(job, fnameNew, fnameStaging, fnamesPartitions, start, end));
threads[i].start();
}
for (Thread t : threads) t.join();
} else {
// sequential fallback: write each partition directly
for (String pdir : fnamesPartitions) writeBinaryCellSequenceFileToHDFS(job, fnameNew, fnameStaging + "/" + pdir);
}
} catch (Exception e) {
throw new DMLRuntimeException("Unable to partition binary block matrix.", e);
}
}
Usage of org.apache.sysml.runtime.controlprogram.parfor.util.Cell in project incubator-systemml (Apache): class ResultMergeLocalFile, method appendCellBufferToStagingArea.
/**
 * Groups the buffered cells by their enclosing (brow, bcol) block, rewrites each
 * cell's coordinates to 0-based block-local offsets (NOTE: mutates the cells in
 * the given buffer in-place), and appends every group to the staging file
 * {@code <fnameStaging>/<brow>_<bcol>/<ID>}.
 *
 * @param fnameStaging local staging root directory
 * @param ID           identifier used as the file name within each block directory
 * @param buffer       cells to stage; coordinates are modified in-place
 * @param brlen        block size in rows
 * @param bclen        block size in columns
 * @throws IOException if writing a staged cell list fails
 */
private static void appendCellBufferToStagingArea(String fnameStaging, long ID, LinkedList<Cell> buffer, int brlen, int bclen) throws IOException {
	HashMap<Long, HashMap<Long, LinkedList<Cell>>> sortedBuffer = new HashMap<>();
	for (Cell c : buffer) {
		// 1-based block indexes and the global coordinate of the block's first cell
		long brow = (c.getRow() - 1) / brlen + 1;
		long bcol = (c.getCol() - 1) / bclen + 1;
		long row_offset = (brow - 1) * brlen + 1;
		long col_offset = (bcol - 1) * bclen + 1;
		// convert global coordinates to 0-based block-local coordinates
		c.setRow(c.getRow() - row_offset);
		c.setCol(c.getCol() - col_offset);
		// IMPROVED: computeIfAbsent replaces the containsKey/put lookup chains
		sortedBuffer.computeIfAbsent(brow, k -> new HashMap<Long, LinkedList<Cell>>())
			.computeIfAbsent(bcol, k -> new LinkedList<Cell>())
			.addLast(c);
	}
	// write lists of cells to local files
	for (Entry<Long, HashMap<Long, LinkedList<Cell>>> e : sortedBuffer.entrySet()) {
		long brow = e.getKey();
		for (Entry<Long, LinkedList<Cell>> e2 : e.getValue().entrySet()) {
			long bcol = e2.getKey();
			String lname = brow + "_" + bcol;
			String dir = fnameStaging + "/" + lname;
			LocalFileUtils.checkAndCreateStagingDir(dir);
			StagingFileUtils.writeCellListToLocal(dir + "/" + ID, e2.getValue());
		}
	}
}
Aggregations