use of org.apache.sysml.runtime.controlprogram.parfor.util.Cell in project systemml by apache.
the class ResultMergeLocalFile method createBinaryCellStagingFile.
/**
 * Copies all cells of a binary-cell matrix into the local staging area.
 *
 * Every sequence file found under the matrix object's file name is scanned;
 * cells are collected in an in-memory buffer that is handed to
 * {@code appendCellBufferToStagingArea} whenever it grows beyond
 * {@code StagingFileUtils.CELL_BUFFER_SIZE}, and once more at the end of each
 * file for whatever is left.
 *
 * @param fnameStaging staging location forwarded to {@code appendCellBufferToStagingArea}
 * @param mo matrix object providing the input file name and the block sizes
 * @param ID identifier forwarded to {@code appendCellBufferToStagingArea}
 * @throws IOException if reading the input or writing the staging data fails
 * @throws DMLRuntimeException declared for callers; propagated from the called helpers
 */
@SuppressWarnings("deprecation")
private static void createBinaryCellStagingFile(String fnameStaging, MatrixObject mo, long ID) throws IOException, DMLRuntimeException {
    JobConf conf = new JobConf(ConfigurationManager.getCachedJobConf());
    Path inputPath = new Path(mo.getFileName());
    FileSystem fileSystem = IOUtilFunctions.getFileSystem(inputPath, conf);

    // reusable writables and the spill buffer
    MatrixCell cellValue = new MatrixCell();
    MatrixIndexes cellIndex = new MatrixIndexes();
    LinkedList<Cell> pending = new LinkedList<>();

    MatrixCharacteristics dims = mo.getMatrixCharacteristics();
    final int rowsPerBlock = dims.getRowsPerBlock();
    final int colsPerBlock = dims.getColsPerBlock();

    for (Path part : IOUtilFunctions.getSequenceFilePaths(fileSystem, inputPath)) {
        SequenceFile.Reader in = new SequenceFile.Reader(fileSystem, part, conf);
        try {
            while (in.next(cellIndex, cellValue)) {
                pending.add(new Cell(cellIndex.getRowIndex(), cellIndex.getColumnIndex(), cellValue.getValue()));
                if (pending.size() <= StagingFileUtils.CELL_BUFFER_SIZE) {
                    continue;
                }
                // periodic flush keeps the buffer bounded
                appendCellBufferToStagingArea(fnameStaging, ID, pending, rowsPerBlock, colsPerBlock);
                pending.clear();
            }
            // final flush of the remainder for this file
            if (pending.size() > 0) {
                appendCellBufferToStagingArea(fnameStaging, ID, pending, rowsPerBlock, colsPerBlock);
                pending.clear();
            }
        } finally {
            IOUtilFunctions.closeSilently(in);
        }
    }
}
use of org.apache.sysml.runtime.controlprogram.parfor.util.Cell in project systemml by apache.
the class DataPartitionerLocal method appendCellBufferToStagingArea.
/**
 * Groups the buffered cells by partition key and writes each group to a new
 * file in the staging area.
 *
 * The key depends on {@code _format}: the row or column index for the
 * row-/column-wise formats, or the 1-based block index for the block-wise
 * formats. The partitioned coordinate of each cell is rewritten in place
 * (set to 1, or to the 1-based position inside its block). For any other
 * format the key keeps its previous value (initially -1) and the cell is
 * left unchanged.
 *
 * @param dir staging directory; one sub-directory per key is used
 * @param buffer cells to distribute; the cells themselves are modified
 * @param brlen number of rows per block
 * @param bclen number of columns per block
 * @throws IOException if creating a directory or writing a cell list fails
 */
private void appendCellBufferToStagingArea(String dir, LinkedList<Cell> buffer, int brlen, int bclen) throws IOException {
    HashMap<Long, LinkedList<Cell>> cellsByPartition = new HashMap<>();

    // assign every cell to its partition
    long partition = -1;
    for (Cell cell : buffer) {
        switch (_format) {
            case ROW_WISE:
                partition = cell.getRow();
                cell.setRow(1);
                break;
            case ROW_BLOCK_WISE:
                partition = (cell.getRow() - 1) / brlen + 1;
                cell.setRow((cell.getRow() - 1) % brlen + 1);
                break;
            case COLUMN_WISE:
                partition = cell.getCol();
                cell.setCol(1);
                break;
            case COLUMN_BLOCK_WISE:
                partition = (cell.getCol() - 1) / bclen + 1;
                cell.setCol((cell.getCol() - 1) % bclen + 1);
                break;
            default:
                // other formats: key and cell stay as they are
                break;
        }
        LinkedList<Cell> group = cellsByPartition.get(partition);
        if (group == null) {
            group = new LinkedList<Cell>();
            cellsByPartition.put(partition, group);
        }
        group.addLast(cell);
    }

    // one new local file per partition touched by this buffer
    for (Entry<Long, LinkedList<Cell>> group : cellsByPartition.entrySet()) {
        String partitionDir = LocalFileUtils.checkAndCreateStagingDir(dir + "/" + group.getKey());
        String blockFile = partitionDir + "/" + "block_" + _seq.getNextID();
        StagingFileUtils.writeCellListToLocal(blockFile, group.getValue());
    }
}
use of org.apache.sysml.runtime.controlprogram.parfor.util.Cell in project systemml by apache.
the class DataPartitionerLocal method partitionBinaryBlock2BinaryCell.
/**
 * Repartitions a binary-block matrix file into binary-cell partition files.
 *
 * Step 1 reads every block, converts its non-zero entries into cells with
 * global 1-based coordinates and spills them to the local staging area.
 * Step 2 writes each staging partition directory to {@code fnameNew},
 * either sequentially or with up to {@code _par} threads when
 * {@code PARALLEL} is set.
 *
 * @param fname input file name of the binary-block matrix
 * @param fnameStaging local staging directory
 * @param fnameNew output directory for the partition files
 * @param rlen total number of rows
 * @param clen total number of columns
 * @param brlen number of rows per block
 * @param bclen number of columns per block
 * @throws DMLRuntimeException if reading, staging or writing fails, if the
 *         staging directory cannot be listed, or if the thread is interrupted
 *         while waiting for the workers
 */
@SuppressWarnings("deprecation")
private void partitionBinaryBlock2BinaryCell(String fname, String fnameStaging, String fnameNew, long rlen, long clen, int brlen, int bclen) {
    try {
        // STEP 1: read matrix from HDFS and write blocks to local staging area
        // check and add input path
        JobConf job = new JobConf(ConfigurationManager.getCachedJobConf());
        Path path = new Path(fname);
        FileSystem fs = IOUtilFunctions.getFileSystem(path, job);
        // prepare sequence file reader, and write to local staging area
        MatrixIndexes key = new MatrixIndexes();
        MatrixBlock value = new MatrixBlock();
        LinkedList<Cell> buffer = new LinkedList<>();
        for (Path lpath : IOUtilFunctions.getSequenceFilePaths(fs, path)) {
            SequenceFile.Reader reader = new SequenceFile.Reader(fs, lpath, job);
            try {
                // for each block
                while (reader.next(key, value)) {
                    long row_offset = (key.getRowIndex() - 1) * brlen;
                    long col_offset = (key.getColumnIndex() - 1) * bclen;
                    long rows = value.getNumRows();
                    long cols = value.getNumColumns();
                    // bound check per block
                    if (row_offset + rows < 1 || row_offset + rows > rlen || col_offset + cols < 1 || col_offset + cols > clen) {
                        throw new IOException("Matrix block [" + (row_offset + 1) + ":" + (row_offset + rows) + "," + (col_offset + 1) + ":" + (col_offset + cols) + "] " + "out of overall matrix range [1:" + rlen + ",1:" + clen + "].");
                    }
                    if (value.isInSparseFormat()) {
                        // SPARSE: the iterator only visits stored entries
                        Iterator<IJV> iter = value.getSparseBlockIterator();
                        while (iter.hasNext()) {
                            IJV lcell = iter.next();
                            Cell tmp = new Cell(row_offset + lcell.getI() + 1, col_offset + lcell.getJ() + 1, lcell.getV());
                            buffer.addLast(tmp);
                        }
                    } else {
                        // DENSE: scan all positions, keep non-zeros only
                        for (int i = 0; i < rows; i++) {
                            for (int j = 0; j < cols; j++) {
                                double lvalue = value.getValueDenseUnsafe(i, j);
                                if (lvalue != 0) {
                                    Cell tmp = new Cell(row_offset + i + 1, col_offset + j + 1, lvalue);
                                    buffer.addLast(tmp);
                                }
                            }
                        }
                    }
                    // spill after every block so the buffer never holds more than one block
                    appendCellBufferToStagingArea(fnameStaging, buffer, brlen, bclen);
                    buffer.clear();
                }
            } finally {
                IOUtilFunctions.closeSilently(reader);
            }
        }
        // STEP 2: read matrix blocks from staging area and write matrix to HDFS
        String[] fnamesPartitions = new File(fnameStaging).list();
        if (fnamesPartitions == null) {
            // File.list() returns null for a missing/non-directory path or on I/O error;
            // fail with a clear cause instead of a NullPointerException below
            throw new IOException("Unable to list staging directory: " + fnameStaging);
        }
        if (PARALLEL) {
            int len = Math.min(fnamesPartitions.length, _par);
            // number of partition directories handed to each worker
            int chunk = (int) Math.ceil(((double) fnamesPartitions.length) / len);
            Thread[] threads = new Thread[len];
            for (int i = 0; i < len; i++) {
                int start = i * chunk;
                int end = Math.min((i + 1) * chunk - 1, fnamesPartitions.length - 1);
                threads[i] = new Thread(new DataPartitionerWorkerBinaryCell(job, fnameNew, fnameStaging, fnamesPartitions, start, end));
                threads[i].start();
            }
            for (Thread t : threads) {
                t.join();
            }
        } else {
            for (String pdir : fnamesPartitions) {
                writeBinaryCellSequenceFileToHDFS(job, fnameNew, fnameStaging + "/" + pdir);
            }
        }
    } catch (InterruptedException e) {
        // restore the interrupt flag so callers further up can still observe it
        Thread.currentThread().interrupt();
        throw new DMLRuntimeException("Unable to partition binary block matrix.", e);
    } catch (Exception e) {
        throw new DMLRuntimeException("Unable to partition binary block matrix.", e);
    }
}
use of org.apache.sysml.runtime.controlprogram.parfor.util.Cell in project systemml by apache.
the class DataPartitionerLocal method writeBinaryCellSequenceFileToHDFS.
/**
 * Writes all cell lists stored in one local staging directory into a single
 * binary-cell sequence file named after the directory's partition key.
 *
 * @param job job configuration used for the file system and the writer
 * @param dir output directory; the file is created at {@code dir + "/" + key}
 * @param lpdir local staging directory holding the cell-list files of one partition
 * @throws IOException if the staging directory cannot be listed, or if reading
 *         the staged cells or appending to the sequence file fails
 */
@SuppressWarnings("deprecation")
public void writeBinaryCellSequenceFileToHDFS(JobConf job, String dir, String lpdir) throws IOException {
    long key = getKeyFromFilePath(lpdir);
    // File.list() returns null for a missing/non-directory path or on I/O error;
    // check before opening the writer so a bad directory leaves no empty output file
    String[] fnameBlocks = new File(lpdir).list();
    if (fnameBlocks == null) {
        throw new IOException("Unable to list staging directory: " + lpdir);
    }
    Path path = new Path(dir + "/" + key);
    FileSystem fs = IOUtilFunctions.getFileSystem(path, job);
    // beware ca 50ms
    SequenceFile.Writer writer = new SequenceFile.Writer(fs, job, path, MatrixIndexes.class, MatrixCell.class);
    try {
        // writables are reused across all appended cells
        MatrixIndexes indexes = new MatrixIndexes();
        MatrixCell cell = new MatrixCell();
        for (String fnameBlock : fnameBlocks) {
            LinkedList<Cell> tmp = StagingFileUtils.readCellListFromLocal(lpdir + "/" + fnameBlock);
            for (Cell c : tmp) {
                indexes.setIndexes(c.getRow(), c.getCol());
                cell.setValue(c.getValue());
                writer.append(indexes, cell);
            }
        }
    } finally {
        IOUtilFunctions.closeSilently(writer);
    }
}
use of org.apache.sysml.runtime.controlprogram.parfor.util.Cell in project systemml by apache.
the class DataPartitionerLocal method writeTextCellFileToHDFS.
/**
 * Writes all cell lists stored in one local staging directory into a single
 * text file named after the directory's partition key, one
 * {@code "row col value"} line per cell.
 *
 * @param job job configuration used to obtain the file system
 * @param dir output directory; the file is created (overwriting) at {@code dir + "/" + key}
 * @param lpdir local staging directory holding the cell-list files of one partition
 * @throws IOException if the staging directory cannot be listed, or if reading
 *         the staged cells or writing the output fails
 */
public void writeTextCellFileToHDFS(JobConf job, String dir, String lpdir) throws IOException {
    long key = getKeyFromFilePath(lpdir);
    // File.list() returns null for a missing/non-directory path or on I/O error;
    // check before creating the output so a bad directory leaves no empty file
    String[] fnameBlocks = new File(lpdir).list();
    if (fnameBlocks == null) {
        throw new IOException("Unable to list staging directory: " + lpdir);
    }
    Path path = new Path(dir + "/" + key);
    FileSystem fs = IOUtilFunctions.getFileSystem(path, job);
    BufferedWriter out = new BufferedWriter(new OutputStreamWriter(fs.create(path, true)));
    try {
        // for obj reuse and preventing repeated buffer re-allocations
        StringBuilder sb = new StringBuilder();
        for (String fnameBlock : fnameBlocks) {
            LinkedList<Cell> tmp = StagingFileUtils.readCellListFromLocal(lpdir + "/" + fnameBlock);
            for (Cell c : tmp) {
                sb.append(c.getRow());
                sb.append(' ');
                sb.append(c.getCol());
                sb.append(' ');
                sb.append(c.getValue());
                sb.append('\n');
                out.write(sb.toString());
                sb.setLength(0);
            }
        }
    } finally {
        IOUtilFunctions.closeSilently(out);
    }
}
Aggregations