Search in sources :

Example 11 with Cell

use of org.apache.sysml.runtime.controlprogram.parfor.util.Cell in project incubator-systemml by apache.

The following example shows the partitionBinaryCell method of the DataPartitionerLocal class.

/**
 * Partitions a binary-cell matrix stored on HDFS: reads all cells from the
 * input sequence files, groups them into blocks in a local staging area, and
 * then writes one partition sequence file per staging directory back to HDFS
 * (optionally in parallel with up to {@code _par} worker threads).
 *
 * @param fname        HDFS path of the input binary-cell matrix
 * @param fnameStaging local staging directory for intermediate cell lists
 * @param fnameNew     HDFS output directory for the written partitions
 * @param rlen         number of matrix rows (used for bounds reporting)
 * @param clen         number of matrix columns (used for bounds reporting)
 * @param brlen        block row length
 * @param bclen        block column length
 * @throws DMLRuntimeException if a cell is out of range or partitioning fails
 */
@SuppressWarnings("deprecation")
private void partitionBinaryCell(String fname, String fnameStaging, String fnameNew, long rlen, long clen, int brlen, int bclen) {
    // track the last cell read (-1 = none yet) so the error handler can
    // distinguish out-of-range cells from other failures
    long row = -1;
    long col = -1;
    try {
        // STEP 1: read matrix from HDFS and write blocks to local staging area
        // check and add input path
        JobConf job = new JobConf(ConfigurationManager.getCachedJobConf());
        Path path = new Path(fname);
        FileSystem fs = IOUtilFunctions.getFileSystem(path, job);
        // prepare sequence file reader, and write to local staging area;
        // key/value objects are reused across reader.next() calls
        LinkedList<Cell> buffer = new LinkedList<>();
        MatrixIndexes key = new MatrixIndexes();
        MatrixCell value = new MatrixCell();
        for (Path lpath : IOUtilFunctions.getSequenceFilePaths(fs, path)) {
            SequenceFile.Reader reader = new SequenceFile.Reader(fs, lpath, job);
            try {
                while (reader.next(key, value)) {
                    row = key.getRowIndex();
                    col = key.getColumnIndex();
                    buffer.addLast(new Cell(row, col, value.getValue()));
                    // periodic flush to bound memory consumption
                    if (buffer.size() > StagingFileUtils.CELL_BUFFER_SIZE) {
                        appendCellBufferToStagingArea(fnameStaging, buffer, brlen, bclen);
                        buffer.clear();
                    }
                }
                // final flush
                if (!buffer.isEmpty()) {
                    appendCellBufferToStagingArea(fnameStaging, buffer, brlen, bclen);
                    buffer.clear();
                }
            } finally {
                IOUtilFunctions.closeSilently(reader);
            }
        }
        // STEP 2: read matrix blocks from staging area and write matrix to HDFS
        String[] fnamesPartitions = new File(fnameStaging).list();
        if (PARALLEL) {
            int len = Math.min(fnamesPartitions.length, _par);
            Thread[] threads = new Thread[len];
            for (int i = 0; i < len; i++) {
                // static range partitioning of staging dirs over worker threads
                int start = i * (int) Math.ceil(((double) fnamesPartitions.length) / len);
                int end = (i + 1) * (int) Math.ceil(((double) fnamesPartitions.length) / len) - 1;
                end = Math.min(end, fnamesPartitions.length - 1);
                threads[i] = new Thread(new DataPartitionerWorkerBinaryCell(job, fnameNew, fnameStaging, fnamesPartitions, start, end));
                threads[i].start();
            }
            for (Thread t : threads)
                t.join();
        } else {
            for (String pdir : fnamesPartitions)
                writeBinaryCellSequenceFileToHDFS(job, fnameNew, fnameStaging + "/" + pdir);
        }
    } catch (Exception e) {
        // post-mortem error handling and bounds checking:
        // report out-of-range only if a cell was actually read (sentinel -1),
        // and always preserve the root cause for diagnosis (the original code
        // dropped 'e' in the out-of-range branch and misreported failures
        // that occurred before the first cell as cell [-1,-1])
        if (row != -1 && col != -1 && (row < 1 || row > rlen || col < 1 || col > clen)) {
            throw new DMLRuntimeException("Matrix cell [" + row + "," + col + "] " + "out of overall matrix range [1:" + rlen + ",1:" + clen + "].", e);
        } else {
            throw new DMLRuntimeException("Unable to partition binary cell matrix.", e);
        }
    }
}
Also used : Path(org.apache.hadoop.fs.Path) MatrixIndexes(org.apache.sysml.runtime.matrix.data.MatrixIndexes) RecordReader(org.apache.hadoop.mapred.RecordReader) LinkedList(java.util.LinkedList) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException) IOException(java.io.IOException) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException) SequenceFile(org.apache.hadoop.io.SequenceFile) FileSystem(org.apache.hadoop.fs.FileSystem) MatrixCell(org.apache.sysml.runtime.matrix.data.MatrixCell) JobConf(org.apache.hadoop.mapred.JobConf) MatrixCell(org.apache.sysml.runtime.matrix.data.MatrixCell) Cell(org.apache.sysml.runtime.controlprogram.parfor.util.Cell) SequenceFile(org.apache.hadoop.io.SequenceFile) File(java.io.File)

Example 12 with Cell

use of org.apache.sysml.runtime.controlprogram.parfor.util.Cell in project incubator-systemml by apache.

The following example shows the writeTextCellFileToHDFS method of the DataPartitionerLocal class.

/**
 * Writes one staged partition (a local directory of cell-list block files) to
 * HDFS as a text-cell file named after the partition key, one
 * "row col value" line per cell.
 *
 * @param job   Hadoop job configuration used to resolve the file system
 * @param dir   HDFS output directory
 * @param lpdir local staging directory of this partition
 * @throws IOException if reading the staged cells or writing to HDFS fails
 */
public void writeTextCellFileToHDFS(JobConf job, String dir, String lpdir) throws IOException {
    long partKey = getKeyFromFilePath(lpdir);
    Path target = new Path(dir + "/" + partKey);
    FileSystem fs = IOUtilFunctions.getFileSystem(target, job);
    BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(fs.create(target, true)));
    try {
        // single reused builder prevents repeated buffer re-allocations
        StringBuilder line = new StringBuilder();
        String[] blockFiles = new File(lpdir).list();
        for (String blockFile : blockFiles) {
            for (Cell cell : StagingFileUtils.readCellListFromLocal(lpdir + "/" + blockFile)) {
                line.append(cell.getRow()).append(' ')
                    .append(cell.getCol()).append(' ')
                    .append(cell.getValue()).append('\n');
                writer.write(line.toString());
                line.setLength(0);
            }
        }
    } finally {
        // close swallows secondary exceptions, matching original semantics
        IOUtilFunctions.closeSilently(writer);
    }
}
Also used : Path(org.apache.hadoop.fs.Path) FileSystem(org.apache.hadoop.fs.FileSystem) OutputStreamWriter(java.io.OutputStreamWriter) SequenceFile(org.apache.hadoop.io.SequenceFile) File(java.io.File) MatrixCell(org.apache.sysml.runtime.matrix.data.MatrixCell) Cell(org.apache.sysml.runtime.controlprogram.parfor.util.Cell) BufferedWriter(java.io.BufferedWriter)

Example 13 with Cell

use of org.apache.sysml.runtime.controlprogram.parfor.util.Cell in project incubator-systemml by apache.

The following example shows the writeBinaryCellSequenceFileToHDFS method of the DataPartitionerLocal class.

/**
 * Writes one staged partition (a local directory of cell-list block files) to
 * HDFS as a binary-cell sequence file of (MatrixIndexes, MatrixCell) pairs,
 * named after the partition key.
 *
 * @param job   Hadoop job configuration used to resolve the file system
 * @param dir   HDFS output directory
 * @param lpdir local staging directory of this partition
 * @throws IOException if reading the staged cells or writing to HDFS fails
 */
@SuppressWarnings("deprecation")
public void writeBinaryCellSequenceFileToHDFS(JobConf job, String dir, String lpdir) throws IOException {
    long partKey = getKeyFromFilePath(lpdir);
    Path target = new Path(dir + "/" + partKey);
    FileSystem fs = IOUtilFunctions.getFileSystem(target, job);
    // beware: writer construction costs ca 50ms
    SequenceFile.Writer writer = new SequenceFile.Writer(fs, job, target, MatrixIndexes.class, MatrixCell.class);
    try {
        // reused key/value objects avoid per-cell allocations
        MatrixIndexes ix = new MatrixIndexes();
        MatrixCell mcell = new MatrixCell();
        String[] blockFiles = new File(lpdir).list();
        for (String blockFile : blockFiles) {
            for (Cell c : StagingFileUtils.readCellListFromLocal(lpdir + "/" + blockFile)) {
                ix.setIndexes(c.getRow(), c.getCol());
                mcell.setValue(c.getValue());
                writer.append(ix, mcell);
            }
        }
    } finally {
        // close swallows secondary exceptions, matching original semantics
        IOUtilFunctions.closeSilently(writer);
    }
}
Also used : Path(org.apache.hadoop.fs.Path) MatrixIndexes(org.apache.sysml.runtime.matrix.data.MatrixIndexes) SequenceFile(org.apache.hadoop.io.SequenceFile) FileSystem(org.apache.hadoop.fs.FileSystem) MatrixCell(org.apache.sysml.runtime.matrix.data.MatrixCell) SequenceFile(org.apache.hadoop.io.SequenceFile) File(java.io.File) MatrixCell(org.apache.sysml.runtime.matrix.data.MatrixCell) Cell(org.apache.sysml.runtime.controlprogram.parfor.util.Cell) OutputStreamWriter(java.io.OutputStreamWriter) BufferedWriter(java.io.BufferedWriter)

Example 14 with Cell

use of org.apache.sysml.runtime.controlprogram.parfor.util.Cell in project incubator-systemml by apache.

The following example shows the partitionBinaryBlock2BinaryCell method of the DataPartitionerLocal class.

/**
 * Partitions a binary-block matrix on HDFS into binary-cell partitions:
 * each input block is expanded into individual cells (sparse or dense path),
 * staged locally grouped by block, and then written back to HDFS as
 * binary-cell sequence files, optionally in parallel.
 *
 * @param fname        HDFS path of the input binary-block matrix
 * @param fnameStaging local staging directory for intermediate cell lists
 * @param fnameNew     HDFS output directory for the written partitions
 * @param rlen         number of matrix rows (for per-block bounds checks)
 * @param clen         number of matrix columns (for per-block bounds checks)
 * @param brlen        block row length
 * @param bclen        block column length
 * @throws DMLRuntimeException if a block is out of range or partitioning fails
 */
@SuppressWarnings("deprecation")
private void partitionBinaryBlock2BinaryCell(String fname, String fnameStaging, String fnameNew, long rlen, long clen, int brlen, int bclen) {
    try {
        // STEP 1: read matrix from HDFS and write blocks to local staging area
        // check and add input path
        JobConf job = new JobConf(ConfigurationManager.getCachedJobConf());
        Path path = new Path(fname);
        FileSystem fs = IOUtilFunctions.getFileSystem(path, job);
        // prepare sequence file reader, and write to local staging area;
        // key/value objects are reused across reader.next() calls
        MatrixIndexes key = new MatrixIndexes();
        MatrixBlock value = new MatrixBlock();
        LinkedList<Cell> buffer = new LinkedList<>();
        for (Path lpath : IOUtilFunctions.getSequenceFilePaths(fs, path)) {
            SequenceFile.Reader reader = new SequenceFile.Reader(fs, lpath, job);
            try {
                while (// for each block
                reader.next(key, value)) {
                    // block indexes are 1-based; offsets are 0-based global positions
                    long row_offset = (key.getRowIndex() - 1) * brlen;
                    long col_offset = (key.getColumnIndex() - 1) * bclen;
                    long rows = value.getNumRows();
                    long cols = value.getNumColumns();
                    // bound check per block
                    if (row_offset + rows < 1 || row_offset + rows > rlen || col_offset + cols < 1 || col_offset + cols > clen) {
                        throw new IOException("Matrix block [" + (row_offset + 1) + ":" + (row_offset + rows) + "," + (col_offset + 1) + ":" + (col_offset + cols) + "] " + "out of overall matrix range [1:" + rlen + ",1:" + clen + "].");
                    }
                    boolean sparse = value.isInSparseFormat();
                    if (// SPARSE: iterate stored cells only
                    sparse) {
                        Iterator<IJV> iter = value.getSparseBlockIterator();
                        while (iter.hasNext()) {
                            IJV lcell = iter.next();
                            // convert block-local 0-based indexes to global 1-based
                            Cell tmp = new Cell(row_offset + lcell.getI() + 1, col_offset + lcell.getJ() + 1, lcell.getV());
                            buffer.addLast(tmp);
                        }
                    } else // DENSE: scan all positions, emit non-zeros only
                    {
                        for (int i = 0; i < rows; i++) for (int j = 0; j < cols; j++) {
                            double lvalue = value.getValueDenseUnsafe(i, j);
                            if (// for nnz
                            lvalue != 0) {
                                Cell tmp = new Cell(row_offset + i + 1, col_offset + j + 1, lvalue);
                                buffer.addLast(tmp);
                            }
                        }
                    }
                    // flush after every block (buffer holds at most one block's cells)
                    appendCellBufferToStagingArea(fnameStaging, buffer, brlen, bclen);
                    buffer.clear();
                }
            } finally {
                IOUtilFunctions.closeSilently(reader);
            }
        }
        // STEP 2: read matrix blocks from staging area and write matrix to HDFS
        String[] fnamesPartitions = new File(fnameStaging).list();
        if (PARALLEL) {
            int len = Math.min(fnamesPartitions.length, _par);
            Thread[] threads = new Thread[len];
            for (int i = 0; i < len; i++) {
                // static range partitioning of staging dirs over worker threads
                int start = i * (int) Math.ceil(((double) fnamesPartitions.length) / len);
                int end = (i + 1) * (int) Math.ceil(((double) fnamesPartitions.length) / len) - 1;
                end = Math.min(end, fnamesPartitions.length - 1);
                threads[i] = new Thread(new DataPartitionerWorkerBinaryCell(job, fnameNew, fnameStaging, fnamesPartitions, start, end));
                threads[i].start();
            }
            for (Thread t : threads) t.join();
        } else {
            for (String pdir : fnamesPartitions) writeBinaryCellSequenceFileToHDFS(job, fnameNew, fnameStaging + "/" + pdir);
        }
    } catch (Exception e) {
        throw new DMLRuntimeException("Unable to partition binary block matrix.", e);
    }
}
Also used : MatrixBlock(org.apache.sysml.runtime.matrix.data.MatrixBlock) RecordReader(org.apache.hadoop.mapred.RecordReader) SequenceFile(org.apache.hadoop.io.SequenceFile) IJV(org.apache.sysml.runtime.matrix.data.IJV) FileSystem(org.apache.hadoop.fs.FileSystem) JobConf(org.apache.hadoop.mapred.JobConf) MatrixCell(org.apache.sysml.runtime.matrix.data.MatrixCell) Cell(org.apache.sysml.runtime.controlprogram.parfor.util.Cell) Path(org.apache.hadoop.fs.Path) MatrixIndexes(org.apache.sysml.runtime.matrix.data.MatrixIndexes) IOException(java.io.IOException) LinkedList(java.util.LinkedList) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException) IOException(java.io.IOException) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException) SequenceFile(org.apache.hadoop.io.SequenceFile) File(java.io.File)

Example 15 with Cell

use of org.apache.sysml.runtime.controlprogram.parfor.util.Cell in project incubator-systemml by apache.

The following example shows the appendCellBufferToStagingArea method of the ResultMergeLocalFile class.

/**
 * Spills a buffer of cells into the local staging area, grouped by block:
 * each cell's global 1-based indexes are rewritten in-place to block-local
 * 0-based offsets, cells are bucketed by (block-row, block-column), and each
 * bucket is appended to the file {@code <staging>/<brow>_<bcol>/<ID>}.
 *
 * @param fnameStaging root of the local staging area
 * @param ID           result identifier used as the per-block file name
 * @param buffer       cells to spill (their row/col indexes are mutated)
 * @param brlen        block row length
 * @param bclen        block column length
 * @throws IOException if writing a cell list to the local file system fails
 */
private static void appendCellBufferToStagingArea(String fnameStaging, long ID, LinkedList<Cell> buffer, int brlen, int bclen) throws IOException {
    HashMap<Long, HashMap<Long, LinkedList<Cell>>> grouped = new HashMap<>();
    for (Cell cell : buffer) {
        // 1-based block indexes of this cell
        long blockRow = (cell.getRow() - 1) / brlen + 1;
        long blockCol = (cell.getCol() - 1) / bclen + 1;
        // global 1-based index of the block's first row/column
        long rowBase = (blockRow - 1) * brlen + 1;
        long colBase = (blockCol - 1) * bclen + 1;
        // rewrite to block-local 0-based offsets (mutates the shared cell)
        cell.setRow(cell.getRow() - rowBase);
        cell.setCol(cell.getCol() - colBase);
        // get-or-create the (blockRow -> blockCol -> cells) bucket
        HashMap<Long, LinkedList<Cell>> rowGroup = grouped.get(blockRow);
        if (rowGroup == null) {
            rowGroup = new HashMap<Long, LinkedList<Cell>>();
            grouped.put(blockRow, rowGroup);
        }
        LinkedList<Cell> cells = rowGroup.get(blockCol);
        if (cells == null) {
            cells = new LinkedList<Cell>();
            rowGroup.put(blockCol, cells);
        }
        cells.addLast(cell);
    }
    // write lists of cells to local files, one directory per block
    for (Entry<Long, HashMap<Long, LinkedList<Cell>>> rowEntry : grouped.entrySet()) {
        long blockRow = rowEntry.getKey();
        for (Entry<Long, LinkedList<Cell>> colEntry : rowEntry.getValue().entrySet()) {
            long blockCol = colEntry.getKey();
            String dir = fnameStaging + "/" + blockRow + "_" + blockCol;
            LocalFileUtils.checkAndCreateStagingDir(dir);
            StagingFileUtils.writeCellListToLocal(dir + "/" + ID, colEntry.getValue());
        }
    }
}
Also used : HashMap(java.util.HashMap) MatrixCell(org.apache.sysml.runtime.matrix.data.MatrixCell) Cell(org.apache.sysml.runtime.controlprogram.parfor.util.Cell) LinkedList(java.util.LinkedList)

Aggregations

Cell (org.apache.sysml.runtime.controlprogram.parfor.util.Cell)18 MatrixCell (org.apache.sysml.runtime.matrix.data.MatrixCell)18 LinkedList (java.util.LinkedList)14 Path (org.apache.hadoop.fs.Path)14 SequenceFile (org.apache.hadoop.io.SequenceFile)12 File (java.io.File)10 FileSystem (org.apache.hadoop.fs.FileSystem)10 JobConf (org.apache.hadoop.mapred.JobConf)10 MatrixIndexes (org.apache.sysml.runtime.matrix.data.MatrixIndexes)8 IOException (java.io.IOException)6 RecordReader (org.apache.hadoop.mapred.RecordReader)6 DMLRuntimeException (org.apache.sysml.runtime.DMLRuntimeException)6 BufferedWriter (java.io.BufferedWriter)4 OutputStreamWriter (java.io.OutputStreamWriter)4 HashMap (java.util.HashMap)4 LongWritable (org.apache.hadoop.io.LongWritable)4 Text (org.apache.hadoop.io.Text)4 InputSplit (org.apache.hadoop.mapred.InputSplit)4 TextInputFormat (org.apache.hadoop.mapred.TextInputFormat)4 MatrixCharacteristics (org.apache.sysml.runtime.matrix.MatrixCharacteristics)4