Search in sources :

Example 6 with Cell

Use of org.apache.sysml.runtime.controlprogram.parfor.util.Cell in the Apache SystemML project.

Source: class ResultMergeLocalFile, method createBinaryCellStagingFile.

@SuppressWarnings("deprecation")
private static void createBinaryCellStagingFile(String fnameStaging, MatrixObject mo, long ID) throws IOException, DMLRuntimeException {
    // Reads every cell of the binary-cell matrix backing 'mo' from HDFS and
    // spills the cells buffer-wise into the local staging area under
    // fnameStaging/ID, bounding memory via periodic flushes.
    JobConf job = new JobConf(ConfigurationManager.getCachedJobConf());
    Path srcPath = new Path(mo.getFileName());
    FileSystem fs = IOUtilFunctions.getFileSystem(srcPath, job);

    MatrixCharacteristics mc = mo.getMatrixCharacteristics();
    final int brlen = mc.getRowsPerBlock();
    final int bclen = mc.getColsPerBlock();

    // reusable key/value writables and the in-memory cell buffer
    MatrixIndexes ix = new MatrixIndexes();
    MatrixCell mcell = new MatrixCell();
    LinkedList<Cell> buffer = new LinkedList<>();

    for (Path part : IOUtilFunctions.getSequenceFilePaths(fs, srcPath)) {
        SequenceFile.Reader reader = new SequenceFile.Reader(fs, part, job);
        try {
            while (reader.next(ix, mcell)) {
                buffer.addLast(new Cell(ix.getRowIndex(), ix.getColumnIndex(), mcell.getValue()));
                // periodic flush once the buffer exceeds the configured size
                if (buffer.size() > StagingFileUtils.CELL_BUFFER_SIZE) {
                    appendCellBufferToStagingArea(fnameStaging, ID, buffer, brlen, bclen);
                    buffer.clear();
                }
            }
            // final flush of any remaining cells for this input file
            if (!buffer.isEmpty()) {
                appendCellBufferToStagingArea(fnameStaging, ID, buffer, brlen, bclen);
                buffer.clear();
            }
        } finally {
            IOUtilFunctions.closeSilently(reader);
        }
    }
}
Also used : Path(org.apache.hadoop.fs.Path) MatrixIndexes(org.apache.sysml.runtime.matrix.data.MatrixIndexes) RecordReader(org.apache.hadoop.mapred.RecordReader) LinkedList(java.util.LinkedList) MatrixCharacteristics(org.apache.sysml.runtime.matrix.MatrixCharacteristics) SequenceFile(org.apache.hadoop.io.SequenceFile) FileSystem(org.apache.hadoop.fs.FileSystem) MatrixCell(org.apache.sysml.runtime.matrix.data.MatrixCell) JobConf(org.apache.hadoop.mapred.JobConf) MatrixCell(org.apache.sysml.runtime.matrix.data.MatrixCell) Cell(org.apache.sysml.runtime.controlprogram.parfor.util.Cell)

Example 7 with Cell

Use of org.apache.sysml.runtime.controlprogram.parfor.util.Cell in the Apache SystemML project.

Source: class DataPartitionerLocal, method appendCellBufferToStagingArea.

private void appendCellBufferToStagingArea(String dir, LinkedList<Cell> buffer, int brlen, int bclen) throws IOException {
    // Groups the buffered cells by their target partition key (derived from
    // the configured partition format) and appends each group as a new block
    // file to the corresponding partition directory in the local staging area.
    // NOTE: cells are remapped in-place to partition-local 1-based indexes.
    HashMap<Long, LinkedList<Cell>> sortedBuffer = new HashMap<>();
    // sort cells in buffer wrt key
    long key = -1;
    for (Cell c : buffer) {
        switch(_format) {
            case ROW_WISE:
                key = c.getRow();
                c.setRow(1);
                break;
            case ROW_BLOCK_WISE:
                key = (c.getRow() - 1) / brlen + 1;
                c.setRow((c.getRow() - 1) % brlen + 1);
                break;
            case COLUMN_WISE:
                key = c.getCol();
                c.setCol(1);
                break;
            case COLUMN_BLOCK_WISE:
                key = (c.getCol() - 1) / bclen + 1;
                c.setCol((c.getCol() - 1) % bclen + 1);
                break;
            default:
        }
        // single map lookup instead of containsKey/put/get triple lookup
        sortedBuffer.computeIfAbsent(key, k -> new LinkedList<>()).addLast(c);
    }
    // write lists of cells to local files, one block file per partition key
    for (Entry<Long, LinkedList<Cell>> e : sortedBuffer.entrySet()) {
        String pdir = LocalFileUtils.checkAndCreateStagingDir(dir + "/" + e.getKey());
        String pfname = pdir + "/" + "block_" + _seq.getNextID();
        StagingFileUtils.writeCellListToLocal(pfname, e.getValue());
    }
}
Also used : HashMap(java.util.HashMap) MatrixCell(org.apache.sysml.runtime.matrix.data.MatrixCell) Cell(org.apache.sysml.runtime.controlprogram.parfor.util.Cell) LinkedList(java.util.LinkedList)

Example 8 with Cell

Use of org.apache.sysml.runtime.controlprogram.parfor.util.Cell in the Apache SystemML project.

Source: class DataPartitionerLocal, method partitionBinaryBlock2BinaryCell.

@SuppressWarnings("deprecation")
@SuppressWarnings("deprecation")
private void partitionBinaryBlock2BinaryCell(String fname, String fnameStaging, String fnameNew, long rlen, long clen, int brlen, int bclen) {
    // Converts a binary-block matrix on HDFS into a partitioned binary-cell
    // matrix: (1) read each block, explode it into individual cells, and
    // spill them into the local staging area; (2) write one sequence file
    // per partition back to HDFS, optionally in parallel.
    try {
        // STEP 1: read matrix from HDFS and write blocks to local staging area
        // check and add input path
        JobConf job = new JobConf(ConfigurationManager.getCachedJobConf());
        Path path = new Path(fname);
        FileSystem fs = IOUtilFunctions.getFileSystem(path, job);
        // prepare sequence file reader; key/value writables reused per record
        MatrixIndexes key = new MatrixIndexes();
        MatrixBlock value = new MatrixBlock();
        LinkedList<Cell> buffer = new LinkedList<>();
        for (Path lpath : IOUtilFunctions.getSequenceFilePaths(fs, path)) {
            SequenceFile.Reader reader = new SequenceFile.Reader(fs, lpath, job);
            try {
                while (// for each block
                reader.next(key, value)) {
                    long row_offset = (key.getRowIndex() - 1) * brlen;
                    long col_offset = (key.getColumnIndex() - 1) * bclen;
                    long rows = value.getNumRows();
                    long cols = value.getNumColumns();
                    // bound check per block
                    if (row_offset + rows < 1 || row_offset + rows > rlen || col_offset + cols < 1 || col_offset + cols > clen) {
                        throw new IOException("Matrix block [" + (row_offset + 1) + ":" + (row_offset + rows) + "," + (col_offset + 1) + ":" + (col_offset + cols) + "] " + "out of overall matrix range [1:" + rlen + ",1:" + clen + "].");
                    }
                    boolean sparse = value.isInSparseFormat();
                    if (sparse) {
                        // SPARSE: iterate non-zeros only
                        Iterator<IJV> iter = value.getSparseBlockIterator();
                        while (iter.hasNext()) {
                            IJV lcell = iter.next();
                            Cell tmp = new Cell(row_offset + lcell.getI() + 1, col_offset + lcell.getJ() + 1, lcell.getV());
                            buffer.addLast(tmp);
                        }
                    } else {
                        // DENSE: scan all entries, keep non-zeros only
                        for (int i = 0; i < rows; i++) {
                            for (int j = 0; j < cols; j++) {
                                double lvalue = value.getValueDenseUnsafe(i, j);
                                if (lvalue != 0) {
                                    // for nnz
                                    Cell tmp = new Cell(row_offset + i + 1, col_offset + j + 1, lvalue);
                                    buffer.addLast(tmp);
                                }
                            }
                        }
                    }
                    appendCellBufferToStagingArea(fnameStaging, buffer, brlen, bclen);
                    buffer.clear();
                }
            } finally {
                IOUtilFunctions.closeSilently(reader);
            }
        }
        // STEP 2: read matrix blocks from staging area and write matrix to HDFS
        String[] fnamesPartitions = new File(fnameStaging).list();
        // File.list() returns null if the directory does not exist or an
        // I/O error occurs -- fail with a meaningful message instead of NPE
        if (fnamesPartitions == null)
            throw new IOException("Local staging directory not accessible: " + fnameStaging);
        if (PARALLEL) {
            int len = Math.min(fnamesPartitions.length, _par);
            Thread[] threads = new Thread[len];
            for (int i = 0; i < len; i++) {
                int start = i * (int) Math.ceil(((double) fnamesPartitions.length) / len);
                int end = (i + 1) * (int) Math.ceil(((double) fnamesPartitions.length) / len) - 1;
                end = Math.min(end, fnamesPartitions.length - 1);
                threads[i] = new Thread(new DataPartitionerWorkerBinaryCell(job, fnameNew, fnameStaging, fnamesPartitions, start, end));
                threads[i].start();
            }
            for (Thread t : threads) t.join();
        } else {
            for (String pdir : fnamesPartitions) writeBinaryCellSequenceFileToHDFS(job, fnameNew, fnameStaging + "/" + pdir);
        }
    } catch (Exception e) {
        // preserve cause for upstream diagnosis
        throw new DMLRuntimeException("Unable to partition binary block matrix.", e);
    }
}
Also used : MatrixBlock(org.apache.sysml.runtime.matrix.data.MatrixBlock) RecordReader(org.apache.hadoop.mapred.RecordReader) SequenceFile(org.apache.hadoop.io.SequenceFile) IJV(org.apache.sysml.runtime.matrix.data.IJV) FileSystem(org.apache.hadoop.fs.FileSystem) JobConf(org.apache.hadoop.mapred.JobConf) MatrixCell(org.apache.sysml.runtime.matrix.data.MatrixCell) Cell(org.apache.sysml.runtime.controlprogram.parfor.util.Cell) Path(org.apache.hadoop.fs.Path) MatrixIndexes(org.apache.sysml.runtime.matrix.data.MatrixIndexes) IOException(java.io.IOException) LinkedList(java.util.LinkedList) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException) IOException(java.io.IOException) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException) SequenceFile(org.apache.hadoop.io.SequenceFile) File(java.io.File)

Example 9 with Cell

Use of org.apache.sysml.runtime.controlprogram.parfor.util.Cell in the Apache SystemML project.

Source: class DataPartitionerLocal, method writeBinaryCellSequenceFileToHDFS.

@SuppressWarnings("deprecation")
public void writeBinaryCellSequenceFileToHDFS(JobConf job, String dir, String lpdir) throws IOException {
    // Merges all staged cell-block files of one local partition directory
    // into a single binary-cell sequence file <dir>/<partition key> on HDFS.
    long partKey = getKeyFromFilePath(lpdir);
    Path target = new Path(dir + "/" + partKey);
    FileSystem fs = IOUtilFunctions.getFileSystem(target, job);
    // beware: writer creation takes ca. 50ms
    SequenceFile.Writer writer = new SequenceFile.Writer(fs, job, target, MatrixIndexes.class, MatrixCell.class);
    try {
        // reusable writables to avoid per-cell allocation
        MatrixIndexes ix = new MatrixIndexes();
        MatrixCell mcell = new MatrixCell();
        for (String fnameBlock : new File(lpdir).list()) {
            for (Cell c : StagingFileUtils.readCellListFromLocal(lpdir + "/" + fnameBlock)) {
                ix.setIndexes(c.getRow(), c.getCol());
                mcell.setValue(c.getValue());
                writer.append(ix, mcell);
            }
        }
    } finally {
        IOUtilFunctions.closeSilently(writer);
    }
}
Also used : Path(org.apache.hadoop.fs.Path) MatrixIndexes(org.apache.sysml.runtime.matrix.data.MatrixIndexes) SequenceFile(org.apache.hadoop.io.SequenceFile) FileSystem(org.apache.hadoop.fs.FileSystem) MatrixCell(org.apache.sysml.runtime.matrix.data.MatrixCell) SequenceFile(org.apache.hadoop.io.SequenceFile) File(java.io.File) MatrixCell(org.apache.sysml.runtime.matrix.data.MatrixCell) Cell(org.apache.sysml.runtime.controlprogram.parfor.util.Cell) OutputStreamWriter(java.io.OutputStreamWriter) BufferedWriter(java.io.BufferedWriter)

Example 10 with Cell

Use of org.apache.sysml.runtime.controlprogram.parfor.util.Cell in the Apache SystemML project.

Source: class DataPartitionerLocal, method writeTextCellFileToHDFS.

public void writeTextCellFileToHDFS(JobConf job, String dir, String lpdir) throws IOException {
    // Merges all staged cell-block files of one local partition directory
    // into a single text-cell file <dir>/<partition key> on HDFS, one line
    // per cell in "row col value" format.
    long partKey = getKeyFromFilePath(lpdir);
    Path target = new Path(dir + "/" + partKey);
    FileSystem fs = IOUtilFunctions.getFileSystem(target, job);
    BufferedWriter out = new BufferedWriter(new OutputStreamWriter(fs.create(target, true)));
    try {
        // single StringBuilder reused across cells to prevent repeated
        // buffer re-allocations
        StringBuilder sb = new StringBuilder();
        for (String fnameBlock : new File(lpdir).list()) {
            for (Cell c : StagingFileUtils.readCellListFromLocal(lpdir + "/" + fnameBlock)) {
                sb.setLength(0);
                sb.append(c.getRow()).append(' ')
                  .append(c.getCol()).append(' ')
                  .append(c.getValue()).append('\n');
                out.write(sb.toString());
            }
        }
    } finally {
        IOUtilFunctions.closeSilently(out);
    }
}
Also used : Path(org.apache.hadoop.fs.Path) FileSystem(org.apache.hadoop.fs.FileSystem) OutputStreamWriter(java.io.OutputStreamWriter) SequenceFile(org.apache.hadoop.io.SequenceFile) File(java.io.File) MatrixCell(org.apache.sysml.runtime.matrix.data.MatrixCell) Cell(org.apache.sysml.runtime.controlprogram.parfor.util.Cell) BufferedWriter(java.io.BufferedWriter)

Aggregations

Cell (org.apache.sysml.runtime.controlprogram.parfor.util.Cell)18 MatrixCell (org.apache.sysml.runtime.matrix.data.MatrixCell)18 LinkedList (java.util.LinkedList)14 Path (org.apache.hadoop.fs.Path)14 SequenceFile (org.apache.hadoop.io.SequenceFile)12 File (java.io.File)10 FileSystem (org.apache.hadoop.fs.FileSystem)10 JobConf (org.apache.hadoop.mapred.JobConf)10 MatrixIndexes (org.apache.sysml.runtime.matrix.data.MatrixIndexes)8 IOException (java.io.IOException)6 RecordReader (org.apache.hadoop.mapred.RecordReader)6 DMLRuntimeException (org.apache.sysml.runtime.DMLRuntimeException)6 BufferedWriter (java.io.BufferedWriter)4 OutputStreamWriter (java.io.OutputStreamWriter)4 HashMap (java.util.HashMap)4 LongWritable (org.apache.hadoop.io.LongWritable)4 Text (org.apache.hadoop.io.Text)4 InputSplit (org.apache.hadoop.mapred.InputSplit)4 TextInputFormat (org.apache.hadoop.mapred.TextInputFormat)4 MatrixCharacteristics (org.apache.sysml.runtime.matrix.MatrixCharacteristics)4