
Example 16 with Cell

Use of org.apache.sysml.runtime.controlprogram.parfor.util.Cell in project systemml by apache.

From class ResultMergeLocalFile, method appendCellBufferToStagingArea:

private static void appendCellBufferToStagingArea(String fnameStaging, long ID, LinkedList<Cell> buffer, int brlen, int bclen) throws IOException {
    // group cells by (block row, block column), converting each cell's
    // global 1-based coordinates into 0-based offsets within its block
    HashMap<Long, HashMap<Long, LinkedList<Cell>>> sortedBuffer = new HashMap<>();
    for (Cell c : buffer) {
        long brow = (c.getRow() - 1) / brlen + 1;
        long bcol = (c.getCol() - 1) / bclen + 1;
        long row_offset = (brow - 1) * brlen + 1;
        long col_offset = (bcol - 1) * bclen + 1;
        c.setRow(c.getRow() - row_offset);
        c.setCol(c.getCol() - col_offset);
        sortedBuffer.computeIfAbsent(brow, k -> new HashMap<>())
            .computeIfAbsent(bcol, k -> new LinkedList<>())
            .addLast(c);
    }
    // write each block's cell list to its own local staging directory
    for (Entry<Long, HashMap<Long, LinkedList<Cell>>> e : sortedBuffer.entrySet()) {
        long brow = e.getKey();
        for (Entry<Long, LinkedList<Cell>> e2 : e.getValue().entrySet()) {
            long bcol = e2.getKey();
            String dir = fnameStaging + "/" + brow + "_" + bcol;
            LocalFileUtils.checkAndCreateStagingDir(dir);
            StagingFileUtils.writeCellListToLocal(dir + "/" + ID, e2.getValue());
        }
    }
}
Also used: HashMap(java.util.HashMap) MatrixCell(org.apache.sysml.runtime.matrix.data.MatrixCell) Cell(org.apache.sysml.runtime.controlprogram.parfor.util.Cell) LinkedList(java.util.LinkedList)
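
The block-index arithmetic above is worth checking with concrete numbers. Below is a minimal standalone sketch (a hypothetical demo class, not part of SystemML) that maps a 1-based global row coordinate to its block index, the block's first global row, and the 0-based offset within the block, mirroring the first loop; the same mapping applies to columns with bclen:

public class BlockMappingDemo {
    public static void main(String[] args) {
        int brlen = 1000; // block size in rows (illustrative value)
        long row = 2500;  // 1-based global row index (illustrative value)
        long brow = (row - 1) / brlen + 1;        // block row index: 3
        long row_offset = (brow - 1) * brlen + 1; // first global row of the block: 2001
        long localRow = row - row_offset;         // 0-based row within the block: 499
        System.out.println(brow + " / " + row_offset + " / " + localRow);
    }
}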

Example 17 with Cell

Use of org.apache.sysml.runtime.controlprogram.parfor.util.Cell in project systemml by apache.

From class DataPartitionerLocal, method partitionTextCell:

private void partitionTextCell(String fname, String fnameStaging, String fnameNew, long rlen, long clen, int brlen, int bclen) {
    long row = -1;
    long col = -1;
    try {
        // STEP 1: read matrix from HDFS and write blocks to local staging area
        // check and add input path
        JobConf job = new JobConf(ConfigurationManager.getCachedJobConf());
        Path path = new Path(fname);
        FileInputFormat.addInputPath(job, path);
        TextInputFormat informat = new TextInputFormat();
        informat.configure(job);
        InputSplit[] splits = informat.getSplits(job, 1);
        LinkedList<Cell> buffer = new LinkedList<>();
        LongWritable key = new LongWritable();
        Text value = new Text();
        FastStringTokenizer st = new FastStringTokenizer(' ');
        for (InputSplit split : splits) {
            RecordReader<LongWritable, Text> reader = informat.getRecordReader(split, job, Reporter.NULL);
            try {
                while (reader.next(key, value)) {
                    // reset tokenizer
                    st.reset(value.toString());
                    row = st.nextLong();
                    col = st.nextLong();
                    double lvalue = st.nextDouble();
                    Cell tmp = new Cell(row, col, lvalue);
                    buffer.addLast(tmp);
                    // periodic flush once the buffer exceeds its capacity
                    if (buffer.size() > StagingFileUtils.CELL_BUFFER_SIZE) {
                        appendCellBufferToStagingArea(fnameStaging, buffer, brlen, bclen);
                        buffer.clear();
                    }
                }
                // final flush
                if (!buffer.isEmpty()) {
                    appendCellBufferToStagingArea(fnameStaging, buffer, brlen, bclen);
                    buffer.clear();
                }
            } finally {
                IOUtilFunctions.closeSilently(reader);
            }
        }
        // STEP 2: read matrix blocks from staging area and write matrix to HDFS
        String[] fnamesPartitions = new File(fnameStaging).list();
        if (PARALLEL) {
            int len = Math.min(fnamesPartitions.length, _par);
            Thread[] threads = new Thread[len];
            // assign each worker thread a contiguous chunk of partition directories
            for (int i = 0; i < len; i++) {
                int start = i * (int) Math.ceil(((double) fnamesPartitions.length) / len);
                int end = (i + 1) * (int) Math.ceil(((double) fnamesPartitions.length) / len) - 1;
                end = Math.min(end, fnamesPartitions.length - 1);
                threads[i] = new Thread(new DataPartitionerWorkerTextCell(job, fnameNew, fnameStaging, fnamesPartitions, start, end));
                threads[i].start();
            }
            for (Thread t : threads) t.join();
        } else {
            for (String pdir : fnamesPartitions) writeTextCellFileToHDFS(job, fnameNew, fnameStaging + "/" + pdir);
        }
    } catch (Exception e) {
        // post-mortem error handling and bounds checking
        if (row < 1 || row > rlen || col < 1 || col > clen) {
            throw new DMLRuntimeException("Matrix cell [" + (row) + "," + (col) + "] " + "out of overall matrix range [1:" + rlen + ",1:" + clen + "].");
        } else
            throw new DMLRuntimeException("Unable to partition text cell matrix.", e);
    }
}
Also used: Path(org.apache.hadoop.fs.Path) Text(org.apache.hadoop.io.Text) LinkedList(java.util.LinkedList) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException) IOException(java.io.IOException) FastStringTokenizer(org.apache.sysml.runtime.util.FastStringTokenizer) TextInputFormat(org.apache.hadoop.mapred.TextInputFormat) LongWritable(org.apache.hadoop.io.LongWritable) JobConf(org.apache.hadoop.mapred.JobConf) InputSplit(org.apache.hadoop.mapred.InputSplit) MatrixCell(org.apache.sysml.runtime.matrix.data.MatrixCell) Cell(org.apache.sysml.runtime.controlprogram.parfor.util.Cell) SequenceFile(org.apache.hadoop.io.SequenceFile) File(java.io.File)
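
partitionTextCell parses the space-separated IJV ("text cell") layout: one "row col value" triple per line, with 1-based indices. A minimal sketch for producing such an input file, so the expected shape is concrete (the file name and values are illustrative only):

import java.io.FileWriter;
import java.io.IOException;

public class TextCellWriterDemo {
    public static void main(String[] args) throws IOException {
        // write a sparse 2x2 matrix in text cell format: "row col value" per line
        try (FileWriter w = new FileWriter("matrix.textcell")) {
            w.write("1 1 3.0\n");
            w.write("1 2 4.5\n");
            w.write("2 1 7.0\n");
            // zero cells are typically omitted in sparse text representations
        }
    }
}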

Example 18 with Cell

Use of org.apache.sysml.runtime.controlprogram.parfor.util.Cell in project systemml by apache.

From class DataPartitionerLocal, method partitionBinaryCell:

@SuppressWarnings("deprecation")
private void partitionBinaryCell(String fname, String fnameStaging, String fnameNew, long rlen, long clen, int brlen, int bclen) {
    long row = -1;
    long col = -1;
    try {
        // STEP 1: read matrix from HDFS and write blocks to local staging area
        // check and add input path
        JobConf job = new JobConf(ConfigurationManager.getCachedJobConf());
        Path path = new Path(fname);
        FileSystem fs = IOUtilFunctions.getFileSystem(path, job);
        // prepare sequence file reader, and write to local staging area
        LinkedList<Cell> buffer = new LinkedList<>();
        MatrixIndexes key = new MatrixIndexes();
        MatrixCell value = new MatrixCell();
        for (Path lpath : IOUtilFunctions.getSequenceFilePaths(fs, path)) {
            // deprecated Reader constructor, hence the @SuppressWarnings on the method
            SequenceFile.Reader reader = new SequenceFile.Reader(fs, lpath, job);
            try {
                while (reader.next(key, value)) {
                    row = key.getRowIndex();
                    col = key.getColumnIndex();
                    Cell tmp = new Cell(row, col, value.getValue());
                    buffer.addLast(tmp);
                    // periodic flush once the buffer exceeds its capacity
                    if (buffer.size() > StagingFileUtils.CELL_BUFFER_SIZE) {
                        appendCellBufferToStagingArea(fnameStaging, buffer, brlen, bclen);
                        buffer.clear();
                    }
                }
                // final flush
                if (!buffer.isEmpty()) {
                    appendCellBufferToStagingArea(fnameStaging, buffer, brlen, bclen);
                    buffer.clear();
                }
            } finally {
                IOUtilFunctions.closeSilently(reader);
            }
        }
        // STEP 2: read matrix blocks from staging area and write matrix to HDFS
        String[] fnamesPartitions = new File(fnameStaging).list();
        if (PARALLEL) {
            int len = Math.min(fnamesPartitions.length, _par);
            Thread[] threads = new Thread[len];
            for (int i = 0; i < len; i++) {
                int start = i * (int) Math.ceil(((double) fnamesPartitions.length) / len);
                int end = (i + 1) * (int) Math.ceil(((double) fnamesPartitions.length) / len) - 1;
                end = Math.min(end, fnamesPartitions.length - 1);
                threads[i] = new Thread(new DataPartitionerWorkerBinaryCell(job, fnameNew, fnameStaging, fnamesPartitions, start, end));
                threads[i].start();
            }
            for (Thread t : threads) t.join();
        } else {
            for (String pdir : fnamesPartitions) writeBinaryCellSequenceFileToHDFS(job, fnameNew, fnameStaging + "/" + pdir);
        }
    } catch (Exception e) {
        // post-mortem error handling and bounds checking
        if (row < 1 || row > rlen || col < 1 || col > clen) {
            throw new DMLRuntimeException("Matrix cell [" + (row) + "," + (col) + "] " + "out of overall matrix range [1:" + rlen + ",1:" + clen + "].");
        } else
            throw new DMLRuntimeException("Unable to partition binary cell matrix.", e);
    }
}
Also used: Path(org.apache.hadoop.fs.Path) MatrixIndexes(org.apache.sysml.runtime.matrix.data.MatrixIndexes) RecordReader(org.apache.hadoop.mapred.RecordReader) LinkedList(java.util.LinkedList) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException) IOException(java.io.IOException) SequenceFile(org.apache.hadoop.io.SequenceFile) FileSystem(org.apache.hadoop.fs.FileSystem) MatrixCell(org.apache.sysml.runtime.matrix.data.MatrixCell) JobConf(org.apache.hadoop.mapred.JobConf) Cell(org.apache.sysml.runtime.controlprogram.parfor.util.Cell) File(java.io.File)
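
The binary cell format read here is a Hadoop SequenceFile keyed by MatrixIndexes with MatrixCell values. The following sketch mirrors the read loop of STEP 1 in isolation; the path is illustrative, and it uses the same deprecated SequenceFile.Reader constructor as the example above:

import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.mapred.JobConf;
import org.apache.sysml.runtime.matrix.data.MatrixCell;
import org.apache.sysml.runtime.matrix.data.MatrixIndexes;

public class BinaryCellScanDemo {
    public static void main(String[] args) throws Exception {
        JobConf job = new JobConf();
        Path path = new Path("hdfs:/tmp/matrix.binarycell"); // illustrative path
        FileSystem fs = path.getFileSystem(job);
        MatrixIndexes key = new MatrixIndexes();
        MatrixCell value = new MatrixCell();
        @SuppressWarnings("deprecation")
        SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, job);
        try {
            // print each cell as [row,col] = value
            while (reader.next(key, value))
                System.out.println("[" + key.getRowIndex() + "," + key.getColumnIndex() + "] = " + value.getValue());
        } finally {
            reader.close();
        }
    }
}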

Aggregations

Cell (org.apache.sysml.runtime.controlprogram.parfor.util.Cell): 18 usages
MatrixCell (org.apache.sysml.runtime.matrix.data.MatrixCell): 18 usages
LinkedList (java.util.LinkedList): 14 usages
Path (org.apache.hadoop.fs.Path): 14 usages
SequenceFile (org.apache.hadoop.io.SequenceFile): 12 usages
File (java.io.File): 10 usages
FileSystem (org.apache.hadoop.fs.FileSystem): 10 usages
JobConf (org.apache.hadoop.mapred.JobConf): 10 usages
MatrixIndexes (org.apache.sysml.runtime.matrix.data.MatrixIndexes): 8 usages
IOException (java.io.IOException): 6 usages
RecordReader (org.apache.hadoop.mapred.RecordReader): 6 usages
DMLRuntimeException (org.apache.sysml.runtime.DMLRuntimeException): 6 usages
BufferedWriter (java.io.BufferedWriter): 4 usages
OutputStreamWriter (java.io.OutputStreamWriter): 4 usages
HashMap (java.util.HashMap): 4 usages
LongWritable (org.apache.hadoop.io.LongWritable): 4 usages
Text (org.apache.hadoop.io.Text): 4 usages
InputSplit (org.apache.hadoop.mapred.InputSplit): 4 usages
TextInputFormat (org.apache.hadoop.mapred.TextInputFormat): 4 usages
MatrixCharacteristics (org.apache.sysml.runtime.matrix.MatrixCharacteristics): 4 usages