Example 11 with MatrixCell

Use of org.apache.sysml.runtime.matrix.data.MatrixCell in project systemml by apache.

From class ResultMergeLocalFile, method createBinaryCellStagingFile:

@SuppressWarnings("deprecation")
private static void createBinaryCellStagingFile(String fnameStaging, MatrixObject mo, long ID) throws IOException, DMLRuntimeException {
    JobConf job = new JobConf(ConfigurationManager.getCachedJobConf());
    Path path = new Path(mo.getFileName());
    FileSystem fs = IOUtilFunctions.getFileSystem(path, job);
    LinkedList<Cell> buffer = new LinkedList<>();
    MatrixIndexes key = new MatrixIndexes();
    MatrixCell value = new MatrixCell();
    MatrixCharacteristics mc = mo.getMatrixCharacteristics();
    int brlen = mc.getRowsPerBlock();
    int bclen = mc.getColsPerBlock();
    for (Path lpath : IOUtilFunctions.getSequenceFilePaths(fs, path)) {
        SequenceFile.Reader reader = new SequenceFile.Reader(fs, lpath, job);
        try {
            while (reader.next(key, value)) {
                Cell tmp = new Cell(key.getRowIndex(), key.getColumnIndex(), value.getValue());
                buffer.addLast(tmp);
                // periodic flush
                if (buffer.size() > StagingFileUtils.CELL_BUFFER_SIZE) {
                    appendCellBufferToStagingArea(fnameStaging, ID, buffer, brlen, bclen);
                    buffer.clear();
                }
            }
            // final flush
            if (!buffer.isEmpty()) {
                appendCellBufferToStagingArea(fnameStaging, ID, buffer, brlen, bclen);
                buffer.clear();
            }
        } finally {
            IOUtilFunctions.closeSilently(reader);
        }
    }
}
Also used : Path(org.apache.hadoop.fs.Path) MatrixIndexes(org.apache.sysml.runtime.matrix.data.MatrixIndexes) RecordReader(org.apache.hadoop.mapred.RecordReader) LinkedList(java.util.LinkedList) MatrixCharacteristics(org.apache.sysml.runtime.matrix.MatrixCharacteristics) SequenceFile(org.apache.hadoop.io.SequenceFile) FileSystem(org.apache.hadoop.fs.FileSystem) MatrixCell(org.apache.sysml.runtime.matrix.data.MatrixCell) JobConf(org.apache.hadoop.mapred.JobConf) Cell(org.apache.sysml.runtime.controlprogram.parfor.util.Cell)
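
For readers adapting this pattern outside SystemML, here is a minimal, self-contained sketch of the same buffered read loop against the non-deprecated Hadoop 2 SequenceFile.Reader API. The class name, buffer size, and flush target are hypothetical placeholders, not part of the SystemML source.

import java.util.LinkedList;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.SequenceFile;
import org.apache.sysml.runtime.matrix.data.MatrixCell;
import org.apache.sysml.runtime.matrix.data.MatrixIndexes;

public class BufferedCellReadSketch {
    // hypothetical bound; the real code uses StagingFileUtils.CELL_BUFFER_SIZE
    private static final int BUFFER_SIZE = 100000;

    public static void readWithPeriodicFlush(Configuration conf, Path seqFile) throws Exception {
        LinkedList<double[]> buffer = new LinkedList<>();
        MatrixIndexes key = new MatrixIndexes();
        MatrixCell value = new MatrixCell();
        // Hadoop 2 option-based reader replaces the deprecated Reader(fs, path, conf)
        SequenceFile.Reader reader = new SequenceFile.Reader(conf, SequenceFile.Reader.file(seqFile));
        try {
            while (reader.next(key, value)) {
                buffer.addLast(new double[] { key.getRowIndex(), key.getColumnIndex(), value.getValue() });
                // periodic flush keeps memory bounded for large inputs
                if (buffer.size() > BUFFER_SIZE) {
                    flush(buffer);
                    buffer.clear();
                }
            }
            // final flush
            if (!buffer.isEmpty())
                flush(buffer);
        } finally {
            reader.close();
        }
    }

    private static void flush(LinkedList<double[]> buffer) {
        // placeholder: the SystemML code appends cells to per-block staging files here
        System.out.println("flushing " + buffer.size() + " cells");
    }
}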

Example 12 with MatrixCell

Use of org.apache.sysml.runtime.matrix.data.MatrixCell in project systemml by apache.

From class ResultMergeLocalFile, method mergeBinaryCellWithoutComp:

@SuppressWarnings("deprecation")
private static void mergeBinaryCellWithoutComp(String fnameNew, MatrixObject outMo, ArrayList<MatrixObject> inMO) {
    try {
        // delete target file if already exists
        MapReduceTool.deleteFileIfExistOnHDFS(fnameNew);
        if (ALLOW_COPY_CELLFILES) {
            copyAllFiles(fnameNew, inMO);
            // we're done
            return;
        }
        // actual merge
        JobConf job = new JobConf(ConfigurationManager.getCachedJobConf());
        Path path = new Path(fnameNew);
        FileSystem fs = IOUtilFunctions.getFileSystem(path, job);
        // beware: writer construction takes ca. 50 ms
        SequenceFile.Writer out = new SequenceFile.Writer(fs, job, path, MatrixIndexes.class, MatrixCell.class);
        MatrixIndexes key = new MatrixIndexes();
        MatrixCell value = new MatrixCell();
        try {
            // read/write all inputs
            for (MatrixObject in : inMO) {
                if (LOG.isTraceEnabled())
                    LOG.trace("ResultMerge (local, file): Merge input " + in.hashCode() + " (fname=" + in.getFileName() + ") via stream merge");
                JobConf tmpJob = new JobConf(ConfigurationManager.getCachedJobConf());
                Path tmpPath = new Path(in.getFileName());
                for (Path lpath : IOUtilFunctions.getSequenceFilePaths(fs, tmpPath)) {
                    SequenceFile.Reader reader = new SequenceFile.Reader(fs, lpath, tmpJob);
                    try {
                        while (reader.next(key, value)) {
                            out.append(key, value);
                        }
                    } finally {
                        IOUtilFunctions.closeSilently(reader);
                    }
                }
            }
        } finally {
            IOUtilFunctions.closeSilently(out);
        }
    } catch (Exception ex) {
        throw new DMLRuntimeException("Unable to merge binary cell results.", ex);
    }
}
Also used : Path(org.apache.hadoop.fs.Path) MatrixObject(org.apache.sysml.runtime.controlprogram.caching.MatrixObject) MatrixIndexes(org.apache.sysml.runtime.matrix.data.MatrixIndexes) RecordReader(org.apache.hadoop.mapred.RecordReader) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException) IOException(java.io.IOException) SequenceFile(org.apache.hadoop.io.SequenceFile) FileSystem(org.apache.hadoop.fs.FileSystem) MatrixCell(org.apache.sysml.runtime.matrix.data.MatrixCell) JobConf(org.apache.hadoop.mapred.JobConf) OutputStreamWriter(java.io.OutputStreamWriter) BufferedWriter(java.io.BufferedWriter)
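
The SequenceFile.Writer constructor used above is deprecated; Hadoop 2 replaces it with SequenceFile.createWriter and option objects. Below is a minimal sketch of the same stream-merge pattern under that API; the class name and paths are hypothetical, and unlike the SystemML method it does no copy-file shortcut or logging.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.SequenceFile;
import org.apache.sysml.runtime.matrix.data.MatrixCell;
import org.apache.sysml.runtime.matrix.data.MatrixIndexes;

public class StreamMergeSketch {
    /** Concatenates binary-cell SequenceFiles into a single output file. */
    public static void merge(Configuration conf, Path out, Path... inputs) throws Exception {
        // Hadoop 2 factory replaces new SequenceFile.Writer(fs, job, path, keyClass, valueClass)
        SequenceFile.Writer writer = SequenceFile.createWriter(conf,
            SequenceFile.Writer.file(out),
            SequenceFile.Writer.keyClass(MatrixIndexes.class),
            SequenceFile.Writer.valueClass(MatrixCell.class));
        MatrixIndexes key = new MatrixIndexes();
        MatrixCell value = new MatrixCell();
        try {
            for (Path in : inputs) {
                SequenceFile.Reader reader = new SequenceFile.Reader(conf, SequenceFile.Reader.file(in));
                try {
                    // plain concatenation: keys may repeat across inputs, no deduplication
                    while (reader.next(key, value))
                        writer.append(key, value);
                } finally {
                    reader.close();
                }
            }
        } finally {
            writer.close();
        }
    }
}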

Example 13 with MatrixCell

Use of org.apache.sysml.runtime.matrix.data.MatrixCell in project systemml by apache.

From class ResultMergeLocalFile, method createBinaryCellResultFile:

@SuppressWarnings("deprecation")
private void createBinaryCellResultFile(String fnameStaging, String fnameStagingCompare, String fnameNew, MetaDataFormat metadata, boolean withCompare) throws IOException, DMLRuntimeException {
    JobConf job = new JobConf(ConfigurationManager.getCachedJobConf());
    Path path = new Path(fnameNew);
    FileSystem fs = IOUtilFunctions.getFileSystem(path, job);
    MatrixCharacteristics mc = metadata.getMatrixCharacteristics();
    long rlen = mc.getRows();
    long clen = mc.getCols();
    int brlen = mc.getRowsPerBlock();
    int bclen = mc.getColsPerBlock();
    MatrixIndexes indexes = new MatrixIndexes(1, 1);
    MatrixCell cell = new MatrixCell(0);
    // beware: writer construction takes ca. 50 ms
    SequenceFile.Writer out = new SequenceFile.Writer(fs, job, path, MatrixIndexes.class, MatrixCell.class);
    try {
        boolean written = false;
        for (long brow = 1; brow <= (long) Math.ceil(rlen / (double) brlen); brow++)
        for (long bcol = 1; bcol <= (long) Math.ceil(clen / (double) bclen); bcol++) {
            File dir = new File(fnameStaging + "/" + brow + "_" + bcol);
            File dir2 = new File(fnameStagingCompare + "/" + brow + "_" + bcol);
            MatrixBlock mb = null;
            long row_offset = (brow - 1) * brlen + 1;
            long col_offset = (bcol - 1) * bclen + 1;
            if (dir.exists()) {
                // WITH COMPARE BLOCK
                if (withCompare && dir2.exists()) {
                    // copy only values that are different from the original
                    String[] lnames2 = dir2.list();
                    // there should be exactly 1 compare block
                    if (lnames2.length != 1)
                        throw new DMLRuntimeException("Unable to merge results because multiple compare blocks found.");
                    mb = StagingFileUtils.readCellList2BlockFromLocal(dir2 + "/" + lnames2[0], brlen, bclen);
                    boolean appendOnly = mb.isInSparseFormat();
                    DenseBlock compare = DataConverter.convertToDenseBlock(mb, false);
                    for (String lname : dir.list()) {
                        MatrixBlock tmp = StagingFileUtils.readCellList2BlockFromLocal(dir + "/" + lname, brlen, bclen);
                        mergeWithComp(mb, tmp, compare);
                    }
                    // sort sparse due to append-only
                    if (appendOnly && !_isAccum)
                        mb.sortSparseRows();
                    // change sparsity if required after
                    mb.examSparsity();
                } else { // WITHOUT COMPARE BLOCK
                    // copy all non-zeros from all workers
                    boolean appendOnly = false;
                    for (String lname : dir.list()) {
                        if (mb == null) {
                            mb = StagingFileUtils.readCellList2BlockFromLocal(dir + "/" + lname, brlen, bclen);
                            appendOnly = mb.isInSparseFormat();
                        } else {
                            MatrixBlock tmp = StagingFileUtils.readCellList2BlockFromLocal(dir + "/" + lname, brlen, bclen);
                            mergeWithoutComp(mb, tmp, appendOnly);
                        }
                    }
                    // sort sparse due to append-only
                    if (appendOnly && !_isAccum)
                        mb.sortSparseRows();
                    // change sparsity if required after
                    mb.examSparsity();
                }
            }
            // write the block to binary cell
            if (mb != null) {
                if (mb.isInSparseFormat()) {
                    Iterator<IJV> iter = mb.getSparseBlockIterator();
                    while (iter.hasNext()) {
                        IJV lcell = iter.next();
                        indexes.setIndexes(row_offset + lcell.getI(), col_offset + lcell.getJ());
                        cell.setValue(lcell.getV());
                        out.append(indexes, cell);
                        written = true;
                    }
                } else {
                    for (int i = 0; i < brlen; i++)
                    for (int j = 0; j < bclen; j++) {
                        double lvalue = mb.getValueDenseUnsafe(i, j);
                        if (lvalue != 0) { // for nnz
                            indexes.setIndexes(row_offset + i, col_offset + j);
                            cell.setValue(lvalue);
                            out.append(indexes, cell);
                            written = true;
                        }
                    }
                }
            }
        }
        // write a single dummy cell if nothing was written (avoids an empty output file)
        if (!written)
            out.append(indexes, cell);
    } finally {
        IOUtilFunctions.closeSilently(out);
    }
}
Also used : Path(org.apache.hadoop.fs.Path) MatrixBlock(org.apache.sysml.runtime.matrix.data.MatrixBlock) MatrixIndexes(org.apache.sysml.runtime.matrix.data.MatrixIndexes) MatrixCharacteristics(org.apache.sysml.runtime.matrix.MatrixCharacteristics) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException) DenseBlock(org.apache.sysml.runtime.matrix.data.DenseBlock) SequenceFile(org.apache.hadoop.io.SequenceFile) IJV(org.apache.sysml.runtime.matrix.data.IJV) FileSystem(org.apache.hadoop.fs.FileSystem) MatrixCell(org.apache.sysml.runtime.matrix.data.MatrixCell) Iterator(java.util.Iterator) JobConf(org.apache.hadoop.mapred.JobConf) File(java.io.File) OutputStreamWriter(java.io.OutputStreamWriter) BufferedWriter(java.io.BufferedWriter)
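
The index arithmetic in this method (1-based block indexes brow/bcol mapped to 1-based cell offsets) is worth isolating. A standalone sketch with made-up dimensions shows the mapping; none of these values come from the SystemML source.

public class BlockOffsetSketch {
    public static void main(String[] args) {
        long rlen = 2500, clen = 1200;   // hypothetical matrix dimensions
        int brlen = 1000, bclen = 1000;  // block sizes, as in the method above
        long numBlockRows = (long) Math.ceil(rlen / (double) brlen); // 3
        long numBlockCols = (long) Math.ceil(clen / (double) bclen); // 2
        for (long brow = 1; brow <= numBlockRows; brow++)
        for (long bcol = 1; bcol <= numBlockCols; bcol++) {
            // 1-based offset of the block's top-left cell in the full matrix
            long row_offset = (brow - 1) * brlen + 1;
            long col_offset = (bcol - 1) * bclen + 1;
            System.out.println("block (" + brow + "," + bcol + ") starts at cell ("
                + row_offset + "," + col_offset + ")");
        }
        // e.g. block (3,2) starts at cell (2001,1001); a cell at local position (i,j)
        // within that block lands at global index (row_offset + i, col_offset + j)
    }
}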

Example 14 with MatrixCell

Use of org.apache.sysml.runtime.matrix.data.MatrixCell in project systemml by apache.

From class GMRCtableBuffer, method flushBuffer:

public void flushBuffer(Reporter reporter) throws RuntimeException {
    try {
        if (_mapBuffer != null) {
            MatrixIndexes key = null;
            MatrixCell value = new MatrixCell();
            for (Entry<Byte, CTableMap> ctable : _mapBuffer.entrySet()) {
                ArrayList<Integer> resultIDs = ReduceBase.getOutputIndexes(ctable.getKey(), _resultIndexes);
                CTableMap resultMap = ctable.getValue();
                // maintain result dims and nonzeros
                for (Integer i : resultIDs) {
                    _resultNonZeros[i] += resultMap.size();
                    if (_resultDimsUnknown[i] == (byte) 1) {
                        _resultMaxRowDims[i] = Math.max(resultMap.getMaxRow(), _resultMaxRowDims[i]);
                        _resultMaxColDims[i] = Math.max(resultMap.getMaxColumn(), _resultMaxColDims[i]);
                    }
                }
                // output result data
                Iterator<ADoubleEntry> iter = resultMap.getIterator();
                while (iter.hasNext()) {
                    ADoubleEntry e = iter.next();
                    key = new MatrixIndexes(e.getKey1(), e.getKey2());
                    value.setValue(e.value);
                    for (Integer i : resultIDs) _collector.collectOutput(key, value, i, reporter);
                }
            }
        } else if (_blockBuffer != null) {
            MatrixIndexes key = new MatrixIndexes(1, 1);
            for (Entry<Byte, MatrixBlock> ctable : _blockBuffer.entrySet()) {
                ArrayList<Integer> resultIDs = ReduceBase.getOutputIndexes(ctable.getKey(), _resultIndexes);
                MatrixBlock outBlock = ctable.getValue();
                outBlock.recomputeNonZeros();
                // TODO: change hard coding of 1000
                int brlen = 1000, bclen = 1000;
                int rlen = outBlock.getNumRows();
                int clen = outBlock.getNumColumns();
                // final output matrix is smaller than a single block
                if (rlen <= brlen && clen <= bclen) {
                    key = new MatrixIndexes(1, 1);
                    for (Integer i : resultIDs) {
                        _collector.collectOutput(key, outBlock, i, reporter);
                        _resultNonZeros[i] += outBlock.getNonZeros();
                    }
                } else {
                    // The following code is similar to that in DataConverter.writeBinaryBlockMatrixToHDFS
                    // initialize blocks for reuse (at most 4 different blocks required)
                    MatrixBlock[] blocks = MatrixWriter.createMatrixBlocksForReuse(rlen, clen, brlen, bclen, true, outBlock.getNonZeros());
                    // create and write subblocks of matrix
                    for (int blockRow = 0; blockRow < (int) Math.ceil(rlen / (double) brlen); blockRow++) {
                        for (int blockCol = 0; blockCol < (int) Math.ceil(clen / (double) bclen); blockCol++) {
                            int maxRow = (blockRow * brlen + brlen < rlen) ? brlen : rlen - blockRow * brlen;
                            int maxCol = (blockCol * bclen + bclen < clen) ? bclen : clen - blockCol * bclen;
                            int row_offset = blockRow * brlen;
                            int col_offset = blockCol * bclen;
                            // get reuse matrix block
                            MatrixBlock block = MatrixWriter.getMatrixBlockForReuse(blocks, maxRow, maxCol, brlen, bclen);
                            // copy submatrix to block
                            outBlock.slice(row_offset, row_offset + maxRow - 1, col_offset, col_offset + maxCol - 1, block);
                            // TODO: skip empty "block"
                            // append block to sequence file
                            key.setIndexes(blockRow + 1, blockCol + 1);
                            for (Integer i : resultIDs) {
                                _collector.collectOutput(key, block, i, reporter);
                                _resultNonZeros[i] += block.getNonZeros();
                            }
                            // reset block for later reuse
                            block.reset();
                        }
                    }
                }
            }
        } else {
            throw new DMLRuntimeException("Unexpected.. both ctable buffers are empty.");
        }
    } catch (Exception ex) {
        throw new RuntimeException("Failed to flush ctable buffer.", ex);
    }
    // remove existing partial ctables
    if (_mapBuffer != null)
        _mapBuffer.clear();
    else
        _blockBuffer.clear();
}
Also used : MatrixBlock(org.apache.sysml.runtime.matrix.data.MatrixBlock) MatrixIndexes(org.apache.sysml.runtime.matrix.data.MatrixIndexes) ADoubleEntry(org.apache.sysml.runtime.util.LongLongDoubleHashMap.ADoubleEntry) ArrayList(java.util.ArrayList) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException) Entry(java.util.Map.Entry) CTableMap(org.apache.sysml.runtime.matrix.data.CTableMap) MatrixCell(org.apache.sysml.runtime.matrix.data.MatrixCell)
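
The maxRow/maxCol computation above handles boundary blocks that are smaller than the nominal block size. A standalone sketch with hypothetical dimensions shows the resulting tiling; only the two ternary expressions are taken from the method itself.

public class BlockTilingSketch {
    public static void main(String[] args) {
        int rlen = 2500, clen = 1200;   // hypothetical matrix dimensions
        int brlen = 1000, bclen = 1000; // block sizes, matching the hard-coded 1000 above
        for (int blockRow = 0; blockRow < (int) Math.ceil(rlen / (double) brlen); blockRow++)
        for (int blockCol = 0; blockCol < (int) Math.ceil(clen / (double) bclen); blockCol++) {
            // interior blocks are full size; edge blocks shrink to the remainder
            int maxRow = (blockRow * brlen + brlen < rlen) ? brlen : rlen - blockRow * brlen;
            int maxCol = (blockCol * bclen + bclen < clen) ? bclen : clen - blockCol * bclen;
            System.out.println("block (" + (blockRow + 1) + "," + (blockCol + 1) + "): "
                + maxRow + " x " + maxCol);
        }
        // prints 1000 x 1000 for interior blocks, 1000 x 200 and 500 x 1000 along the
        // edges, and 500 x 200 for the bottom-right corner block
    }
}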

Example 15 with MatrixCell

Use of org.apache.sysml.runtime.matrix.data.MatrixCell in project systemml by apache.

From class DataPartitionerLocal, method writeBinaryCellSequenceFileToHDFS:

@SuppressWarnings("deprecation")
public void writeBinaryCellSequenceFileToHDFS(JobConf job, String dir, String lpdir) throws IOException {
    long key = getKeyFromFilePath(lpdir);
    Path path = new Path(dir + "/" + key);
    FileSystem fs = IOUtilFunctions.getFileSystem(path, job);
    // beware: writer construction takes ca. 50 ms
    SequenceFile.Writer writer = new SequenceFile.Writer(fs, job, path, MatrixIndexes.class, MatrixCell.class);
    try {
        MatrixIndexes indexes = new MatrixIndexes();
        MatrixCell cell = new MatrixCell();
        String[] fnameBlocks = new File(lpdir).list();
        for (String fnameBlock : fnameBlocks) {
            LinkedList<Cell> tmp = StagingFileUtils.readCellListFromLocal(lpdir + "/" + fnameBlock);
            for (Cell c : tmp) {
                indexes.setIndexes(c.getRow(), c.getCol());
                cell.setValue(c.getValue());
                writer.append(indexes, cell);
            }
        }
    } finally {
        IOUtilFunctions.closeSilently(writer);
    }
}
Also used : Path(org.apache.hadoop.fs.Path) MatrixIndexes(org.apache.sysml.runtime.matrix.data.MatrixIndexes) SequenceFile(org.apache.hadoop.io.SequenceFile) FileSystem(org.apache.hadoop.fs.FileSystem) MatrixCell(org.apache.sysml.runtime.matrix.data.MatrixCell) File(java.io.File) Cell(org.apache.sysml.runtime.controlprogram.parfor.util.Cell) OutputStreamWriter(java.io.OutputStreamWriter) BufferedWriter(java.io.BufferedWriter)
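
To see the binary-cell format end to end, here is a minimal round-trip sketch: it writes a few MatrixIndexes/MatrixCell pairs with the non-deprecated writer API and reads them back. The local path, values, and class name are hypothetical.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.SequenceFile;
import org.apache.sysml.runtime.matrix.data.MatrixCell;
import org.apache.sysml.runtime.matrix.data.MatrixIndexes;

public class BinaryCellRoundTripSketch {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Path path = new Path("file:///tmp/cells.seq"); // hypothetical local path
        // write a few cells in the binary-cell format (MatrixIndexes key, MatrixCell value)
        SequenceFile.Writer writer = SequenceFile.createWriter(conf,
            SequenceFile.Writer.file(path),
            SequenceFile.Writer.keyClass(MatrixIndexes.class),
            SequenceFile.Writer.valueClass(MatrixCell.class));
        MatrixIndexes ix = new MatrixIndexes();
        MatrixCell cell = new MatrixCell();
        try {
            for (long i = 1; i <= 3; i++) {
                ix.setIndexes(i, i);   // cell indexes are 1-based
                cell.setValue(i * 0.5);
                writer.append(ix, cell);
            }
        } finally {
            writer.close();
        }
        // read the cells back, reusing the key/value objects
        SequenceFile.Reader reader = new SequenceFile.Reader(conf, SequenceFile.Reader.file(path));
        try {
            while (reader.next(ix, cell))
                System.out.println("(" + ix.getRowIndex() + "," + ix.getColumnIndex()
                    + ") = " + cell.getValue());
        } finally {
            reader.close();
        }
    }
}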

Aggregations

MatrixCell (org.apache.sysml.runtime.matrix.data.MatrixCell): 35 usages
MatrixIndexes (org.apache.sysml.runtime.matrix.data.MatrixIndexes): 33 usages
SequenceFile (org.apache.hadoop.io.SequenceFile): 21 usages
FileSystem (org.apache.hadoop.fs.FileSystem): 19 usages
Path (org.apache.hadoop.fs.Path): 17 usages
DMLRuntimeException (org.apache.sysml.runtime.DMLRuntimeException): 17 usages
MatrixBlock (org.apache.sysml.runtime.matrix.data.MatrixBlock): 13 usages
IOException (java.io.IOException): 12 usages
JobConf (org.apache.hadoop.mapred.JobConf): 11 usages
MatrixCharacteristics (org.apache.sysml.runtime.matrix.MatrixCharacteristics): 11 usages
BufferedWriter (java.io.BufferedWriter): 7 usages
File (java.io.File): 7 usages
OutputStreamWriter (java.io.OutputStreamWriter): 7 usages
ArrayList (java.util.ArrayList): 6 usages
RecordReader (org.apache.hadoop.mapred.RecordReader): 6 usages
Cell (org.apache.sysml.runtime.controlprogram.parfor.util.Cell): 6 usages
IJV (org.apache.sysml.runtime.matrix.data.IJV): 5 usages
LinkedList (java.util.LinkedList): 4 usages
MatrixObject (org.apache.sysml.runtime.controlprogram.caching.MatrixObject): 4 usages
CTableMap (org.apache.sysml.runtime.matrix.data.CTableMap): 4 usages