
Example 76 with MatrixIndexes

use of org.apache.sysml.runtime.matrix.data.MatrixIndexes in project incubator-systemml by apache.

the class WriterBinaryBlock method writeDiagBinaryBlockMatrixToHDFS.

@SuppressWarnings("deprecation")
protected final void writeDiagBinaryBlockMatrixToHDFS(Path path, JobConf job, FileSystem fs, MatrixBlock src, long rlen, long clen, int brlen, int bclen) throws IOException, DMLRuntimeException {
    boolean sparse = src.isInSparseFormat();
    // 1) create sequence file writer, with right replication factor
    // (config via MRConfigurationNames.DFS_REPLICATION not possible since sequence file internally calls fs.getDefaultReplication())
    SequenceFile.Writer writer = null;
    // if replication specified (otherwise default)
    if (_replication > 0) {
        // copy of SequenceFile.Writer(fs, job, path, MatrixIndexes.class, MatrixBlock.class), except for replication
        writer = new SequenceFile.Writer(fs, job, path, MatrixIndexes.class, MatrixBlock.class, job.getInt(MRConfigurationNames.IO_FILE_BUFFER_SIZE, 4096), (short) _replication, fs.getDefaultBlockSize(), null, new SequenceFile.Metadata());
    } else {
        writer = new SequenceFile.Writer(fs, job, path, MatrixIndexes.class, MatrixBlock.class);
    }
    try {
        // 2) bound check for src block
        if (src.getNumRows() > rlen || src.getNumColumns() > clen) {
            throw new IOException("Matrix block [1:" + src.getNumRows() + ",1:" + src.getNumColumns() + "] " + "out of overall matrix range [1:" + rlen + ",1:" + clen + "].");
        }
        // 3) reblock and write
        MatrixIndexes indexes = new MatrixIndexes();
        // opt for single block
        if (rlen <= brlen && clen <= bclen) {
            // directly write single block
            indexes.setIndexes(1, 1);
            writer.append(indexes, src);
        } else { // general case
            // initialize blocks for reuse (at most 4 different blocks required)
            MatrixBlock[] blocks = createMatrixBlocksForReuse(rlen, clen, brlen, bclen, sparse, src.getNonZeros());
            MatrixBlock emptyBlock = new MatrixBlock();
            // create and write subblocks of matrix
            for (int blockRow = 0; blockRow < (int) Math.ceil(src.getNumRows() / (double) brlen); blockRow++)
            for (int blockCol = 0; blockCol < (int) Math.ceil(src.getNumColumns() / (double) bclen); blockCol++) {
                int maxRow = (blockRow * brlen + brlen < src.getNumRows()) ? brlen : src.getNumRows() - blockRow * brlen;
                int maxCol = (blockCol * bclen + bclen < src.getNumColumns()) ? bclen : src.getNumColumns() - blockCol * bclen;
                MatrixBlock block = null;
                if (blockRow == blockCol) { // block on diagonal
                    int row_offset = blockRow * brlen;
                    int col_offset = blockCol * bclen;
                    // get reuse matrix block
                    block = getMatrixBlockForReuse(blocks, maxRow, maxCol, brlen, bclen);
                    // copy submatrix to block
                    src.slice(row_offset, row_offset + maxRow - 1, col_offset, col_offset + maxCol - 1, block);
                } else { // empty block (not on diagonal)
                    block = emptyBlock;
                    block.reset(maxRow, maxCol);
                }
                // append block to sequence file
                indexes.setIndexes(blockRow + 1, blockCol + 1);
                writer.append(indexes, block);
                // reset block for later reuse
                if (blockRow != blockCol)
                    block.reset();
            }
        }
    } finally {
        IOUtilFunctions.closeSilently(writer);
    }
}
Also used : MatrixBlock(org.apache.sysml.runtime.matrix.data.MatrixBlock) SequenceFile(org.apache.hadoop.io.SequenceFile) MatrixIndexes(org.apache.sysml.runtime.matrix.data.MatrixIndexes) IOException(java.io.IOException)
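
The only non-obvious part of the general case above is the boundary arithmetic for the last row/column of blocks and the 1-based block indexes used as sequence-file keys. The following standalone sketch (not SystemML code; class name and dimensions are made up for illustration) reproduces that arithmetic in isolation:

// Illustrative sketch (not part of SystemML): the block-partitioning arithmetic
// used by the loop above, shown in isolation with example dimensions.
public class BlockingSketch {
    public static void main(String[] args) {
        long rlen = 2500, clen = 2500;   // overall matrix dimensions (example values)
        int brlen = 1000, bclen = 1000;  // block dimensions (example values)
        int numRowBlocks = (int) Math.ceil(rlen / (double) brlen); // 3
        int numColBlocks = (int) Math.ceil(clen / (double) bclen); // 3
        for (int blockRow = 0; blockRow < numRowBlocks; blockRow++) {
            for (int blockCol = 0; blockCol < numColBlocks; blockCol++) {
                // rows/cols actually present in this (possibly boundary) block
                int maxRow = (int) Math.min(brlen, rlen - (long) blockRow * brlen);
                int maxCol = (int) Math.min(bclen, clen - (long) blockCol * bclen);
                // keys are 1-based block indexes, as in indexes.setIndexes(blockRow + 1, blockCol + 1)
                System.out.printf("block (%d,%d): %d x %d, diagonal=%b%n",
                    blockRow + 1, blockCol + 1, maxRow, maxCol, blockRow == blockCol);
            }
        }
    }
}

With the example 2500 x 2500 matrix and 1000 x 1000 blocks, the boundary blocks in row/column 3 come out as 500 x 1000, 1000 x 500, and 500 x 500, which is exactly what the maxRow/maxCol computation in writeDiagBinaryBlockMatrixToHDFS produces.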

Example 77 with MatrixIndexes

use of org.apache.sysml.runtime.matrix.data.MatrixIndexes in project incubator-systemml by apache.

the class WriterBinaryBlock method writeEmptyMatrixToHDFS.

@Override
@SuppressWarnings("deprecation")
public final void writeEmptyMatrixToHDFS(String fname, long rlen, long clen, int brlen, int bclen) throws IOException, DMLRuntimeException {
    JobConf job = new JobConf(ConfigurationManager.getCachedJobConf());
    Path path = new Path(fname);
    FileSystem fs = IOUtilFunctions.getFileSystem(path, job);
    SequenceFile.Writer writer = null;
    try {
        writer = new SequenceFile.Writer(fs, job, path, MatrixIndexes.class, MatrixBlock.class);
        MatrixIndexes index = new MatrixIndexes(1, 1);
        MatrixBlock block = new MatrixBlock((int) Math.max(Math.min(rlen, brlen), 1), (int) Math.max(Math.min(clen, bclen), 1), true);
        writer.append(index, block);
    } finally {
        IOUtilFunctions.closeSilently(writer);
    }
    IOUtilFunctions.deleteCrcFilesFromLocalFileSystem(fs, path);
}
Also used : Path(org.apache.hadoop.fs.Path) MatrixBlock(org.apache.sysml.runtime.matrix.data.MatrixBlock) SequenceFile(org.apache.hadoop.io.SequenceFile) MatrixIndexes(org.apache.sysml.runtime.matrix.data.MatrixIndexes) FileSystem(org.apache.hadoop.fs.FileSystem) JobConf(org.apache.hadoop.mapred.JobConf)
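
The empty matrix is written as a single block whose dimensions are clamped to at most one block size and at least 1 x 1. A minimal sketch of that clamping (not SystemML code; values are made up) is shown below:

// Illustrative sketch (not part of SystemML): the dimension clamping used above,
// which sizes the single empty block to at most one block and at least 1 x 1.
public class EmptyBlockDims {
    public static void main(String[] args) {
        long[][] cases = { {1000000, 1000}, {7, 1000}, {0, 1000} }; // {rlen, brlen} example values
        for (long[] c : cases) {
            int rows = (int) Math.max(Math.min(c[0], c[1]), 1);
            // prints 1000, 7, and 1 respectively
            System.out.println("rlen=" + c[0] + ", brlen=" + c[1] + " -> block rows=" + rows);
        }
    }
}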

Example 78 with MatrixIndexes

use of org.apache.sysml.runtime.matrix.data.MatrixIndexes in project incubator-systemml by apache.

the class DataGenMapper method configure.

@Override
public void configure(JobConf job) {
    super.configure(job);
    // initialize num_inst matrix indexes and blocks for reuse
    indexes = new MatrixIndexes[representativeMatrixes.size()];
    block = new MatrixBlock[representativeMatrixes.size()];
    for (int i = 0; i < representativeMatrixes.size(); i++) {
        indexes[i] = new MatrixIndexes();
        block[i] = new MatrixBlock();
    }
}
Also used : MatrixBlock(org.apache.sysml.runtime.matrix.data.MatrixBlock) MatrixIndexes(org.apache.sysml.runtime.matrix.data.MatrixIndexes)

Example 79 with MatrixIndexes

use of org.apache.sysml.runtime.matrix.data.MatrixIndexes in project incubator-systemml by apache.

the class DistributedCacheInput method readDataBlocks.

private void readDataBlocks(int rowBlockIndex, int colBlockIndex) {
    // get filename for rowblock/colblock
    String fname = _localFilePath.toString();
    if (isPartitioned())
        fname = getPartitionFileName(rowBlockIndex, colBlockIndex);
    // read matrix partition (or entire vector)
    try {
        ReaderBinaryBlock reader = (ReaderBinaryBlock) MatrixReaderFactory.createMatrixReader(InputInfo.BinaryBlockInputInfo);
        reader.setLocalFS(!MRBaseForCommonInstructions.isJobLocal);
        ArrayList<IndexedMatrixValue> tmp = reader.readIndexedMatrixBlocksFromHDFS(fname, _rlen, _clen, _brlen, _bclen);
        int rowBlocks = (int) Math.ceil(_rlen / (double) _brlen);
        int colBlocks = (int) Math.ceil(_clen / (double) _bclen);
        if (dataBlocks == null)
            dataBlocks = new IndexedMatrixValue[rowBlocks][colBlocks];
        for (IndexedMatrixValue val : tmp) {
            MatrixIndexes idx = val.getIndexes();
            dataBlocks[(int) idx.getRowIndex() - 1][(int) idx.getColumnIndex() - 1] = val;
        }
    } catch (Exception ex) {
        throw new DMLRuntimeException(ex);
    }
}
Also used : ReaderBinaryBlock(org.apache.sysml.runtime.io.ReaderBinaryBlock) MatrixIndexes(org.apache.sysml.runtime.matrix.data.MatrixIndexes) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException)
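
The blocks returned by the reader carry 1-based MatrixIndexes, which readDataBlocks places into a 0-based 2D array sized by ceil(rlen/brlen) x ceil(clen/bclen). The sketch below (not SystemML code; Block is a hypothetical stand-in for IndexedMatrixValue/MatrixIndexes) shows that index mapping on its own:

// Illustrative sketch (not SystemML code): placing 1-based block indexes into a
// 0-based 2D array, as done with dataBlocks[idx.getRowIndex()-1][idx.getColumnIndex()-1].
import java.util.Arrays;
import java.util.List;

public class BlockPlacementSketch {
    // stand-in for MatrixIndexes/IndexedMatrixValue (hypothetical, for illustration only)
    static final class Block {
        final long rowIndex, colIndex; // 1-based block coordinates
        Block(long r, long c) { rowIndex = r; colIndex = c; }
    }

    public static void main(String[] args) {
        long rlen = 2500, clen = 1500;
        int brlen = 1000, bclen = 1000;
        int rowBlocks = (int) Math.ceil(rlen / (double) brlen); // 3
        int colBlocks = (int) Math.ceil(clen / (double) bclen); // 2
        Block[][] dataBlocks = new Block[rowBlocks][colBlocks];
        List<Block> read = Arrays.asList(new Block(1, 1), new Block(3, 2));
        for (Block b : read)
            dataBlocks[(int) b.rowIndex - 1][(int) b.colIndex - 1] = b; // convert to 0-based
        System.out.println("slot (2,1) filled: " + (dataBlocks[2][1] != null)); // true, holds block (3,2)
    }
}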

Example 80 with MatrixIndexes

use of org.apache.sysml.runtime.matrix.data.MatrixIndexes in project incubator-systemml by apache.

the class MMCJMRCache method allocateBuffer.

@SuppressWarnings("unchecked")
protected void allocateBuffer(int buffCapacity, Class<? extends MatrixValue> valueClass, boolean buffMap) throws IllegalAccessException, InstantiationException {
    _bufferCapacity = buffCapacity;
    _buffer = new Pair[_bufferCapacity];
    for (int i = 0; i < _bufferCapacity; i++) _buffer[i] = new Pair<>(new MatrixIndexes(), valueClass.newInstance());
    if (buffMap)
        _bufferMap = new HashMap<>();
}
Also used : HashMap(java.util.HashMap) MatrixIndexes(org.apache.sysml.runtime.matrix.data.MatrixIndexes) Pair(org.apache.sysml.runtime.matrix.data.Pair)
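
allocateBuffer pre-creates all key/value pairs up front via reflection so the cache can reuse them instead of allocating per record. A minimal, generic sketch of the same pattern is shown below (not SystemML code; it uses java.util.AbstractMap.SimpleEntry in place of SystemML's Pair, and getDeclaredConstructor().newInstance() in place of the deprecated Class.newInstance()):

// Illustrative sketch (not SystemML code): pre-allocating a reusable key/value buffer
// via reflection, analogous to the Pair<MatrixIndexes, MatrixValue> buffer above.
import java.util.AbstractMap.SimpleEntry;

public class BufferSketch {
    @SuppressWarnings("unchecked")
    static <K, V> SimpleEntry<K, V>[] allocateBuffer(int capacity, Class<K> keyClass, Class<V> valueClass)
            throws ReflectiveOperationException {
        SimpleEntry<K, V>[] buffer = new SimpleEntry[capacity];
        for (int i = 0; i < capacity; i++)
            buffer[i] = new SimpleEntry<>(keyClass.getDeclaredConstructor().newInstance(),
                                          valueClass.getDeclaredConstructor().newInstance());
        return buffer;
    }

    public static void main(String[] args) throws ReflectiveOperationException {
        // example with StringBuilder values; SystemML would use MatrixIndexes/MatrixBlock here
        SimpleEntry<StringBuilder, StringBuilder>[] buf = allocateBuffer(4, StringBuilder.class, StringBuilder.class);
        System.out.println("buffer slots: " + buf.length); // 4
    }
}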

Aggregations

MatrixIndexes (org.apache.sysml.runtime.matrix.data.MatrixIndexes) 165
MatrixBlock (org.apache.sysml.runtime.matrix.data.MatrixBlock) 142
MatrixCharacteristics (org.apache.sysml.runtime.matrix.MatrixCharacteristics) 70
SparkExecutionContext (org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext) 48
DMLRuntimeException (org.apache.sysml.runtime.DMLRuntimeException) 41
Path (org.apache.hadoop.fs.Path) 24
SequenceFile (org.apache.hadoop.io.SequenceFile) 23
JavaPairRDD (org.apache.spark.api.java.JavaPairRDD) 22
ArrayList (java.util.ArrayList) 21
IOException (java.io.IOException) 20
FileSystem (org.apache.hadoop.fs.FileSystem) 20
MatrixCell (org.apache.sysml.runtime.matrix.data.MatrixCell) 19
Tuple2 (scala.Tuple2) 19
IndexedMatrixValue (org.apache.sysml.runtime.matrix.mapred.IndexedMatrixValue) 17
JobConf (org.apache.hadoop.mapred.JobConf) 14
MatrixValue (org.apache.sysml.runtime.matrix.data.MatrixValue) 11
CompressedMatrixBlock (org.apache.sysml.runtime.compress.CompressedMatrixBlock) 10
MatrixObject (org.apache.sysml.runtime.controlprogram.caching.MatrixObject) 10
File (java.io.File) 9
RDDObject (org.apache.sysml.runtime.instructions.spark.data.RDDObject) 9