Search in sources :

Example 1 with PairWritableBlock

use of org.apache.sysml.runtime.controlprogram.parfor.util.PairWritableBlock in project incubator-systemml by apache.

This example shows the call method of the DataPartitionerRemoteSparkMapper class.

/**
 * Re-keys a single input matrix block according to the configured
 * partitioning format ({@code _dpf}) and emits one output pair per
 * partition fragment, keyed by the 1-based target partition index.
 *
 * Fix: replaced the deprecated boxing constructor {@code new Long(...)}
 * with {@code Long.valueOf(...)} (deprecated since Java 9; valueOf also
 * benefits from the Long cache).
 *
 * @param arg0 pair of block indexes and the corresponding matrix block
 * @return iterator over (partition key, partition fragment) pairs
 * @throws Exception IOException if the block lies outside the overall
 *         matrix range, DMLRuntimeException on unsupported formats
 */
@Override
public Iterator<Tuple2<Long, Writable>> call(Tuple2<MatrixIndexes, MatrixBlock> arg0) throws Exception {
    List<Tuple2<Long, Writable>> ret = new LinkedList<>();
    MatrixIndexes key2 = arg0._1();
    MatrixBlock value2 = arg0._2();
    // absolute 0-based cell offsets of this block within the overall matrix
    long row_offset = (key2.getRowIndex() - 1) * _brlen;
    long col_offset = (key2.getColumnIndex() - 1) * _bclen;
    long rows = value2.getNumRows();
    long cols = value2.getNumColumns();
    // bound check per block
    if (row_offset + rows < 1 || row_offset + rows > _rlen || col_offset + cols < 1 || col_offset + cols > _clen) {
        throw new IOException("Matrix block [" + (row_offset + 1) + ":" + (row_offset + rows) + "," + (col_offset + 1) + ":" + (col_offset + cols) + "] " + "out of overall matrix range [1:" + _rlen + ",1:" + _clen + "].");
    }
    // partition inputs according to partitioning scheme
    switch(_dpf) {
        case ROW_WISE:
            {
                // one output pair per individual row
                MatrixBlock[] blks = DataConverter.convertToMatrixBlockPartitions(value2, false);
                for (int i = 0; i < rows; i++) {
                    PairWritableBlock tmp = new PairWritableBlock();
                    tmp.indexes = new MatrixIndexes(1, col_offset / _bclen + 1);
                    tmp.block = blks[i];
                    ret.add(new Tuple2<Long, Writable>(Long.valueOf(row_offset + 1 + i), tmp));
                }
                break;
            }
        case ROW_BLOCK_WISE:
            {
                // the entire block maps to a single row-block partition
                PairWritableBlock tmp = new PairWritableBlock();
                tmp.indexes = new MatrixIndexes(1, col_offset / _bclen + 1);
                tmp.block = new MatrixBlock(value2);
                ret.add(new Tuple2<Long, Writable>(Long.valueOf(row_offset / _brlen + 1), tmp));
                break;
            }
        case ROW_BLOCK_WISE_N:
            {
                if (_n >= _brlen) {
                    // partition spans >= one block: emit the block as-is with
                    // indexes relative to the enclosing n-row partition
                    PairWritableBlock tmp = new PairWritableBlock();
                    tmp.indexes = new MatrixIndexes(((row_offset % _n) / _brlen) + 1, col_offset / _bclen + 1);
                    tmp.block = new MatrixBlock(value2);
                    ret.add(new Tuple2<Long, Writable>(Long.valueOf(row_offset / _n + 1), tmp));
                } else {
                    // partition smaller than a block: slice the block into n-row chunks
                    for (int i = 0; i < rows; i += _n) {
                        PairWritableBlock tmp = new PairWritableBlock();
                        tmp.indexes = new MatrixIndexes(1, col_offset / _bclen + 1);
                        tmp.block = value2.slice(i, Math.min(i + (int) _n - 1, value2.getNumRows() - 1), 0, value2.getNumColumns() - 1, new MatrixBlock());
                        ret.add(new Tuple2<Long, Writable>(Long.valueOf((row_offset + i) / _n + 1), tmp));
                    }
                }
                break;
            }
        case COLUMN_WISE:
            {
                // one output pair per individual column
                MatrixBlock[] blks = DataConverter.convertToMatrixBlockPartitions(value2, true);
                for (int i = 0; i < cols; i++) {
                    PairWritableBlock tmp = new PairWritableBlock();
                    tmp.indexes = new MatrixIndexes(row_offset / _brlen + 1, 1);
                    tmp.block = blks[i];
                    ret.add(new Tuple2<Long, Writable>(Long.valueOf(col_offset + 1 + i), tmp));
                }
                break;
            }
        case COLUMN_BLOCK_WISE:
            {
                // the entire block maps to a single column-block partition
                PairWritableBlock tmp = new PairWritableBlock();
                tmp.indexes = new MatrixIndexes(row_offset / _brlen + 1, 1);
                tmp.block = new MatrixBlock(value2);
                ret.add(new Tuple2<Long, Writable>(Long.valueOf(col_offset / _bclen + 1), tmp));
                break;
            }
        case COLUMN_BLOCK_WISE_N:
            {
                if (_n >= _bclen) {
                    // partition spans >= one block: emit the block as-is with
                    // indexes relative to the enclosing n-column partition
                    PairWritableBlock tmp = new PairWritableBlock();
                    tmp.indexes = new MatrixIndexes(row_offset / _brlen + 1, ((col_offset % _n) / _bclen) + 1);
                    tmp.block = new MatrixBlock(value2);
                    ret.add(new Tuple2<Long, Writable>(Long.valueOf(col_offset / _n + 1), tmp));
                } else {
                    // partition smaller than a block: slice the block into n-column chunks
                    for (int i = 0; i < cols; i += _n) {
                        PairWritableBlock tmp = new PairWritableBlock();
                        tmp.indexes = new MatrixIndexes(row_offset / _brlen + 1, 1);
                        tmp.block = value2.slice(0, value2.getNumRows() - 1, i, Math.min(i + (int) _n - 1, value2.getNumColumns() - 1), new MatrixBlock());
                        ret.add(new Tuple2<Long, Writable>(Long.valueOf((col_offset + i) / _n + 1), tmp));
                    }
                }
                break;
            }
        default:
            throw new DMLRuntimeException("Unsupported partition format: " + _dpf);
    }
    return ret.iterator();
}
Also used : MatrixBlock(org.apache.sysml.runtime.matrix.data.MatrixBlock) Tuple2(scala.Tuple2) MatrixIndexes(org.apache.sysml.runtime.matrix.data.MatrixIndexes) PairWritableBlock(org.apache.sysml.runtime.controlprogram.parfor.util.PairWritableBlock) IOException(java.io.IOException) LinkedList(java.util.LinkedList) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException)

Example 2 with PairWritableBlock

use of org.apache.sysml.runtime.controlprogram.parfor.util.PairWritableBlock in project incubator-systemml by apache.

This example shows the call method of the DataPartitionerRemoteSparkReducer class.

/**
 * Writes all binary blocks of one grouped partition into a single
 * binary-block SequenceFile, named after the partition key.
 *
 * @param arg0 pair of partition key and the iterable of its blocks
 * @throws Exception on any I/O failure while writing the sequence file
 */
@Override
@SuppressWarnings("deprecation")
public void call(Tuple2<Long, Iterable<Writable>> arg0) throws Exception {
    // unpack the grouped partition input
    Long partitionKey = arg0._1();
    Iterator<Writable> blocks = arg0._2().iterator();
    // stream all blocks of this partition into one sequence file
    SequenceFile.Writer seqWriter = null;
    try {
        // set up the sequence file writer for the target partition file
        Configuration conf = new Configuration(ConfigurationManager.getCachedJobConf());
        Path partFile = new Path(_fnameNew + File.separator + partitionKey);
        FileSystem fsys = IOUtilFunctions.getFileSystem(partFile, conf);
        seqWriter = new SequenceFile.Writer(fsys, conf, partFile, MatrixIndexes.class, MatrixBlock.class, conf.getInt(MRConfigurationNames.IO_FILE_BUFFER_SIZE, 4096), (short) _replication, fsys.getDefaultBlockSize(), null, new SequenceFile.Metadata());
        // append blocks in arrival order (ordering within the file is irrelevant)
        while (blocks.hasNext()) {
            PairWritableBlock pair = (PairWritableBlock) blocks.next();
            seqWriter.append(pair.indexes, pair.block);
        }
    } finally {
        // always release the writer, even on failure
        IOUtilFunctions.closeSilently(seqWriter);
    }
}
Also used : Path(org.apache.hadoop.fs.Path) MatrixBlock(org.apache.sysml.runtime.matrix.data.MatrixBlock) SequenceFile(org.apache.hadoop.io.SequenceFile) Configuration(org.apache.hadoop.conf.Configuration) MatrixIndexes(org.apache.sysml.runtime.matrix.data.MatrixIndexes) FileSystem(org.apache.hadoop.fs.FileSystem) PairWritableBlock(org.apache.sysml.runtime.controlprogram.parfor.util.PairWritableBlock) Writable(org.apache.hadoop.io.Writable)

Example 3 with PairWritableBlock

use of org.apache.sysml.runtime.controlprogram.parfor.util.PairWritableBlock in project systemml by apache.

This example shows the call method of the DataPartitionerRemoteSparkReducer class.

/**
 * Writes every binary block belonging to one grouped partition into a
 * single binary-block SequenceFile whose name is the partition key.
 *
 * @param arg0 pair of partition key and iterable over the partition's blocks
 * @throws Exception on any I/O failure while writing the output file
 */
@Override
@SuppressWarnings("deprecation")
public void call(Tuple2<Long, Iterable<Writable>> arg0) throws Exception {
    // extract the partition key and an iterator over its blocks
    Long pKey = arg0._1();
    Iterator<Writable> blockIter = arg0._2().iterator();
    // the entire partition goes into one binary-block sequence file
    SequenceFile.Writer out = null;
    try {
        // build the writer for the per-partition output file
        Configuration jobConf = new Configuration(ConfigurationManager.getCachedJobConf());
        Path outPath = new Path(_fnameNew + File.separator + pKey);
        FileSystem fileSys = IOUtilFunctions.getFileSystem(outPath, jobConf);
        out = new SequenceFile.Writer(fileSys, jobConf, outPath, MatrixIndexes.class, MatrixBlock.class, jobConf.getInt(MRConfigurationNames.IO_FILE_BUFFER_SIZE, 4096), (short) _replication, fileSys.getDefaultBlockSize(), null, new SequenceFile.Metadata());
        // blocks may be appended unordered
        while (blockIter.hasNext()) {
            PairWritableBlock pwb = (PairWritableBlock) blockIter.next();
            out.append(pwb.indexes, pwb.block);
        }
    } finally {
        // close quietly regardless of success or failure
        IOUtilFunctions.closeSilently(out);
    }
}
Also used : Path(org.apache.hadoop.fs.Path) MatrixBlock(org.apache.sysml.runtime.matrix.data.MatrixBlock) SequenceFile(org.apache.hadoop.io.SequenceFile) Configuration(org.apache.hadoop.conf.Configuration) MatrixIndexes(org.apache.sysml.runtime.matrix.data.MatrixIndexes) FileSystem(org.apache.hadoop.fs.FileSystem) PairWritableBlock(org.apache.sysml.runtime.controlprogram.parfor.util.PairWritableBlock) Writable(org.apache.hadoop.io.Writable)

Example 4 with PairWritableBlock

use of org.apache.sysml.runtime.controlprogram.parfor.util.PairWritableBlock in project incubator-systemml by apache.

This example shows the collectBinaryBlock method of the RemoteDPParForSparkWorker class.

/**
 * Collects a matrixblock partition from a given input iterator over
 * binary blocks.
 *
 * Note it reuses the instance attribute _partition - multiple calls
 * will overwrite the result.
 *
 * @param valueList iterable writables
 * @param reuse matrix block partition for reuse
 * @return matrix block
 * @throws IOException if IOException occurs
 */
private MatrixBlock collectBinaryBlock(Iterable<Writable> valueList, MatrixBlock reuse) throws IOException {
    // shortcut: a partition consisting of exactly one block can be returned directly,
    // which avoids unnecessary copies and reduces memory pressure
    if (valueList instanceof Collection && ((Collection<Writable>) valueList).size() == 1) {
        return ((PairWritableBlock) valueList.iterator().next()).block;
    }
    // default: create or reuse target partition and copy individual partition fragments
    // into this target, including nnz maintenance and potential dense-sparse format change
    MatrixBlock partition = reuse;
    try {
        // reset reuse block, keep configured representation
        // NOTE(review): in the _tSparseCol case the dimensions are swapped
        // (_clen x _rlen), presumably a transposed sparse-column layout — confirm
        if (_tSparseCol)
            partition = new MatrixBlock(_clen, _rlen, true);
        else if (partition != null)
            partition.reset(_rlen, _clen, false);
        else
            partition = new MatrixBlock(_rlen, _clen, false);
        long lnnz = 0;
        for (Writable val : valueList) {
            PairWritableBlock pval = (PairWritableBlock) val;
            // 0-based cell offsets of this fragment within the target partition
            int row_offset = (int) (pval.indexes.getRowIndex() - 1) * _brlen;
            int col_offset = (int) (pval.indexes.getColumnIndex() - 1) * _bclen;
            if (// DENSE
            !partition.isInSparseFormat())
                partition.copy(row_offset, row_offset + pval.block.getNumRows() - 1, col_offset, col_offset + pval.block.getNumColumns() - 1, pval.block, false);
            else
                // SPARSE
                partition.appendToSparse(pval.block, row_offset, col_offset);
            // accumulate nnz manually; set once at the end instead of per append
            lnnz += pval.block.getNonZeros();
        }
        // post-processing: cleanups if required
        // sort sparse rows only if fragments from multiple column blocks were
        // appended (appendToSparse does not guarantee column order across blocks)
        if (partition.isInSparseFormat() && _clen > _bclen)
            partition.sortSparseRows();
        partition.setNonZeros(lnnz);
        partition.examSparsity();
    } catch (DMLRuntimeException ex) {
        // surface runtime errors through the declared IOException contract
        throw new IOException(ex);
    }
    return partition;
}
Also used : MatrixBlock(org.apache.sysml.runtime.matrix.data.MatrixBlock) PairWritableBlock(org.apache.sysml.runtime.controlprogram.parfor.util.PairWritableBlock) Collection(java.util.Collection) Writable(org.apache.hadoop.io.Writable) IOException(java.io.IOException) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException)

Example 5 with PairWritableBlock

use of org.apache.sysml.runtime.controlprogram.parfor.util.PairWritableBlock in project incubator-systemml by apache.

This example shows the collectBinaryBlock method of the RemoteDPParWorkerReducer class.

/**
 * Collects a matrixblock partition from a given input iterator over
 * binary blocks.
 *
 * Note it reuses the instance attribute _partition - multiple calls
 * will overwrite the result.
 *
 * @param valueList iterable writables
 * @return matrix block
 * @throws IOException if IOException occurs
 */
private MatrixBlock collectBinaryBlock(Iterator<Writable> valueList) throws IOException {
    try {
        // reset reuse block, keep configured representation
        _partition.reset(_rlen, _clen);
        // copy each incoming block fragment to its cell offset in the partition
        while (valueList.hasNext()) {
            PairWritableBlock pairValue = (PairWritableBlock) valueList.next();
            // 0-based cell offsets of this fragment within the target partition
            int row_offset = (int) (pairValue.indexes.getRowIndex() - 1) * _brlen;
            int col_offset = (int) (pairValue.indexes.getColumnIndex() - 1) * _bclen;
            MatrixBlock block = pairValue.block;
            if (// DENSE
            !_partition.isInSparseFormat()) {
                // dense target: in-place range copy without dimension change
                _partition.copy(row_offset, row_offset + block.getNumRows() - 1, col_offset, col_offset + block.getNumColumns() - 1, pairValue.block, false);
            } else // SPARSE
            {
                // sparse target: append fragment rows at the given offsets
                _partition.appendToSparse(pairValue.block, row_offset, col_offset);
            }
        }
        // final partition cleanup
        cleanupCollectedMatrixPartition(_partition.isInSparseFormat());
    } catch (DMLRuntimeException ex) {
        // surface runtime errors through the declared IOException contract
        throw new IOException(ex);
    }
    return _partition;
}
Also used : MatrixBlock(org.apache.sysml.runtime.matrix.data.MatrixBlock) PairWritableBlock(org.apache.sysml.runtime.controlprogram.parfor.util.PairWritableBlock) IOException(java.io.IOException) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException)

Aggregations

PairWritableBlock (org.apache.sysml.runtime.controlprogram.parfor.util.PairWritableBlock)8 MatrixBlock (org.apache.sysml.runtime.matrix.data.MatrixBlock)8 IOException (java.io.IOException)6 DMLRuntimeException (org.apache.sysml.runtime.DMLRuntimeException)6 Writable (org.apache.hadoop.io.Writable)4 MatrixIndexes (org.apache.sysml.runtime.matrix.data.MatrixIndexes)4 Collection (java.util.Collection)2 LinkedList (java.util.LinkedList)2 Configuration (org.apache.hadoop.conf.Configuration)2 FileSystem (org.apache.hadoop.fs.FileSystem)2 Path (org.apache.hadoop.fs.Path)2 SequenceFile (org.apache.hadoop.io.SequenceFile)2 Tuple2 (scala.Tuple2)2