Search in sources :

Example 71 with MatrixIndexes

use of org.apache.sysml.runtime.matrix.data.MatrixIndexes in project incubator-systemml by apache.

Method commonMap of the class MapperBase.

/**
 * Pushes one raw input record through every representative matrix.
 *
 * <p>For each entry of {@code representativeMatrixes} the input converter is configured
 * with that entry's block size and fed the raw record. Every (indexes, value) pair the
 * converter yields is validated, stored as the sole content of {@code cachedValues}, and
 * handed to {@code specialOperationsForActualMap}. On normal completion the elapsed
 * wall-clock time is added to the {@code MAP_TIME} counter.
 *
 * @param rawKey raw key handed to the input converter
 * @param rawValue raw value handed to the input converter
 * @param out collector forwarded to {@code specialOperationsForActualMap}
 * @param reporter reporter forwarded to the hook and used for the {@code MAP_TIME} counter
 * @throws IOException declared by this method; no helper call here is visibly the source
 */
@SuppressWarnings("unchecked")
protected void commonMap(Writable rawKey, Writable rawValue, OutputCollector<Writable, Writable> out, Reporter reporter) throws IOException {
    final long startMillis = System.currentTimeMillis();

    for (int pos = 0; pos < representativeMatrixes.size(); ++pos) {
        final byte matrixTag = representativeMatrixes.get(pos);

        // re-interpret the same raw record using this matrix's block dimensions
        inputConverter.setBlockSize(brlens[pos], bclens[pos]);
        inputConverter.convert(rawKey, rawValue);

        // process every pair produced from the raw record
        while (inputConverter.hasNext()) {
            Pair<MatrixIndexes, MatrixValue> converted = inputConverter.next();
            MatrixIndexes ix = converted.getKey();
            MatrixValue val = converted.getValue();
            checkValidity(ix, val, pos);

            // the cache holds only the current pair while the hook runs
            cachedValues.reset();
            cachedValues.set(matrixTag, ix, val);

            // operations specific to the concrete mapper type
            specialOperationsForActualMap(pos, out, reporter);
        }
    }

    final long elapsedMillis = System.currentTimeMillis() - startMillis;
    reporter.incrCounter(Counters.MAP_TIME, elapsedMillis);
}
Also used : MatrixValue(org.apache.sysml.runtime.matrix.data.MatrixValue) MatrixIndexes(org.apache.sysml.runtime.matrix.data.MatrixIndexes)

Example 72 with MatrixIndexes

use of org.apache.sysml.runtime.matrix.data.MatrixIndexes in project incubator-systemml by apache.

Method flushBufferToBinaryBlocks of the class ReblockBuffer.

/**
 * Flushes the buffered cells as matrix blocks.
 *
 * <p>The first {@code _count} buffer entries are sorted blockwise, the number of distinct
 * blocks is counted to pick a sparse or dense representation, and the cells are then
 * appended block by block. Each finished block is passed to {@code outputBlock} together
 * with {@code outList}. Afterwards the buffer is marked empty. An empty buffer is a no-op.
 *
 * @param outList list forwarded to {@code outputBlock} for every produced block
 * @throws IOException declared by this method
 * @throws DMLRuntimeException declared by this method
 */
public void flushBufferToBinaryBlocks(ArrayList<IndexedMatrixValue> outList) throws IOException, DMLRuntimeException {
    // nothing buffered, nothing to flush
    if (_count == 0) {
        return;
    }

    // Step 1) sort reblock buffer (blockwise, no in-block sorting!)
    Arrays.sort(_buff, 0, _count, new ReblockBufferComparator());

    // Step 2) count how many distinct blocks the sorted buffer touches
    long numBlocks = 0;
    long prevRowBlock = -1;
    long prevColBlock = -1;
    for (int pos = 0; pos < _count; pos++) {
        final long rowBlock = UtilFunctions.computeBlockIndex(_buff[pos][0], _brlen);
        final long colBlock = UtilFunctions.computeBlockIndex(_buff[pos][1], _bclen);
        if (rowBlock == prevRowBlock && colBlock == prevColBlock) {
            // still inside the block seen on the previous entry
            continue;
        }
        prevRowBlock = rowBlock;
        prevColBlock = colBlock;
        numBlocks++;
    }

    // Step 3) build and output the blocks; format chosen from the average cells per block
    final boolean sparse = MatrixBlock.evalSparseFormatInMemory(_brlen, _bclen, _count / numBlocks);
    MatrixIndexes blockIx = new MatrixIndexes();
    MatrixBlock block = new MatrixBlock();
    prevRowBlock = -1;
    prevColBlock = -1;
    for (int pos = 0; pos < _count; pos++) {
        final long rowBlock = UtilFunctions.computeBlockIndex(_buff[pos][0], _brlen);
        final long colBlock = UtilFunctions.computeBlockIndex(_buff[pos][1], _bclen);

        if (rowBlock != prevRowBlock || colBlock != prevColBlock) {
            // hand over the block built so far, then start the next one
            // NOTE(review): on the first boundary this passes the default-constructed
            // placeholders; presumably outputBlock ignores them, confirm in outputBlock
            outputBlock(outList, blockIx, block);
            prevRowBlock = rowBlock;
            prevColBlock = colBlock;
            blockIx = new MatrixIndexes(rowBlock, colBlock);
            block = new MatrixBlock(UtilFunctions.computeBlockSize(_rlen, rowBlock, _brlen), UtilFunctions.computeBlockSize(_clen, colBlock, _bclen), sparse);
        }

        // third buffer slot carries the cell value as raw long bits
        final int rowInBlock = UtilFunctions.computeCellInBlock(_buff[pos][0], _brlen);
        final int colInBlock = UtilFunctions.computeCellInBlock(_buff[pos][1], _bclen);
        block.appendValue(rowInBlock, colInBlock, Double.longBitsToDouble(_buff[pos][2]));
    }

    // the last block is still pending after the loop
    outputBlock(outList, blockIx, block);
    _count = 0;
}
Also used : MatrixBlock(org.apache.sysml.runtime.matrix.data.MatrixBlock) MatrixIndexes(org.apache.sysml.runtime.matrix.data.MatrixIndexes)

Example 73 with MatrixIndexes

use of org.apache.sysml.runtime.matrix.data.MatrixIndexes in project incubator-systemml by apache.

Method processReblockInMapperAndOutput of the class ReblockMapper.

/**
 * Applies the reblock instructions registered under {@code index} to the cached values.
 *
 * <p>For every instruction, each non-null cached value of the instruction's input is
 * appended to the reblock buffer kept for the instruction's output. That buffer is created
 * on first use from the output's matrix characteristics. Matrix blocks are appended as a
 * whole. Any other value is treated as a single matrix cell, and the buffer is flushed
 * once its size reaches its capacity.
 *
 * @param index position used to look up the instructions in {@code reblock_instructions}
 * @param out collector forwarded to the buffer when blocks are appended or flushed
 * @throws IOException declared by this method
 */
protected void processReblockInMapperAndOutput(int index, OutputCollector<Writable, Writable> out) throws IOException {
    for (ReblockInstruction inst : reblock_instructions.get(index)) {
        final ArrayList<IndexedMatrixValue> cached = cachedValues.get(inst.input);
        if (cached == null) {
            // no values cached for this instruction's input
            continue;
        }

        for (IndexedMatrixValue entry : cached) {
            if (entry == null) {
                continue;
            }

            // fetch the output buffer, creating and registering it on first use
            ReblockBuffer target = buffer.get(inst.output);
            if (target == null) {
                final MatrixCharacteristics outDims = dimensionsOut.get(inst.output);
                target = new ReblockBuffer(buffersize, outDims.getRows(), outDims.getCols(), inst.brlen, inst.bclen);
                buffer.put(inst.output, target);
            }

            final MatrixValue payload = entry.getValue();
            final MatrixIndexes ix = entry.getIndexes();

            if (payload instanceof MatrixBlock) {
                // translate the 1-based block indexes into 1-based cell offsets
                final MatrixCharacteristics inDims = dimensionsIn.get(inst.input);
                final long firstRow = (ix.getRowIndex() - 1) * inDims.getRowsPerBlock() + 1;
                final long firstCol = (ix.getColumnIndex() - 1) * inDims.getColsPerBlock() + 1;
                // append entire block incl. flush on demand
                target.appendBlock(firstRow, firstCol, (MatrixBlock) payload, inst.output, out);
                continue;
            }

            // anything that is not a MatrixBlock is handled as a MatrixCell
            target.appendCell(ix.getRowIndex(), ix.getColumnIndex(), ((MatrixCell) payload).getValue());
            // flush buffer if necessary
            if (target.getSize() >= target.getCapacity()) {
                target.flushBuffer(inst.output, out);
            }
        }
    }
}
Also used : MatrixBlock(org.apache.sysml.runtime.matrix.data.MatrixBlock) MatrixValue(org.apache.sysml.runtime.matrix.data.MatrixValue) MatrixIndexes(org.apache.sysml.runtime.matrix.data.MatrixIndexes) ReblockInstruction(org.apache.sysml.runtime.instructions.mr.ReblockInstruction) MatrixCharacteristics(org.apache.sysml.runtime.matrix.MatrixCharacteristics)

Example 74 with MatrixIndexes

use of org.apache.sysml.runtime.matrix.data.MatrixIndexes in project incubator-systemml by apache.

Method configure of the class IndexSortStitchupMapper.

/**
 * Initializes this mapper from the job configuration.
 *
 * <p>After delegating to the superclass, it parses the offsets stored under
 * {@code SortMR.SORT_INDEXES_OFFSETS}, reads the row count and rows-per-block for
 * matrix index 0, and allocates the index object and the single-column block kept
 * in {@code _tmpIx} and {@code _tmpBlk}.
 *
 * @param job job configuration to read the settings from
 */
@Override
public void configure(JobConf job) {
    super.configure(job);

    // both dimension lookups below refer to matrix index 0
    final byte matrixIndex = (byte) 0;

    _offsets = parseOffsets(job.get(SortMR.SORT_INDEXES_OFFSETS));
    _rlen = MRJobConfiguration.getNumRows(job, matrixIndex);
    _brlen = MRJobConfiguration.getNumRowsPerBlock(job, matrixIndex);

    // index/block pair held in fields (presumably reused across map calls, verify in map);
    // the block has _brlen rows, one column, and is requested in non-sparse format
    _tmpIx = new MatrixIndexes();
    _tmpBlk = new MatrixBlock((int) _brlen, 1, false);
}
Also used : MatrixBlock(org.apache.sysml.runtime.matrix.data.MatrixBlock) MatrixIndexes(org.apache.sysml.runtime.matrix.data.MatrixIndexes)

Example 75 with MatrixIndexes

use of org.apache.sysml.runtime.matrix.data.MatrixIndexes in project incubator-systemml by apache.

Method writeBinaryBlockMatrixToSequenceFile of the class WriterBinaryBlock.

/**
 * Writes (part of) a matrix block to a sequence file of {@code MatrixIndexes} and
 * {@code MatrixBlock} pairs.
 *
 * <p>If the whole matrix fits into one block and {@code rl} is 0, {@code src} is written
 * directly under index (1,1). Otherwise the matrix is cut into sub-blocks of at most
 * {@code brlen} x {@code bclen} cells. Block rows from {@code rl / brlen} (inclusive) to
 * {@code ceil(ru / brlen)} (exclusive) are written, each across all block columns, with
 * 1-based block indexes. The writer is always closed via
 * {@code IOUtilFunctions.closeSilently}.
 *
 * @param path target path of the sequence file
 * @param job job configuration used to create the writer
 * @param fs file system used to create the writer
 * @param src matrix block to write
 * @param brlen number of rows per block
 * @param bclen number of columns per block
 * @param rl lower row bound; determines the first block row as {@code rl / brlen}
 * @param ru upper row bound; determines the end block row as {@code ceil(ru / brlen)}
 * @throws IOException if the bound check fails; also declared for writer creation and appends
 */
@SuppressWarnings("deprecation")
protected final void writeBinaryBlockMatrixToSequenceFile(Path path, JobConf job, FileSystem fs, MatrixBlock src, int brlen, int bclen, int rl, int ru) throws IOException {
    final boolean sparse = src.isInSparseFormat();
    final int rlen = src.getNumRows();
    final int clen = src.getNumColumns();

    // 1) create sequence file writer, with right replication factor
    // (config via MRConfigurationNames.DFS_REPLICATION not possible since sequence file internally calls fs.getDefaultReplication())
    final SequenceFile.Writer writer;
    if (_replication <= 0) {
        // no replication specified: rely on the default
        writer = new SequenceFile.Writer(fs, job, path, MatrixIndexes.class, MatrixBlock.class);
    } else {
        // copy of SequenceFile.Writer(fs, job, path, MatrixIndexes.class, MatrixBlock.class), except for replication
        writer = new SequenceFile.Writer(fs, job, path, MatrixIndexes.class, MatrixBlock.class, job.getInt(MRConfigurationNames.IO_FILE_BUFFER_SIZE, 4096), (short) _replication, fs.getDefaultBlockSize(), null, new SequenceFile.Metadata());
    }

    try {
        // 2) bound check for src block
        // NOTE(review): rlen/clen are read from src above, so this can only trigger if
        // src's dimensions change in between; confirm whether the check is still needed
        if (src.getNumRows() > rlen || src.getNumColumns() > clen) {
            throw new IOException("Matrix block [1:" + src.getNumRows() + ",1:" + src.getNumColumns() + "] " + "out of overall matrix range [1:" + rlen + ",1:" + clen + "].");
        }

        // 3) reblock and write
        final MatrixIndexes indexes = new MatrixIndexes();
        final boolean singleBlock = rlen <= brlen && clen <= bclen && rl == 0;
        if (singleBlock) {
            // directly write single block
            indexes.setIndexes(1, 1);
            writer.append(indexes, src);
        } else {
            // general case: initialize blocks for reuse (at most 4 different blocks required)
            final MatrixBlock[] blocks = createMatrixBlocksForReuse(rlen, clen, brlen, bclen, sparse, src.getNonZeros());

            final int firstBlockRow = rl / brlen;
            final int endBlockRow = (int) Math.ceil(ru / (double) brlen);
            final int numBlockCols = (int) Math.ceil(clen / (double) bclen);

            // create and write subblocks of matrix
            for (int br = firstBlockRow; br < endBlockRow; br++) {
                final int rowOffset = br * brlen;
                // blocks at the bottom boundary may have fewer than brlen rows
                final int rowsInBlock = (rowOffset + brlen < rlen) ? brlen : rlen - rowOffset;

                for (int bc = 0; bc < numBlockCols; bc++) {
                    final int colOffset = bc * bclen;
                    // blocks at the right boundary may have fewer than bclen columns
                    final int colsInBlock = (colOffset + bclen < clen) ? bclen : clen - colOffset;

                    // get reuse matrix block and copy the submatrix into it
                    final MatrixBlock block = getMatrixBlockForReuse(blocks, rowsInBlock, colsInBlock, brlen, bclen);
                    src.slice(rowOffset, rowOffset + rowsInBlock - 1, colOffset, colOffset + colsInBlock - 1, block);

                    // append block to sequence file under its 1-based block indexes
                    indexes.setIndexes(br + 1, bc + 1);
                    writer.append(indexes, block);

                    // reset block for later reuse
                    block.reset();
                }
            }
        }
    } finally {
        IOUtilFunctions.closeSilently(writer);
    }
}
Also used : MatrixBlock(org.apache.sysml.runtime.matrix.data.MatrixBlock) SequenceFile(org.apache.hadoop.io.SequenceFile) MatrixIndexes(org.apache.sysml.runtime.matrix.data.MatrixIndexes) IOException(java.io.IOException)

Aggregations

MatrixIndexes (org.apache.sysml.runtime.matrix.data.MatrixIndexes)165 MatrixBlock (org.apache.sysml.runtime.matrix.data.MatrixBlock)142 MatrixCharacteristics (org.apache.sysml.runtime.matrix.MatrixCharacteristics)70 SparkExecutionContext (org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext)48 DMLRuntimeException (org.apache.sysml.runtime.DMLRuntimeException)41 Path (org.apache.hadoop.fs.Path)24 SequenceFile (org.apache.hadoop.io.SequenceFile)23 JavaPairRDD (org.apache.spark.api.java.JavaPairRDD)22 ArrayList (java.util.ArrayList)21 IOException (java.io.IOException)20 FileSystem (org.apache.hadoop.fs.FileSystem)20 MatrixCell (org.apache.sysml.runtime.matrix.data.MatrixCell)19 Tuple2 (scala.Tuple2)19 IndexedMatrixValue (org.apache.sysml.runtime.matrix.mapred.IndexedMatrixValue)17 JobConf (org.apache.hadoop.mapred.JobConf)14 MatrixValue (org.apache.sysml.runtime.matrix.data.MatrixValue)11 CompressedMatrixBlock (org.apache.sysml.runtime.compress.CompressedMatrixBlock)10 MatrixObject (org.apache.sysml.runtime.controlprogram.caching.MatrixObject)10 File (java.io.File)9 RDDObject (org.apache.sysml.runtime.instructions.spark.data.RDDObject)9