Example 96 with MatrixIndexes

Use of org.apache.sysml.runtime.matrix.data.MatrixIndexes in project incubator-systemml by apache.

The class WriterBinaryBlock, method writePartitionedBinaryBlockMatrixToHDFS.

@SuppressWarnings("deprecation")
public final void writePartitionedBinaryBlockMatrixToHDFS(Path path, JobConf job, MatrixBlock src, long rlen, long clen, int brlen, int bclen, PDataPartitionFormat pformat) throws IOException, DMLRuntimeException {
    boolean sparse = src.isInSparseFormat();
    FileSystem fs = IOUtilFunctions.getFileSystem(path, job);
    //set up preferred custom serialization framework for binary block format
    if (MRJobConfiguration.USE_BINARYBLOCK_SERIALIZATION)
        MRJobConfiguration.addBinaryBlockSerializationFramework(job);
    //initialize blocks for reuse (at most 4 different blocks required)
    MatrixBlock[] blocks = createMatrixBlocksForReuse(rlen, clen, brlen, bclen, sparse, src.getNonZeros());
    switch(pformat) {
        case ROW_BLOCK_WISE_N:
            {
                long numBlocks = ((rlen - 1) / brlen) + 1;
                long numPartBlocks = (long) Math.ceil(((double) DistributedCacheInput.PARTITION_SIZE) / clen / brlen);
                int count = 0;
                for (int k = 0; k < numBlocks; k += numPartBlocks) {
                    // 1) create sequence file writer, with right replication factor 
                    // (config via MRConfigurationNames.DFS_REPLICATION not possible since sequence file internally calls fs.getDefaultReplication())
                    Path path2 = new Path(path.toString() + File.separator + (++count));
                    SequenceFile.Writer writer = new SequenceFile.Writer(fs, job, path2, MatrixIndexes.class, MatrixBlock.class);
                    //2) reblock and write
                    try {
                        MatrixIndexes indexes = new MatrixIndexes();
                        //create and write subblocks of matrix
                        for (int blockRow = k; blockRow < Math.min((int) Math.ceil(src.getNumRows() / (double) brlen), k + numPartBlocks); blockRow++) {
                            for (int blockCol = 0; blockCol < (int) Math.ceil(src.getNumColumns() / (double) bclen); blockCol++) {
                                int maxRow = (blockRow * brlen + brlen < src.getNumRows()) ? brlen : src.getNumRows() - blockRow * brlen;
                                int maxCol = (blockCol * bclen + bclen < src.getNumColumns()) ? bclen : src.getNumColumns() - blockCol * bclen;
                                int row_offset = blockRow * brlen;
                                int col_offset = blockCol * bclen;
                                //get reuse matrix block
                                MatrixBlock block = getMatrixBlockForReuse(blocks, maxRow, maxCol, brlen, bclen);
                                //copy submatrix to block
                                src.sliceOperations(row_offset, row_offset + maxRow - 1, col_offset, col_offset + maxCol - 1, block);
                                //append block to sequence file
                                indexes.setIndexes(blockRow + 1, blockCol + 1);
                                writer.append(indexes, block);
                                //reset block for later reuse
                                block.reset();
                            }
                        }
                    } finally {
                        IOUtilFunctions.closeSilently(writer);
                    }
                }
                break;
            }
        case COLUMN_BLOCK_WISE_N:
            {
                long numBlocks = ((clen - 1) / bclen) + 1;
                long numPartBlocks = (long) Math.ceil(((double) DistributedCacheInput.PARTITION_SIZE) / rlen / bclen);
                int count = 0;
                for (int k = 0; k < numBlocks; k += numPartBlocks) {
                    // 1) create sequence file writer, with right replication factor 
                    // (config via MRConfigurationNames.DFS_REPLICATION not possible since sequence file internally calls fs.getDefaultReplication())
                    Path path2 = new Path(path.toString() + File.separator + (++count));
                    SequenceFile.Writer writer = new SequenceFile.Writer(fs, job, path2, MatrixIndexes.class, MatrixBlock.class);
                    //2) reblock and write
                    try {
                        MatrixIndexes indexes = new MatrixIndexes();
                        //create and write subblocks of matrix
                        for (int blockRow = 0; blockRow < (int) Math.ceil(src.getNumRows() / (double) brlen); blockRow++) {
                            for (int blockCol = k; blockCol < Math.min((int) Math.ceil(src.getNumColumns() / (double) bclen), k + numPartBlocks); blockCol++) {
                                int maxRow = (blockRow * brlen + brlen < src.getNumRows()) ? brlen : src.getNumRows() - blockRow * brlen;
                                int maxCol = (blockCol * bclen + bclen < src.getNumColumns()) ? bclen : src.getNumColumns() - blockCol * bclen;
                                int row_offset = blockRow * brlen;
                                int col_offset = blockCol * bclen;
                                //get reuse matrix block
                                MatrixBlock block = getMatrixBlockForReuse(blocks, maxRow, maxCol, brlen, bclen);
                                //copy submatrix to block
                                src.sliceOperations(row_offset, row_offset + maxRow - 1, col_offset, col_offset + maxCol - 1, block);
                                //append block to sequence file
                                indexes.setIndexes(blockRow + 1, blockCol + 1);
                                writer.append(indexes, block);
                                //reset block for later reuse
                                block.reset();
                            }
                        }
                    } finally {
                        IOUtilFunctions.closeSilently(writer);
                    }
                }
                break;
            }
        default:
            throw new DMLRuntimeException("Unsupported partition format for distributed cache input: " + pformat);
    }
}
Also used : Path(org.apache.hadoop.fs.Path) MatrixBlock(org.apache.sysml.runtime.matrix.data.MatrixBlock) SequenceFile(org.apache.hadoop.io.SequenceFile) MatrixIndexes(org.apache.sysml.runtime.matrix.data.MatrixIndexes) FileSystem(org.apache.hadoop.fs.FileSystem) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException)
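
The partition sizing above is plain integer arithmetic: ((rlen - 1) / brlen) + 1 is a ceiling division that counts block rows, and numPartBlocks bounds how many block rows go into one partition file. A minimal, dependency-free sketch of that arithmetic (the dimensions and partition size are made-up illustration values, not SystemML defaults):

public class BlockCountSketch {
    public static void main(String[] args) {
        long rlen = 10000, clen = 2500;  // hypothetical matrix dimensions
        int brlen = 1000;                // hypothetical rows per block
        long partitionSize = 4000000;    // stand-in for DistributedCacheInput.PARTITION_SIZE

        // integer ceiling division: number of block rows
        long numBlocks = ((rlen - 1) / brlen) + 1;
        // block rows per partition file, from the same formula as the writer
        long numPartBlocks = (long) Math.ceil((double) partitionSize / clen / brlen);

        System.out.println(numBlocks);     // 10
        System.out.println(numPartBlocks); // 2
        // the writer loop steps k += numPartBlocks, creating ceil(10 / 2) = 5 part files
    }
}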

Example 97 with MatrixIndexes

Use of org.apache.sysml.runtime.matrix.data.MatrixIndexes in project incubator-systemml by apache.

The class WriterBinaryCell, method writeEmptyMatrixToHDFS.

@Override
@SuppressWarnings("deprecation")
public void writeEmptyMatrixToHDFS(String fname, long rlen, long clen, int brlen, int bclen) throws IOException, DMLRuntimeException {
    JobConf job = new JobConf(ConfigurationManager.getCachedJobConf());
    Path path = new Path(fname);
    FileSystem fs = IOUtilFunctions.getFileSystem(path, job);
    SequenceFile.Writer writer = null;
    try {
        writer = new SequenceFile.Writer(fs, job, path, MatrixIndexes.class, MatrixCell.class);
        MatrixIndexes index = new MatrixIndexes(1, 1);
        MatrixCell cell = new MatrixCell(0);
        writer.append(index, cell);
    } finally {
        IOUtilFunctions.closeSilently(writer);
    }
    IOUtilFunctions.deleteCrcFilesFromLocalFileSystem(fs, path);
}
Also used : Path(org.apache.hadoop.fs.Path) SequenceFile(org.apache.hadoop.io.SequenceFile) MatrixIndexes(org.apache.sysml.runtime.matrix.data.MatrixIndexes) FileSystem(org.apache.hadoop.fs.FileSystem) MatrixCell(org.apache.sysml.runtime.matrix.data.MatrixCell) JobConf(org.apache.hadoop.mapred.JobConf)
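
For a quick round-trip check, the sentinel record can be read back with the matching reader API. A minimal sketch, assuming the same deprecated Hadoop 1-style SequenceFile.Reader constructor that mirrors the Writer above; the helper name readEmptyMatrixSentinel is made up for illustration:

@SuppressWarnings("deprecation")
private static void readEmptyMatrixSentinel(FileSystem fs, Path path, JobConf job) throws IOException {
    SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, job);
    try {
        MatrixIndexes index = new MatrixIndexes();
        MatrixCell cell = new MatrixCell();
        while (reader.next(index, cell)) {
            // writeEmptyMatrixToHDFS produces exactly one record: indexes (1,1), value 0
            System.out.println(index + " -> " + cell.getValue());
        }
    } finally {
        IOUtilFunctions.closeSilently(reader);
    }
}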

Example 98 with MatrixIndexes

Use of org.apache.sysml.runtime.matrix.data.MatrixIndexes in project incubator-systemml by apache.

The class WriterBinaryCell, method writeBinaryCellMatrixToHDFS.

@SuppressWarnings("deprecation")
protected void writeBinaryCellMatrixToHDFS(Path path, JobConf job, MatrixBlock src, long rlen, long clen, int brlen, int bclen) throws IOException {
    boolean sparse = src.isInSparseFormat();
    boolean entriesWritten = false;
    FileSystem fs = IOUtilFunctions.getFileSystem(path, job);
    SequenceFile.Writer writer = new SequenceFile.Writer(fs, job, path, MatrixIndexes.class, MatrixCell.class);
    MatrixIndexes indexes = new MatrixIndexes();
    MatrixCell cell = new MatrixCell();
    int rows = src.getNumRows();
    int cols = src.getNumColumns();
    try {
        //bound check per block
        if (rows > rlen || cols > clen) {
            throw new IOException("Matrix block [1:" + rows + ",1:" + cols + "] " + "out of overall matrix range [1:" + rlen + ",1:" + clen + "].");
        }
        if (sparse) { //SPARSE
            Iterator<IJV> iter = src.getSparseBlockIterator();
            while (iter.hasNext()) {
                IJV lcell = iter.next();
                indexes.setIndexes(lcell.getI() + 1, lcell.getJ() + 1);
                cell.setValue(lcell.getV());
                writer.append(indexes, cell);
                entriesWritten = true;
            }
        } else { //DENSE
            for (int i = 0; i < rows; i++) {
                for (int j = 0; j < cols; j++) {
                    double lvalue = src.getValueDenseUnsafe(i, j);
                    if (lvalue != 0) { //for nnz
                        indexes.setIndexes(i + 1, j + 1);
                        cell.setValue(lvalue);
                        writer.append(indexes, cell);
                        entriesWritten = true;
                    }
                }
            }
        }
        //handle empty result
        if (!entriesWritten) {
            writer.append(new MatrixIndexes(1, 1), new MatrixCell(0));
        }
    } finally {
        IOUtilFunctions.closeSilently(writer);
    }
}
Also used : SequenceFile(org.apache.hadoop.io.SequenceFile) IJV(org.apache.sysml.runtime.matrix.data.IJV) MatrixIndexes(org.apache.sysml.runtime.matrix.data.MatrixIndexes) FileSystem(org.apache.hadoop.fs.FileSystem) MatrixCell(org.apache.sysml.runtime.matrix.data.MatrixCell) IOException(java.io.IOException)
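
Note the index convention this writer maintains: IJV cell coordinates from the sparse iterator are 0-based internally, while the MatrixIndexes written to disk are 1-based, hence the +1 on both branches. A minimal fragment illustrating the conversion, with hypothetical cell data and the same classes as the example above:

// hypothetical cell at internal (0-based) position (0, 4) with value 3.5
int i = 0, j = 4;
double v = 3.5;
MatrixIndexes indexes = new MatrixIndexes();
MatrixCell cell = new MatrixCell();
indexes.setIndexes(i + 1, j + 1); // on-disk binary-cell coordinates are 1-based
cell.setValue(v);
// indexes now holds row 1, column 5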

Example 99 with MatrixIndexes

Use of org.apache.sysml.runtime.matrix.data.MatrixIndexes in project incubator-systemml by apache.

The class CSVWriteMapper, method map.

@Override
@SuppressWarnings("unchecked")
public void map(Writable rawKey, Writable rawValue, OutputCollector<TaggedFirstSecondIndexes, MatrixBlock> out, Reporter reporter) throws IOException {
    long start = System.currentTimeMillis();
    //for each representative matrix, read the record and apply instructions
    for (int i = 0; i < representativeMatrixes.size(); i++) {
        //convert the record into the right format for the representative matrix
        inputConverter.setBlockSize(brlens[i], bclens[i]);
        inputConverter.convert(rawKey, rawValue);
        byte thisMatrix = representativeMatrixes.get(i);
        //apply unary instructions on the converted indexes and values
        while (inputConverter.hasNext()) {
            Pair<MatrixIndexes, MatrixBlock> pair = inputConverter.next();
            MatrixIndexes indexes = pair.getKey();
            MatrixBlock value = pair.getValue();
            outIndexes.setIndexes(indexes.getRowIndex(), indexes.getColumnIndex());
            ArrayList<Byte> outputs = inputOutputMap.get(thisMatrix);
            for (byte output : outputs) {
                outIndexes.setTag(output);
                out.collect(outIndexes, value);
            //LOG.info("Mapper output: "+outIndexes+", "+value+", tag: "+output);
            }
        }
    }
    reporter.incrCounter(Counters.MAP_TIME, System.currentTimeMillis() - start);
}
Also used : MatrixBlock(org.apache.sysml.runtime.matrix.data.MatrixBlock) MatrixIndexes(org.apache.sysml.runtime.matrix.data.MatrixIndexes)
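
The mapper's pattern is reuse plus retagging: a single key instance is kept, its tag is rewritten once per consuming output, and the same block is collected under each tag. A dependency-free sketch of that fan-out (TaggedKey is a hypothetical stand-in, not the SystemML TaggedFirstSecondIndexes class):

import java.util.Arrays;
import java.util.List;

public class TagFanOutSketch {
    // hypothetical stand-in for a tagged block key
    static final class TaggedKey {
        long row, col;
        byte tag;
        void setIndexes(long r, long c) { row = r; col = c; }
        void setTag(byte t) { tag = t; }
        @Override
        public String toString() { return "(" + row + "," + col + ") tag=" + tag; }
    }

    public static void main(String[] args) {
        TaggedKey outIndexes = new TaggedKey();                 // reused across emits
        List<Byte> outputs = Arrays.asList((byte) 0, (byte) 2); // consumers of this matrix

        outIndexes.setIndexes(3, 7);                            // block coordinates
        for (byte output : outputs) {
            outIndexes.setTag(output);                          // retag, same coordinates
            System.out.println("collect " + outIndexes);        // one emit per consumer
        }
    }
}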

Example 100 with MatrixIndexes

Use of org.apache.sysml.runtime.matrix.data.MatrixIndexes in project incubator-systemml by apache.

The class GMRCtableBuffer, method flushBuffer.

@SuppressWarnings("deprecation")
public void flushBuffer(Reporter reporter) throws RuntimeException {
    try {
        if (_mapBuffer != null) {
            MatrixIndexes key = null; //new MatrixIndexes();
            MatrixCell value = new MatrixCell();
            for (Entry<Byte, CTableMap> ctable : _mapBuffer.entrySet()) {
                ArrayList<Integer> resultIDs = ReduceBase.getOutputIndexes(ctable.getKey(), _resultIndexes);
                CTableMap resultMap = ctable.getValue();
                //maintain result dims and nonzeros
                for (Integer i : resultIDs) {
                    _resultNonZeros[i] += resultMap.size();
                    if (_resultDimsUnknown[i] == (byte) 1) {
                        _resultMaxRowDims[i] = Math.max(resultMap.getMaxRow(), _resultMaxRowDims[i]);
                        _resultMaxColDims[i] = Math.max(resultMap.getMaxColumn(), _resultMaxColDims[i]);
                    }
                }
                //output result data 
                for (LLDoubleEntry e : resultMap.entrySet()) {
                    key = new MatrixIndexes(e.key1, e.key2);
                    value.setValue(e.value);
                    for (Integer i : resultIDs) {
                        _collector.collectOutput(key, value, i, reporter);
                    }
                }
            }
        } else if (_blockBuffer != null) {
            MatrixIndexes key = new MatrixIndexes(1, 1);
            //DataConverter.writeBinaryBlockMatrixToHDFS(path, job, mat, mc.get_rows(), mc.get_cols(), mc.get_rows_per_block(), mc.get_cols_per_block(), replication);
            for (Entry<Byte, MatrixBlock> ctable : _blockBuffer.entrySet()) {
                ArrayList<Integer> resultIDs = ReduceBase.getOutputIndexes(ctable.getKey(), _resultIndexes);
                MatrixBlock outBlock = ctable.getValue();
                outBlock.recomputeNonZeros();
                // TODO: change hard coding of 1000
                int brlen = 1000, bclen = 1000;
                int rlen = outBlock.getNumRows();
                int clen = outBlock.getNumColumns();
                // final output matrix is smaller than a single block
                if (rlen <= brlen && clen <= bclen) {
                    key = new MatrixIndexes(1, 1);
                    for (Integer i : resultIDs) {
                        _collector.collectOutput(key, outBlock, i, reporter);
                        _resultNonZeros[i] += outBlock.getNonZeros();
                    }
                } else {
                    //Following code is similar to that in DataConverter.writeBinaryBlockMatrixToHDFS
                    //initialize blocks for reuse (at most 4 different blocks required)
                    MatrixBlock[] blocks = MatrixWriter.createMatrixBlocksForReuse(rlen, clen, brlen, bclen, true, outBlock.getNonZeros());
                    //create and write subblocks of matrix
                    for (int blockRow = 0; blockRow < (int) Math.ceil(rlen / (double) brlen); blockRow++) {
                        for (int blockCol = 0; blockCol < (int) Math.ceil(clen / (double) bclen); blockCol++) {
                            int maxRow = (blockRow * brlen + brlen < rlen) ? brlen : rlen - blockRow * brlen;
                            int maxCol = (blockCol * bclen + bclen < clen) ? bclen : clen - blockCol * bclen;
                            int row_offset = blockRow * brlen;
                            int col_offset = blockCol * bclen;
                            //get reuse matrix block
                            MatrixBlock block = MatrixWriter.getMatrixBlockForReuse(blocks, maxRow, maxCol, brlen, bclen);
                            //copy submatrix to block
                            outBlock.sliceOperations(row_offset, row_offset + maxRow - 1, col_offset, col_offset + maxCol - 1, block);
                            // TODO: skip empty "block"
                            //append block to sequence file
                            key.setIndexes(blockRow + 1, blockCol + 1);
                            for (Integer i : resultIDs) {
                                _collector.collectOutput(key, block, i, reporter);
                                _resultNonZeros[i] += block.getNonZeros();
                            }
                            //reset block for later reuse
                            block.reset();
                        }
                    }
                }
            }
        } else {
            throw new DMLRuntimeException("Unexpected.. both ctable buffers are empty.");
        }
    } catch (Exception ex) {
        throw new RuntimeException("Failed to flush ctable buffer.", ex);
    }
    //remove existing partial ctables
    if (_mapBuffer != null)
        _mapBuffer.clear();
    else
        _blockBuffer.clear();
}
Also used : MatrixBlock(org.apache.sysml.runtime.matrix.data.MatrixBlock) MatrixIndexes(org.apache.sysml.runtime.matrix.data.MatrixIndexes) ArrayList(java.util.ArrayList) LLDoubleEntry(org.apache.sysml.runtime.util.LongLongDoubleHashMap.LLDoubleEntry) Entry(java.util.Map.Entry) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException) CTableMap(org.apache.sysml.runtime.matrix.data.CTableMap) MatrixCell(org.apache.sysml.runtime.matrix.data.MatrixCell)
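
The re-blocking branch above uses the same offset arithmetic as Example 96: enumerate block coordinates, clamp the boundary blocks, and emit 1-based keys. A self-contained sketch of just that enumeration (dimensions are illustrative; Math.min replaces the equivalent ternaries):

public class BlockSliceSketch {
    public static void main(String[] args) {
        int rlen = 2300, clen = 1500;   // hypothetical ctable result dimensions
        int brlen = 1000, bclen = 1000; // the hard-coded block sizes from the example

        for (int blockRow = 0; blockRow < (int) Math.ceil(rlen / (double) brlen); blockRow++) {
            for (int blockCol = 0; blockCol < (int) Math.ceil(clen / (double) bclen); blockCol++) {
                // boundary blocks are clamped to the remaining rows/columns
                int maxRow = Math.min(brlen, rlen - blockRow * brlen);
                int maxCol = Math.min(bclen, clen - blockCol * bclen);
                System.out.printf("key (%d,%d): %d x %d at offset (%d,%d)%n",
                    blockRow + 1, blockCol + 1, maxRow, maxCol,
                    blockRow * brlen, blockCol * bclen);
            }
        }
        // prints 6 blocks; the bottom-right boundary block is 300 x 500
    }
}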

Aggregations

MatrixIndexes (org.apache.sysml.runtime.matrix.data.MatrixIndexes): 144
MatrixBlock (org.apache.sysml.runtime.matrix.data.MatrixBlock): 121
MatrixCharacteristics (org.apache.sysml.runtime.matrix.MatrixCharacteristics): 57
SparkExecutionContext (org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext): 44
DMLRuntimeException (org.apache.sysml.runtime.DMLRuntimeException): 38
Path (org.apache.hadoop.fs.Path): 21
SequenceFile (org.apache.hadoop.io.SequenceFile): 21
JavaPairRDD (org.apache.spark.api.java.JavaPairRDD): 20
IOException (java.io.IOException): 19
ArrayList (java.util.ArrayList): 18
FileSystem (org.apache.hadoop.fs.FileSystem): 18
MatrixCell (org.apache.sysml.runtime.matrix.data.MatrixCell): 18
Tuple2 (scala.Tuple2): 17
IndexedMatrixValue (org.apache.sysml.runtime.matrix.mapred.IndexedMatrixValue): 15
JobConf (org.apache.hadoop.mapred.JobConf): 11
MatrixValue (org.apache.sysml.runtime.matrix.data.MatrixValue): 10
MatrixObject (org.apache.sysml.runtime.controlprogram.caching.MatrixObject): 9
File (java.io.File): 7
RDDObject (org.apache.sysml.runtime.instructions.spark.data.RDDObject): 7
RecordReader (org.apache.hadoop.mapred.RecordReader): 6