Search in sources :

Example 46 with MatrixIndexes

use of org.apache.sysml.runtime.matrix.data.MatrixIndexes in project incubator-systemml by apache.

the class SparkExecutionContext method writeRDDtoHDFS.

/**
 * Saves the RDD behind the given handle as a Hadoop file and returns the
 * non-zero count gathered in an accumulator while the blocks are written.
 *
 * @param rdd handle whose underlying RDD is cast to a pair RDD of matrix indexes and matrix blocks
 * @param path target path passed to {@code saveAsHadoopFile}
 * @param oinfo output info providing the key class, value class, and output format class
 * @return value of the "nnz" accumulator after the save action has run
 */
@SuppressWarnings("unchecked")
public static long writeRDDtoHDFS(RDDObject rdd, String path, OutputInfo oinfo) {
    JavaPairRDD<MatrixIndexes, MatrixBlock> blocks = (JavaPairRDD<MatrixIndexes, MatrixBlock>) rdd.getRDD();
    // the nnz computation rides along with the write instead of needing its own pass
    LongAccumulator nnzCounter = getSparkContextStatic().sc().longAccumulator("nnz");
    JavaPairRDD<MatrixIndexes, MatrixBlock> countedBlocks = blocks.mapValues(new ComputeBinaryBlockNnzFunction(nnzCounter));
    // saving is the action that materializes the mapped RDD and thereby fills the accumulator
    countedBlocks.saveAsHadoopFile(path, oinfo.outputKeyClass, oinfo.outputValueClass, oinfo.outputFormatClass);
    // aggregated nnz over all written blocks
    return nnzCounter.value();
}
Also used : LongAccumulator(org.apache.spark.util.LongAccumulator) MatrixBlock(org.apache.sysml.runtime.matrix.data.MatrixBlock) CompressedMatrixBlock(org.apache.sysml.runtime.compress.CompressedMatrixBlock) MatrixIndexes(org.apache.sysml.runtime.matrix.data.MatrixIndexes) JavaPairRDD(org.apache.spark.api.java.JavaPairRDD) ComputeBinaryBlockNnzFunction(org.apache.sysml.runtime.instructions.spark.functions.ComputeBinaryBlockNnzFunction)

Example 47 with MatrixIndexes

use of org.apache.sysml.runtime.matrix.data.MatrixIndexes in project incubator-systemml by apache.

the class DataPartitionerLocal method partitionBinaryBlock.

/**
 * Partitions a binary-block matrix in two steps: first every block of the
 * input sequence files is bound-checked and appended to a local staging
 * area, then the staged partitions are written out as sequence files
 * (optionally by multiple worker threads).
 *
 * @param fname path of the input matrix
 * @param fnameStaging local staging directory used between the two steps
 * @param fnameNew path of the partitioned output
 * @param rlen overall number of rows, used for the per-block bound check
 * @param clen overall number of columns, used for the per-block bound check
 * @param brlen number of rows per block
 * @param bclen number of columns per block
 * @throws DMLRuntimeException if reading, staging, or writing fails, if a block lies
 *         outside the matrix range, or if the calling thread is interrupted while
 *         waiting for the worker threads
 */
@SuppressWarnings("deprecation")
private void partitionBinaryBlock(String fname, String fnameStaging, String fnameNew, long rlen, long clen, int brlen, int bclen) {
    try {
        // create reuse object
        _reuseBlk = DataPartitioner.createReuseMatrixBlock(_format, brlen, bclen);
        // STEP 1: read matrix from HDFS and write blocks to local staging area
        // check and add input path
        JobConf job = new JobConf(ConfigurationManager.getCachedJobConf());
        Path path = new Path(fname);
        FileSystem fs = IOUtilFunctions.getFileSystem(path, job);
        // prepare sequence file reader, and write to local staging area
        MatrixIndexes key = new MatrixIndexes();
        MatrixBlock value = new MatrixBlock();
        for (Path lpath : IOUtilFunctions.getSequenceFilePaths(fs, path)) {
            SequenceFile.Reader reader = new SequenceFile.Reader(fs, lpath, job);
            try {
                // for each block
                while (reader.next(key, value)) {
                    long row_offset = (key.getRowIndex() - 1) * brlen;
                    long col_offset = (key.getColumnIndex() - 1) * bclen;
                    long rows = value.getNumRows();
                    long cols = value.getNumColumns();
                    // bound check per block
                    if (row_offset + rows < 1 || row_offset + rows > rlen || col_offset + cols < 1 || col_offset + cols > clen) {
                        throw new IOException("Matrix block [" + (row_offset + 1) + ":" + (row_offset + rows) + "," + (col_offset + 1) + ":" + (col_offset + cols) + "] " + "out of overall matrix range [1:" + rlen + ",1:" + clen + "].");
                    }
                    appendBlockToStagingArea(fnameStaging, value, row_offset, col_offset, brlen, bclen);
                }
            } finally {
                IOUtilFunctions.closeSilently(reader);
            }
        }
        // STEP 2: read matrix blocks from staging area and write matrix to HDFS
        String[] fnamesPartitions = new File(fnameStaging).list();
        // File.list() yields null if the path is not a directory or an I/O error occurs;
        // fail with a descriptive message instead of a NullPointerException further down
        if (fnamesPartitions == null) {
            throw new IOException("Unable to list staging directory '" + fnameStaging + "'.");
        }
        if (PARALLEL) {
            int len = Math.min(fnamesPartitions.length, _par);
            // number of partitions per worker (loop-invariant, rounded up)
            int chunk = (int) Math.ceil(((double) fnamesPartitions.length) / len);
            Thread[] threads = new Thread[len];
            for (int i = 0; i < len; i++) {
                int start = i * chunk;
                // inclusive end index, clamped to the last partition
                int end = Math.min((i + 1) * chunk - 1, fnamesPartitions.length - 1);
                threads[i] = new Thread(new DataPartitionerWorkerBinaryBlock(job, fnameNew, fnameStaging, fnamesPartitions, start, end));
                threads[i].start();
            }
            for (Thread t : threads) {
                t.join();
            }
        } else {
            for (String pdir : fnamesPartitions) {
                writeBinaryBlockSequenceFileToHDFS(job, fnameNew, fnameStaging + "/" + pdir, false);
            }
        }
    } catch (InterruptedException e) {
        // restore the interrupt flag so callers further up can observe the interruption
        Thread.currentThread().interrupt();
        throw new DMLRuntimeException("Unable to partition binary block matrix.", e);
    } catch (Exception e) {
        throw new DMLRuntimeException("Unable to partition binary block matrix.", e);
    }
}
Also used : Path(org.apache.hadoop.fs.Path) MatrixBlock(org.apache.sysml.runtime.matrix.data.MatrixBlock) MatrixIndexes(org.apache.sysml.runtime.matrix.data.MatrixIndexes) RecordReader(org.apache.hadoop.mapred.RecordReader) IOException(java.io.IOException) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException) SequenceFile(org.apache.hadoop.io.SequenceFile) FileSystem(org.apache.hadoop.fs.FileSystem) JobConf(org.apache.hadoop.mapred.JobConf) File(java.io.File)

Example 48 with MatrixIndexes

use of org.apache.sysml.runtime.matrix.data.MatrixIndexes in project incubator-systemml by apache.

the class DataPartitionerRemoteSparkMapper method call.

/**
 * Splits one input block into the pieces required by the configured
 * partitioning format ({@code _dpf}) and tags each piece with the key of
 * the partition it belongs to.
 *
 * @param arg0 pair of block indexes and matrix block to partition
 * @return iterator over (partition key, {@code PairWritableBlock}) pairs produced for this block
 * @throws IOException if the block lies outside the overall matrix range
 *         {@code [1:_rlen, 1:_clen]}
 * @throws DMLRuntimeException if the partitioning format is not handled here
 */
@Override
public Iterator<Tuple2<Long, Writable>> call(Tuple2<MatrixIndexes, MatrixBlock> arg0) throws Exception {
    List<Tuple2<Long, Writable>> ret = new LinkedList<>();
    MatrixIndexes key2 = arg0._1();
    MatrixBlock value2 = arg0._2();
    // zero-based cell offsets of the block's upper-left corner
    long row_offset = (key2.getRowIndex() - 1) * _brlen;
    long col_offset = (key2.getColumnIndex() - 1) * _bclen;
    long rows = value2.getNumRows();
    long cols = value2.getNumColumns();
    // bound check per block
    if (row_offset + rows < 1 || row_offset + rows > _rlen || col_offset + cols < 1 || col_offset + cols > _clen) {
        throw new IOException("Matrix block [" + (row_offset + 1) + ":" + (row_offset + rows) + "," + (col_offset + 1) + ":" + (col_offset + cols) + "] " + "out of overall matrix range [1:" + _rlen + ",1:" + _clen + "].");
    }
    // partition inputs according to partitioning scheme
    // (keys are boxed via Long.valueOf instead of the deprecated Long constructor)
    switch(_dpf) {
        case ROW_WISE:
            {
                // one output per row, keyed by the global 1-based row index
                MatrixBlock[] blks = DataConverter.convertToMatrixBlockPartitions(value2, false);
                for (int i = 0; i < rows; i++) {
                    PairWritableBlock tmp = new PairWritableBlock();
                    tmp.indexes = new MatrixIndexes(1, col_offset / _bclen + 1);
                    tmp.block = blks[i];
                    ret.add(new Tuple2<Long, Writable>(Long.valueOf(row_offset + 1 + i), tmp));
                }
                break;
            }
        case ROW_BLOCK_WISE:
            {
                // whole block forwarded as a copy, keyed by its 1-based row-block index
                PairWritableBlock tmp = new PairWritableBlock();
                tmp.indexes = new MatrixIndexes(1, col_offset / _bclen + 1);
                tmp.block = new MatrixBlock(value2);
                ret.add(new Tuple2<Long, Writable>(Long.valueOf(row_offset / _brlen + 1), tmp));
                break;
            }
        case ROW_BLOCK_WISE_N:
            {
                if (_n >= _brlen) {
                    // partition spans at least one block: forward a copy with its row-block index relative to the partition
                    PairWritableBlock tmp = new PairWritableBlock();
                    tmp.indexes = new MatrixIndexes(((row_offset % _n) / _brlen) + 1, col_offset / _bclen + 1);
                    tmp.block = new MatrixBlock(value2);
                    ret.add(new Tuple2<Long, Writable>(Long.valueOf(row_offset / _n + 1), tmp));
                } else {
                    // partition smaller than a block: slice the block into row ranges of at most _n rows
                    for (int i = 0; i < rows; i += _n) {
                        PairWritableBlock tmp = new PairWritableBlock();
                        tmp.indexes = new MatrixIndexes(1, col_offset / _bclen + 1);
                        tmp.block = value2.slice(i, Math.min(i + (int) _n - 1, value2.getNumRows() - 1), 0, value2.getNumColumns() - 1, new MatrixBlock());
                        ret.add(new Tuple2<Long, Writable>(Long.valueOf((row_offset + i) / _n + 1), tmp));
                    }
                }
                break;
            }
        case COLUMN_WISE:
            {
                // one output per column, keyed by the global 1-based column index
                MatrixBlock[] blks = DataConverter.convertToMatrixBlockPartitions(value2, true);
                for (int i = 0; i < cols; i++) {
                    PairWritableBlock tmp = new PairWritableBlock();
                    tmp.indexes = new MatrixIndexes(row_offset / _brlen + 1, 1);
                    tmp.block = blks[i];
                    ret.add(new Tuple2<Long, Writable>(Long.valueOf(col_offset + 1 + i), tmp));
                }
                break;
            }
        case COLUMN_BLOCK_WISE:
            {
                // whole block forwarded as a copy, keyed by its 1-based column-block index
                PairWritableBlock tmp = new PairWritableBlock();
                tmp.indexes = new MatrixIndexes(row_offset / _brlen + 1, 1);
                tmp.block = new MatrixBlock(value2);
                ret.add(new Tuple2<Long, Writable>(Long.valueOf(col_offset / _bclen + 1), tmp));
                break;
            }
        case COLUMN_BLOCK_WISE_N:
            {
                if (_n >= _bclen) {
                    // partition spans at least one block: forward a copy with its column-block index relative to the partition
                    PairWritableBlock tmp = new PairWritableBlock();
                    tmp.indexes = new MatrixIndexes(row_offset / _brlen + 1, ((col_offset % _n) / _bclen) + 1);
                    tmp.block = new MatrixBlock(value2);
                    ret.add(new Tuple2<Long, Writable>(Long.valueOf(col_offset / _n + 1), tmp));
                } else {
                    // partition smaller than a block: slice the block into column ranges of at most _n columns
                    for (int i = 0; i < cols; i += _n) {
                        PairWritableBlock tmp = new PairWritableBlock();
                        tmp.indexes = new MatrixIndexes(row_offset / _brlen + 1, 1);
                        tmp.block = value2.slice(0, value2.getNumRows() - 1, i, Math.min(i + (int) _n - 1, value2.getNumColumns() - 1), new MatrixBlock());
                        ret.add(new Tuple2<Long, Writable>(Long.valueOf((col_offset + i) / _n + 1), tmp));
                    }
                }
                break;
            }
        default:
            throw new DMLRuntimeException("Unsupported partition format: " + _dpf);
    }
    return ret.iterator();
}
Also used : MatrixBlock(org.apache.sysml.runtime.matrix.data.MatrixBlock) Tuple2(scala.Tuple2) MatrixIndexes(org.apache.sysml.runtime.matrix.data.MatrixIndexes) PairWritableBlock(org.apache.sysml.runtime.controlprogram.parfor.util.PairWritableBlock) IOException(java.io.IOException) LinkedList(java.util.LinkedList) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException)

Example 49 with MatrixIndexes

use of org.apache.sysml.runtime.matrix.data.MatrixIndexes in project incubator-systemml by apache.

the class AggregateBinaryInstruction method processMapMultInstruction.

/**
 * Performs a map-side matrix multiplication of one streamed block against
 * every matching block of the input held in the distributed cache.
 *
 * If the right-hand input is cached, {@code in1} is multiplied with each
 * column block of the cached input; otherwise each row block of the cached
 * left-hand input is multiplied with {@code in2}. Every product is written
 * into a freshly held output slot. The output is removed again only if
 * empty blocks are not to be emitted and every produced block is empty.
 *
 * @param valueClass matrix value class used to allocate output slots
 * @param cachedValues cached value map receiving (and possibly losing) the output
 * @param in1 indexed matrix value 1 (streamed operand when the right input is cached)
 * @param in2 indexed matrix value 2 (streamed operand when the left input is cached)
 * @param blockRowFactor not referenced by this method
 * @param blockColFactor not referenced by this method
 */
private void processMapMultInstruction(Class<? extends MatrixValue> valueClass, CachedValueMap cachedValues, IndexedMatrixValue in1, IndexedMatrixValue in2, int blockRowFactor, int blockColFactor) {
    // stays true only while every produced output block qualifies for removal
    boolean dropOutput = true;
    if (!_cacheType.isRight()) {
        // left-hand input comes from the distributed cache
        DistributedCacheInput cachedLeft = MRBaseForCommonInstructions.dcValues.get(input1);
        long leftRows = cachedLeft.getNumRows();
        long leftRowBlocks = (long) Math.ceil(((double) leftRows) / cachedLeft.getNumRowsPerBlock());
        for (int rowBlk = 1; rowBlk <= leftRowBlocks; rowBlk++) {
            // matrix multiply A[rowBlk,k] %*% B[k,j]: fetch the cached input1 block
            IndexedMatrixValue leftBlock = cachedLeft.getDataBlock(rowBlk, (int) in2.getIndexes().getRowIndex());
            MatrixValue leftValue = leftBlock.getValue();
            MatrixIndexes leftIndex = leftBlock.getIndexes();
            // reserve an output slot for this product
            IndexedMatrixValue result = cachedValues.holdPlace(output, valueClass);
            // run the aggregate binary operation
            OperationsOnMatrixValues.performAggregateBinary(leftIndex, (MatrixBlock) leftValue, in2.getIndexes(), (MatrixBlock) in2.getValue(), result.getIndexes(), (MatrixBlock) result.getValue(), ((AggregateBinaryOperator) optr));
            if (_outputEmptyBlocks || !result.getValue().isEmpty()) {
                dropOutput = false;
            }
        }
    } else {
        // right-hand input comes from the distributed cache
        DistributedCacheInput cachedRight = MRBaseForCommonInstructions.dcValues.get(input2);
        long rightCols = cachedRight.getNumCols();
        long rightColBlocks = (long) Math.ceil(((double) rightCols) / cachedRight.getNumColsPerBlock());
        for (int colBlk = 1; colBlk <= rightColBlocks; colBlk++) {
            // matrix multiply A[i,k] %*% B[k,colBlk]: fetch the cached input2 block
            IndexedMatrixValue rightBlock = cachedRight.getDataBlock((int) in1.getIndexes().getColumnIndex(), colBlk);
            MatrixValue rightValue = rightBlock.getValue();
            MatrixIndexes rightIndex = rightBlock.getIndexes();
            // reserve an output slot for this product
            IndexedMatrixValue result = cachedValues.holdPlace(output, valueClass);
            // run the aggregate binary operation
            OperationsOnMatrixValues.performAggregateBinary(in1.getIndexes(), (MatrixBlock) in1.getValue(), rightIndex, (MatrixBlock) rightValue, result.getIndexes(), (MatrixBlock) result.getValue(), ((AggregateBinaryOperator) optr));
            if (_outputEmptyBlocks || !result.getValue().isEmpty()) {
                dropOutput = false;
            }
        }
    }
    // empty block output filter
    if (dropOutput) {
        cachedValues.remove(output);
    }
}
Also used : DistributedCacheInput(org.apache.sysml.runtime.matrix.mapred.DistributedCacheInput) IndexedMatrixValue(org.apache.sysml.runtime.matrix.mapred.IndexedMatrixValue) MatrixValue(org.apache.sysml.runtime.matrix.data.MatrixValue) MatrixIndexes(org.apache.sysml.runtime.matrix.data.MatrixIndexes) AggregateBinaryOperator(org.apache.sysml.runtime.matrix.operators.AggregateBinaryOperator)

Example 50 with MatrixIndexes

use of org.apache.sysml.runtime.matrix.data.MatrixIndexes in project incubator-systemml by apache.

the class AppendGInstruction method processInstruction.

/**
 * Appends the blocks of the second input to the first input, either
 * column-wise ({@code _cbind}) or row-wise, writing results to the output
 * slot of {@code cachedValues}.
 *
 * Step 1 forwards blocks of the first input; when {@code _offset} is not a
 * multiple of the block length, the border block is re-dimensioned to the
 * final size. Step 2 shifts blocks of the second input to their new index;
 * when {@code _offset} is not a multiple of the block length, each block is
 * split into up to two output blocks.
 *
 * The relevant block length {@code blen} is {@code bclen} for cbind and
 * {@code brlen} for rbind; it is used consistently for both the split
 * decision and the index arithmetic.
 *
 * @param valueClass matrix value class used to allocate output slots
 * @param cachedValues cached value map holding the inputs and receiving the outputs
 * @param tempValue not referenced by this method
 * @param zeroInput not referenced by this method
 * @param brlen number of rows per block
 * @param bclen number of columns per block
 */
@Override
public void processInstruction(Class<? extends MatrixValue> valueClass, CachedValueMap cachedValues, IndexedMatrixValue tempValue, IndexedMatrixValue zeroInput, int brlen, int bclen) {
    // setup basic meta data
    int blen = _cbind ? bclen : brlen;
    // Step 1: handle first input (forward blocks, change dim of last block)
    ArrayList<IndexedMatrixValue> blkList1 = cachedValues.get(input1);
    if (blkList1 != null)
        for (IndexedMatrixValue in1 : blkList1) {
            if (in1 == null)
                continue;
            if (_offset % blen == 0) {
                // special case: forward only
                cachedValues.add(output, in1);
            } else {
                // general case: change dims and forward
                MatrixIndexes tmpix = in1.getIndexes();
                // always block
                MatrixBlock tmpval = (MatrixBlock) in1.getValue();
                // border block: the block that contains the append offset
                if (_cbind && _offset / blen + 1 == tmpix.getColumnIndex() || !_cbind && _offset / blen + 1 == tmpix.getRowIndex()) {
                    IndexedMatrixValue data = cachedValues.holdPlace(output, valueClass);
                    // always block
                    MatrixBlock tmpvalNew = (MatrixBlock) data.getValue();
                    int lrlen = _cbind ? tmpval.getNumRows() : Math.min(blen, (int) (_len - (tmpix.getRowIndex() - 1) * blen));
                    int lclen = _cbind ? Math.min(blen, (int) (_len - (tmpix.getColumnIndex() - 1) * blen)) : tmpval.getNumColumns();
                    tmpvalNew.reset(lrlen, lclen);
                    tmpvalNew.copy(0, tmpval.getNumRows() - 1, 0, tmpval.getNumColumns() - 1, tmpval, true);
                    data.getIndexes().setIndexes(tmpix);
                } else {
                    // inner block
                    cachedValues.add(output, in1);
                }
            }
        }
    // Step 2: handle second input (split/forward blocks with new index)
    ArrayList<IndexedMatrixValue> blkList2 = cachedValues.get(input2);
    if (blkList2 != null)
        for (IndexedMatrixValue in2 : blkList2) {
            if (in2 == null)
                continue;
            MatrixIndexes tmpix = in2.getIndexes();
            // always block
            MatrixBlock tmpval = (MatrixBlock) in2.getValue();
            // special case no split: decided on blen (not bclen) so that rbind with
            // brlen != bclen uses the row block length, consistent with Step 1
            if (_offset % blen == 0) {
                IndexedMatrixValue data = cachedValues.holdPlace(output, valueClass);
                MatrixIndexes ix1 = data.getIndexes();
                long rix = _cbind ? tmpix.getRowIndex() : _offset / blen + tmpix.getRowIndex();
                long cix = _cbind ? _offset / blen + tmpix.getColumnIndex() : tmpix.getColumnIndex();
                ix1.setIndexes(rix, cix);
                data.set(ix1, in2.getValue());
            } else {
                // general case: split and forward
                IndexedMatrixValue data1 = cachedValues.holdPlace(output, valueClass);
                MatrixIndexes ix1 = data1.getIndexes();
                // always block
                MatrixBlock tmpvalNew = (MatrixBlock) data1.getValue();
                if (_cbind) {
                    // first half
                    int cix1 = (int) (_offset / blen + tmpix.getColumnIndex());
                    int cols1 = Math.min(blen, (int) (_len - (long) (cix1 - 1) * blen));
                    ix1.setIndexes(tmpix.getRowIndex(), cix1);
                    tmpvalNew.reset(tmpval.getNumRows(), cols1);
                    tmpvalNew.copy(0, tmpval.getNumRows() - 1, (int) ((_offset + 1) % blen) - 1, cols1 - 1, tmpval.slice(0, tmpval.getNumRows() - 1, 0, (int) (cols1 - ((_offset) % blen) - 1), new MatrixBlock()), true);
                    data1.getIndexes().setIndexes(ix1);
                    if (cols1 - ((_offset) % blen) < tmpval.getNumColumns()) {
                        // second half (if required)
                        IndexedMatrixValue data2 = cachedValues.holdPlace(output, valueClass);
                        MatrixIndexes ix2 = data2.getIndexes();
                        // always block
                        MatrixBlock tmpvalNew2 = (MatrixBlock) data2.getValue();
                        int cix2 = (int) (_offset / blen + 1 + tmpix.getColumnIndex());
                        int cols2 = Math.min(blen, (int) (_len - (long) (cix2 - 1) * blen));
                        ix2.setIndexes(tmpix.getRowIndex(), cix2);
                        tmpvalNew2.reset(tmpval.getNumRows(), cols2);
                        tmpvalNew2.copy(0, tmpval.getNumRows() - 1, 0, cols2 - 1, tmpval.slice(0, tmpval.getNumRows() - 1, (int) (cols1 - ((_offset) % blen)), tmpval.getNumColumns() - 1, new MatrixBlock()), true);
                        data2.getIndexes().setIndexes(ix2);
                    }
                } else {
                    // rbind
                    // first half
                    int rix1 = (int) (_offset / blen + tmpix.getRowIndex());
                    int rows1 = Math.min(blen, (int) (_len - (long) (rix1 - 1) * blen));
                    ix1.setIndexes(rix1, tmpix.getColumnIndex());
                    tmpvalNew.reset(rows1, tmpval.getNumColumns());
                    tmpvalNew.copy((int) ((_offset + 1) % blen) - 1, rows1 - 1, 0, tmpval.getNumColumns() - 1, tmpval.slice(0, (int) (rows1 - ((_offset) % blen) - 1), 0, tmpval.getNumColumns() - 1, new MatrixBlock()), true);
                    data1.getIndexes().setIndexes(ix1);
                    if (rows1 - ((_offset) % blen) < tmpval.getNumRows()) {
                        // second half (if required)
                        IndexedMatrixValue data2 = cachedValues.holdPlace(output, valueClass);
                        MatrixIndexes ix2 = data2.getIndexes();
                        // always block
                        MatrixBlock tmpvalNew2 = (MatrixBlock) data2.getValue();
                        int rix2 = (int) (_offset / blen + 1 + tmpix.getRowIndex());
                        int rows2 = Math.min(blen, (int) (_len - (long) (rix2 - 1) * blen));
                        ix2.setIndexes(rix2, tmpix.getColumnIndex());
                        tmpvalNew2.reset(rows2, tmpval.getNumColumns());
                        tmpvalNew2.copy(0, rows2 - 1, 0, tmpval.getNumColumns() - 1, tmpval.slice((int) (rows1 - ((_offset) % blen)), tmpval.getNumRows() - 1, 0, tmpval.getNumColumns() - 1, new MatrixBlock()), true);
                        data2.getIndexes().setIndexes(ix2);
                    }
                }
            }
        }
}
Also used : MatrixBlock(org.apache.sysml.runtime.matrix.data.MatrixBlock) MatrixIndexes(org.apache.sysml.runtime.matrix.data.MatrixIndexes) IndexedMatrixValue(org.apache.sysml.runtime.matrix.mapred.IndexedMatrixValue)

Aggregations

MatrixIndexes (org.apache.sysml.runtime.matrix.data.MatrixIndexes)165 MatrixBlock (org.apache.sysml.runtime.matrix.data.MatrixBlock)142 MatrixCharacteristics (org.apache.sysml.runtime.matrix.MatrixCharacteristics)70 SparkExecutionContext (org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext)48 DMLRuntimeException (org.apache.sysml.runtime.DMLRuntimeException)41 Path (org.apache.hadoop.fs.Path)24 SequenceFile (org.apache.hadoop.io.SequenceFile)23 JavaPairRDD (org.apache.spark.api.java.JavaPairRDD)22 ArrayList (java.util.ArrayList)21 IOException (java.io.IOException)20 FileSystem (org.apache.hadoop.fs.FileSystem)20 MatrixCell (org.apache.sysml.runtime.matrix.data.MatrixCell)19 Tuple2 (scala.Tuple2)19 IndexedMatrixValue (org.apache.sysml.runtime.matrix.mapred.IndexedMatrixValue)17 JobConf (org.apache.hadoop.mapred.JobConf)14 MatrixValue (org.apache.sysml.runtime.matrix.data.MatrixValue)11 CompressedMatrixBlock (org.apache.sysml.runtime.compress.CompressedMatrixBlock)10 MatrixObject (org.apache.sysml.runtime.controlprogram.caching.MatrixObject)10 File (java.io.File)9 RDDObject (org.apache.sysml.runtime.instructions.spark.data.RDDObject)9