Search in sources :

Example 76 with MatrixBlock

use of org.apache.sysml.runtime.matrix.data.MatrixBlock in project incubator-systemml by apache.

the class SparkExecutionContext method writeRDDtoHDFS.

/**
 * Saves the binary-block RDD held by the given RDD object as a Hadoop file and
 * reports the number of non-zeros accumulated over all written blocks.
 *
 * @param rdd   RDD object whose underlying RDD is cast to a pair RDD of matrix indexes and matrix blocks
 * @param path  target path handed to {@code saveAsHadoopFile}
 * @param oinfo output info providing the key, value and output format classes
 * @return value of the "nnz" accumulator after the save has run
 */
@SuppressWarnings("unchecked")
public static long writeRDDtoHDFS(RDDObject rdd, String path, OutputInfo oinfo) {
    JavaPairRDD<MatrixIndexes, MatrixBlock> blocks = (JavaPairRDD<MatrixIndexes, MatrixBlock>) rdd.getRDD();
    // the nnz computation rides along with the write via an accumulator
    LongAccumulator nnzTotal = getSparkContextStatic().sc().longAccumulator("nnz");
    // mapValues is lazy; saving the file is the action that executes it and
    // thereby fills the accumulator
    blocks.mapValues(new ComputeBinaryBlockNnzFunction(nnzTotal))
        .saveAsHadoopFile(path, oinfo.outputKeyClass, oinfo.outputValueClass, oinfo.outputFormatClass);
    // aggregate over all blocks
    return nnzTotal.value();
}
Also used : LongAccumulator(org.apache.spark.util.LongAccumulator) MatrixBlock(org.apache.sysml.runtime.matrix.data.MatrixBlock) CompressedMatrixBlock(org.apache.sysml.runtime.compress.CompressedMatrixBlock) MatrixIndexes(org.apache.sysml.runtime.matrix.data.MatrixIndexes) JavaPairRDD(org.apache.spark.api.java.JavaPairRDD) ComputeBinaryBlockNnzFunction(org.apache.sysml.runtime.instructions.spark.functions.ComputeBinaryBlockNnzFunction)

Example 77 with MatrixBlock

use of org.apache.sysml.runtime.matrix.data.MatrixBlock in project incubator-systemml by apache.

the class DataPartitionerLocal method partitionBinaryBlock.

/**
 * Partitions a binary-block matrix in two steps: (1) every block is read from the
 * sequence files found under {@code fname} and appended to the local staging area
 * {@code fnameStaging}; (2) each entry of the staging area is written out under
 * {@code fnameNew}, either by worker threads (if {@code PARALLEL}) or sequentially.
 *
 * @param fname        path of the input matrix
 * @param fnameStaging local staging directory
 * @param fnameNew     output path
 * @param rlen         number of rows of the overall matrix (used for the per-block bound check)
 * @param clen         number of columns of the overall matrix (used for the per-block bound check)
 * @param brlen        number of rows per block
 * @param bclen        number of columns per block
 * @throws DMLRuntimeException wrapping any failure during reading, staging or writing
 */
@SuppressWarnings("deprecation")
private void partitionBinaryBlock(String fname, String fnameStaging, String fnameNew, long rlen, long clen, int brlen, int bclen) {
    try {
        // create reuse object
        _reuseBlk = DataPartitioner.createReuseMatrixBlock(_format, brlen, bclen);
        // STEP 1: read matrix from HDFS and write blocks to local staging area
        // check and add input path
        JobConf job = new JobConf(ConfigurationManager.getCachedJobConf());
        Path path = new Path(fname);
        FileSystem fs = IOUtilFunctions.getFileSystem(path, job);
        // prepare sequence file reader, and write to local staging area
        // (key and value are reused across all reads)
        MatrixIndexes key = new MatrixIndexes();
        MatrixBlock value = new MatrixBlock();
        for (Path lpath : IOUtilFunctions.getSequenceFilePaths(fs, path)) {
            SequenceFile.Reader reader = new SequenceFile.Reader(fs, lpath, job);
            try {
                // for each block
                while (reader.next(key, value)) {
                    // block indexes are 1-based, offsets are 0-based cell positions
                    long row_offset = (key.getRowIndex() - 1) * brlen;
                    long col_offset = (key.getColumnIndex() - 1) * bclen;
                    long rows = value.getNumRows();
                    long cols = value.getNumColumns();
                    // bound check per block
                    if (row_offset + rows < 1 || row_offset + rows > rlen || col_offset + cols < 1 || col_offset + cols > clen) {
                        throw new IOException("Matrix block [" + (row_offset + 1) + ":" + (row_offset + rows) + "," + (col_offset + 1) + ":" + (col_offset + cols) + "] " + "out of overall matrix range [1:" + rlen + ",1:" + clen + "].");
                    }
                    appendBlockToStagingArea(fnameStaging, value, row_offset, col_offset, brlen, bclen);
                }
            } finally {
                IOUtilFunctions.closeSilently(reader);
            }
        }
        // STEP 2: read matrix blocks from staging area and write matrix to HDFS
        String[] fnamesPartitions = new File(fnameStaging).list();
        // File.list() yields null if the path is not a directory or cannot be read;
        // fail with a descriptive error instead of a NullPointerException
        if (fnamesPartitions == null) {
            throw new IOException("Unable to list partitions in staging directory '" + fnameStaging + "'.");
        }
        if (PARALLEL) {
            int len = Math.min(fnamesPartitions.length, _par);
            // number of partitions per worker (loop-invariant)
            int chunk = (int) Math.ceil(((double) fnamesPartitions.length) / len);
            Thread[] threads = new Thread[len];
            for (int i = 0; i < len; i++) {
                // inclusive [start, end] range of partitions handled by worker i
                int start = i * chunk;
                int end = Math.min((i + 1) * chunk - 1, fnamesPartitions.length - 1);
                threads[i] = new Thread(new DataPartitionerWorkerBinaryBlock(job, fnameNew, fnameStaging, fnamesPartitions, start, end));
                threads[i].start();
            }
            // wait for all workers to finish
            for (Thread t : threads) t.join();
        } else {
            for (String pdir : fnamesPartitions) writeBinaryBlockSequenceFileToHDFS(job, fnameNew, fnameStaging + "/" + pdir, false);
        }
    } catch (Exception e) {
        throw new DMLRuntimeException("Unable to partition binary block matrix.", e);
    }
}
Also used : Path(org.apache.hadoop.fs.Path) MatrixBlock(org.apache.sysml.runtime.matrix.data.MatrixBlock) MatrixIndexes(org.apache.sysml.runtime.matrix.data.MatrixIndexes) RecordReader(org.apache.hadoop.mapred.RecordReader) IOException(java.io.IOException) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException) IOException(java.io.IOException) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException) SequenceFile(org.apache.hadoop.io.SequenceFile) FileSystem(org.apache.hadoop.fs.FileSystem) JobConf(org.apache.hadoop.mapred.JobConf) SequenceFile(org.apache.hadoop.io.SequenceFile) File(java.io.File)

Example 78 with MatrixBlock

use of org.apache.sysml.runtime.matrix.data.MatrixBlock in project incubator-systemml by apache.

the class DataPartitionerRemoteSparkMapper method call.

/**
 * Splits one input matrix block into partition entries according to the
 * partition format {@code _dpf}. Each result tuple consists of the partition
 * key and a {@link PairWritableBlock} holding the block together with its
 * block indexes inside that partition.
 *
 * @param arg0 pair of block indexes and matrix block
 * @return iterator over the (partition key, block) tuples produced for this block
 * @throws IOException if the block lies outside the overall matrix range
 *         {@code [1:_rlen, 1:_clen]}
 * @throws DMLRuntimeException if the partition format is not supported
 */
@Override
public Iterator<Tuple2<Long, Writable>> call(Tuple2<MatrixIndexes, MatrixBlock> arg0) throws Exception {
    List<Tuple2<Long, Writable>> ret = new LinkedList<>();
    MatrixIndexes key2 = arg0._1();
    MatrixBlock value2 = arg0._2();
    // block indexes are 1-based, offsets are 0-based cell positions
    long row_offset = (key2.getRowIndex() - 1) * _brlen;
    long col_offset = (key2.getColumnIndex() - 1) * _bclen;
    long rows = value2.getNumRows();
    long cols = value2.getNumColumns();
    // bound check per block
    if (row_offset + rows < 1 || row_offset + rows > _rlen || col_offset + cols < 1 || col_offset + cols > _clen) {
        throw new IOException("Matrix block [" + (row_offset + 1) + ":" + (row_offset + rows) + "," + (col_offset + 1) + ":" + (col_offset + cols) + "] " + "out of overall matrix range [1:" + _rlen + ",1:" + _clen + "].");
    }
    // partition inputs according to partitioning scheme
    // (keys are boxed via Long.valueOf instead of the deprecated Long constructor)
    switch(_dpf) {
        case ROW_WISE:
            {
                // one entry per row, keyed by the global 1-based row index
                MatrixBlock[] blks = DataConverter.convertToMatrixBlockPartitions(value2, false);
                for (int i = 0; i < rows; i++) {
                    PairWritableBlock tmp = new PairWritableBlock();
                    tmp.indexes = new MatrixIndexes(1, col_offset / _bclen + 1);
                    tmp.block = blks[i];
                    ret.add(new Tuple2<Long, Writable>(Long.valueOf(row_offset + 1 + i), tmp));
                }
                break;
            }
        case ROW_BLOCK_WISE:
            {
                // one entry per block, keyed by the 1-based row-block index
                PairWritableBlock tmp = new PairWritableBlock();
                tmp.indexes = new MatrixIndexes(1, col_offset / _bclen + 1);
                tmp.block = new MatrixBlock(value2);
                ret.add(new Tuple2<Long, Writable>(Long.valueOf(row_offset / _brlen + 1), tmp));
                break;
            }
        case ROW_BLOCK_WISE_N:
            {
                if (_n >= _brlen) {
                    // partition spans at least one block: forward the whole block with
                    // its row-block index relative to the partition start
                    PairWritableBlock tmp = new PairWritableBlock();
                    tmp.indexes = new MatrixIndexes(((row_offset % _n) / _brlen) + 1, col_offset / _bclen + 1);
                    tmp.block = new MatrixBlock(value2);
                    ret.add(new Tuple2<Long, Writable>(Long.valueOf(row_offset / _n + 1), tmp));
                } else {
                    // partition is smaller than a block: slice the block into row ranges of _n rows
                    for (int i = 0; i < rows; i += _n) {
                        PairWritableBlock tmp = new PairWritableBlock();
                        tmp.indexes = new MatrixIndexes(1, col_offset / _bclen + 1);
                        tmp.block = value2.slice(i, Math.min(i + (int) _n - 1, value2.getNumRows() - 1), 0, value2.getNumColumns() - 1, new MatrixBlock());
                        ret.add(new Tuple2<Long, Writable>(Long.valueOf((row_offset + i) / _n + 1), tmp));
                    }
                }
                break;
            }
        case COLUMN_WISE:
            {
                // one entry per column, keyed by the global 1-based column index
                MatrixBlock[] blks = DataConverter.convertToMatrixBlockPartitions(value2, true);
                for (int i = 0; i < cols; i++) {
                    PairWritableBlock tmp = new PairWritableBlock();
                    tmp.indexes = new MatrixIndexes(row_offset / _brlen + 1, 1);
                    tmp.block = blks[i];
                    ret.add(new Tuple2<Long, Writable>(Long.valueOf(col_offset + 1 + i), tmp));
                }
                break;
            }
        case COLUMN_BLOCK_WISE:
            {
                // one entry per block, keyed by the 1-based column-block index
                PairWritableBlock tmp = new PairWritableBlock();
                tmp.indexes = new MatrixIndexes(row_offset / _brlen + 1, 1);
                tmp.block = new MatrixBlock(value2);
                ret.add(new Tuple2<Long, Writable>(Long.valueOf(col_offset / _bclen + 1), tmp));
                break;
            }
        case COLUMN_BLOCK_WISE_N:
            {
                if (_n >= _bclen) {
                    // partition spans at least one block: forward the whole block with
                    // its column-block index relative to the partition start
                    PairWritableBlock tmp = new PairWritableBlock();
                    tmp.indexes = new MatrixIndexes(row_offset / _brlen + 1, ((col_offset % _n) / _bclen) + 1);
                    tmp.block = new MatrixBlock(value2);
                    ret.add(new Tuple2<Long, Writable>(Long.valueOf(col_offset / _n + 1), tmp));
                } else {
                    // partition is smaller than a block: slice the block into column ranges of _n columns
                    for (int i = 0; i < cols; i += _n) {
                        PairWritableBlock tmp = new PairWritableBlock();
                        tmp.indexes = new MatrixIndexes(row_offset / _brlen + 1, 1);
                        tmp.block = value2.slice(0, value2.getNumRows() - 1, i, Math.min(i + (int) _n - 1, value2.getNumColumns() - 1), new MatrixBlock());
                        ret.add(new Tuple2<Long, Writable>(Long.valueOf((col_offset + i) / _n + 1), tmp));
                    }
                }
                break;
            }
        default:
            throw new DMLRuntimeException("Unsupported partition format: " + _dpf);
    }
    return ret.iterator();
}
Also used : MatrixBlock(org.apache.sysml.runtime.matrix.data.MatrixBlock) Tuple2(scala.Tuple2) MatrixIndexes(org.apache.sysml.runtime.matrix.data.MatrixIndexes) PairWritableBlock(org.apache.sysml.runtime.controlprogram.parfor.util.PairWritableBlock) IOException(java.io.IOException) LinkedList(java.util.LinkedList) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException)

Example 79 with MatrixBlock

use of org.apache.sysml.runtime.matrix.data.MatrixBlock in project incubator-systemml by apache.

the class AppendGInstruction method processInstruction.

/**
 * Appends the blocks of the second input to the first input, column-wise if
 * {@code _cbind} is set and row-wise otherwise. Blocks of the first input are
 * forwarded (the border block is enlarged where the offset is not block-aligned);
 * blocks of the second input are re-indexed and, if the offset is not
 * block-aligned, split into up to two output blocks.
 *
 * NOTE(review): from its use here, {@code _offset} looks like the length of the
 * first input along the append dimension and {@code _len} like the total output
 * length along that dimension - confirm against the instruction parser.
 *
 * @param valueClass   value class used when reserving output slots
 * @param cachedValues cache holding the input blocks and receiving the output blocks
 * @param tempValue    not used by this method
 * @param zeroInput    not used by this method
 * @param brlen        number of rows per block
 * @param bclen        number of columns per block
 */
@Override
public void processInstruction(Class<? extends MatrixValue> valueClass, CachedValueMap cachedValues, IndexedMatrixValue tempValue, IndexedMatrixValue zeroInput, int brlen, int bclen) {
    // setup basic meta data: block length along the append dimension
    int blen = _cbind ? bclen : brlen;
    // Step 1: handle first input (forward blocks, change dim of last block)
    ArrayList<IndexedMatrixValue> blkList1 = cachedValues.get(input1);
    if (blkList1 != null)
        for (IndexedMatrixValue in1 : blkList1) {
            if (in1 == null)
                continue;
            if (_offset % blen == 0) {
                // special case: forward only
                cachedValues.add(output, in1);
            } else {
                // general case: change dims and forward
                MatrixIndexes tmpix = in1.getIndexes();
                // always block
                MatrixBlock tmpval = (MatrixBlock) in1.getValue();
                if (_cbind && _offset / blen + 1 == tmpix.getColumnIndex() || !_cbind && _offset / blen + 1 == tmpix.getRowIndex()) {
                    // border block: enlarge to the output dimension and copy the input into it
                    IndexedMatrixValue data = cachedValues.holdPlace(output, valueClass);
                    // always block
                    MatrixBlock tmpvalNew = (MatrixBlock) data.getValue();
                    int lrlen = _cbind ? tmpval.getNumRows() : Math.min(blen, (int) (_len - (tmpix.getRowIndex() - 1) * blen));
                    int lclen = _cbind ? Math.min(blen, (int) (_len - (tmpix.getColumnIndex() - 1) * blen)) : tmpval.getNumColumns();
                    tmpvalNew.reset(lrlen, lclen);
                    tmpvalNew.copy(0, tmpval.getNumRows() - 1, 0, tmpval.getNumColumns() - 1, tmpval, true);
                    data.getIndexes().setIndexes(tmpix);
                } else {
                    // inner block
                    cachedValues.add(output, in1);
                }
            }
        }
    // Step 2: handle second input (split/forward blocks with new index)
    ArrayList<IndexedMatrixValue> blkList2 = cachedValues.get(input2);
    if (blkList2 != null)
        for (IndexedMatrixValue in2 : blkList2) {
            if (in2 == null)
                continue;
            MatrixIndexes tmpix = in2.getIndexes();
            // always block
            MatrixBlock tmpval = (MatrixBlock) in2.getValue();
            // The alignment test must use the block length of the append dimension
            // (blen), as Step 1 and the index arithmetic below do; testing against
            // bclen is only right for cbind or square blocks.
            if (_offset % blen == 0) {
                // special case no split
                IndexedMatrixValue data = cachedValues.holdPlace(output, valueClass);
                MatrixIndexes ix1 = data.getIndexes();
                long rix = _cbind ? tmpix.getRowIndex() : _offset / blen + tmpix.getRowIndex();
                long cix = _cbind ? _offset / blen + tmpix.getColumnIndex() : tmpix.getColumnIndex();
                ix1.setIndexes(rix, cix);
                data.set(ix1, in2.getValue());
            } else {
                // general case: split and forward
                IndexedMatrixValue data1 = cachedValues.holdPlace(output, valueClass);
                MatrixIndexes ix1 = data1.getIndexes();
                // always block
                MatrixBlock tmpvalNew = (MatrixBlock) data1.getValue();
                if (_cbind) {
                    // first half
                    int cix1 = (int) (_offset / blen + tmpix.getColumnIndex());
                    int cols1 = Math.min(blen, (int) (_len - (long) (cix1 - 1) * blen));
                    ix1.setIndexes(tmpix.getRowIndex(), cix1);
                    tmpvalNew.reset(tmpval.getNumRows(), cols1);
                    tmpvalNew.copy(0, tmpval.getNumRows() - 1, (int) ((_offset + 1) % blen) - 1, cols1 - 1, tmpval.slice(0, tmpval.getNumRows() - 1, 0, (int) (cols1 - ((_offset) % blen) - 1), new MatrixBlock()), true);
                    data1.getIndexes().setIndexes(ix1);
                    if (cols1 - ((_offset) % blen) < tmpval.getNumColumns()) {
                        // second half (if required)
                        IndexedMatrixValue data2 = cachedValues.holdPlace(output, valueClass);
                        MatrixIndexes ix2 = data2.getIndexes();
                        // always block
                        MatrixBlock tmpvalNew2 = (MatrixBlock) data2.getValue();
                        int cix2 = (int) (_offset / blen + 1 + tmpix.getColumnIndex());
                        int cols2 = Math.min(blen, (int) (_len - (long) (cix2 - 1) * blen));
                        ix2.setIndexes(tmpix.getRowIndex(), cix2);
                        tmpvalNew2.reset(tmpval.getNumRows(), cols2);
                        tmpvalNew2.copy(0, tmpval.getNumRows() - 1, 0, cols2 - 1, tmpval.slice(0, tmpval.getNumRows() - 1, (int) (cols1 - ((_offset) % blen)), tmpval.getNumColumns() - 1, new MatrixBlock()), true);
                        data2.getIndexes().setIndexes(ix2);
                    }
                } else {
                    // rbind
                    // first half
                    int rix1 = (int) (_offset / blen + tmpix.getRowIndex());
                    int rows1 = Math.min(blen, (int) (_len - (long) (rix1 - 1) * blen));
                    ix1.setIndexes(rix1, tmpix.getColumnIndex());
                    tmpvalNew.reset(rows1, tmpval.getNumColumns());
                    tmpvalNew.copy((int) ((_offset + 1) % blen) - 1, rows1 - 1, 0, tmpval.getNumColumns() - 1, tmpval.slice(0, (int) (rows1 - ((_offset) % blen) - 1), 0, tmpval.getNumColumns() - 1, new MatrixBlock()), true);
                    data1.getIndexes().setIndexes(ix1);
                    if (rows1 - ((_offset) % blen) < tmpval.getNumRows()) {
                        // second half (if required)
                        IndexedMatrixValue data2 = cachedValues.holdPlace(output, valueClass);
                        MatrixIndexes ix2 = data2.getIndexes();
                        // always block
                        MatrixBlock tmpvalNew2 = (MatrixBlock) data2.getValue();
                        int rix2 = (int) (_offset / blen + 1 + tmpix.getRowIndex());
                        int rows2 = Math.min(blen, (int) (_len - (long) (rix2 - 1) * blen));
                        ix2.setIndexes(rix2, tmpix.getColumnIndex());
                        tmpvalNew2.reset(rows2, tmpval.getNumColumns());
                        tmpvalNew2.copy(0, rows2 - 1, 0, tmpval.getNumColumns() - 1, tmpval.slice((int) (rows1 - ((_offset) % blen)), tmpval.getNumRows() - 1, 0, tmpval.getNumColumns() - 1, new MatrixBlock()), true);
                        data2.getIndexes().setIndexes(ix2);
                    }
                }
            }
        }
}
Also used : MatrixBlock(org.apache.sysml.runtime.matrix.data.MatrixBlock) MatrixIndexes(org.apache.sysml.runtime.matrix.data.MatrixIndexes) IndexedMatrixValue(org.apache.sysml.runtime.matrix.mapred.IndexedMatrixValue)

Example 80 with MatrixBlock

use of org.apache.sysml.runtime.matrix.data.MatrixBlock in project incubator-systemml by apache.

the class QuaternaryInstruction method processInstruction.

/**
 * Applies the quaternary operator to every cached block of the first input.
 * For each block X(i,j) the matching U and V blocks (and the optional weights
 * W) are looked up, the operation is executed into an output slot, and the
 * output block indexes are set depending on the weights type of the operator.
 *
 * @param valueClass     value class used when reserving output slots
 * @param cachedValues   cache holding the input blocks and receiving the output blocks
 * @param tempValue      temporary slot used when output and first input are the same
 * @param zeroInput      not used by this method
 * @param blockRowFactor not used by this method
 * @param blockColFactor not used by this method
 */
@Override
public void processInstruction(Class<? extends MatrixValue> valueClass, CachedValueMap cachedValues, IndexedMatrixValue tempValue, IndexedMatrixValue zeroInput, int blockRowFactor, int blockColFactor) {
    QuaternaryOperator qop = (QuaternaryOperator) optr;
    ArrayList<IndexedMatrixValue> inputBlocks = cachedValues.get(_input1);
    if (inputBlocks == null) {
        return;
    }
    for (IndexedMatrixValue entry : inputBlocks) {
        // Step 1: prepare inputs and output
        if (entry == null) {
            continue;
        }
        MatrixIndexes srcIx = entry.getIndexes();
        MatrixBlock Xij = (MatrixBlock) entry.getValue();
        // reserve the output slot; write into the temp value if output aliases input 1
        IndexedMatrixValue target;
        if (output == _input1) {
            target = tempValue;
        } else {
            target = cachedValues.holdPlace(output, valueClass);
        }
        MatrixIndexes dstIx = target.getIndexes();
        MatrixValue dstVal = target.getValue();

        // Step 2: get remaining inputs: Wij, Ui, Vj
        // Wij stays null if there is no fourth input or no cached block for it
        MatrixValue Wij = null;
        if (_input4 != -1) {
            IndexedMatrixValue weights = cachedValues.getFirst(_input4);
            if (weights != null) {
                Wij = weights.getValue();
            }
        }
        if (Wij == null && qop.hasFourInputs()) {
            // fall back to a 1x1 block holding the value parsed from the instruction string
            MatrixBlock scalar = new MatrixBlock(1, 1, false);
            String[] parts = InstructionUtils.getInstructionParts(instString);
            scalar.quickSetValue(0, 0, Double.valueOf(parts[4]));
            Wij = scalar;
        }
        // Ui and Vj come either from the cached values or from the distributed cache
        MatrixValue Ui;
        if (_cacheU) {
            Ui = MRBaseForCommonInstructions.dcValues.get(_input2).getDataBlock((int) srcIx.getRowIndex(), 1).getValue();
        } else {
            Ui = cachedValues.getFirst(_input2).getValue();
        }
        MatrixValue Vj;
        if (_cacheV) {
            Vj = MRBaseForCommonInstructions.dcValues.get(_input3).getDataBlock((int) srcIx.getColumnIndex(), 1).getValue();
        } else {
            Vj = cachedValues.getFirst(_input3).getValue();
        }
        // handle special input case: V through shuffle -> t(V)
        if (Ui.getNumColumns() != Vj.getNumColumns()) {
            Vj = LibMatrixReorg.reorg(
                (MatrixBlock) Vj,
                new MatrixBlock(Vj.getNumColumns(), Vj.getNumRows(), Vj.isInSparseFormat()),
                new ReorgOperator(SwapIndex.getSwapIndexFnObject()));
        }

        // Step 3: process instruction
        Xij.quaternaryOperations(qop, (MatrixBlock) Ui, (MatrixBlock) Vj, (MatrixBlock) Wij, (MatrixBlock) dstVal);
        if (qop.wtype1 != null || qop.wtype4 != null) {
            // wsloss
            dstIx.setIndexes(1, 1);
        } else if (qop.wtype2 != null || qop.wtype5 != null || (qop.wtype3 != null && qop.wtype3.isBasic())) {
            // wsigmoid/wdivmm-basic
            dstIx.setIndexes(srcIx);
        } else {
            // wdivmm
            boolean left = qop.wtype3.isLeft();
            long outRow = left ? srcIx.getColumnIndex() : srcIx.getRowIndex();
            dstIx.setIndexes(outRow, 1);
        }

        // put the output value in the cache
        if (target == tempValue) {
            cachedValues.add(output, target);
        }
    }
}
Also used : QuaternaryOperator(org.apache.sysml.runtime.matrix.operators.QuaternaryOperator) MatrixBlock(org.apache.sysml.runtime.matrix.data.MatrixBlock) IndexedMatrixValue(org.apache.sysml.runtime.matrix.mapred.IndexedMatrixValue) MatrixValue(org.apache.sysml.runtime.matrix.data.MatrixValue) MatrixIndexes(org.apache.sysml.runtime.matrix.data.MatrixIndexes) ReorgOperator(org.apache.sysml.runtime.matrix.operators.ReorgOperator) IndexedMatrixValue(org.apache.sysml.runtime.matrix.mapred.IndexedMatrixValue)

Aggregations

MatrixBlock (org.apache.sysml.runtime.matrix.data.MatrixBlock)459 MatrixIndexes (org.apache.sysml.runtime.matrix.data.MatrixIndexes)142 DMLRuntimeException (org.apache.sysml.runtime.DMLRuntimeException)111 MatrixCharacteristics (org.apache.sysml.runtime.matrix.MatrixCharacteristics)102 CompressedMatrixBlock (org.apache.sysml.runtime.compress.CompressedMatrixBlock)48 SparkExecutionContext (org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext)48 IOException (java.io.IOException)44 MatrixObject (org.apache.sysml.runtime.controlprogram.caching.MatrixObject)41 ArrayList (java.util.ArrayList)40 Path (org.apache.hadoop.fs.Path)29 FrameBlock (org.apache.sysml.runtime.matrix.data.FrameBlock)24 FileSystem (org.apache.hadoop.fs.FileSystem)23 JavaPairRDD (org.apache.spark.api.java.JavaPairRDD)23 JobConf (org.apache.hadoop.mapred.JobConf)21 Tuple2 (scala.Tuple2)19 SequenceFile (org.apache.hadoop.io.SequenceFile)17 Row (org.apache.spark.sql.Row)14 SparseBlock (org.apache.sysml.runtime.matrix.data.SparseBlock)14 TestConfiguration (org.apache.sysml.test.integration.TestConfiguration)14 IndexedMatrixValue (org.apache.sysml.runtime.matrix.mapred.IndexedMatrixValue)13