Search in sources :

Example 36 with IndexedMatrixValue

use of org.apache.sysml.runtime.matrix.mapred.IndexedMatrixValue in project incubator-systemml by apache.

the class ReorgInstruction method processInstruction.

/**
 * Applies the reorg operator of this instruction to every cached block of the
 * input and places the results under the output index of {@code cachedValues}.
 *
 * <p>Three cases are distinguished by the operator's index function:
 * {@code DiagIndex} (vector-to-matrix or matrix-to-vector, decided at runtime
 * from the input characteristics), {@code RevIndex}, and the general case
 * (e.g., transpose).
 *
 * @param valueClass class of the matrix values to allocate for outputs
 * @param cachedValues map holding the input blocks and receiving the output blocks
 * @param tempValue not used by this method
 * @param zeroInput not used by this method
 * @param blockRowFactor not used by this method
 * @param blockColFactor not used by this method
 */
@Override
public void processInstruction(Class<? extends MatrixValue> valueClass, CachedValueMap cachedValues, IndexedMatrixValue tempValue, IndexedMatrixValue zeroInput, int blockRowFactor, int blockColFactor) {
    final ArrayList<IndexedMatrixValue> inputBlocks = cachedValues.get(input);
    if (inputBlocks == null) {
        // nothing cached for the input
        return;
    }
    for (IndexedMatrixValue inBlock : inputBlocks) {
        if (inBlock == null) {
            continue;
        }
        final ReorgOperator reorgOp = (ReorgOperator) optr;
        // the reorg calls below are always invoked with zero offsets and zero length
        final int startRow = 0;
        final int startColumn = 0;
        final int length = 0;
        if (reorgOp.fn instanceof DiagIndex) {
            // diag is overloaded and size-dependent, hence the direction is decided at runtime:
            // a row/column vector input means vector-to-matrix, anything else matrix-to-vector
            final boolean vectorToMatrix = (_mcIn.getRows() == 1) || (_mcIn.getCols() == 1);
            final long maxDim = Math.max(_mcIn.getRows(), _mcIn.getCols());
            if (vectorToMatrix) {
                // allocate the output block and compute the diagonal block
                IndexedMatrixValue diagBlock = cachedValues.holdPlace(output, valueClass);
                OperationsOnMatrixValues.performReorg(inBlock.getIndexes(), inBlock.getValue(), diagBlock.getIndexes(), diagBlock.getValue(), reorgOp, startRow, startColumn, length);
                // optionally emit the empty off-diagonal blocks (block representation only)
                if (_outputEmptyBlocks && valueClass.equals(MatrixBlock.class)) {
                    // for the diagonal block the row index equals the column index
                    final long diagIndex = diagBlock.getIndexes().getRowIndex();
                    final long blockSize = Math.max(_mcIn.getRowsPerBlock(), _mcIn.getColsPerBlock());
                    final long numRowBlocks = (maxDim / blockSize) + ((maxDim % blockSize == 0) ? 0 : 1);
                    for (long rowBlock = 1; rowBlock <= numRowBlocks; rowBlock++) {
                        // the diagonal block itself was already produced above
                        if (rowBlock != diagIndex) {
                            IndexedMatrixValue emptyBlock = cachedValues.holdPlace(output, valueClass);
                            // the last row block may be smaller than a full block
                            final long rowsInBlock = (rowBlock * blockSize <= maxDim) ? blockSize : (maxDim % blockSize);
                            emptyBlock.getIndexes().setIndexes(rowBlock, diagIndex);
                            emptyBlock.getValue().reset((int) rowsInBlock, diagBlock.getValue().getNumColumns(), true);
                        }
                    }
                }
            } else if (inBlock.getIndexes().getRowIndex() == inBlock.getIndexes().getColumnIndex()) {
                // matrix-to-vector: only diagonal blocks contribute, all other blocks are skipped
                IndexedMatrixValue vectorBlock = cachedValues.holdPlace(output, valueClass);
                // the result lands in the first block column at the row index of the input block
                vectorBlock.getIndexes().setIndexes(inBlock.getIndexes().getRowIndex(), 1);
                inBlock.getValue().reorgOperations(reorgOp, vectorBlock.getValue(), startRow, startColumn, length);
            }
        } else if (reorgOp.fn instanceof RevIndex) {
            // reverse: one input block may yield several output blocks
            ArrayList<IndexedMatrixValue> reversed = new ArrayList<>();
            LibMatrixReorg.rev(inBlock, _mcIn.getRows(), _mcIn.getRowsPerBlock(), reversed);
            for (IndexedMatrixValue revBlock : reversed) {
                cachedValues.add(output, revBlock);
            }
        } else {
            // general case (e.g., transpose): one output block per input block
            IndexedMatrixValue result = cachedValues.holdPlace(output, valueClass);
            OperationsOnMatrixValues.performReorg(inBlock.getIndexes(), inBlock.getValue(), result.getIndexes(), result.getValue(), reorgOp, startRow, startColumn, length);
        }
    }
}
Also used : RevIndex(org.apache.sysml.runtime.functionobjects.RevIndex) DiagIndex(org.apache.sysml.runtime.functionobjects.DiagIndex) ArrayList(java.util.ArrayList) ReorgOperator(org.apache.sysml.runtime.matrix.operators.ReorgOperator) IndexedMatrixValue(org.apache.sysml.runtime.matrix.mapred.IndexedMatrixValue)

Example 37 with IndexedMatrixValue

use of org.apache.sysml.runtime.matrix.mapred.IndexedMatrixValue in project incubator-systemml by apache.

the class UaggOuterChainInstruction method processInstruction.

/**
 * Processes the fused outer-product / unary-aggregate chain for every cached
 * block of the main input, combining it with the second input that is read
 * from the distributed cache.
 *
 * <p>If {@code LibMatrixOuterAgg} supports the operator combination, the
 * cached vector is prepared once (stored in {@code _bv}/{@code _bvi}) and the
 * specialized aggregate is used; otherwise the blocks of the cached input are
 * processed one by one and aggregated incrementally.
 *
 * @param valueClass class of the matrix values to allocate for outputs
 * @param cachedValues map holding the input blocks and receiving the output blocks
 * @param tempValue not used by this method
 * @param zeroInput not used by this method
 * @param blockRowFactor forwarded to the unary aggregate in the default case
 * @param blockColFactor forwarded to the unary aggregate in the default case
 */
@Override
public void processInstruction(Class<? extends MatrixValue> valueClass, CachedValueMap cachedValues, IndexedMatrixValue tempValue, IndexedMatrixValue zeroInput, int blockRowFactor, int blockColFactor) {
    // decide which side comes from the distributed cache
    final boolean rightCached = _uaggOp.indexFn instanceof ReduceCol
        || _uaggOp.indexFn instanceof ReduceAll
        || !LibMatrixOuterAgg.isSupportedUaggOp(_uaggOp, _bOp);
    // the main data input is the side that is not cached (input2 for ReduceRow)
    final ArrayList<IndexedMatrixValue> mainBlocks;
    if (rightCached) {
        mainBlocks = cachedValues.get(input1);
    } else {
        mainBlocks = cachedValues.get(input2);
    }
    if (mainBlocks == null) {
        return;
    }
    for (IndexedMatrixValue mainBlock : mainBlocks) {
        if (mainBlock == null) {
            continue;
        }
        final MatrixIndexes in1Ix = mainBlock.getIndexes();
        final MatrixValue in1Val = mainBlock.getValue();
        // allocate space for the output value
        final IndexedMatrixValue result = cachedValues.holdPlace(output, valueClass);
        final MatrixIndexes outIx = result.getIndexes();
        final MatrixValue outVal = result.getValue();
        // look up the distributed cache input (the opposite side of the main input)
        final byte cacheIx = rightCached ? input2 : input1;
        final DistributedCacheInput dcInput = MRBaseForCommonInstructions.dcValues.get(cacheIx);
        if (LibMatrixOuterAgg.isSupportedUaggOp(_uaggOp, _bOp)) {
            final boolean rowIndexOp = LibMatrixOuterAgg.isRowIndexMax(_uaggOp) || LibMatrixOuterAgg.isRowIndexMin(_uaggOp);
            // prepare the cached vector only once; it is kept in _bv for later blocks
            if (_bv == null) {
                _bv = rightCached ? dcInput.getRowVectorArray() : dcInput.getColumnVectorArray();
                if (rowIndexOp) {
                    // row index max/min additionally need the prepared row indices
                    _bvi = LibMatrixOuterAgg.prepareRowIndices(_bv.length, _bv, _bOp, _uaggOp);
                } else {
                    // all other supported operators work on the sorted vector
                    Arrays.sort(_bv);
                }
            }
            LibMatrixOuterAgg.resetOutputMatrix(in1Ix, (MatrixBlock) in1Val, outIx, (MatrixBlock) outVal, _uaggOp);
            LibMatrixOuterAgg.aggregateMatrix((MatrixBlock) in1Val, (MatrixBlock) outVal, _bv, _bvi, _bOp, _uaggOp);
        } else {
            // default case: iterate over all column blocks of the cached input
            MatrixBlock corr = null;
            final long in2_cols = dcInput.getNumCols();
            final long in2_colBlocks = (long) Math.ceil(((double) in2_cols) / dcInput.getNumColsPerBlock());
            for (int bidx = 1; bidx <= in2_colBlocks; bidx++) {
                final MatrixValue in2Val = dcInput.getDataBlock(1, bidx).getValue();
                // outer block operation
                OperationsOnMatrixValues.performBinaryIgnoreIndexes(in1Val, in2Val, _tmpVal1, _bOp);
                // unary aggregate operation
                OperationsOnMatrixValues.performAggregateUnary(in1Ix, _tmpVal1, outIx, _tmpVal2, _uaggOp, blockRowFactor, blockColFactor);
                // first rhs block: size the output and the correction like the partial result
                if (corr == null) {
                    outVal.reset(_tmpVal2.getNumRows(), _tmpVal2.getNumColumns(), false);
                    corr = new MatrixBlock(_tmpVal2.getNumRows(), _tmpVal2.getNumColumns(), false);
                }
                // aggregate over all rhs blocks; the correction is passed only if the operator uses one
                if (_aggOp.correctionExists) {
                    OperationsOnMatrixValues.incrementalAggregation(outVal, corr, _tmpVal2, _aggOp, true);
                } else {
                    OperationsOnMatrixValues.incrementalAggregation(outVal, null, _tmpVal2, _aggOp, true);
                }
            }
        }
    }
}
Also used : ReduceCol(org.apache.sysml.runtime.functionobjects.ReduceCol) DistributedCacheInput(org.apache.sysml.runtime.matrix.mapred.DistributedCacheInput) ReduceAll(org.apache.sysml.runtime.functionobjects.ReduceAll) MatrixBlock(org.apache.sysml.runtime.matrix.data.MatrixBlock) IndexedMatrixValue(org.apache.sysml.runtime.matrix.mapred.IndexedMatrixValue) MatrixValue(org.apache.sysml.runtime.matrix.data.MatrixValue) MatrixIndexes(org.apache.sysml.runtime.matrix.data.MatrixIndexes)

Example 38 with IndexedMatrixValue

use of org.apache.sysml.runtime.matrix.mapred.IndexedMatrixValue in project incubator-systemml by apache.

the class CpmmSPInstruction method processInstruction.

/**
 * Executes the cpmm matrix multiply on Spark: both inputs are keyed via
 * {@code CpmmIndexFunction}, joined, multiplied block-wise, and finally
 * aggregated either into a single block or by output block index.
 *
 * @param ec execution context; cast to {@code SparkExecutionContext}
 */
@Override
public void processInstruction(ExecutionContext ec) {
    final SparkExecutionContext sec = (SparkExecutionContext) ec;
    final boolean singleBlock = (_aggtype == SparkAggType.SINGLE_BLOCK);

    // obtain rdd handles and characteristics of both inputs
    JavaPairRDD<MatrixIndexes, MatrixBlock> lhs = sec.getBinaryBlockRDDHandleForVariable(input1.getName());
    JavaPairRDD<MatrixIndexes, MatrixBlock> rhs = sec.getBinaryBlockRDDHandleForVariable(input2.getName());
    final MatrixCharacteristics lhsMc = sec.getMatrixCharacteristics(input1.getName());
    final MatrixCharacteristics rhsMc = sec.getMatrixCharacteristics(input2.getName());

    if (singleBlock) {
        // prune empty blocks of ultra-sparse matrices
        lhs = lhs.filter(new FilterNonEmptyBlocksFunction());
        rhs = rhs.filter(new FilterNonEmptyBlocksFunction());
    }

    // compute the join parallelism: the preferred degree, capped by the maximum
    final int preferredPar = getPreferredParJoin(lhsMc, rhsMc, lhs.getNumPartitions(), rhs.getNumPartitions());
    final int joinPar = Math.min(getMaxParJoin(lhsMc, rhsMc), preferredPar);

    // core cpmm: key both sides, join over the common dimension, multiply the block pairs
    final JavaPairRDD<Long, IndexedMatrixValue> lhsKeyed = lhs.mapToPair(new CpmmIndexFunction(true));
    final JavaPairRDD<Long, IndexedMatrixValue> rhsKeyed = rhs.mapToPair(new CpmmIndexFunction(false));
    JavaPairRDD<MatrixIndexes, MatrixBlock> products = lhsKeyed
        .join(rhsKeyed, joinPar)
        .mapToPair(new CpmmMultiplyFunction());

    if (singleBlock) {
        // prune empty blocks and sum all partial results into one block
        products = products.filter(new FilterNonEmptyBlocksFunction());
        final MatrixBlock total = RDDAggregateUtils.sumStable(products);
        // put the output block into the symbol table (no lineage because single block);
        // this also includes implicit maintenance of matrix characteristics
        sec.setMatrixOutput(output.getName(), total, getExtendedOpcode());
        return;
    }

    // DEFAULT: MULTI_BLOCK - sum partial results per output block index
    products = RDDAggregateUtils.sumByKeyStable(products, false);
    // put the output RDD handle into the symbol table and record its lineage
    sec.setRDDHandleForVariable(output.getName(), products);
    sec.addLineageRDD(output.getName(), input1.getName());
    sec.addLineageRDD(output.getName(), input2.getName());
    // update output statistics if not inferred
    updateBinaryMMOutputMatrixCharacteristics(sec, true);
}
Also used : FilterNonEmptyBlocksFunction(org.apache.sysml.runtime.instructions.spark.functions.FilterNonEmptyBlocksFunction) MatrixBlock(org.apache.sysml.runtime.matrix.data.MatrixBlock) MatrixIndexes(org.apache.sysml.runtime.matrix.data.MatrixIndexes) SparkExecutionContext(org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext) IndexedMatrixValue(org.apache.sysml.runtime.matrix.mapred.IndexedMatrixValue) MatrixCharacteristics(org.apache.sysml.runtime.matrix.MatrixCharacteristics)

Example 39 with IndexedMatrixValue

use of org.apache.sysml.runtime.matrix.mapred.IndexedMatrixValue in project incubator-systemml by apache.

the class LibMatrixReorg method rexpand.

/**
 * MR/Spark rexpand operation (single input, multiple outputs incl empty blocks).
 *
 * <p>The input block is expanded into one temporary block by the block-level
 * {@code rexpand}, which is then sliced into output blocks of at most
 * {@code brlen} rows (vertical expansion) or {@code bclen} columns
 * (horizontal expansion). The row index of the input block determines the
 * column index (vertical) or row index (horizontal) of the output blocks.
 *
 * @param data indexed matrix value; its value must be a {@code MatrixBlock}
 * @param max forwarded unchanged to the block-level rexpand (presumably the size of the expanded dimension; confirm there)
 * @param rows true to expand vertically (slice by rows), false to expand horizontally (slice by columns)
 * @param cast forwarded unchanged to the block-level rexpand
 * @param ignore forwarded unchanged to the block-level rexpand
 * @param brlen number of rows in a block
 * @param bclen number of columns in a block
 * @param outList list of indexed matrix values that receives the output blocks
 */
public static void rexpand(IndexedMatrixValue data, double max, boolean rows, boolean cast, boolean ignore, long brlen, long bclen, ArrayList<IndexedMatrixValue> outList) {
    // unpack the input
    final MatrixIndexes inputIx = data.getIndexes();
    final MatrixBlock inputBlock = (MatrixBlock) data.getValue();

    // expand the whole input at once (incl sanity checks), single-threaded
    // TODO more robust (memory efficient) implementation w/o tmp block
    final MatrixBlock expanded = rexpand(inputBlock, new MatrixBlock(), max, rows, cast, ignore, 1);
    final int numRows = expanded.getNumRows();
    final int numCols = expanded.getNumColumns();

    if (rows) {
        // expanded vertically: cut the temporary block into row stripes
        for (int rowStart = 0; rowStart < numRows; rowStart += brlen) {
            final long rowEndExclusive = Math.min(rowStart + brlen, numRows);
            final MatrixBlock stripe = expanded.slice(rowStart, (int) (rowEndExclusive - 1), 0, numCols - 1, new MatrixBlock());
            final MatrixIndexes stripeIx = new MatrixIndexes(rowStart / brlen + 1, inputIx.getRowIndex());
            outList.add(new IndexedMatrixValue(stripeIx, stripe));
        }
    } else {
        // expanded horizontally: cut the temporary block into column stripes
        for (int colStart = 0; colStart < numCols; colStart += bclen) {
            final long colEndExclusive = Math.min(colStart + bclen, numCols);
            final MatrixBlock stripe = expanded.slice(0, numRows - 1, colStart, (int) (colEndExclusive - 1), new MatrixBlock());
            final MatrixIndexes stripeIx = new MatrixIndexes(inputIx.getRowIndex(), colStart / bclen + 1);
            outList.add(new IndexedMatrixValue(stripeIx, stripe));
        }
    }
}
Also used : IndexedMatrixValue(org.apache.sysml.runtime.matrix.mapred.IndexedMatrixValue)

Example 40 with IndexedMatrixValue

use of org.apache.sysml.runtime.matrix.mapred.IndexedMatrixValue in project incubator-systemml by apache.

the class LibMatrixReorg method rmempty.

/**
 * MR rmempty interface - for rmempty we cannot view blocks independently, and hence,
 * there are different CP and MR interfaces.
 *
 * <p>Every row (or column) of the data block whose offset entry is positive is
 * copied to the output position given by that entry; rows (or columns) with a
 * non-positive offset are dropped. Output blocks are created on demand and
 * appended to {@code outList} with recomputed non-zero counts.
 *
 * @param data input block; its value must be a {@code MatrixBlock}
 * @param offset block holding, per row (read from column 0) or per column (read from row 0)
 *               of {@code data}, the 1-based target position in the output, or a value
 *               &lt;= 0 if the row/column is removed; its value must be a {@code MatrixBlock}
 * @param rmRows true to remove empty rows (margin = "rows"), false to remove empty columns
 * @param len total output length along the compacted dimension; used to size the last output block
 * @param brlen number of rows in a block
 * @param bclen number of columns in a block
 * @param outList list of indexed matrix values that receives the output blocks
 * @throws DMLRuntimeException if either input is not a {@code MatrixBlock} or the
 *         dimensions of data and offsets do not match along the compacted dimension
 */
public static void rmempty(IndexedMatrixValue data, IndexedMatrixValue offset, boolean rmRows, long len, long brlen, long bclen, ArrayList<IndexedMatrixValue> outList) {
    // sanity check inputs
    if (!(data.getValue() instanceof MatrixBlock && offset.getValue() instanceof MatrixBlock))
        throw new DMLRuntimeException("Unsupported input data: expected " + MatrixBlock.class.getName() + " but got " + data.getValue().getClass().getName() + " and " + offset.getValue().getClass().getName());
    if ((rmRows && data.getValue().getNumRows() != offset.getValue().getNumRows()) || (!rmRows && data.getValue().getNumColumns() != offset.getValue().getNumColumns())) {
        throw new DMLRuntimeException("Dimension mismatch between input data and offsets: [" + data.getValue().getNumRows() + "x" + data.getValue().getNumColumns() + " vs " + offset.getValue().getNumRows() + "x" + offset.getValue().getNumColumns() + "]");
    }
    // compute outputs (at most two output blocks)
    HashMap<MatrixIndexes, IndexedMatrixValue> out = new HashMap<>();
    MatrixBlock linData = (MatrixBlock) data.getValue();
    MatrixBlock linOffset = (MatrixBlock) offset.getValue();
    // reusable lookup key; stored keys are always separate MatrixIndexes instances
    MatrixIndexes tmpIx = new MatrixIndexes(-1, -1);
    if (rmRows) {
        // margin = "rows"
        long rlen = len;
        long clen = linData.getNumColumns();
        for (int i = 0; i < linOffset.getNumRows(); i++) {
            long rix = (long) linOffset.quickGetValue(i, 0);
            // non-positive offset: empty row, nothing to copy
            if (rix > 0) {
                // get single row from source block
                MatrixBlock src = (MatrixBlock) linData.slice(i, i, 0, (int) (clen - 1), new MatrixBlock());
                // 1-based target block row index and 0-based row position within that block
                long brix = (rix - 1) / brlen + 1;
                long lbrix = (rix - 1) % brlen;
                tmpIx.setIndexes(brix, data.getIndexes().getColumnIndex());
                // create target block if necessary (last block may have fewer than brlen rows)
                IndexedMatrixValue target = out.get(tmpIx);
                if (target == null) {
                    target = new IndexedMatrixValue(new MatrixIndexes(), new MatrixBlock());
                    target.getIndexes().setIndexes(tmpIx);
                    ((MatrixBlock) target.getValue()).reset((int) Math.min(brlen, rlen - ((brix - 1) * brlen)), (int) clen);
                    out.put(target.getIndexes(), target);
                }
                // put single row into target block
                ((MatrixBlock) target.getValue()).copy((int) lbrix, (int) lbrix, 0, (int) clen - 1, src, false);
            }
        }
    } else {
        // margin = "cols"
        long rlen = linData.getNumRows();
        long clen = len;
        for (int i = 0; i < linOffset.getNumColumns(); i++) {
            long cix = (long) linOffset.quickGetValue(0, i);
            // non-positive offset: empty column, nothing to copy
            if (cix > 0) {
                // get single column from source block
                MatrixBlock src = (MatrixBlock) linData.slice(0, (int) (rlen - 1), i, i, new MatrixBlock());
                // 1-based target block column index and 0-based column position within that block
                long bcix = (cix - 1) / bclen + 1;
                long lbcix = (cix - 1) % bclen;
                tmpIx.setIndexes(data.getIndexes().getRowIndex(), bcix);
                // create target block if necessary (last block may have fewer than bclen columns)
                IndexedMatrixValue target = out.get(tmpIx);
                if (target == null) {
                    target = new IndexedMatrixValue(new MatrixIndexes(), new MatrixBlock());
                    target.getIndexes().setIndexes(tmpIx);
                    ((MatrixBlock) target.getValue()).reset((int) rlen, (int) Math.min(bclen, clen - ((bcix - 1) * bclen)));
                    out.put(target.getIndexes(), target);
                }
                // put single column into target block
                ((MatrixBlock) target.getValue()).copy(0, (int) rlen - 1, (int) lbcix, (int) lbcix, src, false);
            }
        }
    }
    // prepare and return outputs; rows/columns were copied in piecewise, so refresh the nnz counts
    for (IndexedMatrixValue imv : out.values()) {
        ((MatrixBlock) imv.getValue()).recomputeNonZeros();
        outList.add(imv);
    }
}
Also used : HashMap(java.util.HashMap) IndexedMatrixValue(org.apache.sysml.runtime.matrix.mapred.IndexedMatrixValue) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException)

Aggregations

IndexedMatrixValue (org.apache.sysml.runtime.matrix.mapred.IndexedMatrixValue)64 MatrixIndexes (org.apache.sysml.runtime.matrix.data.MatrixIndexes)32 MatrixBlock (org.apache.sysml.runtime.matrix.data.MatrixBlock)31 MatrixValue (org.apache.sysml.runtime.matrix.data.MatrixValue)16 ArrayList (java.util.ArrayList)14 DistributedCacheInput (org.apache.sysml.runtime.matrix.mapred.DistributedCacheInput)12 DMLRuntimeException (org.apache.sysml.runtime.DMLRuntimeException)9 CompressedMatrixBlock (org.apache.sysml.runtime.compress.CompressedMatrixBlock)6 Path (org.apache.hadoop.fs.Path)4 AggregateBinaryOperator (org.apache.sysml.runtime.matrix.operators.AggregateBinaryOperator)4 BinaryOperator (org.apache.sysml.runtime.matrix.operators.BinaryOperator)4 ReorgOperator (org.apache.sysml.runtime.matrix.operators.ReorgOperator)4 CTableMap (org.apache.sysml.runtime.matrix.data.CTableMap)3 IOException (java.io.IOException)2 HashMap (java.util.HashMap)2 FileSystem (org.apache.hadoop.fs.FileSystem)2 SequenceFile (org.apache.hadoop.io.SequenceFile)2 JobConf (org.apache.hadoop.mapred.JobConf)2 SparkExecutionContext (org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext)2 DiagIndex (org.apache.sysml.runtime.functionobjects.DiagIndex)2