Search in sources :

Example 61 with IndexedMatrixValue

use of org.apache.sysml.runtime.matrix.mapred.IndexedMatrixValue in project systemml by apache.

the class LibMatrixReorg method rmempty.

/**
 * MR rmempty interface - for rmempty we cannot view blocks independently, and hence,
 * there are different CP and MR interfaces.
 *
 * <p>Every row (margin "rows") or column (margin "cols") of the input block whose offset
 * entry is greater than zero is copied to the 1-based global output position given by that
 * entry. Entries that are not greater than zero are skipped. Target blocks are created on
 * demand, and their non-zero counts are recomputed before they are appended to
 * {@code outList}.
 *
 * @param data indexed input block; its value must be a {@code MatrixBlock}
 * @param offset indexed block of 1-based target positions; its value must be a
 *     {@code MatrixBlock}. Positions are read from column 0 if {@code rmRows} is true and
 *     from row 0 otherwise
 * @param rmRows true for margin "rows", false for margin "cols"
 * @param len total output length along the compacted dimension (rows if {@code rmRows},
 *     columns otherwise); used to size the last, possibly partial, target block
 * @param brlen number of rows in a block
 * @param bclen number of columns in a block
 * @param outList list of indexed matrix values that receives the produced output blocks
 * @throws DMLRuntimeException if either input value is not a {@code MatrixBlock} or if the
 *     data and offset blocks disagree in the relevant dimension
 */
public static void rmempty(IndexedMatrixValue data, IndexedMatrixValue offset, boolean rmRows, long len, long brlen, long bclen, ArrayList<IndexedMatrixValue> outList) {
    // sanity check inputs
    if (!(data.getValue() instanceof MatrixBlock && offset.getValue() instanceof MatrixBlock)) {
        throw new DMLRuntimeException("Unsupported input data: expected " + MatrixBlock.class.getName() + " but got " + data.getValue().getClass().getName() + " and " + offset.getValue().getClass().getName());
    }
    if ((rmRows && data.getValue().getNumRows() != offset.getValue().getNumRows()) || (!rmRows && data.getValue().getNumColumns() != offset.getValue().getNumColumns())) {
        throw new DMLRuntimeException("Dimension mismatch between input data and offsets: [" + data.getValue().getNumRows() + "x" + data.getValue().getNumColumns() + " vs " + offset.getValue().getNumRows() + "x" + offset.getValue().getNumColumns() + "]");
    }
    // compute outputs (at most two output blocks)
    HashMap<MatrixIndexes, IndexedMatrixValue> out = new HashMap<>();
    MatrixBlock linData = (MatrixBlock) data.getValue();
    MatrixBlock linOffset = (MatrixBlock) offset.getValue();
    // reusable lookup key; the map itself always stores a fresh MatrixIndexes copy
    MatrixIndexes tmpIx = new MatrixIndexes(-1, -1);
    if (rmRows) {
        // margin = "rows"
        long rlen = len;
        long clen = linData.getNumColumns();
        int lastCol = (int) (clen - 1);
        for (int i = 0; i < linOffset.getNumRows(); i++) {
            long rix = (long) linOffset.quickGetValue(i, 0);
            if (rix <= 0) {
                // empty row, nothing to copy
                continue;
            }
            // get single row from source block
            MatrixBlock src = (MatrixBlock) linData.slice(i, i, 0, lastCol, new MatrixBlock());
            // 1-based target block row index and 0-based row position inside that block
            long brix = (rix - 1) / brlen + 1;
            long lbrix = (rix - 1) % brlen;
            tmpIx.setIndexes(brix, data.getIndexes().getColumnIndex());
            // create target block if necessary
            IndexedMatrixValue target = out.get(tmpIx);
            if (target == null) {
                target = new IndexedMatrixValue(new MatrixIndexes(), new MatrixBlock());
                target.getIndexes().setIndexes(tmpIx);
                // the last block along the row dimension may hold fewer than brlen rows
                ((MatrixBlock) target.getValue()).reset((int) Math.min(brlen, rlen - ((brix - 1) * brlen)), (int) clen);
                out.put(target.getIndexes(), target);
            }
            // put single row into target block
            ((MatrixBlock) target.getValue()).copy((int) lbrix, (int) lbrix, 0, lastCol, src, false);
        }
    } else {
        // margin = "cols"
        long rlen = linData.getNumRows();
        long clen = len;
        int lastRow = (int) (rlen - 1);
        for (int i = 0; i < linOffset.getNumColumns(); i++) {
            long cix = (long) linOffset.quickGetValue(0, i);
            if (cix <= 0) {
                // empty column, nothing to copy
                continue;
            }
            // get single column from source block
            MatrixBlock src = (MatrixBlock) linData.slice(0, lastRow, i, i, new MatrixBlock());
            // 1-based target block column index and 0-based column position inside that block
            long bcix = (cix - 1) / bclen + 1;
            long lbcix = (cix - 1) % bclen;
            tmpIx.setIndexes(data.getIndexes().getRowIndex(), bcix);
            // create target block if necessary
            IndexedMatrixValue target = out.get(tmpIx);
            if (target == null) {
                target = new IndexedMatrixValue(new MatrixIndexes(), new MatrixBlock());
                target.getIndexes().setIndexes(tmpIx);
                // the last block along the column dimension may hold fewer than bclen columns
                ((MatrixBlock) target.getValue()).reset((int) rlen, (int) Math.min(bclen, clen - ((bcix - 1) * bclen)));
                out.put(target.getIndexes(), target);
            }
            // put single column into target block
            ((MatrixBlock) target.getValue()).copy(0, lastRow, (int) lbcix, (int) lbcix, src, false);
        }
    }
    // prepare and return outputs (already in cached values)
    for (IndexedMatrixValue imv : out.values()) {
        ((MatrixBlock) imv.getValue()).recomputeNonZeros();
        outList.add(imv);
    }
}
Also used : HashMap(java.util.HashMap) IndexedMatrixValue(org.apache.sysml.runtime.matrix.mapred.IndexedMatrixValue) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException)

Example 62 with IndexedMatrixValue

use of org.apache.sysml.runtime.matrix.mapred.IndexedMatrixValue in project systemml by apache.

the class LibMatrixReorg method rexpand.

/**
 * MR/Spark rexpand operation (single input, multiple outputs incl empty blocks).
 *
 * <p>The input block is first expanded into one temporary block by the block-level
 * {@code rexpand}; that temporary block is then cut into slices of at most {@code brlen}
 * rows (vertical expansion) or at most {@code bclen} columns (horizontal expansion), and
 * each slice is appended to {@code outList} together with its block index.
 *
 * @param data indexed matrix value; its value is cast to {@code MatrixBlock}
 * @param max forwarded unchanged to the block-level rexpand
 * @param rows forwarded to the block-level rexpand; additionally selects vertical (true)
 *     or horizontal (false) slicing of the expanded result
 * @param cast forwarded unchanged to the block-level rexpand
 * @param ignore forwarded unchanged to the block-level rexpand
 * @param brlen number of rows in a block
 * @param bclen number of columns in a block
 * @param outList list of indexed matrix values that receives the sliced output blocks
 */
public static void rexpand(IndexedMatrixValue data, double max, boolean rows, boolean cast, boolean ignore, long brlen, long bclen, ArrayList<IndexedMatrixValue> outList) {
    // unpack the input
    MatrixIndexes inputIx = data.getIndexes();
    MatrixBlock inputBlock = (MatrixBlock) data.getValue();

    // run the block-level expansion (incl sanity checks) into a temporary block
    // TODO more robust (memory efficient) implementation w/o tmp block
    MatrixBlock expanded = rexpand(inputBlock, new MatrixBlock(), max, rows, cast, ignore, 1);
    final int numRows = expanded.getNumRows();
    final int numCols = expanded.getNumColumns();

    if (!rows) {
        // expanded horizontally: emit slices of up to bclen columns
        for (int colStart = 0; colStart < numCols; colStart += bclen) {
            int colEnd = (int) (Math.min(colStart + bclen, numCols) - 1);
            MatrixBlock slice = expanded.slice(0, numRows - 1, colStart, colEnd, new MatrixBlock());
            MatrixIndexes sliceIx = new MatrixIndexes(inputIx.getRowIndex(), colStart / bclen + 1);
            outList.add(new IndexedMatrixValue(sliceIx, slice));
        }
        return;
    }

    // expanded vertically: emit slices of up to brlen rows
    for (int rowStart = 0; rowStart < numRows; rowStart += brlen) {
        int rowEnd = (int) (Math.min(rowStart + brlen, numRows) - 1);
        MatrixBlock slice = expanded.slice(rowStart, rowEnd, 0, numCols - 1, new MatrixBlock());
        // the input block's row index becomes the output block's column index
        MatrixIndexes sliceIx = new MatrixIndexes(rowStart / brlen + 1, inputIx.getRowIndex());
        outList.add(new IndexedMatrixValue(sliceIx, slice));
    }
}
Also used : IndexedMatrixValue(org.apache.sysml.runtime.matrix.mapred.IndexedMatrixValue)

Example 63 with IndexedMatrixValue

use of org.apache.sysml.runtime.matrix.mapred.IndexedMatrixValue in project systemml by apache.

the class ReaderBinaryBlock method readIndexedMatrixBlocksFromHDFS.

/**
 * Reads the blocks of a binary-block matrix file into a list of indexed matrix values.
 *
 * <p>The path is prefixed with {@code file:///} when this reader's {@code _localFS} flag is
 * set. The file is validated via {@code checkValidInputFile} before the blocks are read
 * by {@code readBinaryBlockMatrixBlocksFromHDFS}.
 *
 * @param fname name of the input file
 * @param rlen forwarded to the core read; presumably the number of matrix rows
 * @param clen forwarded to the core read; presumably the number of matrix columns
 * @param brlen forwarded to the core read; presumably the number of rows in a block
 * @param bclen forwarded to the core read; presumably the number of columns in a block
 * @return list of the indexed matrix blocks that were read
 * @throws IOException declared for failures of the underlying file access
 * @throws DMLRuntimeException declared for failures reported by the called helpers
 */
public ArrayList<IndexedMatrixValue> readIndexedMatrixBlocksFromHDFS(String fname, long rlen, long clen, int brlen, int bclen) throws IOException, DMLRuntimeException {
    // set up file access: job configuration, (optionally local) path, and file system
    JobConf conf = new JobConf(ConfigurationManager.getCachedJobConf());
    String scheme = _localFS ? "file:///" : "";
    Path inputPath = new Path(scheme + fname);
    FileSystem fileSystem = IOUtilFunctions.getFileSystem(inputPath, conf);

    // check existence and non-empty file
    checkValidInputFile(fileSystem, inputPath);

    // core read into a freshly allocated block collection
    ArrayList<IndexedMatrixValue> blocks = new ArrayList<>();
    readBinaryBlockMatrixBlocksFromHDFS(inputPath, conf, fileSystem, blocks, rlen, clen, brlen, bclen);
    return blocks;
}
Also used : Path(org.apache.hadoop.fs.Path) FileSystem(org.apache.hadoop.fs.FileSystem) ArrayList(java.util.ArrayList) IndexedMatrixValue(org.apache.sysml.runtime.matrix.mapred.IndexedMatrixValue) JobConf(org.apache.hadoop.mapred.JobConf)

Example 64 with IndexedMatrixValue

use of org.apache.sysml.runtime.matrix.mapred.IndexedMatrixValue in project systemml by apache.

the class CpmmSPInstruction method processInstruction.

/**
 * Executes the cpmm matrix multiply on Spark: both inputs are re-keyed by
 * {@code CpmmIndexFunction}, joined over the common dimension, multiplied block-wise by
 * {@code CpmmMultiplyFunction}, and finally aggregated either into a single block or by
 * output block index.
 *
 * @param ec execution context; cast to {@code SparkExecutionContext}
 */
@Override
public void processInstruction(ExecutionContext ec) {
    SparkExecutionContext sec = (SparkExecutionContext) ec;
    final boolean singleBlock = (_aggtype == SparkAggType.SINGLE_BLOCK);
    final String leftName = input1.getName();
    final String rightName = input2.getName();

    // get rdd inputs and their matrix characteristics
    JavaPairRDD<MatrixIndexes, MatrixBlock> left = sec.getBinaryBlockRDDHandleForVariable(leftName);
    JavaPairRDD<MatrixIndexes, MatrixBlock> right = sec.getBinaryBlockRDDHandleForVariable(rightName);
    MatrixCharacteristics leftMc = sec.getMatrixCharacteristics(leftName);
    MatrixCharacteristics rightMc = sec.getMatrixCharacteristics(rightName);

    // prune empty blocks of ultra-sparse matrices
    if (singleBlock || !_outputEmptyBlocks) {
        left = left.filter(new FilterNonEmptyBlocksFunction());
        right = right.filter(new FilterNonEmptyBlocksFunction());
    }

    // compute preferred join degree of parallelism, capped by the maximum
    int preferredPar = getPreferredParJoin(leftMc, rightMc, left.getNumPartitions(), right.getNumPartitions());
    int joinPar = Math.min(getMaxParJoin(leftMc, rightMc), preferredPar);

    // process core cpmm matrix multiply:
    // re-key both sides, join over common dimension, compute block multiplications
    JavaPairRDD<Long, IndexedMatrixValue> leftKeyed = left.mapToPair(new CpmmIndexFunction(true));
    JavaPairRDD<Long, IndexedMatrixValue> rightKeyed = right.mapToPair(new CpmmIndexFunction(false));
    JavaPairRDD<MatrixIndexes, MatrixBlock> products = leftKeyed
        .join(rightKeyed, joinPar)
        .mapToPair(new CpmmMultiplyFunction());

    // process cpmm aggregation and handle outputs
    if (singleBlock) {
        // prune empty blocks and aggregate all results
        products = products.filter(new FilterNonEmptyBlocksFunction());
        MatrixBlock total = RDDAggregateUtils.sumStable(products);
        // put output block into symbol table (no lineage because single block)
        // this also includes implicit maintenance of matrix characteristics
        sec.setMatrixOutput(output.getName(), total, getExtendedOpcode());
        return;
    }

    // DEFAULT: MULTI_BLOCK
    if (!_outputEmptyBlocks) {
        products = products.filter(new FilterNonEmptyBlocksFunction());
    }
    products = RDDAggregateUtils.sumByKeyStable(products, false);
    // put output RDD handle into symbol table and register both inputs as lineage
    sec.setRDDHandleForVariable(output.getName(), products);
    sec.addLineageRDD(output.getName(), leftName);
    sec.addLineageRDD(output.getName(), rightName);
    // update output statistics if not inferred
    updateBinaryMMOutputMatrixCharacteristics(sec, true);
}
Also used : FilterNonEmptyBlocksFunction(org.apache.sysml.runtime.instructions.spark.functions.FilterNonEmptyBlocksFunction) MatrixBlock(org.apache.sysml.runtime.matrix.data.MatrixBlock) MatrixIndexes(org.apache.sysml.runtime.matrix.data.MatrixIndexes) SparkExecutionContext(org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext) IndexedMatrixValue(org.apache.sysml.runtime.matrix.mapred.IndexedMatrixValue) MatrixCharacteristics(org.apache.sysml.runtime.matrix.MatrixCharacteristics)

Aggregations

IndexedMatrixValue (org.apache.sysml.runtime.matrix.mapred.IndexedMatrixValue): 64, MatrixIndexes (org.apache.sysml.runtime.matrix.data.MatrixIndexes): 32, MatrixBlock (org.apache.sysml.runtime.matrix.data.MatrixBlock): 31, MatrixValue (org.apache.sysml.runtime.matrix.data.MatrixValue): 16, ArrayList (java.util.ArrayList): 14, DistributedCacheInput (org.apache.sysml.runtime.matrix.mapred.DistributedCacheInput): 12, DMLRuntimeException (org.apache.sysml.runtime.DMLRuntimeException): 9, CompressedMatrixBlock (org.apache.sysml.runtime.compress.CompressedMatrixBlock): 6, Path (org.apache.hadoop.fs.Path): 4, AggregateBinaryOperator (org.apache.sysml.runtime.matrix.operators.AggregateBinaryOperator): 4, BinaryOperator (org.apache.sysml.runtime.matrix.operators.BinaryOperator): 4, ReorgOperator (org.apache.sysml.runtime.matrix.operators.ReorgOperator): 4, CTableMap (org.apache.sysml.runtime.matrix.data.CTableMap): 3, IOException (java.io.IOException): 2, HashMap (java.util.HashMap): 2, FileSystem (org.apache.hadoop.fs.FileSystem): 2, SequenceFile (org.apache.hadoop.io.SequenceFile): 2, JobConf (org.apache.hadoop.mapred.JobConf): 2, SparkExecutionContext (org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext): 2, DiagIndex (org.apache.sysml.runtime.functionobjects.DiagIndex): 2