Search in sources :

Example 41 with DenseBlock

use of org.apache.sysml.runtime.matrix.data.DenseBlock in project systemml by apache.

the class SpoofOuterProduct method execute.

/**
 * Runs this outer-product operator sequentially over the full main input and returns the
 * given output block.
 *
 * <p>Judging by the inline notes, input 0 is the main matrix X and inputs 1 and 2 are the
 * factors U and V; inputs from index 3 on are handed to {@code prepInputMatrices} as side
 * inputs (presumably optional extra operands - confirm against that helper).
 *
 * @param inputs        input matrices, at least three entries
 * @param scalarObjects scalar operands, converted via {@code prepInputScalars}
 * @param out           output block; it is reset, allocated, filled and returned
 * @return {@code out}
 * @throws RuntimeException    if {@code inputs} is null or has fewer than three entries,
 *                             or if {@code out} is null
 * @throws DMLRuntimeException if the operator type is {@code AGG_OUTER_PRODUCT}
 */
@Override
public MatrixBlock execute(ArrayList<MatrixBlock> inputs, ArrayList<ScalarObject> scalarObjects, MatrixBlock out) {
    // argument validation
    if (inputs == null || inputs.size() < 3 || out == null)
        throw new RuntimeException("Invalid input arguments.");

    // empty-result detection, evaluated lazily in the order U (left), V (right), X
    boolean emptyResult = _outerProductType == OutProdType.LEFT_OUTER_PRODUCT && inputs.get(1).isEmptyBlock(false);
    if (!emptyResult)
        emptyResult = _outerProductType == OutProdType.RIGHT_OUTER_PRODUCT && inputs.get(2).isEmptyBlock(false);
    if (!emptyResult)
        emptyResult = inputs.get(0).isEmptyBlock(false);
    if (emptyResult) {
        // turn empty dense into sparse
        out.examSparsity();
        return out;
    }

    // shape the output block according to the operator type
    final MatrixBlock x = inputs.get(0);
    final boolean cellwise = _outerProductType == OutProdType.CELLWISE_OUTER_PRODUCT;
    if (cellwise) {
        // same dimensions and sparse/dense representation as the main input
        out.reset(x.getNumRows(), x.getNumColumns(), x.isInSparseFormat());
    } else if (_outerProductType == OutProdType.LEFT_OUTER_PRODUCT) {
        // n*k, always dense
        out.reset(x.getNumColumns(), inputs.get(1).getNumColumns(), false);
    } else if (_outerProductType == OutProdType.RIGHT_OUTER_PRODUCT) {
        // m*k, always dense
        out.reset(x.getNumRows(), inputs.get(1).getNumColumns(), false);
    }

    // nothing to compute for an empty main input; otherwise allocate the result
    if (x.isEmptyBlock(false))
        return out;
    out.allocateBlock();

    // operand preparation: dense factors, side inputs, scalars
    final DenseBlock[] factors = getDenseMatrices(prepInputMatrices(inputs, 1, 2, true, false));
    final SideInput[] sides = prepInputMatrices(inputs, 3, false);
    final double[] scalars = prepInputScalars(scalarObjects);

    // main input and dimensions are read only after operand preparation
    final MatrixBlock a = inputs.get(0);
    final int m = a.getNumRows();
    final int n = a.getNumColumns();
    // rank
    final int k = inputs.get(1).getNumColumns();
    final boolean compressed = a instanceof CompressedMatrixBlock;

    // core sequential execute, dispatched on operator type and input representation
    switch (_outerProductType) {
        case LEFT_OUTER_PRODUCT:
        case RIGHT_OUTER_PRODUCT: {
            if (compressed) {
                final CompressedMatrixBlock ca = (CompressedMatrixBlock) a;
                executeCompressed(ca, factors[0], factors[1], sides, scalars, out.getDenseBlock(), m, n, k, _outerProductType, 0, m, 0, ca.getNumColGroups());
            } else if (a.isInSparseFormat()) {
                executeSparse(a.getSparseBlock(), factors[0], factors[1], sides, scalars, out.getDenseBlock(), m, n, k, a.getNonZeros(), _outerProductType, 0, m, 0, n);
            } else {
                executeDense(a.getDenseBlock(), factors[0], factors[1], sides, scalars, out.getDenseBlock(), m, n, k, _outerProductType, 0, m, 0, n);
            }
            break;
        }
        case CELLWISE_OUTER_PRODUCT: {
            if (compressed) {
                executeCellwiseCompressed((CompressedMatrixBlock) a, factors[0], factors[1], sides, scalars, out, m, n, k, _outerProductType, 0, m, 0, n);
            } else if (a.isInSparseFormat()) {
                executeCellwiseSparse(a.getSparseBlock(), factors[0], factors[1], sides, scalars, out, m, n, k, a.getNonZeros(), _outerProductType, 0, m, 0, n);
            } else {
                executeCellwiseDense(a.getDenseBlock(), factors[0], factors[1], sides, scalars, out.getDenseBlock(), m, n, k, _outerProductType, 0, m, 0, n);
            }
            break;
        }
        case AGG_OUTER_PRODUCT:
            throw new DMLRuntimeException("Wrong codepath for aggregate outer product.");
    }

    // post-processing: sparse cell-wise results over compressed inputs get their rows sorted
    if (compressed && out.isInSparseFormat() && cellwise)
        out.sortSparseRows();
    out.recomputeNonZeros();
    out.examSparsity();
    return out;
}
Also used : DenseBlock(org.apache.sysml.runtime.matrix.data.DenseBlock) CompressedMatrixBlock(org.apache.sysml.runtime.compress.CompressedMatrixBlock) MatrixBlock(org.apache.sysml.runtime.matrix.data.MatrixBlock) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException)

Example 42 with DenseBlock

use of org.apache.sysml.runtime.matrix.data.DenseBlock in project systemml by apache.

the class SpoofOuterProduct method executeCellwiseSparse.

/**
 * Applies the generated cell-wise function to every non-zero of the sparse main input in
 * rows {@code [rl, ru)}, visiting the non-zeros in cache blocks of rows and columns.
 *
 * <p>With a dense output and type {@code CELLWISE_OUTER_PRODUCT}, each result is written to
 * the output cell at the position of its non-zero. With a dense output and any other type,
 * the results are summed and the total is stored in cell (0,0). With a sparse output, each
 * result is appended to the output row at the column of its non-zero, regardless of type.
 *
 * @param sblock  sparse block of the main input
 * @param u       dense factor indexed by the row of the non-zero
 * @param v       dense factor indexed by the column of the non-zero
 * @param b       side inputs passed through to {@code genexecCellwise}
 * @param scalars scalar operands passed through to {@code genexecCellwise}
 * @param out     output block (dense or sparse), expected to be allocated by the caller
 * @param m       number of rows of the main input
 * @param n       number of columns of the main input
 * @param k       rank, passed through to {@code genexecCellwise}
 * @param nnz     number of non-zeros of the main input; only used to size the cache blocks
 * @param type    outer-product type selecting write-per-cell versus aggregate behavior
 * @param rl      first row to process (inclusive)
 * @param ru      last row to process (exclusive)
 * @param cl      not read by this method; column blocks always cover {@code [0, n)}
 * @param cu      not read by this method
 */
private void executeCellwiseSparse(SparseBlock sblock, DenseBlock u, DenseBlock v, SideInput[] b, double[] scalars, MatrixBlock out, int m, int n, int k, long nnz, OutProdType type, int rl, int ru, int cl, int cu) {
    // NOTE: we don't create sparse side inputs w/ row-major cursors because
    // cache blocking would lead to non-sequential access

    // Block size heuristic 8*m*n/nnz, hardened against three failure modes of the plain
    // "(int) (8L * m * n / nnz)": division by zero for nnz == 0, a zero block size (the
    // block loops would never advance), and int wrap-around on the narrowing cast for very
    // sparse inputs (possibly negative, i.e. a negative array size or a stuck loop).
    // Clamping to max(m, n) does not change the blocking, because any block size of at
    // least max(m, n) already yields a single row block and a single column block.
    final long rawBlocksize = 8L * m * n / Math.max(nnz, 1L);
    final int blocksizeIJ = (int) Math.max(1L, Math.min(rawBlocksize, (long) Math.max(m, n)));
    // per-row cursor into the row's non-zeros, relative to the row's start position
    int[] curk = new int[Math.min(blocksizeIJ, ru - rl)];
    if (// DENSE
    !out.isInSparseFormat()) {
        DenseBlock c = out.getDenseBlock();
        // running total for the aggregate (non cell-wise) case
        double tmp = 0;
        for (int bi = rl; bi < ru; bi += blocksizeIJ) {
            int bimin = Math.min(ru, bi + blocksizeIJ);
            // prepare starting indexes for block row
            Arrays.fill(curk, 0);
            // blocked execution over column blocks
            for (int bj = 0; bj < n; bj += blocksizeIJ) {
                int bjmin = Math.min(n, bj + blocksizeIJ);
                for (int i = bi; i < bimin; i++) {
                    if (sblock.isEmpty(i))
                        continue;
                    int wpos = sblock.pos(i);
                    int wlen = sblock.size(i);
                    int[] wix = sblock.indexes(i);
                    double[] wvals = sblock.values(i);
                    double[] cvals = c.values(i);
                    double[] uvals = u.values(i);
                    int uix = u.pos(i);
                    // resume where the previous column block stopped in this row
                    int index = wpos + curk[i - bi];
                    if (type == OutProdType.CELLWISE_OUTER_PRODUCT)
                        for (; index < wpos + wlen && wix[index] < bjmin; index++) {
                            int jix = wix[index];
                            cvals[jix] = genexecCellwise(wvals[index], uvals, uix, v.values(jix), v.pos(jix), b, scalars, m, n, k, i, jix);
                        }
                    else
                        for (; index < wpos + wlen && wix[index] < bjmin; index++) {
                            int jix = wix[index];
                            tmp += genexecCellwise(wvals[index], uvals, uix, v.values(jix), v.pos(jix), b, scalars, m, n, k, i, jix);
                        }
                    curk[i - bi] = index - wpos;
                }
            }
        }
        // aggregate case: publish the accumulated total in the first output cell
        if (type != OutProdType.CELLWISE_OUTER_PRODUCT)
            c.set(0, 0, tmp);
    } else // SPARSE
    {
        SparseBlock c = out.getSparseBlock();
        for (int bi = rl; bi < ru; bi += blocksizeIJ) {
            int bimin = Math.min(ru, bi + blocksizeIJ);
            // prepare starting indexes for block row
            Arrays.fill(curk, 0);
            // blocked execution over column blocks
            for (int bj = 0; bj < n; bj += blocksizeIJ) {
                int bjmin = Math.min(n, bj + blocksizeIJ);
                for (int i = bi; i < bimin; i++) {
                    if (sblock.isEmpty(i))
                        continue;
                    int wpos = sblock.pos(i);
                    int wlen = sblock.size(i);
                    int[] wix = sblock.indexes(i);
                    double[] wval = sblock.values(i);
                    double[] uvals = u.values(i);
                    int uix = u.pos(i);
                    // resume where the previous column block stopped in this row
                    int index = wpos + curk[i - bi];
                    for (; index < wpos + wlen && wix[index] < bjmin; index++) {
                        int jix = wix[index];
                        c.append(i, jix, genexecCellwise(wval[index], uvals, uix, v.values(jix), v.pos(jix), b, scalars, m, n, k, i, jix));
                    }
                    curk[i - bi] = index - wpos;
                }
            }
        }
    }
}
Also used : DenseBlock(org.apache.sysml.runtime.matrix.data.DenseBlock) SparseBlock(org.apache.sysml.runtime.matrix.data.SparseBlock)

Example 43 with DenseBlock

use of org.apache.sysml.runtime.matrix.data.DenseBlock in project systemml by apache.

the class ColGroupDDC2 method computeRowSums.

/**
 * Adds this column group's contribution to the running row sums in {@code result} for rows
 * {@code [rl, ru)}.
 *
 * <p>For each row, the slot at {@code pos(row)} is read and written as the sum and the next
 * slot as its Kahan correction.
 *
 * @param result output block whose dense block is updated in place
 * @param kplus  aggregation function used to pre-aggregate each value tuple
 * @param rl     first row (inclusive)
 * @param ru     last row (exclusive)
 */
@Override
protected void computeRowSums(MatrixBlock result, KahanFunction kplus, int rl, int ru) {
    // note: due to corrections the output might be a large dense block
    final DenseBlock target = result.getDenseBlock();
    final KahanObject acc = new KahanObject(0, 0);
    final KahanPlus adder = KahanPlus.getKahanPlusFnObject();
    // one pre-aggregated value per value tuple
    final double[] tupleSums = sumAllValues(kplus, acc, false);
    // fold each row's value into the result with plain KahanPlus rather than the passed
    // kplus (the original note gives "correctness in case of sqk+" as the reason)
    int row = rl;
    while (row < ru) {
        final double[] rowVals = target.values(row);
        final int sumPos = target.pos(row);
        final int corrPos = sumPos + 1;
        acc.set(rowVals[sumPos], rowVals[corrPos]);
        adder.execute2(acc, tupleSums[_data[row]]);
        rowVals[sumPos] = acc._sum;
        rowVals[corrPos] = acc._correction;
        row++;
    }
}
Also used : DenseBlock(org.apache.sysml.runtime.matrix.data.DenseBlock) KahanObject(org.apache.sysml.runtime.instructions.cp.KahanObject) KahanPlus(org.apache.sysml.runtime.functionobjects.KahanPlus)

Example 44 with DenseBlock

use of org.apache.sysml.runtime.matrix.data.DenseBlock in project systemml by apache.

the class ColGroupOLE method computeRowSums.

/**
 * Adds this column group's contribution to the running row sums in {@code result} for rows
 * {@code [rl, ru)}.
 *
 * <p>Per value, {@code _data} is read as a sequence of segments: a length entry followed by
 * that many row offsets relative to the segment's first row. For each referenced row, the
 * slot at {@code pos(row)} is updated as the sum and the next slot as its Kahan correction.
 * Two strategies exist: a cache-conscious scan over row blocks, and a plain scan that walks
 * each value's segments in one go.
 *
 * @param result output block whose dense block is updated in place
 * @param kplus  aggregation function used to aggregate each value tuple
 * @param rl     first row (inclusive)
 * @param ru     last row (exclusive)
 */
@Override
protected final void computeRowSums(MatrixBlock result, KahanFunction kplus, int rl, int ru) {
    // note: due to corrections the output might be a large dense block
    final DenseBlock target = result.getDenseBlock();
    final KahanObject acc = new KahanObject(0, 0);
    final KahanPlus adder = KahanPlus.getKahanPlusFnObject();
    final int segSize = BitmapEncoder.BITMAP_BLOCK_SZ;
    final int numVals = getNumValues();

    final boolean cacheConscious = ALLOW_CACHE_CONSCIOUS_ROWSUMS && LOW_LEVEL_OPT && numVals > 1 && _numRows > segSize;
    if (!cacheConscious) {
        // plain scan: walk every value and all of its segments
        for (int k = 0; k < numVals; k++) {
            final int boff = _ptr[k];
            final int blen = len(k);
            // value to add for every row listed under this value
            final double val = sumValues(k, kplus, acc);
            if (val == 0)
                continue;
            int bix = skipScanVal(k, rl);
            int off = ((rl + 1) / segSize) * segSize;
            while (bix < blen && off < ru) {
                final int slen = _data[boff + bix];
                for (int i = 1; i <= slen; i++) {
                    final int rix = off + _data[boff + bix + i];
                    final double[] rowVals = target.values(rix);
                    final int sumPos = target.pos(rix);
                    acc.set(rowVals[sumPos], rowVals[sumPos + 1]);
                    adder.execute2(acc, val);
                    rowVals[sumPos] = acc._sum;
                    rowVals[sumPos + 1] = acc._correction;
                }
                bix += slen + 1;
                off += segSize;
            }
        }
        return;
    }

    // cache-conscious scan
    final int rowBlk = ColGroupOffset.WRITE_CACHE_BLKSZ / 2;
    // step 1: per-value start positions and pre-aggregated values
    final int[] apos = skipScan(numVals, rl);
    final double[] aval = sumAllValues(kplus, acc, false);
    // step 2: process one row block at a time, scanning all values horizontally
    for (int bi = rl; bi < ru; bi += rowBlk) {
        final int bimax = Math.min(bi + rowBlk, ru);
        for (int k = 0; k < numVals; k++) {
            final int boff = _ptr[k];
            final int blen = len(k);
            final double val = aval[k];
            int bix = apos[k];
            int segStart = bi;
            while (segStart < bimax && bix < blen) {
                // segment length and position of its first row offset
                final int segLen = _data[boff + bix];
                final int first = boff + bix + 1;
                for (int i = 0; i < segLen; i++) {
                    final int rix = segStart + _data[first + i];
                    final double[] rowVals = target.values(rix);
                    final int sumPos = target.pos(rix);
                    acc.set(rowVals[sumPos], rowVals[sumPos + 1]);
                    adder.execute2(acc, val);
                    rowVals[sumPos] = acc._sum;
                    rowVals[sumPos + 1] = acc._correction;
                }
                bix += segLen + 1;
                segStart += segSize;
            }
            // remember where to continue for the next row block
            apos[k] = bix;
        }
    }
}
Also used : DenseBlock(org.apache.sysml.runtime.matrix.data.DenseBlock) KahanObject(org.apache.sysml.runtime.instructions.cp.KahanObject) KahanPlus(org.apache.sysml.runtime.functionobjects.KahanPlus)

Example 45 with DenseBlock

use of org.apache.sysml.runtime.matrix.data.DenseBlock in project systemml by apache.

the class ColGroupRLE method computeRowSums.

/**
 * Adds this column group's contribution to the running row sums in {@code result} for rows
 * {@code [rl, ru)}.
 *
 * <p>Per value, {@code _data} is read as pairs of (gap to the run start, run length). For
 * each row covered by a run, the slot at {@code pos(row)} is updated as the sum and the next
 * slot as its Kahan correction. Two strategies exist: a cache-conscious scan over row
 * blocks, and a plain scan that walks each value's runs in one go.
 *
 * @param result output block whose dense block is updated in place
 * @param kplus  aggregation function used to aggregate each value tuple
 * @param rl     first row (inclusive)
 * @param ru     last row (exclusive)
 */
@Override
protected final void computeRowSums(MatrixBlock result, KahanFunction kplus, int rl, int ru) {
    // note: due to corrections the output might be a large dense block
    final DenseBlock target = result.getDenseBlock();
    final KahanObject acc = new KahanObject(0, 0);
    final KahanPlus adder = KahanPlus.getKahanPlusFnObject();
    final int numVals = getNumValues();

    final boolean cacheConscious = ALLOW_CACHE_CONSCIOUS_ROWSUMS && LOW_LEVEL_OPT && numVals > 1 && _numRows > BitmapEncoder.BITMAP_BLOCK_SZ;
    if (!cacheConscious) {
        // plain scan: walk every value and all of its runs
        for (int k = 0; k < numVals; k++) {
            final int boff = _ptr[k];
            final int blen = len(k);
            final double val = sumValues(k, kplus, acc);
            if (val == 0.0)
                continue;
            final Pair<Integer, Integer> skipped = skipScanVal(k, rl);
            int bix = skipped.getKey();
            int runEnd = skipped.getValue();
            while (bix < blen && runEnd < ru) {
                final int runStart = runEnd + _data[boff + bix];
                runEnd = runStart + _data[boff + bix + 1];
                for (int rix = runStart; rix < runEnd && rix < ru; rix++) {
                    final double[] rowVals = target.values(rix);
                    final int sumPos = target.pos(rix);
                    acc.set(rowVals[sumPos], rowVals[sumPos + 1]);
                    adder.execute2(acc, val);
                    rowVals[sumPos] = acc._sum;
                    rowVals[sumPos + 1] = acc._correction;
                }
                bix += 2;
            }
        }
        return;
    }

    // cache-conscious scan
    final int rowBlk = ColGroupOffset.WRITE_CACHE_BLKSZ / 2;
    // step 1: current position / start row per RLE list, plus pre-aggregated values
    final int[] astart = new int[numVals];
    final int[] apos = skipScan(numVals, rl, astart);
    final double[] aval = sumAllValues(kplus, acc, false);
    // step 2: process one row block at a time, scanning all values horizontally
    for (int bi = rl; bi < ru; bi += rowBlk) {
        final int bimax = Math.min(bi + rowBlk, ru);
        for (int k = 0; k < numVals; k++) {
            final int boff = _ptr[k];
            final int blen = len(k);
            final double val = aval[k];
            int bix = apos[k];
            int start = astart[k];
            // runs are not aligned to row blocks, so clip each run to [bi, bimax)
            while (bix < blen) {
                final int gap = _data[boff + bix];
                final int runLen = _data[boff + bix + 1];
                final int runBegin = start + gap;
                final int runEnd = runBegin + runLen;
                final int from = Math.max(bi, runBegin);
                final int to = Math.min(runEnd, bimax);
                for (int rix = from; rix < to; rix++) {
                    final double[] rowVals = target.values(rix);
                    final int sumPos = target.pos(rix);
                    acc.set(rowVals[sumPos], rowVals[sumPos + 1]);
                    adder.execute2(acc, val);
                    rowVals[sumPos] = acc._sum;
                    rowVals[sumPos + 1] = acc._correction;
                }
                // a run reaching the block end is revisited by the next row block
                if (runEnd >= bimax)
                    break;
                start = runEnd;
                bix += 2;
            }
            // remember where to continue for the next row block
            apos[k] = bix;
            astart[k] = start;
        }
    }
}
Also used : DenseBlock(org.apache.sysml.runtime.matrix.data.DenseBlock) KahanObject(org.apache.sysml.runtime.instructions.cp.KahanObject) KahanPlus(org.apache.sysml.runtime.functionobjects.KahanPlus)

Aggregations

DenseBlock (org.apache.sysml.runtime.matrix.data.DenseBlock) 48, DMLRuntimeException (org.apache.sysml.runtime.DMLRuntimeException) 22, MatrixBlock (org.apache.sysml.runtime.matrix.data.MatrixBlock) 20, CompressedMatrixBlock (org.apache.sysml.runtime.compress.CompressedMatrixBlock) 10, KahanPlus (org.apache.sysml.runtime.functionobjects.KahanPlus) 10, KahanObject (org.apache.sysml.runtime.instructions.cp.KahanObject) 10, BufferedReader (java.io.BufferedReader) 6, BufferedWriter (java.io.BufferedWriter) 6, File (java.io.File) 6, InputStreamReader (java.io.InputStreamReader) 6, OutputStreamWriter (java.io.OutputStreamWriter) 6, ArrayList (java.util.ArrayList) 6, ExecutorService (java.util.concurrent.ExecutorService) 6, Future (java.util.concurrent.Future) 6, FileSystem (org.apache.hadoop.fs.FileSystem) 6, Path (org.apache.hadoop.fs.Path) 6, SequenceFile (org.apache.hadoop.io.SequenceFile) 6, JobConf (org.apache.hadoop.mapred.JobConf) 6, MatrixCharacteristics (org.apache.sysml.runtime.matrix.MatrixCharacteristics) 6, IJV (org.apache.sysml.runtime.matrix.data.IJV) 6