Search in sources :

Example 26 with DenseBlock

Use of DenseBlock in project incubator-systemml by apache.

the class SpoofCellwise method executeSparseNoAggDense.

private long executeSparseNoAggDense(SparseBlock sblock, SideInput[] b, double[] scalars, MatrixBlock out, int m, int n, boolean sparseSafe, int rl, int ru) {
    // note: sequential scan algorithm for both sparse-safe and -unsafe
    // in order to avoid binary search for sparse-unsafe
    DenseBlock c = out.getDenseBlock();
    long lnnz = 0;
    for (int i = rl; i < ru; i++) {
        int lastj = -1;
        // handle non-empty rows
        if (sblock != null && !sblock.isEmpty(i)) {
            int apos = sblock.pos(i);
            int alen = sblock.size(i);
            int[] aix = sblock.indexes(i);
            double[] avals = sblock.values(i);
            double[] cvals = c.values(i);
            int cix = c.pos(i);
            for (int k = apos; k < apos + alen; k++) {
                // process zeros before current non-zero
                if (!sparseSafe)
                    for (int j = lastj + 1; j < aix[k]; j++) lnnz += ((cvals[cix + j] = genexec(0, b, scalars, m, n, i, j)) != 0) ? 1 : 0;
                // process current non-zero
                lastj = aix[k];
                lnnz += ((cvals[cix + lastj] = genexec(avals[k], b, scalars, m, n, i, lastj)) != 0) ? 1 : 0;
        // process empty rows or remaining zeros
        if (!sparseSafe)
            for (int j = lastj + 1; j < n; j++) {
                double[] cvals = c.values(i);
                int cix = c.pos(i);
                lnnz += ((cvals[cix + j] = genexec(0, b, scalars, m, n, i, j)) != 0) ? 1 : 0;
    return lnnz;
Also used : DenseBlock (org.apache.sysml.runtime.matrix.data.DenseBlock)

Example 27 with DenseBlock

Use of DenseBlock in project incubator-systemml by apache.

the class SpoofCellwise method executeDense.

// ///////
// function dispatch
private long executeDense(DenseBlock a, SideInput[] b, double[] scalars, MatrixBlock out, int m, int n, boolean sparseSafe, int rl, int ru) {
    DenseBlock c = out.getDenseBlock();
    SideInput[] lb = createSparseSideInputs(b);
    if (_type == CellType.NO_AGG) {
        return executeDenseNoAgg(a, lb, scalars, c, m, n, sparseSafe, rl, ru);
    } else if (_type == CellType.ROW_AGG) {
        if (_aggOp == AggOp.SUM || _aggOp == AggOp.SUM_SQ)
            return executeDenseRowAggSum(a, lb, scalars, c, m, n, sparseSafe, rl, ru);
            return executeDenseRowAggMxx(a, lb, scalars, c, m, n, sparseSafe, rl, ru);
    } else if (_type == CellType.COL_AGG) {
        if (_aggOp == AggOp.SUM || _aggOp == AggOp.SUM_SQ)
            return executeDenseColAggSum(a, lb, scalars, c, m, n, sparseSafe, rl, ru);
            return executeDenseColAggMxx(a, lb, scalars, c, m, n, sparseSafe, rl, ru);
    return -1;
Also used : DenseBlock (org.apache.sysml.runtime.matrix.data.DenseBlock)

Example 28 with DenseBlock

Use of DenseBlock in project incubator-systemml by apache.

the class SpoofOuterProduct method execute.

/**
 * Sequentially executes this outer-product operator on the given inputs and
 * returns the result in {@code out}.
 *
 * <p>Visible behavior: {@code inputs.get(0)} is the main matrix, whose format
 * (compressed / dense / sparse) selects the kernel; {@code inputs.get(1)} and
 * {@code inputs.get(2)} are passed to the kernels as dense factor matrices, and
 * inputs from index 3 on are prepared as side inputs. The output is reset to
 * dimensions that depend on {@code _outerProductType} before the kernel runs.
 *
 * <p>NOTE(review): this snippet has evidently lost lines during extraction
 * (closing braces, {@code else} keywords, {@code switch} labels/{@code break}s and
 * at least one statement body). Places where something appears to be missing are
 * marked below; confirm against the upstream source before relying on it.
 *
 * @param inputs        input matrices; must be non-null and contain at least three entries
 * @param scalarObjects scalar inputs, converted via {@code prepInputScalars}
 * @param out           pre-created output matrix; must be non-null
 * @return {@code out}
 * @throws RuntimeException    if {@code inputs} is null, has fewer than three entries, or {@code out} is null
 * @throws DMLRuntimeException if the operator type is {@code AGG_OUTER_PRODUCT}
 */
public MatrixBlock execute(ArrayList<MatrixBlock> inputs, ArrayList<ScalarObject> scalarObjects, MatrixBlock out) {
    // sanity check
    if (inputs == null || inputs.size() < 3 || out == null)
        throw new RuntimeException("Invalid input arguments.");
    // check empty result
    if (// U is empty
    (_outerProductType == OutProdType.LEFT_OUTER_PRODUCT && inputs.get(1).isEmptyBlock(false)) || // V is empty
    (_outerProductType == OutProdType.RIGHT_OUTER_PRODUCT && inputs.get(2).isEmptyBlock(false)) || inputs.get(0).isEmptyBlock(false)) {
        // X is empty
        // turn empty dense into sparse
        // NOTE(review): the statement this comment refers to is not present here — presumably dropped; confirm.
        return out;
    // input preparation and result allocation (Allocate the output that is set by Sigma2CPInstruction)
    if (_outerProductType == OutProdType.CELLWISE_OUTER_PRODUCT) {
        // assign it to the time and sparse representation of the major input matrix
        out.reset(inputs.get(0).getNumRows(), inputs.get(0).getNumColumns(), inputs.get(0).isInSparseFormat());
    } else {
        // if left outerproduct gives a value of k*n instead of n*k, change it back to n*k and then transpose the output
        if (_outerProductType == OutProdType.LEFT_OUTER_PRODUCT)
            // n*k
            out.reset(inputs.get(0).getNumColumns(), inputs.get(1).getNumColumns(), false);
        else if (_outerProductType == OutProdType.RIGHT_OUTER_PRODUCT)
            // m*k
            out.reset(inputs.get(0).getNumRows(), inputs.get(1).getNumColumns(), false);
    // check for empty inputs; otherwise allocate result
    // NOTE(review): no allocation call is visible after this check — possibly dropped; confirm.
    if (inputs.get(0).isEmptyBlock(false))
        return out;
    // input preparation
    DenseBlock[] ab = getDenseMatrices(prepInputMatrices(inputs, 1, 2, true, false));
    SideInput[] b = prepInputMatrices(inputs, 3, false);
    double[] scalars = prepInputScalars(scalarObjects);
    // core sequential execute
    final int m = inputs.get(0).getNumRows();
    final int n = inputs.get(0).getNumColumns();
    // rank
    final int k = inputs.get(1).getNumColumns();
    MatrixBlock a = inputs.get(0);
    switch(_outerProductType) {
        case LEFT_OUTER_PRODUCT:
            // kernel choice follows the physical format of the main input: compressed, dense, else sparse
            if (a instanceof CompressedMatrixBlock)
                executeCompressed((CompressedMatrixBlock) a, ab[0], ab[1], b, scalars, out.getDenseBlock(), m, n, k, _outerProductType, 0, m, 0, ((CompressedMatrixBlock) a).getNumColGroups());
            else if (!a.isInSparseFormat())
                executeDense(a.getDenseBlock(), ab[0], ab[1], b, scalars, out.getDenseBlock(), m, n, k, _outerProductType, 0, m, 0, n);
                // NOTE(review): an `else` before the sparse call below appears to be missing.
                executeSparse(a.getSparseBlock(), ab[0], ab[1], b, scalars, out.getDenseBlock(), m, n, k, a.getNonZeros(), _outerProductType, 0, m, 0, n);
            // NOTE(review): a `break` and a `case` label (likely the cell-wise case, given the callee names) appear to be missing here.
            if (a instanceof CompressedMatrixBlock)
                executeCellwiseCompressed((CompressedMatrixBlock) a, ab[0], ab[1], b, scalars, out, m, n, k, _outerProductType, 0, m, 0, n);
            else if (!a.isInSparseFormat())
                executeCellwiseDense(a.getDenseBlock(), ab[0], ab[1], b, scalars, out.getDenseBlock(), m, n, k, _outerProductType, 0, m, 0, n);
                // NOTE(review): an `else` before the sparse call below appears to be missing.
                executeCellwiseSparse(a.getSparseBlock(), ab[0], ab[1], b, scalars, out, m, n, k, a.getNonZeros(), _outerProductType, 0, m, 0, n);
        case AGG_OUTER_PRODUCT:
            // aggregate outer products are not handled by this method
            throw new DMLRuntimeException("Wrong codepath for aggregate outer product.");
    // post-processing
    // NOTE(review): the body of the following `if` is missing; as written it would guard `return out;`. Confirm.
    if (a instanceof CompressedMatrixBlock && out.isInSparseFormat() && _outerProductType == OutProdType.CELLWISE_OUTER_PRODUCT)
    return out;
Also used : DenseBlock (org.apache.sysml.runtime.matrix.data.DenseBlock), CompressedMatrixBlock (org.apache.sysml.runtime.compress.CompressedMatrixBlock), MatrixBlock (org.apache.sysml.runtime.matrix.data.MatrixBlock), DMLRuntimeException (org.apache.sysml.runtime.DMLRuntimeException)

Example 29 with DenseBlock

Use of DenseBlock in project incubator-systemml by apache.

the class SpoofOuterProduct method executeCellwiseSparse.

private void executeCellwiseSparse(SparseBlock sblock, DenseBlock u, DenseBlock v, SideInput[] b, double[] scalars, MatrixBlock out, int m, int n, int k, long nnz, OutProdType type, int rl, int ru, int cl, int cu) {
    // NOTE: we don't create sparse side inputs w/ row-major cursors because
    // cache blocking would lead to non-sequential access
    final int blocksizeIJ = (int) (8L * m * n / nnz);
    int[] curk = new int[Math.min(blocksizeIJ, ru - rl)];
    if (// DENSE
    !out.isInSparseFormat()) {
        DenseBlock c = out.getDenseBlock();
        double tmp = 0;
        for (int bi = rl; bi < ru; bi += blocksizeIJ) {
            int bimin = Math.min(ru, bi + blocksizeIJ);
            // prepare starting indexes for block row
            Arrays.fill(curk, 0);
            // blocked execution over column blocks
            for (int bj = 0; bj < n; bj += blocksizeIJ) {
                int bjmin = Math.min(n, bj + blocksizeIJ);
                for (int i = bi; i < bimin; i++) {
                    if (sblock.isEmpty(i))
                    int wpos = sblock.pos(i);
                    int wlen = sblock.size(i);
                    int[] wix = sblock.indexes(i);
                    double[] wvals = sblock.values(i);
                    double[] cvals = c.values(i);
                    double[] uvals = u.values(i);
                    int uix = u.pos(i);
                    int index = wpos + curk[i - bi];
                    if (type == OutProdType.CELLWISE_OUTER_PRODUCT)
                        for (; index < wpos + wlen && wix[index] < bjmin; index++) {
                            int jix = wix[index];
                            cvals[jix] = genexecCellwise(wvals[index], uvals, uix, v.values(jix), v.pos(jix), b, scalars, m, n, k, i, wix[index]);
                        for (; index < wpos + wlen && wix[index] < bjmin; index++) {
                            int jix = wix[index];
                            tmp += genexecCellwise(wvals[index], uvals, uix, v.values(jix), v.pos(jix), b, scalars, m, n, k, i, wix[index]);
                    curk[i - bi] = index - wpos;
        if (type != OutProdType.CELLWISE_OUTER_PRODUCT)
            c.set(0, 0, tmp);
    } else // SPARSE
        SparseBlock c = out.getSparseBlock();
        for (int bi = rl; bi < ru; bi += blocksizeIJ) {
            int bimin = Math.min(ru, bi + blocksizeIJ);
            // prepare starting indexes for block row
            Arrays.fill(curk, 0);
            // blocked execution over column blocks
            for (int bj = 0; bj < n; bj += blocksizeIJ) {
                int bjmin = Math.min(n, bj + blocksizeIJ);
                for (int i = bi; i < bimin; i++) {
                    if (sblock.isEmpty(i))
                    int wpos = sblock.pos(i);
                    int wlen = sblock.size(i);
                    int[] wix = sblock.indexes(i);
                    double[] wval = sblock.values(i);
                    double[] uvals = u.values(i);
                    int uix = u.pos(i);
                    int index = wpos + curk[i - bi];
                    for (; index < wpos + wlen && wix[index] < bjmin; index++) {
                        int jix = wix[index];
                        c.append(i, wix[index], genexecCellwise(wval[index], uvals, uix, v.values(jix), v.pos(jix), b, scalars, m, n, k, i, wix[index]));
                    curk[i - bi] = index - wpos;
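Also used : DenseBlock (org.apache.sysml.runtime.matrix.data.DenseBlock), SparseBlock (org.apache.sysml.runtime.matrix.data.SparseBlock)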

Example 30 with DenseBlock

Use of DenseBlock in project incubator-systemml by apache.

the class SpoofRowwise method execute.

/**
 * Executes this row-wise operator with up to {@code k} threads and returns the result.
 *
 * <p>Visible behavior: the call is redirected to the three-argument
 * {@code execute(inputs, scalarObjects, out)} when {@code k <= 1}, when the operator
 * is a column aggregation for which {@code LibMatrixMult.checkParColumnAgg} returns
 * false, or when the total input size is below {@code PAR_NUMCELL_THRESHOLD}.
 * Otherwise the rows of {@code inputs.get(0)} are split into blocks, one task is
 * submitted per block, and the task results are combined: for column / full
 * aggregation the partial dense results are added into {@code out}; for all other
 * types the tasks take {@code out} directly and return a count that is summed.
 *
 * <p>NOTE(review): this snippet has evidently lost lines during extraction (closing
 * braces and possibly post-processing statements); see the notes below and confirm
 * against the upstream source.
 *
 * @param inputs        input matrices; {@code inputs.get(0)} is the main input
 * @param scalarObjects scalar inputs, converted via {@code prepInputScalars}
 * @param out           pre-created output matrix
 * @param k             requested degree of parallelism
 * @return the result matrix; a newly transposed matrix rather than the passed-in {@code out} when {@code flipOut} holds
 * @throws RuntimeException    if {@code inputs} is null or empty, or {@code out} is null
 * @throws DMLRuntimeException wrapping any exception raised during parallel execution
 */
public MatrixBlock execute(ArrayList<MatrixBlock> inputs, ArrayList<ScalarObject> scalarObjects, MatrixBlock out, int k) {
    // redirect to serial execution
    // NOTE(review): for k > 1 this condition dereferences `inputs` before the null check below; a null list would raise NullPointerException here.
    if (k <= 1 || (_type.isColumnAgg() && !LibMatrixMult.checkParColumnAgg(inputs.get(0), k, false)) || getTotalInputSize(inputs) < PAR_NUMCELL_THRESHOLD) {
        return execute(inputs, scalarObjects, out);
    // sanity check
    if (inputs == null || inputs.size() < 1 || out == null)
        throw new RuntimeException("Invalid input arguments.");
    // result allocation and preparations
    final int m = inputs.get(0).getNumRows();
    final int n = inputs.get(0).getNumColumns();
    // n2: the constant second dimension if the type declares one, else the minimum column count
    // of the matrix side inputs when applicable, else -1
    final int n2 = _type.isConstDim2(_constDim2) ? (int) _constDim2 : _type.isRowTypeB1() || hasMatrixSideInput(inputs) ? getMinColsMatrixSideInputs(inputs) : -1;
    allocateOutputMatrix(m, n, n2, out);
    final boolean flipOut = _type.isRowTypeB1ColumnAgg() && LibSpoofPrimitives.isFlipOuter(out.getNumRows(), out.getNumColumns());
    // input preparation
    MatrixBlock a = inputs.get(0);
    SideInput[] b = prepInputMatrices(inputs, 1, inputs.size() - 1, false, _tB1);
    double[] scalars = prepInputScalars(scalarObjects);
    // core parallel execute
    ExecutorService pool = CommonThreadPool.get(k);
    // compressed inputs use block sizes aligned to BitmapEncoder.BITMAP_BLOCK_SZ; others use balanced default sizes
    ArrayList<Integer> blklens = (a instanceof CompressedMatrixBlock) ? LibMatrixMult.getAlignedBlockSizes(m, k, BitmapEncoder.BITMAP_BLOCK_SZ) : LibMatrixMult.getBalancedBlockSizesDefault(m, k, (long) m * n < 16 * PAR_NUMCELL_THRESHOLD);
    try {
        if (_type.isColumnAgg() || _type == RowType.FULL_AGG) {
            // execute tasks
            ArrayList<ParColAggTask> tasks = new ArrayList<>();
            int outLen = out.getNumRows() * out.getNumColumns();
            // one task per row block [lb, lb + blklens.get(i))
            for (int i = 0, lb = 0; i < blklens.size(); lb += blklens.get(i), i++) tasks.add(new ParColAggTask(a, b, scalars, n, n2, outLen, lb, lb + blklens.get(i)));
            List<Future<DenseBlock>> taskret = pool.invokeAll(tasks);
            // aggregate partial results
            int len = _type.isColumnAgg() ? out.getNumRows() * out.getNumColumns() : 1;
            for (Future<DenseBlock> task : taskret) LibMatrixMult.vectAdd(task.get().valuesAt(0), out.getDenseBlockValues(), 0, 0, len);
        } else {
            // execute tasks
            ArrayList<ParExecTask> tasks = new ArrayList<>();
            // one task per row block [lb, lb + blklens.get(i))
            for (int i = 0, lb = 0; i < blklens.size(); lb += blklens.get(i), i++) tasks.add(new ParExecTask(a, b, out, scalars, n, n2, lb, lb + blklens.get(i)));
            List<Future<Long>> taskret = pool.invokeAll(tasks);
            // aggregate nnz, no need to aggregate results
            long nnz = 0;
            for (Future<Long> task : taskret) nnz += task.get();
            // NOTE(review): `nnz` is summed but never used in the visible code — a statement storing it was presumably dropped; confirm.
        // NOTE(review): no shutdown/release of `pool` is visible in this snippet — confirm whether one was dropped.
        if (flipOut) {
            // replace the result by its transpose, written into a fresh matrix with swapped dimensions
            out = LibMatrixReorg.transpose(out, new MatrixBlock(out.getNumColumns(), out.getNumRows(), false));
    } catch (Exception ex) {
        throw new DMLRuntimeException(ex);
    return out;
Also used : MatrixBlock (org.apache.sysml.runtime.matrix.data.MatrixBlock), CompressedMatrixBlock (org.apache.sysml.runtime.compress.CompressedMatrixBlock), ArrayList (java.util.ArrayList), DenseBlock (org.apache.sysml.runtime.matrix.data.DenseBlock), DMLRuntimeException (org.apache.sysml.runtime.DMLRuntimeException), ExecutorService (java.util.concurrent.ExecutorService), Future (java.util.concurrent.Future)


DenseBlock ( DMLRuntimeException (org.apache.sysml.runtime.DMLRuntimeException)22 MatrixBlock ( CompressedMatrixBlock (org.apache.sysml.runtime.compress.CompressedMatrixBlock)10 KahanPlus (org.apache.sysml.runtime.functionobjects.KahanPlus)10 KahanObject (org.apache.sysml.runtime.instructions.cp.KahanObject)10 BufferedReader ( BufferedWriter ( File ( InputStreamReader ( OutputStreamWriter ( ArrayList (java.util.ArrayList)6 ExecutorService (java.util.concurrent.ExecutorService)6 Future (java.util.concurrent.Future)6 FileSystem (org.apache.hadoop.fs.FileSystem)6 Path (org.apache.hadoop.fs.Path)6 SequenceFile ( JobConf (org.apache.hadoop.mapred.JobConf)6 MatrixCharacteristics (org.apache.sysml.runtime.matrix.MatrixCharacteristics)6 IJV (