Search in sources :

Example 11 with DenseBlock

use of org.apache.sysml.runtime.matrix.data.DenseBlock in project incubator-systemml by apache.

the class ColGroupRLE method computeRowSums.

/**
 * Adds this column group's row-sum contribution to the dense output block for
 * the rows in {@code [rl, ru)}.
 *
 * <p>For every touched row {@code rix}, the entry at {@code c.pos(rix)} is read as the
 * running sum and the entry at {@code c.pos(rix) + 1} as the correction term; both are
 * updated through a {@link KahanPlus} addition and written back.
 *
 * @param result matrix block whose dense block is updated in place
 * @param kplus  function handed to {@code sumAllValues}/{@code sumValues} to obtain the per-value sums
 * @param rl     first row to process
 * @param ru     row upper bound (exclusive; all row loops stop before {@code ru})
 */
@Override
protected final void computeRowSums(MatrixBlock result, KahanFunction kplus, int rl, int ru) {
    // note: due to corrections the output might be a large dense block
    DenseBlock c = result.getDenseBlock();
    // scratch buffer reused for every (sum, correction) update below
    KahanObject kbuff = new KahanObject(0, 0);
    KahanPlus kplus2 = KahanPlus.getKahanPlusFnObject();
    final int numVals = getNumValues();
    // blocked scan is only used when both flags are on, there is more than one
    // distinct value, and the group has more rows than one bitmap block
    if (ALLOW_CACHE_CONSCIOUS_ROWSUMS && LOW_LEVEL_OPT && numVals > 1 && _numRows > BitmapEncoder.BITMAP_BLOCK_SZ) {
        final int blksz = ColGroupOffset.WRITE_CACHE_BLKSZ / 2;
        // step 1: prepare position and value arrays
        // current pos / values per RLE list
        // NOTE(review): skipScan presumably advances every run list to row rl and
        // fills astart with the matching row offsets - confirm against its definition
        int[] astart = new int[numVals];
        int[] apos = skipScan(numVals, rl, astart);
        double[] aval = sumAllValues(kplus, kbuff, false);
        // step 2: cache conscious matrix-vector via horizontal scans
        for (int bi = rl; bi < ru; bi += blksz) {
            // exclusive end of the current row block, clipped to ru
            int bimax = Math.min(bi + blksz, ru);
            // horizontal segment scan, incl pos maintenance
            for (int k = 0; k < numVals; k++) {
                int boff = _ptr[k];
                int blen = len(k);
                double val = aval[k];
                // resume where the previous row block stopped for this value
                int bix = apos[k];
                int start = astart[k];
                // compute partial results, not aligned
                while (bix < blen) {
                    // runs are stored as (offset, length) pairs; the offset is
                    // relative to 'start', the end of the previously consumed run
                    int lstart = _data[boff + bix];
                    int llen = _data[boff + bix + 1];
                    // clip the run to the current row block [bi, bimax)
                    int from = Math.max(bi, start + lstart);
                    int to = Math.min(start + lstart + llen, bimax);
                    for (int rix = from; rix < to; rix++) {
                        double[] cvals = c.values(rix);
                        int cix = c.pos(rix);
                        // load (sum, correction), add val, store both back
                        kbuff.set(cvals[cix], cvals[cix + 1]);
                        kplus2.execute2(kbuff, val);
                        cvals[cix] = kbuff._sum;
                        cvals[cix + 1] = kbuff._correction;
                    }
                    // the run reaches or crosses the block end: stop without
                    // advancing so the same run is revisited in the next row block
                    if (start + lstart + llen >= bimax)
                        break;
                    start += lstart + llen;
                    bix += 2;
                }
                // remember the scan state for the next row block
                apos[k] = bix;
                astart[k] = start;
            }
        }
    } else {
        // simple path: process one value at a time over the whole row range
        for (int k = 0; k < numVals; k++) {
            int boff = _ptr[k];
            int blen = len(k);
            double val = sumValues(k, kplus, kbuff);
            // values whose sum is exactly zero are skipped entirely
            if (val != 0.0) {
                // NOTE(review): skipScanVal presumably returns (index into the run
                // list, row offset) positioned at rl - confirm against its definition
                Pair<Integer, Integer> tmp = skipScanVal(k, rl);
                int bix = tmp.getKey();
                int curRunStartOff = tmp.getValue();
                int curRunEnd = tmp.getValue();
                // walk the (offset, length) pairs until the list or the row range ends
                for (; bix < blen && curRunEnd < ru; bix += 2) {
                    curRunStartOff = curRunEnd + _data[boff + bix];
                    curRunEnd = curRunStartOff + _data[boff + bix + 1];
                    // the last run may extend past ru, hence the extra bound check
                    for (int rix = curRunStartOff; rix < curRunEnd && rix < ru; rix++) {
                        double[] cvals = c.values(rix);
                        int cix = c.pos(rix);
                        // load (sum, correction), add val, store both back
                        kbuff.set(cvals[cix], cvals[cix + 1]);
                        kplus2.execute2(kbuff, val);
                        cvals[cix] = kbuff._sum;
                        cvals[cix + 1] = kbuff._correction;
                    }
                }
            }
        }
    }
}
Also used : DenseBlock(org.apache.sysml.runtime.matrix.data.DenseBlock) KahanObject(org.apache.sysml.runtime.instructions.cp.KahanObject) KahanPlus(org.apache.sysml.runtime.functionobjects.KahanPlus)

Example 12 with DenseBlock

use of org.apache.sysml.runtime.matrix.data.DenseBlock in project systemml by apache.

the class SpoofCellwise method executeDense.

// ///////
// function dispatch
/**
 * Routes a dense input to the kernel that matches this operator's cell type
 * and aggregation operation.
 *
 * <p>The side inputs are first passed through {@code createSparseSideInputs};
 * the selected kernel then receives the output's dense block together with the
 * unchanged remaining arguments.
 *
 * @return the value returned by the selected kernel, or {@code -1} when the
 *         cell type is none of NO_AGG, ROW_AGG or COL_AGG
 */
private long executeDense(DenseBlock a, SideInput[] b, double[] scalars, MatrixBlock out, int m, int n, boolean sparseSafe, int rl, int ru) {
    final DenseBlock target = out.getDenseBlock();
    final SideInput[] sides = createSparseSideInputs(b);

    // no aggregation: the aggregation operation is irrelevant
    if (_type == CellType.NO_AGG) {
        return executeDenseNoAgg(a, sides, scalars, target, m, n, sparseSafe, rl, ru);
    }

    // SUM and SUM_SQ share the "Sum" kernels, every other operation uses "Mxx"
    final boolean sumLike = (_aggOp == AggOp.SUM || _aggOp == AggOp.SUM_SQ);

    if (_type == CellType.ROW_AGG) {
        return sumLike
            ? executeDenseRowAggSum(a, sides, scalars, target, m, n, sparseSafe, rl, ru)
            : executeDenseRowAggMxx(a, sides, scalars, target, m, n, sparseSafe, rl, ru);
    }
    if (_type == CellType.COL_AGG) {
        return sumLike
            ? executeDenseColAggSum(a, sides, scalars, target, m, n, sparseSafe, rl, ru)
            : executeDenseColAggMxx(a, sides, scalars, target, m, n, sparseSafe, rl, ru);
    }
    return -1;
}
Also used : DenseBlock(org.apache.sysml.runtime.matrix.data.DenseBlock)

Example 13 with DenseBlock

use of org.apache.sysml.runtime.matrix.data.DenseBlock in project systemml by apache.

the class SpoofOuterProduct method execute.

/**
 * Executes this outer-product operator, using up to {@code numThreads} threads.
 *
 * <p>Returns early with the unchanged {@code out} (after a sparsity check) when the
 * relevant factor or the main input is empty, and falls back to the three-argument
 * {@code execute} when the estimated work is below {@code PAR_MINFLOP_THRESHOLD}.
 * Otherwise the work is split into {@code ParExecTask}s over column groups, column
 * ranges, or row ranges, and the per-task results are added to the output's
 * non-zero count.
 *
 * @param inputs        input matrices; at least three are required
 * @param scalarObjects scalar inputs, converted via {@code prepInputScalars}
 * @param out           output block, reset and allocated here
 * @param numThreads    requested degree of parallelism
 * @return the output block {@code out}
 * @throws RuntimeException    if {@code inputs} is null or has fewer than three
 *                             entries, or {@code out} is null
 * @throws DMLRuntimeException wrapping any exception raised while creating or
 *                             running the parallel tasks
 */
@Override
public MatrixBlock execute(ArrayList<MatrixBlock> inputs, ArrayList<ScalarObject> scalarObjects, MatrixBlock out, int numThreads) {
    // sanity check
    if (inputs == null || inputs.size() < 3 || out == null)
        throw new RuntimeException("Invalid input arguments.");
    // check empty result
    if ((_outerProductType == OutProdType.LEFT_OUTER_PRODUCT && inputs.get(1).isEmptyBlock(false)) // U is empty
        || (_outerProductType == OutProdType.RIGHT_OUTER_PRODUCT && inputs.get(2).isEmptyBlock(false)) // V is empty
        || inputs.get(0).isEmptyBlock(false)) { // X is empty
        // turn empty dense into sparse
        out.examSparsity();
        return out;
    }
    // input preparation and result allocation (Allocate the output that is set by Sigma2CPInstruction)
    if (_outerProductType == OutProdType.CELLWISE_OUTER_PRODUCT) {
        // assign it to the time and sparse representation of the major input matrix
        out.reset(inputs.get(0).getNumRows(), inputs.get(0).getNumColumns(), inputs.get(0).isInSparseFormat());
        out.allocateBlock();
    } else {
        // if left outerproduct gives a value of k*n instead of n*k, change it back to n*k and then transpose the output
        if (_outerProductType == OutProdType.LEFT_OUTER_PRODUCT)
            // n*k
            out.reset(inputs.get(0).getNumColumns(), inputs.get(1).getNumColumns(), false);
        else if (_outerProductType == OutProdType.RIGHT_OUTER_PRODUCT)
            // m*k
            out.reset(inputs.get(0).getNumRows(), inputs.get(1).getNumColumns(), false);
        out.allocateDenseBlock();
    }
    if (2 * inputs.get(0).getNonZeros() * inputs.get(1).getNumColumns() < PAR_MINFLOP_THRESHOLD)
        // too little work to pay for threading: run sequentially
        return execute(inputs, scalarObjects, out);
    // input preparation
    DenseBlock[] ab = getDenseMatrices(prepInputMatrices(inputs, 1, 2, true, false));
    SideInput[] b = prepInputMatrices(inputs, 3, false);
    double[] scalars = prepInputScalars(scalarObjects);
    // dimensions used to partition the parallel work
    final int m = inputs.get(0).getNumRows();
    final int n = inputs.get(0).getNumColumns();
    // rank
    final int k = inputs.get(1).getNumColumns();
    final long nnz = inputs.get(0).getNonZeros();
    MatrixBlock a = inputs.get(0);
    ExecutorService pool = null;
    try {
        pool = CommonThreadPool.get(numThreads);
        ArrayList<ParExecTask> tasks = new ArrayList<>();
        if (_outerProductType == OutProdType.LEFT_OUTER_PRODUCT) {
            if (a instanceof CompressedMatrixBlock) {
                // parallelize over column groups
                int numCG = ((CompressedMatrixBlock) a).getNumColGroups();
                int blklen = (int) (Math.ceil((double) numCG / numThreads));
                for (int j = 0; j < numThreads && j * blklen < numCG; j++) {
                    tasks.add(new ParExecTask(a, ab[0], ab[1], b, scalars, out, m, n, k, _outerProductType, 0, m, j * blklen, Math.min((j + 1) * blklen, numCG)));
                }
            } else {
                // parallelize over column partitions
                int blklen = (int) (Math.ceil((double) n / numThreads));
                for (int j = 0; j < numThreads && j * blklen < n; j++) {
                    tasks.add(new ParExecTask(a, ab[0], ab[1], b, scalars, out, m, n, k, _outerProductType, 0, m, j * blklen, Math.min((j + 1) * blklen, n)));
                }
            }
        } else {
            // right or cell-wise
            // parallelize over row partitions
            int numThreads2 = getPreferredNumberOfTasks(m, n, nnz, k, numThreads);
            int blklen = (int) (Math.ceil((double) m / numThreads2));
            for (int i = 0; i < numThreads2 && i * blklen < m; i++) {
                tasks.add(new ParExecTask(a, ab[0], ab[1], b, scalars, out, m, n, k, _outerProductType, i * blklen, Math.min((i + 1) * blklen, m), 0, n));
            }
        }
        // invokeAll blocks until every task has completed
        List<Future<Long>> taskret = pool.invokeAll(tasks);
        for (Future<Long> task : taskret) {
            out.setNonZeros(out.getNonZeros() + task.get());
        }
    } catch (Exception e) {
        throw new DMLRuntimeException(e);
    } finally {
        // always release the pool, also when task creation or execution failed
        if (pool != null)
            pool.shutdown();
    }
    // post-processing
    if (a instanceof CompressedMatrixBlock) {
        if (out.isInSparseFormat() && _outerProductType == OutProdType.CELLWISE_OUTER_PRODUCT)
            out.sortSparseRows();
        else if (_outerProductType == OutProdType.LEFT_OUTER_PRODUCT)
            out.recomputeNonZeros();
    }
    out.examSparsity();
    return out;
}
Also used : MatrixBlock(org.apache.sysml.runtime.matrix.data.MatrixBlock) CompressedMatrixBlock(org.apache.sysml.runtime.compress.CompressedMatrixBlock) ArrayList(java.util.ArrayList) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException) DenseBlock(org.apache.sysml.runtime.matrix.data.DenseBlock) CompressedMatrixBlock(org.apache.sysml.runtime.compress.CompressedMatrixBlock) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException) ExecutorService(java.util.concurrent.ExecutorService) Future(java.util.concurrent.Future)

Example 14 with DenseBlock

use of org.apache.sysml.runtime.matrix.data.DenseBlock in project systemml by apache.

the class SpoofRowwise method execute.

/**
 * Executes this row-wise operator sequentially over all rows of the first input.
 *
 * @param inputs        input matrices; the first one is the main input, the rest
 *                      are prepared as side inputs
 * @param scalarObjects scalar inputs, converted via {@code prepInputScalars}
 * @param out           output block; (re)allocated unless {@code aggIncr} is set
 *                      and the block is already allocated
 * @param allocTmp      if true and the operator requires vector memory, thread-local
 *                      memory is set up before and cleaned up after the kernel
 * @param aggIncr       if true, the non-zero recomputation and sparsity check on
 *                      the output are skipped
 * @return the output block; a newly transposed block when the output is flipped
 * @throws RuntimeException if {@code inputs} is null or empty, or {@code out} is null
 */
public MatrixBlock execute(ArrayList<MatrixBlock> inputs, ArrayList<ScalarObject> scalarObjects, MatrixBlock out, boolean allocTmp, boolean aggIncr) {
    // sanity check
    if (inputs == null || inputs.size() < 1 || out == null)
        throw new RuntimeException("Invalid input arguments.");
    // result allocation and preparations
    final int m = inputs.get(0).getNumRows();
    final int n = inputs.get(0).getNumColumns();
    final int n2 = _type.isConstDim2(_constDim2) ? (int) _constDim2 : _type.isRowTypeB1() || hasMatrixSideInput(inputs) ? getMinColsMatrixSideInputs(inputs) : -1;
    if (!aggIncr || !out.isAllocated())
        allocateOutputMatrix(m, n, n2, out);
    DenseBlock c = out.getDenseBlock();
    final boolean flipOut = _type.isRowTypeB1ColumnAgg() && LibSpoofPrimitives.isFlipOuter(out.getNumRows(), out.getNumColumns());
    // input preparation
    SideInput[] b = prepInputMatrices(inputs, 1, inputs.size() - 1, false, _tB1);
    double[] scalars = prepInputScalars(scalarObjects);
    // setup thread-local memory if necessary
    final boolean useTmpMem = allocTmp && _reqVectMem > 0;
    if (useTmpMem)
        LibSpoofPrimitives.setupThreadLocalMemory(_reqVectMem, n, n2);
    try {
        // core sequential execute
        MatrixBlock a = inputs.get(0);
        if (a instanceof CompressedMatrixBlock)
            executeCompressed((CompressedMatrixBlock) a, b, scalars, c, n, 0, m);
        else if (!a.isInSparseFormat())
            executeDense(a.getDenseBlock(), b, scalars, c, n, 0, m);
        else
            executeSparse(a.getSparseBlock(), b, scalars, c, n, 0, m);
    } finally {
        // clean up even if the kernel throws, so the thread-local memory set up
        // above does not stay attached to the calling thread
        if (useTmpMem)
            LibSpoofPrimitives.cleanupThreadLocalMemory();
    }
    // post-processing
    if (flipOut) {
        fixTransposeDimensions(out);
        out = LibMatrixReorg.transpose(out, new MatrixBlock(out.getNumColumns(), out.getNumRows(), false));
    }
    if (!aggIncr) {
        out.recomputeNonZeros();
        out.examSparsity();
    }
    return out;
}
Also used : DenseBlock(org.apache.sysml.runtime.matrix.data.DenseBlock) CompressedMatrixBlock(org.apache.sysml.runtime.compress.CompressedMatrixBlock) MatrixBlock(org.apache.sysml.runtime.matrix.data.MatrixBlock) CompressedMatrixBlock(org.apache.sysml.runtime.compress.CompressedMatrixBlock) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException)

Example 15 with DenseBlock

use of org.apache.sysml.runtime.matrix.data.DenseBlock in project systemml by apache.

the class SpoofRowwise method execute.

/**
 * Executes this row-wise operator with up to {@code k} threads.
 *
 * <p>Redirects to the three-argument {@code execute} when {@code k <= 1}, when a
 * column aggregation does not satisfy the multi-threading constraints, or when the
 * total input size is below {@code PAR_NUMCELL_THRESHOLD}. Otherwise the rows are
 * split into blocks that are processed by parallel tasks; for column and full
 * aggregations the partial results of the tasks are added into the output.
 *
 * @param inputs        input matrices; the first one is the main input, the rest
 *                      are prepared as side inputs
 * @param scalarObjects scalar inputs, converted via {@code prepInputScalars}
 * @param out           output block, allocated here
 * @param k             requested degree of parallelism
 * @return the output block; a newly transposed block when the output is flipped
 * @throws RuntimeException    if {@code inputs} is null or empty, or {@code out} is null
 * @throws DMLRuntimeException wrapping any exception raised while running the tasks
 */
@Override
public MatrixBlock execute(ArrayList<MatrixBlock> inputs, ArrayList<ScalarObject> scalarObjects, MatrixBlock out, int k) {
    // redirect to serial execution
    // NOTE(review): this check dereferences 'inputs' before the sanity check below
    if (k <= 1 || (_type.isColumnAgg() && !LibMatrixMult.satisfiesMultiThreadingConstraints(inputs.get(0), k)) || getTotalInputSize(inputs) < PAR_NUMCELL_THRESHOLD) {
        return execute(inputs, scalarObjects, out);
    }
    // sanity check
    if (inputs == null || inputs.size() < 1 || out == null)
        throw new RuntimeException("Invalid input arguments.");
    // result allocation and preparations
    final int m = inputs.get(0).getNumRows();
    final int n = inputs.get(0).getNumColumns();
    final int n2 = _type.isConstDim2(_constDim2) ? (int) _constDim2 : _type.isRowTypeB1() || hasMatrixSideInput(inputs) ? getMinColsMatrixSideInputs(inputs) : -1;
    allocateOutputMatrix(m, n, n2, out);
    final boolean flipOut = _type.isRowTypeB1ColumnAgg() && LibSpoofPrimitives.isFlipOuter(out.getNumRows(), out.getNumColumns());
    // input preparation
    MatrixBlock a = inputs.get(0);
    SideInput[] b = prepInputMatrices(inputs, 1, inputs.size() - 1, false, _tB1);
    double[] scalars = prepInputScalars(scalarObjects);
    // row partitioning; computed before the pool is acquired so that a failure
    // here cannot leave a pool behind
    ArrayList<Integer> blklens = (a instanceof CompressedMatrixBlock) ? UtilFunctions.getAlignedBlockSizes(m, k, BitmapEncoder.BITMAP_BLOCK_SZ) : UtilFunctions.getBalancedBlockSizesDefault(m, k, (long) m * n < 16 * PAR_NUMCELL_THRESHOLD);
    // core parallel execute
    ExecutorService pool = CommonThreadPool.get(k);
    try {
        if (_type.isColumnAgg() || _type == RowType.FULL_AGG) {
            // execute tasks
            ArrayList<ParColAggTask> tasks = new ArrayList<>();
            int outLen = out.getNumRows() * out.getNumColumns();
            for (int i = 0, lb = 0; i < blklens.size(); lb += blklens.get(i), i++) {
                tasks.add(new ParColAggTask(a, b, scalars, n, n2, outLen, lb, lb + blklens.get(i)));
            }
            List<Future<DenseBlock>> taskret = pool.invokeAll(tasks);
            // aggregate partial results
            int len = _type.isColumnAgg() ? out.getNumRows() * out.getNumColumns() : 1;
            for (Future<DenseBlock> task : taskret) {
                LibMatrixMult.vectAdd(task.get().valuesAt(0), out.getDenseBlockValues(), 0, 0, len);
            }
            out.recomputeNonZeros();
        } else {
            // execute tasks
            ArrayList<ParExecTask> tasks = new ArrayList<>();
            for (int i = 0, lb = 0; i < blklens.size(); lb += blklens.get(i), i++) {
                tasks.add(new ParExecTask(a, b, out, scalars, n, n2, lb, lb + blklens.get(i)));
            }
            List<Future<Long>> taskret = pool.invokeAll(tasks);
            // aggregate nnz, no need to aggregate results
            long nnz = 0;
            for (Future<Long> task : taskret) {
                nnz += task.get();
            }
            out.setNonZeros(nnz);
        }
        if (flipOut) {
            fixTransposeDimensions(out);
            out = LibMatrixReorg.transpose(out, new MatrixBlock(out.getNumColumns(), out.getNumRows(), false));
        }
        out.examSparsity();
    } catch (Exception ex) {
        throw new DMLRuntimeException(ex);
    } finally {
        // always release the pool, also when a task failed or the wait was interrupted
        pool.shutdown();
    }
    return out;
}
Also used : MatrixBlock(org.apache.sysml.runtime.matrix.data.MatrixBlock) CompressedMatrixBlock(org.apache.sysml.runtime.compress.CompressedMatrixBlock) ArrayList(java.util.ArrayList) CompressedMatrixBlock(org.apache.sysml.runtime.compress.CompressedMatrixBlock) DenseBlock(org.apache.sysml.runtime.matrix.data.DenseBlock) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException) ExecutorService(java.util.concurrent.ExecutorService) Future(java.util.concurrent.Future)

Aggregations

DenseBlock (org.apache.sysml.runtime.matrix.data.DenseBlock): 48; DMLRuntimeException (org.apache.sysml.runtime.DMLRuntimeException): 22; MatrixBlock (org.apache.sysml.runtime.matrix.data.MatrixBlock): 20; CompressedMatrixBlock (org.apache.sysml.runtime.compress.CompressedMatrixBlock): 10; KahanPlus (org.apache.sysml.runtime.functionobjects.KahanPlus): 10; KahanObject (org.apache.sysml.runtime.instructions.cp.KahanObject): 10; BufferedReader (java.io.BufferedReader): 6; BufferedWriter (java.io.BufferedWriter): 6; File (java.io.File): 6; InputStreamReader (java.io.InputStreamReader): 6; OutputStreamWriter (java.io.OutputStreamWriter): 6; ArrayList (java.util.ArrayList): 6; ExecutorService (java.util.concurrent.ExecutorService): 6; Future (java.util.concurrent.Future): 6; FileSystem (org.apache.hadoop.fs.FileSystem): 6; Path (org.apache.hadoop.fs.Path): 6; SequenceFile (org.apache.hadoop.io.SequenceFile): 6; JobConf (org.apache.hadoop.mapred.JobConf): 6; MatrixCharacteristics (org.apache.sysml.runtime.matrix.MatrixCharacteristics): 6; IJV (org.apache.sysml.runtime.matrix.data.IJV): 6