use of org.apache.sysml.runtime.matrix.data.DenseBlock in project incubator-systemml by apache.
the class SpoofCellwise method executeSparseNoAggDense.
/**
 * Evaluates {@code genexec} over rows {@code [rl, ru)} of a sparse input and stores each
 * result in the dense block of {@code out}.
 *
 * <p>Stored non-zeros are always evaluated. When {@code sparseSafe} is {@code false}, every
 * remaining cell of the row (including rows that are empty or when {@code sblock} is
 * {@code null}) is additionally evaluated with an input value of 0.
 *
 * @param sblock     sparse input block; may be {@code null}, in which case all rows are treated as empty
 * @param b          side inputs forwarded unchanged to {@code genexec}
 * @param scalars    scalar inputs forwarded unchanged to {@code genexec}
 * @param out        output matrix whose dense block receives the results
 * @param m          forwarded to {@code genexec} (presumably the number of rows - confirm against caller)
 * @param n          number of columns scanned per row; also forwarded to {@code genexec}
 * @param sparseSafe if {@code true}, cells without a stored non-zero are skipped
 * @param rl         first row to process (inclusive)
 * @param ru         last row to process (exclusive)
 * @return the number of written results that are different from 0
 */
private long executeSparseNoAggDense(SparseBlock sblock, SideInput[] b, double[] scalars, MatrixBlock out, int m, int n, boolean sparseSafe, int rl, int ru) {
    // note: sequential scan algorithm for both sparse-safe and -unsafe
    // in order to avoid binary search for sparse-unsafe
    DenseBlock c = out.getDenseBlock();
    long lnnz = 0;
    for (int i = rl; i < ru; i++) {
        int lastj = -1;
        // handle non-empty rows
        if (sblock != null && !sblock.isEmpty(i)) {
            int apos = sblock.pos(i);
            int alen = sblock.size(i);
            int[] aix = sblock.indexes(i);
            double[] avals = sblock.values(i);
            double[] cvals = c.values(i);
            int cix = c.pos(i);
            for (int k = apos; k < apos + alen; k++) {
                // process zeros before current non-zero
                if (!sparseSafe) {
                    for (int j = lastj + 1; j < aix[k]; j++) {
                        if ((cvals[cix + j] = genexec(0, b, scalars, m, n, i, j)) != 0) {
                            lnnz++;
                        }
                    }
                }
                // process current non-zero
                lastj = aix[k];
                if ((cvals[cix + lastj] = genexec(avals[k], b, scalars, m, n, i, lastj)) != 0) {
                    lnnz++;
                }
            }
        }
        // process empty rows or remaining zeros
        if (!sparseSafe && lastj + 1 < n) {
            // the output row lookup does not depend on j, so fetch it once per row
            // (and only if there is at least one trailing cell to write)
            double[] cvals = c.values(i);
            int cix = c.pos(i);
            for (int j = lastj + 1; j < n; j++) {
                if ((cvals[cix + j] = genexec(0, b, scalars, m, n, i, j)) != 0) {
                    lnnz++;
                }
            }
        }
    }
    return lnnz;
}
use of org.apache.sysml.runtime.matrix.data.DenseBlock in project incubator-systemml by apache.
the class SpoofCellwise method executeDense.
// ///////
// function dispatch
/**
 * Dispatches a dense input to the execution routine matching the operator's cell type
 * ({@code _type}) and, for row/column aggregates, its aggregation operator ({@code _aggOp}).
 *
 * <p>The dense block of {@code out} and the result of {@code createSparseSideInputs(b)} are
 * passed on to the selected routine; all other arguments are forwarded unchanged.
 *
 * @return the value returned by the selected routine, or -1 if {@code _type} is none of
 *         NO_AGG, ROW_AGG or COL_AGG
 */
private long executeDense(DenseBlock a, SideInput[] b, double[] scalars, MatrixBlock out, int m, int n, boolean sparseSafe, int rl, int ru) {
    final DenseBlock target = out.getDenseBlock();
    final SideInput[] sideInputs = createSparseSideInputs(b);

    // no aggregation: plain cell-wise execution
    if (_type == CellType.NO_AGG) {
        return executeDenseNoAgg(a, sideInputs, scalars, target, m, n, sparseSafe, rl, ru);
    }

    // row aggregation: sum-like operators have a dedicated routine
    if (_type == CellType.ROW_AGG) {
        final boolean sumLike = (_aggOp == AggOp.SUM || _aggOp == AggOp.SUM_SQ);
        return sumLike
            ? executeDenseRowAggSum(a, sideInputs, scalars, target, m, n, sparseSafe, rl, ru)
            : executeDenseRowAggMxx(a, sideInputs, scalars, target, m, n, sparseSafe, rl, ru);
    }

    // column aggregation: same split as for rows
    if (_type == CellType.COL_AGG) {
        final boolean sumLike = (_aggOp == AggOp.SUM || _aggOp == AggOp.SUM_SQ);
        return sumLike
            ? executeDenseColAggSum(a, sideInputs, scalars, target, m, n, sparseSafe, rl, ru)
            : executeDenseColAggMxx(a, sideInputs, scalars, target, m, n, sparseSafe, rl, ru);
    }

    // any other cell type is not handled by this dispatcher
    return -1;
}
use of org.apache.sysml.runtime.matrix.data.DenseBlock in project incubator-systemml by apache.
the class SpoofOuterProduct method execute.
/**
 * Single-threaded execution of a left, right or cell-wise outer-product operator.
 *
 * <p>{@code inputs} must hold at least three matrices: the main input X at index 0 and the
 * factors U and V at indexes 1 and 2; any further entries are prepared as side inputs.
 * The result is written into {@code out}, which is also the return value.
 *
 * @param inputs        input matrices (X, U, V, followed by optional side inputs)
 * @param scalarObjects scalar inputs
 * @param out           pre-created output block that is reset, filled and returned
 * @return {@code out}
 * @throws RuntimeException    if {@code inputs} is {@code null}, has fewer than three entries,
 *                             or {@code out} is {@code null}
 * @throws DMLRuntimeException if the operator is an aggregate outer product
 */
@Override
public MatrixBlock execute(ArrayList<MatrixBlock> inputs, ArrayList<ScalarObject> scalarObjects, MatrixBlock out) {
    // sanity check
    if (inputs == null || inputs.size() < 3 || out == null) {
        throw new RuntimeException("Invalid input arguments.");
    }

    final MatrixBlock a = inputs.get(0);       // X
    final MatrixBlock left = inputs.get(1);    // U
    final MatrixBlock right = inputs.get(2);   // V

    // check empty result: U empty (left), V empty (right), or X empty (any type)
    if ((_outerProductType == OutProdType.LEFT_OUTER_PRODUCT && left.isEmptyBlock(false))
        || (_outerProductType == OutProdType.RIGHT_OUTER_PRODUCT && right.isEmptyBlock(false))
        || a.isEmptyBlock(false)) {
        // turn empty dense into sparse
        out.examSparsity();
        return out;
    }

    // result allocation (the output is provided by the calling instruction)
    if (_outerProductType == OutProdType.CELLWISE_OUTER_PRODUCT) {
        // same dimensions and sparse/dense representation as the main input
        out.reset(a.getNumRows(), a.getNumColumns(), a.isInSparseFormat());
    } else if (_outerProductType == OutProdType.LEFT_OUTER_PRODUCT) {
        // dense n*k result
        out.reset(a.getNumColumns(), left.getNumColumns(), false);
    } else if (_outerProductType == OutProdType.RIGHT_OUTER_PRODUCT) {
        // dense m*k result
        out.reset(a.getNumRows(), left.getNumColumns(), false);
    }

    // check for empty inputs; otherwise allocate result
    // NOTE(review): X was already tested for emptiness above, so this looks redundant
    // unless out.reset can affect the input - confirm before removing
    if (a.isEmptyBlock(false)) {
        return out;
    }
    out.allocateBlock();

    // input preparation
    final DenseBlock[] factors = getDenseMatrices(prepInputMatrices(inputs, 1, 2, true, false));
    final SideInput[] b = prepInputMatrices(inputs, 3, false);
    final double[] scalars = prepInputScalars(scalarObjects);

    // core sequential execute over the full row range [0, m)
    final int m = a.getNumRows();
    final int n = a.getNumColumns();
    final int k = left.getNumColumns();   // rank
    final boolean compressed = a instanceof CompressedMatrixBlock;

    switch (_outerProductType) {
        case LEFT_OUTER_PRODUCT:
        case RIGHT_OUTER_PRODUCT:
            if (compressed) {
                executeCompressed((CompressedMatrixBlock) a, factors[0], factors[1], b, scalars, out.getDenseBlock(), m, n, k, _outerProductType, 0, m, 0, ((CompressedMatrixBlock) a).getNumColGroups());
            } else if (!a.isInSparseFormat()) {
                executeDense(a.getDenseBlock(), factors[0], factors[1], b, scalars, out.getDenseBlock(), m, n, k, _outerProductType, 0, m, 0, n);
            } else {
                executeSparse(a.getSparseBlock(), factors[0], factors[1], b, scalars, out.getDenseBlock(), m, n, k, a.getNonZeros(), _outerProductType, 0, m, 0, n);
            }
            break;
        case CELLWISE_OUTER_PRODUCT:
            if (compressed) {
                executeCellwiseCompressed((CompressedMatrixBlock) a, factors[0], factors[1], b, scalars, out, m, n, k, _outerProductType, 0, m, 0, n);
            } else if (!a.isInSparseFormat()) {
                executeCellwiseDense(a.getDenseBlock(), factors[0], factors[1], b, scalars, out.getDenseBlock(), m, n, k, _outerProductType, 0, m, 0, n);
            } else {
                executeCellwiseSparse(a.getSparseBlock(), factors[0], factors[1], b, scalars, out, m, n, k, a.getNonZeros(), _outerProductType, 0, m, 0, n);
            }
            break;
        case AGG_OUTER_PRODUCT:
            throw new DMLRuntimeException("Wrong codepath for aggregate outer product.");
    }

    // post-processing: sparse rows are sorted only for compressed cell-wise results
    if (compressed && out.isInSparseFormat() && _outerProductType == OutProdType.CELLWISE_OUTER_PRODUCT) {
        out.sortSparseRows();
    }
    out.recomputeNonZeros();
    out.examSparsity();
    return out;
}
use of org.apache.sysml.runtime.matrix.data.DenseBlock in project incubator-systemml by apache.
the class SpoofOuterProduct method executeCellwiseSparse.
/**
 * Evaluates {@code genexecCellwise} for every stored non-zero of {@code sblock} in rows
 * {@code [rl, ru)}, iterating in square blocks of rows and columns.
 *
 * <p>Behavior by output format and operator type:
 * <ul>
 *   <li>dense output, {@code CELLWISE_OUTER_PRODUCT}: each result is written to the output
 *       cell at the same (row, column) position as the non-zero;</li>
 *   <li>dense output, any other type: all results are summed and the sum is stored in
 *       cell (0, 0) of the output;</li>
 *   <li>sparse output: each result is appended to the output row of the non-zero.</li>
 * </ul>
 *
 * <p>The parameters {@code cl} and {@code cu} are not read; column blocks always cover
 * {@code [0, n)}.
 *
 * @param sblock  sparse main input
 * @param u       dense factor indexed by the row of the non-zero
 * @param v       dense factor indexed by the column of the non-zero
 * @param b       side inputs forwarded to {@code genexecCellwise}
 * @param scalars scalar inputs forwarded to {@code genexecCellwise}
 * @param out     output matrix (dense or sparse)
 * @param m       used for the block-size heuristic and forwarded to {@code genexecCellwise}
 * @param n       number of columns to scan; also used for the block-size heuristic
 * @param k       forwarded to {@code genexecCellwise}
 * @param nnz     number of non-zeros used for the block-size heuristic; values below 1 are treated as 1
 * @param type    outer-product type selecting cell-wise output versus summation
 * @param rl      first row (inclusive)
 * @param ru      last row (exclusive)
 * @param cl      unused
 * @param cu      unused
 */
private void executeCellwiseSparse(SparseBlock sblock, DenseBlock u, DenseBlock v, SideInput[] b, double[] scalars, MatrixBlock out, int m, int n, int k, long nnz, OutProdType type, int rl, int ru, int cl, int cu) {
    // NOTE: we don't create sparse side inputs w/ row-major cursors because
    // cache blocking would lead to non-sequential access

    // Block size heuristic 8*m*n/nnz, made safe against nnz <= 0 (division by zero / negative
    // size) and against values that do not fit into an int (the plain cast would wrap around
    // to a negative or zero block size).
    final long rawBlocksize = 8L * m * n / Math.max(nnz, 1);
    final int blocksizeIJ = (int) Math.max(1, Math.min(Integer.MAX_VALUE, rawBlocksize));
    // per-row cursor (relative to the row start) of the next non-zero to visit
    int[] curk = new int[Math.min(blocksizeIJ, ru - rl)];

    if (!out.isInSparseFormat()) { // DENSE
        DenseBlock c = out.getDenseBlock();
        double tmp = 0;
        for (int bi = rl; bi < ru; ) {
            // block bounds are computed in long arithmetic so bi + blocksizeIJ cannot overflow
            final int bimin = (int) Math.min(ru, (long) bi + blocksizeIJ);
            // prepare starting indexes for block row
            Arrays.fill(curk, 0);
            // blocked execution over column blocks
            for (int bj = 0; bj < n; ) {
                final int bjmin = (int) Math.min(n, (long) bj + blocksizeIJ);
                for (int i = bi; i < bimin; i++) {
                    if (sblock.isEmpty(i))
                        continue;
                    int wpos = sblock.pos(i);
                    int wlen = sblock.size(i);
                    int[] wix = sblock.indexes(i);
                    double[] wvals = sblock.values(i);
                    double[] uvals = u.values(i);
                    int uix = u.pos(i);
                    int index = wpos + curk[i - bi];
                    if (type == OutProdType.CELLWISE_OUTER_PRODUCT) {
                        // the array returned by values(i) must be addressed relative to pos(i),
                        // exactly as done for u and v
                        double[] cvals = c.values(i);
                        int cix = c.pos(i);
                        for (; index < wpos + wlen && wix[index] < bjmin; index++) {
                            int jix = wix[index];
                            cvals[cix + jix] = genexecCellwise(wvals[index], uvals, uix, v.values(jix), v.pos(jix), b, scalars, m, n, k, i, jix);
                        }
                    } else {
                        for (; index < wpos + wlen && wix[index] < bjmin; index++) {
                            int jix = wix[index];
                            tmp += genexecCellwise(wvals[index], uvals, uix, v.values(jix), v.pos(jix), b, scalars, m, n, k, i, jix);
                        }
                    }
                    // remember where to resume this row in the next column block
                    curk[i - bi] = index - wpos;
                }
                bj = bjmin;
            }
            bi = bimin;
        }
        if (type != OutProdType.CELLWISE_OUTER_PRODUCT)
            c.set(0, 0, tmp);
    } else { // SPARSE
        SparseBlock c = out.getSparseBlock();
        for (int bi = rl; bi < ru; ) {
            final int bimin = (int) Math.min(ru, (long) bi + blocksizeIJ);
            // prepare starting indexes for block row
            Arrays.fill(curk, 0);
            // blocked execution over column blocks
            for (int bj = 0; bj < n; ) {
                final int bjmin = (int) Math.min(n, (long) bj + blocksizeIJ);
                for (int i = bi; i < bimin; i++) {
                    if (sblock.isEmpty(i))
                        continue;
                    int wpos = sblock.pos(i);
                    int wlen = sblock.size(i);
                    int[] wix = sblock.indexes(i);
                    double[] wval = sblock.values(i);
                    double[] uvals = u.values(i);
                    int uix = u.pos(i);
                    int index = wpos + curk[i - bi];
                    for (; index < wpos + wlen && wix[index] < bjmin; index++) {
                        int jix = wix[index];
                        c.append(i, jix, genexecCellwise(wval[index], uvals, uix, v.values(jix), v.pos(jix), b, scalars, m, n, k, i, jix));
                    }
                    // remember where to resume this row in the next column block
                    curk[i - bi] = index - wpos;
                }
                bj = bjmin;
            }
            bi = bimin;
        }
    }
}
use of org.apache.sysml.runtime.matrix.data.DenseBlock in project incubator-systemml by apache.
the class SpoofRowwise method execute.
/**
 * Multi-threaded execution of a row-wise operator with up to {@code k} threads.
 *
 * <p>Falls back to the serial {@code execute(inputs, scalarObjects, out)} when {@code k <= 1},
 * when a column aggregate fails {@code LibMatrixMult.checkParColumnAgg}, or when the total
 * input size is below {@code PAR_NUMCELL_THRESHOLD}. Otherwise the rows of the first input
 * are split into blocks, one task per block is submitted to a thread pool, and the partial
 * results are combined:
 * <ul>
 *   <li>column and full aggregates: the per-task dense results are added into {@code out};</li>
 *   <li>all other types: tasks write into {@code out} directly and only their non-zero
 *       counts are summed.</li>
 * </ul>
 * The thread pool is shut down in all cases, including when a task fails.
 *
 * @param inputs        input matrices; the first entry is the main input, the rest are side inputs
 * @param scalarObjects scalar inputs
 * @param out           pre-created output block
 * @param k             requested degree of parallelism
 * @return the result block; this is a newly created transposed block when the output had to be flipped,
 *         otherwise {@code out}
 * @throws RuntimeException    if {@code inputs} is {@code null} or empty, or {@code out} is {@code null}
 * @throws DMLRuntimeException wrapping any exception raised during parallel execution
 */
@Override
public MatrixBlock execute(ArrayList<MatrixBlock> inputs, ArrayList<ScalarObject> scalarObjects, MatrixBlock out, int k) {
    // sanity check (performed first because the redirect condition below dereferences inputs)
    if (inputs == null || inputs.size() < 1 || out == null)
        throw new RuntimeException("Invalid input arguments.");
    // redirect to serial execution
    if (k <= 1 || (_type.isColumnAgg() && !LibMatrixMult.checkParColumnAgg(inputs.get(0), k, false)) || getTotalInputSize(inputs) < PAR_NUMCELL_THRESHOLD) {
        return execute(inputs, scalarObjects, out);
    }
    // result allocation and preparations
    final int m = inputs.get(0).getNumRows();
    final int n = inputs.get(0).getNumColumns();
    final int n2 = _type.isConstDim2(_constDim2) ? (int) _constDim2 : _type.isRowTypeB1() || hasMatrixSideInput(inputs) ? getMinColsMatrixSideInputs(inputs) : -1;
    allocateOutputMatrix(m, n, n2, out);
    final boolean flipOut = _type.isRowTypeB1ColumnAgg() && LibSpoofPrimitives.isFlipOuter(out.getNumRows(), out.getNumColumns());
    // input preparation
    MatrixBlock a = inputs.get(0);
    SideInput[] b = prepInputMatrices(inputs, 1, inputs.size() - 1, false, _tB1);
    double[] scalars = prepInputScalars(scalarObjects);
    // row partitioning (computed before the pool is acquired so a failure here cannot leak it)
    ArrayList<Integer> blklens = (a instanceof CompressedMatrixBlock) ? LibMatrixMult.getAlignedBlockSizes(m, k, BitmapEncoder.BITMAP_BLOCK_SZ) : LibMatrixMult.getBalancedBlockSizesDefault(m, k, (long) m * n < 16 * PAR_NUMCELL_THRESHOLD);
    // core parallel execute
    ExecutorService pool = CommonThreadPool.get(k);
    try {
        if (_type.isColumnAgg() || _type == RowType.FULL_AGG) {
            // execute tasks
            ArrayList<ParColAggTask> tasks = new ArrayList<>();
            int outLen = out.getNumRows() * out.getNumColumns();
            for (int i = 0, lb = 0; i < blklens.size(); lb += blklens.get(i), i++) {
                tasks.add(new ParColAggTask(a, b, scalars, n, n2, outLen, lb, lb + blklens.get(i)));
            }
            List<Future<DenseBlock>> taskret = pool.invokeAll(tasks);
            // aggregate partial results
            int len = _type.isColumnAgg() ? out.getNumRows() * out.getNumColumns() : 1;
            for (Future<DenseBlock> task : taskret) {
                LibMatrixMult.vectAdd(task.get().valuesAt(0), out.getDenseBlockValues(), 0, 0, len);
            }
            out.recomputeNonZeros();
        } else {
            // execute tasks
            ArrayList<ParExecTask> tasks = new ArrayList<>();
            for (int i = 0, lb = 0; i < blklens.size(); lb += blklens.get(i), i++) {
                tasks.add(new ParExecTask(a, b, out, scalars, n, n2, lb, lb + blklens.get(i)));
            }
            List<Future<Long>> taskret = pool.invokeAll(tasks);
            // aggregate nnz, no need to aggregate results
            long nnz = 0;
            for (Future<Long> task : taskret) {
                nnz += task.get();
            }
            out.setNonZeros(nnz);
        }
        if (flipOut) {
            fixTransposeDimensions(out);
            out = LibMatrixReorg.transpose(out, new MatrixBlock(out.getNumColumns(), out.getNumRows(), false));
        }
        out.examSparsity();
    } catch (Exception ex) {
        throw new DMLRuntimeException(ex);
    } finally {
        // always release the pool, also when a task or the aggregation failed
        pool.shutdown();
    }
    return out;
}
Aggregations