Use of org.apache.sysml.runtime.matrix.data.DenseBlock in project incubator-systemml by Apache.
Class SpoofOuterProduct, method execute.
/**
 * Multi-threaded execution that reduces the operator result to a single scalar.
 *
 * <p>The rows of the first input are split into contiguous ranges, one
 * {@code ParOuterProdAggTask} is created per range, and the partial sums
 * returned by the tasks are added up. If {@code 2 * nnz * rank} is below
 * {@code PAR_MINFLOP_THRESHOLD}, the call is delegated to the sequential
 * two-argument {@code execute}.
 *
 * @param inputs matrix inputs; at least three are required. Input 0 is the
 *        matrix that is partitioned by rows, inputs 1 and 2 are handed to
 *        {@code getDenseMatrices}, inputs from index 3 on become side inputs
 * @param scalarObjects scalar inputs, converted via {@code prepInputScalars}
 * @param numThreads requested degree of parallelism; used both to size the
 *        thread pool and as input to {@code getPreferredNumberOfTasks}
 * @return the sum of all partial results, or 0 if the first input is empty
 * @throws RuntimeException if {@code inputs} is null or has fewer than three entries
 * @throws DMLRuntimeException if task creation or task execution fails
 */
@Override
public ScalarObject execute(ArrayList<MatrixBlock> inputs, ArrayList<ScalarObject> scalarObjects, int numThreads) {
    // sanity check
    if (inputs == null || inputs.size() < 3)
        throw new RuntimeException("Invalid input arguments.");
    if (inputs.get(0).isEmptyBlock(false))
        return new DoubleObject(0);
    // below the parallelization threshold: fall back to the sequential variant
    if (2 * inputs.get(0).getNonZeros() * inputs.get(1).getNumColumns() < PAR_MINFLOP_THRESHOLD)
        return execute(inputs, scalarObjects);

    // input preparation
    DenseBlock[] ab = getDenseMatrices(prepInputMatrices(inputs, 1, 2, true, false));
    SideInput[] b = prepInputMatrices(inputs, 3, false);
    double[] scalars = prepInputScalars(scalarObjects);

    // core parallel execute
    final int m = inputs.get(0).getNumRows();
    final int n = inputs.get(0).getNumColumns();
    // rank (NOT the number of threads)
    final int k = inputs.get(1).getNumColumns();
    final long nnz = inputs.get(0).getNonZeros();
    double sum = 0;

    ExecutorService pool = null;
    try {
        // size the pool by the requested parallelism, not by the rank k
        pool = CommonThreadPool.get(numThreads);
        ArrayList<ParOuterProdAggTask> tasks = new ArrayList<>();
        int numThreads2 = getPreferredNumberOfTasks(m, n, nnz, k, numThreads);
        int blklen = (int) (Math.ceil((double) m / numThreads2));
        // one task per row range [i*blklen, min((i+1)*blklen, m)) over all columns
        for (int i = 0; i < numThreads2 && i * blklen < m; i++) {
            tasks.add(new ParOuterProdAggTask(inputs.get(0), ab[0], ab[1], b, scalars,
                m, n, k, _outerProductType, i * blklen, Math.min((i + 1) * blklen, m), 0, n));
        }
        // execute tasks and aggregate partial sums
        List<Future<Double>> taskret = pool.invokeAll(tasks);
        for (Future<Double> task : taskret)
            sum += task.get();
    } catch (Exception e) {
        throw new DMLRuntimeException(e);
    } finally {
        // release the pool even if task submission or execution failed
        if (pool != null)
            pool.shutdown();
    }
    return new DoubleObject(sum);
}
Use of org.apache.sysml.runtime.matrix.data.DenseBlock in project incubator-systemml by Apache.
Class SpoofOuterProduct, method execute.
/**
 * Single-threaded execution that reduces the operator result to a single scalar.
 *
 * <p>Dispatches on the physical representation of the first input
 * (compressed, sparse, or dense) to the matching cell-wise kernel, which
 * writes its result into a 1x1 dense output block.
 *
 * @param inputs matrix inputs; at least three are required
 * @param scalarObjects scalar inputs, converted via {@code prepInputScalars}
 * @return the value of cell (0,0) of the output block, or 0 if the first input is empty
 * @throws RuntimeException if {@code inputs} is null or has fewer than three entries
 */
@Override
public ScalarObject execute(ArrayList<MatrixBlock> inputs, ArrayList<ScalarObject> scalarObjects) {
    // argument validation
    if (inputs == null || inputs.size() < 3) {
        throw new RuntimeException("Invalid input arguments.");
    }
    // nothing to aggregate for an empty main input
    if (inputs.get(0).isEmptyBlock(false)) {
        return new DoubleObject(0);
    }

    // prepare factor matrices, side inputs, and scalars
    final DenseBlock[] factors = getDenseMatrices(prepInputMatrices(inputs, 1, 2, true, false));
    final SideInput[] sideInputs = prepInputMatrices(inputs, 3, false);
    final double[] scalarValues = prepInputScalars(scalarObjects);

    // dimensions of the main input and rank of the factors
    final MatrixBlock main = inputs.get(0);
    final int rows = main.getNumRows();
    final int cols = main.getNumColumns();
    final int rank = inputs.get(1).getNumColumns();

    // 1x1 dense block that receives the aggregate
    final MatrixBlock result = new MatrixBlock(1, 1, false);
    result.allocateDenseBlock();

    // pick the kernel that matches the representation of the main input
    if (main instanceof CompressedMatrixBlock) {
        executeCellwiseCompressed((CompressedMatrixBlock) main, factors[0], factors[1], sideInputs,
            scalarValues, result, rows, cols, rank, _outerProductType, 0, rows, 0, cols);
    } else if (main.isInSparseFormat()) {
        executeCellwiseSparse(main.getSparseBlock(), factors[0], factors[1], sideInputs,
            scalarValues, result, rows, cols, rank, main.getNonZeros(), _outerProductType, 0, rows, 0, cols);
    } else {
        executeCellwiseDense(main.getDenseBlock(), factors[0], factors[1], sideInputs,
            scalarValues, result.getDenseBlock(), rows, cols, rank, _outerProductType, 0, rows, 0, cols);
    }

    return new DoubleObject(result.getDenseBlock().get(0, 0));
}
Use of org.apache.sysml.runtime.matrix.data.DenseBlock in project incubator-systemml by Apache.
Class SpoofRowwise, method execute.
/**
 * Single-threaded row-wise execution over all rows of the first input.
 *
 * <p>Allocates the output (unless aggregating incrementally into an already
 * allocated block), optionally sets up thread-local temporary memory, and
 * dispatches to the compressed, dense, or sparse kernel depending on the
 * representation of the first input. Thread-local memory that was set up by
 * this call is cleaned up even when the kernel throws.
 *
 * @param inputs matrix inputs; at least one is required. Input 0 is the main
 *        input, the remaining ones are prepared as side inputs
 * @param scalarObjects scalar inputs, converted via {@code prepInputScalars}
 * @param out output block; must not be null
 * @param allocTmp if true and {@code _reqVectMem > 0}, thread-local memory is
 *        set up before and cleaned up after the kernel runs
 * @param aggIncr if true, an already allocated {@code out} is reused as-is
 *        and non-zero recomputation / sparsity examination are skipped
 * @return {@code out}, or the block returned by {@code LibMatrixReorg.transpose}
 *         when the output has to be flipped
 * @throws RuntimeException if {@code inputs} is null or empty, or {@code out} is null
 */
public MatrixBlock execute(ArrayList<MatrixBlock> inputs, ArrayList<ScalarObject> scalarObjects, MatrixBlock out, boolean allocTmp, boolean aggIncr) {
    // sanity check
    if (inputs == null || inputs.isEmpty() || out == null)
        throw new RuntimeException("Invalid input arguments.");

    // result allocation and preparations
    final int m = inputs.get(0).getNumRows();
    final int n = inputs.get(0).getNumColumns();
    // second column dimension: constant if declared, else derived from side inputs, else unknown (-1)
    final int n2 = _type.isConstDim2(_constDim2) ? (int) _constDim2
        : ((_type.isRowTypeB1() || hasMatrixSideInput(inputs)) ? getMinColsMatrixSideInputs(inputs) : -1);
    if (!aggIncr || !out.isAllocated())
        allocateOutputMatrix(m, n, n2, out);
    DenseBlock c = out.getDenseBlock();
    final boolean flipOut = _type.isRowTypeB1ColumnAgg()
        && LibSpoofPrimitives.isFlipOuter(out.getNumRows(), out.getNumColumns());

    // input preparation
    SideInput[] b = prepInputMatrices(inputs, 1, inputs.size() - 1, false, _tB1);
    double[] scalars = prepInputScalars(scalarObjects);

    // setup thread-local memory if necessary
    final boolean useTmpMem = allocTmp && _reqVectMem > 0;
    if (useTmpMem)
        LibSpoofPrimitives.setupThreadLocalMemory(_reqVectMem, n, n2);

    try {
        // core sequential execute
        MatrixBlock a = inputs.get(0);
        if (a instanceof CompressedMatrixBlock)
            executeCompressed((CompressedMatrixBlock) a, b, scalars, c, n, 0, m);
        else if (!a.isInSparseFormat())
            executeDense(a.getDenseBlock(), b, scalars, c, n, 0, m);
        else
            executeSparse(a.getSparseBlock(), b, scalars, c, n, 0, m);
    } finally {
        // always release the thread-local buffers, also on kernel failure
        if (useTmpMem)
            LibSpoofPrimitives.cleanupThreadLocalMemory();
    }

    // post-processing
    if (flipOut) {
        fixTransposeDimensions(out);
        out = LibMatrixReorg.transpose(out, new MatrixBlock(out.getNumColumns(), out.getNumRows(), false));
    }
    if (!aggIncr) {
        out.recomputeNonZeros();
        out.examSparsity();
    }
    return out;
}
Use of org.apache.sysml.runtime.matrix.data.DenseBlock in project incubator-systemml by Apache.
Class ColGroupDDC1, method computeRowSums.
/**
 * Adds this column group's contribution to the row sums of rows
 * {@code [rl, ru)}.
 *
 * <p>For every row, the (sum, correction) pair stored at the row's position
 * and the following cell of {@code result} is loaded, the pre-aggregated sum
 * selected by the row's entry in {@code _data} is added with Kahan addition,
 * and the updated pair is written back.
 *
 * @param result output block whose dense block holds a sum and its correction per row
 * @param kplus aggregation function used for pre-aggregating the value tuples
 * @param rl first row (inclusive)
 * @param ru last row (exclusive)
 */
@Override
protected void computeRowSums(MatrixBlock result, KahanFunction kplus, int rl, int ru) {
    // note: because corrections are stored too, the output might be a large dense block
    final DenseBlock target = result.getDenseBlock();
    final KahanObject acc = new KahanObject(0, 0);
    final KahanPlus plainPlus = KahanPlus.getKahanPlusFnObject();

    // pre-aggregate one sum per value tuple using the caller-provided function
    final double[] tupleSums = sumAllValues(kplus, acc, false);

    // rows are accumulated with plain KahanPlus rather than kplus
    // (original note: needed for correctness in case of sqk+)
    int row = rl;
    while (row < ru) {
        final double[] buf = target.values(row);
        final int off = target.pos(row);
        // load current sum/correction, add this group's share, store back
        acc.set(buf[off], buf[off + 1]);
        // the mask maps the stored code to the range 0..255 before indexing
        plainPlus.execute2(acc, tupleSums[_data[row] & 0xFF]);
        buf[off] = acc._sum;
        buf[off + 1] = acc._correction;
        row++;
    }
}
Use of org.apache.sysml.runtime.matrix.data.DenseBlock in project incubator-systemml by Apache.
Class ColGroupDDC2, method computeRowSums.
/**
 * Adds this column group's contribution to the row sums of rows
 * {@code [rl, ru)}.
 *
 * <p>For every row, the (sum, correction) pair stored at the row's position
 * and the following cell of {@code result} is loaded, the pre-aggregated sum
 * selected by the row's entry in {@code _data} is added with Kahan addition,
 * and the updated pair is written back.
 *
 * @param result output block whose dense block holds a sum and its correction per row
 * @param kplus aggregation function used for pre-aggregating the value tuples
 * @param rl first row (inclusive)
 * @param ru last row (exclusive)
 */
@Override
protected void computeRowSums(MatrixBlock result, KahanFunction kplus, int rl, int ru) {
    // note: because corrections are stored too, the output might be a large dense block
    final DenseBlock target = result.getDenseBlock();
    final KahanObject acc = new KahanObject(0, 0);
    final KahanPlus plainPlus = KahanPlus.getKahanPlusFnObject();

    // pre-aggregate one sum per value tuple using the caller-provided function
    final double[] tupleSums = sumAllValues(kplus, acc, false);

    // rows are accumulated with plain KahanPlus rather than kplus
    // (original note: needed for correctness in case of sqk+)
    int row = rl;
    while (row < ru) {
        final double[] buf = target.values(row);
        final int off = target.pos(row);
        // load current sum/correction, add this group's share, store back
        acc.set(buf[off], buf[off + 1]);
        plainPlus.execute2(acc, tupleSums[_data[row]]);
        buf[off] = acc._sum;
        buf[off + 1] = acc._correction;
        row++;
    }
}
Aggregations