use of EDU.oswego.cs.dl.util.concurrent.FJTask in project tdq-studio-se by Talend.
the class Smp method run.
/**
 * Applies {@code function} to every pair of blocks, one task per block index,
 * then runs all tasks through this object's task group and waits for completion.
 *
 * @param blocksA  first operands; one task is created per element
 * @param blocksB  second operands, indexed in parallel with {@code blocksA};
 *                 may be {@code null}, in which case {@code null} is passed
 *                 to {@code function} as the second argument
 * @param results  receives the value returned by {@code function} for block
 *                 {@code k} at index {@code k}; may be {@code null}, in which
 *                 case the returned values are discarded
 * @param function the function applied to each pair of blocks
 */
protected void run(final DoubleMatrix2D[] blocksA, final DoubleMatrix2D[] blocksB, final double[] results, final Matrix2DMatrix2DFunction function) {
    final FJTask[] subTasks = new FJTask[blocksA.length];
    for (int i = 0; i < blocksA.length; i++) {
        // anonymous classes can only capture final locals
        final int k = i;
        subTasks[i] = new FJTask() {
            public void run() {
                double result = function.apply(blocksA[k], blocksB != null ? blocksB[k] : null);
                if (results != null) {
                    results[k] = result;
                }
            }
        };
    }

    // run tasks and wait for completion
    try {
        this.taskGroup.invoke(new FJTask() {
            public void run() {
                coInvoke(subTasks);
            }
        });
    } catch (InterruptedException exc) {
        // Do not swallow the interruption: restore the flag so that callers
        // further up the stack can observe it and react.
        Thread.currentThread().interrupt();
    }
}
use of EDU.oswego.cs.dl.util.concurrent.FJTask in project tdq-studio-se by Talend.
the class SmpBlas method dgemv.
/**
 * Parallel matrix-vector multiply that splits {@code A} along its rows into
 * blocks and hands each block, together with the matching part of {@code y},
 * to {@code seqBlas.dgemv} in its own task.
 * (Presumably the usual BLAS semantics {@code y = alpha*A*x + beta*y};
 * the arithmetic itself lives in {@code seqBlas} - confirm there.)
 *
 * <p>If {@code transposeA} is set, the call is redirected to the
 * non-transposed case on {@code A.viewDice()}. If fewer than two tasks would
 * result (each task should process at least 30000 flops, and there are at
 * most {@code maxThreads} tasks and at most one task per row), the work is
 * delegated to {@code seqBlas.dgemv} directly.
 *
 * @param transposeA whether {@code A} is to be used transposed
 * @param alpha      scalar passed through to {@code seqBlas.dgemv}
 * @param A          the matrix operand
 * @param x          the vector operand, shared unsplit by all tasks
 * @param beta       scalar passed through to {@code seqBlas.dgemv}
 * @param y          the result vector; split into parts matching the row blocks of {@code A}
 */
public void dgemv(final boolean transposeA, final double alpha, DoubleMatrix2D A, final DoubleMatrix1D x, final double beta, DoubleMatrix1D y) {
    if (transposeA) {
        dgemv(false, alpha, A.viewDice(), x, beta, y);
        return;
    }

    int m = A.rows();
    int n = A.columns();
    long flops = 2L * m * n;

    // each thread should process at least 30000 flops
    int noOfTasks = (int) Math.min(flops / 30000, this.maxThreads);
    // A is split along its rows, so there can be no more tasks than rows
    final int width = m;
    noOfTasks = Math.min(width, noOfTasks);

    if (noOfTasks < 2) {
        // parallelization doesn't pay off (too much start up overhead)
        seqBlas.dgemv(transposeA, alpha, A, x, beta, y);
        return;
    }

    // set up concurrent tasks
    final int baseSpan = width / noOfTasks;
    final FJTask[] subTasks = new FJTask[noOfTasks];
    for (int i = 0; i < noOfTasks; i++) {
        final int offset = i * baseSpan;
        // last span may be a bit larger: it absorbs the remainder rows
        final int span = (i == noOfTasks - 1) ? width - baseSpan * i : baseSpan;

        // split A along rows into blocks
        final DoubleMatrix2D AA = A.viewPart(offset, 0, span, n);
        final DoubleMatrix1D yy = y.viewPart(offset, span);

        subTasks[i] = new FJTask() {
            public void run() {
                seqBlas.dgemv(transposeA, alpha, AA, x, beta, yy);
            }
        };
    }

    // run tasks and wait for completion
    try {
        this.smp.taskGroup.invoke(new FJTask() {
            public void run() {
                coInvoke(subTasks);
            }
        });
    } catch (InterruptedException exc) {
        // Do not swallow the interruption: restore the flag so that callers
        // further up the stack can observe it and react.
        Thread.currentThread().interrupt();
    }
}
use of EDU.oswego.cs.dl.util.concurrent.FJTask in project tdq-studio-se by Talend.
the class SmpBlas method dgemm.
/**
 * Parallel matrix-matrix multiply that splits the work into blocks and hands
 * each block to {@code seqBlas.dgemm} in its own task.
 * (Presumably the usual BLAS semantics {@code C = alpha*A*B + beta*C};
 * the arithmetic itself lives in {@code seqBlas} - confirm there.)
 *
 * <p>Splitting strategy: if {@code B} has at least as many columns as there
 * are tasks, {@code B} and {@code C} are split along their columns and every
 * task sees all of {@code A}. Otherwise {@code A} and {@code C} are split
 * along their rows and every task sees all of {@code B}.
 *
 * <p>Transposition flags are resolved up front by redirecting to the
 * non-transposed case on {@code viewDice()} of the respective matrix. If
 * fewer than two tasks would result (each task should process at least 30000
 * flops, and there are at most {@code maxThreads} tasks), the work is
 * delegated to {@code seqBlas.dgemm} directly.
 *
 * @param transposeA whether {@code A} is to be used transposed
 * @param transposeB whether {@code B} is to be used transposed
 * @param alpha      scalar passed through to {@code seqBlas.dgemm}
 * @param A          the first matrix operand
 * @param B          the second matrix operand
 * @param beta       scalar passed through to {@code seqBlas.dgemm}
 * @param C          the result matrix
 * @throws IllegalArgumentException if {@code B.rows() != A.columns()}, if
 *         {@code C} is not {@code A.rows() x B.columns()}, or if {@code C} is
 *         the same object as {@code A} or {@code B}
 */
public void dgemm(final boolean transposeA, final boolean transposeB, final double alpha, final DoubleMatrix2D A, final DoubleMatrix2D B, final double beta, final DoubleMatrix2D C) {
    if (transposeA) {
        dgemm(false, transposeB, alpha, A.viewDice(), B, beta, C);
        return;
    }
    if (transposeB) {
        dgemm(transposeA, false, alpha, A, B.viewDice(), beta, C);
        return;
    }

    int m = A.rows();
    int n = A.columns();
    int p = B.columns();

    if (B.rows() != n)
        throw new IllegalArgumentException("Matrix2D inner dimensions must agree:" + A.toStringShort() + ", " + B.toStringShort());
    if (C.rows() != m || C.columns() != p)
        throw new IllegalArgumentException("Incompatibel result matrix: " + A.toStringShort() + ", " + B.toStringShort() + ", " + C.toStringShort());
    if (A == C || B == C)
        throw new IllegalArgumentException("Matrices must not be identical");

    long flops = 2L * m * n * p;
    // each thread should process at least 30000 flops
    int noOfTasks = (int) Math.min(flops / 30000, this.maxThreads);

    // prefer splitting B along columns when it has enough of them
    final boolean splitB = (p >= noOfTasks);
    final int width = splitB ? p : m;
    noOfTasks = Math.min(width, noOfTasks);

    if (noOfTasks < 2) {
        // parallelization doesn't pay off (too much start up overhead)
        seqBlas.dgemm(transposeA, transposeB, alpha, A, B, beta, C);
        return;
    }

    // set up concurrent tasks
    final int baseSpan = width / noOfTasks;
    final FJTask[] subTasks = new FJTask[noOfTasks];
    for (int i = 0; i < noOfTasks; i++) {
        final int offset = i * baseSpan;
        // last span may be a bit larger: it absorbs the remainder
        final int span = (i == noOfTasks - 1) ? width - baseSpan * i : baseSpan;

        final DoubleMatrix2D AA, BB, CC;
        if (splitB) {
            // split B along columns into blocks
            AA = A;
            BB = B.viewPart(0, offset, n, span);
            CC = C.viewPart(0, offset, m, span);
        } else {
            // split A along rows into blocks
            AA = A.viewPart(offset, 0, span, n);
            BB = B;
            CC = C.viewPart(offset, 0, span, p);
        }

        subTasks[i] = new FJTask() {
            public void run() {
                seqBlas.dgemm(transposeA, transposeB, alpha, AA, BB, beta, CC);
            }
        };
    }

    // run tasks and wait for completion
    try {
        this.smp.taskGroup.invoke(new FJTask() {
            public void run() {
                coInvoke(subTasks);
            }
        });
    } catch (InterruptedException exc) {
        // Do not swallow the interruption: restore the flag so that callers
        // further up the stack can observe it and react.
        Thread.currentThread().interrupt();
    }
}
Aggregations