Search in sources :

Example 1 with FJTask

use of EDU.oswego.cs.dl.util.concurrent.FJTask in project tdq-studio-se by Talend.

The method run of the class Smp.

/**
 * Applies {@code function} to each block of {@code blocksA} (paired with the block of
 * {@code blocksB} at the same index) as one fork/join task per block, then runs all
 * tasks through {@code taskGroup} and waits for completion.
 *
 * @param blocksA  the first operands; one task is created per element
 * @param blocksB  the second operands, indexed like {@code blocksA}; may be {@code null},
 *                 in which case {@code null} is passed to {@code function} as second argument
 * @param results  receives the value returned by {@code function} for block {@code k} at
 *                 index {@code k}; may be {@code null}, in which case results are discarded
 * @param function the function applied to every block pair
 */
protected void run(final DoubleMatrix2D[] blocksA, final DoubleMatrix2D[] blocksB, final double[] results, final Matrix2DMatrix2DFunction function) {
    final FJTask[] subTasks = new FJTask[blocksA.length];
    for (int i = 0; i < blocksA.length; i++) {
        // anonymous classes can only capture final locals
        final int k = i;
        subTasks[i] = new FJTask() {

            public void run() {
                double result = function.apply(blocksA[k], blocksB != null ? blocksB[k] : null);
                if (results != null) {
                    results[k] = result;
                }
            }
        };
    }
    // run tasks and wait for completion
    try {
        this.taskGroup.invoke(new FJTask() {

            public void run() {
                coInvoke(subTasks);
            }
        });
    } catch (InterruptedException exc) {
        // The signature cannot propagate the checked exception, so restore the interrupt
        // status instead of discarding it; callers can then detect the interruption.
        Thread.currentThread().interrupt();
    }
}
Also used : FJTask(EDU.oswego.cs.dl.util.concurrent.FJTask)

Example 2 with FJTask

use of EDU.oswego.cs.dl.util.concurrent.FJTask in project tdq-studio-se by Talend.

The method dgemv of the class SmpBlas.

/**
 * Parallel front end for {@code seqBlas.dgemv}: splits {@code A} and {@code y} along the
 * rows of {@code A} into blocks, runs {@code seqBlas.dgemv} on every block as a separate
 * fork/join task and waits for completion.
 * (Presumably the BLAS operation {@code y = alpha*A*x + beta*y}; the arithmetic itself
 * lives in {@code seqBlas} -- confirm there.)
 *
 * <p>If {@code transposeA} is set, the call is redirected to the non-transposed case on
 * {@code A.viewDice()}. If fewer than two tasks would result, the work is delegated
 * to {@code seqBlas.dgemv} directly.
 *
 * @param transposeA whether {@code A} is to be used transposed
 * @param alpha      scalar passed through to {@code seqBlas.dgemv}
 * @param A          the matrix operand; split along its rows
 * @param x          the vector operand; shared unsplit by all tasks
 * @param beta       scalar passed through to {@code seqBlas.dgemv}
 * @param y          the vector that is split in step with the rows of {@code A}
 */
public void dgemv(final boolean transposeA, final double alpha, DoubleMatrix2D A, final DoubleMatrix1D x, final double beta, DoubleMatrix1D y) {
    /*
    split A, as follows:

            x x
            x
            x
    A
    xxx     x y
    xxx     x
    ---     -
    xxx     x
    xxx     x
    ---     -
    xxx     x

    */
    if (transposeA) {
        dgemv(false, alpha, A.viewDice(), x, beta, y);
        return;
    }
    int m = A.rows();
    int n = A.columns();
    long flops = 2L * m * n;
    // each thread should process at least 30000 flops
    int noOfTasks = (int) Math.min(flops / 30000, this.maxThreads);
    // A is split along its rows, so there can be no more tasks than rows
    int width = m;
    noOfTasks = Math.min(width, noOfTasks);
    if (noOfTasks < 2) {
        // parallelization doesn't pay off (too much start up overhead)
        seqBlas.dgemv(transposeA, alpha, A, x, beta, y);
        return;
    }
    // set up concurrent tasks
    final int baseSpan = width / noOfTasks;
    final FJTask[] subTasks = new FJTask[noOfTasks];
    for (int i = 0; i < noOfTasks; i++) {
        final int offset = i * baseSpan;
        // last span may be a bit larger: it also takes the remainder rows
        final int span = (i == noOfTasks - 1) ? width - offset : baseSpan;
        // split A along rows into blocks
        final DoubleMatrix2D AA = A.viewPart(offset, 0, span, n);
        final DoubleMatrix1D yy = y.viewPart(offset, span);
        subTasks[i] = new FJTask() {

            public void run() {
                seqBlas.dgemv(transposeA, alpha, AA, x, beta, yy);
            }
        };
    }
    // run tasks and wait for completion
    try {
        this.smp.taskGroup.invoke(new FJTask() {

            public void run() {
                coInvoke(subTasks);
            }
        });
    } catch (InterruptedException exc) {
        // The signature cannot propagate the checked exception, so restore the interrupt
        // status instead of discarding it; callers can then detect the interruption.
        Thread.currentThread().interrupt();
    }
}
Also used : FJTask(EDU.oswego.cs.dl.util.concurrent.FJTask) DoubleMatrix2D(cern.colt.matrix.DoubleMatrix2D) DoubleMatrix1D(cern.colt.matrix.DoubleMatrix1D)

Example 3 with FJTask

use of EDU.oswego.cs.dl.util.concurrent.FJTask in project tdq-studio-se by Talend.

The method dgemm of the class SmpBlas.

/**
 * Parallel front end for {@code seqBlas.dgemm}: splits the operands into blocks, runs
 * {@code seqBlas.dgemm} on every block as a separate fork/join task and waits for
 * completion.
 * (Presumably the BLAS operation {@code C = alpha*A*B + beta*C}; the arithmetic itself
 * lives in {@code seqBlas} -- confirm there.)
 *
 * <p>Transposition flags are resolved first by redirecting to the non-transposed case on
 * the {@code viewDice()} view of the respective matrix. If {@code B} has at least as many
 * columns as there are tasks, {@code B} and {@code C} are split along columns; otherwise
 * {@code A} and {@code C} are split along rows. If fewer than two tasks would result,
 * the work is delegated to {@code seqBlas.dgemm} directly.
 *
 * @param transposeA whether {@code A} is to be used transposed
 * @param transposeB whether {@code B} is to be used transposed
 * @param alpha      scalar passed through to {@code seqBlas.dgemm}
 * @param A          the first matrix operand
 * @param B          the second matrix operand
 * @param beta       scalar passed through to {@code seqBlas.dgemm}
 * @param C          the result matrix; must be {@code A.rows() x B.columns()}
 * @throws IllegalArgumentException if {@code B.rows() != A.columns()}, if {@code C} does
 *         not have the shape {@code A.rows() x B.columns()}, or if {@code C} is the same
 *         object as {@code A} or {@code B}
 */
public void dgemm(final boolean transposeA, final boolean transposeB, final double alpha, final DoubleMatrix2D A, final DoubleMatrix2D B, final double beta, final DoubleMatrix2D C) {
    /*
    determine how to split and parallelize best into blocks
    if more B.columns than tasks --> split B.columns, as follows:

            xx|xx|xxx B
            xx|xx|xxx
            xx|xx|xxx
    A
    xxx     xx|xx|xxx C
    xxx     xx|xx|xxx
    xxx     xx|xx|xxx
    xxx     xx|xx|xxx
    xxx     xx|xx|xxx

    if less B.columns than tasks --> split A.rows, as follows:

            xxxxxxx B
            xxxxxxx
            xxxxxxx
    A
    xxx     xxxxxxx C
    xxx     xxxxxxx
    ---     -------
    xxx     xxxxxxx
    xxx     xxxxxxx
    ---     -------
    xxx     xxxxxxx

    */
    if (transposeA) {
        dgemm(false, transposeB, alpha, A.viewDice(), B, beta, C);
        return;
    }
    if (transposeB) {
        dgemm(transposeA, false, alpha, A, B.viewDice(), beta, C);
        return;
    }
    int m = A.rows();
    int n = A.columns();
    int p = B.columns();
    if (B.rows() != n) {
        throw new IllegalArgumentException("Matrix2D inner dimensions must agree:" + A.toStringShort() + ", " + B.toStringShort());
    }
    if (C.rows() != m || C.columns() != p) {
        throw new IllegalArgumentException("Incompatibel result matrix: " + A.toStringShort() + ", " + B.toStringShort() + ", " + C.toStringShort());
    }
    if (A == C || B == C) {
        throw new IllegalArgumentException("Matrices must not be identical");
    }
    long flops = 2L * m * n * p;
    // each thread should process at least 30000 flops
    int noOfTasks = (int) Math.min(flops / 30000, this.maxThreads);
    // prefer splitting B by columns when it has enough of them, else split A by rows
    boolean splitB = (p >= noOfTasks);
    int width = splitB ? p : m;
    noOfTasks = Math.min(width, noOfTasks);
    if (noOfTasks < 2) {
        // parallelization doesn't pay off (too much start up overhead)
        seqBlas.dgemm(transposeA, transposeB, alpha, A, B, beta, C);
        return;
    }
    // set up concurrent tasks
    final int baseSpan = width / noOfTasks;
    final FJTask[] subTasks = new FJTask[noOfTasks];
    for (int i = 0; i < noOfTasks; i++) {
        final int offset = i * baseSpan;
        // last span may be a bit larger: it also takes the remainder
        final int span = (i == noOfTasks - 1) ? width - offset : baseSpan;
        final DoubleMatrix2D AA, BB, CC;
        if (splitB) {
            // split B along columns into blocks
            AA = A;
            BB = B.viewPart(0, offset, n, span);
            CC = C.viewPart(0, offset, m, span);
        } else {
            // split A along rows into blocks
            AA = A.viewPart(offset, 0, span, n);
            BB = B;
            CC = C.viewPart(offset, 0, span, p);
        }
        subTasks[i] = new FJTask() {

            public void run() {
                seqBlas.dgemm(transposeA, transposeB, alpha, AA, BB, beta, CC);
            }
        };
    }
    // run tasks and wait for completion
    try {
        this.smp.taskGroup.invoke(new FJTask() {

            public void run() {
                coInvoke(subTasks);
            }
        });
    } catch (InterruptedException exc) {
        // The signature cannot propagate the checked exception, so restore the interrupt
        // status instead of discarding it; callers can then detect the interruption.
        Thread.currentThread().interrupt();
    }
}
Also used : FJTask(EDU.oswego.cs.dl.util.concurrent.FJTask) DoubleMatrix2D(cern.colt.matrix.DoubleMatrix2D)

Aggregations

FJTask (EDU.oswego.cs.dl.util.concurrent.FJTask): 3, DoubleMatrix2D (cern.colt.matrix.DoubleMatrix2D): 2, DoubleMatrix1D (cern.colt.matrix.DoubleMatrix1D): 1