use of org.apache.sysml.runtime.matrix.operators.RightScalarOperator in project incubator-systemml by apache.
the class BasicScalarOperationsSparseUnsafeTest method runScalarOperationsTest.
private static void runScalarOperationsTest(SparsityType sptype, ValueType vtype, boolean compress) {
try {
// prepare sparsity for input data
double sparsity = -1;
switch(sptype) {
case DENSE:
sparsity = sparsity1;
break;
case SPARSE:
sparsity = sparsity2;
break;
case EMPTY:
sparsity = sparsity3;
break;
}
// generate input data
double min = (vtype == ValueType.CONST) ? 10 : -10;
double[][] input = TestUtils.generateTestMatrix(rows, cols, min, 10, sparsity, 7);
if (vtype == ValueType.RAND_ROUND_OLE || vtype == ValueType.RAND_ROUND_DDC) {
CompressedMatrixBlock.ALLOW_DDC_ENCODING = (vtype == ValueType.RAND_ROUND_DDC);
input = TestUtils.round(input);
}
MatrixBlock mb = DataConverter.convertToMatrixBlock(input);
// compress given matrix block
CompressedMatrixBlock cmb = new CompressedMatrixBlock(mb);
if (compress)
cmb.compress();
// matrix-scalar uncompressed
ScalarOperator sop = new RightScalarOperator(Plus.getPlusFnObject(), 7);
MatrixBlock ret1 = (MatrixBlock) mb.scalarOperations(sop, new MatrixBlock());
// matrix-scalar compressed
MatrixBlock ret2 = (MatrixBlock) cmb.scalarOperations(sop, new MatrixBlock());
if (compress)
ret2 = ((CompressedMatrixBlock) ret2).decompress();
// compare result with input
double[][] d1 = DataConverter.convertToDoubleMatrix(ret1);
double[][] d2 = DataConverter.convertToDoubleMatrix(ret2);
TestUtils.compareMatrices(d1, d2, rows, cols, 0.0000001);
} catch (Exception ex) {
throw new RuntimeException(ex);
} finally {
CompressedMatrixBlock.ALLOW_DDC_ENCODING = true;
}
}
use of org.apache.sysml.runtime.matrix.operators.RightScalarOperator in project systemml by apache.
the class LibMatrixCUDA method squareMatrix.
/**
* Helper method to square a matrix in GPU memory
* @param gCtx a valid {@link GPUContext}
* @param instName the invoking instruction's name for record {@link Statistics}.
* @param in input matrix on GPU
* @param out output matrix on GPU
* @param rlen row length
* @param clen column length
*/
private static void squareMatrix(GPUContext gCtx, String instName, Pointer in, Pointer out, int rlen, int clen) {
ScalarOperator power2op = new RightScalarOperator(Power.getPowerFnObject(), 2);
matrixScalarOp(gCtx, instName, in, 2, rlen, clen, out, power2op);
}
use of org.apache.sysml.runtime.matrix.operators.RightScalarOperator in project systemml by apache.
the class LibMatrixCUDA method unaryAggregate.
// ********************************************************************/
// ******** End of TRANSPOSE SELF MATRIX MULTIPLY Functions ***********/
// ********************************************************************/
// ********************************************************************/
// **************** UNARY AGGREGATE Functions ************************/
// ********************************************************************/
/**
* Entry point to perform Unary aggregate operations on the GPU.
* The execution context object is used to allocate memory for the GPU.
*
* @param ec Instance of {@link ExecutionContext}, from which the output variable will be allocated
* @param gCtx a valid {@link GPUContext}
* @param instName name of the invoking instruction to record{@link Statistics}.
* @param in1 input matrix
* @param output output matrix/scalar name
* @param op Instance of {@link AggregateUnaryOperator} which encapsulates the direction of reduction/aggregation and the reduction operation.
*/
public static void unaryAggregate(ExecutionContext ec, GPUContext gCtx, String instName, MatrixObject in1, String output, AggregateUnaryOperator op) {
if (ec.getGPUContext(0) != gCtx)
throw new DMLRuntimeException("GPU : Invalid internal state, the GPUContext set with the ExecutionContext is not the same used to run this LibMatrixCUDA function");
if (LOG.isTraceEnabled()) {
LOG.trace("GPU : unaryAggregate" + ", GPUContext=" + gCtx);
}
final int REDUCTION_ALL = 1;
final int REDUCTION_ROW = 2;
final int REDUCTION_COL = 3;
final int REDUCTION_DIAG = 4;
// A kahan sum implemention is not provided. is a "uak+" or other kahan operator is encountered,
// it just does regular summation reduction.
final int OP_PLUS = 1;
final int OP_PLUS_SQ = 2;
final int OP_MEAN = 3;
final int OP_VARIANCE = 4;
final int OP_MULTIPLY = 5;
final int OP_MAX = 6;
final int OP_MIN = 7;
final int OP_MAXINDEX = 8;
final int OP_MININDEX = 9;
// Sanity Checks
if (!in1.getGPUObject(gCtx).isAllocated())
throw new DMLRuntimeException("Internal Error - The input is not allocated for a GPU Aggregate Unary:" + in1.getGPUObject(gCtx).isAllocated());
boolean isSparse = in1.getGPUObject(gCtx).isSparse();
IndexFunction indexFn = op.indexFn;
AggregateOperator aggOp = op.aggOp;
// Convert Reduction direction to a number
int reductionDirection = -1;
if (indexFn instanceof ReduceAll) {
reductionDirection = REDUCTION_ALL;
} else if (indexFn instanceof ReduceRow) {
reductionDirection = REDUCTION_ROW;
} else if (indexFn instanceof ReduceCol) {
reductionDirection = REDUCTION_COL;
} else if (indexFn instanceof ReduceDiag) {
reductionDirection = REDUCTION_DIAG;
} else {
throw new DMLRuntimeException("Internal Error - Invalid index function type, only reducing along rows, columns, diagonals or all elements is supported in Aggregate Unary operations");
}
if (reductionDirection == -1)
throw new DMLRuntimeException("Internal Error - Incorrect type of reduction direction set for aggregate unary GPU instruction");
// Convert function type to a number
int opIndex = -1;
if (aggOp.increOp.fn instanceof KahanPlus) {
opIndex = OP_PLUS;
} else if (aggOp.increOp.fn instanceof KahanPlusSq) {
opIndex = OP_PLUS_SQ;
} else if (aggOp.increOp.fn instanceof Mean) {
opIndex = OP_MEAN;
} else if (aggOp.increOp.fn instanceof CM) {
if (((CM) aggOp.increOp.fn).getAggOpType() != CMOperator.AggregateOperationTypes.VARIANCE)
throw new DMLRuntimeException("Internal Error - Invalid Type of CM operator for Aggregate Unary operation on GPU");
opIndex = OP_VARIANCE;
} else if (aggOp.increOp.fn instanceof Plus) {
opIndex = OP_PLUS;
} else if (aggOp.increOp.fn instanceof Multiply) {
opIndex = OP_MULTIPLY;
} else if (aggOp.increOp.fn instanceof Builtin) {
Builtin b = (Builtin) aggOp.increOp.fn;
switch(b.bFunc) {
case MAX:
opIndex = OP_MAX;
break;
case MIN:
opIndex = OP_MIN;
break;
case MAXINDEX:
opIndex = OP_MAXINDEX;
break;
case MININDEX:
opIndex = OP_MININDEX;
break;
default:
new DMLRuntimeException("Internal Error - Unsupported Builtin Function for Aggregate unary being done on GPU");
}
} else {
throw new DMLRuntimeException("Internal Error - Aggregate operator has invalid Value function");
}
if (opIndex == -1)
throw new DMLRuntimeException("Internal Error - Incorrect type of operation set for aggregate unary GPU instruction");
int rlen = (int) in1.getNumRows();
int clen = (int) in1.getNumColumns();
if (isSparse) {
// The strategy for the time being is to convert sparse to dense
// until a sparse specific kernel is written.
in1.getGPUObject(gCtx).sparseToDense(instName);
// long nnz = in1.getNnz();
// assert nnz > 0 : "Internal Error - number of non zeroes set to " + nnz + " in Aggregate Binary for GPU";
// MatrixObject out = ec.getSparseMatrixOutputForGPUInstruction(output, nnz);
// throw new DMLRuntimeException("Internal Error - Not implemented");
}
long outRLen = -1;
long outCLen = -1;
if (indexFn instanceof ReduceRow) {
// COL{SUM, MAX...}
outRLen = 1;
outCLen = clen;
} else if (indexFn instanceof ReduceCol) {
// ROW{SUM, MAX,...}
outRLen = rlen;
outCLen = 1;
}
Pointer out = null;
if (reductionDirection == REDUCTION_COL || reductionDirection == REDUCTION_ROW) {
// Matrix output
MatrixObject out1 = getDenseMatrixOutputForGPUInstruction(ec, instName, output, outRLen, outCLen);
out = getDensePointer(gCtx, out1, instName);
}
Pointer in = getDensePointer(gCtx, in1, instName);
int size = rlen * clen;
// For scalars, set the scalar output in the Execution Context object
switch(opIndex) {
case OP_PLUS:
{
switch(reductionDirection) {
case REDUCTION_ALL:
{
double result = reduceAll(gCtx, instName, "reduce_sum", in, size);
ec.setScalarOutput(output, new DoubleObject(result));
break;
}
case REDUCTION_COL:
{
// The names are a bit misleading, REDUCTION_COL refers to the direction (reduce all elements in a column)
reduceRow(gCtx, instName, "reduce_row_sum", in, out, rlen, clen);
break;
}
case REDUCTION_ROW:
{
reduceCol(gCtx, instName, "reduce_col_sum", in, out, rlen, clen);
break;
}
case REDUCTION_DIAG:
throw new DMLRuntimeException("Internal Error - Row, Column and Diag summation not implemented yet");
}
break;
}
case OP_PLUS_SQ:
{
// Calculate the squares in a temporary object tmp
Pointer tmp = gCtx.allocate(instName, size * sizeOfDataType);
squareMatrix(gCtx, instName, in, tmp, rlen, clen);
// Then do the sum on the temporary object and free it
switch(reductionDirection) {
case REDUCTION_ALL:
{
double result = reduceAll(gCtx, instName, "reduce_sum", tmp, size);
ec.setScalarOutput(output, new DoubleObject(result));
break;
}
case REDUCTION_COL:
{
// The names are a bit misleading, REDUCTION_COL refers to the direction (reduce all elements in a column)
reduceRow(gCtx, instName, "reduce_row_sum", tmp, out, rlen, clen);
break;
}
case REDUCTION_ROW:
{
reduceCol(gCtx, instName, "reduce_col_sum", tmp, out, rlen, clen);
break;
}
default:
throw new DMLRuntimeException("Internal Error - Unsupported reduction direction for summation squared");
}
gCtx.cudaFreeHelper(instName, tmp);
break;
}
case OP_MEAN:
{
switch(reductionDirection) {
case REDUCTION_ALL:
{
double result = reduceAll(gCtx, instName, "reduce_sum", in, size);
double mean = result / size;
ec.setScalarOutput(output, new DoubleObject(mean));
break;
}
case REDUCTION_COL:
{
reduceRow(gCtx, instName, "reduce_row_mean", in, out, rlen, clen);
break;
}
case REDUCTION_ROW:
{
reduceCol(gCtx, instName, "reduce_col_mean", in, out, rlen, clen);
break;
}
default:
throw new DMLRuntimeException("Internal Error - Unsupported reduction direction for mean");
}
break;
}
case OP_MULTIPLY:
{
switch(reductionDirection) {
case REDUCTION_ALL:
{
double result = reduceAll(gCtx, instName, "reduce_prod", in, size);
ec.setScalarOutput(output, new DoubleObject(result));
break;
}
default:
throw new DMLRuntimeException("Internal Error - Unsupported reduction direction for multiplication");
}
break;
}
case OP_MAX:
{
switch(reductionDirection) {
case REDUCTION_ALL:
{
double result = reduceAll(gCtx, instName, "reduce_max", in, size);
ec.setScalarOutput(output, new DoubleObject(result));
break;
}
case REDUCTION_COL:
{
reduceRow(gCtx, instName, "reduce_row_max", in, out, rlen, clen);
break;
}
case REDUCTION_ROW:
{
reduceCol(gCtx, instName, "reduce_col_max", in, out, rlen, clen);
break;
}
default:
throw new DMLRuntimeException("Internal Error - Unsupported reduction direction for max");
}
break;
}
case OP_MIN:
{
switch(reductionDirection) {
case REDUCTION_ALL:
{
double result = reduceAll(gCtx, instName, "reduce_min", in, size);
ec.setScalarOutput(output, new DoubleObject(result));
break;
}
case REDUCTION_COL:
{
reduceRow(gCtx, instName, "reduce_row_min", in, out, rlen, clen);
break;
}
case REDUCTION_ROW:
{
reduceCol(gCtx, instName, "reduce_col_min", in, out, rlen, clen);
break;
}
default:
throw new DMLRuntimeException("Internal Error - Unsupported reduction direction for min");
}
break;
}
case OP_VARIANCE:
{
// Temporary GPU array for
Pointer tmp = gCtx.allocate(instName, size * sizeOfDataType);
Pointer tmp2 = gCtx.allocate(instName, size * sizeOfDataType);
switch(reductionDirection) {
case REDUCTION_ALL:
{
double result = reduceAll(gCtx, instName, "reduce_sum", in, size);
double mean = result / size;
// Subtract mean from every element in the matrix
ScalarOperator minusOp = new RightScalarOperator(Minus.getMinusFnObject(), mean);
matrixScalarOp(gCtx, instName, in, mean, rlen, clen, tmp, minusOp);
squareMatrix(gCtx, instName, tmp, tmp2, rlen, clen);
double result2 = reduceAll(gCtx, instName, "reduce_sum", tmp2, size);
double variance = result2 / (size - 1);
ec.setScalarOutput(output, new DoubleObject(variance));
break;
}
case REDUCTION_COL:
{
reduceRow(gCtx, instName, "reduce_row_mean", in, out, rlen, clen);
// Subtract the row-wise mean from every element in the matrix
BinaryOperator minusOp = new BinaryOperator(Minus.getMinusFnObject());
matrixMatrixOp(gCtx, instName, in, out, rlen, clen, VectorShape.NONE.code(), VectorShape.COLUMN.code(), tmp, minusOp);
squareMatrix(gCtx, instName, tmp, tmp2, rlen, clen);
Pointer tmpRow = gCtx.allocate(instName, rlen * sizeOfDataType);
reduceRow(gCtx, instName, "reduce_row_sum", tmp2, tmpRow, rlen, clen);
ScalarOperator divideOp = new RightScalarOperator(Divide.getDivideFnObject(), clen - 1);
matrixScalarOp(gCtx, instName, tmpRow, clen - 1, rlen, 1, out, divideOp);
gCtx.cudaFreeHelper(instName, tmpRow);
break;
}
case REDUCTION_ROW:
{
reduceCol(gCtx, instName, "reduce_col_mean", in, out, rlen, clen);
// Subtract the columns-wise mean from every element in the matrix
BinaryOperator minusOp = new BinaryOperator(Minus.getMinusFnObject());
matrixMatrixOp(gCtx, instName, in, out, rlen, clen, VectorShape.NONE.code(), VectorShape.ROW.code(), tmp, minusOp);
squareMatrix(gCtx, instName, tmp, tmp2, rlen, clen);
Pointer tmpCol = gCtx.allocate(instName, clen * sizeOfDataType);
reduceCol(gCtx, instName, "reduce_col_sum", tmp2, tmpCol, rlen, clen);
ScalarOperator divideOp = new RightScalarOperator(Divide.getDivideFnObject(), rlen - 1);
matrixScalarOp(gCtx, instName, tmpCol, rlen - 1, 1, clen, out, divideOp);
gCtx.cudaFreeHelper(instName, tmpCol);
break;
}
default:
throw new DMLRuntimeException("Internal Error - Unsupported reduction direction for variance");
}
gCtx.cudaFreeHelper(instName, tmp);
gCtx.cudaFreeHelper(instName, tmp2);
break;
}
case OP_MAXINDEX:
{
switch(reductionDirection) {
case REDUCTION_COL:
throw new DMLRuntimeException("Internal Error - Column maxindex of matrix not implemented yet for GPU ");
default:
throw new DMLRuntimeException("Internal Error - Unsupported reduction direction for maxindex");
}
// break;
}
case OP_MININDEX:
{
switch(reductionDirection) {
case REDUCTION_COL:
throw new DMLRuntimeException("Internal Error - Column minindex of matrix not implemented yet for GPU ");
default:
throw new DMLRuntimeException("Internal Error - Unsupported reduction direction for minindex");
}
// break;
}
default:
throw new DMLRuntimeException("Internal Error - Invalid GPU Unary aggregate function!");
}
}
use of org.apache.sysml.runtime.matrix.operators.RightScalarOperator in project systemml by apache.
the class LibMatrixCUDA method matrixScalarArithmetic.
/**
* Entry point to perform elementwise matrix-scalar arithmetic operation specified by op
*
* @param ec execution context
* @param gCtx a valid {@link GPUContext}
* @param instName the invoking instruction's name for record {@link Statistics}.
* @param in input matrix
* @param outputName output matrix name
* @param isInputTransposed true if input transposed
* @param op scalar operator
*/
public static void matrixScalarArithmetic(ExecutionContext ec, GPUContext gCtx, String instName, MatrixObject in, String outputName, boolean isInputTransposed, ScalarOperator op) {
if (ec.getGPUContext(0) != gCtx)
throw new DMLRuntimeException("GPU : Invalid internal state, the GPUContext set with the ExecutionContext is not the same used to run this LibMatrixCUDA function");
double constant = op.getConstant();
if (LOG.isTraceEnabled()) {
LOG.trace("GPU : matrixScalarArithmetic, scalar: " + constant + ", GPUContext=" + gCtx);
}
int outRLen = isInputTransposed ? (int) in.getNumColumns() : (int) in.getNumRows();
int outCLen = isInputTransposed ? (int) in.getNumRows() : (int) in.getNumColumns();
// if(!isCUDALibAvailable) {
if (constant == 0) {
if (op.fn instanceof Plus || (op.fn instanceof Minus && op instanceof RightScalarOperator) || op.fn instanceof Or) {
deviceCopy(ec, gCtx, instName, in, outputName, isInputTransposed);
} else if (op.fn instanceof Multiply || op.fn instanceof And) {
setOutputToConstant(ec, gCtx, instName, 0.0, outputName, outRLen, outCLen);
} else if (op.fn instanceof Power) {
setOutputToConstant(ec, gCtx, instName, 1.0, outputName, outRLen, outCLen);
} else // TODO:
// x/0.0 is either +Infinity or -Infinity according to Java.
// In the context of a matrix, different elements of the matrix
// could have different values.
// If the IEEE 754 standard defines otherwise, this logic needs
// to be re-enabled and the Java computation logic for divide by zero
// needs to be revisited
// else if(op.fn instanceof Divide && isSparseAndEmpty(gCtx, in)) {
// setOutputToConstant(ec, gCtx, instName, Double.NaN, outputName);
// }
// else if(op.fn instanceof Divide) {
// //For division, IEEE 754 defines x/0.0 as INFINITY and 0.0/0.0 as NaN.
// compareAndSet(ec, gCtx, instName, in, outputName, 0.0, 1e-6, Double.NaN, Double.POSITIVE_INFINITY, Double.POSITIVE_INFINITY);
// }
{
// TODO: Potential to optimize
matrixScalarOp(ec, gCtx, instName, in, outputName, isInputTransposed, op);
}
} else if (constant == 1.0 && op.fn instanceof Or) {
setOutputToConstant(ec, gCtx, instName, 1.0, outputName, outRLen, outCLen);
} else if (constant == 1.0 && (op.fn instanceof And || op.fn instanceof Power)) {
deviceCopy(ec, gCtx, instName, in, outputName, isInputTransposed);
} else {
matrixScalarOp(ec, gCtx, instName, in, outputName, isInputTransposed, op);
}
// }
// else {
// double alpha = 0;
// if(op.fn instanceof Multiply) {
// alpha = op.getConstant();
// }
// else if(op.fn instanceof Divide && op instanceof RightScalarOperator) {
// alpha = Math.pow(op.getConstant(), -1.0);
// }
// else {
// throw new DMLRuntimeException("Unsupported op");
// }
// TODO: Performance optimization: Call cublasDaxpy if(in.getNumRows() == 1 || in.getNumColumns() == 1)
// C = alpha* op( A ) + beta* op ( B )
// dgeam(ec, gCtx, instName, in, in, outputName, isInputTransposed, isInputTransposed, alpha, 0.0);
// }
}
use of org.apache.sysml.runtime.matrix.operators.RightScalarOperator in project systemml by apache.
the class LibMatrixCUDA method matrixMatrixOp.
/**
* Utility to launch binary cellwise matrix-matrix operations CUDA kernel
*
* @param gCtx a valid {@link GPUContext}
* @param ec execution context
* @param instName the invoking instruction's name for record {@link Statistics}.
* @param in1 left input matrix
* @param in2 right input matrix
* @param outputName output variable name
* @param isLeftTransposed true if left matrix is transposed
* @param isRightTransposed true if right matrix is transposed
* @param op operator
*/
private static void matrixMatrixOp(ExecutionContext ec, GPUContext gCtx, String instName, MatrixObject in1, MatrixObject in2, String outputName, boolean isLeftTransposed, boolean isRightTransposed, BinaryOperator op) {
if (ec.getGPUContext(0) != gCtx)
throw new DMLRuntimeException("GPU : Invalid internal state, the GPUContext set with the ExecutionContext is not the same used to run this LibMatrixCUDA function");
boolean isEmpty1 = isSparseAndEmpty(gCtx, in1);
boolean isEmpty2 = isSparseAndEmpty(gCtx, in2);
int rlenA = toInt(in1.getNumRows());
int rlenB = toInt(in2.getNumRows());
int clenA = toInt(in1.getNumColumns());
int clenB = toInt(in2.getNumColumns());
int vecStatusA = getVectorStatus(rlenA, clenA).code();
int vecStatusB = getVectorStatus(rlenB, clenB).code();
if (isLeftTransposed || isRightTransposed) {
throw new DMLRuntimeException("Unsupported operator: GPU transposed binary op " + isLeftTransposed + " " + isRightTransposed);
}
long outRLen = Math.max(rlenA, rlenB);
long outCLen = Math.max(clenA, clenB);
if (isEmpty1 && isEmpty2) {
MatrixObject out = ec.allocateGPUMatrixObject(outputName, outRLen, outCLen);
// When both inputs are empty, the output is empty too (except in the case of division)
if (op.fn instanceof Divide || op.fn instanceof IntegerDivide || op.fn instanceof Modulus) {
out.getGPUObject(gCtx).allocateAndFillDense(Double.NaN);
} else if (op.fn instanceof Minus1Multiply) {
out.getGPUObject(gCtx).allocateAndFillDense(1.0);
} else {
out.getGPUObject(gCtx).allocateSparseAndEmpty();
}
} else // Check for M1 * M2 when M1 is empty; if M2 is a vector then fallback to general case
if (isEmpty1 && clenB != 1 && rlenB != 1) {
// C = empty_in1 op in2 ==> becomes ==> C = 0.0 op in2
matrixScalarArithmetic(ec, gCtx, instName, in2, outputName, isRightTransposed, new LeftScalarOperator(op.fn, 0.0));
} else // Check for M1 * M2 when M2 is empty; if M1 is a vector then fallback to general case
if (isEmpty2 && clenA != 1 && rlenA != 1) {
// C = in1 op empty_in2 ==> becomes ==> C = in1 op 0.0
matrixScalarArithmetic(ec, gCtx, instName, in1, outputName, isLeftTransposed, new RightScalarOperator(op.fn, 0.0));
} else {
// TODO: FIXME: Implement sparse binCellSparseOp kernel
Pointer A = getDensePointer(gCtx, in1, instName);
// TODO: FIXME: Implement sparse binCellSparseOp kernel
Pointer B = getDensePointer(gCtx, in2, instName);
// Allocated the dense output matrix
MatrixObject out = null;
try {
out = getDenseMatrixOutputForGPUInstruction(ec, instName, outputName, outRLen, outCLen);
} catch (DMLRuntimeException e) {
throw new DMLRuntimeException("Incorrect dimensions: dimA:[" + rlenA + "," + clenA + "]" + " dimB:[" + rlenB + "," + clenB + "] out:[" + outRLen + "," + outCLen + "]", e);
}
Pointer C = getDensePointer(gCtx, out, instName);
int maxRlen = Math.max(rlenA, rlenB);
int maxClen = Math.max(clenA, clenB);
matrixMatrixOp(gCtx, instName, A, B, maxRlen, maxClen, vecStatusA, vecStatusB, C, op);
}
}
Aggregations