Use of org.apache.sysml.runtime.controlprogram.caching.MatrixObject in project incubator-systemml by apache.
Class ExecutionContext, method getDenseMatrixOutputForGPUInstruction:
/**
 * Allocates a dense matrix on the GPU (for output).
 * @param varName name of the output matrix (known by this {@link ExecutionContext})
 * @param numRows number of rows of the matrix object
 * @param numCols number of columns of the matrix object
 * @return a pair containing the wrapping {@link MatrixObject} and a boolean indicating
 *         whether a CUDA memory allocation took place (as opposed to the space already
 *         being allocated)
 */
public Pair<MatrixObject, Boolean> getDenseMatrixOutputForGPUInstruction(String varName, long numRows, long numCols) {
    MatrixObject mo = allocateGPUMatrixObject(varName, numRows, numCols);
    boolean allocated = mo.getGPUObject(getGPUContext(0)).acquireDeviceModifyDense();
    // the number of non-zeros is unknown after a device-side modification; mark it invalid
    mo.getMatrixCharacteristics().setNonZeros(-1);
    return new Pair<>(mo, allocated);
}
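A minimal usage sketch (not from the source): it assumes a populated ExecutionContext ec whose variable map already contains an output variable named "out", and that SystemML's Pair type exposes getKey()/getValue() accessors; all names here are hypothetical.

Pair<MatrixObject, Boolean> p = ec.getDenseMatrixOutputForGPUInstruction("out", 1000, 1000);
MatrixObject out = p.getKey();     // the wrapping MatrixObject
boolean freshAlloc = p.getValue(); // true if a new CUDA allocation took place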
Use of org.apache.sysml.runtime.controlprogram.caching.MatrixObject in project incubator-systemml by apache.
Class ExternalFunctionInvocationInstruction, method getInputObjects:
@SuppressWarnings("incomplete-switch")
private ArrayList<FunctionParameter> getInputObjects(CPOperand[] inputs, LocalVariableMap vars) {
    ArrayList<FunctionParameter> ret = new ArrayList<>();
    for (CPOperand input : inputs) {
        switch (input.getDataType()) {
            case MATRIX:
                MatrixObject mobj = (MatrixObject) vars.get(input.getName());
                ret.add(new Matrix(mobj, getMatrixValueType(input.getValueType())));
                break;
            case SCALAR:
                ScalarObject so = (ScalarObject) vars.get(input.getName());
                ret.add(new Scalar(getScalarValueType(input.getValueType()), so.getStringValue()));
                break;
            case OBJECT:
                ret.add(new BinaryObject(vars.get(input.getName())));
                break;
        }
    }
    return ret;
}
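For illustration, a hedged sketch of consuming the returned list; the inputs array and vars map are assumed to be populated elsewhere, and the printed file name is purely illustrative.

ArrayList<FunctionParameter> params = getInputObjects(inputs, vars);
for (FunctionParameter fp : params) {
    if (fp instanceof Matrix) // matrix inputs arrive wrapped in the udf Matrix type
        System.out.println("matrix input: " + ((Matrix) fp).getMatrixObject().getFileName());
}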
Use of org.apache.sysml.runtime.controlprogram.caching.MatrixObject in project incubator-systemml by apache.
Class ExternalFunctionInvocationInstruction, method createOutputMatrixObject:
private MatrixObject createOutputMatrixObject(Matrix m) {
    MatrixObject ret = m.getMatrixObject();
    if (ret == null) {
        // no in-memory matrix object attached: create one from file metadata;
        // otherwise the in-memory matrix from the external function is passed
        // back to the invoking program as-is
        MatrixCharacteristics mc = new MatrixCharacteristics(m.getNumRows(), m.getNumCols(),
            ConfigurationManager.getBlocksize(), ConfigurationManager.getBlocksize());
        MetaDataFormat mfmd = new MetaDataFormat(mc, InputInfo.getMatchingOutputInfo(iinfo), iinfo);
        ret = new MatrixObject(Expression.ValueType.DOUBLE, m.getFilePath(), mfmd);
    }
    // allow in-memory package-support matrices without file names
    if (ret.getFileName().equals(Matrix.DEFAULT_FILENAME)) {
        ret.setFileName(createDefaultOutputFilePathAndName());
    }
    return ret;
}
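A sketch of the pass-through path, using the Matrix(MatrixObject, ValueType) constructor seen in getInputObjects above; existingMobj and the value-type constant are assumptions for illustration. Since the wrapped object is non-null, it is returned unchanged except for the default file name fix-up.

Matrix m = new Matrix(existingMobj, Matrix.ValueType.Double); // existingMobj is hypothetical
MatrixObject out = createOutputMatrixObject(m);               // non-null branch: returns existingMobj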
Use of org.apache.sysml.runtime.controlprogram.caching.MatrixObject in project incubator-systemml by apache.
Class DynamicWriteMatrixCP, method execute:
@Override
public void execute() {
    boolean success = false;
    try {
        Matrix mat = (Matrix) this.getFunctionInput(0);
        String fname = ((Scalar) this.getFunctionInput(1)).getValue();
        String format = ((Scalar) this.getFunctionInput(2)).getValue();
        MatrixObject mo = mat.getMatrixObject();
        MatrixCharacteristics mc = mo.getMatrixCharacteristics();
        OutputInfo oi = OutputInfo.stringToOutputInfo(format);
        MatrixBlock mb = mo.acquireRead();
        DataConverter.writeMatrixToHDFS(mb, fname, oi, mc);
        mo.release();
        success = true;
    } catch (Exception e) {
        throw new RuntimeException("Error executing dynamic write of matrix", e);
    }
    _success = new Scalar(ScalarValueType.Boolean, String.valueOf(success));
}
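The same write path can be exercised directly; a minimal sketch assuming an in-memory MatrixBlock mb and a "textcell" output format (the block, path, and format choice are assumptions, not from the source):

OutputInfo oi = OutputInfo.stringToOutputInfo("textcell");
MatrixCharacteristics mc = new MatrixCharacteristics(mb.getNumRows(), mb.getNumColumns(), -1, -1);
DataConverter.writeMatrixToHDFS(mb, "/tmp/out.mtx", oi, mc);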
Use of org.apache.sysml.runtime.controlprogram.caching.MatrixObject in project incubator-systemml by apache.
Class LibMatrixCUDA, method matmultTSMM:
// ********************************************************************/
// ************* End of DEEP LEARNING Operators ***********************/
// ********************************************************************/
// ********************************************************************/
// ********** TRANSPOSE SELF MATRIX MULTIPLY Functions ****************/
// ********************************************************************/
/**
 * Performs tsmm, A %*% A' or A' %*% A, on the GPU by exploiting cublasDsyrk(...)
 * <p>
 * Memory usage - if dense: input space - rows * cols, no intermediate memory, output - max(rows*rows, cols*cols);
 * if sparse: falls back to matmult
 *
 * @param ec execution context
 * @param gCtx a valid {@link GPUContext}
 * @param instName the invoking instruction's name for recording {@link Statistics}
 * @param left input matrix; in a tsmm expression like A %*% A' or A' %*% A, only the
 *             left operand's transposition matters, hence the name 'left'
 * @param outputName output matrix name
 * @param isLeftTransposed true if 'left' is transposed
 */
public static void matmultTSMM(ExecutionContext ec, GPUContext gCtx, String instName, MatrixObject left, String outputName, boolean isLeftTransposed) {
    if (LOG.isTraceEnabled()) {
        LOG.trace("GPU : matmultTSMM" + ", GPUContext=" + gCtx);
    }
    if (ec.getGPUContext(0) != gCtx)
        throw new DMLRuntimeException("GPU : Invalid internal state, the GPUContext set with the ExecutionContext is not the same used to run this LibMatrixCUDA function");
    if (isInSparseFormat(gCtx, left)) {
        // for sparse TSMM, invoke matmult (TODO: possible performance improvement)
        LibMatrixCuMatMult.matmult(ec, gCtx, instName, left, left, outputName, isLeftTransposed, !isLeftTransposed);
        return;
    }
    // Since cuBLAS expects inputs in column-major format,
    // reverse the order of the matrix multiplication and take care of the dimension mismatch.
    int transa = isLeftTransposed ? cublasOperation.CUBLAS_OP_N : cublasOperation.CUBLAS_OP_T;
    // note: the dimensions are swapped
    int m = toInt(isLeftTransposed ? left.getNumColumns() : left.getNumRows());
    int k = toInt(isLeftTransposed ? left.getNumRows() : left.getNumColumns());
    // For dense TSMM, exploit cublasDsyrk(...) and call a custom kernel to flip the matrix.
    // Allocate the dense output matrix.
    MatrixObject output = getDenseMatrixOutputForGPUInstruction(ec, instName, outputName, m, m);
    if (m == -1)
        throw new DMLRuntimeException("Incorrect dimensions");
    int lda = toInt(isLeftTransposed ? m : k);
    int ldc = m;
    if (!left.getGPUObject(gCtx).isAllocated())
        throw new DMLRuntimeException("Input is not allocated:" + left.getGPUObject(gCtx).isAllocated());
    if (!output.getGPUObject(gCtx).isAllocated())
        throw new DMLRuntimeException("Output is not allocated:" + output.getGPUObject(gCtx).isAllocated());
    Pointer A = getDensePointer(gCtx, left, instName);
    Pointer C = getDensePointer(gCtx, output, instName);
    long t0 = 0, t1 = 0;
    if (DMLScript.FINEGRAINED_STATISTICS)
        t0 = System.nanoTime();
    // syrk computes the symmetric rank-k update and fills only one triangle of C
    cudaSupportFunctions.cublassyrk(getCublasHandle(gCtx), cublasFillMode.CUBLAS_FILL_MODE_LOWER, transa, m, k, one(), A, lda, zero(), C, ldc);
    if (DMLScript.FINEGRAINED_STATISTICS)
        GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_SYRK_LIB, System.nanoTime() - t0);
    if (DMLScript.FINEGRAINED_STATISTICS)
        t1 = System.nanoTime();
    // mirror the computed triangle into the other half to produce the full symmetric result
    copyUpperToLowerTriangle(gCtx, instName, output);
    if (DMLScript.FINEGRAINED_STATISTICS)
        GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_UPPER_TO_LOWER_TRIANGLE_KERNEL, System.nanoTime() - t1);
}
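A hedged invocation sketch: assuming ec and gCtx are valid and a matrix variable "A" already resides on the GPU in the execution context, this computes t(A) %*% A into the output variable "out" (all variable and instruction names are hypothetical):

matmultTSMM(ec, gCtx, "gpu_tsmm", ec.getMatrixObject("A"), "out", true);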