use of org.bytedeco.javacpp.Pointer in project nd4j by deeplearning4j.
the class JcublasLapack method dpotrf.
/**
 * Runs cuSOLVER's double-precision {@code potrf} (Cholesky factorization) on {@code A}
 * and writes the triangular factor back into {@code A}.
 *
 * <p>If {@code A} is 'c'-ordered, the routine works on an 'f'-ordered copy and assigns
 * the result back afterwards. After the solver call the triangle that is not selected
 * by {@code uplo} is overwritten with zeros.
 *
 * @param uplo {@code 'U'} selects the upper-triangular post-processing path; any other
 *             value takes the lower-triangular path. The value is also passed through
 *             to cuSOLVER unchanged.
 * @param N    matrix order; also used as the leading dimension in the solver calls
 * @param A    matrix to factorize; overwritten with the result
 * @param INFO array whose device pointer is handed to {@code cusolverDnDpotrf} as its
 *             status output
 * @throws BlasException if setting the stream, querying the workspace size, or the
 *                       factorization itself reports a non-success status
 */
@Override
public void dpotrf(byte uplo, int N, INDArray A, INDArray INFO) {
    INDArray a = A;

    // This is the double-precision entry point: warn when the global data type disagrees.
    if (Nd4j.dataType() != DataBuffer.Type.DOUBLE) {
        log.warn("DOUBLE potrf called in FLOAT environment");
    }

    // The solver is called on an 'f'-ordered array, so work on a copy when A is 'c'-ordered.
    if (A.ordering() == 'c') {
        a = A.dup('f');
    }

    if (Nd4j.getExecutioner() instanceof GridExecutioner) {
        ((GridExecutioner) Nd4j.getExecutioner()).flushQueue();
    }

    // Get context for current thread
    CudaContext ctx = (CudaContext) allocator.getDeviceContext().getContext();

    // Set up the solver handle for the cuSolver calls
    cusolverDnHandle_t handle = ctx.getSolverHandle();
    cusolverDnContext solverDn = new cusolverDnContext(handle);

    // Serialize access to the solver handle
    synchronized (handle) {
        int result = cusolverDnSetStream(solverDn, new CUstream_st(ctx.getOldStream()));
        if (result != CUSOLVER_STATUS_SUCCESS) {
            throw new BlasException("solverSetStream failed");
        }

        // Transfer the INDArray into GPU memory
        CublasPointer xAPointer = new CublasPointer(a, ctx);

        // Output of the size query: how many elements the real operation needs as scratch space
        DataBuffer worksizeBuffer = Nd4j.getDataBufferFactory().createInt(1);

        // The work-size output is intentionally passed as a host pointer
        int stat = cusolverDnDpotrf_bufferSize(solverDn, uplo, N,
                (DoublePointer) xAPointer.getDevicePointer(), N,
                (IntPointer) worksizeBuffer.addressPointer());
        if (stat != CUSOLVER_STATUS_SUCCESS) {
            throw new BlasException("cusolverDnDpotrf_bufferSize failed", stat);
        }

        int worksize = worksizeBuffer.getInt(0);

        // Allocate the scratch workspace for the factorization
        Pointer workspace = new Workspace(worksize * Nd4j.sizeOfDataType());

        // Do the actual decomposition; INFO receives the solver's status output
        stat = cusolverDnDpotrf(solverDn, uplo, N,
                (DoublePointer) xAPointer.getDevicePointer(), N,
                new CudaPointer(workspace).asDoublePointer(), worksize,
                new CudaPointer(allocator.getPointer(INFO, ctx)).asIntPointer());
        if (stat != CUSOLVER_STATUS_SUCCESS) {
            throw new BlasException("cusolverDnDpotrf failed", stat);
        }
    }

    allocator.registerAction(ctx, a);
    allocator.registerAction(ctx, INFO);

    // Copy the result back when the work was done on an 'f'-ordered duplicate
    if (a != A) {
        A.assign(a);
    }

    if (uplo == 'U') {
        A.assign(A.transpose());
        // Zero everything strictly below the diagonal: row i, columns [0, i)
        INDArrayIndex[] ix = new INDArrayIndex[2];
        for (int i = 1; i < Math.min(A.rows(), A.columns()); i++) {
            ix[0] = NDArrayIndex.point(i);
            ix[1] = NDArrayIndex.interval(0, i);
            A.put(ix, 0);
        }
    } else {
        // Zero everything strictly above the diagonal: row i, columns [i + 1, columns)
        INDArrayIndex[] ix = new INDArrayIndex[2];
        for (int i = 0; i < Math.min(A.rows(), A.columns() - 1); i++) {
            ix[0] = NDArrayIndex.point(i);
            ix[1] = NDArrayIndex.interval(i + 1, A.columns());
            A.put(ix, 0);
        }
    }

    log.info("A: {}", A);
}
use of org.bytedeco.javacpp.Pointer in project bigbluebutton by bigbluebutton.
the class FFmpegFrameRecorder method recordImage.
/**
 * Feeds one image (or, when no image is supplied, a flush request) to the video
 * stream and writes the resulting packet.
 *
 * <p>When an image is given and {@code pixelFormat} is {@code AV_PIX_FMT_NONE}, the
 * pixel format is guessed from {@code depth} and {@code channels}. If the image format
 * or size differs from the codec context, the image is converted with swscale first.
 * For encoded output, the packet's pts/dts are replaced by {@code frameTimestamp}
 * whenever the encoder set them.
 *
 * @param width          image width in pixels
 * @param height         image height in pixels
 * @param depth          sample depth, one of the {@code Frame.DEPTH_*} values
 * @param channels       number of channels per pixel
 * @param stride         image stride; multiplied by {@code |depth| / 8} to get the line size
 * @param pixelFormat    pixel format of the image, or {@code AV_PIX_FMT_NONE} to guess it
 * @param frameTimestamp timestamp written into the packet's pts and dts
 * @param image          image planes; only {@code image[0]} is read. {@code null} or an
 *                       empty array means "no more frames"
 * @return {@code false} if nothing was written (raw output without an image, or the
 *         encoder buffered the frame); otherwise, for a non-null {@code image}, whether
 *         the packet carries the key-frame flag
 * @throws Exception if there is no video stream, the pixel format cannot be guessed,
 *                   the conversion context cannot be created, or encoding fails
 */
public boolean recordImage(int width, int height, int depth, int channels, int stride, int pixelFormat, long frameTimestamp, Buffer... image) throws Exception {
    if (video_st == null) {
        throw new Exception("No video output stream (Is imageWidth > 0 && imageHeight > 0 and has start() been called?)");
    }

    /* With no image there are no more frames to compress. The codec has a latency of
       a few frames if using B frames, so the last frames are fetched by calling the
       encoder again without a picture. */
    final boolean noImage = image == null || image.length == 0;

    if (!noImage) {
        int step = stride * Math.abs(depth) / 8;

        final Buffer first = image[0];
        final BytePointer data;
        if (first instanceof ByteBuffer) {
            data = new BytePointer((ByteBuffer) first.position(0));
        } else {
            data = new BytePointer(new Pointer(first.position(0)));
        }

        if (pixelFormat == AV_PIX_FMT_NONE) {
            final boolean byteDepth = depth == Frame.DEPTH_UBYTE || depth == Frame.DEPTH_BYTE;
            final boolean shortDepth = depth == Frame.DEPTH_USHORT || depth == Frame.DEPTH_SHORT;

            if (byteDepth && channels == 3) {
                pixelFormat = AV_PIX_FMT_BGR24;
            } else if (byteDepth && channels == 1) {
                pixelFormat = AV_PIX_FMT_GRAY8;
            } else if (shortDepth && channels == 1) {
                if (ByteOrder.nativeOrder().equals(ByteOrder.BIG_ENDIAN)) {
                    pixelFormat = AV_PIX_FMT_GRAY16BE;
                } else {
                    pixelFormat = AV_PIX_FMT_GRAY16LE;
                }
            } else if (byteDepth && channels == 4) {
                pixelFormat = AV_PIX_FMT_RGBA;
            } else if (byteDepth && channels == 2) {
                // Android's camera capture format
                pixelFormat = AV_PIX_FMT_NV21;
                step = width;
            } else {
                throw new Exception("Could not guess pixel format of image: depth=" + depth + ", channels=" + channels);
            }
        }

        final boolean matchesCodec = video_c.pix_fmt() == pixelFormat
                && video_c.width() == width
                && video_c.height() == height;

        if (matchesCodec) {
            // The image can be handed to the codec as is
            avpicture_fill(new AVPicture(picture), data, pixelFormat, width, height);
            picture.linesize(0, step);
            picture.format(pixelFormat);
            picture.width(width);
            picture.height(height);
        } else {
            /* convert to the codec pixel format if needed */
            img_convert_ctx = sws_getCachedContext(img_convert_ctx, width, height, pixelFormat,
                    video_c.width(), video_c.height(), video_c.pix_fmt(),
                    SWS_BILINEAR, null, null, (DoublePointer) null);
            if (img_convert_ctx == null) {
                throw new Exception("sws_getCachedContext() error: Cannot initialize the conversion context.");
            }

            avpicture_fill(new AVPicture(tmp_picture), data, pixelFormat, width, height);
            avpicture_fill(new AVPicture(picture), picture_buf, video_c.pix_fmt(), video_c.width(), video_c.height());

            tmp_picture.linesize(0, step);
            tmp_picture.format(pixelFormat);
            tmp_picture.width(width);
            tmp_picture.height(height);

            picture.format(video_c.pix_fmt());
            picture.width(video_c.width());
            picture.height(video_c.height());

            sws_scale(img_convert_ctx, new PointerPointer(tmp_picture), tmp_picture.linesize(),
                    0, height, new PointerPointer(picture), picture.linesize());
        }
    }

    if ((oformat.flags() & AVFMT_RAWPICTURE) != 0) {
        if (noImage) {
            return false;
        }

        /* raw video case. The API may change slightly in the future for that? */
        av_init_packet(video_pkt);
        video_pkt.flags(video_pkt.flags() | AV_PKT_FLAG_KEY);
        video_pkt.stream_index(video_st.index());
        video_pkt.data(new BytePointer(picture));
        video_pkt.size(Loader.sizeof(AVPicture.class));
    } else {
        /* encode the image */
        av_init_packet(video_pkt);
        video_pkt.data(video_outbuf);
        video_pkt.size(video_outbuf_size);
        picture.quality(video_c.global_quality());

        final int ret = avcodec_encode_video2(video_c, video_pkt, noImage ? null : picture, got_video_packet);
        if (ret < 0) {
            throw new Exception("avcodec_encode_video2() error " + ret + ": Could not encode video packet.");
        }

        // magic required by libx264
        picture.pts(picture.pts() + 1);

        /* if zero size, it means the image was buffered */
        if (got_video_packet[0] == 0) {
            return false;
        }

        // Override timestamp from system screen grabber. Otherwise, we will have skewed recorded file.
        // FfmpegFrameRecorder needs to propagate this timestamp into the avpacket sent to the server.
        // ralam - Sept. 14, 2016
        if (video_pkt.pts() != AV_NOPTS_VALUE) {
            video_pkt.pts(frameTimestamp);
        }
        if (video_pkt.dts() != AV_NOPTS_VALUE) {
            video_pkt.dts(frameTimestamp);
        }
        video_pkt.stream_index(video_st.index());
    }

    writePacket(AVMEDIA_TYPE_VIDEO, video_pkt);

    if (image != null) {
        return (video_pkt.flags() & AV_PKT_FLAG_KEY) != 0;
    }
    return got_video_packet[0] != 0;
}
use of org.bytedeco.javacpp.Pointer in project deeplearning4j by deeplearning4j.
the class CudnnSubsamplingHelper method backpropGradient.
/**
 * Back-propagates {@code epsilon} through the pooling operation using cuDNN.
 *
 * <p>Subsampling has no weights, so the returned gradient object is empty; only the
 * epsilon is transformed from shape [miniBatch, depth, outH, outW] to
 * [miniBatch, depth, inH, inW]. The pooled activations stored in the {@code reduced}
 * field are passed to cuDNN alongside the input and epsilon.
 *
 * @param input           layer input, read as a 4d array [miniBatch, depth, inH, inW]
 * @param epsilon         incoming epsilon; duplicated first if its strides fail
 *                        {@code Shape.strideDescendingCAscendingF}
 * @param kernel          pooling kernel size (height, width)
 * @param strides         pooling strides (height, width)
 * @param pad             padding (height, width); recomputed in {@code Same} mode
 * @param poolingType     pooling type; {@code NONE} returns {@code epsilon} unchanged
 * @param convolutionMode convolution mode used to compute the output size and padding
 * @return the (empty) gradient paired with the epsilon for the layer below, or
 *         {@code null} for a pooling type other than AVG, MAX or NONE
 */
@Override
public Pair<Gradient, INDArray> backpropGradient(INDArray input, INDArray epsilon, int[] kernel, int[] strides, int[] pad, PoolingType poolingType, ConvolutionMode convolutionMode) {
    final int batch = input.size(0);
    final int channels = input.size(1);
    final int inHeight = input.size(2);
    final int inWidth = input.size(3);

    // getOutputSize also performs validation; in Same mode the padding is derived from it
    final boolean sameMode = convolutionMode == ConvolutionMode.Same;
    final int[] outSize = ConvolutionUtils.getOutputSize(input, kernel, strides, sameMode ? null : pad, convolutionMode);
    if (sameMode) {
        pad = ConvolutionUtils.getSameModeTopLeftPadding(outSize, new int[] { inHeight, inWidth }, kernel, strides);
    }
    final int outHeight = outSize[0];
    final int outWidth = outSize[1];

    // Subsampling has no weights, so no gradients are calculated for this layer:
    // only epsilon is scaled and reshaped.
    final Gradient emptyGradient = new DefaultGradient();

    final int cudnnPoolingMode;
    switch (poolingType) {
        case MAX:
            cudnnPoolingMode = CUDNN_POOLING_MAX;
            break;
        case AVG:
            cudnnPoolingMode = CUDNN_POOLING_AVERAGE_COUNT_INCLUDE_PADDING;
            break;
        case NONE:
            return new Pair<>(emptyGradient, epsilon);
        default:
            return null;
    }

    // Other stride layouts are apparently not supported by cuDNN
    if (!Shape.strideDescendingCAscendingF(epsilon)) {
        epsilon = epsilon.dup();
    }

    final int[] inStride = input.stride();
    final int[] epsStride = epsilon.stride();

    if (Nd4j.getExecutioner() instanceof GridExecutioner) {
        ((GridExecutioner) Nd4j.getExecutioner()).flushQueue();
    }

    checkCudnn(cudnnSetTensor4dDescriptorEx(cudnnContext.srcTensorDesc, dataType,
            batch, channels, inHeight, inWidth,
            inStride[0], inStride[1], inStride[2], inStride[3]));
    checkCudnn(cudnnSetTensor4dDescriptorEx(cudnnContext.deltaTensorDesc, dataType,
            batch, channels, outHeight, outWidth,
            epsStride[0], epsStride[1], epsStride[2], epsStride[3]));
    checkCudnn(cudnnSetPooling2dDescriptor(cudnnContext.poolingDesc, cudnnPoolingMode, CUDNN_PROPAGATE_NAN,
            kernel[0], kernel[1], pad[0], pad[1], strides[0], strides[1]));

    // Epsilon for the layer below has the input's shape
    final INDArray epsilonNext = Nd4j.create(new int[] { batch, channels, inHeight, inWidth }, 'c');
    final int[] nextStride = epsilonNext.stride();
    checkCudnn(cudnnSetTensor4dDescriptorEx(cudnnContext.dstTensorDesc, dataType,
            batch, channels, inHeight, inWidth,
            nextStride[0], nextStride[1], nextStride[2], nextStride[3]));

    final Allocator allocator = AtomicAllocator.getInstance();
    final CudaContext context = allocator.getFlowController().prepareAction(input, epsilon, reduced, epsilonNext);

    final Pointer inputPtr = allocator.getPointer(input, context);
    final Pointer epsilonPtr = allocator.getPointer(epsilon, context);
    final Pointer reducedPtr = allocator.getPointer(reduced, context);
    final Pointer epsilonNextPtr = allocator.getPointer(epsilonNext, context);

    checkCudnn(cudnnSetStream(cudnnContext, new CUstream_st(context.getOldStream())));
    checkCudnn(cudnnPoolingBackward(cudnnContext, cudnnContext.poolingDesc, alpha,
            cudnnContext.deltaTensorDesc, reducedPtr,
            cudnnContext.deltaTensorDesc, epsilonPtr,
            cudnnContext.srcTensorDesc, inputPtr,
            beta,
            cudnnContext.dstTensorDesc, epsilonNextPtr));

    allocator.registerAction(context, input, epsilon, reduced, epsilonNext);

    return new Pair<>(emptyGradient, epsilonNext);
}
use of org.bytedeco.javacpp.Pointer in project deeplearning4j by deeplearning4j.
the class CudnnSubsamplingHelper method activate.
/**
 * Applies the pooling operation to {@code input} using cuDNN.
 *
 * <p>The result is also stored in the {@code reduced} field.
 *
 * @param input           layer input, read as a 4d array [miniBatch, depth, inH, inW]
 * @param training        not used by this method
 * @param kernel          pooling kernel size (height, width)
 * @param strides         pooling strides (height, width)
 * @param pad             padding (height, width); recomputed in {@code Same} mode
 * @param poolingType     pooling type; {@code NONE} returns {@code input} unchanged
 * @param convolutionMode convolution mode used to compute the output size and padding
 * @return the pooled activations with shape [miniBatch, depth, outH, outW], or
 *         {@code null} for a pooling type other than AVG, MAX or NONE
 */
@Override
public INDArray activate(INDArray input, boolean training, int[] kernel, int[] strides, int[] pad, PoolingType poolingType, ConvolutionMode convolutionMode) {
    final int batch = input.size(0);
    final int channels = input.size(1);
    final int inHeight = input.size(2);
    final int inWidth = input.size(3);

    // getOutputSize also performs validation; in Same mode the padding is derived from it
    final boolean sameMode = convolutionMode == ConvolutionMode.Same;
    final int[] outSize = ConvolutionUtils.getOutputSize(input, kernel, strides, sameMode ? null : pad, convolutionMode);
    if (sameMode) {
        pad = ConvolutionUtils.getSameModeTopLeftPadding(outSize, new int[] { inHeight, inWidth }, kernel, strides);
    }
    final int outHeight = outSize[0];
    final int outWidth = outSize[1];

    final int cudnnPoolingMode;
    switch (poolingType) {
        case MAX:
            cudnnPoolingMode = CUDNN_POOLING_MAX;
            break;
        case AVG:
            cudnnPoolingMode = CUDNN_POOLING_AVERAGE_COUNT_INCLUDE_PADDING;
            break;
        case NONE:
            return input;
        default:
            return null;
    }

    if (Nd4j.getExecutioner() instanceof GridExecutioner) {
        ((GridExecutioner) Nd4j.getExecutioner()).flushQueue();
    }

    final int[] inStride = input.stride();
    checkCudnn(cudnnSetPooling2dDescriptor(cudnnContext.poolingDesc, cudnnPoolingMode, CUDNN_PROPAGATE_NAN,
            kernel[0], kernel[1], pad[0], pad[1], strides[0], strides[1]));
    checkCudnn(cudnnSetTensor4dDescriptorEx(cudnnContext.srcTensorDesc, dataType,
            batch, channels, inHeight, inWidth,
            inStride[0], inStride[1], inStride[2], inStride[3]));

    // Keep the pooled output in the field as well as returning it
    reduced = Nd4j.createUninitialized(new int[] { batch, channels, outHeight, outWidth }, 'c');
    final int[] outStride = reduced.stride();
    checkCudnn(cudnnSetTensor4dDescriptorEx(cudnnContext.dstTensorDesc, dataType,
            batch, channels, outHeight, outWidth,
            outStride[0], outStride[1], outStride[2], outStride[3]));

    final Allocator allocator = AtomicAllocator.getInstance();
    final CudaContext context = allocator.getFlowController().prepareAction(input, reduced);

    final Pointer inputPtr = allocator.getPointer(input, context);
    final Pointer reducedPtr = allocator.getPointer(reduced, context);

    checkCudnn(cudnnSetStream(cudnnContext, new CUstream_st(context.getOldStream())));
    checkCudnn(cudnnPoolingForward(cudnnContext, cudnnContext.poolingDesc, alpha,
            cudnnContext.srcTensorDesc, inputPtr,
            beta,
            cudnnContext.dstTensorDesc, reducedPtr));

    allocator.registerAction(context, input, reduced);

    return reduced;
}
use of org.bytedeco.javacpp.Pointer in project deeplearning4j by deeplearning4j.
the class CudnnBatchNormalizationHelper method preOutput.
/**
 * Runs the cuDNN batch-normalization forward pass on {@code x}.
 *
 * <p>In training mode {@code cudnnBatchNormalizationForwardTraining} is called, with
 * {@code meanCache} and {@code varCache} (re)allocated as needed to hold at least as
 * many bytes as {@code mean} and {@code var} respectively. Otherwise
 * {@code cudnnBatchNormalizationForwardInference} is called.
 *
 * @param x        input, read as a 4d array [miniBatch, depth, height, width]
 * @param training selects the training or the inference cuDNN routine
 * @param shape    shape used for the gamma/beta tensor descriptor; missing 3rd and 4th
 *                 dimensions default to 1
 * @param gamma    scale parameters
 * @param beta     shift parameters
 * @param mean     mean array passed to cuDNN
 * @param var      variance array passed to cuDNN
 * @param decay    value passed to the training routine between the beta data and the
 *                 mean data
 * @param eps      epsilon passed to cuDNN; must not be below {@code CUDNN_BN_MIN_EPSILON}
 * @return a newly created 'c'-ordered array with the same shape as {@code x} holding
 *         the result
 * @throws IllegalArgumentException if {@code eps < CUDNN_BN_MIN_EPSILON}
 */
@Override
public INDArray preOutput(INDArray x, boolean training, int[] shape, INDArray gamma, INDArray beta, INDArray mean, INDArray var, double decay, double eps) {
    if (eps < CUDNN_BN_MIN_EPSILON) {
        throw new IllegalArgumentException("Error: eps < CUDNN_BN_MIN_EPSILON (" + eps + " < " + CUDNN_BN_MIN_EPSILON + ")");
    }

    int miniBatch = x.size(0);
    int inDepth = x.size(1);
    int inH = x.size(2);
    int inW = x.size(3);

    int[] srcStride = x.stride();
    checkCudnn(cudnnSetTensor4dDescriptorEx(cudnnContext.srcTensorDesc, dataType, miniBatch, inDepth, inH, inW,
            srcStride[0], srcStride[1], srcStride[2], srcStride[3]));

    INDArray activations = Nd4j.createUninitialized(new int[] { miniBatch, inDepth, inH, inW }, 'c');
    int[] dstStride = activations.stride();
    checkCudnn(cudnnSetTensor4dDescriptorEx(cudnnContext.dstTensorDesc, dataType, miniBatch, inDepth, inH, inW,
            dstStride[0], dstStride[1], dstStride[2], dstStride[3]));

    // Gamma/beta descriptor: pad the shape with 1s up to four dimensions
    checkCudnn(cudnnSetTensor4dDescriptor(cudnnContext.gammaBetaTensorDesc, tensorFormat, dataType,
            shape[0], shape[1], shape.length > 2 ? shape[2] : 1, shape.length > 3 ? shape[3] : 1));

    Allocator allocator = AtomicAllocator.getInstance();
    CudaContext context = allocator.getFlowController().prepareActionAllWrite(x, activations, gamma, beta, mean, var);

    Pointer srcData = allocator.getPointer(x, context);
    Pointer dstData = allocator.getPointer(activations, context);
    Pointer gammaData = allocator.getPointer(gamma, context);
    Pointer betaData = allocator.getPointer(beta, context);
    Pointer meanData = allocator.getPointer(mean, context);
    Pointer varData = allocator.getPointer(var, context);

    if (Nd4j.getExecutioner() instanceof GridExecutioner) {
        ((GridExecutioner) Nd4j.getExecutioner()).flushQueue();
    }

    checkCudnn(cudnnSetStream(cudnnContext, new CUstream_st(context.getOldStream())));

    if (training) {
        // Grow the caches when they are too small for the current mean/var buffers
        if (meanCache.capacity() < mean.data().length() * mean.data().getElementSize()) {
            meanCache.deallocate();
            meanCache = new Cache(mean.data().length() * mean.data().getElementSize());
        }
        // Size the variance cache from var's own buffer, not from mean's element size
        if (varCache.capacity() < var.data().length() * var.data().getElementSize()) {
            varCache.deallocate();
            varCache = new Cache(var.data().length() * var.data().getElementSize());
        }
        checkCudnn(cudnnBatchNormalizationForwardTraining(cudnnContext, batchNormMode, this.alpha, this.beta,
                cudnnContext.srcTensorDesc, srcData, cudnnContext.dstTensorDesc, dstData,
                cudnnContext.gammaBetaTensorDesc, gammaData, betaData, decay, meanData, varData, eps,
                meanCache, varCache));
    } else {
        checkCudnn(cudnnBatchNormalizationForwardInference(cudnnContext, batchNormMode, this.alpha, this.beta,
                cudnnContext.srcTensorDesc, srcData, cudnnContext.dstTensorDesc, dstData,
                cudnnContext.gammaBetaTensorDesc, gammaData, betaData, meanData, varData, eps));
    }

    allocator.getFlowController().registerActionAllWrite(context, x, activations, gamma, beta, mean, var);

    return activations;
}
Aggregations