use of org.nd4j.linalg.api.buffer.DataBuffer in project nd4j by deeplearning4j.
the class ConstantBuffersCache, method getConstantBuffer (int[] overload).
@Override
public DataBuffer getConstantBuffer(int[] array) {
    ArrayDescriptor descriptor = new ArrayDescriptor(array);
    if (!buffersCache.containsKey(descriptor)) {
        DataBuffer buffer = Nd4j.createBufferDetached(array);
        // int arrays with length < 4 are always cached: 99.9% of the time they are just dimension arrays,
        // and we don't want to recreate them over and over
        if (counter.get() < MAX_ENTRIES || array.length < 4) {
            counter.incrementAndGet();
            buffersCache.put(descriptor, buffer);
            bytes.addAndGet(array.length * 4);
        }
        return buffer;
    }
    return buffersCache.get(descriptor);
}
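For context, a caller-side sketch of how such a cached constant buffer is typically obtained through the constant handler (the dimension array below is purely illustrative, and the handler wiring plus the usual org.nd4j.linalg imports are assumed):

// hypothetical usage sketch: small int arrays (length < 4) are always cached,
// so repeated requests for the same dimension array reuse one detached DataBuffer
int[] dimensions = new int[] {0, 1};
DataBuffer first = Nd4j.getConstantHandler().getConstantBuffer(dimensions);
DataBuffer second = Nd4j.getConstantHandler().getConstantBuffer(new int[] {0, 1});
// once the descriptor is in the cache, both calls resolve to the same buffer instance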
use of org.nd4j.linalg.api.buffer.DataBuffer in project nd4j by deeplearning4j.
the class ConstantBuffersCache, method getConstantBuffer (float[] overload).
@Override
public DataBuffer getConstantBuffer(float[] array) {
    ArrayDescriptor descriptor = new ArrayDescriptor(array);
    if (!buffersCache.containsKey(descriptor)) {
        DataBuffer buffer = Nd4j.createBufferDetached(array);
        if (counter.get() < MAX_ENTRIES) {
            counter.incrementAndGet();
            buffersCache.put(descriptor, buffer);
            bytes.addAndGet(array.length * Nd4j.sizeOfDataType());
        }
        return buffer;
    }
    return buffersCache.get(descriptor);
}
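Both overloads above rely on fields of ConstantBuffersCache that the snippets do not show. A minimal sketch of the state they assume (field names taken from the snippets; types, initial values and the MAX_ENTRIES cap are assumptions, not the verbatim nd4j source):

// assumed surrounding state for ConstantBuffersCache (sketch only)
private final Map<ArrayDescriptor, DataBuffer> buffersCache = new ConcurrentHashMap<>();
private final AtomicInteger counter = new AtomicInteger(0); // number of cached buffers
private final AtomicLong bytes = new AtomicLong(0);         // approximate cached footprint in bytes
private static final int MAX_ENTRIES = 1000;                // assumed cap on cached entries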
use of org.nd4j.linalg.api.buffer.DataBuffer in project nd4j by deeplearning4j.
the class DirectSparseInfoProvider, method createSparseInformation.
@Override
public DataBuffer createSparseInformation(int[] flags, long[] sparseOffsets, int[] hiddenDimensions, int underlyingRank) {
    SparseDescriptor descriptor = new SparseDescriptor(flags, sparseOffsets, hiddenDimensions, underlyingRank);
    if (!sparseCache.containsKey(descriptor)) {
        if (counter.get() < MAX_ENTRIES) {
            // re-check: another thread may have inserted this descriptor since the first containsKey call
            if (!sparseCache.containsKey(descriptor)) {
                counter.incrementAndGet();
                DataBuffer buffer = Shape.createSparseInformation(flags, sparseOffsets, hiddenDimensions, underlyingRank);
                sparseCache.put(descriptor, buffer);
                return buffer;
            }
        } else {
            // cache is full: build the buffer without caching it
            return Shape.createSparseInformation(flags, sparseOffsets, hiddenDimensions, underlyingRank);
        }
    }
    return sparseCache.get(descriptor);
}
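The nested containsKey calls above are a non-atomic check-then-insert, so under contention two threads can still both build a buffer for the same descriptor. If the backing map is a ConcurrentHashMap, an alternative sketch (not the nd4j implementation) makes the insert atomic with computeIfAbsent:

// alternative sketch, assuming sparseCache is a ConcurrentHashMap
public DataBuffer createSparseInformationAtomic(int[] flags, long[] sparseOffsets,
                                                int[] hiddenDimensions, int underlyingRank) {
    SparseDescriptor descriptor = new SparseDescriptor(flags, sparseOffsets, hiddenDimensions, underlyingRank);
    DataBuffer cached = sparseCache.get(descriptor);
    if (cached != null)
        return cached;
    if (counter.get() >= MAX_ENTRIES)
        // cache is full: build a throwaway buffer, exactly as the original method does
        return Shape.createSparseInformation(flags, sparseOffsets, hiddenDimensions, underlyingRank);
    return sparseCache.computeIfAbsent(descriptor, d -> {
        counter.incrementAndGet();
        return Shape.createSparseInformation(flags, sparseOffsets, hiddenDimensions, underlyingRank);
    });
}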
use of org.nd4j.linalg.api.buffer.DataBuffer in project nd4j by deeplearning4j.
the class JCublasNDArray, method leverageTo.
@Override
public INDArray leverageTo(String id) {
    if (!isAttached()) {
        // log.info("Skipping detached");
        return this;
    }
    if (!Nd4j.getWorkspaceManager().checkIfWorkspaceExists(id)) {
        // log.info("Skipping non-existent");
        return this;
    }
    MemoryWorkspace current = Nd4j.getMemoryManager().getCurrentWorkspace();
    MemoryWorkspace target = Nd4j.getWorkspaceManager().getWorkspaceForCurrentThread(id);
    if (current == target) {
        // log.info("Skipping equals A");
        return this;
    }
    if (this.data.getParentWorkspace() == target) {
        // log.info("Skipping equals B");
        return this;
    }
    Nd4j.getMemoryManager().setCurrentWorkspace(target);
    // log.info("Leveraging...");
    INDArray copy = null;
    if (!this.isView()) {
        Nd4j.getExecutioner().commit();
        DataBuffer buffer = Nd4j.createBuffer(this.lengthLong(), false);
        AllocationPoint pointDst = AtomicAllocator.getInstance().getAllocationPoint(buffer);
        AllocationPoint pointSrc = AtomicAllocator.getInstance().getAllocationPoint(this.data);
        CudaContext context = AtomicAllocator.getInstance().getFlowController().prepareAction(pointDst, pointSrc);
        /*
        if (NativeOpsHolder.getInstance().getDeviceNativeOps().memsetAsync(pointDst.getDevicePointer(), 0, 1, 0, context.getOldStream()) == 0)
            throw new ND4JIllegalStateException("memsetAsync 1 failed");
        context.syncOldStream();
        if (NativeOpsHolder.getInstance().getDeviceNativeOps().memsetAsync(pointSrc.getDevicePointer(), 0, 1, 0, context.getOldStream()) == 0)
            throw new ND4JIllegalStateException("memsetAsync 2 failed");
        context.syncOldStream();
        */
        if (pointSrc.isActualOnDeviceSide()) {
            if (NativeOpsHolder.getInstance().getDeviceNativeOps().memcpyAsync(pointDst.getDevicePointer(), pointSrc.getDevicePointer(), this.lengthLong() * Nd4j.sizeOfDataType(buffer.dataType()), CudaConstants.cudaMemcpyDeviceToDevice, context.getOldStream()) == 0)
                throw new ND4JIllegalStateException("memcpyAsync failed");
        } else {
            if (NativeOpsHolder.getInstance().getDeviceNativeOps().memcpyAsync(pointDst.getDevicePointer(), pointSrc.getHostPointer(), this.lengthLong() * Nd4j.sizeOfDataType(buffer.dataType()), CudaConstants.cudaMemcpyHostToDevice, context.getOldStream()) == 0)
                throw new ND4JIllegalStateException("memcpyAsync failed");
        }
        context.syncOldStream();
        copy = Nd4j.createArrayFromShapeBuffer(buffer, this.shapeInfoDataBuffer());
        // tag buffer as valid on device side
        pointDst.tickHostRead();
        pointDst.tickDeviceWrite();
        AtomicAllocator.getInstance().getFlowController().registerAction(context, pointDst, pointSrc);
    } else {
        copy = this.dup(this.ordering());
        Nd4j.getExecutioner().commit();
    }
    Nd4j.getMemoryManager().setCurrentWorkspace(current);
    return copy;
}
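A usage sketch of the method above (workspace ids and sizes are illustrative, and the usual org.nd4j.linalg imports are assumed): leverageTo copies the backing buffer into another named workspace so the result outlives the workspace the array was created in.

// make sure the target workspace exists for this thread; otherwise leverageTo returns `this`
WorkspaceConfiguration conf = WorkspaceConfiguration.builder().initialSize(10 * 1024L * 1024L).build();
Nd4j.getWorkspaceManager().createNewWorkspace(conf, "WS_TARGET");
INDArray leveraged;
try (MemoryWorkspace scope = Nd4j.getWorkspaceManager().getAndActivateWorkspace(conf, "WS_SOURCE")) {
    INDArray tmp = Nd4j.create(100);           // attached to WS_SOURCE
    leveraged = tmp.leverageTo("WS_TARGET");   // buffer copied into WS_TARGET, per the snippet above
}
// `leveraged` stays valid after WS_SOURCE closes, for as long as WS_TARGET's cycle is open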
use of org.nd4j.linalg.api.buffer.DataBuffer in project nd4j by deeplearning4j.
the class JCublasNDArray, method migrate.
/**
 * This method pulls this INDArray into the current Workspace.
 *
 * PLEASE NOTE: if there is no current Workspace, the INDArray is returned as is.
 *
 * @return a copy attached to the current Workspace, or this array if no Workspace is open
 */
@Override
public INDArray migrate() {
    MemoryWorkspace current = Nd4j.getMemoryManager().getCurrentWorkspace();
    if (current == null)
        return this;
    INDArray copy = null;
    if (!this.isView()) {
        Nd4j.getExecutioner().commit();
        DataBuffer buffer = Nd4j.createBuffer(this.lengthLong(), false);
        AllocationPoint pointDst = AtomicAllocator.getInstance().getAllocationPoint(buffer);
        AllocationPoint pointSrc = AtomicAllocator.getInstance().getAllocationPoint(this.data);
        // CudaContext context = (CudaContext) AtomicAllocator.getInstance().getDeviceContext().getContext();
        CudaContext context = AtomicAllocator.getInstance().getFlowController().prepareAction(pointDst, pointSrc);
        if (pointSrc.isActualOnDeviceSide()) {
            if (NativeOpsHolder.getInstance().getDeviceNativeOps().memcpyAsync(pointDst.getDevicePointer(), pointSrc.getDevicePointer(), this.lengthLong() * Nd4j.sizeOfDataType(buffer.dataType()), CudaConstants.cudaMemcpyDeviceToDevice, context.getOldStream()) == 0)
                throw new ND4JIllegalStateException("memcpyAsync failed");
        } else {
            if (NativeOpsHolder.getInstance().getDeviceNativeOps().memcpyAsync(pointDst.getDevicePointer(), pointSrc.getHostPointer(), this.lengthLong() * Nd4j.sizeOfDataType(buffer.dataType()), CudaConstants.cudaMemcpyHostToDevice, context.getOldStream()) == 0)
                throw new ND4JIllegalStateException("memcpyAsync failed");
        }
        context.syncOldStream();
        if (pointDst.getDeviceId() != Nd4j.getMemoryManager().getCurrentWorkspace().getDeviceId()) {
            // log.info("Swapping [{}] -> [{}]", pointDst.getDeviceId(), Nd4j.getMemoryManager().getCurrentWorkspace().getDeviceId());
            pointDst.setDeviceId(Nd4j.getMemoryManager().getCurrentWorkspace().getDeviceId());
        }
        copy = Nd4j.createArrayFromShapeBuffer(buffer, this.shapeInfoDataBuffer());
        // tag buffer as valid on device side
        pointDst.tickHostRead();
        pointDst.tickDeviceWrite();
        AtomicAllocator.getInstance().getFlowController().registerAction(context, pointDst, pointSrc);
    } else {
        copy = this.dup(this.ordering());
    }
    return copy;
}
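A usage sketch of migrate() (the workspace id is illustrative, usual org.nd4j.linalg imports assumed): it pulls a detached array into whatever workspace is currently open, so the returned copy is allocated inside that workspace.

INDArray detached = Nd4j.rand(10, 10);  // created outside any workspace
try (MemoryWorkspace ws = Nd4j.getWorkspaceManager()
        .getAndActivateWorkspace(WorkspaceConfiguration.builder().build(), "WS_MIGRATE")) {
    INDArray inside = detached.migrate();  // copy of `detached` allocated inside WS_MIGRATE, per the snippet above
    // `inside` lives in WS_MIGRATE; `detached` is left untouched
}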