Use of org.nd4j.jita.allocator.impl.AllocationPoint in project nd4j by deeplearning4j.
The class JCublasNDArray, method leverageTo.
@Override
public INDArray leverageTo(String id) {
    // No-op if this array isn't attached to any workspace
    if (!isAttached())
        return this;

    // No-op if the target workspace doesn't exist
    if (!Nd4j.getWorkspaceManager().checkIfWorkspaceExists(id))
        return this;

    MemoryWorkspace current = Nd4j.getMemoryManager().getCurrentWorkspace();
    MemoryWorkspace target = Nd4j.getWorkspaceManager().getWorkspaceForCurrentThread(id);

    // No-op if the array already lives in the target workspace
    if (current == target)
        return this;

    if (this.data.getParentWorkspace() == target)
        return this;

    Nd4j.getMemoryManager().setCurrentWorkspace(target);

    INDArray copy = null;
    if (!this.isView()) {
        Nd4j.getExecutioner().commit();

        // Fresh buffer allocated in the target workspace
        DataBuffer buffer = Nd4j.createBuffer(this.lengthLong(), false);

        AllocationPoint pointDst = AtomicAllocator.getInstance().getAllocationPoint(buffer);
        AllocationPoint pointSrc = AtomicAllocator.getInstance().getAllocationPoint(this.data);

        CudaContext context = AtomicAllocator.getInstance().getFlowController().prepareAction(pointDst, pointSrc);

        // Copy from wherever the source data is actual: device-to-device if it's
        // up to date on the device, host-to-device otherwise
        if (pointSrc.isActualOnDeviceSide()) {
            if (NativeOpsHolder.getInstance().getDeviceNativeOps().memcpyAsync(pointDst.getDevicePointer(), pointSrc.getDevicePointer(), this.lengthLong() * Nd4j.sizeOfDataType(buffer.dataType()), CudaConstants.cudaMemcpyDeviceToDevice, context.getOldStream()) == 0)
                throw new ND4JIllegalStateException("memcpyAsync failed");
        } else {
            if (NativeOpsHolder.getInstance().getDeviceNativeOps().memcpyAsync(pointDst.getDevicePointer(), pointSrc.getHostPointer(), this.lengthLong() * Nd4j.sizeOfDataType(buffer.dataType()), CudaConstants.cudaMemcpyHostToDevice, context.getOldStream()) == 0)
                throw new ND4JIllegalStateException("memcpyAsync failed");
        }

        context.syncOldStream();

        copy = Nd4j.createArrayFromShapeBuffer(buffer, this.shapeInfoDataBuffer());

        // tag buffer as valid on device side
        pointDst.tickHostRead();
        pointDst.tickDeviceWrite();

        AtomicAllocator.getInstance().getFlowController().registerAction(context, pointDst, pointSrc);
    } else {
        // Views can't be copied buffer-to-buffer; fall back to a regular dup
        copy = this.dup(this.ordering());
        Nd4j.getExecutioner().commit();
    }

    // Restore whatever workspace was current before the copy
    Nd4j.getMemoryManager().setCurrentWorkspace(current);

    return copy;
}
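For context, a minimal usage sketch (assumed, not taken from the nd4j sources): leverageTo copies a temporary array out of a short-lived workspace so it survives that workspace's closing. The workspace ids WS_OUTER and WS_INNER and the array shape are arbitrary choices for this illustration.

INDArray result = null;
try (MemoryWorkspace outer = Nd4j.getWorkspaceManager().getAndActivateWorkspace("WS_OUTER")) {
    try (MemoryWorkspace inner = Nd4j.getWorkspaceManager().getAndActivateWorkspace("WS_INNER")) {
        INDArray temp = Nd4j.rand(10, 10);      // allocated in WS_INNER
        // Copy the array into WS_OUTER so it survives WS_INNER closing
        result = temp.leverageTo("WS_OUTER");
    }
    // result is still usable here; temp's memory may already have been reclaimed
}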
Use of org.nd4j.jita.allocator.impl.AllocationPoint in project nd4j by deeplearning4j.
The class JCublasNDArray, method migrate.
/**
 * This method pulls this INDArray into the current Workspace.
 *
 * PLEASE NOTE: if there's no current Workspace, the INDArray is returned as is.
 *
 * @return a copy attached to the current workspace, or this array if no workspace is open
 */
@Override
public INDArray migrate() {
    MemoryWorkspace current = Nd4j.getMemoryManager().getCurrentWorkspace();

    // No current workspace: nothing to migrate into
    if (current == null)
        return this;

    INDArray copy = null;
    if (!this.isView()) {
        Nd4j.getExecutioner().commit();

        // Fresh buffer allocated in the current workspace
        DataBuffer buffer = Nd4j.createBuffer(this.lengthLong(), false);

        AllocationPoint pointDst = AtomicAllocator.getInstance().getAllocationPoint(buffer);
        AllocationPoint pointSrc = AtomicAllocator.getInstance().getAllocationPoint(this.data);

        CudaContext context = AtomicAllocator.getInstance().getFlowController().prepareAction(pointDst, pointSrc);

        // Copy from wherever the source data is actual: device side if up to date there, host side otherwise
        if (pointSrc.isActualOnDeviceSide()) {
            if (NativeOpsHolder.getInstance().getDeviceNativeOps().memcpyAsync(pointDst.getDevicePointer(), pointSrc.getDevicePointer(), this.lengthLong() * Nd4j.sizeOfDataType(buffer.dataType()), CudaConstants.cudaMemcpyDeviceToDevice, context.getOldStream()) == 0)
                throw new ND4JIllegalStateException("memcpyAsync failed");
        } else {
            if (NativeOpsHolder.getInstance().getDeviceNativeOps().memcpyAsync(pointDst.getDevicePointer(), pointSrc.getHostPointer(), this.lengthLong() * Nd4j.sizeOfDataType(buffer.dataType()), CudaConstants.cudaMemcpyHostToDevice, context.getOldStream()) == 0)
                throw new ND4JIllegalStateException("memcpyAsync failed");
        }

        context.syncOldStream();

        // Keep the destination's device id in sync with the workspace's device
        if (pointDst.getDeviceId() != Nd4j.getMemoryManager().getCurrentWorkspace().getDeviceId()) {
            pointDst.setDeviceId(Nd4j.getMemoryManager().getCurrentWorkspace().getDeviceId());
        }

        copy = Nd4j.createArrayFromShapeBuffer(buffer, this.shapeInfoDataBuffer());

        // tag buffer as valid on device side
        pointDst.tickHostRead();
        pointDst.tickDeviceWrite();

        AtomicAllocator.getInstance().getFlowController().registerAction(context, pointDst, pointSrc);
    } else {
        // Views fall back to a regular dup
        copy = this.dup(this.ordering());
    }

    return copy;
}
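A minimal usage sketch (assumed, not from the nd4j sources): migrate() pulls a detached array into whatever workspace is currently open. The workspace id WS_MIGRATE and the shape are arbitrary choices for this illustration.

INDArray detached = Nd4j.rand(3, 3);   // created outside any workspace
try (MemoryWorkspace ws = Nd4j.getWorkspaceManager().getAndActivateWorkspace("WS_MIGRATE")) {
    // Pull the detached array into the now-current workspace;
    // with no active workspace, migrate() would return the array as is
    INDArray attached = detached.migrate();
}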
Use of org.nd4j.jita.allocator.impl.AllocationPoint in project nd4j by deeplearning4j.
The class CudaDirectProviderTest, method mallocDevice.
@Test
public void mallocDevice() throws Exception {
    CudaDirectProvider provider = new CudaDirectProvider();

    // 300000 elements of 4-byte FLOATs
    AllocationShape shape = new AllocationShape(300000, 4, DataBuffer.Type.FLOAT);
    AllocationPoint point = new AllocationPoint();
    point.setShape(shape);

    point.setPointers(provider.malloc(shape, point, AllocationStatus.DEVICE));

    System.out.println("Allocated...");
    Thread.sleep(1000);

    point.setAllocationStatus(AllocationStatus.DEVICE);
    provider.free(point);

    System.out.println("Deallocated...");
    Thread.sleep(1000);
}
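For comparison, a hedged sketch of a host-side counterpart, assuming CudaDirectProvider.malloc and free handle AllocationStatus.HOST symmetrically to the DEVICE case above; this is an illustration, not necessarily the project's actual test.

@Test
public void mallocHost() throws Exception {
    CudaDirectProvider provider = new CudaDirectProvider();

    AllocationShape shape = new AllocationShape(300000, 4, DataBuffer.Type.FLOAT);
    AllocationPoint point = new AllocationPoint();
    point.setShape(shape);

    // Same flow as the device test, but pinned host memory instead (assumed HOST path)
    point.setPointers(provider.malloc(shape, point, AllocationStatus.HOST));

    point.setAllocationStatus(AllocationStatus.HOST);
    provider.free(point);
}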
Use of org.nd4j.jita.allocator.impl.AllocationPoint in project nd4j by deeplearning4j.
The class AsynchronousFlowController, method setWriteLane.
protected void setWriteLane(INDArray array, cudaEvent_t event) {
    AllocationPoint point = allocator.getAllocationPoint(array);

    // Record the event that guards the most recent write to this array
    point.setWriteLane(event);
}
Use of org.nd4j.jita.allocator.impl.AllocationPoint in project nd4j by deeplearning4j.
The class AsynchronousFlowController, method setReadLane.
protected void setReadLane(INDArray array, cudaEvent_t event) {
    AllocationPoint point = allocator.getAllocationPoint(array);

    // Reads can overlap, so read events accumulate rather than replace each other
    point.addReadLane(event);
}
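Both setters merely tag an array's AllocationPoint with a CUDA event. A hedged sketch of how a caller inside the flow controller might pair them when scheduling an op; the method tagOperation and its signature are hypothetical and not part of AsynchronousFlowController.

// Hypothetical scheduling step, for illustration only
protected void tagOperation(INDArray result, INDArray input, cudaEvent_t opCompleted) {
    // The op that produces `result` must complete before anyone touches it again
    setWriteLane(result, opCompleted);

    // The same event also marks an outstanding read of `input`,
    // so a later writer to `input` knows which event to wait on
    setReadLane(input, opCompleted);
}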