use of org.nd4j.jita.allocator.impl.AllocationPoint in project nd4j by deeplearning4j.
the class CudaZeroHandler method pickupHostAllocation.
/**
 * Registers the given allocation point in one of the {@code zeroAllocations} buckets.
 *
 * <p>The bucket id comes from {@code RandomUtils.nextInt(0, numBuckets)}, where
 * {@code numBuckets} is {@code configuration.getNumberOfGcThreads()}. The memory required for
 * the point's shape is added to {@code zeroUseCounter}, the bucket id is stored on the point,
 * and the point's object id is recorded (as both key and value) in that bucket's map. The
 * bucket map is created on first use.
 *
 * @param point allocation point to register; its bucket id is overwritten by this call
 */
private void pickupHostAllocation(AllocationPoint point) {
    int numBuckets = configuration.getNumberOfGcThreads();
    long bucketId = RandomUtils.nextInt(0, numBuckets);
    long reqMemory = AllocationUtils.getRequiredMemory(point.getShape());

    zeroUseCounter.addAndGet(reqMemory);
    point.setBucketId(bucketId);

    if (!zeroAllocations.containsKey(bucketId)) {
        // Parameterized message: nothing is concatenated when debug logging is disabled.
        log.debug("Creating bucketID: {}", bucketId);
        synchronized (this) {
            // Re-check while holding the lock so an already-installed bucket map is not replaced.
            if (!zeroAllocations.containsKey(bucketId)) {
                zeroAllocations.put(bucketId, new ConcurrentHashMap<Long, Long>());
            }
        }
    }

    zeroAllocations.get(bucketId).put(point.getObjectId(), point.getObjectId());
}
use of org.nd4j.jita.allocator.impl.AllocationPoint in project nd4j by deeplearning4j.
the class CudaZeroHandler method memcpyDevice.
/**
 * Issues a device-to-device {@code memcpyAsync} from {@code srcPointer} into the device memory
 * backing {@code dstBuffer}, then marks the buffer's allocation point as written on the device.
 *
 * @param dstBuffer  destination buffer; must be a {@code BaseCudaDataBuffer} (it is cast)
 * @param srcPointer source pointer handed to {@code memcpyAsync} unchanged
 * @param length     amount to copy, passed straight to {@code memcpyAsync}
 *                   (presumably a byte count; confirm against nativeOps)
 * @param dstOffset  value added to the destination's raw device address
 * @param context    CUDA context whose {@code getOldStream()} stream is used for the copy
 * @throws ND4JIllegalStateException if {@code memcpyAsync} returns 0
 */
@Override
public void memcpyDevice(DataBuffer dstBuffer, Pointer srcPointer, long length, long dstOffset, CudaContext context) {
    final AllocationPoint allocation = ((BaseCudaDataBuffer) dstBuffer).getAllocationPoint();

    // Destination = device address of the buffer shifted by the caller-supplied offset.
    final Pointer destination =
            new CudaPointer(allocation.getPointers().getDevicePointer().address() + dstOffset);

    final boolean copyFailed = nativeOps.memcpyAsync(destination, srcPointer, length,
            CudaConstants.cudaMemcpyDeviceToDevice, context.getOldStream()) == 0;
    if (copyFailed) {
        throw new ND4JIllegalStateException("memcpyAsync failed");
    }

    allocation.tickDeviceWrite();
}
use of org.nd4j.jita.allocator.impl.AllocationPoint in project nd4j by deeplearning4j.
the class SporadicTests method testReplicate5.
/**
 * Logs host and device pointer addresses for a 3x3 array, for every per-device entry of a
 * {@code DeviceLocalNDArray} built from it, and for the entry each worker thread obtains
 * through {@code locals.get()}.
 *
 * <p>The test contains no assertions; it only produces log output and waits for all workers.
 */
@Test
public void testReplicate5() throws Exception {
    INDArray source = Nd4j.create(3, 3);
    AllocationPoint sourcePoint = AtomicAllocator.getInstance().getAllocationPoint(source);
    log.error("Original: Host pt: {}; Dev pt: {}",
            sourcePoint.getPointers().getHostPointer().address(),
            sourcePoint.getPointers().getDevicePointer().address());

    final DeviceLocalNDArray locals = new DeviceLocalNDArray(source);
    int deviceCount = Nd4j.getAffinityManager().getNumberOfDevices();

    // Report the entry held for each device index, queried from the test thread.
    for (int device = 0; device < deviceCount; device++) {
        AllocationPoint replicaPoint = AtomicAllocator.getInstance().getAllocationPoint(locals.get(device));
        log.error("deviceId: {}; Host pt: {}; Dev pt: {}", device,
                replicaPoint.getPointers().getHostPointer().address(),
                replicaPoint.getPointers().getDevicePointer().address());
    }

    // The task keeps no state of its own, so a single instance serves every worker thread.
    final Runnable reportCurrentThreadEntry = new Runnable() {
        @Override
        public void run() {
            AllocationPoint point = AtomicAllocator.getInstance().getAllocationPoint(locals.get());
            log.error("deviceId: {}; Host pt: {}; Dev pt: {}",
                    Nd4j.getAffinityManager().getDeviceForCurrentThread(),
                    point.getPointers().getHostPointer().address(),
                    point.getPointers().getDevicePointer().address());
        }
    };

    Thread[] workers = new Thread[deviceCount];
    for (int i = 0; i < workers.length; i++) {
        workers[i] = new Thread(reportCurrentThreadEntry);
        workers[i].start();
    }

    for (Thread worker : workers) {
        worker.join();
    }
}
use of org.nd4j.jita.allocator.impl.AllocationPoint in project nd4j by deeplearning4j.
the class SporadicTests method testReplicate3.
/**
 * Verifies that a 10x10 array of ones wrapped in a {@code DeviceLocalNDArray} sums to a vector
 * of tens, both from one worker thread per device and from the test thread for every device
 * index.
 *
 * <p>Assertions made inside worker threads are captured and rethrown on the test thread after
 * the workers have been joined. An exception thrown inside {@code Thread.run()} otherwise only
 * terminates that thread, so the test would pass regardless of the worker results.
 *
 * @throws Exception if joining a worker thread is interrupted
 */
@Test
public void testReplicate3() throws Exception {
    INDArray array = Nd4j.ones(10, 10);
    INDArray exp = Nd4j.create(10).assign(10f);
    log.error("Array length: {}", array.length());

    int numDevices = Nd4j.getAffinityManager().getNumberOfDevices();
    final DeviceLocalNDArray locals = new DeviceLocalNDArray(array);

    Thread[] threads = new Thread[numDevices];
    // One slot per worker: each worker writes only its own slot, and the join() calls below
    // make those writes visible to the test thread.
    final Throwable[] failures = new Throwable[numDevices];

    for (int t = 0; t < numDevices; t++) {
        final int workerIndex = t;
        threads[t] = new Thread(new Runnable() {
            @Override
            public void run() {
                try {
                    AllocationPoint point = AtomicAllocator.getInstance().getAllocationPoint(locals.get());
                    log.error("Point deviceId: {}; current deviceId: {}", point.getDeviceId(), Nd4j.getAffinityManager().getDeviceForCurrentThread());
                    INDArray sum = locals.get().sum(1);
                    INDArray localExp = Nd4j.create(10).assign(10f);
                    assertEquals(localExp, sum);
                } catch (Throwable failure) {
                    // Thread boundary: remember the failure so the test thread can report it.
                    failures[workerIndex] = failure;
                }
            }
        });
        threads[t].start();
    }

    for (int t = 0; t < numDevices; t++) {
        threads[t].join();
    }

    // Surface the first worker failure as a test failure, keeping the original as the cause.
    for (int t = 0; t < numDevices; t++) {
        if (failures[t] != null) {
            AssertionError error = new AssertionError("Worker thread " + t + " failed: " + failures[t]);
            error.initCause(failures[t]);
            throw error;
        }
    }

    for (int t = 0; t < numDevices; t++) {
        AllocationPoint point = AtomicAllocator.getInstance().getAllocationPoint(locals.get(t));
        log.error("Point deviceId: {}; current deviceId: {}", point.getDeviceId(), Nd4j.getAffinityManager().getDeviceForCurrentThread());
        exp.addi(0.0f);
        assertEquals(exp, locals.get(t).sum(0));
        log.error("Point after: {}", point.getDeviceId());
    }
}
use of org.nd4j.jita.allocator.impl.AllocationPoint in project nd4j by deeplearning4j.
the class SporadicTests method testLocality.
/**
 * Asserts that {@code isActualOnDeviceSide()} is {@code true} for the allocation point of:
 * a newly created 9-element array, an 'f'-order 3x3 reshape of it, a 3x3 result of
 * {@code Shape.newShapeNoCopy} on it, and a newly created 'c'-order 3x4x4x4 array.
 */
@Test
public void testLocality() {
    INDArray flat = Nd4j.create(new float[] { 1, 2, 3, 4, 5, 6, 7, 8, 9 });
    assertEquals(true, AtomicAllocator.getInstance().getAllocationPoint(flat).isActualOnDeviceSide());

    INDArray reshaped = flat.reshape('f', 3, 3);
    assertEquals(true, AtomicAllocator.getInstance().getAllocationPoint(reshaped).isActualOnDeviceSide());

    INDArray noCopyShape = Shape.newShapeNoCopy(flat, new int[] { 3, 3 }, true);
    assertEquals(true, AtomicAllocator.getInstance().getAllocationPoint(noCopyShape).isActualOnDeviceSide());

    INDArray fourDimensional = Nd4j.create(new int[] { 3, 4, 4, 4 }, 'c');
    assertEquals(true, AtomicAllocator.getInstance().getAllocationPoint(fourDimensional).isActualOnDeviceSide());
}
Aggregations