use of org.nd4j.jita.allocator.impl.AllocationPoint in project nd4j by deeplearning4j.
In the class JCublasNDArrayFactory, the method average:
/**
 * Computes the element-wise average of {@code arrays}, writing the result into
 * {@code target} when it is non-null. Returns {@code target} (which may be null;
 * the native average op is still executed over the inputs in that case).
 *
 * Preconditions enforced below: every input must have element-wise stride 1
 * (contiguous) and the same length. Throws RuntimeException on null/empty input
 * and ND4JIllegalStateException on stride/length violations.
 *
 * NOTE(review): when {@code target == null} and {@code arrays.length == 1}, the
 * early-return path below dereferences {@code target} and throws NPE — unlike the
 * multi-array paths, which tolerate a null target. Confirm whether callers rely on this.
 */
@Override
public INDArray average(INDArray target, INDArray[] arrays) {
    if (arrays == null || arrays.length == 0)
        throw new RuntimeException("Input arrays are missing");
    // Single input: the average is the input itself.
    if (arrays.length == 1)
        return target.assign(arrays[0]);
    // we do averaging on GPU only if ALL devices have p2p links
    if (nativeOps.isP2PAvailable() && CudaEnvironment.getInstance().getConfiguration().isCrossDeviceAccessAllowed()) {
        // Flush any queued ops before we start reading device pointers directly.
        Nd4j.getExecutioner().push();
        long len = target != null ? target.lengthLong() : arrays[0].lengthLong();
        AtomicAllocator allocator = AtomicAllocator.getInstance();
        // Register this op with the flow controller so dependent ops are ordered correctly.
        CudaContext context = allocator.getFlowController().prepareAction(target, arrays);
        // extras layout: [unused, stream, deviceId, flag]; the trailing CudaPointer(0)
        // differs from the CPU branch's CudaPointer(1) — presumably a device/host mode
        // flag for the native op; confirm against the NativeOps signature.
        PointerPointer extras = new PointerPointer(null, context.getOldStream(), allocator.getDeviceIdPointer(), new CudaPointer(0));
        Pointer z = target == null ? null : AtomicAllocator.getInstance().getPointer(target, context);
        // Collect raw device addresses of all inputs for a batched native call.
        long[] xPointers = new long[arrays.length];
        for (int i = 0; i < arrays.length; i++) {
            if (arrays[i].elementWiseStride() != 1)
                throw new ND4JIllegalStateException("Native averaging is applicable only to continuous INDArrays");
            if (arrays[i].lengthLong() != len)
                throw new ND4JIllegalStateException("All arrays should have equal length for averaging");
            AllocationPoint point = allocator.getAllocationPoint(arrays[i]);
            xPointers[i] = point.getPointers().getDevicePointer().address();
            // Mark the device copy as most recent so stale host copies aren't used later.
            point.tickDeviceWrite();
        }
        // Ship the pointer table to the device (8 bytes per address; a double buffer
        // is used here purely as an 8-byte-per-element container).
        CudaDoubleDataBuffer tempX = new CudaDoubleDataBuffer(arrays.length);
        allocator.memcpyBlocking(tempX, new LongPointer(xPointers), xPointers.length * 8, 0);
        PointerPointer x = new PointerPointer(AtomicAllocator.getInstance().getPointer(tempX, context));
        // Dispatch on data type; all inputs are assumed to share arrays[0]'s type.
        if (arrays[0].data().dataType() == DataBuffer.Type.DOUBLE) {
            nativeOps.averageDouble(extras, x, target == null ? null : (DoublePointer) z, arrays.length, len, true);
        } else if (arrays[0].data().dataType() == DataBuffer.Type.FLOAT) {
            nativeOps.averageFloat(extras, x, target == null ? null : (FloatPointer) z, arrays.length, len, true);
        } else {
            nativeOps.averageHalf(extras, x, target == null ? null : (ShortPointer) z, arrays.length, len, true);
        }
        allocator.getFlowController().registerAction(context, target, arrays);
        // NOTE(review): result discarded — presumably keeps tempX strongly reachable
        // until after the native call/registration; confirm before removing.
        tempX.address();
        return target;
    } else {
        // otherwise we do averaging on CPU side
        /**
         * We expect all operations are complete at this point
         */
        long len = target == null ? arrays[0].lengthLong() : target.lengthLong();
        CudaContext context = (CudaContext) AtomicAllocator.getInstance().getDeviceContext().getContext();
        // Host-side pointer table: one host pointer per input array.
        PointerPointer dataPointers = new PointerPointer(arrays.length);
        // extras layout mirrors the GPU branch; trailing CudaPointer(1) — see note above.
        PointerPointer extras = new PointerPointer(null, context.getOldStream(), AtomicAllocator.getInstance().getDeviceIdPointer(), new CudaPointer(1));
        for (int i = 0; i < arrays.length; i++) {
            // Compressed arrays must be materialized before their host buffers are read.
            Nd4j.getCompressor().autoDecompress(arrays[i]);
            if (arrays[i].elementWiseStride() != 1)
                throw new ND4JIllegalStateException("Native averaging is applicable only to continuous INDArrays");
            if (arrays[i].lengthLong() != len)
                throw new ND4JIllegalStateException("All arrays should have equal length for averaging");
            dataPointers.put(i, AtomicAllocator.getInstance().getHostPointer(arrays[i]));
        }
        if (arrays[0].data().dataType() == DataBuffer.Type.DOUBLE) {
            nativeOps.averageDouble(extras, dataPointers, target == null ? null : (DoublePointer) AtomicAllocator.getInstance().getHostPointer(target), arrays.length, len, true);
        } else if (arrays[0].data().dataType() == DataBuffer.Type.FLOAT) {
            nativeOps.averageFloat(extras, dataPointers, target == null ? null : (FloatPointer) AtomicAllocator.getInstance().getHostPointer(target), arrays.length, len, true);
        } else {
            nativeOps.averageHalf(extras, dataPointers, target == null ? null : (ShortPointer) AtomicAllocator.getInstance().getHostPointer(target), arrays.length, len, true);
        }
        // The native op wrote through host pointers; mark host copies as freshest.
        if (target != null)
            AtomicAllocator.getInstance().getAllocationPoint(target).tickHostWrite();
        // TODO: make propagation optional maybe? (the `if (true)` below is a
        // placeholder for that future flag — currently always propagates)
        if (true) {
            for (int i = 0; i < arrays.length; i++) {
                AtomicAllocator.getInstance().getAllocationPoint(arrays[i]).tickHostWrite();
            }
        }
        return target;
    }
}
use of org.nd4j.jita.allocator.impl.AllocationPoint in project nd4j by deeplearning4j.
In the class AsynchronousFlowControllerTest, the method testDependencies4:
@Test
public void testDependencies4() throws Exception {
    // Two small arrays: one is the write target of the action, one is only read.
    INDArray writeTarget = Nd4j.create(new float[] { 1f, 2f, 3f });
    INDArray readSource = Nd4j.create(new float[] { 1f, 2f, 3f });
    // toString() synchronizes, completing the activeWrite caused by array creation.
    String synced = readSource.toString();
    AllocationPoint readPoint = allocator.getAllocationPoint(readSource);
    AllocationPoint writePoint = allocator.getAllocationPoint(writeTarget);
    assertPointHasNoDependencies(readPoint);
    controller.cutTail();
    CudaContext ctx = controller.prepareAction(writeTarget, readSource);
    controller.registerAction(ctx, writeTarget, readSource);
    // Right after registration: read side has active reads; write side has an
    // active write but no reads.
    assertTrue(controller.hasActiveReads(readPoint));
    assertFalse(controller.hasActiveReads(writePoint));
    assertNotEquals(-1, controller.hasActiveWrite(writePoint));
    Configuration configuration = CudaEnvironment.getInstance().getConfiguration();
    // One sweep must not clear the dependencies yet.
    controller.sweepTail();
    assertTrue(controller.hasActiveReads(readPoint));
    assertFalse(controller.hasActiveReads(writePoint));
    assertNotEquals(-1, controller.hasActiveWrite(writePoint));
    // Nor a second one.
    controller.sweepTail();
    assertTrue(controller.hasActiveReads(readPoint));
    assertFalse(controller.hasActiveReads(writePoint));
    assertNotEquals(-1, controller.hasActiveWrite(writePoint));
    // Draining the whole command queue, however, releases everything.
    for (int i = 0; i < configuration.getCommandQueueLength(); i++) {
        controller.sweepTail();
    }
    assertPointHasNoDependencies(readPoint);
    assertPointHasNoDependencies(writePoint);
}
use of org.nd4j.jita.allocator.impl.AllocationPoint in project nd4j by deeplearning4j.
In the class AsynchronousFlowControllerTest, the method testDependencies1:
@Test
public void testDependencies1() throws Exception {
    INDArray source = Nd4j.create(new float[] { 1f, 2f, 3f });
    // toString() synchronizes, completing the activeWrite caused by array creation;
    // afterwards the allocation point must carry no pending dependencies.
    String synced = source.toString();
    assertPointHasNoDependencies(allocator.getAllocationPoint(source));
}
use of org.nd4j.jita.allocator.impl.AllocationPoint in project nd4j by deeplearning4j.
In the class DelayedMemoryTest, the method testDelayedAllocation4:
/**
 * Verifies delayed device allocation: until device memory is actually needed,
 * device and host pointers of an array's AllocationPoint are identical — both
 * for a freshly created array (even after a host-side read) and for an array
 * deserialized from disk.
 *
 * Fixes over the previous version: the read path now uses NIO
 * ({@code Files.newInputStream}) consistently with the NIO write path instead of
 * legacy {@code FileInputStream}, and the serialized fixture is removed from
 * tmpdir in a finally block so repeated runs don't leave files behind.
 */
@Test
public void testDelayedAllocation4() throws Exception {
    INDArray array = Nd4j.create(new float[] { 1f, 2f, 3f, 4f, 5f });
    AllocationPoint pointer = AtomicAllocator.getInstance().getAllocationPoint(array);
    PointersPair pair = pointer.getPointers();
    // pointers should be equal, device memory wasn't allocated yet
    assertEquals(pair.getDevicePointer(), pair.getHostPointer());
    // a host-side read must not trigger device allocation either
    assertEquals(2.0f, array.getFloat(1), 0.001f);
    assertEquals(pair.getDevicePointer(), pair.getHostPointer());
    String temp = System.getProperty("java.io.tmpdir");
    String outPath = FilenameUtils.concat(temp, "dl4jtestserialization.bin");
    try {
        try (DataOutputStream dos = new DataOutputStream(Files.newOutputStream(Paths.get(outPath)))) {
            Nd4j.write(array, dos);
        }
        INDArray in;
        // NIO read path, consistent with the NIO write path above (was FileInputStream)
        try (DataInputStream dis = new DataInputStream(Files.newInputStream(Paths.get(outPath)))) {
            in = Nd4j.read(dis);
        }
        // the deserialized array is also host-only until a device op happens
        assertEquals(AtomicAllocator.getInstance().getAllocationPoint(in).getPointers().getDevicePointer(), AtomicAllocator.getInstance().getAllocationPoint(in).getPointers().getHostPointer());
        assertEquals(array, in);
    } finally {
        // don't leave the serialization fixture in tmpdir
        Files.deleteIfExists(Paths.get(outPath));
    }
}
use of org.nd4j.jita.allocator.impl.AllocationPoint in project nd4j by deeplearning4j.
In the class DelayedMemoryTest, the method testDelayedDup1:
@Test
public void testDelayedDup1() throws Exception {
    INDArray source = Nd4j.linspace(1, 1000, 1000).reshape(10, 10, 10);
    AllocationPoint shapePoint = AtomicAllocator.getInstance().getAllocationPoint(source.shapeInfoDataBuffer());
    AllocationPoint dataPoint = AtomicAllocator.getInstance().getAllocationPoint(source);
    // Freshly created buffers live on the host only.
    assertEquals(AllocationStatus.HOST, dataPoint.getAllocationStatus());
    assertEquals(AllocationStatus.HOST, shapePoint.getAllocationStatus());
    // Running an op (sum) moves the data to the device and the shape info
    // into constant memory; re-fetch the points to observe the transition.
    float sum = source.sumNumber().floatValue();
    shapePoint = AtomicAllocator.getInstance().getAllocationPoint(source.shapeInfoDataBuffer());
    dataPoint = AtomicAllocator.getInstance().getAllocationPoint(source);
    assertEquals(AllocationStatus.DEVICE, dataPoint.getAllocationStatus());
    assertEquals(AllocationStatus.CONSTANT, shapePoint.getAllocationStatus());
    // A dup() of a device-resident array comes up device-resident as well.
    INDArray copy = source.dup();
    AllocationPoint copyShapePoint = AtomicAllocator.getInstance().getAllocationPoint(copy.shapeInfoDataBuffer());
    AllocationPoint copyDataPoint = AtomicAllocator.getInstance().getAllocationPoint(copy);
    assertEquals(AllocationStatus.DEVICE, copyDataPoint.getAllocationStatus());
    assertEquals(AllocationStatus.CONSTANT, copyShapePoint.getAllocationStatus());
}
Aggregations