Usage example of org.nd4j.linalg.api.buffer.DataBuffer from the nd4j project (deeplearning4j):
class JCublasNDArrayFactory, method concat.
/**
 * Concatenates the given arrays along the specified dimension, executing the
 * concatenation on the GPU via the native {@code concat*} ops.
 *
 * @param dimension the dimension along which to concatenate
 * @param toConcat  the input arrays; all dimensions except {@code dimension}
 *                  must match the first array's shape
 * @return a newly allocated array holding the concatenation result
 * @throws IllegalArgumentException if any input's shape is incompatible
 */
@Override
public INDArray concat(int dimension, INDArray... toConcat) {
    // Flush any queued grid ops first so the inputs are fully materialized.
    if (Nd4j.getExecutioner() instanceof GridExecutioner)
        ((GridExecutioner) Nd4j.getExecutioner()).flushQueue();

    if (toConcat.length == 1)
        return toConcat[0];

    // Decompress inputs (the native op needs raw buffers) and compute the
    // output extent along the concatenation dimension.
    int sumAlongDim = 0;
    for (int i = 0; i < toConcat.length; i++) {
        if (toConcat[i].isCompressed())
            Nd4j.getCompressor().decompressi(toConcat[i]);
        sumAlongDim += toConcat[i].size(dimension);
    }
    int[] outputShape = ArrayUtil.copy(toConcat[0].shape());
    outputShape[dimension] = sumAlongDim;

    // Validate shape compatibility BEFORE allocating the uninitialized result,
    // so an illegal concat fails fast without wasting device memory.
    for (int i = 0; i < toConcat.length; i++) {
        for (int j = 0; j < toConcat[i].rank(); j++) {
            if (j != dimension && toConcat[i].size(j) != outputShape[j])
                throw new IllegalArgumentException(
                        "Illegal concatenation at array " + i + " and shape element " + j);
        }
    }

    INDArray ret = Nd4j.createUninitialized(outputShape, Nd4j.order());

    AtomicAllocator allocator = AtomicAllocator.getInstance();
    CudaContext context = allocator.getFlowController().prepareAction(ret, toConcat);

    // Per-input device addresses handed to the native op as flat long arrays.
    long[] shapeInfoPointers = new long[toConcat.length];
    long[] dataPointers = new long[toConcat.length];
    long[] tadPointers = new long[toConcat.length];
    long[] offsetsPointers = new long[toConcat.length];
    long[] hostShapeInfoPointers = new long[toConcat.length];

    TADManager tadManager = Nd4j.getExecutioner().getTADManager();
    for (int i = 0; i < toConcat.length; i++) {
        shapeInfoPointers[i] = AddressRetriever.retrieveDeviceAddress(toConcat[i].shapeInfoDataBuffer(), context);
        dataPointers[i] = AtomicAllocator.getInstance().getPointer(toConcat[i], context).address();
        hostShapeInfoPointers[i] =
                AtomicAllocator.getInstance().getHostPointer(toConcat[i].shapeInfoDataBuffer()).address();
        // NOTE(review): the original also re-accumulated sumAlongDim here, but that
        // value is never read after the output shape is built; the dead accumulation
        // has been removed.

        Pair<DataBuffer, DataBuffer> tadBuffers =
                tadManager.getTADOnlyShapeInfo(toConcat[i], new int[] { dimension });
        long devTadShapeInfo = AtomicAllocator.getInstance().getPointer(tadBuffers.getFirst(), context).address();
        DataBuffer offsets = tadBuffers.getSecond();
        long devTadOffsets = AtomicAllocator.getInstance().getPointer(offsets, context).address();
        tadPointers[i] = devTadShapeInfo;
        offsetsPointers[i] = devTadOffsets;
    }

    // TAD shape info / offsets for the result array.
    Pair<DataBuffer, DataBuffer> zBuffers = tadManager.getTADOnlyShapeInfo(ret, new int[] { dimension });

    Pointer dZ = AtomicAllocator.getInstance().getPointer(ret, context);
    Pointer dZShapeInfo = AddressRetriever.retrieveDevicePointer(ret.shapeInfoDataBuffer(), context);

    // Stage the host-side pointer arrays into device buffers (8 bytes per address).
    CudaDoubleDataBuffer tempData = new CudaDoubleDataBuffer(toConcat.length);
    CudaDoubleDataBuffer tempShapes = new CudaDoubleDataBuffer(toConcat.length);
    CudaDoubleDataBuffer tempTAD = new CudaDoubleDataBuffer(toConcat.length);
    CudaDoubleDataBuffer tempOffsets = new CudaDoubleDataBuffer(toConcat.length);

    AtomicAllocator.getInstance().memcpyBlocking(tempData, new LongPointer(dataPointers), dataPointers.length * 8, 0);
    AtomicAllocator.getInstance().memcpyBlocking(tempShapes, new LongPointer(shapeInfoPointers), shapeInfoPointers.length * 8, 0);
    AtomicAllocator.getInstance().memcpyBlocking(tempTAD, new LongPointer(tadPointers), tadPointers.length * 8, 0);
    AtomicAllocator.getInstance().memcpyBlocking(tempOffsets, new LongPointer(offsetsPointers), offsetsPointers.length * 8, 0);

    Pointer dataPointer = AtomicAllocator.getInstance().getPointer(tempData, context);
    Pointer shapesPointer = AtomicAllocator.getInstance().getPointer(tempShapes, context);
    Pointer tadPointer = AtomicAllocator.getInstance().getPointer(tempTAD, context);
    Pointer offsetPointer = AtomicAllocator.getInstance().getPointer(tempOffsets, context);

    PointerPointer extras = new PointerPointer(
            AddressRetriever.retrieveHostPointer(ret.shapeInfoDataBuffer()),
            context.getOldStream(),
            allocator.getDeviceIdPointer(),
            context.getBufferAllocation(),
            context.getBufferReduction(),
            context.getBufferScalar(),
            context.getBufferSpecial(),
            AddressRetriever.retrieveHostPointer(toConcat[0].shapeInfoDataBuffer()),
            AddressRetriever.retrieveHostPointer(ret.shapeInfoDataBuffer()),
            new LongPointer(hostShapeInfoPointers),
            // zTAD shape info
            AtomicAllocator.getInstance().getPointer(zBuffers.getFirst(), context),
            // zTAD offsets
            AtomicAllocator.getInstance().getPointer(zBuffers.getSecond(), context));

    // Dispatch on the result data type; HALF is the fall-through case.
    if (ret.data().dataType() == DataBuffer.Type.DOUBLE) {
        nativeOps.concatDouble(extras, dimension, toConcat.length,
                new PointerPointer(new Pointer[] { dataPointer }),
                new PointerPointer(new Pointer[] { shapesPointer }),
                (DoublePointer) dZ, (IntPointer) dZShapeInfo,
                new PointerPointer(new Pointer[] { tadPointer }),
                new PointerPointer(new Pointer[] { offsetPointer }));
    } else if (ret.data().dataType() == DataBuffer.Type.FLOAT) {
        nativeOps.concatFloat(extras, dimension, toConcat.length,
                new PointerPointer(new Pointer[] { dataPointer }),
                new PointerPointer(new Pointer[] { shapesPointer }),
                (FloatPointer) dZ, (IntPointer) dZShapeInfo,
                new PointerPointer(new Pointer[] { tadPointer }),
                new PointerPointer(new Pointer[] { offsetPointer }));
    } else {
        nativeOps.concatHalf(extras, dimension, toConcat.length,
                new PointerPointer(new Pointer[] { dataPointer }),
                new PointerPointer(new Pointer[] { shapesPointer }),
                (ShortPointer) dZ, (IntPointer) dZShapeInfo,
                new PointerPointer(new Pointer[] { tadPointer }),
                new PointerPointer(new Pointer[] { offsetPointer }));
    }

    allocator.registerAction(context, ret, toConcat);

    return ret;
}
Usage example of org.nd4j.linalg.api.buffer.DataBuffer from the nd4j project (deeplearning4j):
class JCublasNDArrayFactory, method sort.
/**
 * Sorts {@code x} in place along the given dimension(s) on the GPU.
 *
 * @param x          the array to sort (modified in place and returned)
 * @param descending true for descending order, false for ascending
 * @param dimension  the dimension(s) defining the TADs to sort
 * @return the same array instance {@code x}, sorted
 * @throws UnsupportedOperationException for data types other than FLOAT/DOUBLE/HALF
 */
@Override
public INDArray sort(INDArray x, boolean descending, int... dimension) {
// Scalars are trivially sorted.
if (x.isScalar())
return x;
// Native sortTad* expects the dimension list in ascending order.
Arrays.sort(dimension);
// Flush pending ops so x's device buffer is current before sorting.
Nd4j.getExecutioner().push();
// TAD (tensor-along-dimension) shape info + offsets for the sort tiles.
Pair<DataBuffer, DataBuffer> tadBuffers = Nd4j.getExecutioner().getTADManager().getTADOnlyShapeInfo(x, dimension);
// Synchronize x for writing on this device context.
CudaContext context = AtomicAllocator.getInstance().getFlowController().prepareAction(x);
PointerPointer extraz = new // not used
PointerPointer(// not used
AtomicAllocator.getInstance().getHostPointer(x.shapeInfoDataBuffer()), context.getOldStream(), AtomicAllocator.getInstance().getDeviceIdPointer());
// Dimensions are passed to the native side via a cached constant buffer.
Pointer dimensionPointer = AtomicAllocator.getInstance().getPointer(AtomicAllocator.getInstance().getConstantBuffer(dimension), context);
// Dispatch to the native sort matching x's data type.
if (x.data().dataType() == DataBuffer.Type.FLOAT) {
nativeOps.sortTadFloat(extraz, (FloatPointer) AtomicAllocator.getInstance().getPointer(x, context), (IntPointer) AtomicAllocator.getInstance().getPointer(x.shapeInfoDataBuffer(), context), (IntPointer) dimensionPointer, dimension.length, (IntPointer) AtomicAllocator.getInstance().getPointer(tadBuffers.getFirst(), context), new LongPointerWrapper(AtomicAllocator.getInstance().getPointer(tadBuffers.getSecond(), context)), descending);
} else if (x.data().dataType() == DataBuffer.Type.DOUBLE) {
nativeOps.sortTadDouble(extraz, (DoublePointer) AtomicAllocator.getInstance().getPointer(x, context), (IntPointer) AtomicAllocator.getInstance().getPointer(x.shapeInfoDataBuffer(), context), (IntPointer) dimensionPointer, dimension.length, (IntPointer) AtomicAllocator.getInstance().getPointer(tadBuffers.getFirst(), context), new LongPointerWrapper(AtomicAllocator.getInstance().getPointer(tadBuffers.getSecond(), context)), descending);
} else if (x.data().dataType() == DataBuffer.Type.HALF) {
nativeOps.sortTadHalf(extraz, (ShortPointer) AtomicAllocator.getInstance().getPointer(x, context), (IntPointer) AtomicAllocator.getInstance().getPointer(x.shapeInfoDataBuffer(), context), (IntPointer) dimensionPointer, dimension.length, (IntPointer) AtomicAllocator.getInstance().getPointer(tadBuffers.getFirst(), context), new LongPointerWrapper(AtomicAllocator.getInstance().getPointer(tadBuffers.getSecond(), context)), descending);
} else {
throw new UnsupportedOperationException("Unknown dataType " + x.data().dataType());
}
// Mark the async write to x as complete for the flow controller.
AtomicAllocator.getInstance().getFlowController().registerAction(context, x);
return x;
}
Usage example of org.nd4j.linalg.api.buffer.DataBuffer from the nd4j project (deeplearning4j):
class JCublasNDArrayFactory, method tear.
/**
 * Splits ("tears") the input tensor into independent arrays, one per TAD along
 * the given dimensions, copying each tile into its own freshly allocated array.
 *
 * @param tensor     the source array
 * @param dimensions the dimensions defining the shape of each resulting tile
 * @return one new array per TAD, each of shape {@code tensor.shape()[dimensions]}
 * @throws UnsupportedOperationException if the global data type is not
 *                                       DOUBLE, FLOAT, or HALF
 */
public INDArray[] tear(INDArray tensor, int... dimensions) {
    // The native tear op needs the raw (uncompressed) buffer.
    if (tensor.isCompressed())
        Nd4j.getCompressor().decompressi(tensor);

    // Native TAD machinery expects dimensions in ascending order.
    Arrays.sort(dimensions);

    Pair<DataBuffer, DataBuffer> tadBuffers =
            Nd4j.getExecutioner().getTADManager().getTADOnlyShapeInfo(tensor, dimensions);

    // Shape and element count of a single tile.
    long tadLength = 1;
    int[] shape = new int[dimensions.length];
    for (int i = 0; i < dimensions.length; i++) {
        tadLength *= tensor.shape()[dimensions[i]];
        shape[i] = tensor.shape()[dimensions[i]];
    }
    int numTads = (int) (tensor.lengthLong() / tadLength);

    INDArray[] result = new INDArray[numTads];
    long[] xPointers = new long[numTads];

    // Prepare the source for reading; each destination is prepared as it is
    // allocated below (context is reassigned, keeping the last one for dispatch).
    CudaContext context = AtomicAllocator.getInstance().getFlowController().prepareAction(null, tensor);
    for (int x = 0; x < numTads; x++) {
        result[x] = Nd4j.createUninitialized(shape);
        context = AtomicAllocator.getInstance().getFlowController().prepareAction(result[x]);
        xPointers[x] = AtomicAllocator.getInstance().getPointer(result[x], context).address();
    }

    // Stage the destination addresses into a device buffer (8 bytes per address).
    CudaDoubleDataBuffer tempX = new CudaDoubleDataBuffer(numTads);
    AtomicAllocator.getInstance().memcpyBlocking(tempX, new LongPointer(xPointers), xPointers.length * 8, 0);

    PointerPointer extraz = new // not used
    PointerPointer(// not used
            null, context.getOldStream(), AtomicAllocator.getInstance().getDeviceIdPointer());

    if (Nd4j.dataType() == DataBuffer.Type.DOUBLE) {
        nativeOps.tearDouble(extraz,
                (DoublePointer) AtomicAllocator.getInstance().getPointer(tensor, context),
                (IntPointer) AtomicAllocator.getInstance().getPointer(tensor.shapeInfoDataBuffer(), context),
                new PointerPointer(AtomicAllocator.getInstance().getPointer(tempX, context)),
                (IntPointer) AtomicAllocator.getInstance().getPointer(result[0].shapeInfoDataBuffer(), context),
                (IntPointer) AtomicAllocator.getInstance().getPointer(tadBuffers.getFirst(), context),
                new LongPointerWrapper(AtomicAllocator.getInstance().getPointer(tadBuffers.getSecond(), context)));
    } else if (Nd4j.dataType() == DataBuffer.Type.FLOAT) {
        nativeOps.tearFloat(extraz,
                (FloatPointer) AtomicAllocator.getInstance().getPointer(tensor, context),
                (IntPointer) AtomicAllocator.getInstance().getPointer(tensor.shapeInfoDataBuffer(), context),
                new PointerPointer(AtomicAllocator.getInstance().getPointer(tempX, context)),
                (IntPointer) AtomicAllocator.getInstance().getPointer(result[0].shapeInfoDataBuffer(), context),
                (IntPointer) AtomicAllocator.getInstance().getPointer(tadBuffers.getFirst(), context),
                new LongPointerWrapper(AtomicAllocator.getInstance().getPointer(tadBuffers.getSecond(), context)));
    } else if (Nd4j.dataType() == DataBuffer.Type.HALF) {
        nativeOps.tearHalf(extraz,
                (ShortPointer) AtomicAllocator.getInstance().getPointer(tensor, context),
                (IntPointer) AtomicAllocator.getInstance().getPointer(tensor.shapeInfoDataBuffer(), context),
                new PointerPointer(AtomicAllocator.getInstance().getPointer(tempX, context)),
                (IntPointer) AtomicAllocator.getInstance().getPointer(result[0].shapeInfoDataBuffer(), context),
                (IntPointer) AtomicAllocator.getInstance().getPointer(tadBuffers.getFirst(), context),
                new LongPointerWrapper(AtomicAllocator.getInstance().getPointer(tadBuffers.getSecond(), context)));
    } else {
        // Previously this fell through silently, returning UNINITIALIZED arrays.
        // Fail fast instead, matching the behavior of sort().
        throw new UnsupportedOperationException("Unknown dataType " + Nd4j.dataType());
    }

    AtomicAllocator.getInstance().getFlowController().registerActionAllWrite(context, result);
    AtomicAllocator.getInstance().getFlowController().registerAction(context, null, result);

    return result;
}
Usage example of org.nd4j.linalg.api.buffer.DataBuffer from the nd4j project (deeplearning4j):
class JCublasNDArrayFactory, method createFromNpyPointer.
/**
 * Create an ndarray from an in-memory numpy array pointer.
 *
 * @param pointer the pointer to the numpy array
 * @return an ndarray created from the in-memory numpy pointer
 * @throws IllegalArgumentException if the numpy element size is neither
 *                                  4 (float) nor 8 (double) bytes
 */
@Override
public INDArray createFromNpyPointer(Pointer pointer) {
    Pointer dataPointer = nativeOps.dataPointForNumpy(pointer);
    int dataBufferElementSize = nativeOps.elementSizeForNpyArray(pointer);
    DataBuffer data = null;

    // Rebuild the shape-info buffer from the raw numpy shape pointer.
    // Shape info entries are 32-bit ints, hence the 4-byte scaling.
    Pointer shapeBufferPointer = nativeOps.shapeBufferForNumpy(pointer);
    int length = nativeOps.lengthForShapeBufferPointer(shapeBufferPointer);
    shapeBufferPointer.capacity(4 * length);
    shapeBufferPointer.limit(4 * length);
    shapeBufferPointer.position(0);

    IntPointer intPointer = new IntPointer(shapeBufferPointer);
    DataBuffer shapeBuffer = Nd4j.createBuffer(shapeBufferPointer, DataBuffer.Type.INT, length,
            IntIndexer.create(intPointer));

    // Bound the data pointer to exactly length(shape) elements.
    dataPointer.position(0);
    dataPointer.limit(dataBufferElementSize * Shape.length(shapeBuffer));
    dataPointer.capacity(dataBufferElementSize * Shape.length(shapeBuffer));

    // We don't care about pointers here: they will be copied in the
    // BaseCudaDataBuffer method, and the indexer will be recreated.
    if (dataBufferElementSize == (Float.SIZE / 8)) {
        data = Nd4j.createBuffer(dataPointer, DataBuffer.Type.FLOAT, Shape.length(shapeBuffer),
                FloatIndexer.create(new FloatPointer(dataPointer)));
    } else if (dataBufferElementSize == (Double.SIZE / 8)) {
        data = Nd4j.createBuffer(dataPointer, DataBuffer.Type.DOUBLE, Shape.length(shapeBuffer),
                DoubleIndexer.create(new DoublePointer(dataPointer)));
    } else {
        // Previously `data` stayed null here and caused a confusing NPE
        // downstream in Nd4j.create; fail fast with a clear message instead.
        throw new IllegalArgumentException(
                "Unsupported numpy element size: " + dataBufferElementSize + " bytes");
    }

    INDArray ret = Nd4j.create(data, Shape.shape(shapeBuffer), Shape.strideArr(shapeBuffer),
            Shape.offset(shapeBuffer), Shape.order(shapeBuffer));
    return ret;
}
Usage example of org.nd4j.linalg.api.buffer.DataBuffer from the nd4j project (deeplearning4j):
class CudaFloatDataBufferTest, method testPut.
/**
 * Verifies that put() overwrites a single element of a CUDA-backed float
 * buffer and that getFloat() observes the new value.
 */
@Test
public void testPut() throws Exception {
    final DataBuffer data = Nd4j.createBuffer(new float[] { 1f, 2f, 3f, 4f });

    data.put(2, 16f);

    assertEquals(16.0f, data.getFloat(2), 0.001f);
    System.out.println("Data: " + data);
}
End of aggregated DataBuffer usage examples.