Use of org.nd4j.linalg.jcublas.buffer.CudaIntDataBuffer in project nd4j by deeplearning4j.
The class BasicTADManager, method getTADOnlyShapeInfo.
@Override
public Pair<DataBuffer, DataBuffer> getTADOnlyShapeInfo(INDArray array, int[] dimension) {
    if (dimension != null && dimension.length > 1)
        Arrays.sort(dimension);

    if (dimension == null)
        dimension = new int[] {Integer.MAX_VALUE};

    boolean isScalar = dimension == null || (dimension.length == 1 && dimension[0] == Integer.MAX_VALUE);

    // FIXME: this is fast triage, remove it later
    // dimensionLength <= 1 ? 2 : dimensionLength;
    int targetRank = isScalar ? 2 : array.rank();
    long offsetLength = 0;
    long tadLength = 1;
    if (!isScalar)
        for (int i = 0; i < dimension.length; i++) {
            tadLength *= array.shape()[dimension[i]];
        }

    if (!isScalar)
        offsetLength = array.lengthLong() / tadLength;
    else
        offsetLength = 1;

    // logger.info("Original shape info before TAD: {}", array.shapeInfoDataBuffer());
    // logger.info("dimension: {}, tadLength: {}, offsetLength for TAD: {}", Arrays.toString(dimension), tadLength, offsetLength);

    // Shape info layout is [rank, shape..., stride..., offset, elementWiseStride, order],
    // i.e. rank * 2 + 4 int elements
    DataBuffer outputBuffer = new CudaIntDataBuffer(targetRank * 2 + 4);
    DataBuffer offsetsBuffer = new CudaLongDataBuffer(offsetLength);

    AtomicAllocator.getInstance().getAllocationPoint(outputBuffer).tickHostWrite();
    AtomicAllocator.getInstance().getAllocationPoint(offsetsBuffer).tickHostWrite();

    DataBuffer dimensionBuffer = AtomicAllocator.getInstance().getConstantBuffer(dimension);
    Pointer dimensionPointer = AtomicAllocator.getInstance().getHostPointer(dimensionBuffer);

    Pointer xShapeInfo = AddressRetriever.retrieveHostPointer(array.shapeInfoDataBuffer());
    Pointer targetPointer = AddressRetriever.retrieveHostPointer(outputBuffer);
    Pointer offsetsPointer = AddressRetriever.retrieveHostPointer(offsetsBuffer);

    if (!isScalar)
        nativeOps.tadOnlyShapeInfo((IntPointer) xShapeInfo, (IntPointer) dimensionPointer, dimension.length,
                        (IntPointer) targetPointer, new LongPointerWrapper(offsetsPointer));
    else {
        // Scalar case: fill a rank-2 {1, 1} shape info by hand:
        // rank, shape[0], shape[1], stride[0], stride[1], offset, elementWiseStride, order ('c' == 99)
        outputBuffer.put(0, 2);
        outputBuffer.put(1, 1);
        outputBuffer.put(2, 1);
        outputBuffer.put(3, 1);
        outputBuffer.put(4, 1);
        outputBuffer.put(5, 0);
        outputBuffer.put(6, 0);
        outputBuffer.put(7, 99);
    }

    AtomicAllocator.getInstance().getAllocationPoint(outputBuffer).tickHostWrite();
    AtomicAllocator.getInstance().getAllocationPoint(offsetsBuffer).tickHostWrite();
    return new Pair<>(outputBuffer, offsetsBuffer);
}
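Callers normally obtain this pair through the executioner's TADManager rather than by instantiating BasicTADManager directly, exactly as pullRows below does. A minimal usage sketch: the demo class is hypothetical, a CUDA backend on the classpath is assumed, and import paths (notably Pair) match the nd4j 0.9.x line this snippet comes from and may differ in other versions.

import org.nd4j.linalg.api.buffer.DataBuffer;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.cache.TADManager;
import org.nd4j.linalg.factory.Nd4j;
import org.nd4j.linalg.primitives.Pair;

public class TadShapeInfoDemo {
    public static void main(String[] args) {
        // 3x4 array; splitting along dimension 1 yields 3 TADs of length 4
        INDArray array = Nd4j.linspace(1, 12, 12).reshape(3, 4);

        TADManager tadManager = Nd4j.getExecutioner().getTADManager();
        Pair<DataBuffer, DataBuffer> tad = tadManager.getTADOnlyShapeInfo(array, new int[] {1});

        System.out.println("TAD shape info: " + tad.getFirst());  // shape info of a single TAD
        System.out.println("TAD offsets:    " + tad.getSecond()); // element offset of each TAD
    }
}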
Use of org.nd4j.linalg.jcublas.buffer.CudaIntDataBuffer in project nd4j by deeplearning4j.
The class JCublasNDArrayFactory, method pullRows.
/**
 * Produces a concatenated array consisting of the tensors fetched from the
 * source array along the given dimension, at the specified indexes.
 *
 * @param source          source array (must be 2D)
 * @param sourceDimension dimension of the source array to pull along
 * @param indexes         indexes of the tensors to fetch from the source array
 * @param order           ordering ('c' or 'f') of the resulting array
 * @return the concatenated array
 */
@Override
public INDArray pullRows(INDArray source, int sourceDimension, int[] indexes, char order) {
    if (Nd4j.getExecutioner() instanceof GridExecutioner)
        ((GridExecutioner) Nd4j.getExecutioner()).flushQueue();

    if (indexes == null || indexes.length < 1)
        throw new IllegalStateException("Indexes can't be null or zero-length");

    int[] shape = null;
    if (sourceDimension == 1)
        shape = new int[] {indexes.length, source.shape()[sourceDimension]};
    else if (sourceDimension == 0)
        shape = new int[] {source.shape()[sourceDimension], indexes.length};
    else
        throw new UnsupportedOperationException("2D input is expected");

    INDArray ret = Nd4j.createUninitialized(shape, order);

    AtomicAllocator allocator = AtomicAllocator.getInstance();
    CudaContext context = allocator.getFlowController().prepareAction(ret, source);

    Pointer x = AtomicAllocator.getInstance().getPointer(source, context);
    Pointer xShape = AtomicAllocator.getInstance().getPointer(source.shapeInfoDataBuffer(), context);
    Pointer z = AtomicAllocator.getInstance().getPointer(ret, context);
    Pointer zShape = AtomicAllocator.getInstance().getPointer(ret.shapeInfoDataBuffer(), context);

    PointerPointer extras = new PointerPointer(AddressRetriever.retrieveHostPointer(ret.shapeInfoDataBuffer()),
                    context.getOldStream(), allocator.getDeviceIdPointer());

    // Copy the row indexes into a device buffer (4 bytes per int)
    CudaIntDataBuffer tempIndexes = new CudaIntDataBuffer(indexes.length);
    AtomicAllocator.getInstance().memcpyBlocking(tempIndexes, new IntPointer(indexes), indexes.length * 4, 0);
    Pointer pIndex = AtomicAllocator.getInstance().getPointer(tempIndexes, context);

    TADManager tadManager = Nd4j.getExecutioner().getTADManager();

    Pair<DataBuffer, DataBuffer> tadBuffers = tadManager.getTADOnlyShapeInfo(source, new int[] {sourceDimension});
    Pair<DataBuffer, DataBuffer> zTadBuffers = tadManager.getTADOnlyShapeInfo(ret, new int[] {sourceDimension});

    Pointer tadShapeInfo = AtomicAllocator.getInstance().getPointer(tadBuffers.getFirst(), context);
    Pointer zTadShapeInfo = AtomicAllocator.getInstance().getPointer(zTadBuffers.getFirst(), context);

    DataBuffer offsets = tadBuffers.getSecond();
    Pointer tadOffsets = AtomicAllocator.getInstance().getPointer(offsets, context);
    Pointer zTadOffsets = AtomicAllocator.getInstance().getPointer(zTadBuffers.getSecond(), context);

    if (ret.data().dataType() == DataBuffer.Type.DOUBLE) {
        nativeOps.pullRowsDouble(extras, (DoublePointer) x, (IntPointer) xShape, (DoublePointer) z,
                        (IntPointer) zShape, indexes.length, (IntPointer) pIndex, (IntPointer) tadShapeInfo,
                        new LongPointerWrapper(tadOffsets), (IntPointer) zTadShapeInfo,
                        new LongPointerWrapper(zTadOffsets));
    } else if (ret.data().dataType() == DataBuffer.Type.FLOAT) {
        nativeOps.pullRowsFloat(extras, (FloatPointer) x, (IntPointer) xShape, (FloatPointer) z,
                        (IntPointer) zShape, indexes.length, (IntPointer) pIndex, (IntPointer) tadShapeInfo,
                        new LongPointerWrapper(tadOffsets), (IntPointer) zTadShapeInfo,
                        new LongPointerWrapper(zTadOffsets));
    } else {
        nativeOps.pullRowsHalf(extras, (ShortPointer) x, (IntPointer) xShape, (ShortPointer) z,
                        (IntPointer) zShape, indexes.length, (IntPointer) pIndex, (IntPointer) tadShapeInfo,
                        new LongPointerWrapper(tadOffsets), (IntPointer) zTadShapeInfo,
                        new LongPointerWrapper(zTadOffsets));
    }

    allocator.registerAction(context, ret, source);
    return ret;
}
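In user code this is normally reached through Nd4j.pullRows, which delegates to the backend factory method above. A minimal sketch (the demo class is hypothetical; nd4j with a CUDA backend on the classpath is assumed):

import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.factory.Nd4j;

public class PullRowsDemo {
    public static void main(String[] args) {
        // 4x3 source array
        INDArray source = Nd4j.linspace(1, 12, 12).reshape(4, 3);

        // sourceDimension == 1, so the result shape is
        // {indexes.length, source.shape()[1]} == 2x3: rows 0 and 2 of source
        INDArray picked = Nd4j.pullRows(source, 1, new int[] {0, 2});

        System.out.println(picked);
    }
}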
Use of org.nd4j.linalg.jcublas.buffer.CudaIntDataBuffer in project nd4j by deeplearning4j.
The class JCublasNDArrayFactory, method shuffle.
/**
 * Symmetric in-place shuffle of a set of ndarrays along the specified dimensions.
 * Each array in the list should have its own dimension entry at the same index
 * of the dimensions list (or a single shared entry may be given for all arrays).
 *
 * @param arrays     the ndarrays to shuffle
 * @param rnd        the random number generator driving the permutation
 * @param dimensions the dimensions to shuffle along, one entry per array (or one shared entry)
 */
@Override
public void shuffle(List<INDArray> arrays, Random rnd, List<int[]> dimensions) {
    // no dimension - no shuffle
    if (dimensions == null || dimensions.size() == 0)
        throw new RuntimeException("Dimension can't be null or 0-length");

    if (arrays == null || arrays.size() == 0)
        throw new RuntimeException("No input arrays provided");

    if (dimensions.size() > 1 && arrays.size() != dimensions.size())
        throw new IllegalStateException("Number of dimensions do not match number of arrays to shuffle");

    Nd4j.getExecutioner().push();

    // first we build TAD for input array and dimensions
    AtomicAllocator allocator = AtomicAllocator.getInstance();

    CudaContext context = null;
    for (int x = 0; x < arrays.size(); x++) {
        context = allocator.getFlowController().prepareAction(arrays.get(x));
    }

    int tadLength = 1;
    for (int i = 0; i < dimensions.get(0).length; i++) {
        tadLength *= arrays.get(0).shape()[dimensions.get(0)[i]];
    }

    int numTads = arrays.get(0).length() / tadLength;

    // Random permutation of TAD indexes, shared by all arrays (hence "symmetric")
    int[] map = ArrayUtil.buildInterleavedVector(rnd, numTads);

    CudaIntDataBuffer shuffle = new CudaIntDataBuffer(map);
    Pointer shuffleMap = allocator.getPointer(shuffle, context);

    PointerPointer extras = new PointerPointer(null, // not used
                    context.getOldStream(), allocator.getDeviceIdPointer());

    long[] xPointers = new long[arrays.size()];
    long[] xShapes = new long[arrays.size()];
    long[] tadShapes = new long[arrays.size()];
    long[] tadOffsets = new long[arrays.size()];

    for (int i = 0; i < arrays.size(); i++) {
        INDArray array = arrays.get(i);

        Pointer x = AtomicAllocator.getInstance().getPointer(array, context);
        Pointer xShapeInfo = AtomicAllocator.getInstance().getPointer(array.shapeInfoDataBuffer(), context);

        TADManager tadManager = Nd4j.getExecutioner().getTADManager();

        int[] dimension = dimensions.size() > 1 ? dimensions.get(i) : dimensions.get(0);

        Pair<DataBuffer, DataBuffer> tadBuffers = tadManager.getTADOnlyShapeInfo(array, dimension);
        // log.info("Original shape: {}; dimension: {}; TAD shape: {}", array.shapeInfoDataBuffer().asInt(), dimension, tadBuffers.getFirst().asInt());

        Pointer tadShapeInfo = AtomicAllocator.getInstance().getPointer(tadBuffers.getFirst(), context);

        DataBuffer offsets = tadBuffers.getSecond();
        if (offsets.length() != numTads)
            throw new ND4JIllegalStateException("Can't symmetrically shuffle arrays with non-equal number of TADs");

        Pointer tadOffset = AtomicAllocator.getInstance().getPointer(offsets, context);

        xPointers[i] = x.address();
        xShapes[i] = xShapeInfo.address();
        tadShapes[i] = tadShapeInfo.address();
        tadOffsets[i] = tadOffset.address();
    }

    // Pack the per-array device addresses (8 bytes each) into device-side buffers
    CudaDoubleDataBuffer tempX = new CudaDoubleDataBuffer(arrays.size());
    CudaDoubleDataBuffer tempShapes = new CudaDoubleDataBuffer(arrays.size());
    CudaDoubleDataBuffer tempTAD = new CudaDoubleDataBuffer(arrays.size());
    CudaDoubleDataBuffer tempOffsets = new CudaDoubleDataBuffer(arrays.size());

    AtomicAllocator.getInstance().memcpyBlocking(tempX, new LongPointer(xPointers), xPointers.length * 8, 0);
    AtomicAllocator.getInstance().memcpyBlocking(tempShapes, new LongPointer(xShapes), xPointers.length * 8, 0);
    AtomicAllocator.getInstance().memcpyBlocking(tempTAD, new LongPointer(tadShapes), xPointers.length * 8, 0);
    AtomicAllocator.getInstance().memcpyBlocking(tempOffsets, new LongPointer(tadOffsets), xPointers.length * 8, 0);

    if (Nd4j.dataType() == DataBuffer.Type.DOUBLE) {
        nativeOps.shuffleDouble(extras, new PointerPointer(allocator.getPointer(tempX, context)),
                        new PointerPointer(allocator.getPointer(tempShapes, context)),
                        new PointerPointer(allocator.getPointer(tempX, context)),
                        new PointerPointer(allocator.getPointer(tempShapes, context)), arrays.size(),
                        (IntPointer) shuffleMap, new PointerPointer(allocator.getPointer(tempTAD, context)),
                        new PointerPointer(allocator.getPointer(tempOffsets, context)));
    } else if (Nd4j.dataType() == DataBuffer.Type.FLOAT) {
        nativeOps.shuffleFloat(extras, new PointerPointer(allocator.getPointer(tempX, context)),
                        new PointerPointer(allocator.getPointer(tempShapes, context)),
                        new PointerPointer(allocator.getPointer(tempX, context)),
                        new PointerPointer(allocator.getPointer(tempShapes, context)), arrays.size(),
                        (IntPointer) shuffleMap, new PointerPointer(allocator.getPointer(tempTAD, context)),
                        new PointerPointer(allocator.getPointer(tempOffsets, context)));
    } else {
        // HALFs
        nativeOps.shuffleHalf(extras, new PointerPointer(allocator.getPointer(tempX, context)),
                        new PointerPointer(allocator.getPointer(tempShapes, context)),
                        new PointerPointer(allocator.getPointer(tempX, context)),
                        new PointerPointer(allocator.getPointer(tempShapes, context)), arrays.size(),
                        (IntPointer) shuffleMap, new PointerPointer(allocator.getPointer(tempTAD, context)),
                        new PointerPointer(allocator.getPointer(tempOffsets, context)));
    }

    for (int f = 0; f < arrays.size(); f++) {
        allocator.getFlowController().registerAction(context, arrays.get(f));
    }

    // just to keep reference
    shuffle.address();
    tempX.dataType();
    tempShapes.dataType();
    tempOffsets.dataType();
    tempTAD.dataType();
}
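As with pullRows, user code reaches this through Nd4j.shuffle. A minimal sketch of a symmetric shuffle of two arrays, assuming a CUDA backend and the Nd4j.shuffle overload that forwards to this factory method (the demo class is hypothetical):

import java.util.Arrays;
import java.util.Random;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.factory.Nd4j;

public class ShuffleDemo {
    public static void main(String[] args) {
        // Both arrays have 3 TADs along dimension 1 (3 rows each),
        // which the method above requires for a symmetric shuffle.
        INDArray a = Nd4j.linspace(1, 12, 12).reshape(3, 4);
        INDArray b = Nd4j.linspace(1, 6, 6).reshape(3, 2);

        // The same row permutation is applied, in place, to both arrays.
        Nd4j.shuffle(Arrays.asList(a, b), new Random(42),
                        Arrays.asList(new int[] {1}, new int[] {1}));

        System.out.println(a);
        System.out.println(b);
    }
}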