Use of org.nd4j.linalg.jcublas.buffer.CudaLongDataBuffer in the nd4j project by deeplearning4j.
The getTADOnlyShapeInfo method of the BasicTADManager class.
/**
 * Creates the TAD-only shape-info buffer and the matching offsets buffer for {@code array}
 * along the requested dimensions.
 *
 * <p>A {@code null} dimension array, or a one-element array holding {@link Integer#MAX_VALUE},
 * selects the "scalar" path: a fixed 8-entry shape info is written directly and the offsets
 * buffer has length 1. Otherwise both buffers are filled by {@code nativeOps.tadOnlyShapeInfo}.
 *
 * <p>Side effect: when {@code dimension} has more than one entry it is sorted in place, so the
 * caller's array is modified.
 *
 * @param array     the array whose shape info and length drive the computation
 * @param dimension the dimensions to build the TAD over; may be {@code null}
 * @return a pair of (shape-info buffer, offsets buffer)
 */
@Override
public Pair<DataBuffer, DataBuffer> getTADOnlyShapeInfo(INDArray array, int[] dimension) {
    if (dimension == null) {
        dimension = new int[] {Integer.MAX_VALUE};
    } else if (dimension.length > 1) {
        Arrays.sort(dimension);
    }

    // dimension is never null past this point, so only the MAX_VALUE marker needs checking.
    final boolean scalarCase = dimension.length == 1 && dimension[0] == Integer.MAX_VALUE;

    // FIXME: this is fast triage, remove it later
    // (previous variant: dimensionLength <= 1 ? 2 : dimensionLength)
    final int shapeRank = scalarCase ? 2 : array.rank();

    // Number of offsets = total length divided by the product of the selected dimension sizes.
    final long offsetCount;
    if (scalarCase) {
        offsetCount = 1;
    } else {
        long elementsPerTad = 1;
        for (int axis : dimension) {
            elementsPerTad *= array.shape()[axis];
        }
        offsetCount = array.lengthLong() / elementsPerTad;
    }

    DataBuffer shapeInfoBuffer = new CudaIntDataBuffer(shapeRank * 2 + 4);
    DataBuffer tadOffsetsBuffer = new CudaLongDataBuffer(offsetCount);

    final AtomicAllocator allocator = AtomicAllocator.getInstance();

    // Both buffers are ticked as host-written before their host pointers are retrieved...
    allocator.getAllocationPoint(shapeInfoBuffer).tickHostWrite();
    allocator.getAllocationPoint(tadOffsetsBuffer).tickHostWrite();

    DataBuffer dimensionBuffer = allocator.getConstantBuffer(dimension);
    Pointer dimensionPointer = allocator.getHostPointer(dimensionBuffer);

    Pointer xShapeInfo = AddressRetriever.retrieveHostPointer(array.shapeInfoDataBuffer());
    Pointer targetPointer = AddressRetriever.retrieveHostPointer(shapeInfoBuffer);
    Pointer offsetsPointer = AddressRetriever.retrieveHostPointer(tadOffsetsBuffer);

    if (scalarCase) {
        // Hard-coded shape info for the scalar path.
        // NOTE(review): presumably {rank, shape[2], stride[2], offset, elementWiseStride, order}
        // with 99 == 'c' -- confirm against the shape-info layout used by the native side.
        final int[] scalarShapeInfo = {2, 1, 1, 1, 1, 0, 0, 99};
        for (int i = 0; i < scalarShapeInfo.length; i++) {
            shapeInfoBuffer.put(i, scalarShapeInfo[i]);
        }
    } else {
        nativeOps.tadOnlyShapeInfo(
                (IntPointer) xShapeInfo,
                (IntPointer) dimensionPointer,
                dimension.length,
                (IntPointer) targetPointer,
                new LongPointerWrapper(offsetsPointer));
    }

    // ...and ticked again once their contents have been filled in on the host side.
    allocator.getAllocationPoint(shapeInfoBuffer).tickHostWrite();
    allocator.getAllocationPoint(tadOffsetsBuffer).tickHostWrite();

    return new Pair<>(shapeInfoBuffer, tadOffsetsBuffer);
}
Aggregations