
Example 1 with TADManager

Use of org.nd4j.linalg.cache.TADManager in project nd4j by deeplearning4j.

The class CpuNDArrayFactory, method shuffle.

/**
 * Symmetric, in-place shuffle of a set of ndarrays
 * along the specified dimensions. Each array in the list should have its own
 * dimension entry at the same index of the dimensions list.
 *
 * @param arrays     the ndarrays to shuffle
 * @param rnd        the source of randomness for the shuffle
 * @param dimensions the dimensions to shuffle along
 */
@Override
public void shuffle(List<INDArray> arrays, Random rnd, List<int[]> dimensions) {
    if (dimensions == null || dimensions.size() == 0)
        throw new RuntimeException("Dimension can't be null or 0-length");
    if (arrays == null || arrays.size() == 0)
        throw new RuntimeException("No input arrays provided");
    if (dimensions.size() > 1 && arrays.size() != dimensions.size())
        throw new IllegalStateException("Number of dimensions do not match number of arrays to shuffle");
    int tadLength = 1;
    for (int i = 0; i < dimensions.get(0).length; i++) {
        tadLength *= arrays.get(0).shape()[dimensions.get(0)[i]];
    }
    int numTads = arrays.get(0).length() / tadLength;
    int[] map = ArrayUtil.buildInterleavedVector(rnd, numTads);
    PointerPointer dataPointers = new PointerPointer(arrays.size());
    PointerPointer shapePointers = new PointerPointer(arrays.size());
    PointerPointer tadPointers = new PointerPointer(arrays.size());
    PointerPointer offsetPointers = new PointerPointer(arrays.size());
    PointerPointer dummy = new PointerPointer(new Pointer[] { null });
    List<Pair<DataBuffer, DataBuffer>> list = new ArrayList<>();
    TADManager tadManager = Nd4j.getExecutioner().getTADManager();
    IntPointer ptrMap = new IntPointer(map);
    for (int i = 0; i < arrays.size(); i++) {
        INDArray array = arrays.get(i);
        Nd4j.getCompressor().autoDecompress(array);
        int[] dimension = dimensions.size() > 1 ? dimensions.get(i) : dimensions.get(0);
        Pair<DataBuffer, DataBuffer> tadBuffers = tadManager.getTADOnlyShapeInfo(array, dimension);
        list.add(tadBuffers);
        Pointer hostTadShapeInfo = tadBuffers.getFirst().addressPointer();
        DataBuffer offsets = tadBuffers.getSecond();
        if (offsets == null)
            throw new ND4JIllegalStateException("Offsets for shuffle can't be null");
        if (offsets.length() != numTads)
            throw new ND4JIllegalStateException("Can't symmetrically shuffle arrays with non-equal number of TADs");
        dataPointers.put(i, array.data().addressPointer());
        shapePointers.put(i, array.shapeInfoDataBuffer().addressPointer());
        offsetPointers.put(i, offsets.addressPointer());
        tadPointers.put(i, hostTadShapeInfo);
    }
    if (Nd4j.dataType() == DataBuffer.Type.DOUBLE) {
        nativeOps.shuffleDouble(dummy, dataPointers, shapePointers, dataPointers, shapePointers, arrays.size(), ptrMap, tadPointers, offsetPointers);
    } else if (Nd4j.dataType() == DataBuffer.Type.FLOAT) {
        nativeOps.shuffleFloat(dummy, dataPointers, shapePointers, dataPointers, shapePointers, arrays.size(), ptrMap, tadPointers, offsetPointers);
    } else {
        // HALF precision shuffle is not implemented in this CPU code path
    }
    // touch the pointer holders so they stay strongly reachable until the native op completes
    dataPointers.address();
    shapePointers.address();
    tadPointers.address();
    offsetPointers.address();
}
Also used : ND4JIllegalStateException(org.nd4j.linalg.exception.ND4JIllegalStateException) INDArray(org.nd4j.linalg.api.ndarray.INDArray) TADManager(org.nd4j.linalg.cache.TADManager) Pair(org.nd4j.linalg.primitives.Pair) DataBuffer(org.nd4j.linalg.api.buffer.DataBuffer) CompressedDataBuffer(org.nd4j.linalg.compression.CompressedDataBuffer)
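
For reference, a minimal usage sketch of this symmetric shuffle through the public Nd4j facade; the shapes, seed, and the assumption that the facade exposes an overload matching the factory signature shown above are illustrative:

import java.util.Arrays;
import java.util.Collections;
import java.util.Random;

import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.factory.Nd4j;

public class ShuffleSketch {
    public static void main(String[] args) {
        // Two parallel 10x100 arrays; shuffling along dimension 1 treats each
        // of the 10 rows as one TAD (tadLength = 100, numTads = 10), and the
        // same random row permutation is applied to both arrays.
        INDArray features = Nd4j.rand(10, 100);
        INDArray labels = Nd4j.rand(10, 100);
        Nd4j.shuffle(Arrays.asList(features, labels), new Random(42),
                Collections.singletonList(new int[] {1}));
    }
}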

Example 2 with TADManager

Use of org.nd4j.linalg.cache.TADManager in project nd4j by deeplearning4j.

The class JCublasNDArrayFactory, method concat.

@Override
public INDArray concat(int dimension, INDArray... toConcat) {
    if (Nd4j.getExecutioner() instanceof GridExecutioner)
        ((GridExecutioner) Nd4j.getExecutioner()).flushQueue();
    if (toConcat.length == 1)
        return toConcat[0];
    int sumAlongDim = 0;
    for (int i = 0; i < toConcat.length; i++) {
        if (toConcat[i].isCompressed())
            Nd4j.getCompressor().decompressi(toConcat[i]);
        sumAlongDim += toConcat[i].size(dimension);
    }
    int[] outputShape = ArrayUtil.copy(toConcat[0].shape());
    outputShape[dimension] = sumAlongDim;
    INDArray ret = Nd4j.createUninitialized(outputShape, Nd4j.order());
    AtomicAllocator allocator = AtomicAllocator.getInstance();
    CudaContext context = allocator.getFlowController().prepareAction(ret, toConcat);
    long[] shapeInfoPointers = new long[toConcat.length];
    long[] dataPointers = new long[toConcat.length];
    long[] tadPointers = new long[toConcat.length];
    long[] offsetsPointers = new long[toConcat.length];
    long[] hostShapeInfoPointers = new long[toConcat.length];
    TADManager tadManager = Nd4j.getExecutioner().getTADManager();
    for (int i = 0; i < toConcat.length; i++) {
        shapeInfoPointers[i] = AddressRetriever.retrieveDeviceAddress(toConcat[i].shapeInfoDataBuffer(), context);
        dataPointers[i] = AtomicAllocator.getInstance().getPointer(toConcat[i], context).address();
        hostShapeInfoPointers[i] = AtomicAllocator.getInstance().getHostPointer(toConcat[i].shapeInfoDataBuffer()).address();
        for (int j = 0; j < toConcat[i].rank(); j++) {
            if (j != dimension && toConcat[i].size(j) != outputShape[j]) {
                throw new IllegalArgumentException("Illegal concatenation at array " + i + " and shape element " + j);
            }
        }
        Pair<DataBuffer, DataBuffer> tadBuffers = tadManager.getTADOnlyShapeInfo(toConcat[i], new int[] { dimension });
        long devTadShapeInfo = AtomicAllocator.getInstance().getPointer(tadBuffers.getFirst(), context).address();
        DataBuffer offsets = tadBuffers.getSecond();
        long devTadOffsets = AtomicAllocator.getInstance().getPointer(offsets, context).address();
        tadPointers[i] = devTadShapeInfo;
        offsetsPointers[i] = devTadOffsets;
    }
    // getting tadOnlyShape for result
    Pair<DataBuffer, DataBuffer> zBuffers = tadManager.getTADOnlyShapeInfo(ret, new int[] { dimension });
    // System.out.println("shapePointers: " + Arrays.toString(shapeInfoPointers));
    Pointer dZ = AtomicAllocator.getInstance().getPointer(ret, context);
    Pointer dZShapeInfo = AddressRetriever.retrieveDevicePointer(ret.shapeInfoDataBuffer(), context);
    // CudaDoubleDataBuffers are used as raw 8-byte-per-element storage for the 64-bit addresses
    CudaDoubleDataBuffer tempData = new CudaDoubleDataBuffer(toConcat.length);
    CudaDoubleDataBuffer tempShapes = new CudaDoubleDataBuffer(toConcat.length);
    CudaDoubleDataBuffer tempTAD = new CudaDoubleDataBuffer(toConcat.length);
    CudaDoubleDataBuffer tempOffsets = new CudaDoubleDataBuffer(toConcat.length);
    AtomicAllocator.getInstance().memcpyBlocking(tempData, new LongPointer(dataPointers), dataPointers.length * 8, 0);
    AtomicAllocator.getInstance().memcpyBlocking(tempShapes, new LongPointer(shapeInfoPointers), shapeInfoPointers.length * 8, 0);
    AtomicAllocator.getInstance().memcpyBlocking(tempTAD, new LongPointer(tadPointers), tadPointers.length * 8, 0);
    AtomicAllocator.getInstance().memcpyBlocking(tempOffsets, new LongPointer(offsetsPointers), offsetsPointers.length * 8, 0);
    Pointer dataPointer = AtomicAllocator.getInstance().getPointer(tempData, context);
    Pointer shapesPointer = AtomicAllocator.getInstance().getPointer(tempShapes, context);
    Pointer tadPointer = AtomicAllocator.getInstance().getPointer(tempTAD, context);
    Pointer offsetPointer = AtomicAllocator.getInstance().getPointer(tempOffsets, context);
    // System.out.println("ShapesPointer after conversion: " + shapesPointer);
    PointerPointer extras = new PointerPointer(
            AddressRetriever.retrieveHostPointer(ret.shapeInfoDataBuffer()),
            context.getOldStream(),
            allocator.getDeviceIdPointer(),
            context.getBufferAllocation(),
            context.getBufferReduction(),
            context.getBufferScalar(),
            context.getBufferSpecial(),
            AddressRetriever.retrieveHostPointer(toConcat[0].shapeInfoDataBuffer()),
            AddressRetriever.retrieveHostPointer(ret.shapeInfoDataBuffer()),
            new LongPointer(hostShapeInfoPointers),
            // z TAD shape
            AtomicAllocator.getInstance().getPointer(zBuffers.getFirst(), context),
            // z TAD offsets
            AtomicAllocator.getInstance().getPointer(zBuffers.getSecond(), context));
    if (ret.data().dataType() == DataBuffer.Type.DOUBLE) {
        nativeOps.concatDouble(extras, dimension, toConcat.length, new PointerPointer(new Pointer[] { dataPointer }), new PointerPointer(new Pointer[] { shapesPointer }), (DoublePointer) dZ, (IntPointer) dZShapeInfo, new PointerPointer(new Pointer[] { tadPointer }), new PointerPointer(new Pointer[] { offsetPointer }));
    } else if (ret.data().dataType() == DataBuffer.Type.FLOAT) {
        nativeOps.concatFloat(extras, dimension, toConcat.length, new PointerPointer(new Pointer[] { dataPointer }), new PointerPointer(new Pointer[] { shapesPointer }), (FloatPointer) dZ, (IntPointer) dZShapeInfo, new PointerPointer(new Pointer[] { tadPointer }), new PointerPointer(new Pointer[] { offsetPointer }));
    } else {
        nativeOps.concatHalf(extras, dimension, toConcat.length, new PointerPointer(new Pointer[] { dataPointer }), new PointerPointer(new Pointer[] { shapesPointer }), (ShortPointer) dZ, (IntPointer) dZShapeInfo, new PointerPointer(new Pointer[] { tadPointer }), new PointerPointer(new Pointer[] { offsetPointer }));
    }
    allocator.registerAction(context, ret, toConcat);
    return ret;
}
Also used : AtomicAllocator(org.nd4j.jita.allocator.impl.AtomicAllocator) CudaContext(org.nd4j.linalg.jcublas.context.CudaContext) CudaPointer(org.nd4j.jita.allocator.pointers.CudaPointer) AllocationPoint(org.nd4j.jita.allocator.impl.AllocationPoint) GridExecutioner(org.nd4j.linalg.api.ops.executioner.GridExecutioner) INDArray(org.nd4j.linalg.api.ndarray.INDArray) CudaDoubleDataBuffer(org.nd4j.linalg.jcublas.buffer.CudaDoubleDataBuffer) TADManager(org.nd4j.linalg.cache.TADManager) DataBuffer(org.nd4j.linalg.api.buffer.DataBuffer) CudaIntDataBuffer(org.nd4j.linalg.jcublas.buffer.CudaIntDataBuffer) CompressedDataBuffer(org.nd4j.linalg.compression.CompressedDataBuffer)
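
A minimal usage sketch of concat through the Nd4j facade (the shapes here are illustrative assumptions):

import java.util.Arrays;

import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.factory.Nd4j;

public class ConcatSketch {
    public static void main(String[] args) {
        // Concatenating two 2x3 matrices along dimension 0 yields shape [4, 3];
        // along dimension 1 it would yield [2, 6].
        INDArray a = Nd4j.ones(2, 3);
        INDArray b = Nd4j.zeros(2, 3);
        INDArray c = Nd4j.concat(0, a, b);
        System.out.println(Arrays.toString(c.shape())); // [4, 3]
    }
}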

Example 3 with TADManager

Use of org.nd4j.linalg.cache.TADManager in project nd4j by deeplearning4j.

The class BasicTADManagerTest, method testTADcreation2.

@Test
public void testTADcreation2() throws Exception {
    INDArray array = Nd4j.create(10, 100);
    TADManager tadManager = new DeviceTADManager();
    DataBuffer tad = tadManager.getTADOnlyShapeInfo(array, new int[] { 0 }).getFirst();
    DataBuffer tad2 = tadManager.getTADOnlyShapeInfo(array, new int[] { 0 }).getFirst();
    System.out.println("TAD: " + tad);
    System.out.println("Shape: " + array.shapeInfoDataBuffer());
    CudaContext context = (CudaContext) AtomicAllocator.getInstance().getDeviceContext().getContext();
    // shapeInfo layout: [rank, shape..., stride..., offset, elementWiseStride, order]
    assertEquals(2, tad.getInt(0)); // rank
    assertEquals(1, tad.getInt(1)); // shape[0]
    assertEquals(10, tad.getInt(2)); // shape[1]
    assertEquals(1, tad.getInt(3)); // stride[0]
    assertEquals(100, tad.getInt(4)); // stride[1]
    assertEquals(0, tad.getInt(5)); // offset
    assertEquals(100, tad.getInt(6)); // element-wise stride
    assertEquals(99, tad.getInt(7)); // order: 99 == 'c'
    assertFalse(AtomicAllocator.getInstance().getAllocationPoint(tad).isActualOnDeviceSide());
    long tadPointer1 = AtomicAllocator.getInstance().getPointer(tad, context).address();
    long tadPointer2 = AtomicAllocator.getInstance().getPointer(tad2, context).address();
    assertTrue(AtomicAllocator.getInstance().getAllocationPoint(tad).isActualOnDeviceSide());
    System.out.println("tadPointer1: " + tadPointer1);
    System.out.println("tadPointer2: " + tadPointer2);
    assertEquals(tadPointer1, tadPointer2);
    AtomicAllocator.getInstance().moveToConstant(tad);
    long tadPointer3 = AtomicAllocator.getInstance().getPointer(tad, context).address();
    long tadPointer4 = AtomicAllocator.getInstance().getPointer(tad2, context).address();
    assertEquals(tadPointer4, tadPointer3);
    assertNotEquals(tadPointer1, tadPointer3);
}
Also used : INDArray(org.nd4j.linalg.api.ndarray.INDArray) CudaContext(org.nd4j.linalg.jcublas.context.CudaContext) TADManager(org.nd4j.linalg.cache.TADManager) DataBuffer(org.nd4j.linalg.api.buffer.DataBuffer) Test(org.junit.Test)
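
The integers asserted above decode the TAD shape-information buffer for a 10x100 array split along dimension 0. A short sketch of the arithmetic behind them; the layout reading is inferred from the assertions and should be taken as an informal reading, not a specification:

// Each TAD along dimension 0 is one column of the row-major ('c' == 99) array:
// 10 elements with stride 100, hence TAD shape [1, 10] and stride [1, 100].
INDArray array = Nd4j.create(10, 100);
int tadLength = array.size(0);              // 10 elements per TAD (one column)
int numTads = array.length() / tadLength;   // 1000 / 10 = 100 TADs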

Example 4 with TADManager

Use of org.nd4j.linalg.cache.TADManager in project nd4j by deeplearning4j.

The class JCublasNDArrayFactory, method pullRows.

/**
 * Produces a concatenated array consisting of tensors fetched from the source
 * array along the given dimension at the specified indexes.
 *
 * @param source          the source array
 * @param sourceDimension the dimension of the source array to pull along
 * @param indexes         the indexes to fetch from the source array
 * @param order           the ordering of the returned array
 * @return the array of pulled tensors
 */
@Override
public INDArray pullRows(INDArray source, int sourceDimension, int[] indexes, char order) {
    if (Nd4j.getExecutioner() instanceof GridExecutioner)
        ((GridExecutioner) Nd4j.getExecutioner()).flushQueue();
    if (indexes == null || indexes.length < 1)
        throw new IllegalStateException("Indexes can't be null or zero-length");
    int[] shape = null;
    if (sourceDimension == 1)
        shape = new int[] { indexes.length, source.shape()[sourceDimension] };
    else if (sourceDimension == 0)
        shape = new int[] { source.shape()[sourceDimension], indexes.length };
    else
        throw new UnsupportedOperationException("2D input is expected");
    INDArray ret = Nd4j.createUninitialized(shape, order);
    AtomicAllocator allocator = AtomicAllocator.getInstance();
    CudaContext context = allocator.getFlowController().prepareAction(ret, source);
    Pointer x = AtomicAllocator.getInstance().getPointer(source, context);
    Pointer xShape = AtomicAllocator.getInstance().getPointer(source.shapeInfoDataBuffer(), context);
    Pointer z = AtomicAllocator.getInstance().getPointer(ret, context);
    Pointer zShape = AtomicAllocator.getInstance().getPointer(ret.shapeInfoDataBuffer(), context);
    PointerPointer extras = new PointerPointer(AddressRetriever.retrieveHostPointer(ret.shapeInfoDataBuffer()), context.getOldStream(), allocator.getDeviceIdPointer());
    CudaIntDataBuffer tempIndexes = new CudaIntDataBuffer(indexes.length);
    AtomicAllocator.getInstance().memcpyBlocking(tempIndexes, new IntPointer(indexes), indexes.length * 4, 0);
    Pointer pIndex = AtomicAllocator.getInstance().getPointer(tempIndexes, context);
    TADManager tadManager = Nd4j.getExecutioner().getTADManager();
    Pair<DataBuffer, DataBuffer> tadBuffers = tadManager.getTADOnlyShapeInfo(source, new int[] { sourceDimension });
    Pair<DataBuffer, DataBuffer> zTadBuffers = tadManager.getTADOnlyShapeInfo(ret, new int[] { sourceDimension });
    Pointer tadShapeInfo = AtomicAllocator.getInstance().getPointer(tadBuffers.getFirst(), context);
    Pointer zTadShapeInfo = AtomicAllocator.getInstance().getPointer(zTadBuffers.getFirst(), context);
    DataBuffer offsets = tadBuffers.getSecond();
    Pointer tadOffsets = AtomicAllocator.getInstance().getPointer(offsets, context);
    Pointer zTadOffsets = AtomicAllocator.getInstance().getPointer(zTadBuffers.getSecond(), context);
    if (ret.data().dataType() == DataBuffer.Type.DOUBLE) {
        nativeOps.pullRowsDouble(extras, (DoublePointer) x, (IntPointer) xShape, (DoublePointer) z, (IntPointer) zShape, indexes.length, (IntPointer) pIndex, (IntPointer) tadShapeInfo, new LongPointerWrapper(tadOffsets), (IntPointer) zTadShapeInfo, new LongPointerWrapper(zTadOffsets));
    } else if (ret.data().dataType() == DataBuffer.Type.FLOAT) {
        nativeOps.pullRowsFloat(extras, (FloatPointer) x, (IntPointer) xShape, (FloatPointer) z, (IntPointer) zShape, indexes.length, (IntPointer) pIndex, (IntPointer) tadShapeInfo, new LongPointerWrapper(tadOffsets), (IntPointer) zTadShapeInfo, new LongPointerWrapper(zTadOffsets));
    } else {
        nativeOps.pullRowsHalf(extras, (ShortPointer) x, (IntPointer) xShape, (ShortPointer) z, (IntPointer) zShape, indexes.length, (IntPointer) pIndex, (IntPointer) tadShapeInfo, new LongPointerWrapper(tadOffsets), (IntPointer) zTadShapeInfo, new LongPointerWrapper(zTadOffsets));
    }
    allocator.registerAction(context, ret, source);
    return ret;
}
Also used : ND4JIllegalStateException(org.nd4j.linalg.exception.ND4JIllegalStateException) AtomicAllocator(org.nd4j.jita.allocator.impl.AtomicAllocator) CudaContext(org.nd4j.linalg.jcublas.context.CudaContext) CudaPointer(org.nd4j.jita.allocator.pointers.CudaPointer) CudaIntDataBuffer(org.nd4j.linalg.jcublas.buffer.CudaIntDataBuffer) GridExecutioner(org.nd4j.linalg.api.ops.executioner.GridExecutioner) INDArray(org.nd4j.linalg.api.ndarray.INDArray) LongPointerWrapper(org.nd4j.nativeblas.LongPointerWrapper) TADManager(org.nd4j.linalg.cache.TADManager) DataBuffer(org.nd4j.linalg.api.buffer.DataBuffer) CompressedDataBuffer(org.nd4j.linalg.compression.CompressedDataBuffer) CudaDoubleDataBuffer(org.nd4j.linalg.jcublas.buffer.CudaDoubleDataBuffer)
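
A minimal usage sketch of pullRows through the Nd4j facade; per the shape logic above, sourceDimension == 1 selects rows of a 2D array (the shapes and indexes here are illustrative assumptions):

import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.factory.Nd4j;

public class PullRowsSketch {
    public static void main(String[] args) {
        // Pull rows 0, 2 and 4 from a 5x3 matrix -> a new 3x3 matrix.
        INDArray source = Nd4j.linspace(1, 15, 15).reshape(5, 3);
        INDArray picked = Nd4j.pullRows(source, 1, new int[] {0, 2, 4});
        System.out.println(picked);
    }
}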

Example 5 with TADManager

Use of org.nd4j.linalg.cache.TADManager in project nd4j by deeplearning4j.

The class JCublasNDArrayFactory, method shuffle.

/**
 * Symmetric, in-place shuffle of a set of ndarrays
 * along the specified dimensions. Each array in the list should have its own
 * dimension entry at the same index of the dimensions list.
 *
 * @param arrays     the ndarrays to shuffle
 * @param rnd        the source of randomness for the shuffle
 * @param dimensions the dimensions to shuffle along
 */
@Override
public void shuffle(List<INDArray> arrays, Random rnd, List<int[]> dimensions) {
    // no dimension - no shuffle
    if (dimensions == null || dimensions.size() == 0)
        throw new RuntimeException("Dimension can't be null or 0-length");
    if (arrays == null || arrays.size() == 0)
        throw new RuntimeException("No input arrays provided");
    if (dimensions.size() > 1 && arrays.size() != dimensions.size())
        throw new IllegalStateException("Number of dimensions do not match number of arrays to shuffle");
    Nd4j.getExecutioner().push();
    // first we build TAD for input array and dimensions
    AtomicAllocator allocator = AtomicAllocator.getInstance();
    CudaContext context = null;
    for (int x = 0; x < arrays.size(); x++) {
        context = allocator.getFlowController().prepareAction(arrays.get(x));
    }
    // the last prepared context is reused for all calls below
    int tadLength = 1;
    for (int i = 0; i < dimensions.get(0).length; i++) {
        tadLength *= arrays.get(0).shape()[dimensions.get(0)[i]];
    }
    int numTads = arrays.get(0).length() / tadLength;
    int[] map = ArrayUtil.buildInterleavedVector(rnd, numTads);
    CudaIntDataBuffer shuffle = new CudaIntDataBuffer(map);
    Pointer shuffleMap = allocator.getPointer(shuffle, context);
    PointerPointer extras = new PointerPointer(
            null, // not used
            context.getOldStream(),
            allocator.getDeviceIdPointer());
    long[] xPointers = new long[arrays.size()];
    long[] xShapes = new long[arrays.size()];
    long[] tadShapes = new long[arrays.size()];
    long[] tadOffsets = new long[arrays.size()];
    for (int i = 0; i < arrays.size(); i++) {
        INDArray array = arrays.get(i);
        Pointer x = AtomicAllocator.getInstance().getPointer(array, context);
        Pointer xShapeInfo = AtomicAllocator.getInstance().getPointer(array.shapeInfoDataBuffer(), context);
        TADManager tadManager = Nd4j.getExecutioner().getTADManager();
        int[] dimension = dimensions.size() > 1 ? dimensions.get(i) : dimensions.get(0);
        Pair<DataBuffer, DataBuffer> tadBuffers = tadManager.getTADOnlyShapeInfo(array, dimension);
        // log.info("Original shape: {}; dimension: {}; TAD shape: {}", array.shapeInfoDataBuffer().asInt(), dimension, tadBuffers.getFirst().asInt());
        Pointer tadShapeInfo = AtomicAllocator.getInstance().getPointer(tadBuffers.getFirst(), context);
        DataBuffer offsets = tadBuffers.getSecond();
        if (offsets == null)
            throw new ND4JIllegalStateException("Offsets for shuffle can't be null");
        if (offsets.length() != numTads)
            throw new ND4JIllegalStateException("Can't symmetrically shuffle arrays with non-equal number of TADs");
        Pointer tadOffset = AtomicAllocator.getInstance().getPointer(offsets, context);
        xPointers[i] = x.address();
        xShapes[i] = xShapeInfo.address();
        tadShapes[i] = tadShapeInfo.address();
        tadOffsets[i] = tadOffset.address();
    }
    // CudaDoubleDataBuffers act as raw 8-byte-per-element storage for the 64-bit addresses
    CudaDoubleDataBuffer tempX = new CudaDoubleDataBuffer(arrays.size());
    CudaDoubleDataBuffer tempShapes = new CudaDoubleDataBuffer(arrays.size());
    CudaDoubleDataBuffer tempTAD = new CudaDoubleDataBuffer(arrays.size());
    CudaDoubleDataBuffer tempOffsets = new CudaDoubleDataBuffer(arrays.size());
    AtomicAllocator.getInstance().memcpyBlocking(tempX, new LongPointer(xPointers), xPointers.length * 8, 0);
    AtomicAllocator.getInstance().memcpyBlocking(tempShapes, new LongPointer(xShapes), xShapes.length * 8, 0);
    AtomicAllocator.getInstance().memcpyBlocking(tempTAD, new LongPointer(tadShapes), tadShapes.length * 8, 0);
    AtomicAllocator.getInstance().memcpyBlocking(tempOffsets, new LongPointer(tadOffsets), tadOffsets.length * 8, 0);
    if (Nd4j.dataType() == DataBuffer.Type.DOUBLE) {
        nativeOps.shuffleDouble(extras, new PointerPointer(allocator.getPointer(tempX, context)), new PointerPointer(allocator.getPointer(tempShapes, context)), new PointerPointer(allocator.getPointer(tempX, context)), new PointerPointer(allocator.getPointer(tempShapes, context)), arrays.size(), (IntPointer) shuffleMap, new PointerPointer(allocator.getPointer(tempTAD, context)), new PointerPointer(allocator.getPointer(tempOffsets, context)));
    } else if (Nd4j.dataType() == DataBuffer.Type.FLOAT) {
        nativeOps.shuffleFloat(extras, new PointerPointer(allocator.getPointer(tempX, context)), new PointerPointer(allocator.getPointer(tempShapes, context)), new PointerPointer(allocator.getPointer(tempX, context)), new PointerPointer(allocator.getPointer(tempShapes, context)), arrays.size(), (IntPointer) shuffleMap, new PointerPointer(allocator.getPointer(tempTAD, context)), new PointerPointer(allocator.getPointer(tempOffsets, context)));
    } else {
        // HALFs
        nativeOps.shuffleHalf(extras, new PointerPointer(allocator.getPointer(tempX, context)), new PointerPointer(allocator.getPointer(tempShapes, context)), new PointerPointer(allocator.getPointer(tempX, context)), new PointerPointer(allocator.getPointer(tempShapes, context)), arrays.size(), (IntPointer) shuffleMap, new PointerPointer(allocator.getPointer(tempTAD, context)), new PointerPointer(allocator.getPointer(tempOffsets, context)));
    }
    for (int f = 0; f < arrays.size(); f++) {
        allocator.getFlowController().registerAction(context, arrays.get(f));
    }
    // touch the buffers so they remain strongly reachable until the native op completes
    shuffle.address();
    tempX.dataType();
    tempShapes.dataType();
    tempOffsets.dataType();
    tempTAD.dataType();
}
Also used : ND4JIllegalStateException(org.nd4j.linalg.exception.ND4JIllegalStateException) AtomicAllocator(org.nd4j.jita.allocator.impl.AtomicAllocator) CudaContext(org.nd4j.linalg.jcublas.context.CudaContext) CudaPointer(org.nd4j.jita.allocator.pointers.CudaPointer) CudaIntDataBuffer(org.nd4j.linalg.jcublas.buffer.CudaIntDataBuffer) AllocationPoint(org.nd4j.jita.allocator.impl.AllocationPoint) INDArray(org.nd4j.linalg.api.ndarray.INDArray) CudaDoubleDataBuffer(org.nd4j.linalg.jcublas.buffer.CudaDoubleDataBuffer) TADManager(org.nd4j.linalg.cache.TADManager) DataBuffer(org.nd4j.linalg.api.buffer.DataBuffer) CompressedDataBuffer(org.nd4j.linalg.compression.CompressedDataBuffer)
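
Both CUDA methods above rely on the same pointer-table pattern: 64-bit device addresses are collected in a host long[] and copied into a device buffer whose elements are 8 bytes wide, so the native op receives a device-resident table of pointers. A condensed restatement of that pattern, extracted from the code above (a fragment, not a standalone program):

// One device address per input array, collected on the host.
long[] xPointers = new long[arrays.size()];
// ... filled via AtomicAllocator.getInstance().getPointer(array, context).address() ...

// A double buffer serves purely as raw 8-byte-per-element storage:
// arrays.size() elements x 8 bytes holds exactly arrays.size() addresses.
CudaDoubleDataBuffer tempX = new CudaDoubleDataBuffer(arrays.size());
AtomicAllocator.getInstance().memcpyBlocking(
        tempX, new LongPointer(xPointers), xPointers.length * 8, 0);

// The device-side address of this table is what the native op receives.
PointerPointer xArg = new PointerPointer(allocator.getPointer(tempX, context));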

Aggregations

DataBuffer (org.nd4j.linalg.api.buffer.DataBuffer): 7 usages
INDArray (org.nd4j.linalg.api.ndarray.INDArray): 7 usages
TADManager (org.nd4j.linalg.cache.TADManager): 7 usages
CompressedDataBuffer (org.nd4j.linalg.compression.CompressedDataBuffer): 5 usages
ND4JIllegalStateException (org.nd4j.linalg.exception.ND4JIllegalStateException): 4 usages
CudaContext (org.nd4j.linalg.jcublas.context.CudaContext): 4 usages
AllocationPoint (org.nd4j.jita.allocator.impl.AllocationPoint): 3 usages
AtomicAllocator (org.nd4j.jita.allocator.impl.AtomicAllocator): 3 usages
CudaPointer (org.nd4j.jita.allocator.pointers.CudaPointer): 3 usages
CudaDoubleDataBuffer (org.nd4j.linalg.jcublas.buffer.CudaDoubleDataBuffer): 3 usages
CudaIntDataBuffer (org.nd4j.linalg.jcublas.buffer.CudaIntDataBuffer): 3 usages
Test (org.junit.Test): 2 usages
GridExecutioner (org.nd4j.linalg.api.ops.executioner.GridExecutioner): 2 usages
LongPointerWrapper (org.nd4j.nativeblas.LongPointerWrapper): 2 usages
DeviceTADManager (org.nd4j.jita.allocator.tad.DeviceTADManager): 1 usage
Pair (org.nd4j.linalg.primitives.Pair): 1 usage