Example 61 with DataBuffer

Use of org.nd4j.linalg.api.buffer.DataBuffer in project nd4j by deeplearning4j.

The class JCublasNDArrayFactory, method concat.

@Override
public INDArray concat(int dimension, INDArray... toConcat) {
    if (Nd4j.getExecutioner() instanceof GridExecutioner)
        ((GridExecutioner) Nd4j.getExecutioner()).flushQueue();
    if (toConcat.length == 1)
        return toConcat[0];
    int sumAlongDim = 0;
    for (int i = 0; i < toConcat.length; i++) {
        if (toConcat[i].isCompressed())
            Nd4j.getCompressor().decompressi(toConcat[i]);
        sumAlongDim += toConcat[i].size(dimension);
    }
    int[] outputShape = ArrayUtil.copy(toConcat[0].shape());
    outputShape[dimension] = sumAlongDim;
    INDArray ret = Nd4j.createUninitialized(outputShape, Nd4j.order());
    AtomicAllocator allocator = AtomicAllocator.getInstance();
    CudaContext context = allocator.getFlowController().prepareAction(ret, toConcat);
    long[] shapeInfoPointers = new long[toConcat.length];
    long[] dataPointers = new long[toConcat.length];
    long[] tadPointers = new long[toConcat.length];
    long[] offsetsPointers = new long[toConcat.length];
    long[] hostShapeInfoPointers = new long[toConcat.length];
    TADManager tadManager = Nd4j.getExecutioner().getTADManager();
    for (int i = 0; i < toConcat.length; i++) {
        shapeInfoPointers[i] = AddressRetriever.retrieveDeviceAddress(toConcat[i].shapeInfoDataBuffer(), context);
        dataPointers[i] = AtomicAllocator.getInstance().getPointer(toConcat[i], context).address();
        hostShapeInfoPointers[i] = AtomicAllocator.getInstance().getHostPointer(toConcat[i].shapeInfoDataBuffer()).address();
        for (int j = 0; j < toConcat[i].rank(); j++) {
            if (j != dimension && toConcat[i].size(j) != outputShape[j]) {
                throw new IllegalArgumentException("Illegal concatenation at array " + i + " and shape element " + j);
            }
        }
        Pair<DataBuffer, DataBuffer> tadBuffers = tadManager.getTADOnlyShapeInfo(toConcat[i], new int[] { dimension });
        long devTadShapeInfo = AtomicAllocator.getInstance().getPointer(tadBuffers.getFirst(), context).address();
        DataBuffer offsets = tadBuffers.getSecond();
        long devTadOffsets = AtomicAllocator.getInstance().getPointer(offsets, context).address();
        tadPointers[i] = devTadShapeInfo;
        offsetsPointers[i] = devTadOffsets;
    }
    // getting tadOnlyShape for result
    Pair<DataBuffer, DataBuffer> zBuffers = tadManager.getTADOnlyShapeInfo(ret, new int[] { dimension });
    // System.out.println("shapePointers: " + Arrays.toString(shapeInfoPointers));
    Pointer dZ = AtomicAllocator.getInstance().getPointer(ret, context);
    Pointer dZShapeInfo = AddressRetriever.retrieveDevicePointer(ret.shapeInfoDataBuffer(), context);
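    // Stage the per-array device addresses for the native call: each 64-bit pointer
    // is packed into one 8-byte double, so a buffer of toConcat.length doubles
    // carries exactly one address per input array (copied to the device below).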
    CudaDoubleDataBuffer tempData = new CudaDoubleDataBuffer(toConcat.length);
    CudaDoubleDataBuffer tempShapes = new CudaDoubleDataBuffer(toConcat.length);
    CudaDoubleDataBuffer tempTAD = new CudaDoubleDataBuffer(toConcat.length);
    CudaDoubleDataBuffer tempOffsets = new CudaDoubleDataBuffer(toConcat.length);
    AtomicAllocator.getInstance().memcpyBlocking(tempData, new LongPointer(dataPointers), dataPointers.length * 8, 0);
    AtomicAllocator.getInstance().memcpyBlocking(tempShapes, new LongPointer(shapeInfoPointers), shapeInfoPointers.length * 8, 0);
    AtomicAllocator.getInstance().memcpyBlocking(tempTAD, new LongPointer(tadPointers), tadPointers.length * 8, 0);
    AtomicAllocator.getInstance().memcpyBlocking(tempOffsets, new LongPointer(offsetsPointers), offsetsPointers.length * 8, 0);
    Pointer dataPointer = AtomicAllocator.getInstance().getPointer(tempData, context);
    Pointer shapesPointer = AtomicAllocator.getInstance().getPointer(tempShapes, context);
    Pointer tadPointer = AtomicAllocator.getInstance().getPointer(tempTAD, context);
    Pointer offsetPointer = AtomicAllocator.getInstance().getPointer(tempOffsets, context);
    // System.out.println("ShapesPointer after conversion: " + shapesPointer);
    PointerPointer extras = new PointerPointer(
            AddressRetriever.retrieveHostPointer(ret.shapeInfoDataBuffer()),
            context.getOldStream(),
            allocator.getDeviceIdPointer(),
            context.getBufferAllocation(),
            context.getBufferReduction(),
            context.getBufferScalar(),
            context.getBufferSpecial(),
            AddressRetriever.retrieveHostPointer(toConcat[0].shapeInfoDataBuffer()),
            AddressRetriever.retrieveHostPointer(ret.shapeInfoDataBuffer()),
            new LongPointer(hostShapeInfoPointers),
            // getting zTADShape
            AtomicAllocator.getInstance().getPointer(zBuffers.getFirst(), context),
            // getting zOffset
            AtomicAllocator.getInstance().getPointer(zBuffers.getSecond(), context));
    if (ret.data().dataType() == DataBuffer.Type.DOUBLE) {
        nativeOps.concatDouble(extras, dimension, toConcat.length, new PointerPointer(new Pointer[] { dataPointer }), new PointerPointer(new Pointer[] { shapesPointer }), (DoublePointer) dZ, (IntPointer) dZShapeInfo, new PointerPointer(new Pointer[] { tadPointer }), new PointerPointer(new Pointer[] { offsetPointer }));
    } else if (ret.data().dataType() == DataBuffer.Type.FLOAT) {
        nativeOps.concatFloat(extras, dimension, toConcat.length, new PointerPointer(new Pointer[] { dataPointer }), new PointerPointer(new Pointer[] { shapesPointer }), (FloatPointer) dZ, (IntPointer) dZShapeInfo, new PointerPointer(new Pointer[] { tadPointer }), new PointerPointer(new Pointer[] { offsetPointer }));
    } else {
        nativeOps.concatHalf(extras, dimension, toConcat.length, new PointerPointer(new Pointer[] { dataPointer }), new PointerPointer(new Pointer[] { shapesPointer }), (ShortPointer) dZ, (IntPointer) dZShapeInfo, new PointerPointer(new Pointer[] { tadPointer }), new PointerPointer(new Pointer[] { offsetPointer }));
    }
    allocator.registerAction(context, ret, toConcat);
    return ret;
// return super.concat(dimension, toConcat);
}
Also used: AtomicAllocator(org.nd4j.jita.allocator.impl.AtomicAllocator), CudaContext(org.nd4j.linalg.jcublas.context.CudaContext), CudaPointer(org.nd4j.jita.allocator.pointers.CudaPointer), AllocationPoint(org.nd4j.jita.allocator.impl.AllocationPoint), GridExecutioner(org.nd4j.linalg.api.ops.executioner.GridExecutioner), INDArray(org.nd4j.linalg.api.ndarray.INDArray), CudaDoubleDataBuffer(org.nd4j.linalg.jcublas.buffer.CudaDoubleDataBuffer), TADManager(org.nd4j.linalg.cache.TADManager), DataBuffer(org.nd4j.linalg.api.buffer.DataBuffer), CudaIntDataBuffer(org.nd4j.linalg.jcublas.buffer.CudaIntDataBuffer), CompressedDataBuffer(org.nd4j.linalg.compression.CompressedDataBuffer)
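
For orientation, a minimal caller-side sketch, assuming the standard Nd4j.concat front-end (which dispatches to the backend factory's concat on CUDA backends): every non-concat dimension must match across the inputs, or the validation loop above throws IllegalArgumentException.

import java.util.Arrays;

import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.factory.Nd4j;

public class ConcatSketch {
    public static void main(String[] args) {
        // Two 2x3 matrices: shapes agree on every axis except the concat axis.
        INDArray a = Nd4j.ones(2, 3);
        INDArray b = Nd4j.zeros(2, 3);
        // Concatenating along dimension 0 stacks rows: shape [4, 3].
        INDArray tall = Nd4j.concat(0, a, b);
        // Concatenating along dimension 1 stacks columns: shape [2, 6].
        INDArray wide = Nd4j.concat(1, a, b);
        System.out.println(Arrays.toString(tall.shape()) + " " + Arrays.toString(wide.shape()));
    }
}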

Example 62 with DataBuffer

Use of org.nd4j.linalg.api.buffer.DataBuffer in project nd4j by deeplearning4j.

The class JCublasNDArrayFactory, method sort.

@Override
public INDArray sort(INDArray x, boolean descending, int... dimension) {
    if (x.isScalar())
        return x;
    Arrays.sort(dimension);
    Nd4j.getExecutioner().push();
    Pair<DataBuffer, DataBuffer> tadBuffers = Nd4j.getExecutioner().getTADManager().getTADOnlyShapeInfo(x, dimension);
    CudaContext context = AtomicAllocator.getInstance().getFlowController().prepareAction(x);
    PointerPointer extraz = new PointerPointer(
            AtomicAllocator.getInstance().getHostPointer(x.shapeInfoDataBuffer()), // not used
            context.getOldStream(),
            AtomicAllocator.getInstance().getDeviceIdPointer());
    Pointer dimensionPointer = AtomicAllocator.getInstance().getPointer(AtomicAllocator.getInstance().getConstantBuffer(dimension), context);
    if (x.data().dataType() == DataBuffer.Type.FLOAT) {
        nativeOps.sortTadFloat(extraz, (FloatPointer) AtomicAllocator.getInstance().getPointer(x, context), (IntPointer) AtomicAllocator.getInstance().getPointer(x.shapeInfoDataBuffer(), context), (IntPointer) dimensionPointer, dimension.length, (IntPointer) AtomicAllocator.getInstance().getPointer(tadBuffers.getFirst(), context), new LongPointerWrapper(AtomicAllocator.getInstance().getPointer(tadBuffers.getSecond(), context)), descending);
    } else if (x.data().dataType() == DataBuffer.Type.DOUBLE) {
        nativeOps.sortTadDouble(extraz, (DoublePointer) AtomicAllocator.getInstance().getPointer(x, context), (IntPointer) AtomicAllocator.getInstance().getPointer(x.shapeInfoDataBuffer(), context), (IntPointer) dimensionPointer, dimension.length, (IntPointer) AtomicAllocator.getInstance().getPointer(tadBuffers.getFirst(), context), new LongPointerWrapper(AtomicAllocator.getInstance().getPointer(tadBuffers.getSecond(), context)), descending);
    } else if (x.data().dataType() == DataBuffer.Type.HALF) {
        nativeOps.sortTadHalf(extraz, (ShortPointer) AtomicAllocator.getInstance().getPointer(x, context), (IntPointer) AtomicAllocator.getInstance().getPointer(x.shapeInfoDataBuffer(), context), (IntPointer) dimensionPointer, dimension.length, (IntPointer) AtomicAllocator.getInstance().getPointer(tadBuffers.getFirst(), context), new LongPointerWrapper(AtomicAllocator.getInstance().getPointer(tadBuffers.getSecond(), context)), descending);
    } else {
        throw new UnsupportedOperationException("Unknown dataType " + x.data().dataType());
    }
    AtomicAllocator.getInstance().getFlowController().registerAction(context, x);
    return x;
}
Also used: CudaContext(org.nd4j.linalg.jcublas.context.CudaContext), LongPointerWrapper(org.nd4j.nativeblas.LongPointerWrapper), CudaPointer(org.nd4j.jita.allocator.pointers.CudaPointer), DataBuffer(org.nd4j.linalg.api.buffer.DataBuffer), CudaIntDataBuffer(org.nd4j.linalg.jcublas.buffer.CudaIntDataBuffer), CompressedDataBuffer(org.nd4j.linalg.compression.CompressedDataBuffer), CudaDoubleDataBuffer(org.nd4j.linalg.jcublas.buffer.CudaDoubleDataBuffer)
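
A short usage sketch for the TAD-wise sort above, assuming the Nd4j.sort(array, dimension, ascending) front-end overload: the array is sorted in place, one independent sort per tensor along the given dimension (here, per row).

import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.factory.Nd4j;

public class SortSketch {
    public static void main(String[] args) {
        INDArray x = Nd4j.create(new float[] { 3f, 1f, 2f, 9f, 7f, 8f }, new int[] { 2, 3 });
        // Sort each row independently (dimension 1), ascending.
        Nd4j.sort(x, 1, true);
        // Expected: [[1, 2, 3], [7, 8, 9]]
        System.out.println(x);
    }
}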

Example 63 with DataBuffer

Use of org.nd4j.linalg.api.buffer.DataBuffer in project nd4j by deeplearning4j.

The class JCublasNDArrayFactory, method tear.

public INDArray[] tear(INDArray tensor, int... dimensions) {
    if (tensor.isCompressed())
        Nd4j.getCompressor().decompressi(tensor);
    Arrays.sort(dimensions);
    Pair<DataBuffer, DataBuffer> tadBuffers = Nd4j.getExecutioner().getTADManager().getTADOnlyShapeInfo(tensor, dimensions);
    long tadLength = 1;
    int[] shape = new int[dimensions.length];
    for (int i = 0; i < dimensions.length; i++) {
        tadLength *= tensor.shape()[dimensions[i]];
        shape[i] = tensor.shape()[dimensions[i]];
    }
    int numTads = (int) (tensor.lengthLong() / tadLength);
    INDArray[] result = new INDArray[numTads];
    long[] xPointers = new long[numTads];
    CudaContext context = AtomicAllocator.getInstance().getFlowController().prepareAction(null, tensor);
    for (int x = 0; x < numTads; x++) {
        result[x] = Nd4j.createUninitialized(shape);
        context = AtomicAllocator.getInstance().getFlowController().prepareAction(result[x]);
        xPointers[x] = AtomicAllocator.getInstance().getPointer(result[x], context).address();
    }
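    // Stage one 64-bit destination address per TAD in an 8-byte-per-element buffer,
    // so the native tear kernel can scatter each tensor into its own output array.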
    CudaDoubleDataBuffer tempX = new CudaDoubleDataBuffer(numTads);
    AtomicAllocator.getInstance().memcpyBlocking(tempX, new LongPointer(xPointers), xPointers.length * 8, 0);
    PointerPointer extraz = new PointerPointer(
            null, // not used
            context.getOldStream(),
            AtomicAllocator.getInstance().getDeviceIdPointer());
    if (Nd4j.dataType() == DataBuffer.Type.DOUBLE) {
        nativeOps.tearDouble(extraz, (DoublePointer) AtomicAllocator.getInstance().getPointer(tensor, context), (IntPointer) AtomicAllocator.getInstance().getPointer(tensor.shapeInfoDataBuffer(), context), new PointerPointer(AtomicAllocator.getInstance().getPointer(tempX, context)), (IntPointer) AtomicAllocator.getInstance().getPointer(result[0].shapeInfoDataBuffer(), context), (IntPointer) AtomicAllocator.getInstance().getPointer(tadBuffers.getFirst(), context), new LongPointerWrapper(AtomicAllocator.getInstance().getPointer(tadBuffers.getSecond(), context)));
    } else if (Nd4j.dataType() == DataBuffer.Type.FLOAT) {
        nativeOps.tearFloat(extraz, (FloatPointer) AtomicAllocator.getInstance().getPointer(tensor, context), (IntPointer) AtomicAllocator.getInstance().getPointer(tensor.shapeInfoDataBuffer(), context), new PointerPointer(AtomicAllocator.getInstance().getPointer(tempX, context)), (IntPointer) AtomicAllocator.getInstance().getPointer(result[0].shapeInfoDataBuffer(), context), (IntPointer) AtomicAllocator.getInstance().getPointer(tadBuffers.getFirst(), context), new LongPointerWrapper(AtomicAllocator.getInstance().getPointer(tadBuffers.getSecond(), context)));
    } else if (Nd4j.dataType() == DataBuffer.Type.HALF) {
        nativeOps.tearHalf(extraz, (ShortPointer) AtomicAllocator.getInstance().getPointer(tensor, context), (IntPointer) AtomicAllocator.getInstance().getPointer(tensor.shapeInfoDataBuffer(), context), new PointerPointer(AtomicAllocator.getInstance().getPointer(tempX, context)), (IntPointer) AtomicAllocator.getInstance().getPointer(result[0].shapeInfoDataBuffer(), context), (IntPointer) AtomicAllocator.getInstance().getPointer(tadBuffers.getFirst(), context), new LongPointerWrapper(AtomicAllocator.getInstance().getPointer(tadBuffers.getSecond(), context)));
    } else {
        throw new UnsupportedOperationException("Unknown dataType " + Nd4j.dataType());
    }
    AtomicAllocator.getInstance().getFlowController().registerActionAllWrite(context, result);
    AtomicAllocator.getInstance().getFlowController().registerAction(context, null, result);
    return result;
}
Also used: CudaContext(org.nd4j.linalg.jcublas.context.CudaContext), AllocationPoint(org.nd4j.jita.allocator.impl.AllocationPoint), INDArray(org.nd4j.linalg.api.ndarray.INDArray), CudaDoubleDataBuffer(org.nd4j.linalg.jcublas.buffer.CudaDoubleDataBuffer), LongPointerWrapper(org.nd4j.nativeblas.LongPointerWrapper), DataBuffer(org.nd4j.linalg.api.buffer.DataBuffer), CudaIntDataBuffer(org.nd4j.linalg.jcublas.buffer.CudaIntDataBuffer), CompressedDataBuffer(org.nd4j.linalg.compression.CompressedDataBuffer)
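
As a pure-Java reference for what tear produces, the public view API can build the same result (a sketch: tear performs these copies in a single native call instead of one dup per view):

import java.util.Arrays;

import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.factory.Nd4j;

public class TearSketch {
    public static void main(String[] args) {
        INDArray tensor = Nd4j.linspace(1, 12, 12).reshape(4, 3);
        // Mirror the tadLength / numTads arithmetic from the method above.
        long tadLength = tensor.shape()[1];
        int numTads = (int) (tensor.lengthLong() / tadLength);
        INDArray[] pieces = new INDArray[numTads];
        for (int i = 0; i < numTads; i++) {
            // Each TAD view is detached into its own backing buffer.
            pieces[i] = tensor.tensorAlongDimension(i, 1).dup();
        }
        System.out.println(numTads + " copies of shape " + Arrays.toString(pieces[0].shape()));
    }
}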

Example 64 with DataBuffer

Use of org.nd4j.linalg.api.buffer.DataBuffer in project nd4j by deeplearning4j.

The class JCublasNDArrayFactory, method createFromNpyPointer.

/**
 * Create from an in memory numpy pointer
 *
 * @param pointer the pointer to the
 *                numpy array
 * @return an ndarray created from the in memory
 * numpy pointer
 */
@Override
public INDArray createFromNpyPointer(Pointer pointer) {
    Pointer dataPointer = nativeOps.dataPointForNumpy(pointer);
    int dataBufferElementSize = nativeOps.elementSizeForNpyArray(pointer);
    DataBuffer data = null;
    Pointer shapeBufferPointer = nativeOps.shapeBufferForNumpy(pointer);
    int length = nativeOps.lengthForShapeBufferPointer(shapeBufferPointer);
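    // Shape info entries are 32-bit ints, hence 4 bytes per element below.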
    shapeBufferPointer.capacity(4 * length);
    shapeBufferPointer.limit(4 * length);
    shapeBufferPointer.position(0);
    IntPointer intPointer = new IntPointer(shapeBufferPointer);
    DataBuffer shapeBuffer = Nd4j.createBuffer(shapeBufferPointer, DataBuffer.Type.INT, length, IntIndexer.create(intPointer));
    dataPointer.position(0);
    dataPointer.limit(dataBufferElementSize * Shape.length(shapeBuffer));
    dataPointer.capacity(dataBufferElementSize * Shape.length(shapeBuffer));
    // we don't care about pointers here, they will be copied in BaseCudaDataBuffer method, and indexer will be recreated
    if (dataBufferElementSize == (Float.SIZE / 8)) {
        data = Nd4j.createBuffer(dataPointer, DataBuffer.Type.FLOAT, Shape.length(shapeBuffer), FloatIndexer.create(new FloatPointer(dataPointer)));
    } else if (dataBufferElementSize == (Double.SIZE / 8)) {
        data = Nd4j.createBuffer(dataPointer, DataBuffer.Type.DOUBLE, Shape.length(shapeBuffer), DoubleIndexer.create(new DoublePointer(dataPointer)));
    } else {
        throw new UnsupportedOperationException("Unsupported numpy element size: " + dataBufferElementSize + " bytes");
    }
    INDArray ret = Nd4j.create(data, Shape.shape(shapeBuffer), Shape.strideArr(shapeBuffer), Shape.offset(shapeBuffer), Shape.order(shapeBuffer));
    return ret;
}
Also used: INDArray(org.nd4j.linalg.api.ndarray.INDArray), CudaPointer(org.nd4j.jita.allocator.pointers.CudaPointer), AllocationPoint(org.nd4j.jita.allocator.impl.AllocationPoint), DataBuffer(org.nd4j.linalg.api.buffer.DataBuffer), CudaIntDataBuffer(org.nd4j.linalg.jcublas.buffer.CudaIntDataBuffer), CompressedDataBuffer(org.nd4j.linalg.compression.CompressedDataBuffer), CudaDoubleDataBuffer(org.nd4j.linalg.jcublas.buffer.CudaDoubleDataBuffer)
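
For comparison, the usual file-based entry point is Nd4j.createFromNpyFile, the public .npy loader (a sketch; the path is hypothetical). Note that only 4-byte float and 8-byte double payloads are mapped to buffers in the method above.

import java.io.File;
import java.util.Arrays;

import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.factory.Nd4j;

public class NpySketch {
    public static void main(String[] args) {
        // Hypothetical path: any float32 or float64 array saved with numpy.save.
        INDArray arr = Nd4j.createFromNpyFile(new File("/tmp/weights.npy"));
        System.out.println(Arrays.toString(arr.shape()));
    }
}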

Example 65 with DataBuffer

Use of org.nd4j.linalg.api.buffer.DataBuffer in project nd4j by deeplearning4j.

The class CudaFloatDataBufferTest, method testPut.

@Test
public void testPut() throws Exception {
    DataBuffer buffer = Nd4j.createBuffer(new float[] { 1f, 2f, 3f, 4f });
    buffer.put(2, 16f);
    assertEquals(16.0f, buffer.getFloat(2), 0.001f);
    System.out.println("Data: " + buffer);
}
Also used: DataBuffer(org.nd4j.linalg.api.buffer.DataBuffer), Test(org.junit.Test)
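
A companion sketch exercising a few more accessors from the same DataBuffer interface (put, getFloat, asFloat, length):

import org.nd4j.linalg.api.buffer.DataBuffer;
import org.nd4j.linalg.factory.Nd4j;

public class BufferSketch {
    public static void main(String[] args) {
        DataBuffer buffer = Nd4j.createBuffer(new float[] { 1f, 2f, 3f, 4f });
        // Single-element write, then read back.
        buffer.put(0, 10f);
        System.out.println(buffer.getFloat(0));
        // asFloat() copies the whole buffer into a host-side float[].
        float[] host = buffer.asFloat();
        System.out.println(buffer.length() + " elements, last = " + host[3]);
    }
}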

Aggregations

DataBuffer (org.nd4j.linalg.api.buffer.DataBuffer): 186
INDArray (org.nd4j.linalg.api.ndarray.INDArray): 79
Test (org.junit.Test): 47
CompressedDataBuffer (org.nd4j.linalg.compression.CompressedDataBuffer): 44
CudaContext (org.nd4j.linalg.jcublas.context.CudaContext): 39
CudaPointer (org.nd4j.jita.allocator.pointers.CudaPointer): 30
AllocationPoint (org.nd4j.jita.allocator.impl.AllocationPoint): 25
ND4JIllegalStateException (org.nd4j.linalg.exception.ND4JIllegalStateException): 23
BaseDataBuffer (org.nd4j.linalg.api.buffer.BaseDataBuffer): 19
Pointer (org.bytedeco.javacpp.Pointer): 18
BaseNd4jTest (org.nd4j.linalg.BaseNd4jTest): 16
CudaDoubleDataBuffer (org.nd4j.linalg.jcublas.buffer.CudaDoubleDataBuffer): 16
IntPointer (org.bytedeco.javacpp.IntPointer): 13
PagedPointer (org.nd4j.linalg.api.memory.pointers.PagedPointer): 13
CudaIntDataBuffer (org.nd4j.linalg.jcublas.buffer.CudaIntDataBuffer): 13
DoublePointer (org.bytedeco.javacpp.DoublePointer): 12
FloatPointer (org.bytedeco.javacpp.FloatPointer): 12
GridExecutioner (org.nd4j.linalg.api.ops.executioner.GridExecutioner): 12
LongPointerWrapper (org.nd4j.nativeblas.LongPointerWrapper): 11
CUstream_st (org.bytedeco.javacpp.cuda.CUstream_st): 10