Search in sources :

Example 11 with CompressedDataBuffer

use of org.nd4j.linalg.compression.CompressedDataBuffer in project nd4j by deeplearning4j.

the class JCublasNDArrayFactory method convertDataEx.

/**
 * Converts {@code source} from {@code typeSrc} to {@code typeDst} and returns the newly created buffer.
 *
 * <p>The per-element byte size of the target is picked from the ordinal of {@code typeDst}:
 * ordinals 0-2 use 1 byte, 3-5 use 2 bytes, 6 uses 4 bytes and 7 uses 8 bytes.
 *
 * <p>When {@link CompressionUtils#goingToCompress} reports a compression, the result is a
 * {@link CompressedDataBuffer} backed by a fresh {@link BytePointer}; otherwise {@code source} is cast to
 * {@link CompressedDataBuffer} and a regular buffer with the descriptor's number of elements is created.
 *
 * @param typeSrc extended type of the data held by {@code source}
 * @param source  buffer to convert
 * @param typeDst extended type to convert to
 * @return the buffer that received the converted data
 * @throws UnsupportedOperationException if the ordinal of {@code typeDst} is greater than 7
 */
@Override
public DataBuffer convertDataEx(DataBuffer.TypeEx typeSrc, DataBuffer source, DataBuffer.TypeEx typeDst) {
    final int dstOrdinal = typeDst.ordinal();
    final int bytesPerElement;
    if (dstOrdinal > 7) {
        throw new UnsupportedOperationException("Unknown target TypeEx: " + typeDst.name());
    } else if (dstOrdinal == 7) {
        bytesPerElement = 8;
    } else if (dstOrdinal == 6) {
        bytesPerElement = 4;
    } else if (dstOrdinal > 2) {
        bytesPerElement = 2;
    } else {
        bytesPerElement = 1;
    }

    // Original author's note: the flush must be blocking here, because type conversion happens on the cpu side.
    Nd4j.getExecutioner().commit();

    // Only non-compressed sources are synchronized to the host through the allocator.
    final boolean alreadyCompressed = source instanceof CompressedDataBuffer;
    if (!alreadyCompressed) {
        AtomicAllocator.getInstance().synchronizeHostData(source);
    }

    final DataBuffer target;
    if (CompressionUtils.goingToCompress(typeSrc, typeDst)) {
        // Compression: size the raw storage as element count times target element size.
        final long compressedBytes = source.length() * bytesPerElement;
        BytePointer storage = new BytePointer(compressedBytes);
        CompressionDescriptor info = new CompressionDescriptor(source, typeDst.name());
        info.setCompressionType(CompressionType.LOSSY);
        info.setCompressedLength(compressedBytes);
        target = new CompressedDataBuffer(storage, info);
    } else {
        // Decompression: the element count comes from the compressed buffer's descriptor.
        CompressionDescriptor info = ((CompressedDataBuffer) source).getCompressionDescriptor();
        target = Nd4j.createBuffer(info.getNumberOfElements(), false);
        // Tick the host-write marker on the new buffer's allocation point.
        AtomicAllocator.getInstance().getAllocationPoint(target).tickHostWrite();
    }

    convertDataEx(typeSrc, source, typeDst, target);
    return target;
}
Also used : CompressionDescriptor(org.nd4j.linalg.compression.CompressionDescriptor) CompressedDataBuffer(org.nd4j.linalg.compression.CompressedDataBuffer) AllocationPoint(org.nd4j.jita.allocator.impl.AllocationPoint) AllocationPoint(org.nd4j.jita.allocator.impl.AllocationPoint) DataBuffer(org.nd4j.linalg.api.buffer.DataBuffer) CudaIntDataBuffer(org.nd4j.linalg.jcublas.buffer.CudaIntDataBuffer) CompressedDataBuffer(org.nd4j.linalg.compression.CompressedDataBuffer) CudaDoubleDataBuffer(org.nd4j.linalg.jcublas.buffer.CudaDoubleDataBuffer)

Example 12 with CompressedDataBuffer

use of org.nd4j.linalg.compression.CompressedDataBuffer in project nd4j by deeplearning4j.

the class JCublasNDArrayFactory method convertDataEx.

/*
    public DataBuffer convertToHalfs(DataBuffer buffer) {
        DataBuffer halfsBuffer = new CudaHalfDataBuffer(buffer.length());
    
        AtomicAllocator allocator = AtomicAllocator.getInstance();
    
        AllocationPoint pointSrc = allocator.getAllocationPoint(buffer);
        AllocationPoint pointDst = allocator.getAllocationPoint(halfsBuffer);
    
        CudaContext context =  allocator.getFlowController().prepareAction(pointDst, pointSrc);
    
        PointerPointer extras = new PointerPointer(
                null, // not used for conversion
                context.getOldStream(),
                AtomicAllocator.getInstance().getDeviceIdPointer());
    
        Pointer x = AtomicAllocator.getInstance().getPointer(buffer, context);
        Pointer z = AtomicAllocator.getInstance().getPointer(halfsBuffer, context);
    
        if (buffer.dataType() == DataBuffer.Type.FLOAT) {
            NativeOpsHolder.getInstance().getDeviceNativeOps().convertFloatsToHalfs(extras, x, (int) buffer.length(), z);
            pointDst.tickDeviceWrite();
        } else if (buffer.dataType() == DataBuffer.Type.DOUBLE) {
            NativeOpsHolder.getInstance().getDeviceNativeOps().convertDoublesToHalfs(extras, x, (int) buffer.length(), z);
            pointDst.tickDeviceWrite();
        } else if (buffer.dataType() == DataBuffer.Type.HALF) {
            log.info("Buffer is already HALF-precision");
            return buffer;
        } else {
            throw new UnsupportedOperationException("Conversion INT->HALF isn't supported yet.");
        }
    
        allocator.getFlowController().registerAction(context, pointDst, pointSrc);
    
        return halfsBuffer;
    }
    
    public DataBuffer restoreFromHalfs(DataBuffer buffer) {
        if (buffer.dataType() != DataBuffer.Type.HALF)
            throw new IllegalStateException("Input DataBuffer should contain Halfs");
    
        DataBuffer outputBuffer = null;
    
    
    
        if (Nd4j.dataType() == DataBuffer.Type.FLOAT) {
            outputBuffer = new CudaFloatDataBuffer(buffer.length());
    
        } else if (Nd4j.dataType() == DataBuffer.Type.DOUBLE) {
            outputBuffer = new CudaDoubleDataBuffer(buffer.length());
    
        } else throw new UnsupportedOperationException("DataType ["+Nd4j.dataType()+"] isn't supported yet");
    
        AtomicAllocator allocator = AtomicAllocator.getInstance();
    
        AllocationPoint pointSrc = allocator.getAllocationPoint(buffer);
        AllocationPoint pointDst = allocator.getAllocationPoint(outputBuffer);
    
        CudaContext context =  allocator.getFlowController().prepareAction(pointDst, pointSrc);
    
        PointerPointer extras = new PointerPointer(
                null, // not used for conversion
                context.getOldStream(),
                AtomicAllocator.getInstance().getDeviceIdPointer());
    
        Pointer x = AtomicAllocator.getInstance().getPointer(buffer, context);
        Pointer z = AtomicAllocator.getInstance().getPointer(outputBuffer, context);
    
        if (Nd4j.dataType() == DataBuffer.Type.FLOAT) {
            NativeOpsHolder.getInstance().getDeviceNativeOps().convertHalfsToFloats(extras, x, (int) buffer.length(), z);
            pointDst.tickDeviceWrite();
        } else if (Nd4j.dataType() == DataBuffer.Type.DOUBLE) {
            NativeOpsHolder.getInstance().getDeviceNativeOps().convertHalfsToDoubles(extras, x, (int) buffer.length(), z);
            pointDst.tickDeviceWrite();
        } else if (Nd4j.dataType() == DataBuffer.Type.HALF) {
            log.info("Buffer is already HALF-precision");
            return buffer;
        }
    
        allocator.getFlowController().registerAction(context, pointDst, pointSrc);
    
        return outputBuffer;
    }
    */
/**
 * Converts the data of the given array from {@code typeSrc} to {@code typeDst} in place.
 *
 * <p>The array's current data buffer is converted through the {@code DataBuffer} overload of
 * {@code convertDataEx}, the result is installed on the array with {@code setData}, and the array's
 * compressed flag is set to whether that result is a {@link CompressedDataBuffer}.
 *
 * @param typeSrc extended type of the data currently held by {@code source}
 * @param source  array whose data is converted; must not be a view
 * @param typeDst extended type to convert to
 * @return the same {@code source} instance, now backed by the converted buffer
 * @throws UnsupportedOperationException if {@code source} is a view
 */
@Override
public INDArray convertDataEx(DataBuffer.TypeEx typeSrc, INDArray source, DataBuffer.TypeEx typeDst) {
    if (source.isView()) {
        throw new UnsupportedOperationException("Impossible to compress View. Consider using dup() before. ");
    }

    final DataBuffer converted = convertDataEx(typeSrc, source.data(), typeDst);
    source.setData(converted);
    source.markAsCompressed(converted instanceof CompressedDataBuffer);
    return source;
}
Also used : CompressedDataBuffer(org.nd4j.linalg.compression.CompressedDataBuffer) DataBuffer(org.nd4j.linalg.api.buffer.DataBuffer) CudaIntDataBuffer(org.nd4j.linalg.jcublas.buffer.CudaIntDataBuffer) CompressedDataBuffer(org.nd4j.linalg.compression.CompressedDataBuffer) CudaDoubleDataBuffer(org.nd4j.linalg.jcublas.buffer.CudaDoubleDataBuffer)

Example 13 with CompressedDataBuffer

use of org.nd4j.linalg.compression.CompressedDataBuffer in project nd4j by deeplearning4j.

the class CudaThreshold method compress.

/**
 * Threshold-encodes the given buffer through three device native-ops calls
 * ({@code encodeThresholdP1Float}, {@code encodeThresholdP2Int}, {@code encodeThresholdP3Float}).
 *
 * <p>The returned int buffer holds {@code 3 + numMatches} entries. The first three are a header written
 * here: the match count, the original length (cast to int) and the raw int bits of {@code threshold}.
 * The remaining entries are filled by the third native call.
 *
 * <p>Every temporary buffer is allocated in the current workspace when one is active, and through the
 * plain factory call otherwise.
 *
 * @param buffer source buffer; its pointer is passed to the native calls as a {@code FloatPointer}
 * @return the encoded int buffer (header plus encoded entries)
 */
@Override
public DataBuffer compress(DataBuffer buffer) {
    // One block per 1024 elements, rounded up.
    int numThreads = 1024;
    int numBlocks = (int) (buffer.length() / numThreads + (buffer.length() % numThreads == 0 ? 0 : 1));
    CudaContext context = (CudaContext) AtomicAllocator.getInstance().getDeviceContext().getContext();
    // numBlocks + 1 ints; element 0 is read back below as the total number of matches.
    DataBuffer blocksBuffer = Nd4j.getMemoryManager().getCurrentWorkspace() == null ? Nd4j.getDataBufferFactory().createInt(numBlocks + 1, true) : Nd4j.getDataBufferFactory().createInt(numBlocks + 1, true, Nd4j.getMemoryManager().getCurrentWorkspace());
    // Slot 1 of the extras array carries the context's old stream.
    PointerPointer extras = new PointerPointer(32).put(1, context.getOldStream());
    // Pass 1: fills blocksBuffer from the source data and the threshold.
    NativeOpsHolder.getInstance().getDeviceNativeOps().encodeThresholdP1Float(extras, (FloatPointer) AtomicAllocator.getInstance().getPointer(buffer), buffer.length(), (IntPointer) AtomicAllocator.getInstance().getPointer(blocksBuffer), threshold);
    AtomicAllocator.getInstance().getAllocationPoint(blocksBuffer).tickDeviceWrite();
    int numMatches = blocksBuffer.getInt(0);
    // log.info("Totals: {}", numMatches);
    /*

        log.info("Number of blocks for compression: {}", numBlocks);
        log.info("BlocksCounts: {}", Arrays.toString(blocksBuffer.asInt()));
*/
    // Output buffer: 3 header ints followed by numMatches entries.
    DataBuffer encodedBuffer = Nd4j.getMemoryManager().getCurrentWorkspace() == null ? Nd4j.getDataBufferFactory().createInt(3 + numMatches, false) : Nd4j.getDataBufferFactory().createInt(3 + numMatches, false, Nd4j.getMemoryManager().getCurrentWorkspace());
    // Header: [0] match count, [1] original length, [2] threshold as raw int bits.
    encodedBuffer.put(0, numMatches);
    encodedBuffer.put(1, (int) buffer.length());
    encodedBuffer.put(2, Float.floatToIntBits(threshold));
    AtomicAllocator.getInstance().getAllocationPoint(encodedBuffer).tickHostWrite();
    // FIXME: make it parallel via some kernel, because it can be pretty big array here, i.e. for 150m original array, offsets can
    /*
        int prevSum = 0;
        for (int e = 0; e < numBlocks; e++) {
            int prevVal = offsetsBuffer.getInt(e + 1);
            offsetsBuffer.put(e + 1, prevSum);
            prevSum += prevVal;
        }
        */
    int prefixThreads = 512;
    int numElts = numBlocks;
    int level = 0;
    List<DataBuffer> buffers = new ArrayList<>();
    // here we just calculate number of sumBlock arrays
    // NOTE(review): this guard tests numBlocks, while the allocation loop below tests numPrefixBlocks.
    // As written, level can exceed the number of buffers actually allocated, leaving trailing zero
    // entries in pointers. Confirm whether that is intended before changing it.
    do {
        int numPrefixBlocks = Math.max(1, (int) Math.ceil((float) numElts / (2.0f * prefixThreads)));
        if (numBlocks > 1) {
            level++;
        }
        numElts = numPrefixBlocks;
    } while (numElts > 1);
    // One address slot per counted level; level is reset and reused as the fill index.
    long[] pointers = new long[level];
    level = 0;
    numElts = numBlocks;
    // allocating temp buffers for prefix sum
    // tempX is created as a double buffer with pointers.length elements; it receives the raw
    // 8-byte addresses copied in below.
    DataBuffer tempX = Nd4j.getMemoryManager().getCurrentWorkspace() == null ? Nd4j.getDataBufferFactory().createDouble(pointers.length, false) : Nd4j.getDataBufferFactory().createDouble(pointers.length, false, Nd4j.getMemoryManager().getCurrentWorkspace());
    do {
        int numPrefixBlocks = Math.max(1, (int) Math.ceil((float) numElts / (2.0f * prefixThreads)));
        if (numPrefixBlocks > 1) {
            DataBuffer bf = Nd4j.getMemoryManager().getCurrentWorkspace() == null ? Nd4j.getDataBufferFactory().createInt(numPrefixBlocks, false) : Nd4j.getDataBufferFactory().createInt(numPrefixBlocks, false, Nd4j.getMemoryManager().getCurrentWorkspace());
            // Keep a Java-side reference to the buffer and record its address for the native side.
            buffers.add(bf);
            pointers[level++] = AtomicAllocator.getInstance().getPointer(bf).address();
        }
        numElts = numPrefixBlocks;
    } while (numElts > 1);
    // Copy the collected addresses (8 bytes each) into tempX and expose it through extras slot 2.
    AtomicAllocator.getInstance().memcpyBlocking(tempX, new LongPointer(pointers), pointers.length * 8, 0);
    extras.put(2, AtomicAllocator.getInstance().getPointer(tempX));
    DataBuffer offsetsBuffer = Nd4j.getMemoryManager().getCurrentWorkspace() == null ? Nd4j.getDataBufferFactory().createInt(numBlocks, true) : Nd4j.getDataBufferFactory().createInt(numBlocks, true, Nd4j.getMemoryManager().getCurrentWorkspace());
    // Pass 2: fills offsetsBuffer from blocksBuffer (presumably a prefix sum, judging by the
    // commented-out loop above; confirm against the native implementation).
    NativeOpsHolder.getInstance().getDeviceNativeOps().encodeThresholdP2Int(extras, (IntPointer) AtomicAllocator.getInstance().getPointer(blocksBuffer), numBlocks, (IntPointer) AtomicAllocator.getInstance().getPointer(offsetsBuffer));
    AtomicAllocator.getInstance().getAllocationPoint(offsetsBuffer).tickDeviceWrite();
    // log.info("Offsets: {}", Arrays.toString(offsetsBuffer.asInt()));
    // log.info("Target: {}", Arrays.toString(encodedBuffer.asInt()));
    // Pass 3: writes into encodedBuffer using the source data and the offsets.
    NativeOpsHolder.getInstance().getDeviceNativeOps().encodeThresholdP3Float(extras, (FloatPointer) AtomicAllocator.getInstance().getPointer(buffer), (IntPointer) AtomicAllocator.getInstance().getPointer(offsetsBuffer), buffer.length(), (IntPointer) AtomicAllocator.getInstance().getPointer(encodedBuffer));
    AtomicAllocator.getInstance().getAllocationPoint(encodedBuffer).tickDeviceWrite();
    // NOTE(review): the source buffer is also ticked as device-written, which suggests pass 3
    // modifies the input. Confirm against the native implementation.
    AtomicAllocator.getInstance().getAllocationPoint(buffer).tickDeviceWrite();
    // log.info("Encoded: {}", Arrays.toString(encodedBuffer.asInt()));
    // The results of these two calls are discarded. Presumably they only keep extras and tempX
    // reachable until the native calls above have been issued. TODO confirm.
    extras.address();
    tempX.address();
    return encodedBuffer;
/*
        INDArray temp = Nd4j.createArrayFromShapeBuffer(buffer, Nd4j.getShapeInfoProvider().createShapeInformation(new int[]{1, (int) buffer.length()}));
        MatchCondition condition = new MatchCondition(temp, Conditions.absGreaterThanOrEqual(threshold));
        int cntAbs = Nd4j.getExecutioner().exec(condition, Integer.MAX_VALUE).getInt(0);


        //log.info("density ratio: {}", String.format("%.2f", cntAbs * 100.0f / buffer.length()));

        if (cntAbs == 0)
            return null;

        long originalLength = buffer.length() * Nd4j.sizeOfDataType(buffer.dataType());
        int compressedLength = cntAbs + 3;
        // first 3 elements contain header
        IntPointer pointer = new IntPointer(compressedLength);
        pointer.put(0, cntAbs);
        pointer.put(1, (int) buffer.length());
        pointer.put(2, Float.floatToIntBits(threshold));

        CompressionDescriptor descriptor = new CompressionDescriptor();
        descriptor.setCompressedLength(compressedLength * 4); // sizeOf(INT)
        descriptor.setOriginalLength(originalLength);
        descriptor.setOriginalElementSize(Nd4j.sizeOfDataType(buffer.dataType()));
        descriptor.setNumberOfElements(buffer.length());

        descriptor.setCompressionAlgorithm(getDescriptor());
        descriptor.setCompressionType(getCompressionType());



        CompressedDataBuffer cbuff = new CompressedDataBuffer(pointer, descriptor);

        Nd4j.getNDArrayFactory().convertDataEx(getBufferTypeEx(buffer), buffer.addressPointer(), DataBuffer.TypeEx.THRESHOLD, pointer, buffer.length());

        Nd4j.getAffinityManager().tagLocation(buffer, AffinityManager.Location.HOST);

        return cbuff;
        */
}
Also used : CudaContext(org.nd4j.linalg.jcublas.context.CudaContext) ArrayList(java.util.ArrayList) DataBuffer(org.nd4j.linalg.api.buffer.DataBuffer) CompressedDataBuffer(org.nd4j.linalg.compression.CompressedDataBuffer) CudaDoubleDataBuffer(org.nd4j.linalg.jcublas.buffer.CudaDoubleDataBuffer)

Example 14 with CompressedDataBuffer

use of org.nd4j.linalg.compression.CompressedDataBuffer in project nd4j by deeplearning4j.

the class CpuNDArrayFactory method convertDataEx.

/**
 * Converts {@code source} from {@code typeSrc} to {@code typeDst} and returns the newly created buffer.
 *
 * <p>The per-element byte size of the target is picked from the ordinal of {@code typeDst}:
 * ordinals 0-2 use 1 byte, 3-5 use 2 bytes, 6 uses 4 bytes and 7 uses 8 bytes.
 *
 * <p>When {@link CompressionUtils#goingToCompress} reports a compression, the result is a
 * {@link CompressedDataBuffer} backed by a fresh {@link BytePointer}; otherwise {@code source} is cast to
 * {@link CompressedDataBuffer} and a regular buffer with the descriptor's number of elements is created.
 *
 * @param typeSrc extended type of the data held by {@code source}
 * @param source  buffer to convert
 * @param typeDst extended type to convert to
 * @return the buffer that received the converted data
 * @throws UnsupportedOperationException if the ordinal of {@code typeDst} is greater than 7
 */
@Override
public DataBuffer convertDataEx(DataBuffer.TypeEx typeSrc, DataBuffer source, DataBuffer.TypeEx typeDst) {
    final int bytesPerElement;
    switch (typeDst.ordinal()) {
        case 0:
        case 1:
        case 2:
            bytesPerElement = 1;
            break;
        case 3:
        case 4:
        case 5:
            bytesPerElement = 2;
            break;
        case 6:
            bytesPerElement = 4;
            break;
        case 7:
            bytesPerElement = 8;
            break;
        default:
            throw new UnsupportedOperationException("Unknown target TypeEx: " + typeDst.name());
    }

    final DataBuffer target;
    if (CompressionUtils.goingToCompress(typeSrc, typeDst)) {
        // Compression: size the raw storage as element count times target element size.
        final long compressedBytes = source.length() * bytesPerElement;
        BytePointer storage = new BytePointer(compressedBytes);
        CompressionDescriptor info = new CompressionDescriptor(source, typeDst.name());
        info.setCompressionType(CompressionType.LOSSY);
        info.setCompressedLength(compressedBytes);
        target = new CompressedDataBuffer(storage, info);
    } else {
        // Decompression: the element count comes from the compressed buffer's descriptor.
        CompressionDescriptor info = ((CompressedDataBuffer) source).getCompressionDescriptor();
        target = Nd4j.createBuffer(info.getNumberOfElements(), true);
    }

    convertDataEx(typeSrc, source, typeDst, target);
    return target;
}
Also used : CompressionDescriptor(org.nd4j.linalg.compression.CompressionDescriptor) CompressedDataBuffer(org.nd4j.linalg.compression.CompressedDataBuffer) DataBuffer(org.nd4j.linalg.api.buffer.DataBuffer) CompressedDataBuffer(org.nd4j.linalg.compression.CompressedDataBuffer)

Example 15 with CompressedDataBuffer

use of org.nd4j.linalg.compression.CompressedDataBuffer in project nd4j by deeplearning4j.

the class CpuFlexibleThreshold method compress.

/**
 * Compresses the given buffer with the flexible-threshold scheme ({@code FTHRESHOLD}).
 *
 * <p>The cutoff is derived from the largest absolute value in the buffer:
 * {@code max - (max * threshold)}. Elements whose absolute value reaches the cutoff are counted, and
 * an int pointer of that count plus a 4-int header is allocated. The header is
 * {@code [count, original length, threshold bits, 0]}.
 *
 * @param buffer source buffer to compress
 * @return a {@link CompressedDataBuffer} wrapping the encoded ints and their descriptor
 */
@Override
public DataBuffer compress(DataBuffer buffer) {
    // View the buffer as a 1 x length array so the reductions below can run on it.
    final int[] rowShape = new int[] { 1, (int) buffer.length() };
    INDArray view = Nd4j.createArrayFromShapeBuffer(buffer, Nd4j.getShapeInfoProvider().createShapeInformation(rowShape).getFirst());

    final double absMax = view.amaxNumber().doubleValue();
    final double cutoff = absMax - (absMax * threshold);
    final int matches = view.scan(Conditions.absGreaterThanOrEqual(cutoff)).intValue();

    final long sourceBytes = buffer.length() * Nd4j.sizeOfDataType(buffer.dataType());

    // Four header ints precede the encoded entries.
    final int encodedInts = matches + 4;
    IntPointer encoded = new IntPointer(encodedInts);
    encoded.put(0, matches);
    encoded.put(1, (int) buffer.length());
    // please note, this value will be overwritten anyway
    encoded.put(2, Float.floatToIntBits(threshold));
    encoded.put(3, 0);

    // Each encoded entry is an int, hence 4 bytes apiece.
    final int encodedBytes = encodedInts * 4;
    CompressionDescriptor info = new CompressionDescriptor();
    info.setCompressedLength(encodedBytes);
    info.setOriginalLength(sourceBytes);
    info.setOriginalElementSize(Nd4j.sizeOfDataType(buffer.dataType()));
    info.setNumberOfElements(buffer.length());
    info.setCompressionAlgorithm(getDescriptor());
    info.setCompressionType(getCompressionType());

    CompressedDataBuffer result = new CompressedDataBuffer(encoded, info);

    // Run the actual encoding into the int pointer, then tag the source buffer as host-located.
    Nd4j.getNDArrayFactory().convertDataEx(getBufferTypeEx(buffer), buffer.addressPointer(), DataBuffer.TypeEx.FTHRESHOLD, encoded, buffer.length());
    Nd4j.getAffinityManager().tagLocation(buffer, AffinityManager.Location.HOST);
    return result;
}
Also used : CompressionDescriptor(org.nd4j.linalg.compression.CompressionDescriptor) INDArray(org.nd4j.linalg.api.ndarray.INDArray) CompressedDataBuffer(org.nd4j.linalg.compression.CompressedDataBuffer) IntPointer(org.bytedeco.javacpp.IntPointer)

Aggregations

CompressedDataBuffer (org.nd4j.linalg.compression.CompressedDataBuffer)22 CompressionDescriptor (org.nd4j.linalg.compression.CompressionDescriptor)17 BytePointer (org.bytedeco.javacpp.BytePointer)10 DataBuffer (org.nd4j.linalg.api.buffer.DataBuffer)8 INDArray (org.nd4j.linalg.api.ndarray.INDArray)4 ByteBuffer (java.nio.ByteBuffer)3 IntPointer (org.bytedeco.javacpp.IntPointer)3 CudaDoubleDataBuffer (org.nd4j.linalg.jcublas.buffer.CudaDoubleDataBuffer)3 AllocationPoint (org.nd4j.jita.allocator.impl.AllocationPoint)2 CudaIntDataBuffer (org.nd4j.linalg.jcublas.buffer.CudaIntDataBuffer)2 CudaContext (org.nd4j.linalg.jcublas.context.CudaContext)2 ByteArrayInputStream (java.io.ByteArrayInputStream)1 DataInputStream (java.io.DataInputStream)1 DataOutputStream (java.io.DataOutputStream)1 ArrayList (java.util.ArrayList)1 GZIPInputStream (java.util.zip.GZIPInputStream)1 GZIPOutputStream (java.util.zip.GZIPOutputStream)1 ByteArrayOutputStream (org.apache.commons.io.output.ByteArrayOutputStream)1 Pointer (org.bytedeco.javacpp.Pointer)1 MatchCondition (org.nd4j.linalg.api.ops.impl.accum.MatchCondition)1