Usage of org.nd4j.linalg.compression.CompressedDataBuffer in the nd4j project (by deeplearning4j): class JCublasNDArrayFactory, method convertDataEx.
/**
 * Converts {@code source} to the representation named by {@code typeDst}, returning a new buffer.
 * Depending on the (src, dst) pair this either compresses a plain buffer into a
 * {@link CompressedDataBuffer} or decompresses a {@link CompressedDataBuffer} into a plain one.
 * The actual element conversion is delegated to the 4-arg {@code convertDataEx} overload.
 *
 * @param typeSrc extended type of the source buffer
 * @param source  buffer to convert (host data is synchronized first unless already compressed)
 * @param typeDst extended type of the result
 * @return a freshly allocated buffer holding the converted data
 * @throws UnsupportedOperationException if {@code typeDst} maps to no known element size
 */
@Override
public DataBuffer convertDataEx(DataBuffer.TypeEx typeSrc, DataBuffer source, DataBuffer.TypeEx typeDst) {
    // Byte width of one destination element, derived from the TypeEx ordinal:
    // ordinals 0-2 are 1-byte types, 3-5 are 2-byte, 6 is 4-byte, 7 is 8-byte.
    final int ordinal = typeDst.ordinal();
    final int bytesPerElement;
    if (ordinal <= 2) {
        bytesPerElement = 1;
    } else if (ordinal <= 5) {
        bytesPerElement = 2;
    } else if (ordinal == 6) {
        bytesPerElement = 4;
    } else if (ordinal == 7) {
        bytesPerElement = 8;
    } else {
        throw new UnsupportedOperationException("Unknown target TypeEx: " + typeDst.name());
    }

    // Type conversion happens on the CPU side, so all queued device work must finish first.
    Nd4j.getExecutioner().commit();

    // A plain (uncompressed) source may have stale host memory; pull it down before converting.
    if (!(source instanceof CompressedDataBuffer)) {
        AtomicAllocator.getInstance().synchronizeHostData(source);
    }

    final DataBuffer target;
    if (CompressionUtils.goingToCompress(typeSrc, typeDst)) {
        // Compression path: allocate raw byte storage and wrap it with a lossy descriptor.
        BytePointer storage = new BytePointer(source.length() * bytesPerElement);
        CompressionDescriptor descriptor = new CompressionDescriptor(source, typeDst.name());
        descriptor.setCompressionType(CompressionType.LOSSY);
        descriptor.setCompressedLength(source.length() * bytesPerElement);
        target = new CompressedDataBuffer(storage, descriptor);
    } else {
        // Decompression path: the source must already be compressed; size the plain
        // output buffer from its descriptor.
        CompressionDescriptor descriptor = ((CompressedDataBuffer) source).getCompressionDescriptor();
        target = Nd4j.createBuffer(descriptor.getNumberOfElements(), false);
        // The conversion below writes host-side, so mark the host copy as the fresh one.
        AtomicAllocator.getInstance().getAllocationPoint(target).tickHostWrite();
    }

    convertDataEx(typeSrc, source, typeDst, target);
    return target;
}
Usage of org.nd4j.linalg.compression.CompressedDataBuffer in the nd4j project (by deeplearning4j): class JCublasNDArrayFactory, method convertDataEx (commented-out half-precision helpers shown below for reference).
/*
public DataBuffer convertToHalfs(DataBuffer buffer) {
DataBuffer halfsBuffer = new CudaHalfDataBuffer(buffer.length());
AtomicAllocator allocator = AtomicAllocator.getInstance();
AllocationPoint pointSrc = allocator.getAllocationPoint(buffer);
AllocationPoint pointDst = allocator.getAllocationPoint(halfsBuffer);
CudaContext context = allocator.getFlowController().prepareAction(pointDst, pointSrc);
PointerPointer extras = new PointerPointer(
null, // not used for conversion
context.getOldStream(),
AtomicAllocator.getInstance().getDeviceIdPointer());
Pointer x = AtomicAllocator.getInstance().getPointer(buffer, context);
Pointer z = AtomicAllocator.getInstance().getPointer(halfsBuffer, context);
if (buffer.dataType() == DataBuffer.Type.FLOAT) {
NativeOpsHolder.getInstance().getDeviceNativeOps().convertFloatsToHalfs(extras, x, (int) buffer.length(), z);
pointDst.tickDeviceWrite();
} else if (buffer.dataType() == DataBuffer.Type.DOUBLE) {
NativeOpsHolder.getInstance().getDeviceNativeOps().convertDoublesToHalfs(extras, x, (int) buffer.length(), z);
pointDst.tickDeviceWrite();
} else if (buffer.dataType() == DataBuffer.Type.HALF) {
log.info("Buffer is already HALF-precision");
return buffer;
} else {
throw new UnsupportedOperationException("Conversion INT->HALF isn't supported yet.");
}
allocator.getFlowController().registerAction(context, pointDst, pointSrc);
return halfsBuffer;
}
public DataBuffer restoreFromHalfs(DataBuffer buffer) {
if (buffer.dataType() != DataBuffer.Type.HALF)
throw new IllegalStateException("Input DataBuffer should contain Halfs");
DataBuffer outputBuffer = null;
if (Nd4j.dataType() == DataBuffer.Type.FLOAT) {
outputBuffer = new CudaFloatDataBuffer(buffer.length());
} else if (Nd4j.dataType() == DataBuffer.Type.DOUBLE) {
outputBuffer = new CudaDoubleDataBuffer(buffer.length());
} else throw new UnsupportedOperationException("DataType ["+Nd4j.dataType()+"] isn't supported yet");
AtomicAllocator allocator = AtomicAllocator.getInstance();
AllocationPoint pointSrc = allocator.getAllocationPoint(buffer);
AllocationPoint pointDst = allocator.getAllocationPoint(outputBuffer);
CudaContext context = allocator.getFlowController().prepareAction(pointDst, pointSrc);
PointerPointer extras = new PointerPointer(
null, // not used for conversion
context.getOldStream(),
AtomicAllocator.getInstance().getDeviceIdPointer());
Pointer x = AtomicAllocator.getInstance().getPointer(buffer, context);
Pointer z = AtomicAllocator.getInstance().getPointer(outputBuffer, context);
if (Nd4j.dataType() == DataBuffer.Type.FLOAT) {
NativeOpsHolder.getInstance().getDeviceNativeOps().convertHalfsToFloats(extras, x, (int) buffer.length(), z);
pointDst.tickDeviceWrite();
} else if (Nd4j.dataType() == DataBuffer.Type.DOUBLE) {
NativeOpsHolder.getInstance().getDeviceNativeOps().convertHalfsToDoubles(extras, x, (int) buffer.length(), z);
pointDst.tickDeviceWrite();
} else if (Nd4j.dataType() == DataBuffer.Type.HALF) {
log.info("Buffer is already HALF-precision");
return buffer;
}
allocator.getFlowController().registerAction(context, pointDst, pointSrc);
return outputBuffer;
}
*/
/**
 * Converts the backing buffer of {@code source} in place between extended data representations
 * (compression or decompression), then flags the array's compression state accordingly.
 *
 * @param typeSrc extended type of the array's current data
 * @param source  array to convert; must not be a view
 * @param typeDst extended type to convert to
 * @return {@code source}, with its data buffer replaced by the converted one
 * @throws UnsupportedOperationException if {@code source} is a view (dup() it first)
 */
@Override
public INDArray convertDataEx(DataBuffer.TypeEx typeSrc, INDArray source, DataBuffer.TypeEx typeDst) {
    if (source.isView())
        throw new UnsupportedOperationException("Impossible to compress View. Consider using dup() before. ");

    DataBuffer converted = convertDataEx(typeSrc, source.data(), typeDst);
    source.setData(converted);

    // The array is compressed exactly when the new buffer is a CompressedDataBuffer.
    source.markAsCompressed(converted instanceof CompressedDataBuffer);

    return source;
}
Usage of org.nd4j.linalg.compression.CompressedDataBuffer in the nd4j project (by deeplearning4j): class CudaThreshold, method compress.
/**
 * Threshold-encodes {@code buffer} on the GPU using a three-pass kernel pipeline:
 * P1 counts matching elements per CUDA block, P2 prefix-sums those counts into
 * per-block write offsets, and P3 scatters the matching elements into the result.
 * The returned int buffer has a 3-int header [numMatches, original length,
 * float-bits of threshold] followed by numMatches payload ints.
 * NOTE(review): the exact match predicate lives in the native kernels; the
 * commented-out CPU variant below suggests |value| >= threshold — confirm.
 *
 * @param buffer float buffer to encode (P3 also writes back into it — it is
 *               marked device-dirty below)
 * @return the encoded int buffer
 */
@Override
public DataBuffer compress(DataBuffer buffer) {
    int numThreads = 1024;
    // One CUDA block per numThreads elements, rounded up.
    int numBlocks = (int) (buffer.length() / numThreads + (buffer.length() % numThreads == 0 ? 0 : 1));

    CudaContext context = (CudaContext) AtomicAllocator.getInstance().getDeviceContext().getContext();

    // Per-block match counts (+1 extra slot); slot 0 is read back below as the total.
    // Allocated in the current workspace when one is active.
    DataBuffer blocksBuffer = Nd4j.getMemoryManager().getCurrentWorkspace() == null ? Nd4j.getDataBufferFactory().createInt(numBlocks + 1, true) : Nd4j.getDataBufferFactory().createInt(numBlocks + 1, true, Nd4j.getMemoryManager().getCurrentWorkspace());

    // Kernel launch extras; slot 1 carries the CUDA stream, slot 2 is filled later.
    PointerPointer extras = new PointerPointer(32).put(1, context.getOldStream());

    // Pass 1: per-block counting kernel.
    NativeOpsHolder.getInstance().getDeviceNativeOps().encodeThresholdP1Float(extras, (FloatPointer) AtomicAllocator.getInstance().getPointer(buffer), buffer.length(), (IntPointer) AtomicAllocator.getInstance().getPointer(blocksBuffer), threshold);

    AtomicAllocator.getInstance().getAllocationPoint(blocksBuffer).tickDeviceWrite();

    // Total number of matching elements, written by P1 into slot 0.
    int numMatches = blocksBuffer.getInt(0);
    // log.info("Totals: {}", numMatches);

    /*
    log.info("Number of blocks for compression: {}", numBlocks);
    log.info("BlocksCounts: {}", Arrays.toString(blocksBuffer.asInt()));
    */

    // Result buffer: 3-int header + numMatches payload ints.
    DataBuffer encodedBuffer = Nd4j.getMemoryManager().getCurrentWorkspace() == null ? Nd4j.getDataBufferFactory().createInt(3 + numMatches, false) : Nd4j.getDataBufferFactory().createInt(3 + numMatches, false, Nd4j.getMemoryManager().getCurrentWorkspace());
    encodedBuffer.put(0, numMatches);
    encodedBuffer.put(1, (int) buffer.length());
    encodedBuffer.put(2, Float.floatToIntBits(threshold));

    AtomicAllocator.getInstance().getAllocationPoint(encodedBuffer).tickHostWrite();

    // FIXME: make it parallel via some kernel, because it can be pretty big array here, i.e. for 150m original array, offsets can
    /*
    int prevSum = 0;
    for (int e = 0; e < numBlocks; e++) {
    int prevVal = offsetsBuffer.getInt(e + 1);
    offsetsBuffer.put(e + 1, prevSum);
    prevSum += prevVal;
    }
    */

    int prefixThreads = 512;
    int numElts = numBlocks;
    int level = 0;
    List<DataBuffer> buffers = new ArrayList<>();

    // First loop: count the number of reduction levels the recursive prefix sum needs,
    // to size the device-side pointer table.
    // NOTE(review): this loop tests `numBlocks > 1` (a loop-invariant) while the
    // allocation loop below tests `numPrefixBlocks > 1`, so `level` can overcount and
    // `pointers` may be larger than the number of buffers actually allocated (surplus
    // slots stay 0). Looks benign but confirm it is intentional.
    do {
        int numPrefixBlocks = Math.max(1, (int) Math.ceil((float) numElts / (2.0f * prefixThreads)));
        if (numBlocks > 1) {
            level++;
        }
        numElts = numPrefixBlocks;
    } while (numElts > 1);

    long[] pointers = new long[level];

    level = 0;
    numElts = numBlocks;

    // Device-side table of per-level temp-buffer addresses; a double buffer is used
    // purely as 8-byte-per-slot storage for the 64-bit pointers copied in below.
    DataBuffer tempX = Nd4j.getMemoryManager().getCurrentWorkspace() == null ? Nd4j.getDataBufferFactory().createDouble(pointers.length, false) : Nd4j.getDataBufferFactory().createDouble(pointers.length, false, Nd4j.getMemoryManager().getCurrentWorkspace());

    // Second loop: allocate one int temp buffer per prefix-sum level and record its
    // device address (the `buffers` list also keeps them reachable).
    do {
        int numPrefixBlocks = Math.max(1, (int) Math.ceil((float) numElts / (2.0f * prefixThreads)));
        if (numPrefixBlocks > 1) {
            DataBuffer bf = Nd4j.getMemoryManager().getCurrentWorkspace() == null ? Nd4j.getDataBufferFactory().createInt(numPrefixBlocks, false) : Nd4j.getDataBufferFactory().createInt(numPrefixBlocks, false, Nd4j.getMemoryManager().getCurrentWorkspace());
            buffers.add(bf);
            pointers[level++] = AtomicAllocator.getInstance().getPointer(bf).address();
        }
        numElts = numPrefixBlocks;
    } while (numElts > 1);

    // Copy the pointer table to the device and hand it to the kernels via extras[2].
    AtomicAllocator.getInstance().memcpyBlocking(tempX, new LongPointer(pointers), pointers.length * 8, 0);

    extras.put(2, AtomicAllocator.getInstance().getPointer(tempX));

    // Per-block write offsets produced by the prefix sum (pass 2).
    DataBuffer offsetsBuffer = Nd4j.getMemoryManager().getCurrentWorkspace() == null ? Nd4j.getDataBufferFactory().createInt(numBlocks, true) : Nd4j.getDataBufferFactory().createInt(numBlocks, true, Nd4j.getMemoryManager().getCurrentWorkspace());

    NativeOpsHolder.getInstance().getDeviceNativeOps().encodeThresholdP2Int(extras, (IntPointer) AtomicAllocator.getInstance().getPointer(blocksBuffer), numBlocks, (IntPointer) AtomicAllocator.getInstance().getPointer(offsetsBuffer));
    AtomicAllocator.getInstance().getAllocationPoint(offsetsBuffer).tickDeviceWrite();

    // log.info("Offsets: {}", Arrays.toString(offsetsBuffer.asInt()));
    // log.info("Target: {}", Arrays.toString(encodedBuffer.asInt()));

    // Pass 3: scatter the matching elements into encodedBuffer using the offsets.
    // The source buffer is also marked device-dirty, implying P3 writes into it too.
    NativeOpsHolder.getInstance().getDeviceNativeOps().encodeThresholdP3Float(extras, (FloatPointer) AtomicAllocator.getInstance().getPointer(buffer), (IntPointer) AtomicAllocator.getInstance().getPointer(offsetsBuffer), buffer.length(), (IntPointer) AtomicAllocator.getInstance().getPointer(encodedBuffer));

    AtomicAllocator.getInstance().getAllocationPoint(encodedBuffer).tickDeviceWrite();
    AtomicAllocator.getInstance().getAllocationPoint(buffer).tickDeviceWrite();

    // log.info("Encoded: {}", Arrays.toString(encodedBuffer.asInt()));

    // Touch the native pointers after the kernel calls — presumably to keep these
    // objects strongly reachable (prevent premature GC/deallocation) until the
    // kernels are done; confirm against JavaCPP deallocation semantics.
    extras.address();
    tempX.address();

    return encodedBuffer;
    /*
    INDArray temp = Nd4j.createArrayFromShapeBuffer(buffer, Nd4j.getShapeInfoProvider().createShapeInformation(new int[]{1, (int) buffer.length()}));
    MatchCondition condition = new MatchCondition(temp, Conditions.absGreaterThanOrEqual(threshold));
    int cntAbs = Nd4j.getExecutioner().exec(condition, Integer.MAX_VALUE).getInt(0);
    //log.info("density ratio: {}", String.format("%.2f", cntAbs * 100.0f / buffer.length()));
    if (cntAbs == 0)
    return null;
    long originalLength = buffer.length() * Nd4j.sizeOfDataType(buffer.dataType());
    int compressedLength = cntAbs + 3;
    // first 3 elements contain header
    IntPointer pointer = new IntPointer(compressedLength);
    pointer.put(0, cntAbs);
    pointer.put(1, (int) buffer.length());
    pointer.put(2, Float.floatToIntBits(threshold));
    CompressionDescriptor descriptor = new CompressionDescriptor();
    descriptor.setCompressedLength(compressedLength * 4); // sizeOf(INT)
    descriptor.setOriginalLength(originalLength);
    descriptor.setOriginalElementSize(Nd4j.sizeOfDataType(buffer.dataType()));
    descriptor.setNumberOfElements(buffer.length());
    descriptor.setCompressionAlgorithm(getDescriptor());
    descriptor.setCompressionType(getCompressionType());
    CompressedDataBuffer cbuff = new CompressedDataBuffer(pointer, descriptor);
    Nd4j.getNDArrayFactory().convertDataEx(getBufferTypeEx(buffer), buffer.addressPointer(), DataBuffer.TypeEx.THRESHOLD, pointer, buffer.length());
    Nd4j.getAffinityManager().tagLocation(buffer, AffinityManager.Location.HOST);
    return cbuff;
    */
}
Usage of org.nd4j.linalg.compression.CompressedDataBuffer in the nd4j project (by deeplearning4j): class CpuNDArrayFactory, method convertDataEx.
/**
 * Converts {@code source} to the representation named by {@code typeDst}, returning a new buffer.
 * Compresses a plain buffer into a {@link CompressedDataBuffer}, or decompresses a
 * {@link CompressedDataBuffer} into a plain one; the element-level conversion is delegated
 * to the 4-arg {@code convertDataEx} overload.
 *
 * @param typeSrc extended type of the source buffer
 * @param source  buffer to convert
 * @param typeDst extended type of the result
 * @return a freshly allocated buffer holding the converted data
 * @throws UnsupportedOperationException if {@code typeDst} maps to no known element size
 */
@Override
public DataBuffer convertDataEx(DataBuffer.TypeEx typeSrc, DataBuffer source, DataBuffer.TypeEx typeDst) {
    // Byte width of one destination element, derived from the TypeEx ordinal:
    // ordinals 0-2 -> 1 byte, 3-5 -> 2 bytes, 6 -> 4 bytes, 7 -> 8 bytes.
    final int ord = typeDst.ordinal();
    final int width;
    if (ord <= 2) {
        width = 1;
    } else if (ord <= 5) {
        width = 2;
    } else if (ord == 6) {
        width = 4;
    } else if (ord == 7) {
        width = 8;
    } else {
        throw new UnsupportedOperationException("Unknown target TypeEx: " + typeDst.name());
    }

    final DataBuffer result;
    if (CompressionUtils.goingToCompress(typeSrc, typeDst)) {
        // Compression path: allocate raw byte storage and describe it as lossy-compressed.
        CompressionDescriptor descriptor = new CompressionDescriptor(source, typeDst.name());
        descriptor.setCompressionType(CompressionType.LOSSY);
        descriptor.setCompressedLength(source.length() * width);
        result = new CompressedDataBuffer(new BytePointer(source.length() * width), descriptor);
    } else {
        // Decompression path: the source must already be compressed; size the plain
        // output buffer from its descriptor.
        CompressionDescriptor descriptor = ((CompressedDataBuffer) source).getCompressionDescriptor();
        result = Nd4j.createBuffer(descriptor.getNumberOfElements(), true);
    }

    convertDataEx(typeSrc, source, typeDst, result);
    return result;
}
Usage of org.nd4j.linalg.compression.CompressedDataBuffer in the nd4j project (by deeplearning4j): class CpuFlexibleThreshold, method compress.
/**
 * Encodes {@code buffer} with the flexible-threshold scheme: counts the elements whose
 * magnitude is within {@code threshold} (a relative fraction) of the buffer's absolute
 * maximum, builds a 4-int header plus payload, and delegates the actual encoding to the
 * native {@code convertDataEx} with {@code TypeEx.FTHRESHOLD}.
 *
 * @param buffer buffer to encode (its host location is tagged afterwards)
 * @return a {@link CompressedDataBuffer} holding header + encoded payload
 */
@Override
public DataBuffer compress(DataBuffer buffer) {
    // Wrap the raw buffer as a 1 x length row vector so reductions can run over it.
    INDArray wrapped = Nd4j.createArrayFromShapeBuffer(buffer,
            Nd4j.getShapeInfoProvider().createShapeInformation(new int[] {1, (int) buffer.length()}).getFirst());

    // Elements with |value| >= max - max*threshold survive compression.
    double max = wrapped.amaxNumber().doubleValue();
    int cntAbs = wrapped.scan(Conditions.absGreaterThanOrEqual(max - (max * threshold))).intValue();

    long originalLength = buffer.length() * Nd4j.sizeOfDataType(buffer.dataType());

    // The first 4 ints form the header; the matching elements follow.
    int compressedLength = cntAbs + 4;
    IntPointer pointer = new IntPointer(compressedLength);
    pointer.put(0, cntAbs);                          // number of encoded elements
    pointer.put(1, (int) buffer.length());           // original element count
    pointer.put(2, Float.floatToIntBits(threshold)); // placeholder: overwritten by the encoder
    pointer.put(3, 0);

    CompressionDescriptor descriptor = new CompressionDescriptor();
    descriptor.setCompressedLength(compressedLength * 4); // 4 == sizeof(int)
    descriptor.setOriginalLength(originalLength);
    descriptor.setOriginalElementSize(Nd4j.sizeOfDataType(buffer.dataType()));
    descriptor.setNumberOfElements(buffer.length());
    descriptor.setCompressionAlgorithm(getDescriptor());
    descriptor.setCompressionType(getCompressionType());

    CompressedDataBuffer cbuff = new CompressedDataBuffer(pointer, descriptor);

    // Native conversion fills the payload (and rewrites the header's threshold slot).
    Nd4j.getNDArrayFactory().convertDataEx(getBufferTypeEx(buffer), buffer.addressPointer(),
            DataBuffer.TypeEx.FTHRESHOLD, pointer, buffer.length());

    Nd4j.getAffinityManager().tagLocation(buffer, AffinityManager.Location.HOST);

    return cbuff;
}
Aggregations