Use of org.nd4j.nativeblas.LongPointerWrapper in project nd4j by deeplearning4j.
The class JCublasNDArrayFactory, method tear:
public INDArray[] tear(INDArray tensor, int... dimensions) {
if (tensor.isCompressed())
Nd4j.getCompressor().decompressi(tensor);
Arrays.sort(dimensions);
Pair<DataBuffer, DataBuffer> tadBuffers = Nd4j.getExecutioner().getTADManager().getTADOnlyShapeInfo(tensor, dimensions);
long tadLength = 1;
int[] shape = new int[dimensions.length];
for (int i = 0; i < dimensions.length; i++) {
tadLength *= tensor.shape()[dimensions[i]];
shape[i] = tensor.shape()[dimensions[i]];
}
int numTads = (int) (tensor.lengthLong() / tadLength);
INDArray[] result = new INDArray[numTads];
long[] xPointers = new long[numTads];
CudaContext context = AtomicAllocator.getInstance().getFlowController().prepareAction(null, tensor);
for (int x = 0; x < numTads; x++) {
result[x] = Nd4j.createUninitialized(shape);
context = AtomicAllocator.getInstance().getFlowController().prepareAction(result[x]);
xPointers[x] = AtomicAllocator.getInstance().getPointer(result[x], context).address();
}
CudaDoubleDataBuffer tempX = new CudaDoubleDataBuffer(numTads);
AtomicAllocator.getInstance().memcpyBlocking(tempX, new LongPointer(xPointers), xPointers.length * 8, 0);
PointerPointer extraz = new PointerPointer(null, // not used
context.getOldStream(), AtomicAllocator.getInstance().getDeviceIdPointer());
if (Nd4j.dataType() == DataBuffer.Type.DOUBLE) {
nativeOps.tearDouble(extraz, (DoublePointer) AtomicAllocator.getInstance().getPointer(tensor, context), (IntPointer) AtomicAllocator.getInstance().getPointer(tensor.shapeInfoDataBuffer(), context), new PointerPointer(AtomicAllocator.getInstance().getPointer(tempX, context)), (IntPointer) AtomicAllocator.getInstance().getPointer(result[0].shapeInfoDataBuffer(), context), (IntPointer) AtomicAllocator.getInstance().getPointer(tadBuffers.getFirst(), context), new LongPointerWrapper(AtomicAllocator.getInstance().getPointer(tadBuffers.getSecond(), context)));
} else if (Nd4j.dataType() == DataBuffer.Type.FLOAT) {
nativeOps.tearFloat(extraz, (FloatPointer) AtomicAllocator.getInstance().getPointer(tensor, context), (IntPointer) AtomicAllocator.getInstance().getPointer(tensor.shapeInfoDataBuffer(), context), new PointerPointer(AtomicAllocator.getInstance().getPointer(tempX, context)), (IntPointer) AtomicAllocator.getInstance().getPointer(result[0].shapeInfoDataBuffer(), context), (IntPointer) AtomicAllocator.getInstance().getPointer(tadBuffers.getFirst(), context), new LongPointerWrapper(AtomicAllocator.getInstance().getPointer(tadBuffers.getSecond(), context)));
} else if (Nd4j.dataType() == DataBuffer.Type.HALF) {
nativeOps.tearHalf(extraz, (ShortPointer) AtomicAllocator.getInstance().getPointer(tensor, context), (IntPointer) AtomicAllocator.getInstance().getPointer(tensor.shapeInfoDataBuffer(), context), new PointerPointer(AtomicAllocator.getInstance().getPointer(tempX, context)), (IntPointer) AtomicAllocator.getInstance().getPointer(result[0].shapeInfoDataBuffer(), context), (IntPointer) AtomicAllocator.getInstance().getPointer(tadBuffers.getFirst(), context), new LongPointerWrapper(AtomicAllocator.getInstance().getPointer(tadBuffers.getSecond(), context)));
}
AtomicAllocator.getInstance().getFlowController().registerActionAllWrite(context, result);
AtomicAllocator.getInstance().getFlowController().registerAction(context, null, result);
return result;
}
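For orientation, here is a minimal usage sketch (not taken from the project sources): it assumes the Nd4j.tear facade method, which delegates to the backend factory method shown above. Tearing a 4x3 matrix along dimension 1 should yield 12 / 3 = 4 independent arrays, one per TAD.

import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.factory.Nd4j;

public class TearExample {
    public static void main(String[] args) {
        // 4x3 matrix; tearing along dimension 1 is expected to produce
        // numTads = 12 / 3 = 4 independent arrays (exact shapes may vary by version)
        INDArray m = Nd4j.linspace(1, 12, 12).reshape(4, 3);
        INDArray[] pieces = Nd4j.tear(m, 1);
        System.out.println(pieces.length); // expected: 4
        System.out.println(pieces[0]);     // first TAD, detached from m
    }
}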
Use of org.nd4j.nativeblas.LongPointerWrapper in project nd4j by deeplearning4j.
The class JCublasNDArrayFactory, method pullRows:
/**
* This method produces a concatenated array consisting of tensors fetched from the source array
* along the given dimension at the specified indexes.
*
* @param source source tensor
* @param sourceDimension dimension of the source tensor to pull along
* @param indexes indexes to fetch from the source array
* @param order ordering ('c' or 'f') of the returned array
* @return the resulting concatenated array
*/
@Override
public INDArray pullRows(INDArray source, int sourceDimension, int[] indexes, char order) {
if (Nd4j.getExecutioner() instanceof GridExecutioner)
((GridExecutioner) Nd4j.getExecutioner()).flushQueue();
if (indexes == null || indexes.length < 1)
throw new IllegalStateException("Indexes can't be null or zero-length");
int[] shape = null;
if (sourceDimension == 1)
shape = new int[] { indexes.length, source.shape()[sourceDimension] };
else if (sourceDimension == 0)
shape = new int[] { source.shape()[sourceDimension], indexes.length };
else
throw new UnsupportedOperationException("2D input is expected");
INDArray ret = Nd4j.createUninitialized(shape, order);
AtomicAllocator allocator = AtomicAllocator.getInstance();
CudaContext context = allocator.getFlowController().prepareAction(ret, source);
Pointer x = AtomicAllocator.getInstance().getPointer(source, context);
Pointer xShape = AtomicAllocator.getInstance().getPointer(source.shapeInfoDataBuffer(), context);
Pointer z = AtomicAllocator.getInstance().getPointer(ret, context);
Pointer zShape = AtomicAllocator.getInstance().getPointer(ret.shapeInfoDataBuffer(), context);
PointerPointer extras = new PointerPointer(AddressRetriever.retrieveHostPointer(ret.shapeInfoDataBuffer()), context.getOldStream(), allocator.getDeviceIdPointer());
CudaIntDataBuffer tempIndexes = new CudaIntDataBuffer(indexes.length);
AtomicAllocator.getInstance().memcpyBlocking(tempIndexes, new IntPointer(indexes), indexes.length * 4, 0);
Pointer pIndex = AtomicAllocator.getInstance().getPointer(tempIndexes, context);
TADManager tadManager = Nd4j.getExecutioner().getTADManager();
Pair<DataBuffer, DataBuffer> tadBuffers = tadManager.getTADOnlyShapeInfo(source, new int[] { sourceDimension });
Pair<DataBuffer, DataBuffer> zTadBuffers = tadManager.getTADOnlyShapeInfo(ret, new int[] { sourceDimension });
Pointer tadShapeInfo = AtomicAllocator.getInstance().getPointer(tadBuffers.getFirst(), context);
Pointer zTadShapeInfo = AtomicAllocator.getInstance().getPointer(zTadBuffers.getFirst(), context);
DataBuffer offsets = tadBuffers.getSecond();
Pointer tadOffsets = AtomicAllocator.getInstance().getPointer(offsets, context);
Pointer zTadOffsets = AtomicAllocator.getInstance().getPointer(zTadBuffers.getSecond(), context);
if (ret.data().dataType() == DataBuffer.Type.DOUBLE) {
nativeOps.pullRowsDouble(extras, (DoublePointer) x, (IntPointer) xShape, (DoublePointer) z, (IntPointer) zShape, indexes.length, (IntPointer) pIndex, (IntPointer) tadShapeInfo, new LongPointerWrapper(tadOffsets), (IntPointer) zTadShapeInfo, new LongPointerWrapper(zTadOffsets));
} else if (ret.data().dataType() == DataBuffer.Type.FLOAT) {
nativeOps.pullRowsFloat(extras, (FloatPointer) x, (IntPointer) xShape, (FloatPointer) z, (IntPointer) zShape, indexes.length, (IntPointer) pIndex, (IntPointer) tadShapeInfo, new LongPointerWrapper(tadOffsets), (IntPointer) zTadShapeInfo, new LongPointerWrapper(zTadOffsets));
} else {
nativeOps.pullRowsHalf(extras, (ShortPointer) x, (IntPointer) xShape, (ShortPointer) z, (IntPointer) zShape, indexes.length, (IntPointer) pIndex, (IntPointer) tadShapeInfo, new LongPointerWrapper(tadOffsets), (IntPointer) zTadShapeInfo, new LongPointerWrapper(zTadOffsets));
}
allocator.registerAction(context, ret, source);
return ret;
}
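A hedged usage sketch (assumed API: the Nd4j.pullRows facade, which routes to this backend method): it pulls two tensors along dimension 1 of a 3x4 matrix, so the result shape should be [2, 4].

import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.factory.Nd4j;

public class PullRowsExample {
    public static void main(String[] args) {
        INDArray m = Nd4j.linspace(1, 12, 12).reshape(3, 4); // 3x4 matrix
        // sourceDimension == 1: result shape is [indexes.length, source.shape()[1]] = [2, 4]
        INDArray picked = Nd4j.pullRows(m, 1, new int[] {0, 2});
        System.out.println(picked.shapeInfoToString());
    }
}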
Use of org.nd4j.nativeblas.LongPointerWrapper in project nd4j by deeplearning4j.
The class CudaGridExecutioner, method exec:
@Override
public void exec(MetaOp op) {
if (extraz.get() == null)
extraz.set(new PointerPointer(32));
prepareGrid(op);
GridPointers first = op.getGridDescriptor().getGridPointers().get(0);
GridPointers second = op.getGridDescriptor().getGridPointers().get(1);
// we need this only for the first op, since for MetaOps the second op shares the same X & Z by definition
CudaContext context = AtomicAllocator.getInstance().getFlowController().prepareAction(first.getOpZ(), first.getOpY());
// AtomicAllocator.getInstance().getFlowController().prepareAction(second.getOpX(), second.getOpY(), second.getOpZ());
// CudaContext context = (CudaContext) AtomicAllocator.getInstance().getDeviceContext().getContext();
PointerPointer extras = extraz.get().put(null, context.getOldStream());
double scalarA = 0.0;
double scalarB = 0.0;
if (op.getFirstOp() instanceof ScalarOp)
scalarA = ((ScalarOp) op.getFirstOp()).scalar().doubleValue();
if (op.getSecondOp() instanceof ScalarOp)
scalarB = ((ScalarOp) op.getSecondOp()).scalar().doubleValue();
// logger.info("FirstOp: {}, SecondOp: {}", op.getFirstOp().getClass().getSimpleName(), op.getSecondOp().getClass().getSimpleName());
/*
TODO: launch can be either strided or shapeInfo-based; it doesn't really matter for us.
We just need to pass all pointers.
TODO: obviously, execMetaPredicateElementwiseFloat should be renamed to execMetaPredicateStridedFloat
*/
// FIXME: this is a bad hack, reconsider this one
GridPointers yGrid = first;
if (op.getSecondOp().y() != null) {
yGrid = second;
}
if (op instanceof PredicateMetaOp || op instanceof InvertedPredicateMetaOp) {
if (first.getDtype() == DataBuffer.Type.FLOAT) {
if (yGrid.getYOrder() == yGrid.getXOrder() && yGrid.getXStride() >= 1 && yGrid.getYStride() >= 1) {
nativeOps.execMetaPredicateStridedFloat(extras, first.getType().ordinal(), first.getOpNum(), second.getType().ordinal(), second.getOpNum(), first.getXLength(), // can be null
(FloatPointer) first.getX(), // can be null
first.getXStride(), // can be null
(FloatPointer) yGrid.getY(), // can be -1
yGrid.getYStride(), (FloatPointer) second.getZ(), second.getZStride(), (FloatPointer) first.getExtraArgs(), (FloatPointer) second.getExtraArgs(), (float) scalarA, (float) scalarB);
} else {
nativeOps.execMetaPredicateShapeFloat(extras, first.getType().ordinal(), first.getOpNum(), second.getType().ordinal(), second.getOpNum(), first.getXLength(), (FloatPointer) first.getX(), (IntPointer) first.getXShapeInfo(), // can be null
(FloatPointer) yGrid.getY(), // can be -1
(IntPointer) yGrid.getYShapeInfo(), (FloatPointer) second.getZ(), (IntPointer) second.getZShapeInfo(), (FloatPointer) first.getExtraArgs(), (FloatPointer) second.getExtraArgs(), (float) scalarA, (float) scalarB);
}
} else if (first.getDtype() == DataBuffer.Type.DOUBLE) {
if (yGrid.getYOrder() == yGrid.getXOrder() && yGrid.getXStride() >= 1 && yGrid.getYStride() >= 1) {
nativeOps.execMetaPredicateStridedDouble(extras, first.getType().ordinal(), first.getOpNum(), second.getType().ordinal(), second.getOpNum(), first.getXLength(), // can be null
(DoublePointer) first.getX(), // can be null
first.getXStride(), // can be null
(DoublePointer) yGrid.getY(), // can be -1
yGrid.getYStride(), (DoublePointer) second.getZ(), second.getZStride(), (DoublePointer) first.getExtraArgs(), (DoublePointer) second.getExtraArgs(), scalarA, scalarB);
} else {
nativeOps.execMetaPredicateShapeDouble(extras, first.getType().ordinal(), first.getOpNum(), second.getType().ordinal(), second.getOpNum(), first.getXLength(), (DoublePointer) first.getX(), (IntPointer) first.getXShapeInfo(), // can be null
(DoublePointer) yGrid.getY(), // can be -1
(IntPointer) yGrid.getYShapeInfo(), (DoublePointer) second.getZ(), (IntPointer) second.getZShapeInfo(), (DoublePointer) first.getExtraArgs(), (DoublePointer) second.getExtraArgs(), scalarA, scalarB);
}
} else {
if (yGrid.getYOrder() == yGrid.getXOrder() && yGrid.getXStride() >= 1 && yGrid.getYStride() >= 1) {
nativeOps.execMetaPredicateStridedHalf(extras, first.getType().ordinal(), first.getOpNum(), second.getType().ordinal(), second.getOpNum(), first.getXLength(), // can be null
(ShortPointer) first.getX(), // can be null
first.getXStride(), // can be null
(ShortPointer) yGrid.getY(), // can be -1
yGrid.getYStride(), (ShortPointer) second.getZ(), second.getZStride(), (ShortPointer) first.getExtraArgs(), (ShortPointer) second.getExtraArgs(), (float) scalarA, (float) scalarB);
} else {
nativeOps.execMetaPredicateShapeHalf(extras, first.getType().ordinal(), first.getOpNum(), second.getType().ordinal(), second.getOpNum(), first.getXLength(), (ShortPointer) first.getX(), (IntPointer) first.getXShapeInfo(), // can be null
(ShortPointer) yGrid.getY(), // can be -1
(IntPointer) yGrid.getYShapeInfo(), (ShortPointer) second.getZ(), (IntPointer) second.getZShapeInfo(), (ShortPointer) first.getExtraArgs(), (ShortPointer) second.getExtraArgs(), (float) scalarA, (float) scalarB);
}
}
} else if (op instanceof ReduceMetaOp) {
if (first.getDtype() == DataBuffer.Type.FLOAT) {
nativeOps.execMetaPredicateReduceFloat(extras, first.getType().ordinal(), first.getOpNum(), second.getType().ordinal(), second.getOpNum(), (FloatPointer) first.getX(), (IntPointer) first.getXShapeInfo(), (FloatPointer) second.getY(), (IntPointer) second.getYShapeInfo(), (FloatPointer) second.getZ(), (IntPointer) second.getZShapeInfo(), (IntPointer) second.getDimensions(), second.getDimensionsLength(), (IntPointer) second.getTadShape(), new LongPointerWrapper(second.getTadOffsets()), (FloatPointer) first.getExtraArgs(), (FloatPointer) second.getExtraArgs(), (float) scalarA, 0.0f, false);
}
}
AtomicAllocator.getInstance().getFlowController().registerAction(context, first.getOpZ(), first.getOpY());
// AtomicAllocator.getInstance().getFlowController().registerAction(context, second.getOpX(), second.getOpY(), second.getOpZ());
}
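The method above dispatches twice: first on data type (FLOAT/DOUBLE/HALF), then on whether the cheaper strided kernel can be used instead of the shapeInfo-based one. The sketch below is illustrative only (the class and method names are hypothetical, not part of nd4j) and restates that second condition in isolation.

public final class MetaKernelDispatch {

    // The strided kernel is chosen only when x and y share the same element order
    // and both report valid (>= 1) element-wise strides; otherwise the
    // shapeInfo-based kernel handles arbitrary views.
    static boolean canUseStridedKernel(char xOrder, char yOrder, int xStride, int yStride) {
        return xOrder == yOrder && xStride >= 1 && yStride >= 1;
    }

    public static void main(String[] args) {
        System.out.println(canUseStridedKernel('c', 'c', 1, 1));  // true  -> strided path
        System.out.println(canUseStridedKernel('c', 'f', 1, 1));  // false -> shapeInfo path
        System.out.println(canUseStridedKernel('c', 'c', -1, 1)); // false -> shapeInfo path
    }
}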
Use of org.nd4j.nativeblas.LongPointerWrapper in project nd4j by deeplearning4j.
The class CpuNDArrayFactory, method pullRows:
/**
* This method produces a concatenated array consisting of tensors fetched from the source array
* along the given dimension at the specified indexes.
*
* @param source source tensor
* @param sourceDimension dimension of the source tensor to pull along
* @param indexes indexes to fetch from the source array
* @param order ordering ('c' or 'f') of the returned array
* @return the resulting concatenated array
*/
@Override
public INDArray pullRows(INDArray source, int sourceDimension, int[] indexes, char order) {
if (indexes == null || indexes.length < 1)
throw new IllegalStateException("Indexes can't be null or zero-length");
int[] shape = null;
if (sourceDimension == 1)
shape = new int[] { indexes.length, source.shape()[sourceDimension] };
else if (sourceDimension == 0)
shape = new int[] { source.shape()[sourceDimension], indexes.length };
else
throw new UnsupportedOperationException("2D input is expected");
INDArray ret = Nd4j.createUninitialized(shape, order);
Nd4j.getCompressor().autoDecompress(source);
PointerPointer dummy = new PointerPointer(new Pointer[] { null });
TADManager tadManager = Nd4j.getExecutioner().getTADManager();
Pair<DataBuffer, DataBuffer> tadBuffers = tadManager.getTADOnlyShapeInfo(source, new int[] { sourceDimension });
Pair<DataBuffer, DataBuffer> zTadBuffers = tadManager.getTADOnlyShapeInfo(ret, new int[] { sourceDimension });
Pointer hostTadShapeInfo = tadBuffers.getFirst().addressPointer();
Pointer zTadShapeInfo = zTadBuffers.getFirst().addressPointer();
IntPointer pIndex = new IntPointer(indexes);
DataBuffer offsets = tadBuffers.getSecond();
Pointer hostTadOffsets = offsets == null ? null : offsets.addressPointer();
DataBuffer zOffsets = zTadBuffers.getSecond();
Pointer zTadOffsets = zOffsets == null ? null : zOffsets.addressPointer();
if (ret.data().dataType() == DataBuffer.Type.DOUBLE) {
nativeOps.pullRowsDouble(dummy, (DoublePointer) source.data().addressPointer(), (IntPointer) source.shapeInfoDataBuffer().addressPointer(), (DoublePointer) ret.data().addressPointer(), (IntPointer) ret.shapeInfoDataBuffer().addressPointer(), indexes.length, pIndex, (IntPointer) hostTadShapeInfo, new LongPointerWrapper(hostTadOffsets), (IntPointer) zTadShapeInfo, new LongPointerWrapper(zTadOffsets));
} else if (ret.data().dataType() == DataBuffer.Type.FLOAT) {
nativeOps.pullRowsFloat(dummy, (FloatPointer) source.data().addressPointer(), (IntPointer) source.shapeInfoDataBuffer().addressPointer(), (FloatPointer) ret.data().addressPointer(), (IntPointer) ret.shapeInfoDataBuffer().addressPointer(), indexes.length, pIndex, (IntPointer) hostTadShapeInfo, new LongPointerWrapper(hostTadOffsets), (IntPointer) zTadShapeInfo, new LongPointerWrapper(zTadOffsets));
} else {
nativeOps.pullRowsHalf(dummy, (ShortPointer) source.data().addressPointer(), (IntPointer) source.shapeInfoDataBuffer().addressPointer(), (ShortPointer) ret.data().addressPointer(), (IntPointer) ret.shapeInfoDataBuffer().addressPointer(), indexes.length, pIndex, (IntPointer) hostTadShapeInfo, new LongPointerWrapper(hostTadOffsets), (IntPointer) zTadShapeInfo, new LongPointerWrapper(zTadOffsets));
}
return ret;
}
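As a small aid, the helper below (hypothetical, not part of nd4j) restates the output-shape rule this method applies to its 2D input before calling the native pullRows kernels.

public final class PullRowsShapes {
    // Restates the shape branch from pullRows above: dimension 1 selects rows,
    // dimension 0 selects columns; anything else is rejected.
    static int[] resultShape(int[] sourceShape, int sourceDimension, int numIndexes) {
        if (sourceDimension == 1)
            return new int[] {numIndexes, sourceShape[1]};
        if (sourceDimension == 0)
            return new int[] {sourceShape[0], numIndexes};
        throw new UnsupportedOperationException("2D input is expected");
    }

    public static void main(String[] args) {
        // 3x4 source, pulling 2 indexes
        System.out.println(java.util.Arrays.toString(resultShape(new int[] {3, 4}, 1, 2))); // [2, 4]
        System.out.println(java.util.Arrays.toString(resultShape(new int[] {3, 4}, 0, 2))); // [3, 2]
    }
}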
Use of org.nd4j.nativeblas.LongPointerWrapper in project nd4j by deeplearning4j.
The class CpuNDArrayFactory, method tear:
public INDArray[] tear(INDArray tensor, int... dimensions) {
if (tensor.isCompressed())
Nd4j.getCompressor().decompressi(tensor);
Arrays.sort(dimensions);
Pair<DataBuffer, DataBuffer> tadBuffers = Nd4j.getExecutioner().getTADManager().getTADOnlyShapeInfo(tensor, dimensions);
long tadLength = 1;
int[] shape = new int[dimensions.length];
for (int i = 0; i < dimensions.length; i++) {
tadLength *= tensor.shape()[dimensions[i]];
shape[i] = tensor.shape()[dimensions[i]];
}
int numTads = (int) (tensor.lengthLong() / tadLength);
INDArray[] result = new INDArray[numTads];
PointerPointer targets = new PointerPointer(numTads);
for (int x = 0; x < numTads; x++) {
result[x] = Nd4j.createUninitialized(shape);
targets.put(x, result[x].data().pointer());
}
if (Nd4j.dataType() == DataBuffer.Type.DOUBLE) {
nativeOps.tearDouble(null, (DoublePointer) tensor.data().pointer(), (IntPointer) tensor.shapeInfoDataBuffer().pointer(), targets, (IntPointer) result[0].shapeInfoDataBuffer().pointer(), (IntPointer) tadBuffers.getFirst().pointer(), new LongPointerWrapper(tadBuffers.getSecond().pointer()));
} else if (Nd4j.dataType() == DataBuffer.Type.FLOAT) {
nativeOps.tearFloat(null, (FloatPointer) tensor.data().pointer(), (IntPointer) tensor.shapeInfoDataBuffer().pointer(), targets, (IntPointer) result[0].shapeInfoDataBuffer().pointer(), (IntPointer) tadBuffers.getFirst().pointer(), new LongPointerWrapper(tadBuffers.getSecond().pointer()));
} else if (Nd4j.dataType() == DataBuffer.Type.HALF) {
throw new UnsupportedOperationException("Half precision isn't supported for CPU backend");
}
return result;
}
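To make the sizing arithmetic explicit, the sketch below (hypothetical names, not part of nd4j) restates how the number of returned arrays follows from the tensor length and the TAD length computed above; note that this CPU variant rejects half precision outright.

public final class TearTadCount {
    // Restates the arithmetic from tear above: the number of resulting arrays is the
    // total element count divided by the element count of one TAD slice.
    static long numTads(long[] tensorShape, int[] sortedDimensions) {
        long total = 1;
        for (long d : tensorShape)
            total *= d;
        long tadLength = 1;
        for (int d : sortedDimensions)
            tadLength *= tensorShape[d];
        return total / tadLength;
    }

    public static void main(String[] args) {
        // A 2x3x4 tensor torn along dimensions {1, 2}: tadLength = 12, so 24 / 12 = 2 pieces
        System.out.println(numTads(new long[] {2, 3, 4}, new int[] {1, 2})); // 2
    }
}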