Use of org.nd4j.nativeblas.LongPointerWrapper in project nd4j by deeplearning4j.
The class CudaExecutioner, method naiveExec.
/**
* Executes an accumulation (reduction) op along the given dimension(s).
*
* @param op        the accumulation op to execute
* @param dimension the dimension(s) to reduce over; Integer.MAX_VALUE denotes the whole array
* @return the result array, op.z()
*/
protected INDArray naiveExec(Accumulation op, int... dimension) {
long st = profilingHookIn(op);
INDArray ret = op.z();
validateDataType(Nd4j.dataType(), op);
for (int i = 0; i < dimension.length; i++) if (dimension[i] >= op.x().rank() && dimension[i] != Integer.MAX_VALUE)
throw new ND4JIllegalStateException("Op target dimension " + Arrays.toString(dimension) + " contains an element higher than the rank of op.X: [" + op.x().rank() + "]");
CudaContext context = AtomicAllocator.getInstance().getFlowController().prepareAction(op.z(), op.x(), op.y());
if (CudaEnvironment.getInstance().getConfiguration().isDebug())
lastOp.set(op.opName());
Pointer hostYShapeInfo = op.y() == null ? null : AddressRetriever.retrieveHostPointer(op.y().shapeInfoDataBuffer());
Pointer hostZShapeInfo = op.z() == null ? null : AddressRetriever.retrieveHostPointer(op.z().shapeInfoDataBuffer());
Pair<DataBuffer, DataBuffer> tadBuffers = tadManager.getTADOnlyShapeInfo(op.x(), dimension);
/*
if (op.opNum() == 3) {
log.info("Max shape: {}", Arrays.toString(op.x().shapeInfoDataBuffer().asInt()));
log.info("Max TAD: {}", Arrays.toString(tadBuffers.getFirst().asInt()));
context.syncOldStream();
}
*/
Pointer hostTadShapeInfo = AddressRetriever.retrieveHostPointer(tadBuffers.getFirst());
Pointer devTadShapeInfo = AtomicAllocator.getInstance().getPointer(tadBuffers.getFirst(), context);
DataBuffer offsets = tadBuffers.getSecond();
Pointer devTadOffsets = offsets == null ? null : AtomicAllocator.getInstance().getPointer(offsets, context);
Pointer x = AtomicAllocator.getInstance().getPointer(op.x(), context);
Pointer xShapeInfo = AtomicAllocator.getInstance().getPointer(op.x().shapeInfoDataBuffer(), context);
if (extraz.get() == null)
extraz.set(new PointerPointer(32));
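// extraz slots 0..11 carry host/device shape info and TAD pointers; slots 12 and 13 are filled in below with Y TAD info when op.y() is present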
PointerPointer xShapeInfoHostPointer = extraz.get().put(AddressRetriever.retrieveHostPointer(op.x().shapeInfoDataBuffer()), context.getOldStream(), AtomicAllocator.getInstance().getDeviceIdPointer(), context.getBufferAllocation(), context.getBufferReduction(), context.getBufferScalar(), context.getBufferSpecial(), hostYShapeInfo, hostZShapeInfo, hostTadShapeInfo, devTadShapeInfo, devTadOffsets);
Pointer yDevTadOffsets = null;
Pointer yDevTadShapeInfo = null;
if (op.y() != null) {
if ((dimension.length == 1 && dimension[0] == Integer.MAX_VALUE) || op.x().tensorAlongDimension(0, dimension).lengthLong() != op.y().lengthLong()) {
if (!op.isComplexAccumulation() && op.x().lengthLong() != op.y().lengthLong())
throw new ND4JIllegalStateException("Op.X [" + op.x().lengthLong() + "] and Op.Y [" + op.y().lengthLong() + "] lengths should match");
Pair<DataBuffer, DataBuffer> yTadBuffers = tadManager.getTADOnlyShapeInfo(op.y(), dimension);
yDevTadShapeInfo = AtomicAllocator.getInstance().getPointer(yTadBuffers.getFirst(), context);
DataBuffer yOffsets = yTadBuffers.getSecond();
yDevTadOffsets = yOffsets == null ? null : AtomicAllocator.getInstance().getPointer(yOffsets, context);
xShapeInfoHostPointer.put(12, yDevTadShapeInfo);
xShapeInfoHostPointer.put(13, yDevTadOffsets);
} else {
// TAD vs full array code branch
val fakeOffsets = Nd4j.getConstantHandler().getConstantBuffer(new int[] { 0, 0 });
yDevTadOffsets = fakeOffsets == null ? null : AtomicAllocator.getInstance().getPointer(fakeOffsets, context);
yDevTadShapeInfo = AtomicAllocator.getInstance().getPointer(op.y().shapeInfoDataBuffer(), context);
xShapeInfoHostPointer.put(12, AtomicAllocator.getInstance().getPointer(op.y().shapeInfoDataBuffer(), context));
xShapeInfoHostPointer.put(13, null);
}
}
Pointer extraArgs = op.extraArgs() != null ? AtomicAllocator.getInstance().getPointer(op.extraArgsDataBuff(), context) : null;
Pointer dimensionPointer = AtomicAllocator.getInstance().getPointer(AtomicAllocator.getInstance().getConstantBuffer(dimension), context);
if (op.x().data().dataType() == DataBuffer.Type.DOUBLE) {
if (op instanceof Variance) {
if (ret.isScalar()) {
double res = nativeOps.execSummaryStatsScalarDouble(xShapeInfoHostPointer, op.opNum(), (DoublePointer) x, (IntPointer) xShapeInfo, (DoublePointer) extraArgs, ((Variance) op).isBiasCorrected());
AtomicAllocator.getInstance().registerAction(context, op.z(), op.x(), op.y());
ret.assign(res);
op.setFinalResult(res);
} else {
nativeOps.execSummaryStatsDouble(xShapeInfoHostPointer, op.opNum(), (DoublePointer) x, (IntPointer) xShapeInfo, (DoublePointer) extraArgs, (DoublePointer) AtomicAllocator.getInstance().getPointer(op.z(), context), (IntPointer) AtomicAllocator.getInstance().getPointer(op.z().shapeInfoDataBuffer(), context), (IntPointer) dimensionPointer, dimension.length, ((Variance) op).isBiasCorrected());
AtomicAllocator.getInstance().registerAction(context, op.z(), op.x(), op.y());
}
} else if (op.y() != null) {
if (op.isComplexAccumulation()) {
val dT = new LongPointerWrapper(devTadOffsets);
val yT = new LongPointerWrapper(yDevTadOffsets);
nativeOps.execReduce3AllDouble(xShapeInfoHostPointer, op.opNum(), (DoublePointer) x, (IntPointer) xShapeInfo, (DoublePointer) extraArgs, (DoublePointer) AtomicAllocator.getInstance().getPointer(op.y(), context), (IntPointer) AtomicAllocator.getInstance().getPointer(op.y().shapeInfoDataBuffer(), context), (DoublePointer) AtomicAllocator.getInstance().getPointer(op.z(), context), (IntPointer) AtomicAllocator.getInstance().getPointer(op.z().shapeInfoDataBuffer(), context), (IntPointer) dimensionPointer, dimension.length, (IntPointer) devTadShapeInfo, dT, (IntPointer) yDevTadShapeInfo, yT);
AtomicAllocator.getInstance().registerAction(context, op.z(), op.x(), op.y());
} else if (ret.isScalar()) {
double res = nativeOps.execReduce3ScalarDouble(xShapeInfoHostPointer, op.opNum(), (DoublePointer) x, (IntPointer) xShapeInfo, (DoublePointer) extraArgs, (DoublePointer) AtomicAllocator.getInstance().getPointer(op.y(), context), (IntPointer) AtomicAllocator.getInstance().getPointer(op.y().shapeInfoDataBuffer(), context));
AtomicAllocator.getInstance().registerAction(context, op.z(), op.x(), op.y());
ret.assign(res);
op.setFinalResult(res);
} else {
nativeOps.execReduce3Double(xShapeInfoHostPointer, op.opNum(), (DoublePointer) x, (IntPointer) xShapeInfo, (DoublePointer) extraArgs, (DoublePointer) AtomicAllocator.getInstance().getPointer(op.y(), context), (IntPointer) AtomicAllocator.getInstance().getPointer(op.y().shapeInfoDataBuffer(), context), (DoublePointer) AtomicAllocator.getInstance().getPointer(op.z(), context), (IntPointer) AtomicAllocator.getInstance().getPointer(op.z().shapeInfoDataBuffer(), context), (IntPointer) dimensionPointer, dimension.length);
AtomicAllocator.getInstance().registerAction(context, op.z(), op.x(), op.y());
}
} else {
if (ret.isScalar()) {
double res = nativeOps.execReduceScalarDouble(xShapeInfoHostPointer, op.opNum(), (DoublePointer) x, (IntPointer) xShapeInfo, (DoublePointer) extraArgs);
AtomicAllocator.getInstance().registerAction(context, op.z(), op.x(), op.y());
ret.assign(res);
op.setFinalResult(res);
} else {
nativeOps.execReduceDouble(xShapeInfoHostPointer, op.opNum(), (DoublePointer) x, (IntPointer) xShapeInfo, (DoublePointer) extraArgs, (DoublePointer) AtomicAllocator.getInstance().getPointer(op.z(), context), (IntPointer) AtomicAllocator.getInstance().getPointer(op.z().shapeInfoDataBuffer(), context), (IntPointer) dimensionPointer, dimension.length);
AtomicAllocator.getInstance().registerAction(context, op.z(), op.x(), op.y());
}
}
} else if (op.x().data().dataType() == DataBuffer.Type.FLOAT) {
if (op instanceof Variance) {
if (ret.isScalar()) {
float res = nativeOps.execSummaryStatsScalarFloat(xShapeInfoHostPointer, op.opNum(), (FloatPointer) x, (IntPointer) xShapeInfo, (FloatPointer) extraArgs, ((Variance) op).isBiasCorrected());
AtomicAllocator.getInstance().registerAction(context, op.z(), op.x(), op.y());
ret.assign(res);
op.setFinalResult(res);
} else {
nativeOps.execSummaryStatsFloat(xShapeInfoHostPointer, op.opNum(), (FloatPointer) x, (IntPointer) xShapeInfo, (FloatPointer) extraArgs, (FloatPointer) AtomicAllocator.getInstance().getPointer(op.z(), context), (IntPointer) AtomicAllocator.getInstance().getPointer(op.z().shapeInfoDataBuffer(), context), (IntPointer) dimensionPointer, dimension.length, ((Variance) op).isBiasCorrected());
AtomicAllocator.getInstance().registerAction(context, op.z(), op.x(), op.y());
}
} else if (op.y() != null) {
if (op.isComplexAccumulation()) {
nativeOps.execReduce3AllFloat(xShapeInfoHostPointer, op.opNum(), (FloatPointer) x, (IntPointer) xShapeInfo, (FloatPointer) extraArgs, (FloatPointer) AtomicAllocator.getInstance().getPointer(op.y(), context), (IntPointer) AtomicAllocator.getInstance().getPointer(op.y().shapeInfoDataBuffer(), context), (FloatPointer) AtomicAllocator.getInstance().getPointer(op.z(), context), (IntPointer) AtomicAllocator.getInstance().getPointer(op.z().shapeInfoDataBuffer(), context), (IntPointer) dimensionPointer, dimension.length, (IntPointer) devTadShapeInfo, new LongPointerWrapper(devTadOffsets), (IntPointer) yDevTadShapeInfo, new LongPointerWrapper(yDevTadOffsets));
AtomicAllocator.getInstance().registerAction(context, op.z(), op.x(), op.y());
} else if (ret.isScalar()) {
float res = nativeOps.execReduce3ScalarFloat(xShapeInfoHostPointer, op.opNum(), (FloatPointer) x, (IntPointer) xShapeInfo, (FloatPointer) extraArgs, (FloatPointer) AtomicAllocator.getInstance().getPointer(op.y(), context), (IntPointer) AtomicAllocator.getInstance().getPointer(op.y().shapeInfoDataBuffer(), context));
ret.assign(res);
op.setFinalResult(res);
AtomicAllocator.getInstance().registerAction(context, op.z(), op.x(), op.y());
} else {
nativeOps.execReduce3Float(xShapeInfoHostPointer, op.opNum(), (FloatPointer) x, (IntPointer) xShapeInfo, (FloatPointer) extraArgs, (FloatPointer) AtomicAllocator.getInstance().getPointer(op.y(), context), (IntPointer) AtomicAllocator.getInstance().getPointer(op.y().shapeInfoDataBuffer(), context), (FloatPointer) AtomicAllocator.getInstance().getPointer(op.z(), context), (IntPointer) AtomicAllocator.getInstance().getPointer(op.z().shapeInfoDataBuffer(), context), (IntPointer) dimensionPointer, dimension.length);
AtomicAllocator.getInstance().registerAction(context, op.z(), op.x(), op.y());
}
} else {
if (ret.isScalar()) {
float res = nativeOps.execReduceScalarFloat(xShapeInfoHostPointer, op.opNum(), (FloatPointer) x, (IntPointer) xShapeInfo, (FloatPointer) extraArgs);
AtomicAllocator.getInstance().registerAction(context, op.z(), op.x(), op.y());
ret.assign(res);
op.setFinalResult(res);
} else {
nativeOps.execReduceFloat(xShapeInfoHostPointer, op.opNum(), (FloatPointer) x, (IntPointer) xShapeInfo, (FloatPointer) extraArgs, (FloatPointer) AtomicAllocator.getInstance().getPointer(op.z(), context), (IntPointer) AtomicAllocator.getInstance().getPointer(op.z().shapeInfoDataBuffer(), context), (IntPointer) dimensionPointer, dimension.length);
AtomicAllocator.getInstance().registerAction(context, op.z(), op.x(), op.y());
}
}
} else {
if (op instanceof Variance) {
if (ret.isScalar()) {
float res = nativeOps.execSummaryStatsScalarHalf(xShapeInfoHostPointer, op.opNum(), (ShortPointer) x, (IntPointer) xShapeInfo, (ShortPointer) extraArgs, ((Variance) op).isBiasCorrected());
AtomicAllocator.getInstance().registerAction(context, op.z(), op.x(), op.y());
ret.assign(res);
op.setFinalResult(res);
} else {
nativeOps.execSummaryStatsHalf(xShapeInfoHostPointer, op.opNum(), (ShortPointer) x, (IntPointer) xShapeInfo, (ShortPointer) extraArgs, (ShortPointer) AtomicAllocator.getInstance().getPointer(op.z(), context), (IntPointer) AtomicAllocator.getInstance().getPointer(op.z().shapeInfoDataBuffer(), context), (IntPointer) dimensionPointer, dimension.length, ((Variance) op).isBiasCorrected());
AtomicAllocator.getInstance().registerAction(context, op.z(), op.x(), op.y());
}
} else if (op.y() != null) {
if (op.isComplexAccumulation()) {
nativeOps.execReduce3AllHalf(xShapeInfoHostPointer, op.opNum(), (ShortPointer) x, (IntPointer) xShapeInfo, (ShortPointer) extraArgs, (ShortPointer) AtomicAllocator.getInstance().getPointer(op.y(), context), (IntPointer) AtomicAllocator.getInstance().getPointer(op.y().shapeInfoDataBuffer(), context), (ShortPointer) AtomicAllocator.getInstance().getPointer(op.z(), context), (IntPointer) AtomicAllocator.getInstance().getPointer(op.z().shapeInfoDataBuffer(), context), (IntPointer) dimensionPointer, dimension.length, (IntPointer) devTadShapeInfo, new LongPointerWrapper(devTadOffsets), (IntPointer) yDevTadShapeInfo, new LongPointerWrapper(yDevTadOffsets));
AtomicAllocator.getInstance().registerAction(context, op.z(), op.x(), op.y());
} else if (ret.isScalar()) {
float res = nativeOps.execReduce3ScalarHalf(xShapeInfoHostPointer, op.opNum(), (ShortPointer) x, (IntPointer) xShapeInfo, (ShortPointer) extraArgs, (ShortPointer) AtomicAllocator.getInstance().getPointer(op.y(), context), (IntPointer) AtomicAllocator.getInstance().getPointer(op.y().shapeInfoDataBuffer(), context));
AtomicAllocator.getInstance().registerAction(context, op.z(), op.x(), op.y());
ret.assign(res);
op.setFinalResult(res);
} else {
nativeOps.execReduce3Half(xShapeInfoHostPointer, op.opNum(), (ShortPointer) x, (IntPointer) xShapeInfo, (ShortPointer) extraArgs, (ShortPointer) AtomicAllocator.getInstance().getPointer(op.y(), context), (IntPointer) AtomicAllocator.getInstance().getPointer(op.y().shapeInfoDataBuffer(), context), (ShortPointer) AtomicAllocator.getInstance().getPointer(op.z(), context), (IntPointer) AtomicAllocator.getInstance().getPointer(op.z().shapeInfoDataBuffer(), context), (IntPointer) dimensionPointer, dimension.length);
AtomicAllocator.getInstance().registerAction(context, op.z(), op.x(), op.y());
}
} else {
if (ret.isScalar()) {
float res = nativeOps.execReduceScalarHalf(xShapeInfoHostPointer, op.opNum(), (ShortPointer) x, (IntPointer) xShapeInfo, (ShortPointer) extraArgs);
AtomicAllocator.getInstance().registerAction(context, op.z(), op.x(), op.y());
ret.assign(res);
op.setFinalResult(res);
} else {
nativeOps.execReduceHalf(xShapeInfoHostPointer, op.opNum(), (ShortPointer) x, (IntPointer) xShapeInfo, (ShortPointer) extraArgs, (ShortPointer) AtomicAllocator.getInstance().getPointer(op.z(), context), (IntPointer) AtomicAllocator.getInstance().getPointer(op.z().shapeInfoDataBuffer(), context), (IntPointer) dimensionPointer, dimension.length);
AtomicAllocator.getInstance().registerAction(context, op.z(), op.x(), op.y());
}
}
}
profilingHookOut(op, st);
return op.z();
}
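For orientation, this path is normally reached through the executioner's public exec entry point rather than by calling naiveExec directly. A minimal sketch, assuming the 0.9.x-era API where Sum lives under org.nd4j.linalg.api.ops.impl.accum:
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.api.ops.impl.accum.Sum;
import org.nd4j.linalg.factory.Nd4j;
INDArray x = Nd4j.linspace(1, 6, 6).reshape(2, 3);
// Reduce along dimension 1: one value per row. On the CUDA backend this dispatches into CudaExecutioner and ends up in naiveExec above.
INDArray rowSums = Nd4j.getExecutioner().exec(new Sum(x), 1);
// Integer.MAX_VALUE means "reduce the whole array to a scalar", matching the dimension checks above.
INDArray total = Nd4j.getExecutioner().exec(new Sum(x), Integer.MAX_VALUE);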
Use of org.nd4j.nativeblas.LongPointerWrapper in project nd4j by deeplearning4j.
The class BasicTADManager, method getTADOnlyShapeInfo.
@Override
public Pair<DataBuffer, DataBuffer> getTADOnlyShapeInfo(INDArray array, int[] dimension) {
if (dimension == null)
dimension = new int[] { Integer.MAX_VALUE };
if (dimension.length > 1)
Arrays.sort(dimension);
boolean isScalar = dimension.length == 1 && dimension[0] == Integer.MAX_VALUE;
// FIXME: this is fast triage, remove it later
int targetRank = isScalar ? 2 : array.rank();
long offsetLength = 0;
long tadLength = 1;
if (!isScalar)
for (int i = 0; i < dimension.length; i++) {
tadLength *= array.shape()[dimension[i]];
}
if (!isScalar)
offsetLength = array.lengthLong() / tadLength;
else
offsetLength = 1;
// logger.info("Original shape info before TAD: {}", array.shapeInfoDataBuffer());
// logger.info("dimension: {}, tadLength: {}, offsetLength for TAD: {}", Arrays.toString(dimension),tadLength, offsetLength);
DataBuffer outputBuffer = new CudaIntDataBuffer(targetRank * 2 + 4);
DataBuffer offsetsBuffer = new CudaLongDataBuffer(offsetLength);
AtomicAllocator.getInstance().getAllocationPoint(outputBuffer).tickHostWrite();
AtomicAllocator.getInstance().getAllocationPoint(offsetsBuffer).tickHostWrite();
DataBuffer dimensionBuffer = AtomicAllocator.getInstance().getConstantBuffer(dimension);
Pointer dimensionPointer = AtomicAllocator.getInstance().getHostPointer(dimensionBuffer);
Pointer xShapeInfo = AddressRetriever.retrieveHostPointer(array.shapeInfoDataBuffer());
Pointer targetPointer = AddressRetriever.retrieveHostPointer(outputBuffer);
Pointer offsetsPointer = AddressRetriever.retrieveHostPointer(offsetsBuffer);
if (!isScalar)
nativeOps.tadOnlyShapeInfo((IntPointer) xShapeInfo, (IntPointer) dimensionPointer, dimension.length, (IntPointer) targetPointer, new LongPointerWrapper(offsetsPointer));
else {
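// whole-array TAD: hand-write shape info for a [1, 1] view (rank, shape, stride, offset, element-wise stride, order 'c' == 99)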
outputBuffer.put(0, 2);
outputBuffer.put(1, 1);
outputBuffer.put(2, 1);
outputBuffer.put(3, 1);
outputBuffer.put(4, 1);
outputBuffer.put(5, 0);
outputBuffer.put(6, 0);
outputBuffer.put(7, 99);
}
AtomicAllocator.getInstance().getAllocationPoint(outputBuffer).tickHostWrite();
AtomicAllocator.getInstance().getAllocationPoint(offsetsBuffer).tickHostWrite();
return new Pair<>(outputBuffer, offsetsBuffer);
}
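The pair returned here is what every call site above consumes: the first buffer is int shape info for a single tensor-along-dimension, the second holds 64-bit offsets, which is why it is rewrapped with LongPointerWrapper before crossing into libnd4j. A hedged usage sketch (the Pair import is assumed to be org.nd4j.linalg.primitives.Pair in this era):
import org.nd4j.linalg.api.buffer.DataBuffer;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.factory.Nd4j;
import org.nd4j.linalg.primitives.Pair;
INDArray arr = Nd4j.create(4, 5);
// One TAD per row along dimension 1: TAD length 5, so 20 / 5 = 4 offsets.
Pair<DataBuffer, DataBuffer> tad = Nd4j.getExecutioner().getTADManager().getTADOnlyShapeInfo(arr, new int[] { 1 });
DataBuffer tadShapeInfo = tad.getFirst();
DataBuffer tadOffsets = tad.getSecond();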
Use of org.nd4j.nativeblas.LongPointerWrapper in project nd4j by deeplearning4j.
The class NativeOpExecutioner, method exec.
@Override
public INDArray exec(Accumulation op, int... dimension) {
dimension = Shape.normalizeAxis(op.x().rank(), dimension);
validateDataType(Nd4j.dataType(), op);
if (extraz.get() == null)
extraz.set(new PointerPointer(32));
int[] maxShape = Shape.getMaxShape(op.x(), op.y());
for (int i = 0; i < dimension.length; i++) if (dimension[i] >= maxShape.length && dimension[i] != Integer.MAX_VALUE)
throw new ND4JIllegalStateException("Op target dimension " + Arrays.toString(dimension) + " contains an element higher than the rank of op.X: [" + op.x().rank() + "]");
for (int i = 0; i < dimension.length; i++) {
if (dimension[i] < 0)
dimension[i] += op.x().rank();
}
// do op along all dimensions
if (dimension.length == op.x().rank())
dimension = new int[] { Integer.MAX_VALUE };
int[] retShape;
if (Shape.wholeArrayDimension(dimension))
retShape = new int[] { 1, 1 };
else
retShape = ArrayUtil.removeIndex(maxShape, dimension);
// ensure vector is proper shape
if (retShape.length == 1) {
if (dimension[0] == 0)
retShape = new int[] { 1, retShape[0] };
else
retShape = new int[] { retShape[0], 1 };
} else if (retShape.length == 0) {
retShape = new int[] { 1, 1 };
}
if (op.x().isVector() && op.x().length() == ArrayUtil.prod(retShape) && ArrayUtil.prodLong(retShape) > 1 && op.y() == null)
return op.noOp();
/**
* This is the result array.
* We create it only if the caller hasn't provided one.
*/
INDArray ret;
if (op.z() == null || op.z() == op.x()) {
if (op.isComplexAccumulation()) {
int xT = op.x().tensorssAlongDimension(dimension);
int yT = op.y().tensorssAlongDimension(dimension);
ret = Nd4j.create(xT, yT);
} else {
if (op.y() != null) {
// Two options here: either pairwise with equal sizes, OR every X TAD vs. the entirety of Y
if (op.x().lengthLong() == op.y().lengthLong()) {
// Pairwise
if (op.x().tensorssAlongDimension(dimension) != op.y().tensorssAlongDimension(dimension)) {
throw new ND4JIllegalStateException("Number of TADs along dimension don't match: (x shape = " + Arrays.toString(op.x().shape()) + ", y shape = " + Arrays.toString(op.y().shape()) + ", dimension = " + Arrays.toString(dimension) + ")");
}
} else {
// Every X TAD vs. entirety of Y
val xTADSize = op.x().lengthLong() / op.x().tensorssAlongDimension(dimension);
if (xTADSize != op.y().length()) {
throw new ND4JIllegalStateException("Size of TADs along dimension don't match for pairwise execution:" + " (x TAD size = " + xTADSize + ", y size = " + op.y().lengthLong());
}
}
}
if (op.x().data().dataType() == DataBuffer.Type.DOUBLE)
ret = Nd4j.valueArrayOf(retShape, op.zeroDouble());
else
ret = Nd4j.valueArrayOf(retShape, op.zeroFloat());
}
op.setZ(ret);
} else {
// compare length
if (!op.isComplexAccumulation() && op.z().lengthLong() != ArrayUtil.prodLong(retShape))
throw new ND4JIllegalStateException("Shape of target array for reduction [" + Arrays.toString(op.z().shape()) + "] doesn't match expected [" + Arrays.toString(retShape) + "]");
else if (op.isComplexAccumulation()) {
int xT = op.x().tensorssAlongDimension(dimension);
int yT = op.y().tensorssAlongDimension(dimension);
if (op.z().lengthLong() != xT * yT)
throw new ND4JIllegalStateException("Shape of target array for reduction [" + Arrays.toString(op.z().shape()) + "] doesn't match expected [" + (xT * yT) + "]");
}
if (op.x().data().dataType() == DataBuffer.Type.DOUBLE) {
op.z().assign(op.zeroDouble());
} else {
op.z().assign(op.zeroFloat());
}
ret = op.z();
}
/**
* Returns the {@link Shape#createShapeInformation(int[], int[], int, int, char)}
* and the associated offsets for each {@link INDArray#tensorAlongDimension(int, int...)}
* The first item is the shape information. The second one is the offsets.
*/
Pair<DataBuffer, DataBuffer> tadBuffers = tadManager.getTADOnlyShapeInfo(op.x(), dimension);
Pair<DataBuffer, DataBuffer> yTadBuffers = null;
/**
* Note that we pass addresses (longs) into libnd4j
* and reinterpret-cast them on the C++ side.
* This keeps JNI overhead down.
*/
Pointer hostTadShapeInfo = tadBuffers.getFirst().addressPointer();
DataBuffer offsets = tadBuffers.getSecond();
Pointer hostTadOffsets = offsets == null ? null : offsets.addressPointer();
// we're going to check whether this is a TAD-vs-TAD comparison or TAD vs. full array; if the latter, we take a slightly different route
boolean tvf = false;
if (op.y() != null) {
if (op.x().tensorAlongDimension(0, dimension).lengthLong() == op.y().lengthLong()) {
tvf = true;
}
}
if (op.isComplexAccumulation()) {
yTadBuffers = tadManager.getTADOnlyShapeInfo(op.y(), dimension);
if (op.x().tensorAlongDimension(0, dimension).lengthLong() != op.y().tensorAlongDimension(0, dimension).lengthLong())
throw new ND4JIllegalStateException("Impossible to issue AllDistances operation: TAD lengths mismatch along given dimension");
}
/**
* This is a pointer to a pointer in C.
*/
// FIXME: we need something better than the 3rd element being non-null here...
PointerPointer dummy = extraz.get().put(hostTadShapeInfo, hostTadOffsets, tvf ? hostTadOffsets : null);
long st = profilingHookIn(op, tadBuffers.getFirst());
/**
* Note that because dimension arrays don't change,
* we use a {@link ConstantHandler}, which knows how to reserve memory
* for immutable dimension buffers.
* This gives us a pointer that is passed around in libnd4j.
*/
Pointer dimensionAddress = constantHandler.getConstantBuffer(dimension).addressPointer();
if (op.x().data().dataType() == DataBuffer.Type.DOUBLE) {
if (op instanceof Variance) {
if (ret.isScalar()) {
ret.putScalar(0, loop.execSummaryStatsScalarDouble(dummy, op.opNum(), (DoublePointer) op.x().data().addressPointer(), (IntPointer) op.x().shapeInfoDataBuffer().addressPointer(), (DoublePointer) getPointerForExtraArgs(op), true));
} else {
Variance var = (Variance) op;
loop.execSummaryStatsDouble(dummy, op.opNum(), (DoublePointer) op.x().data().addressPointer(), (IntPointer) op.x().shapeInfoDataBuffer().addressPointer(), (DoublePointer) getPointerForExtraArgs(op), (DoublePointer) op.z().data().addressPointer(), (IntPointer) op.z().shapeInfoDataBuffer().addressPointer(), (IntPointer) dimensionAddress, dimension.length, var.isBiasCorrected());
}
} else // pairwise reduction like similarity of two arrays
if (op.y() != null && op.getOpType() == Op.Type.REDUCE3) {
if (op.isComplexAccumulation()) {
loop.execReduce3AllDouble(dummy, op.opNum(), (DoublePointer) op.x().data().addressPointer(), (IntPointer) op.x().shapeInfoDataBuffer().addressPointer(), (DoublePointer) getPointerForExtraArgs(op), (DoublePointer) op.y().data().addressPointer(), (IntPointer) op.y().shapeInfoDataBuffer().addressPointer(), (DoublePointer) op.z().data().addressPointer(), (IntPointer) op.z().shapeInfoDataBuffer().addressPointer(), (IntPointer) dimensionAddress, dimension.length, (IntPointer) tadBuffers.getFirst().addressPointer(), new LongPointerWrapper(tadBuffers.getSecond().addressPointer()), (IntPointer) yTadBuffers.getFirst().addressPointer(), new LongPointerWrapper(yTadBuffers.getSecond().addressPointer()));
} else if (ret.isScalar()) {
ret.putScalar(0, loop.execReduce3ScalarDouble(dummy, op.opNum(), (DoublePointer) op.x().data().addressPointer(), (IntPointer) op.x().shapeInfoDataBuffer().addressPointer(), (DoublePointer) getPointerForExtraArgs(op), (DoublePointer) op.y().data().addressPointer(), (IntPointer) op.y().shapeInfoDataBuffer().addressPointer()));
} else {
loop.execReduce3Double(dummy, op.opNum(), (DoublePointer) op.x().data().addressPointer(), (IntPointer) op.x().shapeInfoDataBuffer().addressPointer(), (DoublePointer) getPointerForExtraArgs(op), (DoublePointer) op.y().data().addressPointer(), (IntPointer) op.y().shapeInfoDataBuffer().addressPointer(), (DoublePointer) op.z().data().addressPointer(), (IntPointer) op.z().shapeInfoDataBuffer().addressPointer(), (IntPointer) dimensionAddress, dimension.length);
}
} else {
if (ret.isScalar()) {
ret.putScalar(0, loop.execReduceScalarDouble(dummy, op.opNum(), (DoublePointer) op.x().data().addressPointer(), (IntPointer) op.x().shapeInfoDataBuffer().addressPointer(), (DoublePointer) getPointerForExtraArgs(op)));
} else {
loop.execReduceDouble(dummy, op.opNum(), (DoublePointer) op.x().data().addressPointer(), (IntPointer) op.x().shapeInfoDataBuffer().addressPointer(), (DoublePointer) getPointerForExtraArgs(op), (DoublePointer) op.z().data().addressPointer(), (IntPointer) op.z().shapeInfoDataBuffer().addressPointer(), (IntPointer) dimensionAddress, dimension.length);
}
}
} else {
if (op instanceof Variance) {
Variance variance = (Variance) op;
if (ret.isScalar()) {
ret.putScalar(0, loop.execSummaryStatsScalarFloat(dummy, op.opNum(), (FloatPointer) op.x().data().addressPointer(), (IntPointer) op.x().shapeInfoDataBuffer().addressPointer(), (FloatPointer) getPointerForExtraArgs(op), variance.isBiasCorrected()));
} else {
loop.execSummaryStatsFloat(dummy, op.opNum(), (FloatPointer) op.x().data().addressPointer(), (IntPointer) op.x().shapeInfoDataBuffer().addressPointer(), (FloatPointer) getPointerForExtraArgs(op), (FloatPointer) op.z().data().addressPointer(), (IntPointer) op.z().shapeInfoDataBuffer().addressPointer(), (IntPointer) dimensionAddress, dimension.length, variance.isBiasCorrected());
}
} else if (op.y() != null && op.getOpType() == Op.Type.REDUCE3) {
if (op.isComplexAccumulation()) {
loop.execReduce3AllFloat(dummy, op.opNum(), (FloatPointer) op.x().data().addressPointer(), (IntPointer) op.x().shapeInfoDataBuffer().addressPointer(), (FloatPointer) getPointerForExtraArgs(op), (FloatPointer) op.y().data().addressPointer(), (IntPointer) op.y().shapeInfoDataBuffer().addressPointer(), (FloatPointer) op.z().data().addressPointer(), (IntPointer) op.z().shapeInfoDataBuffer().addressPointer(), (IntPointer) dimensionAddress, dimension.length, (IntPointer) tadBuffers.getFirst().addressPointer(), new LongPointerWrapper(tadBuffers.getSecond().addressPointer()), (IntPointer) yTadBuffers.getFirst().addressPointer(), new LongPointerWrapper(yTadBuffers.getSecond().addressPointer()));
} else if (ret.isScalar()) {
ret.putScalar(0, loop.execReduce3ScalarFloat(dummy, op.opNum(), (FloatPointer) op.x().data().addressPointer(), (IntPointer) op.x().shapeInfoDataBuffer().addressPointer(), (FloatPointer) getPointerForExtraArgs(op), (FloatPointer) op.y().data().addressPointer(), (IntPointer) op.y().shapeInfoDataBuffer().addressPointer()));
} else {
loop.execReduce3Float(dummy, op.opNum(), (FloatPointer) op.x().data().addressPointer(), (IntPointer) op.x().shapeInfoDataBuffer().addressPointer(), (FloatPointer) getPointerForExtraArgs(op), (FloatPointer) op.y().data().addressPointer(), (IntPointer) op.y().shapeInfoDataBuffer().addressPointer(), (FloatPointer) op.z().data().addressPointer(), (IntPointer) op.z().shapeInfoDataBuffer().addressPointer(), (IntPointer) dimensionAddress, dimension.length);
}
} else {
if (ret.isScalar()) {
ret.putScalar(0, loop.execReduceScalarFloat(dummy, op.opNum(), (FloatPointer) op.x().data().addressPointer(), (IntPointer) op.x().shapeInfoDataBuffer().addressPointer(), (FloatPointer) getPointerForExtraArgs(op)));
} else {
loop.execReduceFloat(dummy, op.opNum(), (FloatPointer) op.x().data().addressPointer(), (IntPointer) op.x().shapeInfoDataBuffer().addressPointer(), (FloatPointer) getPointerForExtraArgs(op), (FloatPointer) op.z().data().addressPointer(), (IntPointer) op.z().shapeInfoDataBuffer().addressPointer(), (IntPointer) dimensionAddress, dimension.length);
}
}
}
return ret;
}
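As a usage sketch, a Reduce3 op such as cosine similarity exercises the Op.Type.REDUCE3 branches above; the class location (org.nd4j.linalg.api.ops.impl.accum.distances) is an assumption about the API of this era:
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.api.ops.impl.accum.distances.CosineSimilarity;
import org.nd4j.linalg.factory.Nd4j;
INDArray a = Nd4j.rand(3, 4);
INDArray b = Nd4j.rand(3, 4);
// Pairwise case: equal lengths and matching TAD counts along dimension 1, so one similarity per row pair.
INDArray perRow = Nd4j.getExecutioner().exec(new CosineSimilarity(a, b), 1);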
Use of org.nd4j.nativeblas.LongPointerWrapper in project nd4j by deeplearning4j.
The class CpuNDArrayFactory, method sort.
@Override
public INDArray sort(INDArray x, boolean descending, int... dimension) {
if (x.isScalar())
return x;
Arrays.sort(dimension);
Pair<DataBuffer, DataBuffer> tadBuffers = Nd4j.getExecutioner().getTADManager().getTADOnlyShapeInfo(x, dimension);
if (x.data().dataType() == DataBuffer.Type.FLOAT) {
NativeOpsHolder.getInstance().getDeviceNativeOps().sortTadFloat(null, (FloatPointer) x.data().addressPointer(), (IntPointer) x.shapeInfoDataBuffer().addressPointer(), (IntPointer) Nd4j.getConstantHandler().getConstantBuffer(dimension).addressPointer(), dimension.length, (IntPointer) tadBuffers.getFirst().addressPointer(), new LongPointerWrapper(tadBuffers.getSecond().addressPointer()), descending);
} else if (x.data().dataType() == DataBuffer.Type.DOUBLE) {
NativeOpsHolder.getInstance().getDeviceNativeOps().sortTadDouble(null, (DoublePointer) x.data().addressPointer(), (IntPointer) x.shapeInfoDataBuffer().addressPointer(), (IntPointer) Nd4j.getConstantHandler().getConstantBuffer(dimension).addressPointer(), dimension.length, (IntPointer) tadBuffers.getFirst().addressPointer(), new LongPointerWrapper(tadBuffers.getSecond().addressPointer()), descending);
} else {
throw new UnsupportedOperationException("Unknown dataType " + x.data().dataType());
}
return x;
}
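A short usage sketch for the TAD-based sort; reaching the factory through Nd4j.factory() is an assumption, but the sort(INDArray, boolean, int...) signature is the one defined above:
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.factory.Nd4j;
INDArray m = Nd4j.rand(4, 6);
// Sorts each TAD along dimension 1 (each row) in place, ascending since descending == false.
Nd4j.factory().sort(m, false, 1);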
Use of org.nd4j.nativeblas.LongPointerWrapper in project nd4j by deeplearning4j.
The class JCublasNDArrayFactory, method sort.
@Override
public INDArray sort(INDArray x, boolean descending, int... dimension) {
if (x.isScalar())
return x;
Arrays.sort(dimension);
Nd4j.getExecutioner().push();
Pair<DataBuffer, DataBuffer> tadBuffers = Nd4j.getExecutioner().getTADManager().getTADOnlyShapeInfo(x, dimension);
CudaContext context = AtomicAllocator.getInstance().getFlowController().prepareAction(x);
// some of these extras are not actually used by sortTad
PointerPointer extraz = new PointerPointer(AtomicAllocator.getInstance().getHostPointer(x.shapeInfoDataBuffer()), context.getOldStream(), AtomicAllocator.getInstance().getDeviceIdPointer());
Pointer dimensionPointer = AtomicAllocator.getInstance().getPointer(AtomicAllocator.getInstance().getConstantBuffer(dimension), context);
if (x.data().dataType() == DataBuffer.Type.FLOAT) {
nativeOps.sortTadFloat(extraz, (FloatPointer) AtomicAllocator.getInstance().getPointer(x, context), (IntPointer) AtomicAllocator.getInstance().getPointer(x.shapeInfoDataBuffer(), context), (IntPointer) dimensionPointer, dimension.length, (IntPointer) AtomicAllocator.getInstance().getPointer(tadBuffers.getFirst(), context), new LongPointerWrapper(AtomicAllocator.getInstance().getPointer(tadBuffers.getSecond(), context)), descending);
} else if (x.data().dataType() == DataBuffer.Type.DOUBLE) {
nativeOps.sortTadDouble(extraz, (DoublePointer) AtomicAllocator.getInstance().getPointer(x, context), (IntPointer) AtomicAllocator.getInstance().getPointer(x.shapeInfoDataBuffer(), context), (IntPointer) dimensionPointer, dimension.length, (IntPointer) AtomicAllocator.getInstance().getPointer(tadBuffers.getFirst(), context), new LongPointerWrapper(AtomicAllocator.getInstance().getPointer(tadBuffers.getSecond(), context)), descending);
} else if (x.data().dataType() == DataBuffer.Type.HALF) {
nativeOps.sortTadHalf(extraz, (ShortPointer) AtomicAllocator.getInstance().getPointer(x, context), (IntPointer) AtomicAllocator.getInstance().getPointer(x.shapeInfoDataBuffer(), context), (IntPointer) dimensionPointer, dimension.length, (IntPointer) AtomicAllocator.getInstance().getPointer(tadBuffers.getFirst(), context), new LongPointerWrapper(AtomicAllocator.getInstance().getPointer(tadBuffers.getSecond(), context)), descending);
} else {
throw new UnsupportedOperationException("Unknown dataType " + x.data().dataType());
}
AtomicAllocator.getInstance().getFlowController().registerAction(context, x);
return x;
}
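The common thread across all of these call sites: shape info crosses JNI as an IntPointer, but TAD offsets are 64-bit values, so the raw pointer is rewrapped before it reaches libnd4j. A minimal sketch of that wrapping step (host-pointer variant, as in the NativeOpExecutioner and CpuNDArrayFactory snippets; the Pair import is assumed as above):
import org.nd4j.linalg.api.buffer.DataBuffer;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.factory.Nd4j;
import org.nd4j.linalg.primitives.Pair;
import org.nd4j.nativeblas.LongPointerWrapper;
INDArray arr = Nd4j.create(4, 5);
Pair<DataBuffer, DataBuffer> tadBuffers = Nd4j.getExecutioner().getTADManager().getTADOnlyShapeInfo(arr, new int[] { 1 });
// LongPointerWrapper reinterprets the raw offsets address as a long* for the native side.
LongPointerWrapper tadOffsets = new LongPointerWrapper(tadBuffers.getSecond().addressPointer());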