Use of com.simiacryptus.mindseye.lang.TensorList in project MindsEye by SimiaCryptus.
Class L1NormalizationLayer, method eval:
@Nonnull
@Override
public Result eval(@Nonnull final Result... input) {
  Arrays.stream(input).forEach(nnResult -> nnResult.addRef());
  final Result in = input[0];
  final TensorList inData = in.getData();
  inData.addRef();
  return new Result(TensorArray.wrap(IntStream.range(0, inData.length()).mapToObj(dataIndex -> {
    @Nullable final Tensor value = inData.get(dataIndex);
    try {
      final double sum = value.sum();
      if (!Double.isFinite(sum) || 0 == sum) {
        // Degenerate sum: pass the input through unchanged.
        value.addRef();
        return value;
      } else {
        return value.scale(1.0 / sum);
      }
    } finally {
      value.freeRef();
    }
  }).toArray(i -> new Tensor[i])), (@Nonnull final DeltaSet<Layer> buffer, @Nonnull final TensorList outDelta) -> {
    if (in.isAlive()) {
      final Tensor[] passbackArray = IntStream.range(0, outDelta.length()).mapToObj(dataIndex -> {
        Tensor inputTensor = inData.get(dataIndex);
        @Nullable final double[] value = inputTensor.getData();
        Tensor outputTensor = outDelta.get(dataIndex);
        @Nullable final double[] delta = outputTensor.getData();
        final double dot = ArrayUtil.dot(value, delta);
        final double sum = Arrays.stream(value).sum();
        @Nonnull final Tensor passback = new Tensor(outputTensor.getDimensions());
        @Nullable final double[] passbackData = passback.getData();
        // Mirror the forward-pass guard: only propagate a gradient when the
        // sum is nonzero AND finite, otherwise the divisions below produce
        // NaN/Infinity and trip the finiteness assert.
        if (0 != sum && Double.isFinite(sum)) {
          for (int i = 0; i < value.length; i++) {
            passbackData[i] = (delta[i] - dot / sum) / sum;
          }
        }
        outputTensor.freeRef();
        inputTensor.freeRef();
        return passback;
      }).toArray(i -> new Tensor[i]);
      assert Arrays.stream(passbackArray).flatMapToDouble(x -> Arrays.stream(x.getData())).allMatch(v -> Double.isFinite(v));
      @Nonnull TensorArray tensorArray = TensorArray.wrap(passbackArray);
      in.accumulate(buffer, tensorArray);
    }
  }) {
    @Override
    protected void _free() {
      inData.freeRef();
      Arrays.stream(input).forEach(nnResult -> nnResult.freeRef());
    }

    @Override
    public boolean isAlive() {
      return in.isAlive();
    }
  };
}
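The backward pass above is the exact Jacobian-vector product of L1 normalization. Writing S = Σ_j x_j and y_i = x_i / S, the chain rule reproduces the (delta[i] - dot / sum) / sum expression, where dot = Σ_j x_j ∂L/∂y_j is the `dot` variable in the code:

\[
\frac{\partial y_j}{\partial x_i} = \frac{\delta_{ij}\,S - x_j}{S^2},
\qquad
\frac{\partial L}{\partial x_i}
= \sum_j \frac{\partial L}{\partial y_j}\,\frac{\partial y_j}{\partial x_i}
= \frac{1}{S}\left(\frac{\partial L}{\partial y_i} - \frac{\mathrm{dot}}{S}\right).
\]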
Use of com.simiacryptus.mindseye.lang.TensorList in project MindsEye by SimiaCryptus.
Class CudnnTest, method testTensorListMT:
private void testTensorListMT(@Nonnull NotebookOutput log, @Nonnull int[] dimensions, int length, double tolerance, int accumulations) {
  // Factory for randomized test data of the requested shape.
  @Nonnull Supplier<TensorList> factory = () -> TensorArray.wrap(IntStream.range(0, length).mapToObj(j -> {
    @Nonnull Tensor tensor = new Tensor(dimensions);
    Arrays.parallelSetAll(tensor.getData(), this::random);
    return tensor;
  }).toArray(j -> new Tensor[j]));
  log.code(() -> {
    @Nonnull ListeningExecutorService pool = MoreExecutors.listeningDecorator(Executors.newFixedThreadPool(5));
    PrintStream out = SysOutInterceptor.INSTANCE.currentHandler();
    try {
      List<ListenableFuture<Object>> collect = IntStream.range(0, 16).mapToObj(workerNumber -> {
        @Nonnull TimedResult<TensorList> originalTiming = TimedResult.time(() -> factory.get());
        TensorList original = originalTiming.result;
        logger.info(String.format("[%s] Calculated test data in %.4fsec", workerNumber, originalTiming.seconds()));
        // Upload the test data to the GPU on a pool thread.
        @Nonnull ListenableFuture<TensorList> mutableDataFuture = pool.submit(() -> CudaSystem.run(gpu -> {
          PrintStream oldHandler = SysOutInterceptor.INSTANCE.setCurrentHandler(out);
          @Nonnull TimedResult<CudaTensor> timedResult = TimedResult.time(() -> {
            return gpu.getTensor(original, Precision.Double, MemoryType.Managed, false);
          });
          logger.info(String.format("[%s] Wrote %s in %.4f seconds, Device %d: %s", workerNumber, Arrays.toString(dimensions), timedResult.seconds(), gpu.getDeviceId(), CudaDevice.getDeviceName(gpu.getDeviceId())));
          SysOutInterceptor.INSTANCE.setCurrentHandler(oldHandler);
          return CudaTensorList.wrap(timedResult.result, length, dimensions, Precision.Double);
        }, original));
        @Nonnull TimedResult<List<TensorList>> accumulantTiming = TimedResult.time(() -> IntStream.range(0, accumulations).mapToObj(x -> factory.get()).collect(Collectors.toList()));
        List<TensorList> accumulants = accumulantTiming.result;
        // Compute the expected result on the CPU: the sum of all accumulants plus the original.
        @Nonnull TimedResult<TensorList> finalResultTiming = TimedResult.time(() -> {
          return accumulants.stream().map(x -> {
            x.addRef();
            return x;
          }).reduce((a, b) -> {
            TensorList sum = a.addAndFree(b);
            b.freeRef();
            return sum;
          }).map(x -> {
            TensorList sum = x.add(original);
            x.freeRef();
            return sum;
          }).orElseGet(() -> {
            original.addRef();
            return original;
          });
        });
        logger.info(String.format("[%s] Calculated accumulant in %.4fsec", workerNumber, accumulantTiming.seconds()));
        // Accumulate the same terms on the GPU once the upload completes.
        @Nonnull ListenableFuture<TensorList> accumulated = Futures.transform(mutableDataFuture, (x) -> {
          PrintStream oldHandler = SysOutInterceptor.INSTANCE.setCurrentHandler(out);
          @Nonnull AtomicReference<TensorList> mutableGpuData = new AtomicReference<>(x);
          accumulants.stream().parallel().forEach(delta -> {
            CudaSystem.run(gpu -> {
              @Nonnull TimedResult<CudaTensorList> timedWrite = TimedResult.time(() -> {
                @Nullable CudaTensor cudaMemory = gpu.getTensor(delta, Precision.Double, MemoryType.Managed, false);
                delta.freeRef();
                return CudaTensorList.wrap(cudaMemory, length, dimensions, Precision.Double);
              });
              @Nonnull TimedResult<Void> timedAccumulation = TimedResult.time(() -> {
                synchronized (mutableGpuData) {
                  mutableGpuData.getAndUpdate(y -> {
                    TensorList add = y.add(timedWrite.result);
                    y.freeRef();
                    return add;
                  });
                }
                timedWrite.result.freeRef();
              });
              logger.info(String.format("[%s] Wrote %s in %.4f seconds and accumulated in %.4f seconds, Device %d: %s", workerNumber, Arrays.toString(dimensions), timedWrite.seconds(), timedAccumulation.seconds(), gpu.getDeviceId(), CudaDevice.getDeviceName(gpu.getDeviceId())));
            }, delta);
          });
          SysOutInterceptor.INSTANCE.setCurrentHandler(oldHandler);
          return mutableGpuData.get();
        }, pool);
        TensorList finalResult = finalResultTiming.result;
        logger.info(String.format("[%s] Calculated final data in %.4fsec", workerNumber, finalResultTiming.seconds()));
        // Compare the GPU accumulation against the CPU reference within the given tolerance.
        return Futures.transform(accumulated, (write) -> {
          original.freeRef();
          PrintStream oldHandler = SysOutInterceptor.INSTANCE.setCurrentHandler(out);
          CudaSystem.run(gpu -> {
            @Nonnull TimedResult<Boolean> timedVerify = TimedResult.time(() -> {
              @Nonnull TensorList minus = finalResult.minus(write);
              double diffVal = minus.stream().mapToDouble(x -> {
                double v = Arrays.stream(x.getData()).map(Math::abs).max().getAsDouble();
                x.freeRef();
                return v;
              }).max().getAsDouble();
              minus.freeRef();
              return diffVal < tolerance;
            });
            logger.info(String.format("[%s] Read %s and verified in %.4fs using device %d: %s", workerNumber, Arrays.toString(dimensions), timedVerify.seconds(), gpu.getDeviceId(), CudaDevice.getDeviceName(gpu.getDeviceId())));
            if (!timedVerify.result)
              Assert.assertTrue(finalResult.prettyPrint() + " != " + write.prettyPrint(), timedVerify.result);
            write.freeRef();
          });
          SysOutInterceptor.INSTANCE.setCurrentHandler(oldHandler);
          finalResult.freeRef();
          return null;
        }, pool);
      }).collect(Collectors.toList());
      Futures.allAsList(collect).get();
    } catch (@Nonnull InterruptedException | ExecutionException e) {
      throw new RuntimeException(e);
    } finally {
      pool.shutdown();
    }
  });
}
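testTensorListMT stresses the manual reference-counting protocol that every snippet here follows: each consumer of a TensorList claims it with addRef() and releases it with freeRef(), while wrap-style factories take ownership of the references passed in. A minimal sketch of that convention, using only calls that appear above (the shape is illustrative):

Tensor t = new Tensor(2, 2);            // freshly allocated tensor, one live reference
TensorList list = TensorArray.wrap(t);  // wrap() takes ownership of t's reference
list.addRef();                          // a second consumer (e.g. a worker thread) claims the list
// ... the second consumer reads the data ...
list.freeRef();                         // the second consumer releases its claim
list.freeRef();                         // the creator releases; the backing storage can now be recycled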
Use of com.simiacryptus.mindseye.lang.TensorList in project MindsEye by SimiaCryptus.
Class CudnnTest, method memoryTransfer:
private void memoryTransfer(@Nonnull NotebookOutput log, int... size) {
  @Nonnull Supplier<TensorList> factory = () -> TensorArray.wrap(IntStream.range(0, 1).mapToObj(j -> {
    @Nonnull Tensor tensor = new Tensor(size);
    Arrays.parallelSetAll(tensor.getData(), this::random);
    return tensor;
  }).toArray(j -> new Tensor[j]));
  TensorList original = factory.get();
  log.code(() -> {
    // Upload once, then read the same device buffer back from every available GPU.
    CudaTensor write = CudaSystem.run(gpu -> {
      @Nonnull TimedResult<CudaTensor> timedResult = TimedResult.time(() -> {
        return gpu.getTensor(original, Precision.Double, MemoryType.Managed, false);
      });
      int deviceNumber = gpu.getDeviceId();
      logger.info(String.format("Wrote %s in %.4f seconds, Device %d: %s", Arrays.toString(size), timedResult.seconds(), deviceNumber, CudaDevice.getDeviceName(deviceNumber)));
      return timedResult.result;
    }, original);
    CudnnHandle.forEach(gpu -> {
      @Nonnull Tensor readCopy = new Tensor(size);
      @Nonnull TimedResult<CudaMemory> timedResult = TimedResult.time(() -> {
        CudaMemory cudaMemory = write.getMemory(gpu);
        CudaMemory read = cudaMemory.read(Precision.Double, readCopy.getData());
        cudaMemory.freeRef();
        return read;
      });
      @Nonnull TimedResult<Boolean> timedVerify = TimedResult.time(() -> {
        @Nullable Tensor tensor = original.get(0);
        boolean equals = tensor.equals(readCopy);
        tensor.freeRef();
        return equals;
      });
      int deviceNumber = gpu.getDeviceId();
      logger.info(String.format("Read %s in %.4f seconds and verified in %.4fs using device %d: %s", Arrays.toString(size), timedResult.seconds(), timedVerify.seconds(), deviceNumber, CudaDevice.getDeviceName(deviceNumber)));
      if (!timedVerify.result)
        Assert.assertTrue(original.prettyPrint() + " != " + readCopy.prettyPrint(), timedVerify.result);
      readCopy.freeRef();
    });
    write.freeRef();
  });
  original.freeRef();
}
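Stripped of timing and logging, the test reduces to an upload-once, read-everywhere round trip. A condensed sketch, reusing original and size from the method above and assuming the same CudaSystem, CudaTensor, and CudaMemory signatures:

CudaTensor deviceCopy = CudaSystem.run(gpu ->
    gpu.getTensor(original, Precision.Double, MemoryType.Managed, false), original);
CudnnHandle.forEach(gpu -> {
  @Nonnull Tensor readCopy = new Tensor(size);
  CudaMemory memory = deviceCopy.getMemory(gpu);
  memory.read(Precision.Double, readCopy.getData());  // device-to-host copy into the tensor's backing array
  memory.freeRef();
  Tensor expected = original.get(0);
  Assert.assertTrue(expected.equals(readCopy));
  expected.freeRef();
  readCopy.freeRef();
});
deviceCopy.freeRef();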
Use of com.simiacryptus.mindseye.lang.TensorList in project MindsEye by SimiaCryptus.
Class CudnnTest, method testTensorList:
private void testTensorList(@Nonnull NotebookOutput log, @Nonnull int[] dimensions, int length, double tolerance, int accumulations) {
  @Nonnull Supplier<TensorList> factory = () -> TensorArray.wrap(IntStream.range(0, length).mapToObj(j -> {
    @Nonnull Tensor tensor = new Tensor(dimensions);
    Arrays.parallelSetAll(tensor.getData(), this::random);
    return tensor;
  }).toArray(j -> new Tensor[j]));
  log.code(() -> {
    @Nonnull TimedResult<TensorList> originalTiming = TimedResult.time(() -> factory.get());
    logger.info(String.format("Calculated test data in %.4fsec", originalTiming.seconds()));
    TensorList original = originalTiming.result;
    // Upload the test data to the GPU.
    @Nonnull AtomicReference<TensorList> mutableGpuData = new AtomicReference<>(CudaSystem.run(gpu -> {
      @Nonnull TimedResult<CudaTensor> timedResult = TimedResult.time(() -> {
        return gpu.getTensor(original, Precision.Double, MemoryType.Managed, false);
      });
      logger.info(String.format("Wrote %s in %.4f seconds, Device %d: %s", Arrays.toString(dimensions), timedResult.seconds(), gpu.getDeviceId(), CudaDevice.getDeviceName(gpu.getDeviceId())));
      return CudaTensorList.wrap(timedResult.result, length, dimensions, Precision.Double);
    }, original));
    // Read the data back on every device and check it against the original.
    CudnnHandle.forEach(ctx -> {
      @Nonnull TimedResult<TensorList> timedResult = TimedResult.time(() -> (mutableGpuData.get() instanceof CudaTensorList) ? ((CudaTensorList) mutableGpuData.get()).getHeapCopy() : mutableGpuData.get());
      @Nonnull TimedResult<Boolean> timedVerify = TimedResult.time(() -> {
        @Nonnull TensorList minus = original.minus(timedResult.result);
        double variance = minus.stream().mapToDouble(x -> Arrays.stream(x.getData()).map(Math::abs).max().getAsDouble()).max().getAsDouble();
        minus.freeRef();
        return variance < tolerance;
      });
      logger.info(String.format("Read %s in %.4f seconds and verified in %.4fs using device %d: %s", Arrays.toString(dimensions), timedResult.seconds(), timedVerify.seconds(), ctx.getDeviceId(), CudaDevice.getDeviceName(ctx.getDeviceId())));
      if (!timedVerify.result)
        Assert.assertTrue(original.prettyPrint() + " != " + timedResult.result.prettyPrint(), timedVerify.result);
      timedResult.result.freeRef();
    });
    @Nonnull TimedResult<List<TensorList>> accumulantTiming = TimedResult.time(() -> IntStream.range(0, accumulations).mapToObj(x -> factory.get()).collect(Collectors.toList()));
    logger.info(String.format("Calculated accumulant in %.4fsec", accumulantTiming.seconds()));
    List<TensorList> accumulants = accumulantTiming.result;
    // Accumulate each term into the GPU-resident data.
    accumulants.stream().forEach(accumulant -> {
      CudaSystem.run(gpu -> {
        @Nonnull TimedResult<TensorList> timedWrite = TimedResult.time(() -> {
          return CudaTensorList.wrap(gpu.getTensor(accumulant, Precision.Double, MemoryType.Managed, false), length, dimensions, Precision.Double);
        });
        @Nonnull TimedResult<Void> timedAccumulation = TimedResult.time(() -> {
          mutableGpuData.getAndUpdate(x -> x.add(timedWrite.result)).freeRef();
          timedWrite.result.freeRef();
        });
        logger.info(String.format("Wrote %s in %.4f seconds and accumulated in %.4f seconds, Device %d: %s", Arrays.toString(dimensions), timedWrite.seconds(), timedAccumulation.seconds(), gpu.getDeviceId(), CudaDevice.getDeviceName(gpu.getDeviceId())));
      }, accumulant);
    });
    // Compute the expected result on the CPU for comparison.
    @Nonnull TimedResult<TensorList> finalResultTiming = TimedResult.time(() -> {
      return accumulants.stream().reduce((a, b) -> {
        TensorList sum = a.addAndFree(b);
        b.freeRef();
        return sum;
      }).map(x -> {
        TensorList sum = x.add(original);
        x.freeRef();
        return sum;
      }).orElseGet(() -> {
        original.addRef();
        return original;
      });
    });
    original.freeRef();
    logger.info(String.format("Calculated final data in %.4fsec", finalResultTiming.seconds()));
    TensorList finalResult = finalResultTiming.result;
    CudnnHandle.forEach(ctx -> {
      @Nonnull TimedResult<Boolean> timedVerify = TimedResult.time(() -> {
        @Nonnull TensorList minus = finalResult.minus(mutableGpuData.get());
        double diffVal = minus.stream().mapToDouble(x -> {
          double v = Arrays.stream(x.getData()).map(Math::abs).max().getAsDouble();
          x.freeRef();
          return v;
        }).max().getAsDouble();
        minus.freeRef();
        return diffVal < tolerance;
      });
      logger.info(String.format("Read %s and verified in %.4fs using device %d: %s", Arrays.toString(dimensions), timedVerify.seconds(), ctx.getDeviceId(), CudaDevice.getDeviceName(ctx.getDeviceId())));
      if (!timedVerify.result)
        Assert.assertTrue(finalResult.prettyPrint() + " != " + mutableGpuData.get().prettyPrint(), timedVerify.result);
    });
    mutableGpuData.get().freeRef();
    finalResult.freeRef();
  });
}
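The CPU-side reference computation is a stream reduction under the same ownership rules: addAndFree consumes its left operand, and the right operand must be released explicitly so that exactly one live reference (the running sum) survives each step. A standalone sketch of the pattern (the four zero tensors are illustrative):

List<TensorList> terms = IntStream.range(0, 4)
    .mapToObj(i -> TensorArray.wrap(new Tensor(2, 2)))
    .collect(Collectors.toList());
TensorList total = terms.stream().reduce((a, b) -> {
  TensorList sum = a.addAndFree(b);  // consumes a's reference and returns a + b
  b.freeRef();                       // release b; sum is now the only live reference
  return sum;
}).get();                            // safe here: the stream is non-empty
total.freeRef();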
Use of com.simiacryptus.mindseye.lang.TensorList in project MindsEye by SimiaCryptus.
Class SimpleListEval, method getFeedback:
/**
 * Gets the feedback signal: a TensorList with the same shapes as {@code data}
 * in which every element is 1.0, i.e. the output delta corresponding to
 * summing all outputs.
 *
 * @param data the data
 * @return the feedback
 */
@Nonnull
public TensorList getFeedback(@Nonnull final TensorList data) {
  return TensorArray.wrap(data.stream().map(t -> {
    @Nullable Tensor map = t.map(v -> 1.0);
    t.freeRef();  // release the tensor obtained from the stream
    return map;
  }).toArray(i -> new Tensor[i]));
}
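getFeedback ignores the actual values: t.map(v -> 1.0) replaces every element with 1.0. Inlined for a single tensor (a minimal illustration, using only calls from the snippet above):

@Nonnull Tensor input = new Tensor(2, 2);                     // contents irrelevant; only the shape matters
TensorList feedback = TensorArray.wrap(input.map(v -> 1.0));  // a single 2x2 tensor of ones
input.freeRef();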