Use of com.simiacryptus.util.lang.TimedResult in project MindsEye by SimiaCryptus.
The class MonitoringWrapperLayer, method evalAndFree.
@Override
public Result evalAndFree(@Nonnull final Result... inObj) {
  @Nonnull final AtomicLong passbackNanos = new AtomicLong(0);
  final Result[] wrappedInput = Arrays.stream(inObj).map(result -> {
    return new Result(result.getData(), (@Nonnull final DeltaSet<Layer> buffer, @Nonnull final TensorList data) -> {
      data.addRef();
      passbackNanos.addAndGet(TimedResult.time(() -> result.accumulate(buffer, data)).timeNanos);
    }) {

      @Override
      protected void _free() {
        result.freeRef();
      }

      @Override
      public boolean isAlive() {
        return result.isAlive();
      }
    };
  }).toArray(i -> new Result[i]);
  @Nonnull TimedResult<Result> timedResult = TimedResult.time(() -> getInner().evalAndFree(wrappedInput));
  final Result output = timedResult.result;
  forwardPerformance.add((timedResult.timeNanos) / 1000000000.0);
  totalBatches++;
  final int items = Arrays.stream(inObj).mapToInt(x -> x.getData().length()).max().orElse(1);
  totalItems += items;
  if (recordSignalMetrics) {
    forwardSignal.clear();
    output.getData().stream().parallel().forEach(t -> {
      forwardSignal.add(t.getData());
      t.freeRef();
    });
  }
  return new Result(output.getData(), (@Nonnull final DeltaSet<Layer> buffer, @Nonnull final TensorList data) -> {
    if (recordSignalMetrics) {
      backwardSignal.clear();
      data.stream().parallel().forEach(t -> {
        backwardSignal.add(t.getData());
        t.freeRef();
      });
    }
    data.addRef();
    backwardPerformance.add((TimedResult.time(() -> output.accumulate(buffer, data)).timeNanos - passbackNanos.getAndSet(0)) / (items * 1e9));
  }) {

    @Override
    protected void _free() {
      output.freeRef();
    }

    @Override
    public boolean isAlive() {
      return output.isAlive();
    }
  };
}
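For reference, the timing pattern above can be reduced to a minimal sketch: TimedResult.time wraps a block of work and exposes result, timeNanos, and seconds(), and a nested timing can be subtracted from an outer one to isolate a single stage, just as evalAndFree subtracts passbackNanos from the backward timing. The sketch relies only on the TimedResult calls shown above; busyWork and the surrounding class are hypothetical stand-ins, not MindsEye code.

import java.util.concurrent.atomic.AtomicLong;
import com.simiacryptus.util.lang.TimedResult;

public class LayerTimingSketch {
  public static void main(String[] args) {
    AtomicLong upstreamNanos = new AtomicLong(0);
    // Simulated "upstream" work whose time should be excluded, mirroring passbackNanos above.
    Runnable upstream = () -> upstreamNanos.addAndGet(TimedResult.time(() -> busyWork(5_000_000)).timeNanos);
    // Time the whole backward-style call, then subtract the upstream portion.
    TimedResult<Void> total = TimedResult.time(() -> {
      busyWork(10_000_000); // this layer's own work
      upstream.run();       // delegated work, timed separately
    });
    double ownSeconds = (total.timeNanos - upstreamNanos.getAndSet(0)) / 1e9;
    System.out.printf("total %.4fs, own %.4fs%n", total.seconds(), ownSeconds);
  }

  private static long busyWork(int n) {
    long acc = 0;
    for (int i = 0; i < n; i++) acc += i;
    return acc;
  }
}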
Use of com.simiacryptus.util.lang.TimedResult in project MindsEye by SimiaCryptus.
The class CudnnTest, method testTensorListMT.
private void testTensorListMT(@Nonnull NotebookOutput log, @Nonnull int[] dimensions, int length, double tolerance, int accumulations) {
  @Nonnull Supplier<TensorList> factory = () -> TensorArray.wrap(IntStream.range(0, length).mapToObj(j -> {
    @Nonnull Tensor tensor = new Tensor(dimensions);
    Arrays.parallelSetAll(tensor.getData(), this::random);
    return tensor;
  }).toArray(j -> new Tensor[j]));
  log.code(() -> {
    @Nonnull ListeningExecutorService pool = MoreExecutors.listeningDecorator(Executors.newFixedThreadPool(5));
    PrintStream out = SysOutInterceptor.INSTANCE.currentHandler();
    try {
      List<ListenableFuture<Object>> collect = IntStream.range(0, 16).mapToObj(workerNumber -> {
        @Nonnull TimedResult<TensorList> originalTiming = TimedResult.time(() -> factory.get());
        TensorList original = originalTiming.result;
        logger.info(String.format("[%s] Calculated test data in %.4fsec", workerNumber, originalTiming.seconds()));
        @Nonnull ListenableFuture<TensorList> mutableDataFuture = pool.submit(() -> CudaSystem.run(gpu -> {
          PrintStream oldHandler = SysOutInterceptor.INSTANCE.setCurrentHandler(out);
          @Nonnull TimedResult<CudaTensor> timedResult = TimedResult.time(() -> {
            return gpu.getTensor(original, Precision.Double, MemoryType.Managed, false);
          });
          logger.info(String.format("[%s] Wrote %s in %.4f seconds, Device %d: %s", workerNumber, Arrays.toString(dimensions), timedResult.seconds(), gpu.getDeviceId(), CudaDevice.getDeviceName(gpu.getDeviceId())));
          SysOutInterceptor.INSTANCE.setCurrentHandler(oldHandler);
          return CudaTensorList.wrap(timedResult.result, length, dimensions, Precision.Double);
        }, original));
        @Nonnull TimedResult<List<TensorList>> accumulantTiming = TimedResult.time(() -> IntStream.range(0, accumulations).mapToObj(x -> factory.get()).collect(Collectors.toList()));
        List<TensorList> accumulants = accumulantTiming.result;
        @Nonnull TimedResult<TensorList> finalResultTiming = TimedResult.time(() -> {
          return accumulants.stream().map(x -> {
            x.addRef();
            return x;
          }).reduce((a, b) -> {
            TensorList sum = a.addAndFree(b);
            b.freeRef();
            return sum;
          }).map(x -> {
            TensorList sum = x.add(original);
            x.freeRef();
            return sum;
          }).orElseGet(() -> {
            original.addRef();
            return original;
          });
        });
        logger.info(String.format("[%s] Calculated accumulant in %.4fsec", workerNumber, accumulantTiming.seconds()));
        @Nonnull ListenableFuture<TensorList> accumulated = Futures.transform(mutableDataFuture, (x) -> {
          PrintStream oldHandler = SysOutInterceptor.INSTANCE.setCurrentHandler(out);
          @Nonnull AtomicReference<TensorList> mutableGpuData = new AtomicReference<>(x);
          accumulants.stream().parallel().forEach(delta -> {
            CudaSystem.run(gpu -> {
              @Nonnull TimedResult<CudaTensorList> timedWrite = TimedResult.time(() -> {
                @Nullable CudaTensor cudaMemory = gpu.getTensor(delta, Precision.Double, MemoryType.Managed, false);
                delta.freeRef();
                return CudaTensorList.wrap(cudaMemory, length, dimensions, Precision.Double);
              });
              @Nonnull TimedResult<Void> timedAccumulation = TimedResult.time(() -> {
                synchronized (mutableGpuData) {
                  mutableGpuData.getAndUpdate(y -> {
                    TensorList add = y.add(timedWrite.result);
                    y.freeRef();
                    return add;
                  });
                }
                timedWrite.result.freeRef();
              });
              logger.info(String.format("[%s] Wrote in %.4f seconds and accumulated %s in %.4f seconds, Device %d: %s", workerNumber, timedAccumulation.seconds(), Arrays.toString(dimensions), timedWrite.seconds(), gpu.getDeviceId(), CudaDevice.getDeviceName(gpu.getDeviceId())));
            }, delta);
          });
          SysOutInterceptor.INSTANCE.setCurrentHandler(oldHandler);
          return mutableGpuData.get();
        }, pool);
        TensorList finalResult = finalResultTiming.result;
        logger.info(String.format("[%s] Calculated final data in %.4fsec", workerNumber, finalResultTiming.seconds()));
        return Futures.transform(accumulated, (write) -> {
          original.freeRef();
          PrintStream oldHandler = SysOutInterceptor.INSTANCE.setCurrentHandler(out);
          CudaSystem.run(gpu -> {
            @Nonnull TimedResult<Boolean> timedVerify = TimedResult.time(() -> {
              @Nonnull TensorList minus = finalResult.minus(write);
              double diffVal = minus.stream().mapToDouble(x -> {
                double v = Arrays.stream(x.getData()).map(Math::abs).max().getAsDouble();
                x.freeRef();
                return v;
              }).max().getAsDouble();
              minus.freeRef();
              return diffVal < tolerance;
            });
            logger.info(String.format("[%s] Read %s and verified in %.4fs using device %d: %s", workerNumber, Arrays.toString(dimensions), timedVerify.seconds(), gpu.getDeviceId(), CudaDevice.getDeviceName(gpu.getDeviceId())));
            if (!timedVerify.result)
              Assert.assertTrue(finalResult.prettyPrint() + " != " + write.prettyPrint(), timedVerify.result);
            write.freeRef();
          });
          SysOutInterceptor.INSTANCE.setCurrentHandler(oldHandler);
          finalResult.freeRef();
          return null;
        }, pool);
      }).collect(Collectors.toList());
      List<Object> objects = Futures.allAsList(collect).get();
    } catch (@Nonnull InterruptedException | ExecutionException e) {
      throw new RuntimeException(e);
    } finally {
      pool.shutdown();
    }
  });
}
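The multithreaded test combines TimedResult with Guava's listening executors: work is submitted to a ListeningExecutorService, a follow-up stage is chained with Futures.transform on the same pool, and Futures.allAsList joins the workers. Below is a minimal sketch of that structure, assuming the same Guava calls used above (listeningDecorator, submit, the three-argument Futures.transform, allAsList); makeData, sum, and TimedFutureSketch are hypothetical stand-ins rather than MindsEye or test code.

import com.google.common.util.concurrent.*;
import com.simiacryptus.util.lang.TimedResult;
import java.util.List;
import java.util.concurrent.Executors;
import java.util.stream.Collectors;
import java.util.stream.IntStream;

public class TimedFutureSketch {
  public static void main(String[] args) throws Exception {
    ListeningExecutorService pool = MoreExecutors.listeningDecorator(Executors.newFixedThreadPool(4));
    try {
      List<ListenableFuture<String>> futures = IntStream.range(0, 8).mapToObj(worker -> {
        // Submit timed work, then chain a second timed stage on the same pool,
        // mirroring the submit/transform structure of testTensorListMT.
        ListenableFuture<double[]> produced = pool.submit(() -> TimedResult.time(() -> makeData(1 << 20)).result);
        return Futures.transform(produced, data -> {
          TimedResult<Double> summed = TimedResult.time(() -> sum(data));
          return String.format("[%d] sum=%.3f in %.4fs", worker, summed.result, summed.seconds());
        }, pool);
      }).collect(Collectors.toList());
      Futures.allAsList(futures).get().forEach(System.out::println);
    } finally {
      pool.shutdown();
    }
  }

  private static double[] makeData(int n) {
    double[] d = new double[n];
    for (int i = 0; i < n; i++) d[i] = Math.random();
    return d;
  }

  private static double sum(double[] d) {
    double s = 0;
    for (double v : d) s += v;
    return s;
  }
}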
Use of com.simiacryptus.util.lang.TimedResult in project MindsEye by SimiaCryptus.
The class CudnnTest, method memoryTransfer.
private void memoryTransfer(@Nonnull NotebookOutput log, int... size) {
  @Nonnull Supplier<TensorList> factory = () -> TensorArray.wrap(IntStream.range(0, 1).mapToObj(j -> {
    @Nonnull Tensor tensor = new Tensor(size);
    Arrays.parallelSetAll(tensor.getData(), this::random);
    return tensor;
  }).toArray(j -> new Tensor[j]));
  TensorList original = factory.get();
  log.code(() -> {
    CudaTensor write = CudaSystem.run(gpu -> {
      @Nonnull TimedResult<CudaTensor> timedResult = TimedResult.time(() -> {
        return gpu.getTensor(original, Precision.Double, MemoryType.Managed, false);
      });
      int deviceNumber = gpu.getDeviceId();
      logger.info(String.format("Wrote %s bytes in %.4f seconds, Device %d: %s", Arrays.toString(size), timedResult.seconds(), deviceNumber, CudaDevice.getDeviceName(deviceNumber)));
      return timedResult.result;
    }, original);
    CudnnHandle.forEach(gpu -> {
      @Nonnull Tensor readCopy = new Tensor(size);
      @Nonnull TimedResult<CudaMemory> timedResult = TimedResult.time(() -> {
        CudaMemory cudaMemory = write.getMemory(gpu);
        CudaMemory read = cudaMemory.read(Precision.Double, readCopy.getData());
        cudaMemory.freeRef();
        return read;
      });
      @Nonnull TimedResult<Boolean> timedVerify = TimedResult.time(() -> {
        @Nullable Tensor tensor = original.get(0);
        boolean equals = tensor.equals(readCopy);
        tensor.freeRef();
        return equals;
      });
      int deviceNumber = gpu.getDeviceId();
      logger.info(String.format("Read %s bytes in %.4f seconds and verified in %.4fs using device %d: %s", Arrays.toString(size), timedResult.seconds(), timedVerify.seconds(), deviceNumber, CudaDevice.getDeviceName(deviceNumber)));
      if (!timedVerify.result)
        Assert.assertTrue(original.prettyPrint() + " != " + readCopy.prettyPrint(), timedVerify.result);
      readCopy.freeRef();
    });
    write.freeRef();
  });
  original.freeRef();
}
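memoryTransfer times three stages separately: writing the tensor to device memory, reading it back, and verifying the copy. A minimal sketch of the same write/read/verify timing pattern follows, using plain heap arrays in place of CudaMemory so it runs without a GPU; RoundTripTimingSketch is a hypothetical stand-in, and only the TimedResult calls shown above are assumed.

import com.simiacryptus.util.lang.TimedResult;
import java.util.Arrays;

public class RoundTripTimingSketch {
  public static void main(String[] args) {
    double[] original = new double[1 << 22];
    Arrays.parallelSetAll(original, i -> Math.random());
    // "Write": copy into a second buffer standing in for device memory.
    TimedResult<double[]> written = TimedResult.time(() -> Arrays.copyOf(original, original.length));
    // "Read": copy back into a fresh heap buffer, as memoryTransfer reads into readCopy.
    TimedResult<double[]> read = TimedResult.time(() -> Arrays.copyOf(written.result, written.result.length));
    // Verify the round trip, timing the comparison separately.
    TimedResult<Boolean> verified = TimedResult.time(() -> Arrays.equals(original, read.result));
    System.out.printf("wrote in %.4fs, read in %.4fs, verified=%s in %.4fs%n",
        written.seconds(), read.seconds(), verified.result, verified.seconds());
  }
}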
Use of com.simiacryptus.util.lang.TimedResult in project MindsEye by SimiaCryptus.
The class CudnnTest, method testTensorList.
private void testTensorList(@Nonnull NotebookOutput log, @Nonnull int[] dimensions, int length, double tolerance, int accumulations) {
  @Nonnull Supplier<TensorList> factory = () -> TensorArray.wrap(IntStream.range(0, length).mapToObj(j -> {
    @Nonnull Tensor tensor = new Tensor(dimensions);
    Arrays.parallelSetAll(tensor.getData(), this::random);
    return tensor;
  }).toArray(j -> new Tensor[j]));
  log.code(() -> {
    @Nonnull TimedResult<TensorList> originalTiming = TimedResult.time(() -> factory.get());
    logger.info(String.format("Calculated test data in %.4fsec", originalTiming.seconds()));
    TensorList original = originalTiming.result;
    @Nonnull AtomicReference<TensorList> mutableGpuData = new AtomicReference<>(CudaSystem.run(gpu -> {
      @Nonnull TimedResult<CudaTensor> timedResult = TimedResult.time(() -> {
        return gpu.getTensor(original, Precision.Double, MemoryType.Managed, false);
      });
      logger.info(String.format("Wrote %s in %.4f seconds, Device %d: %s", Arrays.toString(dimensions), timedResult.seconds(), gpu.getDeviceId(), CudaDevice.getDeviceName(gpu.getDeviceId())));
      return CudaTensorList.wrap(timedResult.result, length, dimensions, Precision.Double);
    }, original));
    CudnnHandle.forEach(ctx -> {
      @Nonnull TimedResult<TensorList> timedResult = TimedResult.time(() -> (mutableGpuData.get() instanceof CudaTensorList) ? ((CudaTensorList) mutableGpuData.get()).getHeapCopy() : mutableGpuData.get());
      @Nonnull TimedResult<Boolean> timedVerify = TimedResult.time(() -> {
        @Nonnull TensorList minus = original.minus(timedResult.result);
        double variance = minus.stream().mapToDouble(x -> Arrays.stream(x.getData()).map(Math::abs).max().getAsDouble()).max().getAsDouble();
        minus.freeRef();
        return variance < tolerance;
      });
      logger.info(String.format("Read %s in %.4f seconds and verified in %.4fs using device %d: %s", Arrays.toString(dimensions), timedResult.seconds(), timedVerify.seconds(), ctx.getDeviceId(), CudaDevice.getDeviceName(ctx.getDeviceId())));
      if (!timedVerify.result)
        Assert.assertTrue(original.prettyPrint() + " != " + timedResult.result.prettyPrint(), timedVerify.result);
      timedResult.result.freeRef();
    });
    @Nonnull TimedResult<List<TensorList>> accumulantTiming = TimedResult.time(() -> IntStream.range(0, accumulations).mapToObj(x -> factory.get()).collect(Collectors.toList()));
    logger.info(String.format("Calculated accumulant in %.4fsec", accumulantTiming.seconds()));
    List<TensorList> accumulants = accumulantTiming.result;
    accumulants.stream().forEach(accumulant -> {
      CudaSystem.run(gpu -> {
        @Nonnull TimedResult<TensorList> timedWrite = TimedResult.time(() -> {
          return CudaTensorList.wrap(gpu.getTensor(accumulant, Precision.Double, MemoryType.Managed, false), length, dimensions, Precision.Double);
        });
        @Nonnull TimedResult<Void> timedAccumulation = TimedResult.time(() -> {
          mutableGpuData.getAndUpdate(x -> x.add(timedWrite.result)).freeRef();
          timedWrite.result.freeRef();
        });
        logger.info(String.format("Wrote in %.4f seconds and accumulated %s in %.4f seconds, Device %d: %s", timedAccumulation.seconds(), Arrays.toString(dimensions), timedWrite.seconds(), gpu.getDeviceId(), CudaDevice.getDeviceName(gpu.getDeviceId())));
      }, accumulant);
    });
    @Nonnull TimedResult<TensorList> finalResultTiming = TimedResult.time(() -> {
      return accumulants.stream().reduce((a, b) -> {
        TensorList sum = a.addAndFree(b);
        b.freeRef();
        return sum;
      }).map(x -> {
        TensorList sum = x.add(original);
        x.freeRef();
        return sum;
      }).orElseGet(() -> {
        original.addRef();
        return original;
      });
    });
    original.freeRef();
    logger.info(String.format("Calculated final data in %.4fsec", finalResultTiming.seconds()));
    TensorList finalResult = finalResultTiming.result;
    CudnnHandle.forEach(ctx -> {
      @Nonnull TimedResult<Boolean> timedVerify = TimedResult.time(() -> {
        @Nonnull TensorList minus = finalResult.minus(mutableGpuData.get());
        double diffVal = minus.stream().mapToDouble(x -> {
          double v = Arrays.stream(x.getData()).map(Math::abs).max().getAsDouble();
          x.freeRef();
          return v;
        }).max().getAsDouble();
        minus.freeRef();
        return diffVal < tolerance;
      });
      logger.info(String.format("Read %s and verified in %.4fs using device %d: %s", Arrays.toString(dimensions), timedVerify.seconds(), ctx.getDeviceId(), CudaDevice.getDeviceName(ctx.getDeviceId())));
      if (!timedVerify.result)
        Assert.assertTrue(finalResult.prettyPrint() + " != " + mutableGpuData.get().prettyPrint(), timedVerify.result);
    });
    mutableGpuData.get().freeRef();
    finalResult.freeRef();
  });
}
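testTensorList folds a list of accumulants onto the original tensor list with Stream.reduce, timing the reduction, and then checks that the GPU-accumulated and heap-accumulated results agree within a tolerance. A minimal sketch of that accumulate-then-verify pattern with double arrays follows; it assumes only the TimedResult calls shown above, and the helper methods and class name are hypothetical.

import com.simiacryptus.util.lang.TimedResult;
import java.util.Arrays;
import java.util.List;
import java.util.stream.Collectors;
import java.util.stream.IntStream;

public class AccumulateAndVerifySketch {
  public static void main(String[] args) {
    int length = 1 << 16, accumulations = 8;
    double tolerance = 1e-9;
    double[] original = randomArray(length);
    List<double[]> accumulants = IntStream.range(0, accumulations)
        .mapToObj(i -> randomArray(length)).collect(Collectors.toList());
    // Reference result: fold the accumulants onto the original, timing the reduction.
    TimedResult<double[]> reduced = TimedResult.time(() -> accumulants.stream()
        .reduce(original.clone(), AccumulateAndVerifySketch::add));
    // A second accumulation in reverse order, standing in for the GPU-side sum.
    double[] acc = original.clone();
    for (int i = accumulants.size() - 1; i >= 0; i--) acc = add(acc, accumulants.get(i));
    final double[] other = acc;
    // Verify the two results agree within tolerance, as the test verifies GPU vs. heap sums.
    TimedResult<Boolean> verify = TimedResult.time(() -> maxAbsDiff(reduced.result, other) < tolerance);
    System.out.printf("accumulated in %.4fs, verified=%s in %.4fs%n",
        reduced.seconds(), verify.result, verify.seconds());
  }

  private static double[] randomArray(int n) {
    double[] d = new double[n];
    Arrays.parallelSetAll(d, i -> Math.random());
    return d;
  }

  private static double[] add(double[] a, double[] b) {
    double[] out = a.clone();
    for (int i = 0; i < out.length; i++) out[i] += b[i];
    return out;
  }

  private static double maxAbsDiff(double[] a, double[] b) {
    double max = 0;
    for (int i = 0; i < a.length; i++) max = Math.max(max, Math.abs(a[i] - b[i]));
    return max;
  }
}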
Use of com.simiacryptus.util.lang.TimedResult in project MindsEye by SimiaCryptus.
The class BasicTrainable, method eval.
/**
 * Eval point sample.
 *
 * @param list    the list
 * @param monitor the monitor
 * @return the point sample
 */
@Nonnull
protected PointSample eval(@Nonnull final List<Tensor[]> list, @Nullable final TrainingMonitor monitor) {
  // Time the full forward and backward evaluation of this batch.
  @Nonnull final TimedResult<PointSample> timedResult = TimedResult.time(() -> {
    final Result[] nnContext = BasicTrainable.getNNContext(list, mask);
    final Result result = network.eval(nnContext);
    for (@Nonnull Result nnResult : nnContext) {
      nnResult.getData().freeRef();
      nnResult.freeRef();
    }
    final TensorList resultData = result.getData();
    @Nonnull final DeltaSet<Layer> deltaSet = new DeltaSet<Layer>();
    @Nullable StateSet<Layer> stateSet = null;
    try {
      final DoubleSummaryStatistics statistics = resultData.stream().flatMapToDouble(x -> {
        double[] array = Arrays.stream(x.getData()).toArray();
        x.freeRef();
        return Arrays.stream(array);
      }).summaryStatistics();
      final double sum = statistics.getSum();
      // Backpropagate with a unit gradient to collect the deltas.
      result.accumulate(deltaSet, 1.0);
      stateSet = new StateSet<>(deltaSet);
      // log.info(String.format("Evaluated to %s delta buffers, %s mag", DeltaSet<LayerBase>.getMap().size(), DeltaSet<LayerBase>.getMagnitude()));
      return new PointSample(deltaSet, stateSet, sum, 0.0, list.size());
    } finally {
      if (null != stateSet)
        stateSet.freeRef();
      resultData.freeRefAsync();
      result.freeRefAsync();
      deltaSet.freeRefAsync();
    }
  });
  if (null != monitor && verbosity() > 0) {
    monitor.log(String.format("Device completed %s items in %.3f sec", list.size(), timedResult.timeNanos / 1e9));
  }
  @Nonnull PointSample normalize = timedResult.result.normalize();
  timedResult.result.freeRef();
  return normalize;
}
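BasicTrainable.eval wraps the whole evaluation in TimedResult and, when a monitor is present, logs the item count and the wall-clock seconds derived from timeNanos. A minimal sketch of that monitor-reporting pattern follows, with a Consumer<String> standing in for TrainingMonitor and a trivial loss function standing in for network.eval; nothing here is actual MindsEye API beyond the TimedResult usage shown above.

import com.simiacryptus.util.lang.TimedResult;
import java.util.List;
import java.util.function.Consumer;
import java.util.stream.Collectors;
import java.util.stream.IntStream;

public class MonitoredEvalSketch {
  public static void main(String[] args) {
    List<double[]> batch = IntStream.range(0, 64)
        .mapToObj(i -> new double[]{Math.random(), Math.random()}).collect(Collectors.toList());
    Consumer<String> monitor = System.out::println; // stand-in for TrainingMonitor.log
    // Time the whole evaluation, then report items and wall time, as eval() does.
    TimedResult<Double> timed = TimedResult.time(() ->
        batch.stream().mapToDouble(x -> x[0] * x[0] + x[1] * x[1]).sum());
    monitor.accept(String.format("Device completed %s items in %.3f sec",
        batch.size(), timed.timeNanos / 1e9));
    System.out.println("loss = " + timed.result);
  }
}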