Usage example of com.simiacryptus.util.lang.TimedResult in the MindsEye project by SimiaCryptus: the eval method of the TensorListTrainable class.
/**
 * Evaluates the network on the given input data and packages the forward/backward
 * results as a point sample, timing the whole pass for monitor reporting.
 *
 * @param list    the input data, one TensorList per network input
 * @param monitor the training monitor used for progress logging; may be null
 * @return the point sample holding the accumulated deltas, weight state, summed output, and item count
 */
@Nonnull
protected PointSample eval(@Nonnull final TensorList[] list, @Nullable final TrainingMonitor monitor) {
int inputs = data.length;
assert 0 < inputs;
int items = data[0].length();
assert 0 < items;
// Time the full forward + backward pass; the duration is reported to the monitor below.
@Nonnull final TimedResult<PointSample> timedResult = TimedResult.time(() -> {
final Result[] nnContext = TensorListTrainable.getNNContext(list, mask);
final Result result = network.eval(nnContext);
// Release each input context's data first, then the context itself (reference-counted).
for (@Nonnull Result nnResult : nnContext) {
nnResult.getData().freeRef();
nnResult.freeRef();
}
final TensorList resultData = result.getData();
// Sum all output values. The toArray() copy keeps the values valid past x.freeRef().
final DoubleSummaryStatistics statistics = resultData.stream().flatMapToDouble(x -> {
double[] array = Arrays.stream(x.getData()).toArray();
x.freeRef();
return Arrays.stream(array);
}).summaryStatistics();
final double sum = statistics.getSum();
@Nonnull final DeltaSet<Layer> deltaSet = new DeltaSet<Layer>();
@Nonnull PointSample pointSample;
try {
// Backpropagate with a unit gradient to populate the delta set.
result.accumulate(deltaSet, 1.0);
@Nonnull StateSet<Layer> stateSet = new StateSet<>(deltaSet);
pointSample = new PointSample(deltaSet, stateSet, sum, 0.0, items);
stateSet.freeRef();
} finally {
// PointSample retains its own references; drop the local ones even if accumulate threw.
resultData.freeRef();
result.freeRef();
deltaSet.freeRef();
}
return pointSample;
});
if (null != monitor && verbosity() > 0) {
monitor.log(String.format("Device completed %s items in %.3f sec", items, timedResult.timeNanos / 1e9));
}
// Normalize the sample before returning, then release the timed holder's reference.
@Nonnull PointSample normalize = timedResult.result.normalize();
timedResult.result.freeRef();
return normalize;
}
Usage example of com.simiacryptus.util.lang.TimedResult in the MindsEye project by SimiaCryptus: the testPerformance method of the PerformanceTester class.
/**
 * Measures forward (eval) and backward (accumulate) wall-clock time for a layer.
 *
 * @param component      the layer under test
 * @param inputPrototype the input prototype, replicated once per batch
 * @return a tuple of (forward seconds, backward seconds)
 */
@Nonnull
protected Tuple2<Double, Double> testPerformance(@Nonnull final Layer component, final Tensor... inputPrototype) {
// Replicate the prototype row once per batch to build the timing data set.
final Tensor[][] data = IntStream.range(0, batches).mapToObj(x -> x).flatMap(x -> Stream.<Tensor[]>of(inputPrototype)).toArray(i -> new Tensor[i][]);
@Nonnull TimedResult<Result> timedEval = TimedResult.time(() -> {
Result[] input = ConstantResult.batchResultArray(data);
@Nullable Result result;
try {
result = component.eval(input);
} finally {
for (@Nonnull Result nnResult : input) {
// Fix: release the data BEFORE the result that owns it; calling getData()
// on an already-freed result is a use-after-free. This matches the release
// order used elsewhere in the project (data first, then the result).
nnResult.getData().freeRef();
nnResult.freeRef();
}
}
return result;
});
final Result result = timedEval.result;
@Nonnull final DeltaSet<Layer> buffer = new DeltaSet<Layer>();
try {
long timedBackprop = TimedResult.time(() -> {
// Backpropagate a unit gradient (all ones) through the layer.
@Nonnull TensorArray tensorArray = TensorArray.wrap(result.getData().stream().map(x -> {
return x.mapAndFree(v -> 1.0);
}).toArray(i -> new Tensor[i]));
result.accumulate(buffer, tensorArray);
assert tensorArray.currentRefCount() == 0;
return buffer;
}).timeNanos;
return new Tuple2<>(timedEval.timeNanos / 1e9, timedBackprop / 1e9);
} finally {
// Fix: free the result's data before freeing the result itself (same ordering bug
// as above — the original called result.getData() after result.freeRef()).
result.getData().freeRef();
result.freeRef();
buffer.freeRef();
}
}
Usage example of com.simiacryptus.util.lang.TimedResult in the MindsEye project by SimiaCryptus: the toHeap method of the CudaTensorList class.
/**
 * Copies this tensor list's GPU-resident data into a heap-resident TensorArray,
 * timing the transfer and logging it at debug level.
 *
 * NOTE(review): when gpuCopy is null but heapCopy exists and is not finalized, the
 * guard block below throws nothing and control falls through to gpuCopy.addRef(),
 * which raises a NullPointerException — confirm whether this path should return
 * the heap copy instead.
 *
 * @param avoidAllocations forwarded to CudaTensor.read; presumably suppresses
 *                         temporary device allocations during the read — TODO confirm
 * @return the heap-resident copy of this tensor list
 */
private TensorArray toHeap(final boolean avoidAllocations) {
CudaTensor gpuCopy = this.gpuCopy;
TimedResult<TensorArray> timedResult = TimedResult.time(() -> CudaDevice.run(gpu -> {
assert CudaDevice.isThreadDeviceId(gpu.getDeviceId());
// Fail fast when neither a GPU copy nor a usable heap copy exists.
if (null == gpuCopy) {
if (null == heapCopy) {
throw new IllegalStateException("No data");
} else if (heapCopy.isFinalized()) {
throw new IllegalStateException("Local data has been freed");
}
}
// Pin the GPU copy for the duration of the read (released in the finally below).
gpuCopy.addRef();
assert CudaDevice.isThreadDeviceId(gpu.getDeviceId());
try {
assert getPrecision() == gpuCopy.getPrecision();
assert getPrecision() == gpuCopy.descriptor.dataType;
assert CudaDevice.isThreadDeviceId(gpu.getDeviceId());
// Allocate one heap tensor per list element, then read each row off the GPU.
final Tensor[] output = IntStream.range(0, getLength()).mapToObj(dataIndex -> new Tensor(getDimensions())).toArray(i -> new Tensor[i]);
assert CudaDevice.isThreadDeviceId(gpu.getDeviceId());
for (int i = 0; i < getLength(); i++) {
assert CudaDevice.isThreadDeviceId(gpu.getDeviceId());
gpuCopy.read(gpu, i, output[i], avoidAllocations);
assert CudaDevice.isThreadDeviceId(gpu.getDeviceId());
}
return TensorArray.wrap(output);
} finally {
gpuCopy.freeRef();
}
}, this));
// NOTE(review): gpuCopy.size() here runs after the lambda's freeRef; the addRef/freeRef
// pair above is balanced, so validity relies on the this.gpuCopy field's own reference
// still being held — confirm.
CudaTensorList.logger.debug(String.format("Read %s bytes in %.4f from Tensor %s on GPU at %s, created by %s", gpuCopy.size(), timedResult.seconds(), Integer.toHexString(System.identityHashCode(timedResult.result)), TestUtil.toString(TestUtil.getStackTrace()).replaceAll("\n", "\n\t"), TestUtil.toString(createdBy).replaceAll("\n", "\n\t")));
return timedResult.result;
}
Aggregations