Use of com.amazon.randomcutforest.sampler.CompactSampler in project random-cut-forest-by-aws by aws.
From the class ProtostuffExampleWithDynamicLambda, method run.
/**
 * Serializes a compact random cut forest with protostuff, restores it, raises
 * the time decay on both copies and verifies that the two forests stay in
 * agreement.
 *
 * <p>
 * The example proceeds in four steps:
 * <ol>
 * <li>build a forest and feed it {@code 4 * sampleSize} generated points;</li>
 * <li>map the forest to a {@link RandomCutForestState}, write it to a byte
 * array, then read it back into a second forest;</li>
 * <li>multiply the time decay of both forests by 10 and check, tree by tree,
 * that the samplers report the same maximum sequence index and the same most
 * recent time-decay update, and that this update equals
 * {@code dataSize - 1};</li>
 * <li>score and update both forests with 100 further points and compare the
 * scores of the points that either forest scores above 1.</li>
 * </ol>
 *
 * @throws IllegalStateException if the sampler checks fail, if no test point is
 *                               scored above 1 by either forest, or if the two
 *                               forests disagree on at least 1% of the test
 *                               points.
 */
@Override
public void run() throws Exception {
    // Create and populate a random cut forest
    int dimensions = 4;
    int numberOfTrees = 50;
    int sampleSize = 256;
    Precision precision = Precision.FLOAT_64;
    RandomCutForest forest = RandomCutForest.builder().compact(true).dimensions(dimensions)
            .numberOfTrees(numberOfTrees).sampleSize(sampleSize).precision(precision).build();

    int dataSize = 4 * sampleSize;
    NormalMixtureTestData testData = new NormalMixtureTestData();
    for (double[] point : testData.generateTestData(dataSize, dimensions)) {
        forest.update(point);
    }

    // Convert to an array of bytes and print the size
    RandomCutForestMapper mapper = new RandomCutForestMapper();
    mapper.setSaveExecutorContextEnabled(true);

    Schema<RandomCutForestState> schema = RuntimeSchema.getSchema(RandomCutForestState.class);
    LinkedBuffer buffer = LinkedBuffer.allocate(512);
    byte[] bytes;
    try {
        RandomCutForestState state = mapper.toState(forest);
        bytes = ProtostuffIOUtil.toByteArray(state, schema, buffer);
    } finally {
        // always reset the buffer, even if serialization failed
        buffer.clear();
    }

    System.out.printf("dimensions = %d, numberOfTrees = %d, sampleSize = %d, precision = %s%n", dimensions,
            numberOfTrees, sampleSize, precision);
    System.out.printf("protostuff size = %d bytes%n", bytes.length);

    // Restore from protostuff and compare anomaly scores produced by the two
    // forests
    RandomCutForestState state2 = schema.newMessage();
    ProtostuffIOUtil.mergeFrom(bytes, state2, schema);
    RandomCutForest forest2 = mapper.toModel(state2);

    // Change the time decay on both forests after the round trip
    forest.setTimeDecay(10 * forest.getTimeDecay());
    forest2.setTimeDecay(10 * forest2.getTimeDecay());

    // After the change, every pair of samplers must report the same bookkeeping
    for (int i = 0; i < numberOfTrees; i++) {
        CompactSampler sampler = (CompactSampler) ((SamplerPlusTree<?, ?>) forest.getComponents().get(i))
                .getSampler();
        CompactSampler sampler2 = (CompactSampler) ((SamplerPlusTree<?, ?>) forest2.getComponents().get(i))
                .getSampler();

        if (sampler.getMaxSequenceIndex() != sampler2.getMaxSequenceIndex()) {
            throw new IllegalStateException("Incorrect sampler state");
        }
        if (sampler.getMostRecentTimeDecayUpdate() != sampler2.getMostRecentTimeDecayUpdate()) {
            throw new IllegalStateException("Incorrect sampler state");
        }
        // the last of the dataSize updates has index dataSize - 1
        if (sampler2.getMostRecentTimeDecayUpdate() != dataSize - 1) {
            throw new IllegalStateException("Incorrect sampler state");
        }
    }

    int testSize = 100;
    // tolerated score gap: 5% of log2(sampleSize)
    double delta = Math.log(sampleSize) / Math.log(2) * 0.05;

    int differences = 0;
    int anomalies = 0;

    for (double[] point : testData.generateTestData(testSize, dimensions)) {
        double score = forest.getAnomalyScore(point);
        double score2 = forest2.getAnomalyScore(point);

        // only points that at least one forest scores above 1 are compared;
        // for those, the two scores must agree to within delta
        if (score > 1 || score2 > 1) {
            anomalies++;
            if (Math.abs(score - score2) > delta) {
                differences++;
            }
        }

        forest.update(point);
        forest2.update(point);
    }

    // first validate that this was a nontrivial test
    if (anomalies == 0) {
        throw new IllegalStateException("test data did not produce any anomalies");
    }

    // validate that the two forests agree on anomaly scores
    if (differences >= 0.01 * testSize) {
        throw new IllegalStateException("restored forest does not agree with original forest");
    }

    System.out.println("Looks good!");
}
Use of com.amazon.randomcutforest.sampler.CompactSampler in project random-cut-forest-by-aws by aws.
From the class CompactSamplerMapperTest, method samplerProvider.
/**
 * Builds a stream of named sampler arguments: everything produced by
 * {@code nonemptySamplerProvider()}, followed by two empty samplers that differ
 * only in whether sequence indexes are stored.
 *
 * @return the concatenated stream of {@link Arguments}, each pairing a
 *         description with a {@link CompactSampler}.
 */
public static Stream<Arguments> samplerProvider() {
    CompactSampler withoutSequenceIndexes = CompactSampler.builder()
            .capacity(sampleSize)
            .timeDecay(lambda)
            .randomSeed(seed)
            .storeSequenceIndexesEnabled(false)
            .build();

    CompactSampler withSequenceIndexes = CompactSampler.builder()
            .capacity(sampleSize)
            .timeDecay(lambda)
            .randomSeed(seed)
            .storeSequenceIndexesEnabled(true)
            .build();

    Stream<Arguments> populatedSamplers = nonemptySamplerProvider();
    Stream<Arguments> emptySamplers = Stream.of(
            Arguments.of("empty sampler without sequence indexes", withoutSequenceIndexes),
            Arguments.of("empty sampler with sequence indexes", withSequenceIndexes));

    return Stream.concat(populatedSamplers, emptySamplers);
}
Use of com.amazon.randomcutforest.sampler.CompactSampler in project random-cut-forest-by-aws by aws.
From the class RandomCutForestMapper, method toModel.
/**
 * Rebuild a {@link RandomCutForest} from a {@link RandomCutForestState}.
 *
 * <p>
 * A state whose precision is {@code FLOAT_32} is handed to
 * {@code singlePrecisionForest}. For any other precision the point store is
 * restored with {@code PointStoreMapper.convertFromDouble}, and for each tree a
 * new tree and a new sampler are created and filled by replaying the weighted
 * samples held in the matching sampler state. This second path therefore
 * requires that the state was saved with sampler state enabled.
 *
 * @param state            A Random Cut Forest state object.
 * @param executionContext Supplies the parallel-execution flag and thread pool
 *                         size for the new forest. If this argument is null,
 *                         the execution context stored in {@code state} is
 *                         used instead.
 * @param seed             A random seed passed to the forest builder.
 * @return A Random Cut Forest corresponding to the state object.
 * @throws NullPointerException     if both the {@code executionContext}
 *                                  argument and the execution context in the
 *                                  state object are null.
 * @throws IllegalArgumentException if the precision is not {@code FLOAT_32} and
 *                                  the state was saved without sampler state.
 */
public RandomCutForest toModel(RandomCutForestState state, ExecutionContext executionContext, long seed) {
    ExecutionContext effectiveContext;
    if (executionContext == null) {
        // fall back to the context saved with the state; it must be present
        checkNotNull(state.getExecutionContext(),
                "The executor context in the state object is null, an executor context must be passed explicitly to toModel()");
        effectiveContext = state.getExecutionContext();
    } else {
        effectiveContext = executionContext;
    }

    RandomCutForest.Builder<?> builder = RandomCutForest.builder()
            .numberOfTrees(state.getNumberOfTrees())
            .dimensions(state.getDimensions())
            .timeDecay(state.getTimeDecay())
            .sampleSize(state.getSampleSize())
            .centerOfMassEnabled(state.isCenterOfMassEnabled())
            .outputAfter(state.getOutputAfter())
            .parallelExecutionEnabled(effectiveContext.isParallelExecutionEnabled())
            .threadPoolSize(effectiveContext.getThreadPoolSize())
            .storeSequenceIndexesEnabled(state.isStoreSequenceIndexesEnabled())
            .shingleSize(state.getShingleSize())
            .boundingBoxCacheFraction(state.getBoundingBoxCacheFraction())
            .compact(state.isCompact())
            .internalShinglingEnabled(state.isInternalShinglingEnabled())
            .randomSeed(seed);

    // single-precision states are rebuilt by a dedicated helper
    if (Precision.valueOf(state.getPrecision()) == Precision.FLOAT_32) {
        return singlePrecisionForest(builder, state, null, null, null);
    }

    Random rng = builder.getRandom();
    PointStore store = new PointStoreMapper().convertFromDouble(state.getPointStoreState());
    ComponentList<Integer, float[]> forestComponents = new ComponentList<>();
    PointStoreCoordinator<float[]> updateCoordinator = new PointStoreCoordinator<>(store);
    updateCoordinator.setTotalUpdates(state.getTotalUpdates());

    // NOTE(review): this context is configured but never read again in this
    // method -- confirm whether it is still needed.
    CompactRandomCutTreeContext context = new CompactRandomCutTreeContext();
    context.setPointStore(store);
    context.setMaxSize(state.getSampleSize());

    checkArgument(state.isSaveSamplerStateEnabled(), " conversion cannot proceed without samplers");

    List<CompactSamplerState> savedSamplers = state.getCompactSamplerStates();
    CompactSamplerMapper samplerMapper = new CompactSamplerMapper();

    for (int treeIndex = 0; treeIndex < state.getNumberOfTrees(); treeIndex++) {
        CompactSampler savedSampler = samplerMapper.toModel(savedSamplers.get(treeIndex));

        // the tree seed is drawn before the sampler seed; keep this order
        RandomCutTree tree = RandomCutTree.builder()
                .capacity(state.getSampleSize())
                .pointStoreView(store)
                .storeSequenceIndexesEnabled(state.isStoreSequenceIndexesEnabled())
                .outputAfter(state.getOutputAfter())
                .centerOfMassEnabled(state.isCenterOfMassEnabled())
                .randomSeed(rng.nextLong())
                .build();
        CompactSampler freshSampler = CompactSampler.builder()
                .capacity(state.getSampleSize())
                .timeDecay(state.getTimeDecay())
                .randomSeed(rng.nextLong())
                .build();

        freshSampler.setMaxSequenceIndex(savedSampler.getMaxSequenceIndex());
        freshSampler.setMostRecentTimeDecayUpdate(savedSampler.getMostRecentTimeDecayUpdate());

        for (Weighted<Integer> entry : savedSampler.getWeightedSample()) {
            Integer savedReference = entry.getValue();
            Integer treeReference = tree.addPoint(savedReference, entry.getSequenceIndex());

            // the tree may hand back a different reference for the point; when it
            // does, move one reference count from the old index to the new one
            if (treeReference.intValue() != savedReference.intValue()) {
                store.incrementRefCount(treeReference);
                store.decrementRefCount(savedReference);
            }

            freshSampler.addPoint(treeReference, entry.getWeight(), entry.getSequenceIndex());
        }

        forestComponents.add(new SamplerPlusTree<>(freshSampler, tree));
    }

    return new RandomCutForest(builder, updateCoordinator, forestComponents, rng);
}
Use of com.amazon.randomcutforest.sampler.CompactSampler in project random-cut-forest-by-aws by aws.
From the class RandomCutForestMapper, method toState.
/**
 * Capture the state of the given forest in a {@link RandomCutForestState}.
 *
 * <p>
 * The forest's configuration and this mapper's save/compression flags are
 * always copied into the state. The remaining content depends on the mapper
 * flags:
 * <ul>
 * <li>{@code saveExecutorContextEnabled}: the parallel-execution flag and
 * thread pool size are stored as an execution context;</li>
 * <li>{@code saveCoordinatorStateEnabled}: the point store held by the update
 * coordinator is stored;</li>
 * <li>{@code saveSamplerStateEnabled}: one sampler state per component is
 * stored, otherwise the sampler state list is set to null;</li>
 * <li>{@code saveTreeStateEnabled}: one tree state per component is
 * stored.</li>
 * </ul>
 *
 * @param forest A Random Cut Forest whose state we want to capture.
 * @return a {@link RandomCutForestState} object representing the state of the
 *         given forest.
 * @throws IllegalArgumentException if {@code saveTreeStateEnabled} is true and
 *                                  the forest is not compact.
 */
@Override
public RandomCutForestState toState(RandomCutForest forest) {
    if (saveTreeStateEnabled) {
        checkArgument(forest.isCompact(), "tree state cannot be saved for noncompact forests");
    }

    RandomCutForestState snapshot = new RandomCutForestState();

    // forest configuration
    snapshot.setNumberOfTrees(forest.getNumberOfTrees());
    snapshot.setDimensions(forest.getDimensions());
    snapshot.setTimeDecay(forest.getTimeDecay());
    snapshot.setSampleSize(forest.getSampleSize());
    snapshot.setShingleSize(forest.getShingleSize());
    snapshot.setCenterOfMassEnabled(forest.isCenterOfMassEnabled());
    snapshot.setOutputAfter(forest.getOutputAfter());
    snapshot.setStoreSequenceIndexesEnabled(forest.isStoreSequenceIndexesEnabled());
    snapshot.setTotalUpdates(forest.getTotalUpdates());
    snapshot.setCompact(forest.isCompact());
    snapshot.setInternalShinglingEnabled(forest.isInternalShinglingEnabled());
    snapshot.setBoundingBoxCacheFraction(forest.getBoundingBoxCacheFraction());

    // mapper options that describe what the snapshot contains
    snapshot.setSaveSamplerStateEnabled(saveSamplerStateEnabled);
    snapshot.setSaveTreeStateEnabled(saveTreeStateEnabled);
    snapshot.setSaveCoordinatorStateEnabled(saveCoordinatorStateEnabled);
    snapshot.setPrecision(forest.getPrecision().name());
    snapshot.setCompressed(compressionEnabled);
    snapshot.setPartialTreeState(partialTreeStateEnabled);

    if (saveExecutorContextEnabled) {
        ExecutionContext savedContext = new ExecutionContext();
        savedContext.setParallelExecutionEnabled(forest.isParallelExecutionEnabled());
        savedContext.setThreadPoolSize(forest.getThreadPoolSize());
        snapshot.setExecutionContext(savedContext);
    }

    if (saveCoordinatorStateEnabled) {
        PointStoreCoordinator<?> coordinator = (PointStoreCoordinator<?>) forest.getUpdateCoordinator();
        PointStoreMapper storeMapper = new PointStoreMapper();
        storeMapper.setCompressionEnabled(compressionEnabled);
        storeMapper.setNumberOfTrees(forest.getNumberOfTrees());
        PointStoreState storeState = storeMapper.toState((PointStore) coordinator.getStore());
        snapshot.setPointStoreState(storeState);
    }

    // a null list means "this part of the forest is not being saved"
    List<CompactSamplerState> samplerStates = saveSamplerStateEnabled ? new ArrayList<>() : null;
    List<ITree<Integer, ?>> trees = saveTreeStateEnabled ? new ArrayList<>() : null;

    CompactSamplerMapper samplerMapper = new CompactSamplerMapper();
    samplerMapper.setCompressionEnabled(compressionEnabled);

    for (IComponentModel<?, ?> component : forest.getComponents()) {
        SamplerPlusTree<Integer, ?> pair = (SamplerPlusTree<Integer, ?>) component;
        CompactSampler componentSampler = (CompactSampler) pair.getSampler();

        if (samplerStates != null) {
            samplerStates.add(samplerMapper.toState(componentSampler));
        }
        if (trees != null) {
            trees.add(pair.getTree());
        }
    }
    snapshot.setCompactSamplerStates(samplerStates);

    if (trees != null) {
        RandomCutTreeMapper treeMapper = new RandomCutTreeMapper();
        List<CompactRandomCutTreeState> treeStates = new ArrayList<>();
        for (ITree<Integer, ?> tree : trees) {
            treeStates.add(treeMapper.toState((RandomCutTree) tree));
        }
        snapshot.setCompactRandomCutTreeStates(treeStates);
    }

    return snapshot;
}
Use of com.amazon.randomcutforest.sampler.CompactSampler in project random-cut-forest-by-aws by aws.
From the class CompactSamplerMapperTest, method testRoundTripInvalidHeap.
/**
 * Checks how the mapper treats a sampler state whose weight array has been
 * tampered with: with heap validation enabled {@code toModel} must throw an
 * {@link IllegalStateException}; with validation disabled it must return a
 * sampler whose weight array equals that of the original sampler.
 *
 * @param description label of the supplied sampler (not used in the test body).
 * @param sampler     a sampler supplied by {@code nonemptySamplerProvider}.
 */
@ParameterizedTest
@MethodSource("nonemptySamplerProvider")
public void testRoundTripInvalidHeap(String description, CompactSampler sampler) {
    mapper.setValidateHeapEnabled(true);
    CompactSamplerState state = mapper.toState(sampler);

    // swap two weights in the weight array (the entry at size / 4 and the entry
    // at twice that index plus one) in order to violate the heap property
    float[] weights = state.getWeight();
    int upper = state.getSize() / 4;
    int lower = 2 * upper + 1;
    float lowerWeight = weights[lower];
    weights[lower] = weights[upper];
    weights[upper] = lowerWeight;

    assertThrows(IllegalStateException.class, () -> mapper.toModel(state));

    // without validation the same state must be accepted
    mapper.setValidateHeapEnabled(false);
    CompactSampler restored = mapper.toModel(state);
    assertArrayEquals(sampler.getWeightArray(), restored.getWeightArray());
}
Aggregations