use of com.amazon.randomcutforest.store.PointStore in project random-cut-forest-by-aws by aws.
the class RandomCutTreeTest method setUp.
/**
 * Builds the deterministic tree fixture shared by the tests in this class.
 *
 * The random source is a mock, so every draw used while inserting points is
 * fixed by the stubbing below. The intended tree shape is:
 *
 * <pre>
 *            root
 *           /    \
 *      (-1,-1)    node
 *                /    \
 *             node    (1,1)
 *            /    \
 *       (-1,0)    (0,1)
 * </pre>
 *
 * The leaf (0,1) is inserted twice and therefore carries mass 2; every other
 * leaf carries mass 1. The insertion order and the stubbed random values are
 * chosen so that each split divides its parent in half; the random values
 * determine the cut dimensions and cut values.
 */
@BeforeEach
public void setUp() {
    rng = mock(Random.class);

    PointStore store = new PointStore.Builder()
            .indexCapacity(100)
            .capacity(100)
            .initialSize(100)
            .dimensions(2)
            .build();

    tree = RandomCutTree.builder()
            .random(rng)
            .centerOfMassEnabled(true)
            .pointStoreView(store)
            .storeSequenceIndexesEnabled(true)
            .dimension(2)
            .build();

    // Configuration sanity checks: invalid values and unknown keys are rejected,
    // and the cache fraction starts at 1.0 before it is lowered to 0.2.
    assertThrows(IllegalArgumentException.class, () -> tree.setBoundingBoxCacheFraction(-0.5));
    assertThrows(IllegalArgumentException.class, () -> tree.setConfig("foo", 0));
    assertThrows(IllegalArgumentException.class, () -> tree.getConfig("bar"));
    assertEquals(tree.getConfig(Config.BOUNDING_BOX_CACHE_FRACTION), 1.0);
    tree.setConfig(Config.BOUNDING_BOX_CACHE_FRACTION, 0.2);

    // Load the point store; the i-th point is added with sequence number i + 1
    // and is expected to be stored at index i.
    float[][] points = {
            { -1, -1 },
            { 1, 1 },
            { -1, 0 },
            { 0, 1 },
            { 0, 1 },
            { 0, 0 } };
    for (int i = 0; i < points.length; i++) {
        assertEquals(store.add(points[i], i + 1), i);
    }

    // Deleting from a tree that has no points yet must fail.
    assertThrows(IllegalStateException.class, () -> tree.deletePoint(0, 1));

    // Insert the points one at a time, re-stubbing the random draw before each
    // insertion that needs one.
    tree.addPoint(0, 1);

    when(rng.nextDouble()).thenReturn(0.625);
    tree.addPoint(1, 2);

    when(rng.nextDouble()).thenReturn(0.5);
    tree.addPoint(2, 3);

    when(rng.nextDouble()).thenReturn(0.25);
    tree.addPoint(3, 4);

    // Second copy of (0,1): adds mass to the existing leaf.
    tree.addPoint(4, 5);
}
use of com.amazon.randomcutforest.store.PointStore in project random-cut-forest-by-aws by aws.
the class RandomCutForestMapper method singlePrecisionForest.
/**
 * Assembles a {@link RandomCutForest} with {@code Precision.FLOAT_32} from a
 * state object, optionally reusing externally supplied components in place of
 * the ones serialized in the state.
 *
 * For each tree position, the sampler is taken from {@code extSamplers} when
 * that list is provided and is otherwise rebuilt from the sampler state. The
 * tree is taken from {@code extTrees} when provided; otherwise it is rebuilt
 * from the saved tree state when tree state was saved, or created empty; in
 * both of the latter cases the sampler's points are then added to the tree.
 *
 * @param builder       forest builder; must not be null. Its random source
 *                      supplies the seeds for rebuilt trees and samplers, and
 *                      its precision is set to {@code FLOAT_32}.
 * @param state         the forest state to restore from.
 * @param extPointStore point store to use; if null, the point store is
 *                      rebuilt from the state, which then must have
 *                      coordinator state saved.
 * @param extTrees      trees to use as-is; if non-null, must contain one tree
 *                      per tree in the state.
 * @param extSamplers   samplers to use as-is; if non-null, must contain one
 *                      sampler per tree in the state. If null, the state must
 *                      have sampler state saved.
 * @return the reconstructed forest.
 * @throws IllegalArgumentException if any of the preconditions above fails.
 */
public RandomCutForest singlePrecisionForest(RandomCutForest.Builder<?> builder, RandomCutForestState state, IPointStore<float[]> extPointStore, List<ITree<Integer, float[]>> extTrees, List<IStreamSampler<Integer>> extSamplers) {
    checkArgument(builder != null, "builder cannot be null");
    checkArgument(extTrees == null || extTrees.size() == state.getNumberOfTrees(), "incorrect number of trees");
    checkArgument(extSamplers == null || extSamplers.size() == state.getNumberOfTrees(), "incorrect number of samplers");
    // Short-circuit OR, consistent with the neighbouring preconditions.
    checkArgument(extSamplers != null || state.isSaveSamplerStateEnabled(), " need samplers ");
    checkArgument(extPointStore != null || state.isSaveCoordinatorStateEnabled(), " need coordinator state ");

    Random random = builder.getRandom();
    ComponentList<Integer, float[]> components = new ComponentList<>();
    CompactRandomCutTreeContext context = new CompactRandomCutTreeContext();
    IPointStore<float[]> pointStore = (extPointStore == null) ? new PointStoreMapper().toModel(state.getPointStoreState()) : extPointStore;
    PointStoreCoordinator<float[]> coordinator = new PointStoreCoordinator<>(pointStore);
    coordinator.setTotalUpdates(state.getTotalUpdates());
    context.setPointStore(pointStore);
    context.setMaxSize(state.getSampleSize());

    RandomCutTreeMapper treeMapper = new RandomCutTreeMapper();
    List<CompactRandomCutTreeState> treeStates = state.isSaveTreeStateEnabled() ? state.getCompactRandomCutTreeStates() : null;
    CompactSamplerMapper samplerMapper = new CompactSamplerMapper();
    List<CompactSamplerState> samplerStates = state.isSaveSamplerStateEnabled() ? state.getCompactSamplerStates() : null;

    for (int i = 0; i < state.getNumberOfTrees(); i++) {
        // The sampler seed is drawn before the tree seed; keep this order so the
        // sequence of draws from the shared random source is unchanged.
        IStreamSampler<Integer> sampler = (extSamplers != null) ? extSamplers.get(i) : samplerMapper.toModel(samplerStates.get(i), random.nextLong());
        ITree<Integer, float[]> tree;
        if (extTrees != null) {
            // Externally supplied trees are used without modification.
            tree = extTrees.get(i);
        } else if (treeStates != null) {
            // Restore the tree from its saved state, then add the sampled points.
            tree = treeMapper.toModel(treeStates.get(i), context, random.nextLong());
            sampler.getSample().forEach(s -> tree.addPoint(s.getValue(), s.getSequenceIndex()));
            tree.setConfig(Config.BOUNDING_BOX_CACHE_FRACTION, treeStates.get(i).getBoundingBoxCacheFraction());
        } else {
            // No tree state: build an empty tree, using the forest-level
            // boundingBoxCacheFraction, and populate it from the sampler.
            tree = new RandomCutTree.Builder().capacity(state.getSampleSize()).randomSeed(random.nextLong()).pointStoreView(pointStore).boundingBoxCacheFraction(state.getBoundingBoxCacheFraction()).centerOfMassEnabled(state.isCenterOfMassEnabled()).storeSequenceIndexesEnabled(state.isStoreSequenceIndexesEnabled()).build();
            sampler.getSample().forEach(s -> tree.addPoint(s.getValue(), s.getSequenceIndex()));
        }
        components.add(new SamplerPlusTree<>(sampler, tree));
    }
    builder.precision(Precision.FLOAT_32);
    return new RandomCutForest(builder, coordinator, components, random);
}
use of com.amazon.randomcutforest.store.PointStore in project random-cut-forest-by-aws by aws.
the class RandomCutForestMapper method toModel.
/**
 * Reconstruct a {@link RandomCutForest} from a {@link RandomCutForestState}.
 *
 * When the state's precision is {@code FLOAT_32} the work is delegated to
 * {@code singlePrecisionForest}. For any other precision the point store is
 * converted from its double representation, and for every tree a fresh tree
 * and a fresh sampler are created and repopulated from the saved sampler
 * data; this path therefore requires sampler state to have been saved.
 *
 * @param state            A Random Cut Forest state object.
 * @param executionContext An execution context used to configure parallel
 *                         execution and thread pool size of the forest. If
 *                         this argument is null, the execution context stored
 *                         in the state object is used instead.
 * @param seed             A random seed.
 * @return A Random Cut Forest corresponding to the state object.
 * @throws NullPointerException     if both the {@code executionContext}
 *                                  argument and the execution context field
 *                                  in the state object are null.
 * @throws IllegalArgumentException if the state is not {@code FLOAT_32} and
 *                                  does not have sampler state saved.
 */
public RandomCutForest toModel(RandomCutForestState state, ExecutionContext executionContext, long seed) {
    // Prefer the explicitly supplied context; fall back to the one in the state.
    ExecutionContext ec = executionContext;
    if (ec == null) {
        checkNotNull(state.getExecutionContext(), "The executor context in the state object is null, an executor context must be passed explicitly to toModel()");
        ec = state.getExecutionContext();
    }

    RandomCutForest.Builder<?> builder = RandomCutForest.builder()
            .numberOfTrees(state.getNumberOfTrees())
            .dimensions(state.getDimensions())
            .timeDecay(state.getTimeDecay())
            .sampleSize(state.getSampleSize())
            .centerOfMassEnabled(state.isCenterOfMassEnabled())
            .outputAfter(state.getOutputAfter())
            .parallelExecutionEnabled(ec.isParallelExecutionEnabled())
            .threadPoolSize(ec.getThreadPoolSize())
            .storeSequenceIndexesEnabled(state.isStoreSequenceIndexesEnabled())
            .shingleSize(state.getShingleSize())
            .boundingBoxCacheFraction(state.getBoundingBoxCacheFraction())
            .compact(state.isCompact())
            .internalShinglingEnabled(state.isInternalShinglingEnabled())
            .randomSeed(seed);

    boolean singlePrecision = Precision.valueOf(state.getPrecision()) == Precision.FLOAT_32;
    if (singlePrecision) {
        return singlePrecisionForest(builder, state, null, null, null);
    }

    // Remaining path: the state was not saved in single precision.
    Random random = builder.getRandom();
    PointStore pointStore = new PointStoreMapper().convertFromDouble(state.getPointStoreState());
    ComponentList<Integer, float[]> components = new ComponentList<>();

    PointStoreCoordinator<float[]> coordinator = new PointStoreCoordinator<>(pointStore);
    coordinator.setTotalUpdates(state.getTotalUpdates());

    CompactRandomCutTreeContext context = new CompactRandomCutTreeContext();
    context.setPointStore(pointStore);
    context.setMaxSize(state.getSampleSize());

    checkArgument(state.isSaveSamplerStateEnabled(), " conversion cannot proceed without samplers");
    List<CompactSamplerState> samplerStates = state.getCompactSamplerStates();
    CompactSamplerMapper samplerMapper = new CompactSamplerMapper();

    for (int treeIndex = 0; treeIndex < state.getNumberOfTrees(); treeIndex++) {
        CompactSampler saved = samplerMapper.toModel(samplerStates.get(treeIndex));

        // The tree seed is drawn before the sampler seed.
        RandomCutTree tree = RandomCutTree.builder()
                .capacity(state.getSampleSize())
                .pointStoreView(pointStore)
                .storeSequenceIndexesEnabled(state.isStoreSequenceIndexesEnabled())
                .outputAfter(state.getOutputAfter())
                .centerOfMassEnabled(state.isCenterOfMassEnabled())
                .randomSeed(random.nextLong())
                .build();
        CompactSampler sampler = CompactSampler.builder()
                .capacity(state.getSampleSize())
                .timeDecay(state.getTimeDecay())
                .randomSeed(random.nextLong())
                .build();
        sampler.setMaxSequenceIndex(saved.getMaxSequenceIndex());
        sampler.setMostRecentTimeDecayUpdate(saved.getMostRecentTimeDecayUpdate());

        for (Weighted<Integer> entry : saved.getWeightedSample()) {
            Integer sampledIndex = entry.getValue();
            Integer storedIndex = tree.addPoint(sampledIndex, entry.getSequenceIndex());
            // The tree may hand back a different index than the one supplied; when
            // it does, move the reference count over to the returned index.
            boolean indexChanged = storedIndex.intValue() != sampledIndex.intValue();
            if (indexChanged) {
                pointStore.incrementRefCount(storedIndex);
                pointStore.decrementRefCount(sampledIndex);
            }
            sampler.addPoint(storedIndex, entry.getWeight(), entry.getSequenceIndex());
        }
        components.add(new SamplerPlusTree<>(sampler, tree));
    }
    return new RandomCutForest(builder, coordinator, components, random);
}
use of com.amazon.randomcutforest.store.PointStore in project random-cut-forest-by-aws by aws.
the class RandomCutForestMapper method toState.
/**
 * Capture the state of the given forest in a {@link RandomCutForestState}.
 *
 * The forest's scalar settings are always copied. The optional parts are
 * controlled by this mapper's flags: the execution context is included when
 * {@code saveExecutorContextEnabled} is set, the point store when
 * {@code saveCoordinatorStateEnabled} is set, the per-tree sampler data when
 * {@code saveSamplerStateEnabled} is set, and the tree structures when
 * {@code saveTreeStateEnabled} is set.
 *
 * @param forest A Random Cut Forest whose state we want to capture.
 * @return a {@link RandomCutForestState} object representing the state of the
 *         given forest.
 * @throws IllegalArgumentException if {@code saveTreeStateEnabled} is true and
 *                                  the forest is not compact.
 */
@Override
public RandomCutForestState toState(RandomCutForest forest) {
    if (saveTreeStateEnabled) {
        checkArgument(forest.isCompact(), "tree state cannot be saved for noncompact forests");
    }

    RandomCutForestState state = new RandomCutForestState();

    // Forest-level settings.
    state.setNumberOfTrees(forest.getNumberOfTrees());
    state.setDimensions(forest.getDimensions());
    state.setTimeDecay(forest.getTimeDecay());
    state.setSampleSize(forest.getSampleSize());
    state.setShingleSize(forest.getShingleSize());
    state.setCenterOfMassEnabled(forest.isCenterOfMassEnabled());
    state.setOutputAfter(forest.getOutputAfter());
    state.setStoreSequenceIndexesEnabled(forest.isStoreSequenceIndexesEnabled());
    state.setTotalUpdates(forest.getTotalUpdates());
    state.setCompact(forest.isCompact());
    state.setInternalShinglingEnabled(forest.isInternalShinglingEnabled());
    state.setBoundingBoxCacheFraction(forest.getBoundingBoxCacheFraction());

    // Mapper-level flags describing what this state object contains.
    state.setSaveSamplerStateEnabled(saveSamplerStateEnabled);
    state.setSaveTreeStateEnabled(saveTreeStateEnabled);
    state.setSaveCoordinatorStateEnabled(saveCoordinatorStateEnabled);
    state.setPrecision(forest.getPrecision().name());
    state.setCompressed(compressionEnabled);
    state.setPartialTreeState(partialTreeStateEnabled);

    if (saveExecutorContextEnabled) {
        ExecutionContext ec = new ExecutionContext();
        ec.setParallelExecutionEnabled(forest.isParallelExecutionEnabled());
        ec.setThreadPoolSize(forest.getThreadPoolSize());
        state.setExecutionContext(ec);
    }

    if (saveCoordinatorStateEnabled) {
        PointStoreCoordinator<?> coordinator = (PointStoreCoordinator<?>) forest.getUpdateCoordinator();
        PointStoreMapper storeMapper = new PointStoreMapper();
        storeMapper.setCompressionEnabled(compressionEnabled);
        storeMapper.setNumberOfTrees(forest.getNumberOfTrees());
        state.setPointStoreState(storeMapper.toState((PointStore) coordinator.getStore()));
    }

    // A null list means the corresponding part is not being saved.
    List<CompactSamplerState> samplerStates = saveSamplerStateEnabled ? new ArrayList<>() : null;
    List<ITree<Integer, ?>> trees = saveTreeStateEnabled ? new ArrayList<>() : null;

    CompactSamplerMapper samplerMapper = new CompactSamplerMapper();
    samplerMapper.setCompressionEnabled(compressionEnabled);

    for (IComponentModel<?, ?> component : forest.getComponents()) {
        SamplerPlusTree<Integer, ?> pair = (SamplerPlusTree<Integer, ?>) component;
        CompactSampler sampler = (CompactSampler) pair.getSampler();
        if (samplerStates != null) {
            samplerStates.add(samplerMapper.toState(sampler));
        }
        if (trees != null) {
            trees.add(pair.getTree());
        }
    }
    state.setCompactSamplerStates(samplerStates);

    if (trees != null) {
        RandomCutTreeMapper treeMapper = new RandomCutTreeMapper();
        List<CompactRandomCutTreeState> treeStates = new ArrayList<>();
        for (ITree<Integer, ?> t : trees) {
            treeStates.add(treeMapper.toState((RandomCutTree) t));
        }
        state.setCompactRandomCutTreeStates(treeStates);
    }
    return state;
}
use of com.amazon.randomcutforest.store.PointStore in project random-cut-forest-by-aws by aws.
the class RandomCutForestShingledFunctionalTest method InternalShinglingTest.
/**
 * Checks that three forests built with the same seed produce matching anomaly
 * scores (within 1e-10) on the same stream: one fed raw points with internal
 * shingling enabled, one fed pre-shingled points with the same shingle size,
 * and one fed pre-shingled points with shingle size 1. Afterwards it checks
 * that each forest's point store array length equals its current capacity
 * times the number of dimensions.
 *
 * @param rotation whether internal rotation is enabled on the first forest
 *                 and used when generating the externally shingled data.
 */
@ParameterizedTest
@ValueSource(booleans = { true, false })
public void InternalShinglingTest(boolean rotation) {
    final int sampleSize = 256;
    final int baseDimensions = 2;
    final int shingleSize = 2;
    final int dimensions = baseDimensions * shingleSize;

    // Random seed; printed so that a failing run can be reproduced.
    long seed = new Random().nextLong();
    System.out.println(seed);

    // test is exact equality, reducing the number of trials
    final int numTrials = 1;
    final int length = 4000 * sampleSize;

    for (int trial = 0; trial < numTrials; trial++) {
        RandomCutForest first = new RandomCutForest.Builder<>()
                .compact(true)
                .dimensions(dimensions)
                .precision(Precision.FLOAT_32)
                .randomSeed(seed)
                .internalShinglingEnabled(true)
                .internalRotationEnabled(rotation)
                .shingleSize(shingleSize)
                .build();
        RandomCutForest second = new RandomCutForest.Builder<>()
                .compact(true)
                .dimensions(dimensions)
                .precision(Precision.FLOAT_32)
                .randomSeed(seed)
                .internalShinglingEnabled(false)
                .shingleSize(shingleSize)
                .build();
        RandomCutForest third = new RandomCutForest.Builder<>()
                .compact(true)
                .dimensions(dimensions)
                .precision(Precision.FLOAT_32)
                .randomSeed(seed)
                .internalShinglingEnabled(false)
                .shingleSize(1)
                .build();

        MultiDimDataWithKey dataWithKeys = ShingledMultiDimDataWithKeys.getMultiDimData(length, 50, 100, 5, seed + trial, baseDimensions);
        double[][] shingledData = generateShingledData(dataWithKeys.data, shingleSize, baseDimensions, rotation);
        assertEquals(shingledData.length, dataWithKeys.data.length - shingleSize + 1);

        // Feed the first shingleSize - 1 raw points to the internally shingled
        // forest before scoring starts.
        final int warmUp = shingleSize - 1;
        for (int j = 0; j < warmUp; j++) {
            first.update(dataWithKeys.data[j]);
        }

        // Index of the raw point that corresponds to the current shingle.
        int count = warmUp;
        for (double[] shingle : shingledData) {
            // The newest raw point must appear in the expected slot of the shingle.
            int position = rotation ? (count % shingleSize) : shingleSize - 1;
            for (int y = 0; y < baseDimensions; y++) {
                assertEquals(dataWithKeys.data[count][y], shingle[position * baseDimensions + y], 1e-10);
            }

            double firstResult = first.getAnomalyScore(dataWithKeys.data[count]);
            first.update(dataWithKeys.data[count]);
            ++count;

            double secondResult = second.getAnomalyScore(shingle);
            second.update(shingle);

            double thirdResult = third.getAnomalyScore(shingle);
            third.update(shingle);

            assertEquals(firstResult, secondResult, 1e-10);
            assertEquals(secondResult, thirdResult, 1e-10);
        }

        // Same point-store size check for each of the three forests, in order.
        for (RandomCutForest forest : new RandomCutForest[] { first, second, third }) {
            PointStore store = (PointStore) forest.getUpdateCoordinator().getStore();
            assertEquals(store.getCurrentStoreCapacity() * dimensions, store.getStore().length);
        }
    }
}
Aggregations