Use of com.amazon.randomcutforest.RandomCutForest in project random-cut-forest-by-aws by aws.
Class RandomCutForestMapperTest, method testRoundTripForCompactForest.
@ParameterizedTest
@MethodSource("compactForestProvider")
public void testRoundTripForCompactForest(RandomCutForest forest) {
    NormalMixtureTestData testData = new NormalMixtureTestData();
    for (double[] point : testData.generateTestData(sampleSize, dimensions)) {
        forest.update(point);
    }
    RandomCutForest forest2 = mapper.toModel(mapper.toState(forest));
    assertCompactForestEquals(forest, forest2);
}
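The round trip above uses only RandomCutForestMapper.toState and toModel. Below is a minimal sketch of the same pattern outside JUnit, using only calls that appear on this page; the builder settings and point counts are arbitrary, the test's mapper field may carry configuration that is not reproduced here, and imports are omitted as in the snippets above.

// build a small compact forest and feed it some data (settings are illustrative)
RandomCutForest forest = RandomCutForest.builder()
        .compact(true)
        .numberOfTrees(30)
        .sampleSize(256)
        .dimensions(2)
        .randomSeed(42L)
        .build();
Random rng = new Random(0);
for (int i = 0; i < 10 * 256; i++) {
    forest.update(new double[] { rng.nextDouble(), rng.nextDouble() });
}

// capture the forest as a state object, then rebuild it; the restored forest
// should score points the same way the original does
RandomCutForestMapper mapper = new RandomCutForestMapper();
RandomCutForestState state = mapper.toState(forest);
RandomCutForest restored = mapper.toModel(state);

double[] probe = new double[] { 0.5, 0.5 };
System.out.println(forest.getAnomalyScore(probe) + " vs " + restored.getAnomalyScore(probe));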
Use of com.amazon.randomcutforest.RandomCutForest in project random-cut-forest-by-aws by aws.
Class SimpleRunner, method prepareAlgorithm.
/**
 * Set up the internal RandomCutForest instance and line transformer.
 *
 * @param dimensions The number of dimensions in the input data.
 */
protected void prepareAlgorithm(int dimensions) {
    pointBuffer = new double[dimensions];
    shingleBuilder = new ShingleBuilder(dimensions, argumentParser.getShingleSize(),
            argumentParser.getShingleCyclic());
    shingleBuffer = new double[shingleBuilder.getShingledPointSize()];
    RandomCutForest forest = RandomCutForest.builder()
            .numberOfTrees(argumentParser.getNumberOfTrees())
            .sampleSize(argumentParser.getSampleSize())
            .dimensions(shingleBuilder.getShingledPointSize())
            .timeDecay(argumentParser.getTimeDecay())
            .randomSeed(argumentParser.getRandomSeed())
            .build();
    algorithm = algorithmInitializer.apply(forest);
}
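prepareAlgorithm shingles externally: ShingleBuilder assembles the shingled point and the forest is sized to getShingledPointSize(). For comparison, a sketch of the same forest configured for internal shingling is below. It assumes that with internalShinglingEnabled(true) the forest accepts one base-dimensional point per update and maintains the shingle itself; that option is only shown disabled elsewhere on this page, so check it against the library version in use.

// hypothetical stand-ins for the argument parser's settings
int baseDimensions = 1;
int shingleSize = 4;

RandomCutForest forest = RandomCutForest.builder()
        .numberOfTrees(50)
        .sampleSize(256)
        .dimensions(baseDimensions * shingleSize)   // shingled size, as in prepareAlgorithm
        .shingleSize(shingleSize)
        .internalShinglingEnabled(true)             // assumption: the forest shingles internally
        .randomSeed(0L)
        .build();

// one base-dimensional point per update; no ShingleBuilder or shingleBuffer needed
forest.update(new double[] { 1.0 });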
Use of com.amazon.randomcutforest.RandomCutForest in project random-cut-forest-by-aws by aws.
Class V1JsonToV3StateConverterTest, method testConvert.
@ParameterizedTest
@MethodSource("args")
public void testConvert(V1JsonResource jsonResource, Precision precision) {
    String resource = jsonResource.getResource();
    try (InputStream is = V1JsonToV3StateConverterTest.class.getResourceAsStream(resource);
            BufferedReader rr = new BufferedReader(new InputStreamReader(is, StandardCharsets.UTF_8))) {
        StringBuilder b = new StringBuilder();
        String line;
        while ((line = rr.readLine()) != null) {
            b.append(line);
        }
        String json = b.toString();
        RandomCutForestState state = converter.convert(json, precision);
        assertEquals(jsonResource.getDimensions(), state.getDimensions());
        assertEquals(jsonResource.getNumberOfTrees(), state.getNumberOfTrees());
        assertEquals(jsonResource.getSampleSize(), state.getSampleSize());
        RandomCutForest forest = new RandomCutForestMapper().toModel(state, 0);
        assertEquals(jsonResource.getDimensions(), forest.getDimensions());
        assertEquals(jsonResource.getNumberOfTrees(), forest.getNumberOfTrees());
        assertEquals(jsonResource.getSampleSize(), forest.getSampleSize());
        // perform a simple validation of the deserialized forest by updating and scoring
        // with a few points
        Random random = new Random(0);
        for (int i = 0; i < 100; i++) {
            double[] point = getPoint(jsonResource.getDimensions(), random);
            double score = forest.getAnomalyScore(point);
            assertTrue(score > 0);
            forest.update(point);
        }
        String newString = new ObjectMapper().writeValueAsString(new RandomCutForestMapper().toState(forest));
        System.out.println(" Old size " + json.length() + ", new size " + newString.length()
                + ", improvement factor " + json.length() / newString.length());
    } catch (IOException e) {
        fail("Unable to load JSON resource");
    }
}
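For reuse outside the test, the conversion boils down to two calls: converter.convert(json, precision) to obtain a RandomCutForestState, and RandomCutForestMapper.toModel to rebuild the forest. A condensed sketch is below; it assumes the converter is a V1JsonToV3StateConverter with a no-argument constructor, which the test class name suggests but this page does not show.

// hypothetical helper: V1 JSON in, working forest out
static RandomCutForest fromV1Json(String json, Precision precision) {
    // assumption: no-arg constructor, inferred from the test class name
    V1JsonToV3StateConverter converter = new V1JsonToV3StateConverter();
    RandomCutForestState state = converter.convert(json, precision);
    return new RandomCutForestMapper().toModel(state, 0);
}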
Use of com.amazon.randomcutforest.RandomCutForest in project random-cut-forest-by-aws by aws.
Class ConsistencyTest, method ExternalShinglingTest.
@Test
public void ExternalShinglingTest() {
    int sampleSize = 256;
    int baseDimensions = 1;
    int shingleSize = 4;
    int dimensions = baseDimensions * shingleSize;
    long seed = new Random().nextLong();
    // just once, since the test checks exact equality
    int numTrials = 1;
    int length = 400 * sampleSize;
    for (int i = 0; i < numTrials; i++) {
        RandomCutForest forest = RandomCutForest.builder().compact(true).dimensions(dimensions)
                .precision(Precision.FLOAT_32).internalShinglingEnabled(false).shingleSize(shingleSize)
                .randomSeed(seed).build();
        RandomCutForest copyForest = RandomCutForest.builder().compact(true).dimensions(dimensions)
                .precision(Precision.FLOAT_32).internalShinglingEnabled(false).shingleSize(1)
                .randomSeed(seed).build();
        ThresholdedRandomCutForest first = new ThresholdedRandomCutForest.Builder<>().compact(true)
                .dimensions(dimensions).precision(Precision.FLOAT_32).randomSeed(seed)
                .internalShinglingEnabled(false).shingleSize(shingleSize).anomalyRate(0.01).build();
        ThresholdedRandomCutForest second = new ThresholdedRandomCutForest.Builder<>().compact(true)
                .dimensions(dimensions).precision(Precision.FLOAT_32).randomSeed(seed)
                .internalShinglingEnabled(false).shingleSize(1).anomalyRate(0.01).build();
        MultiDimDataWithKey dataWithKeys = ShingledMultiDimDataWithKeys.generateShingledDataWithKey(length, 50,
                shingleSize, baseDimensions, seed);
        int gradeDifference = 0;
        for (double[] point : dataWithKeys.data) {
            AnomalyDescriptor firstResult = first.process(point, 0L);
            AnomalyDescriptor secondResult = second.process(point, 0L);
            assertEquals(firstResult.getRCFScore(), forest.getAnomalyScore(point), 1e-10);
            assertEquals(firstResult.getRCFScore(), copyForest.getAnomalyScore(point), 1e-10);
            assertEquals(firstResult.getRCFScore(), secondResult.getRCFScore(), 1e-10);
            if ((firstResult.getAnomalyGrade() > 0) != (secondResult.getAnomalyGrade() > 0)) {
                ++gradeDifference;
                // the thresholded random cut forest uses the shingle size in the corrector step,
                // so the grades are expected to differ
            }
            forest.update(point);
            copyForest.update(point);
        }
        assertTrue(gradeDifference > 0);
    }
}
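Because shingling is external here, both models see identical pre-shingled points, which is why the raw RCF scores agree to 1e-10 while the anomaly grades may differ. A sketch of the same pattern with the shingle maintained by hand (plain array copies instead of the test's data generator) is below; the data values are arbitrary.

int baseDimensions = 1;
int shingleSize = 4;
int dimensions = baseDimensions * shingleSize;
long seed = 0L;

RandomCutForest forest = RandomCutForest.builder().compact(true).dimensions(dimensions)
        .precision(Precision.FLOAT_32).internalShinglingEnabled(false).shingleSize(shingleSize)
        .randomSeed(seed).build();
ThresholdedRandomCutForest thresholded = new ThresholdedRandomCutForest.Builder<>().compact(true)
        .dimensions(dimensions).precision(Precision.FLOAT_32).internalShinglingEnabled(false)
        .shingleSize(shingleSize).randomSeed(seed).anomalyRate(0.01).build();

double[] shingle = new double[dimensions];
Random rng = new Random(1);
for (int t = 0; t < 1000; t++) {
    // roll the shingle by one base point: drop the oldest value, append the newest
    System.arraycopy(shingle, baseDimensions, shingle, 0, dimensions - baseDimensions);
    shingle[dimensions - 1] = Math.sin(t / 10.0) + 0.1 * rng.nextGaussian();

    double[] point = shingle.clone();
    AnomalyDescriptor result = thresholded.process(point, 0L);
    double score = forest.getAnomalyScore(point);   // should track result.getRCFScore(), as asserted above
    forest.update(point);

    if (result.getAnomalyGrade() > 0) {
        System.out.println("t=" + t + " grade=" + result.getAnomalyGrade() + " score=" + score);
    }
}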
Use of com.amazon.randomcutforest.RandomCutForest in project random-cut-forest-by-aws by aws.
Class V1JsonToV3StateConverterTest, method testMerge.
@ParameterizedTest
@MethodSource("args")
public void testMerge(V1JsonResource jsonResource, Precision precision) {
    String resource = jsonResource.getResource();
    try (InputStream is = V1JsonToV3StateConverterTest.class.getResourceAsStream(resource);
            BufferedReader rr = new BufferedReader(new InputStreamReader(is, StandardCharsets.UTF_8))) {
        StringBuilder b = new StringBuilder();
        String line;
        while ((line = rr.readLine()) != null) {
            b.append(line);
        }
        String json = b.toString();
        int number = new Random().nextInt(10) + 1;
        int testNumberOfTrees = Math.min(100, 1 + new Random().nextInt(number * jsonResource.getNumberOfTrees() - 1));
        ArrayList<String> models = new ArrayList<>();
        for (int i = 0; i < number; i++) {
            models.add(json);
        }
        RandomCutForestState state = converter.convert(models, testNumberOfTrees, precision).get();
        assertEquals(jsonResource.getDimensions(), state.getDimensions());
        assertEquals(testNumberOfTrees, state.getNumberOfTrees());
        assertEquals(jsonResource.getSampleSize(), state.getSampleSize());
        RandomCutForest forest = new RandomCutForestMapper().toModel(state, 0);
        assertEquals(jsonResource.getDimensions(), forest.getDimensions());
        assertEquals(testNumberOfTrees, forest.getNumberOfTrees());
        assertEquals(jsonResource.getSampleSize(), forest.getSampleSize());
        // perform a simple validation of the deserialized forest by updating and scoring
        // with a few points
        Random random = new Random(0);
        for (int i = 0; i < 100; i++) {
            double[] point = getPoint(jsonResource.getDimensions(), random);
            double score = forest.getAnomalyScore(point);
            assertTrue(score > 0);
            forest.update(point);
        }
        int expectedSize = (int) Math.floor(1.0 * testNumberOfTrees * json.length()
                / (number * jsonResource.getNumberOfTrees()));
        String newString = new ObjectMapper().writeValueAsString(new RandomCutForestMapper().toState(forest));
        System.out.println(" Copied " + number + " times, old number of trees " + jsonResource.getNumberOfTrees()
                + ", new trees " + testNumberOfTrees + ", expected old size " + expectedSize
                + ", new size " + newString.length());
    } catch (IOException e) {
        fail("Unable to load JSON resource");
    }
}
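The merge path differs from testConvert only in the converter call: a list of V1 JSON documents and a target tree count go in, and the state comes back from an Optional-style result (hence the .get() above). A condensed sketch of just that step is below; as before it assumes a V1JsonToV3StateConverter with a no-argument constructor, and jsonA and jsonB are hypothetical V1 model documents loaded elsewhere.

// hypothetical: merge two saved V1 models into one forest with 50 trees
ArrayList<String> v1Models = new ArrayList<>();
v1Models.add(jsonA);   // jsonA, jsonB: V1 JSON strings, loaded elsewhere
v1Models.add(jsonB);
int targetTrees = 50;

V1JsonToV3StateConverter converter = new V1JsonToV3StateConverter();
RandomCutForestState merged = converter.convert(v1Models, targetTrees, Precision.FLOAT_32).get();
RandomCutForest forest = new RandomCutForestMapper().toModel(merged, 0);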