Search in sources :

Example 16 with DiVector

use of DiVector in project random-cut-forest-by-aws by aws.

the class RandomCutForest method getAnomalyAttribution.

public DiVector getAnomalyAttribution(float[] point) {
    // getAnomalyScore
    if (!isOutputReady()) {
        return new DiVector(dimensions);
    IVisitorFactory<DiVector> visitorFactory = new VisitorFactory<>((tree, y) -> new AnomalyAttributionVisitor(tree.projectToTree(y), tree.getMass()), (tree, x) -> x.lift(tree::liftFromTree));
    BinaryOperator<DiVector> accumulator = DiVector::addToLeft;
    Function<DiVector, DiVector> finisher = x -> x.scale(1.0 / numberOfTrees);
    return traverseForest(transformToShingledPoint(point), visitorFactory, accumulator, finisher);
Also used : CommonUtils.checkNotNull( Arrays(java.util.Arrays) BiFunction(java.util.function.BiFunction) ParallelForestTraversalExecutor( Random(java.util.Random) ParallelForestUpdateExecutor( AbstractForestUpdateExecutor( IStateCoordinator( RandomCutTree( CommonUtils.toFloatArray( Neighbor( ConditionalSampleSummarizer( ImputeVisitor( NearNeighborVisitor( IBoundingBoxView( Collector( PointStoreCoordinator( AnomalyAttributionVisitor( OneSidedConvergingDoubleAccumulator( AnomalyScoreVisitor( AbstractForestTraversalExecutor( BinaryOperator(java.util.function.BinaryOperator) SequentialForestTraversalExecutor( List(java.util.List) Math.max(java.lang.Math.max) Optional(java.util.Optional) DensityOutput( CommonUtils.toDoubleArray( Precision( CompactSampler( SamplerPlusTree( ShingleBuilder( Function(java.util.function.Function) ArrayList(java.util.ArrayList) SimulatedTransductiveScalarScoreVisitor( PointStore( DynamicAttributionVisitor( ConvergingAccumulator( Config( SimpleInterpolationVisitor( InterpolationMeasure( IPointStore( SequentialForestUpdateExecutor( ArrayUtils( OneSidedConvergingDiVectorAccumulator( CommonUtils.checkArgument( DynamicScoreVisitor( DiVector( ITree( ConditionalTreeSample( ConditionalSampleSummary( Collections(java.util.Collections) IStreamSampler( DiVector( AnomalyAttributionVisitor(

Example 17 with DiVector

use of DiVector in project random-cut-forest-by-aws by aws.

the class RandomCutForest method getApproximateDynamicAttribution.

 * Atrribution for dynamic sequential scoring; getL1Norm() should agree with
 * getDynamicScoringSequential
 * @param point                   input
 * @param precision               parameter to stop early stopping
 * @param highIsCritical          are high values anomalous (otherwise low
 *                                values are anomalous)
 * @param ignoreLeafMassThreshold we ignore leaves with mass equal/below *
 *                                threshold
 * @param seen                    function for scoring points that have been
 *                                seen before
 * @param unseen                  function for scoring points not seen in tree
 * @param newDamp                 dampening function based on duplicates
 * @return attribution DiVector of the score
public DiVector getApproximateDynamicAttribution(float[] point, double precision, boolean highIsCritical, int ignoreLeafMassThreshold, BiFunction<Double, Double, Double> seen, BiFunction<Double, Double, Double> unseen, BiFunction<Double, Double, Double> newDamp) {
    if (!isOutputReady()) {
        return new DiVector(dimensions);
    VisitorFactory<DiVector> visitorFactory = new VisitorFactory<>((tree, y) -> new DynamicAttributionVisitor(y, tree.getMass(), ignoreLeafMassThreshold, seen, unseen, newDamp), (tree, x) -> x.lift(tree::liftFromTree));
    ConvergingAccumulator<DiVector> accumulator = new OneSidedConvergingDiVectorAccumulator(dimensions, highIsCritical, precision, DEFAULT_APPROXIMATE_DYNAMIC_SCORE_MIN_VALUES_ACCEPTED, numberOfTrees);
    Function<DiVector, DiVector> finisher = vector -> vector.scale(1.0 / accumulator.getValuesAccepted());
    return traverseForest(transformToShingledPoint(point), visitorFactory, accumulator, finisher);
Also used : CommonUtils.checkNotNull( Arrays(java.util.Arrays) BiFunction(java.util.function.BiFunction) ParallelForestTraversalExecutor( Random(java.util.Random) ParallelForestUpdateExecutor( AbstractForestUpdateExecutor( IStateCoordinator( RandomCutTree( CommonUtils.toFloatArray( Neighbor( ConditionalSampleSummarizer( ImputeVisitor( NearNeighborVisitor( IBoundingBoxView( Collector( PointStoreCoordinator( AnomalyAttributionVisitor( OneSidedConvergingDoubleAccumulator( AnomalyScoreVisitor( AbstractForestTraversalExecutor( BinaryOperator(java.util.function.BinaryOperator) SequentialForestTraversalExecutor( List(java.util.List) Math.max(java.lang.Math.max) Optional(java.util.Optional) DensityOutput( CommonUtils.toDoubleArray( Precision( CompactSampler( SamplerPlusTree( ShingleBuilder( Function(java.util.function.Function) ArrayList(java.util.ArrayList) SimulatedTransductiveScalarScoreVisitor( PointStore( DynamicAttributionVisitor( ConvergingAccumulator( Config( SimpleInterpolationVisitor( InterpolationMeasure( IPointStore( SequentialForestUpdateExecutor( ArrayUtils( OneSidedConvergingDiVectorAccumulator( CommonUtils.checkArgument( DynamicScoreVisitor( DiVector( ITree( ConditionalTreeSample( ConditionalSampleSummary( Collections(java.util.Collections) IStreamSampler( DynamicAttributionVisitor( DiVector( OneSidedConvergingDiVectorAccumulator(

Example 18 with DiVector

use of DiVector in project random-cut-forest-by-aws by aws.

the class RandomCutForest method getDynamicAttribution.

 * Same as above, but for dynamic scoring. See the params of
 * getDynamicScoreParallel
 * @param point                   point to be scored
 * @param ignoreLeafMassThreshold said threshold
 * @param seen                    score function for seen points
 * @param unseen                  score function for unseen points
 * @param newDamp                 dampening function for duplicates in the seen
 *                                function
 * @return dynamic scoring attribution DiVector
public DiVector getDynamicAttribution(float[] point, int ignoreLeafMassThreshold, BiFunction<Double, Double, Double> seen, BiFunction<Double, Double, Double> unseen, BiFunction<Double, Double, Double> newDamp) {
    if (!isOutputReady()) {
        return new DiVector(dimensions);
    VisitorFactory<DiVector> visitorFactory = new VisitorFactory<>((tree, y) -> new DynamicAttributionVisitor(tree.projectToTree(y), tree.getMass(), ignoreLeafMassThreshold, seen, unseen, newDamp), (tree, x) -> x.lift(tree::liftFromTree));
    BinaryOperator<DiVector> accumulator = DiVector::addToLeft;
    Function<DiVector, DiVector> finisher = x -> x.scale(1.0 / numberOfTrees);
    return traverseForest(transformToShingledPoint(point), visitorFactory, accumulator, finisher);
Also used : CommonUtils.checkNotNull( Arrays(java.util.Arrays) BiFunction(java.util.function.BiFunction) ParallelForestTraversalExecutor( Random(java.util.Random) ParallelForestUpdateExecutor( AbstractForestUpdateExecutor( IStateCoordinator( RandomCutTree( CommonUtils.toFloatArray( Neighbor( ConditionalSampleSummarizer( ImputeVisitor( NearNeighborVisitor( IBoundingBoxView( Collector( PointStoreCoordinator( AnomalyAttributionVisitor( OneSidedConvergingDoubleAccumulator( AnomalyScoreVisitor( AbstractForestTraversalExecutor( BinaryOperator(java.util.function.BinaryOperator) SequentialForestTraversalExecutor( List(java.util.List) Math.max(java.lang.Math.max) Optional(java.util.Optional) DensityOutput( CommonUtils.toDoubleArray( Precision( CompactSampler( SamplerPlusTree( ShingleBuilder( Function(java.util.function.Function) ArrayList(java.util.ArrayList) SimulatedTransductiveScalarScoreVisitor( PointStore( DynamicAttributionVisitor( ConvergingAccumulator( Config( SimpleInterpolationVisitor( InterpolationMeasure( IPointStore( SequentialForestUpdateExecutor( ArrayUtils( OneSidedConvergingDiVectorAccumulator( CommonUtils.checkArgument( DynamicScoreVisitor( DiVector( ITree( ConditionalTreeSample( ConditionalSampleSummary( Collections(java.util.Collections) IStreamSampler( DynamicAttributionVisitor( DiVector(

Example 19 with DiVector

use of DiVector in project random-cut-forest-by-aws by aws.

the class RandomCutForestBenchmark method attributionAndUpdate.

// Benchmark body: calls forest.getAnomalyAttribution on each row of data from
// index INITIAL_DATA_SIZE onward and returns the forest.
// NOTE(review): this listing is incomplete -- closing braces are missing, and
// despite the method name no update call is visible and the blackhole parameter
// is never used here; confirm against the project source.
public RandomCutForest attributionAndUpdate(BenchmarkState state, Blackhole blackhole) {
    // NOTE(review): the initializer was lost in this listing; presumably the
    // data comes from state (cf. state.forest below) -- confirm.
    double[][] data =;
    forest = state.forest;
    // placeholder vector sized to the forest's dimensions; overwritten in the loop
    DiVector vector = new DiVector(forest.getDimensions());
    for (int i = INITIAL_DATA_SIZE; i < data.length; i++) {
        vector = forest.getAnomalyAttribution(data[i]);
    return forest;
Also used : DiVector( Benchmark(org.openjdk.jmh.annotations.Benchmark) OperationsPerInvocation(org.openjdk.jmh.annotations.OperationsPerInvocation)

Example 20 with DiVector

use of DiVector in project random-cut-forest-by-aws by aws.

the class PredictorCorrector method detect.

/**
 * The core of the predictor-corrector thresholding for shingled data points. It
 * uses a simple threshold provided by the basic thresholder. It first checks for
 * obvious effects of the present input; absent those, for repeated breaches, it
 * checks how critical the new information is.
 *
 * @param result                the descriptor to be augmented and returned
 * @param lastAnomalyDescriptor state of the computation for the last anomaly
 * @param forest                the forest used to score the point and compute
 *                              its attribution
 * @return the anomaly descriptor result (which has plausibly mutated)
 */
// Scores result's RCF point with the forest, consults the thresholder, and
// returns result, setting an anomaly grade on it in the branches below that
// call result.setAnomalyGrade.
// NOTE(review): this listing appears to have lost its closing braces and its
// block-comment delimiters (the bare " * ..." lines are comment text), and some
// statements may be missing as well: index and the final newPoint are computed
// but never consumed, and result.getRelativeIndex()/getAttribution() are read
// without any visible setter call. Confirm against the project source.
protected AnomalyDescriptor detect(AnomalyDescriptor result, IRCFComputeDescriptor lastAnomalyDescriptor, RandomCutForest forest) {
    double[] point = result.getRCFPoint();
    // nothing to score without an RCF point
    if (point == null) {
        return result;
    double score = forest.getAnomalyScore(point);
    long internalTimeStamp = result.getInternalTimeStamp();
    // a zero score returns immediately, before the thresholder is consulted or updated
    if (score == 0) {
        return result;
    int shingleSize = result.getShingleSize();
    // number of values per time step in the shingled point
    int baseDimensions = result.getDimension() / shingleSize;
    // offset of the last block of baseDimensions values (relative index 0, the current time)
    int startPosition = (shingleSize - 1) * baseDimensions;
    boolean previousIsPotentialAnomaly = thresholder.isInPotentialAnomaly();
         * We first check if the score is high enough to be considered as a candidate
         * anomaly. If not, which is hopefully 99% of the data, the computation is short
    if (thresholder.getAnomalyGrade(score, previousIsPotentialAnomaly) == 0) {
        // inHighScoreRegion = false;
        thresholder.update(score, score, 0, false);
        return result;
    // the score is now high enough to be considered an anomaly
    // inHighScoreRegion = true;
         * We now check if (1) we have another anomaly in the current shingle (2) have
         * predictions about what the values should have been and (3) replacing by those
         * "should have been" makes the anomaly score of the new shingled point low
         * enough to not be an anomaly. In this case we can "explain" the high score is
         * due to the past and do not need to vend anomaly -- because the most recent
         * point, on their own would not produce an anomalous shingle.
         * However, the strategy is only executable if there are (A) sufficiently many
         * observations and (B) enough data in each time point such that the forecast is
         * reasonable. While forecasts can be corrected for very low shingle sizes and
         * say 1d input, the allure of RCF is in the multivariate case. Even for 1d, a
         * shingleSize of 4 or larger would produce reasonable forecast for the purposes
         * of anomaly detection.
    // number of internal time steps since the last anomaly descriptor
    int gap = (int) (internalTimeStamp - lastAnomalyDescriptor.getInternalTimeStamp());
    // the forecast may not be reasonable with less data
    boolean reasonableForecast = result.isReasonableForecast();
    // correction is attempted only when the last anomaly has both an RCF point and an
    // expected point, and lies within the current shingle (0 < gap <= shingleSize)
    if (reasonableForecast && lastAnomalyDescriptor.getRCFPoint() != null && lastAnomalyDescriptor.getExpectedRCFPoint() != null && gap > 0 && gap <= shingleSize) {
        double[] correctedPoint = applyBasicCorrector(point, gap, shingleSize, baseDimensions, lastAnomalyDescriptor);
        double correctedScore = forest.getAnomalyScore(correctedPoint);
        // we know we are looking previous anomalies
        if (thresholder.getAnomalyGrade(correctedScore, true) == 0) {
            // fixing the past makes this anomaly go away; nothing to do but process the
            // score
            // we will not change inHighScoreRegion however, because the score has been
            // larger
            thresholder.update(score, correctedScore, 0, false);
            return result;
         * We now check the most egregious values seen in the current timestamp, as
         * determined by attribution. Those locations provide information about (a)
         * which attributes and (b) what the values should have been. However, those
         * calculations of imputation only make sense when sufficient observations are
         * available.
    DiVector attribution = forest.getAnomalyAttribution(point);
    double[] newPoint = null;
    // defaults to the original score when no expected point is computed below
    double newScore = score;
    DiVector newAttribution = null;
         * we now find the time slice, relative to the current time, which is indicative
         * of the high score. relativeIndex = 0 is current time. It is negative if the
         * most egregious attribution was due to the past values in the shingle
    int index = maxContribution(attribution, baseDimensions, -shingleSize) + 1;
    if (!previousIsPotentialAnomaly && trigger(attribution, gap, baseDimensions, null, false, lastAnomalyDescriptor)) {
        result.setAnomalyGrade(thresholder.getAnomalyGrade(score, false));
        thresholder.update(score, score, 0, true);
    } else {
             * we again check if the new input produces an anomaly/not on its own
        if (reasonableForecast) {
            newPoint = getExpectedPoint(attribution, startPosition, baseDimensions, point, forest);
            if (newPoint != null) {
                newAttribution = forest.getAnomalyAttribution(newPoint);
                newScore = forest.getAnomalyScore(newPoint);
        // an anomaly is reported only if the trigger fires and the expected point scores lower
        if (trigger(attribution, gap, baseDimensions, newAttribution, previousIsPotentialAnomaly, lastAnomalyDescriptor) && score > newScore) {
            result.setAnomalyGrade(thresholder.getAnomalyGrade(score, previousIsPotentialAnomaly));
            // current point
            index = 0;
            thresholder.update(score, newScore, 0, true);
        } else {
            // previousIsPotentialAnomaly is true now, but not calling it anomaly either
            thresholder.update(score, newScore, 0, true);
            return result;
    if (reasonableForecast) {
        // anomaly in the past and detected late; repositioning the computation
        // index 0 is current time
        startPosition = shingleSize * baseDimensions + (result.getRelativeIndex() - 1) * baseDimensions;
        newPoint = getExpectedPoint(result.getAttribution(), startPosition, baseDimensions, point, forest);
    return result;
Also used : DiVector(


DiVector ( Test (org.junit.jupiter.api.Test)11 SamplerPlusTree ( Random (java.util.Random)6 OneSidedConvergingDiVectorAccumulator ( CommonUtils.checkArgument ( CommonUtils.checkNotNull ( CommonUtils.toDoubleArray ( CommonUtils.toFloatArray ( AnomalyAttributionVisitor ( AnomalyScoreVisitor ( DynamicAttributionVisitor ( DynamicScoreVisitor ( SimulatedTransductiveScalarScoreVisitor ( Config ( Precision ( AbstractForestTraversalExecutor ( AbstractForestUpdateExecutor ( IStateCoordinator ( ParallelForestTraversalExecutor (