Search in sources :

Example 1 with KNNQuery

use of de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery in project elki by elki-project.

the class FlexibleLOF method run.

/**
 * Runs the Generalized LOF algorithm on the given database. The actual
 * computation is delegated to {@link #doRunInTime}.
 *
 * @param database Database to query
 * @param relation Data to process
 * @return LOF outlier result
 */
public OutlierResult run(Database database, Relation<O> relation) {
    // Step progress is only created when verbose logging is enabled.
    StepProgress stepprog = null;
    if (LOG.isVerbose()) {
        stepprog = new StepProgress("LOF", 3);
    }
    // First entry: reference neighborhood query; second: reachability query.
    Pair<KNNQuery<O>, KNNQuery<O>> queries = getKNNQueries(database, relation, stepprog);
    KNNQuery<O> referenceQuery = queries.getFirst();
    KNNQuery<O> reachabilityQuery = queries.getSecond();
    return doRunInTime(relation.getDBIDs(), referenceQuery, reachabilityQuery, stepprog).getResult();
}
Also used : PreprocessorKNNQuery(de.lmu.ifi.dbs.elki.database.query.knn.PreprocessorKNNQuery) RKNNQuery(de.lmu.ifi.dbs.elki.database.query.rknn.RKNNQuery) KNNQuery(de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery) StepProgress(de.lmu.ifi.dbs.elki.logging.progress.StepProgress)

Example 2 with KNNQuery

use of de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery in project elki by elki-project.

the class OnlineLOF method getKNNAndRkNNQueries.

/**
 * Get the kNN and rkNN queries for the algorithm.
 *
 * For each of the two distance functions (reference first, then
 * reachability) the database is asked for optimized kNN and rkNN queries. If
 * either of the two is unavailable, a
 * {@link MaterializeKNNAndRKNNPreprocessor} is created, both queries are
 * taken from it, and the preprocessor is added to the database hierarchy.
 *
 * @param database Database to obtain the queries from
 * @param relation Data
 * @param stepprog Progress logger, may be {@code null}
 * @return the kNN and rkNN queries: first the pair of kNN queries (reference,
 *         reachability), second the pair of rkNN queries (reference,
 *         reachability)
 */
private Pair<Pair<KNNQuery<O>, KNNQuery<O>>, Pair<RKNNQuery<O>, RKNNQuery<O>>> getKNNAndRkNNQueries(Database database, Relation<O> relation, StepProgress stepprog) {
    DistanceQuery<O> drefQ = database.getDistanceQuery(relation, referenceDistanceFunction);
    // Use "HEAVY" flag, since this is an online algorithm
    KNNQuery<O> kNNRefer = database.getKNNQuery(drefQ, krefer, DatabaseQuery.HINT_HEAVY_USE, DatabaseQuery.HINT_OPTIMIZED_ONLY, DatabaseQuery.HINT_NO_CACHE);
    RKNNQuery<O> rkNNRefer = database.getRKNNQuery(drefQ, DatabaseQuery.HINT_HEAVY_USE, DatabaseQuery.HINT_OPTIMIZED_ONLY, DatabaseQuery.HINT_NO_CACHE);
    // No optimized kNN query or RkNN query - use a preprocessor!
    if (kNNRefer == null || rkNNRefer == null) {
        if (stepprog != null) {
            stepprog.beginStep(1, "Materializing neighborhood w.r.t. reference neighborhood distance function.", LOG);
        }
        MaterializeKNNAndRKNNPreprocessor<O> preproc = new MaterializeKNNAndRKNNPreprocessor<>(relation, referenceDistanceFunction, krefer);
        kNNRefer = preproc.getKNNQuery(drefQ, krefer, DatabaseQuery.HINT_HEAVY_USE);
        rkNNRefer = preproc.getRKNNQuery(drefQ, krefer, DatabaseQuery.HINT_HEAVY_USE);
        // add as index
        database.getHierarchy().add(relation, preproc);
    } else if (stepprog != null) {
        stepprog.beginStep(1, "Optimized neighborhood w.r.t. reference neighborhood distance function provided by database.", LOG);
    }
    // Same procedure for the reachability distance function.
    DistanceQuery<O> dreachQ = database.getDistanceQuery(relation, reachabilityDistanceFunction);
    KNNQuery<O> kNNReach = database.getKNNQuery(dreachQ, kreach, DatabaseQuery.HINT_HEAVY_USE, DatabaseQuery.HINT_OPTIMIZED_ONLY, DatabaseQuery.HINT_NO_CACHE);
    RKNNQuery<O> rkNNReach = database.getRKNNQuery(dreachQ, DatabaseQuery.HINT_HEAVY_USE, DatabaseQuery.HINT_OPTIMIZED_ONLY, DatabaseQuery.HINT_NO_CACHE);
    if (kNNReach == null || rkNNReach == null) {
        if (stepprog != null) {
            stepprog.beginStep(2, "Materializing neighborhood w.r.t. reachability distance function.", LOG);
        }
        // The preprocessor is constructed directly from the distance function
        // and k; no parameterization object is needed here.
        MaterializeKNNAndRKNNPreprocessor<O> preproc = new MaterializeKNNAndRKNNPreprocessor<>(relation, reachabilityDistanceFunction, kreach);
        kNNReach = preproc.getKNNQuery(dreachQ, kreach, DatabaseQuery.HINT_HEAVY_USE);
        rkNNReach = preproc.getRKNNQuery(dreachQ, kreach, DatabaseQuery.HINT_HEAVY_USE);
        // add as index
        database.getHierarchy().add(relation, preproc);
    }
    Pair<KNNQuery<O>, KNNQuery<O>> kNNPair = new Pair<>(kNNRefer, kNNReach);
    Pair<RKNNQuery<O>, RKNNQuery<O>> rkNNPair = new Pair<>(rkNNRefer, rkNNReach);
    return new Pair<>(kNNPair, rkNNPair);
}
Also used : RKNNQuery(de.lmu.ifi.dbs.elki.database.query.rknn.RKNNQuery) MaterializeKNNAndRKNNPreprocessor(de.lmu.ifi.dbs.elki.index.preprocessed.knn.MaterializeKNNAndRKNNPreprocessor) PreprocessorKNNQuery(de.lmu.ifi.dbs.elki.database.query.knn.PreprocessorKNNQuery) RKNNQuery(de.lmu.ifi.dbs.elki.database.query.rknn.RKNNQuery) KNNQuery(de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery) ListParameterization(de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.ListParameterization) Pair(de.lmu.ifi.dbs.elki.utilities.pairs.Pair)

Example 3 with KNNQuery

use of de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery in project elki by elki-project.

the class ComputeKNNOutlierScores method run.

/**
 * Precomputes a kNN index for the input relation, then invokes a series of
 * kNN-based outlier detection methods via {@code runForEachK} and writes
 * their results to the output file.
 *
 * The maximum k is capped at the relation size minus one. The method aborts
 * if, after adding a materialized kNN preprocessor, the kNN query obtained
 * for the relation is still not a {@link PreprocessorKNNQuery}.
 *
 * @throws AbortException if no preprocessor kNN query is in use, or if the
 *         output file cannot be created
 */
@Override
public void run() {
    final Database database = inputstep.getDatabase();
    final Relation<O> relation = database.getRelation(distf.getInputTypeRestriction());
    // Ensure we don't go beyond the relation size:
    final int maxk = Math.min(this.maxk, relation.size() - 1);
    // Get a KNN query.
    final int lim = Math.min(maxk + 2, relation.size());
    KNNQuery<O> knnq = QueryUtil.getKNNQuery(relation, distf, lim);
    // Precompute kNN:
    if (!(knnq instanceof PreprocessorKNNQuery)) {
        MaterializeKNNPreprocessor<O> preproc = new MaterializeKNNPreprocessor<>(relation, distf, lim);
        preproc.initialize();
        relation.getHierarchy().add(relation, preproc);
    }
    // Test that we now get a proper index query
    knnq = QueryUtil.getKNNQuery(relation, distf, lim);
    if (!(knnq instanceof PreprocessorKNNQuery)) {
        throw new AbortException("Not using preprocessor knn query -- KNN queries using class: " + knnq.getClass());
    }
    // Warn for some known slow methods and large k:
    if (!isDisabled("LDOF") && maxk > 100) {
        LOG.verbose("Note: LDOF needs O(k^2) distance computations. Use -" + Parameterizer.DISABLE_ID.getName() + " LDOF to disable.");
    }
    if (!isDisabled("FastABOD") && maxk > 100) {
        LOG.warning("Note: FastABOD needs quadratic memory. Use -" + Parameterizer.DISABLE_ID.getName() + " FastABOD to disable.");
    }
    if (!isDisabled("DWOF") && maxk > 100) {
        LOG.warning("Note: DWOF needs O(k^2) distance computations. Use -" + Parameterizer.DISABLE_ID.getName() + " DWOF to disable.");
    }
    final DBIDs ids = relation.getDBIDs();
    try (PrintStream fout = new PrintStream(outfile)) {
        // Header line: data set size and data type of the relation.
        fout.append("# Data set size: " + relation.size()).append(" data type: " + relation.getDataTypeInformation()).append(FormatUtil.NEWLINE);
        // Label outlier result (reference)
        writeResult(fout, ids, bylabel.run(database), new IdentityScaling(), "bylabel");
        // Starting k: fall back to the step size if no positive start was given.
        final int startk = (this.startk > 0) ? this.startk : this.stepk;
        // Starting values for the methods that are invoked with k >= 2 / k >= 3.
        final int startkmin2 = (startk >= 2) ? startk : (startk + stepk);
        final int startkmin3 = (startk >= 3) ? startk : (startkmin2 >= 3) ? startkmin2 : (startkmin2 + stepk);
        // Output function:
        BiConsumer<String, OutlierResult> out = (kstr, result) -> writeResult(fout, ids, result, scaling, kstr);
        // KNN
        runForEachK("KNN", startk, stepk, maxk, //
            k -> new KNNOutlier<O>(distf, k).run(database, relation), out);
        // KNN Weight
        runForEachK("KNNW", startk, stepk, maxk, //
            k -> new KNNWeightOutlier<O>(distf, k).run(database, relation), out);
        // Run LOF
        runForEachK("LOF", startk, stepk, maxk, //
            k -> new LOF<O>(k, distf).run(database, relation), out);
        // Run Simplified-LOF
        runForEachK("SimplifiedLOF", startk, stepk, maxk, //
            k -> new SimplifiedLOF<O>(k, distf).run(database, relation), out);
        // LoOP
        runForEachK("LoOP", startk, stepk, maxk, //
            k -> new LoOP<O>(k, k, distf, distf, 1.0).run(database, relation), out);
        // LDOF
        runForEachK("LDOF", startkmin2, stepk, maxk, //
            k -> new LDOF<O>(distf, k).run(database, relation), out);
        // Run ODIN
        runForEachK("ODIN", startk, stepk, maxk, //
            k -> new ODIN<O>(distf, k).run(database, relation), out);
        // Run FastABOD
        runForEachK("FastABOD", startkmin3, stepk, maxk, //
            k -> new FastABOD<O>(new PolynomialKernelFunction(2), k).run(database, relation), out);
        // Run KDEOS with intrinsic dimensionality 2.
        runForEachK("KDEOS", startkmin2, stepk, maxk, //
            k -> new KDEOS<O>(distf, k, k, GaussianKernelDensityFunction.KERNEL, 0., //
                0.5 * GaussianKernelDensityFunction.KERNEL.canonicalBandwidth(), 2).run(database, relation), out);
        // Run LDF
        runForEachK("LDF", startk, stepk, maxk, //
            k -> new LDF<O>(k, distf, GaussianKernelDensityFunction.KERNEL, 1., .1).run(database, relation), out);
        // Run INFLO
        runForEachK("INFLO", startk, stepk, maxk, //
            k -> new INFLO<O>(distf, 1.0, k).run(database, relation), out);
        // Run COF
        runForEachK("COF", startk, stepk, maxk, //
            k -> new COF<O>(k, distf).run(database, relation), out);
        // Run simple Intrinsic dimensionality
        runForEachK("Intrinsic", startkmin2, stepk, maxk, //
            k -> new IntrinsicDimensionalityOutlier<O>(distf, k, AggregatedHillEstimator.STATIC).run(database, relation), out);
        // Run IDOS
        runForEachK("IDOS", startkmin2, stepk, maxk, //
            k -> new IDOS<O>(distf, AggregatedHillEstimator.STATIC, k, k).run(database, relation), out);
        // Run simple kernel-density LOF variant
        runForEachK("KDLOF", startkmin2, stepk, maxk, //
            k -> new SimpleKernelDensityLOF<O>(k, distf, GaussianKernelDensityFunction.KERNEL).run(database, relation), out);
        // Run DWOF (need pairwise distances, too)
        runForEachK("DWOF", startkmin2, stepk, maxk, //
            k -> new DWOF<O>(distf, k, 1.1).run(database, relation), out);
        // Run LIC
        runForEachK("LIC", startk, stepk, maxk, //
            k -> new LocalIsolationCoefficient<O>(distf, k).run(database, relation), out);
        // Run VOV (requires a vector field).
        if (TypeUtil.DOUBLE_VECTOR_FIELD.isAssignableFromType(relation.getDataTypeInformation())) {
            // The casts are guarded by the type check above.
            @SuppressWarnings("unchecked") final DistanceFunction<? super DoubleVector> df = (DistanceFunction<? super DoubleVector>) distf;
            @SuppressWarnings("unchecked") final Relation<DoubleVector> rel = (Relation<DoubleVector>) (Relation<?>) relation;
            runForEachK("VOV", startk, stepk, maxk, //
                k -> new VarianceOfVolume<DoubleVector>(k, df).run(database, rel), out);
        }
        // Run KNN DD
        runForEachK("KNNDD", startk, stepk, maxk, //
            k -> new KNNDD<O>(distf, k).run(database, relation), out);
        // Run KNN SOS
        runForEachK("KNNSOS", startk, stepk, maxk, //
            k -> new KNNSOS<O>(distf, k).run(relation), out);
        // Run ISOS
        runForEachK("ISOS", startkmin2, stepk, maxk, //
            k -> new ISOS<O>(distf, k, AggregatedHillEstimator.STATIC).run(relation), out);
    } catch (FileNotFoundException e) {
        throw new AbortException("Cannot create output file.", e);
    }
}
Also used : PreprocessorKNNQuery(de.lmu.ifi.dbs.elki.database.query.knn.PreprocessorKNNQuery) InputStep(de.lmu.ifi.dbs.elki.workflow.InputStep) NumberVector(de.lmu.ifi.dbs.elki.data.NumberVector) LDOF(de.lmu.ifi.dbs.elki.algorithm.outlier.lof.LDOF) DoubleRelation(de.lmu.ifi.dbs.elki.database.relation.DoubleRelation) Reference(de.lmu.ifi.dbs.elki.utilities.documentation.Reference) COF(de.lmu.ifi.dbs.elki.algorithm.outlier.lof.COF) ObjectParameter(de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter) DoubleVector(de.lmu.ifi.dbs.elki.data.DoubleVector) Locale(java.util.Locale) KNNWeightOutlier(de.lmu.ifi.dbs.elki.algorithm.outlier.distance.KNNWeightOutlier) FastMath(net.jafama.FastMath) LDF(de.lmu.ifi.dbs.elki.algorithm.outlier.lof.LDF) ByLabelOutlier(de.lmu.ifi.dbs.elki.algorithm.outlier.trivial.ByLabelOutlier) DistanceFunction(de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction) FastABOD(de.lmu.ifi.dbs.elki.algorithm.outlier.anglebased.FastABOD) OptionID(de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID) DistanceBasedAlgorithm(de.lmu.ifi.dbs.elki.algorithm.DistanceBasedAlgorithm) FormatUtil(de.lmu.ifi.dbs.elki.utilities.io.FormatUtil) ScalingFunction(de.lmu.ifi.dbs.elki.utilities.scaling.ScalingFunction) LoOP(de.lmu.ifi.dbs.elki.algorithm.outlier.lof.LoOP) TypeUtil(de.lmu.ifi.dbs.elki.data.type.TypeUtil) EuclideanDistanceFunction(de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.EuclideanDistanceFunction) Database(de.lmu.ifi.dbs.elki.database.Database) AggregatedHillEstimator(de.lmu.ifi.dbs.elki.math.statistics.intrinsicdimensionality.AggregatedHillEstimator) PolynomialKernelFunction(de.lmu.ifi.dbs.elki.distance.similarityfunction.kernel.PolynomialKernelFunction) KNNDD(de.lmu.ifi.dbs.elki.algorithm.outlier.distance.KNNDD) FileNotFoundException(java.io.FileNotFoundException) ISOS(de.lmu.ifi.dbs.elki.algorithm.outlier.intrinsic.ISOS) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) 
OutlierResult(de.lmu.ifi.dbs.elki.result.outlier.OutlierResult) IntParameter(de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter) IdentityScaling(de.lmu.ifi.dbs.elki.utilities.scaling.IdentityScaling) Pattern(java.util.regex.Pattern) LOF(de.lmu.ifi.dbs.elki.algorithm.outlier.lof.LOF) DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) Parameterization(de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization) KNNOutlier(de.lmu.ifi.dbs.elki.algorithm.outlier.distance.KNNOutlier) KDEOS(de.lmu.ifi.dbs.elki.algorithm.outlier.lof.KDEOS) QueryUtil(de.lmu.ifi.dbs.elki.database.QueryUtil) BiConsumer(java.util.function.BiConsumer) GaussianKernelDensityFunction(de.lmu.ifi.dbs.elki.math.statistics.kernelfunctions.GaussianKernelDensityFunction) CommonConstraints(de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints) AbortException(de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException) INFLO(de.lmu.ifi.dbs.elki.algorithm.outlier.lof.INFLO) IntFunction(java.util.function.IntFunction) PrintStream(java.io.PrintStream) Relation(de.lmu.ifi.dbs.elki.database.relation.Relation) MaterializeKNNPreprocessor(de.lmu.ifi.dbs.elki.index.preprocessed.knn.MaterializeKNNPreprocessor) AbstractApplication(de.lmu.ifi.dbs.elki.application.AbstractApplication) IntrinsicDimensionalityOutlier(de.lmu.ifi.dbs.elki.algorithm.outlier.intrinsic.IntrinsicDimensionalityOutlier) File(java.io.File) Duration(de.lmu.ifi.dbs.elki.logging.statistics.Duration) ODIN(de.lmu.ifi.dbs.elki.algorithm.outlier.distance.ODIN) SimplifiedLOF(de.lmu.ifi.dbs.elki.algorithm.outlier.lof.SimplifiedLOF) DWOF(de.lmu.ifi.dbs.elki.algorithm.outlier.DWOF) LocalIsolationCoefficient(de.lmu.ifi.dbs.elki.algorithm.outlier.distance.LocalIsolationCoefficient) VarianceOfVolume(de.lmu.ifi.dbs.elki.algorithm.outlier.lof.VarianceOfVolume) SimpleKernelDensityLOF(de.lmu.ifi.dbs.elki.algorithm.outlier.lof.SimpleKernelDensityLOF) IDOS(de.lmu.ifi.dbs.elki.algorithm.outlier.intrinsic.IDOS) 
KNNSOS(de.lmu.ifi.dbs.elki.algorithm.outlier.distance.KNNSOS) Logging(de.lmu.ifi.dbs.elki.logging.Logging) OutlierScalingFunction(de.lmu.ifi.dbs.elki.utilities.scaling.outlier.OutlierScalingFunction) PatternParameter(de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.PatternParameter) KNNQuery(de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery) IdentityScaling(de.lmu.ifi.dbs.elki.utilities.scaling.IdentityScaling) PolynomialKernelFunction(de.lmu.ifi.dbs.elki.distance.similarityfunction.kernel.PolynomialKernelFunction) OutlierResult(de.lmu.ifi.dbs.elki.result.outlier.OutlierResult) FileNotFoundException(java.io.FileNotFoundException) DWOF(de.lmu.ifi.dbs.elki.algorithm.outlier.DWOF) SimpleKernelDensityLOF(de.lmu.ifi.dbs.elki.algorithm.outlier.lof.SimpleKernelDensityLOF) MaterializeKNNPreprocessor(de.lmu.ifi.dbs.elki.index.preprocessed.knn.MaterializeKNNPreprocessor) KNNDD(de.lmu.ifi.dbs.elki.algorithm.outlier.distance.KNNDD) INFLO(de.lmu.ifi.dbs.elki.algorithm.outlier.lof.INFLO) LDF(de.lmu.ifi.dbs.elki.algorithm.outlier.lof.LDF) SimplifiedLOF(de.lmu.ifi.dbs.elki.algorithm.outlier.lof.SimplifiedLOF) DoubleRelation(de.lmu.ifi.dbs.elki.database.relation.DoubleRelation) Relation(de.lmu.ifi.dbs.elki.database.relation.Relation) IntrinsicDimensionalityOutlier(de.lmu.ifi.dbs.elki.algorithm.outlier.intrinsic.IntrinsicDimensionalityOutlier) Database(de.lmu.ifi.dbs.elki.database.Database) COF(de.lmu.ifi.dbs.elki.algorithm.outlier.lof.COF) IDOS(de.lmu.ifi.dbs.elki.algorithm.outlier.intrinsic.IDOS) ODIN(de.lmu.ifi.dbs.elki.algorithm.outlier.distance.ODIN) LoOP(de.lmu.ifi.dbs.elki.algorithm.outlier.lof.LoOP) PrintStream(java.io.PrintStream) LOF(de.lmu.ifi.dbs.elki.algorithm.outlier.lof.LOF) SimplifiedLOF(de.lmu.ifi.dbs.elki.algorithm.outlier.lof.SimplifiedLOF) SimpleKernelDensityLOF(de.lmu.ifi.dbs.elki.algorithm.outlier.lof.SimpleKernelDensityLOF) DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) KNNSOS(de.lmu.ifi.dbs.elki.algorithm.outlier.distance.KNNSOS) 
KNNWeightOutlier(de.lmu.ifi.dbs.elki.algorithm.outlier.distance.KNNWeightOutlier) VarianceOfVolume(de.lmu.ifi.dbs.elki.algorithm.outlier.lof.VarianceOfVolume) LDOF(de.lmu.ifi.dbs.elki.algorithm.outlier.lof.LDOF) ISOS(de.lmu.ifi.dbs.elki.algorithm.outlier.intrinsic.ISOS) INFLO(de.lmu.ifi.dbs.elki.algorithm.outlier.lof.INFLO) KNNOutlier(de.lmu.ifi.dbs.elki.algorithm.outlier.distance.KNNOutlier) DistanceFunction(de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction) EuclideanDistanceFunction(de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.EuclideanDistanceFunction) KDEOS(de.lmu.ifi.dbs.elki.algorithm.outlier.lof.KDEOS) PreprocessorKNNQuery(de.lmu.ifi.dbs.elki.database.query.knn.PreprocessorKNNQuery) LocalIsolationCoefficient(de.lmu.ifi.dbs.elki.algorithm.outlier.distance.LocalIsolationCoefficient) DoubleVector(de.lmu.ifi.dbs.elki.data.DoubleVector) AbortException(de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException) FastABOD(de.lmu.ifi.dbs.elki.algorithm.outlier.anglebased.FastABOD)

Example 4 with KNNQuery

use of de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery in project elki by elki-project.

the class LoOP method run.

/**
 * Runs the LoOP algorithm on the given database: obtains the two kNN
 * queries, computes the probabilistic distances and PLOF values, and turns
 * the PLOF values into LoOP scores.
 *
 * @param database Database to process
 * @param relation Relation to process
 * @return Outlier result
 */
public OutlierResult run(Database database, Relation<O> relation) {
    // Step progress is only created when verbose logging is enabled.
    StepProgress stepprog = null;
    if (LOG.isVerbose()) {
        stepprog = new StepProgress(5);
    }
    Pair<KNNQuery<O>, KNNQuery<O>> queries = getKNNQueries(database, relation, stepprog);
    KNNQuery<O> comparisonQuery = queries.getFirst();
    KNNQuery<O> reachabilityQuery = queries.getSecond();
    // Both queries are required; abort if either is missing.
    if (comparisonQuery == null) {
        throw new AbortException("No kNN queries supported by database for comparison distance function.");
    }
    if (reachabilityQuery == null) {
        throw new AbortException("No kNN queries supported by database for density estimation distance function.");
    }
    // FIXME: tie handling!

    // Step 3: probabilistic distances.
    WritableDoubleDataStore pdists = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_DB);
    LOG.beginStep(stepprog, 3, "Computing pdists");
    computePDists(relation, reachabilityQuery, pdists);

    // Step 4: PLOF values.
    WritableDoubleDataStore plofs = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP);
    LOG.beginStep(stepprog, 4, "Computing PLOF");
    double nplof = computePLOFs(relation, comparisonQuery, pdists, plofs);

    // Step 5: convert each PLOF value into a LoOP score, overwriting the
    // PLOF store in place and tracking the score range.
    DoubleMinMax scoreRange = new DoubleMinMax();
    LOG.beginStep(stepprog, 5, "Computing LoOP scores");
    FiniteProgress loopProgress = null;
    if (LOG.isVerbose()) {
        loopProgress = new FiniteProgress("LoOP for objects", relation.size(), LOG);
    }
    final double norm = 1. / (nplof * MathUtil.SQRT2);
    DBIDIter it = relation.iterDBIDs();
    while (it.valid()) {
        double plof = plofs.doubleValue(it);
        double loop = NormalDistribution.erf((plof - 1.) * norm);
        plofs.putDouble(it, loop);
        scoreRange.put(loop);
        LOG.incrementProcessed(loopProgress);
        it.advance();
    }
    LOG.ensureCompleted(loopProgress);
    LOG.setCompleted(stepprog);

    // Build result representation.
    DoubleRelation scores = new MaterializedDoubleRelation("Local Outlier Probabilities", "loop-outlier", plofs, relation.getDBIDs());
    OutlierScoreMeta meta = new ProbabilisticOutlierScore(scoreRange.getMin(), scoreRange.getMax(), 0.);
    return new OutlierResult(meta, scores);
}
Also used : WritableDoubleDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore) FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) OutlierResult(de.lmu.ifi.dbs.elki.result.outlier.OutlierResult) ProbabilisticOutlierScore(de.lmu.ifi.dbs.elki.result.outlier.ProbabilisticOutlierScore) StepProgress(de.lmu.ifi.dbs.elki.logging.progress.StepProgress) DoubleRelation(de.lmu.ifi.dbs.elki.database.relation.DoubleRelation) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation) OutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) DoubleMinMax(de.lmu.ifi.dbs.elki.math.DoubleMinMax) KNNQuery(de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation) AbortException(de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException)

Example 5 with KNNQuery

use of de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery in project elki by elki-project.

the class KNNJoinTest method testLinearScan.

/**
 * Runs linear-scan 2NN queries for every object under Euclidean and
 * Manhattan distance, and compares mean and sample variance of the result
 * sizes with the expected values.
 */
@Test
public void testLinearScan() {
    Database db = AbstractSimpleAlgorithmTest.makeSimpleDatabase(dataset, shoulds);
    Relation<NumberVector> relation = db.getRelation(TypeUtil.NUMBER_VECTOR_FIELD);

    // Euclidean
    DistanceQuery<NumberVector> euclidDist = db.getDistanceQuery(relation, EuclideanDistanceFunction.STATIC);
    KNNQuery<NumberVector> euclidKnn = QueryUtil.getLinearScanKNNQuery(euclidDist);
    MeanVariance euclidSizes = new MeanVariance();
    DBIDIter euclidIter = relation.iterDBIDs();
    while (euclidIter.valid()) {
        int resultSize = euclidKnn.getKNNForDBID(euclidIter, 2).size();
        euclidSizes.put(resultSize);
        euclidIter.advance();
    }
    org.junit.Assert.assertEquals("Euclidean mean 2NN", mean2nnEuclid, euclidSizes.getMean(), 0.00001);
    org.junit.Assert.assertEquals("Euclidean variance 2NN", var2nnEuclid, euclidSizes.getSampleVariance(), 0.00001);

    // Manhattan
    DistanceQuery<NumberVector> manhattanDist = db.getDistanceQuery(relation, ManhattanDistanceFunction.STATIC);
    KNNQuery<NumberVector> manhattanKnn = QueryUtil.getLinearScanKNNQuery(manhattanDist);
    MeanVariance manhattanSizes = new MeanVariance();
    DBIDIter manhattanIter = relation.iterDBIDs();
    while (manhattanIter.valid()) {
        int resultSize = manhattanKnn.getKNNForDBID(manhattanIter, 2).size();
        manhattanSizes.put(resultSize);
        manhattanIter.advance();
    }
    org.junit.Assert.assertEquals("Manhattan mean 2NN", mean2nnManhattan, manhattanSizes.getMean(), 0.00001);
    org.junit.Assert.assertEquals("Manhattan variance 2NN", var2nnManhattan, manhattanSizes.getSampleVariance(), 0.00001);
}
Also used : MeanVariance(de.lmu.ifi.dbs.elki.math.MeanVariance) NumberVector(de.lmu.ifi.dbs.elki.data.NumberVector) Database(de.lmu.ifi.dbs.elki.database.Database) StaticArrayDatabase(de.lmu.ifi.dbs.elki.database.StaticArrayDatabase) DistanceQuery(de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery) KNNQuery(de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) Test(org.junit.Test)

Aggregations

KNNQuery (de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery)5 DBIDIter (de.lmu.ifi.dbs.elki.database.ids.DBIDIter)3 PreprocessorKNNQuery (de.lmu.ifi.dbs.elki.database.query.knn.PreprocessorKNNQuery)3 NumberVector (de.lmu.ifi.dbs.elki.data.NumberVector)2 Database (de.lmu.ifi.dbs.elki.database.Database)2 RKNNQuery (de.lmu.ifi.dbs.elki.database.query.rknn.RKNNQuery)2 DoubleRelation (de.lmu.ifi.dbs.elki.database.relation.DoubleRelation)2 DistanceBasedAlgorithm (de.lmu.ifi.dbs.elki.algorithm.DistanceBasedAlgorithm)1 DWOF (de.lmu.ifi.dbs.elki.algorithm.outlier.DWOF)1 FastABOD (de.lmu.ifi.dbs.elki.algorithm.outlier.anglebased.FastABOD)1 KNNDD (de.lmu.ifi.dbs.elki.algorithm.outlier.distance.KNNDD)1 KNNOutlier (de.lmu.ifi.dbs.elki.algorithm.outlier.distance.KNNOutlier)1 KNNSOS (de.lmu.ifi.dbs.elki.algorithm.outlier.distance.KNNSOS)1 KNNWeightOutlier (de.lmu.ifi.dbs.elki.algorithm.outlier.distance.KNNWeightOutlier)1 LocalIsolationCoefficient (de.lmu.ifi.dbs.elki.algorithm.outlier.distance.LocalIsolationCoefficient)1 ODIN (de.lmu.ifi.dbs.elki.algorithm.outlier.distance.ODIN)1 IDOS (de.lmu.ifi.dbs.elki.algorithm.outlier.intrinsic.IDOS)1 ISOS (de.lmu.ifi.dbs.elki.algorithm.outlier.intrinsic.ISOS)1 IntrinsicDimensionalityOutlier (de.lmu.ifi.dbs.elki.algorithm.outlier.intrinsic.IntrinsicDimensionalityOutlier)1 COF (de.lmu.ifi.dbs.elki.algorithm.outlier.lof.COF)1