Search in sources :

Example 41 with DBIDs

use of de.lmu.ifi.dbs.elki.database.ids.DBIDs in project elki by elki-project.

the class RepresentativeUncertainClustering method run.

/**
 * Wrapper run method.
 *
 * It is called from {@link AbstractAlgorithm#run(Database)}. It draws
 * {@code numsamples} samples of the uncertain relation, clusters each sample
 * with the wrapped clustering algorithm, then runs the meta algorithm on the
 * resulting clusterings and attaches an evaluation to one representative
 * clustering per meta cluster.
 *
 * @param database Database
 * @param relation Data relation of uncertain objects
 * @return Clustering result (the clustering produced by the meta algorithm)
 */
public Clustering<?> run(Database database, Relation<? extends UncertainObject> relation) {
    ResultHierarchy hierarchy = database.getHierarchy();
    ArrayList<Clustering<?>> sampleClusterings = new ArrayList<>();
    final int dim = RelationUtil.dimensionality(relation);
    DBIDs ids = relation.getDBIDs();
    // Parent result under which the sampled worlds are collected.
    Result samples = new BasicResult("Samples", "samples");

    // Step 1: draw possible worlds and cluster each of them.
    Random rand = random.getSingleThreadedRandom();
    FiniteProgress sampleP = LOG.isVerbose() ? new FiniteProgress("Clustering samples", numsamples, LOG) : null;
    for (int s = 0; s < numsamples; s++) {
        WritableDataStore<DoubleVector> world = DataStoreUtil.makeStorage(ids, DataStoreFactory.HINT_DB, DoubleVector.class);
        DBIDIter obj = ids.iter();
        while (obj.valid()) {
            world.put(obj, relation.get(obj).drawSample(rand));
            obj.advance();
        }
        Clustering<?> sampled = runClusteringAlgorithm(hierarchy, samples, ids, world, dim, "Sample " + s);
        sampleClusterings.add(sampled);
        LOG.incrementProcessed(sampleP);
    }
    LOG.ensureCompleted(sampleP);

    // Step 2: meta clustering; every sample clustering becomes one object.
    DBIDRange rids = DBIDFactory.FACTORY.generateStaticDBIDRange(sampleClusterings.size());
    WritableDataStore<Clustering<?>> datastore = DataStoreUtil.makeStorage(rids, DataStoreFactory.HINT_DB, Clustering.class);
    Iterator<Clustering<?>> pending = sampleClusterings.iterator();
    for (DBIDIter slot = rids.iter(); slot.valid(); slot.advance()) {
        datastore.put(slot, pending.next());
    }
    assert (rids.size() == sampleClusterings.size());

    // Relation of clusterings plus a precomputed distance matrix on it.
    Relation<Clustering<?>> crel = new MaterializedRelation<Clustering<?>>(Clustering.TYPE, rids, "Clusterings", datastore);
    PrecomputedDistanceMatrix<Clustering<?>> mat = new PrecomputedDistanceMatrix<>(crel, rids, distance);
    mat.initialize();
    ProxyDatabase d = new ProxyDatabase(rids, crel);
    d.getHierarchy().add(crel, mat);
    Clustering<?> c = metaAlgorithm.run(d);
    // Detach the meta clustering from the temporary database again.
    d.getHierarchy().remove(d, c);

    // Step 3: evaluation, one representative per meta cluster.
    Result reps = new BasicResult("Representants", "representative");
    hierarchy.add(relation, reps);
    DistanceQuery<Clustering<?>> dq = mat.getDistanceQuery(distance);
    List<? extends Cluster<?>> metaClusters = c.getAllClusters();
    List<DoubleObjPair<Clustering<?>>> evaluated = new ArrayList<>(metaClusters.size());
    for (Cluster<?> group : metaClusters) {
        // Pick the member whose largest distance to the other members is
        // smallest.
        Clustering<?> representative = null;
        double bestTau = Double.POSITIVE_INFINITY;
        for (DBIDIter cand = group.getIDs().iter(); cand.valid(); cand.advance()) {
            Clustering<?> candidate = crel.get(cand);
            double worst = 0.;
            for (DBIDIter other = group.getIDs().iter(); other.valid(); other.advance()) {
                if (!DBIDUtil.equal(cand, other)) {
                    double dist = dq.distance(candidate, other);
                    if (dist > worst) {
                        worst = dist;
                    }
                }
            }
            if (worst < bestTau) {
                bestTau = worst;
                representative = candidate;
            }
        }
        if (representative == null) {
            // No member was selected, e.g. for degenerate empty clusters.
            continue;
        }
        // Global tau: largest distance from the representative to any
        // sample clustering.
        double globalTau = 0.;
        for (DBIDIter any = crel.iterDBIDs(); any.valid(); any.advance()) {
            double dist = dq.distance(representative, any);
            if (dist > globalTau) {
                globalTau = dist;
            }
        }
        final double cprob = computeConfidence(group.size(), crel.size());
        // Attach the evaluation and remember the representative.
        hierarchy.add(representative, new RepresentativenessEvaluation(globalTau, bestTau, cprob));
        evaluated.add(new DoubleObjPair<Clustering<?>>(cprob, representative));
    }

    // Highest confidence first.
    Collections.sort(evaluated, Collections.reverseOrder());
    for (DoubleObjPair<Clustering<?>> entry : evaluated) {
        // Hang the parent relations (= the sample) of each representative
        // below the representatives result.
        It<Relation<?>> parents = hierarchy.iterParents(entry.second).filter(Relation.class);
        while (parents.valid()) {
            hierarchy.add(reps, parents.get());
            parents.advance();
        }
    }

    // Either keep all samples in the hierarchy, or drop their subtree.
    if (!keep) {
        hierarchy.removeSubtree(samples);
    } else {
        hierarchy.add(relation, samples);
    }
    return c;
}
Also used : ArrayList(java.util.ArrayList) Result(de.lmu.ifi.dbs.elki.result.Result) EvaluationResult(de.lmu.ifi.dbs.elki.result.EvaluationResult) BasicResult(de.lmu.ifi.dbs.elki.result.BasicResult) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) MaterializedRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation) MaterializedRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation) Relation(de.lmu.ifi.dbs.elki.database.relation.Relation) Random(java.util.Random) BasicResult(de.lmu.ifi.dbs.elki.result.BasicResult) Iterator(java.util.Iterator) ResultHierarchy(de.lmu.ifi.dbs.elki.result.ResultHierarchy) DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) ProxyDatabase(de.lmu.ifi.dbs.elki.database.ProxyDatabase) PrecomputedDistanceMatrix(de.lmu.ifi.dbs.elki.index.distancematrix.PrecomputedDistanceMatrix) Clustering(de.lmu.ifi.dbs.elki.data.Clustering) DoubleObjPair(de.lmu.ifi.dbs.elki.utilities.pairs.DoubleObjPair) DBIDRange(de.lmu.ifi.dbs.elki.database.ids.DBIDRange) DoubleVector(de.lmu.ifi.dbs.elki.data.DoubleVector)

Example 42 with DBIDs

use of de.lmu.ifi.dbs.elki.database.ids.DBIDs in project elki by elki-project.

the class ComputeKNNOutlierScores method run.

/**
 * Run a collection of kNN-based outlier detection methods for a range of k
 * values and write their results to the output file.
 *
 * A kNN query with {@code min(maxk + 2, relation size)} neighbors is
 * requested first; if it is not already a {@link PreprocessorKNNQuery}, a
 * {@link MaterializeKNNPreprocessor} is initialized and added to the
 * relation's hierarchy. Each method is then invoked through
 * {@code runForEachK}.
 *
 * @throws AbortException if no preprocessor kNN query is available after
 *         materialization, or if the output file cannot be created
 */
@Override
public void run() {
    final Database database = inputstep.getDatabase();
    final Relation<O> relation = database.getRelation(distf.getInputTypeRestriction());
    // Ensure we don't go beyond the relation size:
    final int maxk = Math.min(this.maxk, relation.size() - 1);
    // Get a KNN query.
    final int lim = Math.min(maxk + 2, relation.size());
    KNNQuery<O> knnq = QueryUtil.getKNNQuery(relation, distf, lim);
    // Precompute kNN:
    if (!(knnq instanceof PreprocessorKNNQuery)) {
        MaterializeKNNPreprocessor<O> preproc = new MaterializeKNNPreprocessor<>(relation, distf, lim);
        preproc.initialize();
        relation.getHierarchy().add(relation, preproc);
    }
    // Test that we now get a proper index query
    knnq = QueryUtil.getKNNQuery(relation, distf, lim);
    if (!(knnq instanceof PreprocessorKNNQuery)) {
        throw new AbortException("Not using preprocessor knn query -- KNN queries using class: " + knnq.getClass());
    }
    // Warn for some known slow methods and large k:
    if (!isDisabled("LDOF") && maxk > 100) {
        LOG.verbose("Note: LDOF needs O(k^2) distance computations. Use -" + Parameterizer.DISABLE_ID.getName() + " LDOF to disable.");
    }
    if (!isDisabled("FastABOD") && maxk > 100) {
        LOG.warning("Note: FastABOD needs quadratic memory. Use -" + Parameterizer.DISABLE_ID.getName() + " FastABOD to disable.");
    }
    if (!isDisabled("DWOF") && maxk > 100) {
        LOG.warning("Note: DWOF needs O(k^2) distance computations. Use -" + Parameterizer.DISABLE_ID.getName() + " DWOF to disable.");
    }
    final DBIDs ids = relation.getDBIDs();
    try (PrintStream fout = new PrintStream(outfile)) {
        // Header line: data set size and data type information.
        fout.append("# Data set size: " + relation.size()).append(" data type: " + relation.getDataTypeInformation()).append(FormatUtil.NEWLINE);
        // Label outlier result (reference)
        writeResult(fout, ids, bylabel.run(database), new IdentityScaling(), "bylabel");
        // Start at stepk when no positive start value was configured.
        final int startk = (this.startk > 0) ? this.startk : this.stepk;
        // Start values advanced by stepk where needed, used for the methods
        // below that are started at k >= 2 respectively k >= 3.
        final int startkmin2 = (startk >= 2) ? startk : (startk + stepk);
        final int startkmin3 = (startk >= 3) ? startk : (startkmin2 >= 3) ? startkmin2 : (startkmin2 + stepk);
        // Output function:
        BiConsumer<String, OutlierResult> out = (kstr, result) -> writeResult(fout, ids, result, scaling, kstr);
        // KNN
        runForEachK("KNN", startk, stepk, maxk, //
            k -> new KNNOutlier<O>(distf, k).run(database, relation), out);
        // KNN Weight
        runForEachK("KNNW", startk, stepk, maxk, //
            k -> new KNNWeightOutlier<O>(distf, k).run(database, relation), out);
        // Run LOF
        runForEachK("LOF", startk, stepk, maxk, //
            k -> new LOF<O>(k, distf).run(database, relation), out);
        // Run Simplified-LOF
        runForEachK("SimplifiedLOF", startk, stepk, maxk, //
            k -> new SimplifiedLOF<O>(k, distf).run(database, relation), out);
        // LoOP
        runForEachK("LoOP", startk, stepk, maxk, //
            k -> new LoOP<O>(k, k, distf, distf, 1.0).run(database, relation), out);
        // LDOF
        runForEachK("LDOF", startkmin2, stepk, maxk, //
            k -> new LDOF<O>(distf, k).run(database, relation), out);
        // Run ODIN
        runForEachK("ODIN", startk, stepk, maxk, //
            k -> new ODIN<O>(distf, k).run(database, relation), out);
        // Run FastABOD
        runForEachK("FastABOD", startkmin3, stepk, maxk, //
            k -> new FastABOD<O>(new PolynomialKernelFunction(2), k).run(database, relation), out);
        // Run KDEOS with intrinsic dimensionality 2.
        runForEachK("KDEOS", startkmin2, stepk, maxk, //
            k -> new KDEOS<O>(distf, k, k, GaussianKernelDensityFunction.KERNEL, 0., //
                0.5 * GaussianKernelDensityFunction.KERNEL.canonicalBandwidth(), 2).run(database, relation), out);
        // Run LDF
        runForEachK("LDF", startk, stepk, maxk, //
            k -> new LDF<O>(k, distf, GaussianKernelDensityFunction.KERNEL, 1., .1).run(database, relation), out);
        // Run INFLO
        runForEachK("INFLO", startk, stepk, maxk, //
            k -> new INFLO<O>(distf, 1.0, k).run(database, relation), out);
        // Run COF
        runForEachK("COF", startk, stepk, maxk, //
            k -> new COF<O>(k, distf).run(database, relation), out);
        // Run simple Intrinsic dimensionality
        runForEachK("Intrinsic", startkmin2, stepk, maxk, //
            k -> new IntrinsicDimensionalityOutlier<O>(distf, k, AggregatedHillEstimator.STATIC).run(database, relation), out);
        // Run IDOS
        runForEachK("IDOS", startkmin2, stepk, maxk, //
            k -> new IDOS<O>(distf, AggregatedHillEstimator.STATIC, k, k).run(database, relation), out);
        // Run simple kernel-density LOF variant
        runForEachK("KDLOF", startkmin2, stepk, maxk, //
            k -> new SimpleKernelDensityLOF<O>(k, distf, GaussianKernelDensityFunction.KERNEL).run(database, relation), out);
        // Run DWOF (need pairwise distances, too)
        runForEachK("DWOF", startkmin2, stepk, maxk, //
            k -> new DWOF<O>(distf, k, 1.1).run(database, relation), out);
        // Run LIC
        runForEachK("LIC", startk, stepk, maxk, //
            k -> new LocalIsolationCoefficient<O>(distf, k).run(database, relation), out);
        // Run VOV (requires a vector field).
        if (TypeUtil.DOUBLE_VECTOR_FIELD.isAssignableFromType(relation.getDataTypeInformation())) {
            // The type check above guards these casts.
            @SuppressWarnings("unchecked") final DistanceFunction<? super DoubleVector> df = (DistanceFunction<? super DoubleVector>) distf;
            @SuppressWarnings("unchecked") final Relation<DoubleVector> rel = (Relation<DoubleVector>) (Relation<?>) relation;
            runForEachK("VOV", startk, stepk, maxk, //
                k -> new VarianceOfVolume<DoubleVector>(k, df).run(database, rel), out);
        }
        // Run KNN DD
        runForEachK("KNNDD", startk, stepk, maxk, //
            k -> new KNNDD<O>(distf, k).run(database, relation), out);
        // Run KNN SOS
        runForEachK("KNNSOS", startk, stepk, maxk, //
            k -> new KNNSOS<O>(distf, k).run(relation), out);
        // Run ISOS
        runForEachK("ISOS", startkmin2, stepk, maxk, //
            k -> new ISOS<O>(distf, k, AggregatedHillEstimator.STATIC).run(relation), out);
    } catch (FileNotFoundException e) {
        throw new AbortException("Cannot create output file.", e);
    }
}
Also used : PreprocessorKNNQuery(de.lmu.ifi.dbs.elki.database.query.knn.PreprocessorKNNQuery) InputStep(de.lmu.ifi.dbs.elki.workflow.InputStep) NumberVector(de.lmu.ifi.dbs.elki.data.NumberVector) LDOF(de.lmu.ifi.dbs.elki.algorithm.outlier.lof.LDOF) DoubleRelation(de.lmu.ifi.dbs.elki.database.relation.DoubleRelation) Reference(de.lmu.ifi.dbs.elki.utilities.documentation.Reference) COF(de.lmu.ifi.dbs.elki.algorithm.outlier.lof.COF) ObjectParameter(de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter) DoubleVector(de.lmu.ifi.dbs.elki.data.DoubleVector) Locale(java.util.Locale) KNNWeightOutlier(de.lmu.ifi.dbs.elki.algorithm.outlier.distance.KNNWeightOutlier) FastMath(net.jafama.FastMath) LDF(de.lmu.ifi.dbs.elki.algorithm.outlier.lof.LDF) ByLabelOutlier(de.lmu.ifi.dbs.elki.algorithm.outlier.trivial.ByLabelOutlier) DistanceFunction(de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction) FastABOD(de.lmu.ifi.dbs.elki.algorithm.outlier.anglebased.FastABOD) OptionID(de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID) DistanceBasedAlgorithm(de.lmu.ifi.dbs.elki.algorithm.DistanceBasedAlgorithm) FormatUtil(de.lmu.ifi.dbs.elki.utilities.io.FormatUtil) ScalingFunction(de.lmu.ifi.dbs.elki.utilities.scaling.ScalingFunction) LoOP(de.lmu.ifi.dbs.elki.algorithm.outlier.lof.LoOP) TypeUtil(de.lmu.ifi.dbs.elki.data.type.TypeUtil) EuclideanDistanceFunction(de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.EuclideanDistanceFunction) Database(de.lmu.ifi.dbs.elki.database.Database) AggregatedHillEstimator(de.lmu.ifi.dbs.elki.math.statistics.intrinsicdimensionality.AggregatedHillEstimator) PolynomialKernelFunction(de.lmu.ifi.dbs.elki.distance.similarityfunction.kernel.PolynomialKernelFunction) KNNDD(de.lmu.ifi.dbs.elki.algorithm.outlier.distance.KNNDD) FileNotFoundException(java.io.FileNotFoundException) ISOS(de.lmu.ifi.dbs.elki.algorithm.outlier.intrinsic.ISOS) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) 
OutlierResult(de.lmu.ifi.dbs.elki.result.outlier.OutlierResult) IntParameter(de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter) IdentityScaling(de.lmu.ifi.dbs.elki.utilities.scaling.IdentityScaling) Pattern(java.util.regex.Pattern) LOF(de.lmu.ifi.dbs.elki.algorithm.outlier.lof.LOF) DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) Parameterization(de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization) KNNOutlier(de.lmu.ifi.dbs.elki.algorithm.outlier.distance.KNNOutlier) KDEOS(de.lmu.ifi.dbs.elki.algorithm.outlier.lof.KDEOS) QueryUtil(de.lmu.ifi.dbs.elki.database.QueryUtil) BiConsumer(java.util.function.BiConsumer) GaussianKernelDensityFunction(de.lmu.ifi.dbs.elki.math.statistics.kernelfunctions.GaussianKernelDensityFunction) CommonConstraints(de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints) AbortException(de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException) INFLO(de.lmu.ifi.dbs.elki.algorithm.outlier.lof.INFLO) IntFunction(java.util.function.IntFunction) PrintStream(java.io.PrintStream) Relation(de.lmu.ifi.dbs.elki.database.relation.Relation) MaterializeKNNPreprocessor(de.lmu.ifi.dbs.elki.index.preprocessed.knn.MaterializeKNNPreprocessor) AbstractApplication(de.lmu.ifi.dbs.elki.application.AbstractApplication) IntrinsicDimensionalityOutlier(de.lmu.ifi.dbs.elki.algorithm.outlier.intrinsic.IntrinsicDimensionalityOutlier) File(java.io.File) Duration(de.lmu.ifi.dbs.elki.logging.statistics.Duration) ODIN(de.lmu.ifi.dbs.elki.algorithm.outlier.distance.ODIN) SimplifiedLOF(de.lmu.ifi.dbs.elki.algorithm.outlier.lof.SimplifiedLOF) DWOF(de.lmu.ifi.dbs.elki.algorithm.outlier.DWOF) LocalIsolationCoefficient(de.lmu.ifi.dbs.elki.algorithm.outlier.distance.LocalIsolationCoefficient) VarianceOfVolume(de.lmu.ifi.dbs.elki.algorithm.outlier.lof.VarianceOfVolume) SimpleKernelDensityLOF(de.lmu.ifi.dbs.elki.algorithm.outlier.lof.SimpleKernelDensityLOF) IDOS(de.lmu.ifi.dbs.elki.algorithm.outlier.intrinsic.IDOS) 
KNNSOS(de.lmu.ifi.dbs.elki.algorithm.outlier.distance.KNNSOS) Logging(de.lmu.ifi.dbs.elki.logging.Logging) OutlierScalingFunction(de.lmu.ifi.dbs.elki.utilities.scaling.outlier.OutlierScalingFunction) PatternParameter(de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.PatternParameter) KNNQuery(de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery) IdentityScaling(de.lmu.ifi.dbs.elki.utilities.scaling.IdentityScaling) PolynomialKernelFunction(de.lmu.ifi.dbs.elki.distance.similarityfunction.kernel.PolynomialKernelFunction) OutlierResult(de.lmu.ifi.dbs.elki.result.outlier.OutlierResult) FileNotFoundException(java.io.FileNotFoundException) DWOF(de.lmu.ifi.dbs.elki.algorithm.outlier.DWOF) SimpleKernelDensityLOF(de.lmu.ifi.dbs.elki.algorithm.outlier.lof.SimpleKernelDensityLOF) MaterializeKNNPreprocessor(de.lmu.ifi.dbs.elki.index.preprocessed.knn.MaterializeKNNPreprocessor) KNNDD(de.lmu.ifi.dbs.elki.algorithm.outlier.distance.KNNDD) INFLO(de.lmu.ifi.dbs.elki.algorithm.outlier.lof.INFLO) LDF(de.lmu.ifi.dbs.elki.algorithm.outlier.lof.LDF) SimplifiedLOF(de.lmu.ifi.dbs.elki.algorithm.outlier.lof.SimplifiedLOF) DoubleRelation(de.lmu.ifi.dbs.elki.database.relation.DoubleRelation) Relation(de.lmu.ifi.dbs.elki.database.relation.Relation) IntrinsicDimensionalityOutlier(de.lmu.ifi.dbs.elki.algorithm.outlier.intrinsic.IntrinsicDimensionalityOutlier) Database(de.lmu.ifi.dbs.elki.database.Database) COF(de.lmu.ifi.dbs.elki.algorithm.outlier.lof.COF) IDOS(de.lmu.ifi.dbs.elki.algorithm.outlier.intrinsic.IDOS) ODIN(de.lmu.ifi.dbs.elki.algorithm.outlier.distance.ODIN) LoOP(de.lmu.ifi.dbs.elki.algorithm.outlier.lof.LoOP) PrintStream(java.io.PrintStream) LOF(de.lmu.ifi.dbs.elki.algorithm.outlier.lof.LOF) SimplifiedLOF(de.lmu.ifi.dbs.elki.algorithm.outlier.lof.SimplifiedLOF) SimpleKernelDensityLOF(de.lmu.ifi.dbs.elki.algorithm.outlier.lof.SimpleKernelDensityLOF) DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) KNNSOS(de.lmu.ifi.dbs.elki.algorithm.outlier.distance.KNNSOS) 
KNNWeightOutlier(de.lmu.ifi.dbs.elki.algorithm.outlier.distance.KNNWeightOutlier) VarianceOfVolume(de.lmu.ifi.dbs.elki.algorithm.outlier.lof.VarianceOfVolume) LDOF(de.lmu.ifi.dbs.elki.algorithm.outlier.lof.LDOF) ISOS(de.lmu.ifi.dbs.elki.algorithm.outlier.intrinsic.ISOS) INFLO(de.lmu.ifi.dbs.elki.algorithm.outlier.lof.INFLO) KNNOutlier(de.lmu.ifi.dbs.elki.algorithm.outlier.distance.KNNOutlier) DistanceFunction(de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction) EuclideanDistanceFunction(de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.EuclideanDistanceFunction) KDEOS(de.lmu.ifi.dbs.elki.algorithm.outlier.lof.KDEOS) PreprocessorKNNQuery(de.lmu.ifi.dbs.elki.database.query.knn.PreprocessorKNNQuery) LocalIsolationCoefficient(de.lmu.ifi.dbs.elki.algorithm.outlier.distance.LocalIsolationCoefficient) DoubleVector(de.lmu.ifi.dbs.elki.data.DoubleVector) AbortException(de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException) FastABOD(de.lmu.ifi.dbs.elki.algorithm.outlier.anglebased.FastABOD)

Example 43 with DBIDs

use of de.lmu.ifi.dbs.elki.database.ids.DBIDs in project elki by elki-project.

the class GreedyEnsembleExperiment method applyPrescaling.

/**
 * Build a rescaled copy of a relation: every vector is passed through the
 * given scaling function, except those whose id is contained in
 * {@code skip}, which are copied unchanged.
 *
 * @param scaling Scaling function; if {@code null}, the input relation is
 *        returned as-is
 * @param relation Relation to read
 * @param skip DBIDs to pass unmodified
 * @return New relation (or the input relation when no scaling is given)
 */
public static Relation<NumberVector> applyPrescaling(ScalingFunction scaling, Relation<NumberVector> relation, DBIDs skip) {
    // Nothing to do without a scaling function.
    if (scaling == null) {
        return relation;
    }
    NumberVector.Factory<NumberVector> vectorFactory = RelationUtil.getNumberVectorFactory(relation);
    DBIDs ids = relation.getDBIDs();
    WritableDataStore<NumberVector> rescaled = DataStoreUtil.makeStorage(ids, DataStoreFactory.HINT_HOT, NumberVector.class);
    DBIDIter it = ids.iter();
    while (it.valid()) {
        double[] values = relation.get(it).toArray();
        final boolean untouched = skip.contains(it);
        if (!untouched) {
            applyScaling(values, scaling);
        }
        NumberVector copy = vectorFactory.newNumberVector(values, ArrayLikeUtil.DOUBLEARRAYADAPTER);
        rescaled.put(it, copy);
        it.advance();
    }
    return new MaterializedRelation<>(relation.getDataTypeInformation(), ids, "rescaled", rescaled);
}
Also used : NumberVector(de.lmu.ifi.dbs.elki.data.NumberVector) DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) HashSetModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.HashSetModifiableDBIDs) ModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) MaterializedRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation)

Example 44 with DBIDs

use of de.lmu.ifi.dbs.elki.database.ids.DBIDs in project elki by elki-project.

the class EvaluateRetrievalPerformance method run.

/**
 * Run the retrieval performance evaluation.
 *
 * For every sampled query object that has at least one matching object (as
 * found by {@code findMatches}), the distances to the relation are computed
 * and evaluated with average precision, ROC and kNN evaluation. The sums
 * are divided by the number of evaluated query objects at the end.
 *
 * @param database Database to run on (used to obtain the distance query)
 * @param relation Relation for distance computations
 * @param lrelation Relation for class label comparison
 * @return Result holding the number of evaluated samples, the averaged
 *         average-precision and ROC values, and the averaged per-k kNN values
 */
public RetrievalPerformanceResult run(Database database, Relation<O> relation, Relation<?> lrelation) {
    final DistanceQuery<O> distQuery = database.getDistanceQuery(relation, getDistanceFunction());
    final DBIDs queryIds = DBIDUtil.randomSample(relation.getDBIDs(), sampling, random);
    // Reused buffers: matching objects, distance list, label counters.
    ModifiableDBIDs positives = DBIDUtil.newHashSet();
    ModifiableDoubleDBIDList distances = DBIDUtil.newDistanceDBIDList(relation.size());
    Object2IntOpenHashMap<Object> counters = new Object2IntOpenHashMap<>();
    // Running sums.
    double map = 0.;
    double mroc = 0.;
    double[] knnperf = new double[maxk];
    int samples = 0;

    FiniteProgress objloop = LOG.isVerbose() ? new FiniteProgress("Processing query objects", queryIds.size(), LOG) : null;
    for (DBIDIter query = queryIds.iter(); query.valid(); query.advance()) {
        Object label = lrelation.get(query);
        findMatches(positives, lrelation, label);
        if (!(positives.size() > 0)) {
            // No match for this query object: count progress only.
            LOG.incrementProcessed(objloop);
            continue;
        }
        computeDistances(distances, query, distQuery, relation);
        final int expected = includeSelf ? relation.size() : relation.size() - 1;
        if (distances.size() != expected) {
            LOG.warning("Neighbor list does not have the desired size: " + distances.size());
        }
        map += AveragePrecisionEvaluation.STATIC.evaluate(positives, distances);
        mroc += ROCEvaluation.STATIC.evaluate(positives, distances);
        KNNEvaluator.STATIC.evaluateKNN(knnperf, distances, lrelation, counters, label);
        samples++;
        LOG.incrementProcessed(objloop);
    }
    LOG.ensureCompleted(objloop);

    if (samples < 1) {
        throw new AbortException("No object matched - are labels parsed correctly?");
    }
    // Written as a negated comparison so that NaN values are rejected, too.
    if (!(map >= 0 && mroc >= 0)) {
        throw new AbortException("NaN in MAP/ROC.");
    }
    map /= samples;
    mroc /= samples;
    LOG.statistics(new DoubleStatistic(PREFIX + ".map", map));
    LOG.statistics(new DoubleStatistic(PREFIX + ".rocauc", mroc));
    LOG.statistics(new DoubleStatistic(PREFIX + ".samples", samples));
    for (int idx = 0; idx < maxk; idx++) {
        knnperf[idx] /= samples;
        LOG.statistics(new DoubleStatistic(PREFIX + ".knn-" + (idx + 1), knnperf[idx]));
    }
    return new RetrievalPerformanceResult(samples, map, mroc, knnperf);
}
Also used : ModifiableDoubleDBIDList(de.lmu.ifi.dbs.elki.database.ids.ModifiableDoubleDBIDList) DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) ModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs) FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) DoubleStatistic(de.lmu.ifi.dbs.elki.logging.statistics.DoubleStatistic) ModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs) Object2IntOpenHashMap(it.unimi.dsi.fastutil.objects.Object2IntOpenHashMap) AbortException(de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException)

Example 45 with DBIDs

use of de.lmu.ifi.dbs.elki.database.ids.DBIDs in project elki by elki-project.

the class TextWriter method writeClusterResult.

/**
 * Write a single cluster to its own output stream: a comment header with
 * the cluster name, its parents and children in the cluster hierarchy,
 * followed by one entry per object id of the cluster.
 *
 * @param db Database, passed on to {@code printObject}
 * @param streamOpener Factory used to open and close the output stream
 * @param clustering Clustering that provides the cluster hierarchy
 * @param clus Cluster to write
 * @param ra Relations, passed on to {@code printObject}
 * @param naming Naming scheme; may be {@code null}, in which case the
 *        generic label {@code "cluster"} is used for all cluster names
 * @throws FileNotFoundException declared for the stream handling
 * @throws IOException declared for the stream handling
 */
private void writeClusterResult(Database db, StreamFactory streamOpener, Clustering<Model> clustering, Cluster<Model> clus, List<Relation<?>> ra, NamingScheme naming) throws FileNotFoundException, IOException {
    // The naming scheme is optional; never dereference it unguarded.
    final String label = (naming != null) ? naming.getNameFor(clus) : "cluster";
    final String filename = (naming != null) ? filenameFromLabel(label) : "cluster";
    PrintStream outStream = streamOpener.openStream(getFilename(clus, filename));
    try {
        TextWriterStream out = new TextWriterStream(outStream, writers, fallback);
        // Write cluster information
        out.commentPrintLn("Cluster: " + label);
        clus.writeToText(out, null);
        if (clustering.getClusterHierarchy().numParents(clus) > 0) {
            StringBuilder buf = new StringBuilder();
            buf.append("Parents:");
            for (It<Cluster<Model>> iter = clustering.getClusterHierarchy().iterParents(clus); iter.valid(); iter.advance()) {
                buf.append(' ').append(naming != null ? naming.getNameFor(iter.get()) : "cluster");
            }
            out.commentPrintLn(buf.toString());
        }
        if (clustering.getClusterHierarchy().numChildren(clus) > 0) {
            StringBuilder buf = new StringBuilder();
            buf.append("Children:");
            for (It<Cluster<Model>> iter = clustering.getClusterHierarchy().iterChildren(clus); iter.valid(); iter.advance()) {
                buf.append(' ').append(naming != null ? naming.getNameFor(iter.get()) : "cluster");
            }
            out.commentPrintLn(buf.toString());
        }
        out.flush();
        // print ids.
        DBIDs ids = clus.getIDs();
        for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
            printObject(out, db, iter, ra);
        }
        out.flush();
    } finally {
        // Release the stream even when writing failed part-way.
        streamOpener.closeStream(outStream);
    }
}
Also used : PrintStream(java.io.PrintStream) DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter)

Aggregations

DBIDs (de.lmu.ifi.dbs.elki.database.ids.DBIDs)139 DBIDIter (de.lmu.ifi.dbs.elki.database.ids.DBIDIter)77 OutlierResult (de.lmu.ifi.dbs.elki.result.outlier.OutlierResult)45 WritableDoubleDataStore (de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore)44 ModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs)40 DoubleRelation (de.lmu.ifi.dbs.elki.database.relation.DoubleRelation)39 MaterializedDoubleRelation (de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation)38 OutlierScoreMeta (de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta)38 ArrayList (java.util.ArrayList)35 DoubleMinMax (de.lmu.ifi.dbs.elki.math.DoubleMinMax)34 FiniteProgress (de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress)29 ArrayModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs)25 Clustering (de.lmu.ifi.dbs.elki.data.Clustering)23 KNNList (de.lmu.ifi.dbs.elki.database.ids.KNNList)22 ArrayDBIDs (de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs)19 BasicOutlierScoreMeta (de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta)18 WritableIntegerDataStore (de.lmu.ifi.dbs.elki.database.datastore.WritableIntegerDataStore)16 DBIDArrayIter (de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter)15 IndefiniteProgress (de.lmu.ifi.dbs.elki.logging.progress.IndefiniteProgress)14 DoubleStatistic (de.lmu.ifi.dbs.elki.logging.statistics.DoubleStatistic)14