Search in sources :

Example 36 with KNNList

use of de.lmu.ifi.dbs.elki.database.ids.KNNList in project elki by elki-project.

the class AbstractIndexStructureTest method testExactEuclidean.

/**
 * Actual test routine: builds the test database and checks the kNN and/or
 * range query results for the query point against the expected distances
 * and vectors.
 *
 * @param inputparams parameterization used to build the database; a fixed
 *        DBID filter is added to it by this method
 * @param expectKNNQuery class the returned kNN query must be assignable to,
 *        or {@code null} to skip the kNN test
 * @param expectRangeQuery class the returned range query must be assignable
 *        to, or {@code null} to skip the range test
 */
protected void testExactEuclidean(ListParameterization inputparams, Class<?> expectKNNQuery, Class<?> expectRangeQuery) {
    // Use a fixed DBID - historically, we used 1 indexed - to reduce random
    // variation in results due to different hash codes everywhere.
    inputparams.addParameter(AbstractDatabaseConnection.Parameterizer.FILTERS_ID, new FixedDBIDsFilter(1));
    Database db = AbstractSimpleAlgorithmTest.makeSimpleDatabase(dataset, shoulds, inputparams);
    Relation<DoubleVector> rep = db.getRelation(TypeUtil.DOUBLE_VECTOR_FIELD);
    DistanceQuery<DoubleVector> dist = db.getDistanceQuery(rep, EuclideanDistanceFunction.STATIC);
    if (expectKNNQuery != null) {
        // Get the k nearest neighbors of the query point.
        DoubleVector dv = DoubleVector.wrap(querypoint);
        KNNQuery<DoubleVector> knnq = db.getKNNQuery(dist, k);
        assertTrue("Returned knn query is not of expected class: expected " + expectKNNQuery + " got " + knnq.getClass(), expectKNNQuery.isAssignableFrom(knnq.getClass()));
        KNNList ids = knnq.getKNNForObject(dv, k);
        assertNeighborsMatchExpectation(ids, rep, dist);
    }
    if (expectRangeQuery != null) {
        // Do a range query around the query point.
        DoubleVector dv = DoubleVector.wrap(querypoint);
        RangeQuery<DoubleVector> rangeq = db.getRangeQuery(dist, eps);
        assertTrue("Returned range query is not of expected class: expected " + expectRangeQuery + " got " + rangeq.getClass(), expectRangeQuery.isAssignableFrom(rangeq.getClass()));
        DoubleDBIDList ids = rangeq.getRangeForObject(dv, eps);
        assertNeighborsMatchExpectation(ids, rep, dist);
    }
}

/**
 * Verifies that a query result has the expected size and that each entry, in
 * iteration order, matches the expected distance and the expected vector.
 *
 * @param ids query result to verify
 * @param rep relation used to look up the vector of each result entry
 * @param dist distance query used to compare result and expected vectors
 */
private void assertNeighborsMatchExpectation(DoubleDBIDList ids, Relation<DoubleVector> rep, DistanceQuery<DoubleVector> dist) {
    // Sizes are integers, so compare them exactly rather than with a delta.
    assertEquals("Result size does not match expectation!", shouldd.length, ids.size());
    // verify that the neighbors match.
    int i = 0;
    for (DoubleDBIDListIter res = ids.iter(); res.valid(); res.advance(), i++) {
        // Verify distance
        assertEquals("Expected distance doesn't match.", shouldd[i], res.doubleValue(), 1e-6);
        // verify vector
        DoubleVector c = rep.get(res);
        DoubleVector c2 = DoubleVector.wrap(shouldc[i]);
        assertEquals("Expected vector doesn't match: " + c.toString(), 0.0, dist.distance(c, c2), 1e-15);
    }
}
Also used : DoubleDBIDListIter(de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter) FixedDBIDsFilter(de.lmu.ifi.dbs.elki.datasource.filter.FixedDBIDsFilter) KNNList(de.lmu.ifi.dbs.elki.database.ids.KNNList) Database(de.lmu.ifi.dbs.elki.database.Database) DoubleDBIDList(de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDList) DoubleVector(de.lmu.ifi.dbs.elki.data.DoubleVector)

Example 37 with KNNList

use of de.lmu.ifi.dbs.elki.database.ids.KNNList in project elki by elki-project.

the class DependencyDerivator method run.

/**
 * Derives the linear dependencies among the attributes of the given relation.
 * <p>
 * The model is generated either from all objects, from a random sample, or
 * from the {@code sampleSize} nearest neighbors of the centroid, depending on
 * the {@code sampleSize} and {@code randomsample} settings.
 *
 * @param database the database to run this DependencyDerivator on
 * @param relation the relation to use
 * @return the CorrelationAnalysisSolution computed by this
 *         DependencyDerivator
 */
public CorrelationAnalysisSolution<V> run(Database database, Relation<V> relation) {
    if (LOG.isVerbose()) {
        LOG.verbose("retrieving database objects...");
    }
    final Centroid center = Centroid.make(relation, relation.getDBIDs());
    final NumberVector.Factory<V> vectorFactory = RelationUtil.getNumberVectorFactory(relation);
    final V centroidDV = vectorFactory.newNumberVector(center.getArrayRef());

    final DBIDs selected;
    if (this.sampleSize <= 0) {
        // No sampling requested: use every object of the relation.
        selected = relation.getDBIDs();
    } else if (randomsample) {
        selected = DBIDUtil.randomSample(relation.getDBIDs(), this.sampleSize, RandomFactory.DEFAULT);
    } else {
        // Sample the objects closest to the centroid.
        final DistanceQuery<V> dq = database.getDistanceQuery(relation, getDistanceFunction());
        final KNNQuery<V> knnQuery = database.getKNNQuery(dq, this.sampleSize);
        final KNNList nearest = knnQuery.getKNNForObject(centroidDV, this.sampleSize);
        selected = DBIDUtil.newHashSet(nearest);
    }
    return generateModel(relation, selected, center.getArrayRef());
}
Also used : Centroid(de.lmu.ifi.dbs.elki.math.linearalgebra.Centroid) NumberVector(de.lmu.ifi.dbs.elki.data.NumberVector) KNNList(de.lmu.ifi.dbs.elki.database.ids.KNNList) DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs)

Example 38 with KNNList

use of de.lmu.ifi.dbs.elki.database.ids.KNNList in project elki by elki-project.

the class DistanceStddevOutlier method run.

/**
 * Run the outlier detection algorithm.
 * <p>
 * The score of each object is the sample standard deviation of the distances
 * to its k nearest neighbors, where the object itself is skipped.
 *
 * @param database Database to use
 * @param relation Relation to analyze
 * @return Outlier score result
 */
public OutlierResult run(Database database, Relation<O> relation) {
    // Get a nearest neighbor query on the relation.
    KNNQuery<O> knnq = QueryUtil.getKNNQuery(relation, getDistanceFunction(), k);
    // Output data storage
    WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_DB);
    // Track minimum and maximum scores
    DoubleMinMax minmax = new DoubleMinMax();
    // Iterate over all objects
    for (DBIDIter iter = relation.iterDBIDs(); iter.valid(); iter.advance()) {
        KNNList neighbors = knnq.getKNNForDBID(iter, k);
        // Aggregate distances
        MeanVariance mv = new MeanVariance();
        for (DoubleDBIDListIter neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) {
            // Skip the object itself. The 0 is not very informative.
            if (DBIDUtil.equal(iter, neighbor)) {
                continue;
            }
            mv.put(neighbor.doubleValue());
        }
        final double score = mv.getSampleStddev();
        // Record the score in the min/max tracker; otherwise the metadata
        // below would be built from a tracker that never saw any value.
        minmax.put(score);
        // Store score
        scores.putDouble(iter, score);
    }
    // Wrap the result in the standard containers
    // Actual min-max, theoretical min-max!
    OutlierScoreMeta meta = new BasicOutlierScoreMeta(minmax.getMin(), minmax.getMax(), 0, Double.POSITIVE_INFINITY);
    DoubleRelation rel = new MaterializedDoubleRelation(relation.getDBIDs(), "stddev-outlier", scores);
    return new OutlierResult(meta, rel);
}
Also used : DoubleDBIDListIter(de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter) WritableDoubleDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore) OutlierResult(de.lmu.ifi.dbs.elki.result.outlier.OutlierResult) DoubleRelation(de.lmu.ifi.dbs.elki.database.relation.DoubleRelation) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation) BasicOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta) OutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta) BasicOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) MeanVariance(de.lmu.ifi.dbs.elki.math.MeanVariance) DoubleMinMax(de.lmu.ifi.dbs.elki.math.DoubleMinMax) KNNList(de.lmu.ifi.dbs.elki.database.ids.KNNList) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation)

Example 39 with KNNList

use of de.lmu.ifi.dbs.elki.database.ids.KNNList in project elki by elki-project.

the class KNNClassifier method classProbabilities.

/**
 * Estimates the class distribution of an instance from its k nearest
 * neighbors: each entry is the number of neighbors carrying that label,
 * divided by the number of neighbors returned.
 *
 * @param instance object to compute the class distribution for
 * @param labels class labels, looked up with a binary search; neighbors
 *        whose label is not found are not counted for any class
 * @return one relative frequency per entry of {@code labels}
 */
public double[] classProbabilities(O instance, ArrayList<ClassLabel> labels) {
    final int[] counts = new int[labels.size()];
    final KNNList neighbors = knnq.getKNNForObject(instance, k);
    DoubleDBIDListIter it = neighbors.iter();
    while (it.valid()) {
        final int pos = Collections.binarySearch(labels, labelrep.get(it));
        if (pos >= 0) {
            counts[pos]++;
        }
        it.advance();
    }
    final double[] probabilities = new double[labels.size()];
    final double total = (double) neighbors.size();
    for (int c = 0; c < probabilities.length; c++) {
        probabilities[c] = ((double) counts[c]) / total;
    }
    return probabilities;
}
Also used : DoubleDBIDListIter(de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter) KNNList(de.lmu.ifi.dbs.elki.database.ids.KNNList)

Example 40 with KNNList

use of de.lmu.ifi.dbs.elki.database.ids.KNNList in project elki by elki-project.

the class KNNClassifier method classify.

/**
 * Classifies an instance by majority vote among its k nearest neighbors.
 *
 * @param instance object to classify
 * @return the label with the highest neighbor count, or {@code null} if no
 *         neighbor was returned
 */
@Override
public ClassLabel classify(O instance) {
    final Object2IntOpenHashMap<ClassLabel> votes = new Object2IntOpenHashMap<>();
    final KNNList neighbors = knnq.getKNNForObject(instance, k);
    // Tally one vote per neighbor for that neighbor's label.
    DoubleDBIDListIter it = neighbors.iter();
    while (it.valid()) {
        votes.addTo(labelrep.get(it), 1);
        it.advance();
    }
    // Pick the label with the strictly largest vote count seen so far.
    ClassLabel winner = null;
    int winnerVotes = Integer.MIN_VALUE;
    ObjectIterator<Entry<ClassLabel>> entries = votes.object2IntEntrySet().fastIterator();
    while (entries.hasNext()) {
        final Entry<ClassLabel> candidate = entries.next();
        final int candidateVotes = candidate.getIntValue();
        if (candidateVotes > winnerVotes) {
            winnerVotes = candidateVotes;
            winner = candidate.getKey();
        }
    }
    return winner;
}
Also used : DoubleDBIDListIter(de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter) Entry(it.unimi.dsi.fastutil.objects.Object2IntMap.Entry) ClassLabel(de.lmu.ifi.dbs.elki.data.ClassLabel) KNNList(de.lmu.ifi.dbs.elki.database.ids.KNNList) Object2IntOpenHashMap(it.unimi.dsi.fastutil.objects.Object2IntOpenHashMap)

Aggregations

KNNList (de.lmu.ifi.dbs.elki.database.ids.KNNList)80 DBIDIter (de.lmu.ifi.dbs.elki.database.ids.DBIDIter)53 DoubleDBIDListIter (de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter)38 FiniteProgress (de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress)32 DBIDs (de.lmu.ifi.dbs.elki.database.ids.DBIDs)21 WritableDoubleDataStore (de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore)20 DoubleRelation (de.lmu.ifi.dbs.elki.database.relation.DoubleRelation)18 MaterializedDoubleRelation (de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation)18 OutlierResult (de.lmu.ifi.dbs.elki.result.outlier.OutlierResult)18 OutlierScoreMeta (de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta)18 DoubleMinMax (de.lmu.ifi.dbs.elki.math.DoubleMinMax)15 ArrayList (java.util.ArrayList)11 ArrayDBIDs (de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs)10 ModifiableDoubleDBIDList (de.lmu.ifi.dbs.elki.database.ids.ModifiableDoubleDBIDList)9 BasicOutlierScoreMeta (de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta)9 DBID (de.lmu.ifi.dbs.elki.database.ids.DBID)8 KNNHeap (de.lmu.ifi.dbs.elki.database.ids.KNNHeap)8 ModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs)8 MeanVariance (de.lmu.ifi.dbs.elki.math.MeanVariance)8 ArrayModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs)6