Search in sources :

Example 36 with ModifiableDoubleDBIDList

use of de.lmu.ifi.dbs.elki.database.ids.ModifiableDoubleDBIDList in project elki by elki-project.

the class LinearScanRKNNQuery method getRKNNForObject.

/**
 * Reverse k-nearest-neighbor query by scanning every object of the relation.
 *
 * The kNN lists of all objects are obtained with one bulk query. An object is
 * reported when its kNN list holds fewer than {@code k} entries, or when the
 * query object is no farther away than the last considered entry of that list.
 *
 * @param obj Query object
 * @param k Number of neighbors requested
 * @return the matching objects with their distance to {@code obj}, after
 *         calling {@code sort()} on the result list
 */
@Override
public DoubleDBIDList getRKNNForObject(O obj, int k) {
    final ArrayDBIDs candidates = DBIDUtil.ensureArray(relation.getDBIDs());
    final List<? extends KNNList> neighborhoods = knnQuery.getKNNForBulkDBIDs(candidates, k);
    final ModifiableDoubleDBIDList result = DBIDUtil.newDistanceDBIDList();
    // pos walks the bulk result in lockstep with the candidate iterator.
    int pos = 0;
    DBIDIter cand = candidates.iter();
    while (cand.valid()) {
        final KNNList knn = neighborhoods.get(pos);
        // Index of the k-th entry, or of the final entry if the list is shorter.
        final int last = Math.min(k - 1, knn.size() - 1);
        final double dist = distanceQuery.distance(obj, cand);
        // Short lists always accept; this check must come first so that an
        // empty list (last == -1) never reaches knn.get(last).
        final boolean shortList = last < k - 1;
        if (shortList || dist <= knn.get(last).doubleValue()) {
            result.add(dist, cand);
        }
        cand.advance();
        pos++;
    }
    result.sort();
    return result;
}
Also used : ModifiableDoubleDBIDList(de.lmu.ifi.dbs.elki.database.ids.ModifiableDoubleDBIDList) KNNList(de.lmu.ifi.dbs.elki.database.ids.KNNList) ArrayDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter)

Example 37 with ModifiableDoubleDBIDList

use of de.lmu.ifi.dbs.elki.database.ids.ModifiableDoubleDBIDList in project elki by elki-project.

the class OUTRES method refineRange.

/**
 * Filter an existing range query result down to a smaller radius.
 *
 * Every entry is inspected, because the input is not guaranteed to be sorted
 * by distance.
 *
 * @param neighc Original result
 * @param adjustedEps New epsilon; entries with a distance of at most this
 *        value are kept
 * @return new list containing only the retained entries
 */
private DoubleDBIDList refineRange(DoubleDBIDList neighc, double adjustedEps) {
    final ModifiableDoubleDBIDList refined = DBIDUtil.newDistanceDBIDList(neighc.size());
    DoubleDBIDListIter cursor = neighc.iter();
    while (cursor.valid()) {
        final DoubleDBIDPair entry = cursor.getPair();
        final double d = entry.doubleValue();
        if (d <= adjustedEps) {
            refined.add(d, entry);
        }
        cursor.advance();
    }
    return refined;
}
Also used : DoubleDBIDListIter(de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter) ModifiableDoubleDBIDList(de.lmu.ifi.dbs.elki.database.ids.ModifiableDoubleDBIDList) DoubleDBIDPair(de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDPair)

Example 38 with ModifiableDoubleDBIDList

use of de.lmu.ifi.dbs.elki.database.ids.ModifiableDoubleDBIDList in project elki by elki-project.

the class ReferenceBasedOutlierDetection method computeDistanceVector.

/**
 * Build the one-dimensional representation of the data set with respect to a
 * single reference point: the distance of every object to that point.
 *
 * @param refPoint Reference Point Feature Vector
 * @param database database to work on
 * @param distFunc Distance function to use
 * @return list of (distance to the reference point, object id) entries, one
 *         per database object, after calling {@code sort()} on the list
 */
protected DoubleDBIDList computeDistanceVector(NumberVector refPoint, Relation<? extends NumberVector> database, PrimitiveDistanceQuery<? super NumberVector> distFunc) {
    final ModifiableDoubleDBIDList dists = DBIDUtil.newDistanceDBIDList(database.size());
    DBIDIter it = database.iterDBIDs();
    while (it.valid()) {
        final double d = distFunc.distance(it, refPoint);
        dists.add(d, it);
        it.advance();
    }
    dists.sort();
    return dists;
}
Also used : ModifiableDoubleDBIDList(de.lmu.ifi.dbs.elki.database.ids.ModifiableDoubleDBIDList) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter)

Example 39 with ModifiableDoubleDBIDList

use of de.lmu.ifi.dbs.elki.database.ids.ModifiableDoubleDBIDList in project elki by elki-project.

the class LuceneDistanceRangeQuery method getRangeForDBID.

/**
 * Range query for a database object, returning a freshly allocated list.
 *
 * Delegates to the three-argument overload, which fills the supplied list.
 *
 * @param id Query object reference
 * @param range Query range
 * @return the list populated by the delegate call
 */
@Override
public DoubleDBIDList getRangeForDBID(DBIDRef id, double range) {
    final ModifiableDoubleDBIDList neighbors = DBIDUtil.newDistanceDBIDList();
    this.getRangeForDBID(id, range, neighbors);
    return neighbors;
}
Also used : ModifiableDoubleDBIDList(de.lmu.ifi.dbs.elki.database.ids.ModifiableDoubleDBIDList)

Example 40 with ModifiableDoubleDBIDList

use of de.lmu.ifi.dbs.elki.database.ids.ModifiableDoubleDBIDList in project elki by elki-project.

the class KMeansMinusMinus method run.

/**
 * Run the clustering on the given relation.
 *
 * Iterates assignment and mean recomputation until no assignment changes, the
 * total variance increases, or {@code maxiter} iterations were performed
 * ({@code maxiter <= 0} disables the limit). A heap of size {@code heapsize}
 * (derived from {@code rate}) is passed to the assignment step; its top value
 * is used as distance threshold when recomputing the means and, if
 * {@code noiseFlag} is set, when moving objects into a separate noise cluster.
 *
 * @param database Database, passed to the initializer
 * @param relation Relation to cluster
 * @return clustering result; empty if the relation is empty
 */
@Override
public Clustering<KMeansModel> run(Database database, Relation<V> relation) {
    if (relation.size() <= 0) {
        return new Clustering<>("k-Means Clustering", "kmeans-clustering");
    }
    // Choose initial means
    if (LOG.isStatistics()) {
        LOG.statistics(new StringStatistic(KEY + ".initialization", initializer.toString()));
    }
    // Initialize the means
    double[][] means = initializer.chooseInitialMeans(database, relation, k, getDistanceFunction());
    // Initialize the heap: a rate below 1 is a fraction of the relation size,
    // otherwise it is used as an absolute count.
    final int heapsize = (int) (rate < 1. ? Math.ceil(relation.size() * rate) : rate);
    DoubleMinHeap minHeap = new DoubleMinHeap(heapsize);
    // Setup cluster assignment store
    List<ModifiableDoubleDBIDList> clusters = new ArrayList<>();
    for (int i = 0; i < k; i++) {
        clusters.add(DBIDUtil.newDistanceDBIDList((int) (relation.size() * 2. / k)));
    }
    WritableIntegerDataStore assignment = DataStoreUtil.makeIntegerStorage(relation.getDBIDs(), DataStoreFactory.HINT_TEMP | DataStoreFactory.HINT_HOT, -1);
    double[] varsum = new double[k];
    IndefiniteProgress prog = LOG.isVerbose() ? new IndefiniteProgress("K-Means iteration", LOG) : null;
    DoubleStatistic varstat = new DoubleStatistic(this.getClass().getName() + ".variance-sum");
    // Otherwise, the vartotal break below will fail!
    assert (varstat != null);
    int iteration = 0;
    double prevvartotal = Double.POSITIVE_INFINITY;
    for (; maxiter <= 0 || iteration < maxiter; iteration++) {
        // Reset the heap and all cluster lists before reassigning.
        minHeap.clear();
        for (int i = 0; i < k; i++) {
            clusters.get(i).clear();
        }
        LOG.incrementProcessed(prog);
        boolean changed = assignToNearestCluster(relation, means, clusters, assignment, varsum, minHeap, heapsize);
        double vartotal = logVarstat(varstat, varsum);
        // Stop if no assignment changed, or if the total variance is larger
        // than the previous value.
        if (!changed || vartotal > prevvartotal) {
            break;
        }
        prevvartotal = vartotal;
        // Recompute means.
        means = meansWithTreshhold(clusters, means, relation, heapsize > 0 ? minHeap.peek() : Double.POSITIVE_INFINITY);
    }
    // Create the noise cluster if wanted. It is only allocated when the heap
    // is in use; otherwise noiseids stays null.
    ModifiableDoubleDBIDList noiseids = null;
    if (noiseFlag && heapsize > 0) {
        clusters.add(noiseids = DBIDUtil.newDistanceDBIDList((int) (relation.size() * 2. / k)));
        double tresh = minHeap.peek();
        for (int i = 0; i < k; i++) {
            for (DoubleDBIDListMIter it = clusters.get(i).iter(); it.valid(); it.advance()) {
                final double dist = it.doubleValue();
                // Add to the noise cluster:
                if (dist >= tresh) {
                    noiseids.add(dist, it);
                    assignment.putInt(it, k);
                    it.remove();
                }
            }
        }
    }
    LOG.setCompleted(prog);
    if (LOG.isStatistics()) {
        LOG.statistics(new LongStatistic(KEY + ".iterations", iteration));
    }
    // Wrap result
    Clustering<KMeansModel> result = new Clustering<>("k-Means Clustering", "kmeans-clustering");
    for (int i = 0; i < k; i++) {
        DBIDs ids = clusters.get(i);
        if (ids.size() == 0) {
            continue;
        }
        KMeansModel model = new KMeansModel(means[i], varsum[i]);
        result.addToplevelCluster(new Cluster<>(ids, model));
    }
    // Noise Cluster. noiseids is null when noiseFlag is set but heapsize is 0
    // (e.g. rate == 0); guard against that instead of dereferencing null.
    if (noiseFlag && noiseids != null) {
        KMeansModel model = new KMeansModel(null, 0);
        DBIDs ids = noiseids;
        if (ids.size() == 0) {
            return result;
        }
        result.addToplevelCluster(new Cluster<>(ids, true, model));
    }
    return result;
}
Also used : WritableIntegerDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableIntegerDataStore) KMeansModel(de.lmu.ifi.dbs.elki.data.model.KMeansModel) DoubleMinHeap(de.lmu.ifi.dbs.elki.utilities.datastructures.heap.DoubleMinHeap) ModifiableDoubleDBIDList(de.lmu.ifi.dbs.elki.database.ids.ModifiableDoubleDBIDList) DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) ArrayList(java.util.ArrayList) Clustering(de.lmu.ifi.dbs.elki.data.Clustering) DoubleStatistic(de.lmu.ifi.dbs.elki.logging.statistics.DoubleStatistic) StringStatistic(de.lmu.ifi.dbs.elki.logging.statistics.StringStatistic) DoubleDBIDListMIter(de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListMIter) IndefiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.IndefiniteProgress) LongStatistic(de.lmu.ifi.dbs.elki.logging.statistics.LongStatistic)

Aggregations

ModifiableDoubleDBIDList (de.lmu.ifi.dbs.elki.database.ids.ModifiableDoubleDBIDList)53 DBIDIter (de.lmu.ifi.dbs.elki.database.ids.DBIDIter)22 DoubleDBIDListIter (de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter)16 DBID (de.lmu.ifi.dbs.elki.database.ids.DBID)9 KNNList (de.lmu.ifi.dbs.elki.database.ids.KNNList)8 HashSetModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.HashSetModifiableDBIDs)6 ArrayDBIDs (de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs)5 ArrayList (java.util.ArrayList)5 Test (org.junit.Test)5 ArrayModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs)4 DBIDs (de.lmu.ifi.dbs.elki.database.ids.DBIDs)4 DoubleDBIDPair (de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDPair)4 ModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs)4 FiniteProgress (de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress)3 DoubleStatistic (de.lmu.ifi.dbs.elki.logging.statistics.DoubleStatistic)3 MeanVariance (de.lmu.ifi.dbs.elki.math.MeanVariance)3 AbortException (de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException)3 NumberVector (de.lmu.ifi.dbs.elki.data.NumberVector)2 DBIDArrayIter (de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter)2 DoubleDBIDListMIter (de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListMIter)2