Search in sources :

Example 1 with HashSetModifiableDBIDs

use of de.lmu.ifi.dbs.elki.database.ids.HashSetModifiableDBIDs in project elki by elki-project.

From the class Segments: method recursivelyFill.

/**
 * Recursively intersect the current segment with each cluster of the
 * clustering at {@code depth}, and record the resulting segments.
 *
 * @param cs the list of clusterings (each a list of clusters)
 * @param depth index of the clustering currently being processed
 * @param first object ids of the current segment (first side)
 * @param second object ids of the current segment (second side)
 * @param path cluster indexes chosen so far, one slot per clustering
 * @param objectsegment whether we are still building an object segment
 */
private void recursivelyFill(List<List<? extends Cluster<?>>> cs, int depth, SetDBIDs first, SetDBIDs second, int[] path, boolean objectsegment) {
    final int numclusterings = cs.size();
    Iterator<? extends Cluster<?>> iter = cs.get(depth).iterator();
    for (int cnum = 0; iter.hasNext(); cnum++) {
        Cluster<?> clust = iter.next();
        // Compute intersections with new cluster.
        // nfp := intersection( first, cluster )
        // Adding asymmetric differences to nd1, nd2.
        // nse := intersection( second, cluster )
        HashSetModifiableDBIDs nfirstp = DBIDUtil.newHashSet(first.size());
        HashSetModifiableDBIDs ndelta1 = DBIDUtil.newHashSet(first);
        HashSetModifiableDBIDs ndelta2 = DBIDUtil.newHashSet();
        HashSetModifiableDBIDs nsecond = DBIDUtil.newHashSet(second.size());
        for (DBIDIter iter2 = clust.getIDs().iter(); iter2.valid(); iter2.advance()) {
            // remove() doubles as membership test: moved ids form the intersection,
            // the remainder of the cluster goes to ndelta2.
            if (ndelta1.remove(iter2)) {
                nfirstp.add(iter2);
            } else {
                ndelta2.add(iter2);
            }
            if (second.contains(iter2)) {
                nsecond.add(iter2);
            }
        }
        if (nsecond.size() <= 0) {
            // disjoint
            continue;
        }
        if (nfirstp.size() > 0) {
            path[depth] = cnum;
            if (depth < numclusterings - 1) {
                recursivelyFill(cs, depth + 1, nfirstp, nsecond, path, objectsegment);
            } else {
                // Add to results.
                // In fact, nfirstp should equal nsecond here
                int selfpairs = DBIDUtil.intersectionSize(nfirstp, nsecond);
                if (objectsegment) {
                    makeOrUpdateSegment(path, nfirstp, (nfirstp.size() * nsecond.size()) - selfpairs);
                } else {
                    makeOrUpdateSegment(path, null, (nfirstp.size() * nsecond.size()) - selfpairs);
                }
            }
        }
        // Elements that were in first, but not in the cluster
        if (ndelta1.size() > 0) {
            path[depth] = Segment.UNCLUSTERED;
            if (depth < numclusterings - 1) {
                recursivelyFill(cs, depth + 1, ndelta1, nsecond, path, false);
            } else {
                // Add to results.
                // Use intersectionSize (as above) instead of materializing the
                // intersection set just to take its size.
                int selfpairs = DBIDUtil.intersectionSize(ndelta1, nsecond);
                makeOrUpdateSegment(path, null, (ndelta1.size() * nsecond.size()) - selfpairs);
            }
        }
        // It used to work in revision 9236, eventually go back to this code!
        if (ndelta2.size() > 0 && objectsegment) {
            int[] npath = new int[path.length];
            Arrays.fill(npath, Segment.UNCLUSTERED);
            npath[depth] = cnum;
            if (depth < numclusterings - 1) {
                recursivelyFill(cs, depth + 1, ndelta2, nsecond, npath, false);
            } else {
                // Add to results.
                // Same intersectionSize optimization as above.
                int selfpairs = DBIDUtil.intersectionSize(ndelta2, nsecond);
                makeOrUpdateSegment(npath, null, (ndelta2.size() * nsecond.size()) - selfpairs);
            }
        }
    }
}
Also used : HashSetModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.HashSetModifiableDBIDs) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter)

Example 2 with HashSetModifiableDBIDs

use of de.lmu.ifi.dbs.elki.database.ids.HashSetModifiableDBIDs in project elki by elki-project.

From the class PreDeConNeighborPredicate: method computeLocalModel.

@Override
/**
 * Compute the local PreDeCon model (preference dimensionality and the
 * neighbors surviving the weighted-distance test) for one object.
 *
 * @param id the query object
 * @param neighbors epsilon-neighborhood of the query (expected to contain the query itself)
 * @param relation data relation to read vectors from
 * @return local PreDeCon model for {@code id}
 */
@Override
protected PreDeConModel computeLocalModel(DBIDRef id, DoubleDBIDList neighbors, Relation<V> relation) {
    final int referenceSetSize = neighbors.size();
    mvSize.put(referenceSetSize);
    // Shouldn't happen: the neighborhood should at least contain the query point.
    // BUGFIX: was "referenceSetSize < 0", which is dead code since size() is
    // never negative; an empty set would have fallen through and produced
    // NaN variances (0/0) below. Test for emptiness instead.
    if (referenceSetSize <= 0) {
        LOG.warning("Empty reference set - should at least include the query point!");
        return new PreDeConModel(Integer.MAX_VALUE, DBIDUtil.EMPTYDBIDS);
    }
    V obj = relation.get(id);
    final int dim = obj.getDimensionality();
    // Per-dimension variances:
    double[] s = new double[dim];
    for (DBIDIter neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) {
        V o = relation.get(neighbor);
        for (int d = 0; d < dim; d++) {
            final double diff = obj.doubleValue(d) - o.doubleValue(d);
            s[d] += diff * diff;
        }
    }
    // Adjust for sample size
    for (int d = 0; d < dim; d++) {
        s[d] /= referenceSetSize;
        mvVar.put(s[d]);
    }
    // Preference weight vector: low-variance dimensions get weight kappa,
    // the others weight 1.
    double[] weights = new double[dim];
    int pdim = 0;
    for (int d = 0; d < dim; d++) {
        if (s[d] <= settings.delta) {
            weights[d] = settings.kappa;
            pdim++;
        } else {
            weights[d] = 1.;
        }
    }
    // Check which neighbors survive
    HashSetModifiableDBIDs survivors = DBIDUtil.newHashSet(referenceSetSize);
    for (DBIDIter neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) {
        V o = relation.get(neighbor);
        // Weighted Euclidean distance:
        double dev = 0.;
        for (int d = 0; d < dim; d++) {
            final double diff = obj.doubleValue(d) - o.doubleValue(d);
            dev += weights[d] * diff * diff;
        }
        // Note: epsilon was squared - this saves us the sqrt here:
        if (dev <= epsilon) {
            survivors.add(neighbor);
        }
    }
    return new PreDeConModel(pdim, survivors);
}
Also used : HashSetModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.HashSetModifiableDBIDs) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter)

Example 3 with HashSetModifiableDBIDs

use of de.lmu.ifi.dbs.elki.database.ids.HashSetModifiableDBIDs in project elki by elki-project.

the class MaterializeKNNAndRKNNPreprocessor method affectedRkNN.

/**
 * Collect the DBIDs occurring in the given collections of distance-id
 * pairs, excluding a set of ids to be removed.
 *
 * @param extract collections of DoubleDBIDPair whose ids shall be collected
 * @param remove the ids to remove from the result
 * @return the remaining DBIDs, deduplicated and converted to an array
 */
protected ArrayDBIDs affectedRkNN(List<? extends Collection<DoubleDBIDPair>> extract, DBIDs remove) {
    HashSetModifiableDBIDs collected = DBIDUtil.newHashSet();
    // Flatten all pair lists into one deduplicating set of ids.
    for (Collection<DoubleDBIDPair> pairs : extract) {
        for (DoubleDBIDPair pair : pairs) {
            collected.add(pair);
        }
    }
    collected.removeDBIDs(remove);
    // Convert back to array
    return DBIDUtil.newArray(collected);
}
Also used : HashSetModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.HashSetModifiableDBIDs) DoubleDBIDPair(de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDPair)

Example 4 with HashSetModifiableDBIDs

use of de.lmu.ifi.dbs.elki.database.ids.HashSetModifiableDBIDs in project elki by elki-project.

the class MaterializeKNNAndRKNNPreprocessor method affectedkNN.

/**
 * Collect the DBIDs occurring in the given kNN lists, excluding a set of
 * ids to be removed.
 *
 * @param extract the kNN lists whose member ids shall be collected
 * @param remove the ids to remove from the result
 * @return the remaining DBIDs, deduplicated and converted to an array
 */
protected ArrayDBIDs affectedkNN(List<? extends KNNList> extract, DBIDs remove) {
    HashSetModifiableDBIDs collected = DBIDUtil.newHashSet();
    // Flatten all kNN lists into one deduplicating set of ids.
    for (KNNList knns : extract) {
        for (DBIDIter it = knns.iter(); it.valid(); it.advance()) {
            collected.add(it);
        }
    }
    collected.removeDBIDs(remove);
    // Convert back to array
    return DBIDUtil.newArray(collected);
}
Also used : HashSetModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.HashSetModifiableDBIDs) KNNList(de.lmu.ifi.dbs.elki.database.ids.KNNList) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter)

Example 5 with HashSetModifiableDBIDs

use of de.lmu.ifi.dbs.elki.database.ids.HashSetModifiableDBIDs in project elki by elki-project.

From the class HilOut: method run.

/**
 * Run the HilOut outlier detection algorithm on the given relation.
 *
 * NOTE(review): this method reads and mutates several fields of the
 * enclosing class (distq, d, n, n_star, k, tn, capital_n, capital_n_star)
 * that are not visible in this excerpt; their semantics are inferred from
 * usage here and should be confirmed against the class definition.
 *
 * @param database the database, used to obtain the distance query
 * @param relation the data relation to process
 * @return outlier result holding one weight per object
 */
public OutlierResult run(Database database, Relation<O> relation) {
    distq = database.getDistanceQuery(relation, getDistanceFunction());
    d = RelationUtil.dimensionality(relation);
    WritableDoubleDataStore hilout_weight = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC);
    // Compute extend of dataset.
    double[] min;
    // Actually "length of edge"
    double diameter = 0;
    {
        double[][] hbbs = RelationUtil.computeMinMax(relation);
        min = hbbs[0];
        double[] max = hbbs[1];
        // diameter = longest edge of the bounding box.
        for (int i = 0; i < d; i++) {
            diameter = Math.max(diameter, max[i] - min[i]);
        }
        // Enlarge bounding box to have equal lengths.
        for (int i = 0; i < d; i++) {
            double diff = (diameter - (max[i] - min[i])) * .5;
            min[i] -= diff;
            max[i] += diff;
        }
        if (LOG.isVerbose()) {
            LOG.verbose("Rescaling dataset by " + (1 / diameter) + " to fit the unit cube.");
        }
    }
    // Initialization part
    capital_n_star = capital_n = relation.size();
    HilbertFeatures h = new HilbertFeatures(relation, min, diameter);
    FiniteProgress progressHilOut = LOG.isVerbose() ? new FiniteProgress("HilOut iterations", d + 1, LOG) : null;
    FiniteProgress progressTrueOut = LOG.isVerbose() ? new FiniteProgress("True outliers found", n, LOG) : null;
    // Main part: 1. Phase max. d+1 loops
    // Each iteration uses a different Hilbert-curve shift; stop early once
    // enough (n) true outliers have been confirmed.
    for (int j = 0; j <= d && n_star < n; j++) {
        // initialize (clear) out and wlb - not 100% clear in the paper
        h.out.clear();
        h.wlb.clear();
        // Initialize Hilbert values in pf according to current shift
        h.initialize(.5 * j / (d + 1));
        // scan the Data according to the current shift; build out and wlb
        scan(h, (int) (k * capital_n / (double) capital_n_star));
        // determine the true outliers (n_star)
        trueOutliers(h);
        if (progressTrueOut != null) {
            progressTrueOut.setProcessed(n_star, LOG);
        }
        // Build the top Set as out + wlb
        h.top.clear();
        // top_keys tracks ids already added from h.out, so that entries from
        // h.wlb are not inserted twice.
        HashSetModifiableDBIDs top_keys = DBIDUtil.newHashSet(h.out.size());
        for (ObjectHeap.UnsortedIter<HilFeature> iter = h.out.unsortedIter(); iter.valid(); iter.advance()) {
            HilFeature entry = iter.get();
            top_keys.add(entry.id);
            h.top.add(entry);
        }
        for (ObjectHeap.UnsortedIter<HilFeature> iter = h.wlb.unsortedIter(); iter.valid(); iter.advance()) {
            HilFeature entry = iter.get();
            if (!top_keys.contains(entry.id)) {
                // No need to update top_keys - discarded
                h.top.add(entry);
            }
        }
        LOG.incrementProcessed(progressHilOut);
    }
    // 2. Phase: Additional Scan if less than n true outliers determined
    if (n_star < n) {
        h.out.clear();
        h.wlb.clear();
        // TODO: reinitialize shift to 0?
        scan(h, capital_n);
    }
    if (progressHilOut != null) {
        progressHilOut.setProcessed(d, LOG);
        progressHilOut.ensureCompleted(LOG);
    }
    if (progressTrueOut != null) {
        progressTrueOut.setProcessed(n, LOG);
        progressTrueOut.ensureCompleted(LOG);
    }
    DoubleMinMax minmax = new DoubleMinMax();
    // Return weights in out
    if (tn == ScoreType.TopN) {
        // Top-N mode: all objects default to weight 0, only the heap entries
        // receive their upper-bound score.
        minmax.put(0.0);
        for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
            hilout_weight.putDouble(iditer, 0.0);
        }
        for (ObjectHeap.UnsortedIter<HilFeature> iter = h.out.unsortedIter(); iter.valid(); iter.advance()) {
            HilFeature ent = iter.get();
            minmax.put(ent.ubound);
            hilout_weight.putDouble(ent.id, ent.ubound);
        }
    } else // Return all weights in pf
    {
        for (HilFeature ent : h.pf) {
            minmax.put(ent.ubound);
            hilout_weight.putDouble(ent.id, ent.ubound);
        }
    }
    DoubleRelation scoreResult = new MaterializedDoubleRelation("HilOut weight", "hilout-weight", hilout_weight, relation.getDBIDs());
    OutlierScoreMeta scoreMeta = new BasicOutlierScoreMeta(minmax.getMin(), minmax.getMax(), 0.0, Double.POSITIVE_INFINITY);
    OutlierResult result = new OutlierResult(scoreMeta, scoreResult);
    return result;
}
Also used : WritableDoubleDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore) FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) ObjectHeap(de.lmu.ifi.dbs.elki.utilities.datastructures.heap.ObjectHeap) OutlierResult(de.lmu.ifi.dbs.elki.result.outlier.OutlierResult) DoubleRelation(de.lmu.ifi.dbs.elki.database.relation.DoubleRelation) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation) BasicOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta) OutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta) BasicOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) HashSetModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.HashSetModifiableDBIDs) DoubleMinMax(de.lmu.ifi.dbs.elki.math.DoubleMinMax) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation)

Aggregations

HashSetModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.HashSetModifiableDBIDs)21 DBIDIter (de.lmu.ifi.dbs.elki.database.ids.DBIDIter)8 DBIDs (de.lmu.ifi.dbs.elki.database.ids.DBIDs)5 ModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs)5 ModifiableDoubleDBIDList (de.lmu.ifi.dbs.elki.database.ids.ModifiableDoubleDBIDList)5 ArrayList (java.util.ArrayList)5 DBID (de.lmu.ifi.dbs.elki.database.ids.DBID)4 Test (org.junit.Test)4 ArrayModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs)3 NumberVector (de.lmu.ifi.dbs.elki.data.NumberVector)2 DBIDArrayIter (de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter)2 KNNList (de.lmu.ifi.dbs.elki.database.ids.KNNList)2 SetDBIDs (de.lmu.ifi.dbs.elki.database.ids.SetDBIDs)2 FiniteProgress (de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress)2 PCAFilteredResult (de.lmu.ifi.dbs.elki.math.linearalgebra.pca.PCAFilteredResult)2 SortedEigenPairs (de.lmu.ifi.dbs.elki.math.linearalgebra.pca.SortedEigenPairs)2 PreDeConModel (de.lmu.ifi.dbs.elki.algorithm.clustering.gdbscan.PreDeConNeighborPredicate.PreDeConModel)1 Cluster (de.lmu.ifi.dbs.elki.data.Cluster)1 Clustering (de.lmu.ifi.dbs.elki.data.Clustering)1 DoubleVector (de.lmu.ifi.dbs.elki.data.DoubleVector)1