Search in sources :

Example 1 with Heap

use of de.lmu.ifi.dbs.elki.utilities.datastructures.heap.Heap in project elki by elki-project.

the class HiCS method calculateSubspaces.

/**
 * Identifies high contrast subspaces in a given full-dimensional database.
 *
 * The search starts with all two-dimensional subspaces. The candidates kept
 * for one dimensionality are then joined pairwise into candidates of the next
 * dimensionality, until a round produces no further candidates. A candidate is
 * removed from the result again if any subspace generated in the following
 * round has a strictly higher contrast.
 *
 * @param relation the relation the HiCS should be evaluated for
 * @param subspaceIndex Subspace indexes
 * @param random Random generator, handed on to the contrast computation
 * @return a set of high contrast subspaces
 */
private Set<HiCSSubspace> calculateSubspaces(Relation<? extends NumberVector> relation, ArrayList<ArrayDBIDs> subspaceIndex, Random random) {
    final int dbdim = RelationUtil.dimensionality(relation);
    FiniteProgress dprog = LOG.isVerbose() ? new FiniteProgress("Subspace dimensionality", dbdim, LOG) : null;
    if (dprog != null) {
        dprog.setProcessed(2, LOG);
    }
    // Accumulated result, ordered by subspace.
    TreeSet<HiCSSubspace> subspaceList = new TreeSet<>(HiCSSubspace.SORT_BY_SUBSPACE);
    // Candidates of the current dimensionality, bounded in size by cutoff.
    TopBoundedHeap<HiCSSubspace> dDimensionalList = new TopBoundedHeap<>(cutoff, HiCSSubspace.SORT_BY_CONTRAST_ASC);
    FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Generating two-element subsets", (dbdim * (dbdim - 1)) >> 1, LOG) : null;
    // compute two-element sets of subspaces
    for (int i = 0; i < dbdim; i++) {
        for (int j = i + 1; j < dbdim; j++) {
            HiCSSubspace ts = new HiCSSubspace();
            ts.set(i);
            ts.set(j);
            calculateContrast(relation, ts, subspaceIndex, random);
            dDimensionalList.add(ts);
            LOG.incrementProcessed(prog);
        }
    }
    LOG.ensureCompleted(prog);
    IndefiniteProgress qprog = LOG.isVerbose() ? new IndefiniteProgress("Testing subspace candidates", LOG) : null;
    for (int d = 3; !dDimensionalList.isEmpty(); d++) {
        if (dprog != null) {
            dprog.setProcessed(d, LOG);
        }
        // The heap currently holds the retained (d-1)-dimensional subspaces:
        // record them in the result and keep them as join candidates.
        ArrayList<HiCSSubspace> candidateList = new ArrayList<>(dDimensionalList.size());
        for (Heap<HiCSSubspace>.UnorderedIter it = dDimensionalList.unorderedIter(); it.valid(); it.advance()) {
            subspaceList.add(it.get());
            candidateList.add(it.get());
        }
        // Reuse the heap for the d-dimensional subspaces generated below.
        dDimensionalList.clear();
        Collections.sort(candidateList, HiCSSubspace.SORT_BY_SUBSPACE);
        // TODO: optimize APRIORI style, by not even computing the bit set or?
        for (int i = 0; i < candidateList.size() - 1; i++) {
            HiCSSubspace set1 = candidateList.get(i);
            for (int j = i + 1; j < candidateList.size(); j++) {
                HiCSSubspace set2 = candidateList.get(j);
                HiCSSubspace joinedSet = new HiCSSubspace();
                joinedSet.or(set1);
                joinedSet.or(set2);
                // Only unions with exactly d dimensions are d-dimensional
                // candidates; all other pairs are skipped.
                if (joinedSet.cardinality() != d) {
                    continue;
                }
                calculateContrast(relation, joinedSet, subspaceIndex, random);
                dDimensionalList.add(joinedSet);
                LOG.incrementProcessed(qprog);
            }
        }
        // Prune: a candidate is dropped when some new subspace has a strictly
        // higher contrast. That holds exactly when the largest new contrast
        // exceeds it, so the heap is scanned once instead of once per candidate.
        // Explicit '>' comparisons (rather than Math.max) keep the NaN behavior
        // of the pairwise test: a NaN contrast never triggers a removal.
        double maxContrast = Double.NEGATIVE_INFINITY;
        for (Heap<HiCSSubspace>.UnorderedIter it = dDimensionalList.unorderedIter(); it.valid(); it.advance()) {
            final double contrast = it.get().contrast;
            if (contrast > maxContrast) {
                maxContrast = contrast;
            }
        }
        for (HiCSSubspace cand : candidateList) {
            if (maxContrast > cand.contrast) {
                subspaceList.remove(cand);
            }
        }
    }
    LOG.setCompleted(qprog);
    if (dprog != null) {
        dprog.setProcessed(dbdim, LOG);
        dprog.ensureCompleted(LOG);
    }
    return subspaceList;
}
Also used : TopBoundedHeap(de.lmu.ifi.dbs.elki.utilities.datastructures.heap.TopBoundedHeap) IndefiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.IndefiniteProgress) TreeSet(java.util.TreeSet) FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) ArrayList(java.util.ArrayList) Heap(de.lmu.ifi.dbs.elki.utilities.datastructures.heap.Heap) TopBoundedHeap(de.lmu.ifi.dbs.elki.utilities.datastructures.heap.TopBoundedHeap)

Example 2 with Heap

use of de.lmu.ifi.dbs.elki.utilities.datastructures.heap.Heap in project elki by elki-project.

the class AggarwalYuEvolutionary method run.

/**
 * Runs the evolutionary search on the given relation and turns the
 * individuals it returns into an outlier scoring.
 *
 * For every returned individual, the objects of its subspace are assigned the
 * sparsity value of that subspace, whenever this value is smaller than the one
 * stored so far (a stored NaN counts as "not yet set"). Scores that are still
 * NaN afterwards are replaced by 0.
 *
 * @param database Database
 * @param relation Relation
 * @return Result
 */
public OutlierResult run(Database database, Relation<V> relation) {
    final int dbsize = relation.size();
    ArrayList<ArrayList<DBIDs>> ranges = buildRanges(relation);
    EvolutionarySearch search = new EvolutionarySearch(relation, ranges, m, rnd.getSingleThreadedRandom());
    Heap<Individuum>.UnorderedIter best = search.run();
    WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_STATIC);
    // Score the members of each returned individual's subspace.
    while (best.valid()) {
        DBIDs members = computeSubspaceForGene(best.get().getGene(), ranges);
        final double coefficient = sparsity(members.size(), dbsize, k, phi);
        for (DBIDIter member = members.iter(); member.valid(); member.advance()) {
            final double stored = scores.doubleValue(member);
            final boolean unset = Double.isNaN(stored);
            if (unset || coefficient < stored) {
                scores.putDouble(member, coefficient);
            }
        }
        best.advance();
    }
    // Replace unset scores by 0 and track the observed score range.
    DoubleMinMax bounds = new DoubleMinMax();
    for (DBIDIter id = relation.iterDBIDs(); id.valid(); id.advance()) {
        double value = scores.doubleValue(id);
        if (Double.isNaN(value)) {
            value = 0.0;
            scores.putDouble(id, value);
        }
        bounds.put(value);
    }
    DoubleRelation scoreRelation = new MaterializedDoubleRelation("AggarwalYuEvolutionary", "aggarwal-yu-outlier", scores, relation.getDBIDs());
    OutlierScoreMeta scoreMeta = new InvertedOutlierScoreMeta(bounds.getMin(), bounds.getMax(), Double.NEGATIVE_INFINITY, 0.0);
    return new OutlierResult(scoreMeta, scoreRelation);
}
Also used : WritableDoubleDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore) DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) IntArrayList(it.unimi.dsi.fastutil.ints.IntArrayList) OutlierResult(de.lmu.ifi.dbs.elki.result.outlier.OutlierResult) InvertedOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.InvertedOutlierScoreMeta) DoubleRelation(de.lmu.ifi.dbs.elki.database.relation.DoubleRelation) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation) Heap(de.lmu.ifi.dbs.elki.utilities.datastructures.heap.Heap) TopBoundedHeap(de.lmu.ifi.dbs.elki.utilities.datastructures.heap.TopBoundedHeap) OutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta) InvertedOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.InvertedOutlierScoreMeta) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) DoubleMinMax(de.lmu.ifi.dbs.elki.math.DoubleMinMax) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation)

Example 3 with Heap

use of de.lmu.ifi.dbs.elki.utilities.datastructures.heap.Heap in project elki by elki-project.

the class XSplitter method getSurfaceSums4Sorting.

/**
 * Compute the surfaces of the <code>2 * (maxEntries - minEntries + 1)</code>
 * split MBRs resulting for the sorting <code>entrySorting</code>.
 *
 * The first group starts with the first <code>minEntries</code> positions of
 * the sorting and is extended one position at a time up to
 * <code>maxEntries</code> positions; the second group holds the remaining
 * positions and is reduced accordingly. For each of these distributions, the
 * perimeters of both groups' MBRs are added to the returned sum.
 *
 * @param minEntries minimally allowed subgroup size
 * @param maxEntries maximally allowed subgroup size for the first entry set
 * @param entrySorting a permutation of the indices of {@link #entries}
 * @param dim the dimension of the tree
 * @return the sum of all first and second MBRs' surfaces for the tested entry
 *         distributions
 */
private double getSurfaceSums4Sorting(int minEntries, int maxEntries, int[] entrySorting, int dim) {
    // avoid multiple MBR calculations by updating min/max-logs for the two
    // collections' bounds:
    // the first entries' maximum upper bounds
    double[] pqUBFirst = new double[dim];
    Arrays.fill(pqUBFirst, Double.NEGATIVE_INFINITY);
    // maintain the second entries' upper bounds, one heap per dimension
    List<Heap<DoubleIntPair>> pqUBSecond = new ArrayList<>(dim);
    for (int i = 0; i < dim; i++) {
        // Descending heap
        pqUBSecond.add(new TopBoundedHeap<DoubleIntPair>(maxEntries, Collections.reverseOrder()));
    }
    // the first entries' minimum lower bounds
    double[] pqLBFirst = new double[dim];
    Arrays.fill(pqLBFirst, Double.POSITIVE_INFINITY);
    // maintain the second entries' minimum lower bounds, one heap per dimension
    List<Heap<DoubleIntPair>> pqLBSecond = new ArrayList<>(dim);
    for (int i = 0; i < dim; i++) {
        // Ascending heap
        pqLBSecond.add(new TopBoundedHeap<DoubleIntPair>(maxEntries));
    }
    // initialize bounds for first entry collection
    for (int index = 0; index < minEntries; index++) {
        add2MBR(entrySorting, pqUBFirst, pqLBFirst, index);
    }
    // NOTE(review): mbr1 is never rebuilt, although pqLBFirst/pqUBFirst are
    // updated again in the final loop - presumably HyperBoundingBox keeps
    // references to these arrays rather than copying them; confirm.
    HyperBoundingBox mbr1 = new HyperBoundingBox(pqLBFirst, pqUBFirst);
    // fill bounding queues for the second entry collection
    double[] minSecond = new double[dim];
    double[] maxSecond = new double[dim];
    Arrays.fill(maxSecond, Double.NEGATIVE_INFINITY);
    Arrays.fill(minSecond, Double.POSITIVE_INFINITY);
    // the tail of the sorting (positions maxEntries and up) is expected to
    // consist of exactly minEntries positions
    assert entrySorting.length - maxEntries == minEntries;
    // initialize min/max entries of the second collections' tail
    for (int index = maxEntries; index < entrySorting.length; index++) {
        add2MBR(entrySorting, maxSecond, minSecond, index);
    }
    // store the tail's bounds as one aggregated heap entry per dimension
    for (int i = 0; i < dim; i++) {
        // with index entrySorting.length => never to be removed
        pqLBSecond.get(i).add(new DoubleIntPair(minSecond[i], entrySorting.length));
        pqUBSecond.get(i).add(new DoubleIntPair(maxSecond[i], entrySorting.length));
    }
    // add the entries to be removed later on
    for (int index = minEntries; index < maxEntries; index++) {
        add2MBR(entrySorting, pqUBSecond, pqLBSecond, index);
    }
    // read the second collection's current bounds from the heap tops
    // (presumably the smallest lower / largest upper bound per dimension, given
    // the ascending / descending heap orders above - confirm against Heap.peek)
    for (int i = 0; i < minSecond.length; i++) {
        minSecond[i] = pqLBSecond.get(i).peek().first;
        maxSecond[i] = pqUBSecond.get(i).peek().first;
    }
    ModifiableHyperBoundingBox mbr2 = new ModifiableHyperBoundingBox(minSecond, maxSecond);
    // surface sum of the initial distribution
    double surfaceSum = SpatialUtil.perimeter(mbr1) + SpatialUtil.perimeter(mbr2);
    // generate the other distributions and file the surface sums
    for (int limit = minEntries; limit < maxEntries; limit++) {
        // extend first MBR by entry at position entrySorting[limit]:
        add2MBR(entrySorting, pqUBFirst, pqLBFirst, limit);
        // shrink entry at position entrySorting[limit] from second MBR:
        // (mbr2 is handed to the helper, which is expected to update it)
        removeFromMBR(pqUBSecond, pqLBSecond, limit, mbr2);
        surfaceSum += SpatialUtil.perimeter(mbr1) + SpatialUtil.perimeter(mbr2);
    }
    return surfaceSum;
}
Also used : ArrayList(java.util.ArrayList) DoubleIntPair(de.lmu.ifi.dbs.elki.utilities.pairs.DoubleIntPair) ModifiableHyperBoundingBox(de.lmu.ifi.dbs.elki.data.ModifiableHyperBoundingBox) HyperBoundingBox(de.lmu.ifi.dbs.elki.data.HyperBoundingBox) ModifiableHyperBoundingBox(de.lmu.ifi.dbs.elki.data.ModifiableHyperBoundingBox) Heap(de.lmu.ifi.dbs.elki.utilities.datastructures.heap.Heap) TopBoundedHeap(de.lmu.ifi.dbs.elki.utilities.datastructures.heap.TopBoundedHeap)

Aggregations

Heap (de.lmu.ifi.dbs.elki.utilities.datastructures.heap.Heap)3 TopBoundedHeap (de.lmu.ifi.dbs.elki.utilities.datastructures.heap.TopBoundedHeap)3 ArrayList (java.util.ArrayList)2 HyperBoundingBox (de.lmu.ifi.dbs.elki.data.HyperBoundingBox)1 ModifiableHyperBoundingBox (de.lmu.ifi.dbs.elki.data.ModifiableHyperBoundingBox)1 WritableDoubleDataStore (de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore)1 DBIDIter (de.lmu.ifi.dbs.elki.database.ids.DBIDIter)1 DBIDs (de.lmu.ifi.dbs.elki.database.ids.DBIDs)1 DoubleRelation (de.lmu.ifi.dbs.elki.database.relation.DoubleRelation)1 MaterializedDoubleRelation (de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation)1 FiniteProgress (de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress)1 IndefiniteProgress (de.lmu.ifi.dbs.elki.logging.progress.IndefiniteProgress)1 DoubleMinMax (de.lmu.ifi.dbs.elki.math.DoubleMinMax)1 InvertedOutlierScoreMeta (de.lmu.ifi.dbs.elki.result.outlier.InvertedOutlierScoreMeta)1 OutlierResult (de.lmu.ifi.dbs.elki.result.outlier.OutlierResult)1 OutlierScoreMeta (de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta)1 DoubleIntPair (de.lmu.ifi.dbs.elki.utilities.pairs.DoubleIntPair)1 IntArrayList (it.unimi.dsi.fastutil.ints.IntArrayList)1 TreeSet (java.util.TreeSet)1