Use of de.lmu.ifi.dbs.elki.utilities.datastructures.heap.Heap in project elki by elki-project.
The class HiCS, method calculateSubspaces.
/**
 * Identifies high contrast subspaces in a given full-dimensional database.
 *
 * @param relation the relation HiCS should be evaluated for
 * @param subspaceIndex Subspace indexes
 * @param random Random generator
 * @return a set of high contrast subspaces
 */
private Set<HiCSSubspace> calculateSubspaces(Relation<? extends NumberVector> relation, ArrayList<ArrayDBIDs> subspaceIndex, Random random) {
  final int dbdim = RelationUtil.dimensionality(relation);
  FiniteProgress dprog = LOG.isVerbose() ? new FiniteProgress("Subspace dimensionality", dbdim, LOG) : null;
  if(dprog != null) {
    dprog.setProcessed(2, LOG);
  }
  TreeSet<HiCSSubspace> subspaceList = new TreeSet<>(HiCSSubspace.SORT_BY_SUBSPACE);
  TopBoundedHeap<HiCSSubspace> dDimensionalList = new TopBoundedHeap<>(cutoff, HiCSSubspace.SORT_BY_CONTRAST_ASC);
  FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Generating two-element subsets", (dbdim * (dbdim - 1)) >> 1, LOG) : null;
  // compute two-element sets of subspaces
  for(int i = 0; i < dbdim; i++) {
    for(int j = i + 1; j < dbdim; j++) {
      HiCSSubspace ts = new HiCSSubspace();
      ts.set(i);
      ts.set(j);
      calculateContrast(relation, ts, subspaceIndex, random);
      dDimensionalList.add(ts);
      LOG.incrementProcessed(prog);
    }
  }
  LOG.ensureCompleted(prog);
  IndefiniteProgress qprog = LOG.isVerbose() ? new IndefiniteProgress("Testing subspace candidates", LOG) : null;
  for(int d = 3; !dDimensionalList.isEmpty(); d++) {
    if(dprog != null) {
      dprog.setProcessed(d, LOG);
    }
    // dDimensionalList now contains the best (d-1)-dimensional subspaces
    ArrayList<HiCSSubspace> candidateList = new ArrayList<>(dDimensionalList.size());
    for(Heap<HiCSSubspace>.UnorderedIter it = dDimensionalList.unorderedIter(); it.valid(); it.advance()) {
      subspaceList.add(it.get());
      candidateList.add(it.get());
    }
    dDimensionalList.clear();
    // candidateList now contains the *m* best (d-1)-dimensional sets
    Collections.sort(candidateList, HiCSSubspace.SORT_BY_SUBSPACE);
    // TODO: optimize APRIORI style, by not even computing the bitset OR?
    for(int i = 0; i < candidateList.size() - 1; i++) {
      for(int j = i + 1; j < candidateList.size(); j++) {
        HiCSSubspace set1 = candidateList.get(i);
        HiCSSubspace set2 = candidateList.get(j);
        HiCSSubspace joinedSet = new HiCSSubspace();
        joinedSet.or(set1);
        joinedSet.or(set2);
        if(joinedSet.cardinality() != d) {
          continue;
        }
        calculateContrast(relation, joinedSet, subspaceIndex, random);
        dDimensionalList.add(joinedSet);
        LOG.incrementProcessed(qprog);
      }
    }
    // Prune: drop a candidate when some d-dimensional superset has higher contrast
    for(HiCSSubspace cand : candidateList) {
      for(Heap<HiCSSubspace>.UnorderedIter it = dDimensionalList.unorderedIter(); it.valid(); it.advance()) {
        if(it.get().contrast > cand.contrast) {
          subspaceList.remove(cand);
          break;
        }
      }
    }
  }
  LOG.setCompleted(qprog);
  if(dprog != null) {
    dprog.setProcessed(dbdim, LOG);
    dprog.ensureCompleted(LOG);
  }
  return subspaceList;
}
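The candidate bookkeeping above hinges on TopBoundedHeap: bounded at cutoff elements and ordered by SORT_BY_CONTRAST_ASC, it keeps only the highest-contrast subspaces per dimensionality. Below is a minimal sketch of that pattern using only the API visible above (the bounded constructor, add, unorderedIter). The Candidate class is a hypothetical stand-in for HiCSSubspace, the TopBoundedHeap package is assumed to match Heap's, and the overflow behavior (discarding the heap's top, i.e. the weakest element under an ascending comparator) is inferred from how calculateSubspaces uses it:

import java.util.Comparator;

import de.lmu.ifi.dbs.elki.utilities.datastructures.heap.Heap;
import de.lmu.ifi.dbs.elki.utilities.datastructures.heap.TopBoundedHeap;

public class TopKExample {
  // Hypothetical stand-in for HiCSSubspace; only the contrast value matters here.
  static class Candidate {
    double contrast;

    Candidate(double contrast) {
      this.contrast = contrast;
    }
  }

  public static void main(String[] args) {
    // Bound the heap at 3 elements; with an ascending comparator the weakest
    // retained candidate sits at the top, so (assuming overflow discards the
    // top element) only the 3 highest contrasts survive.
    TopBoundedHeap<Candidate> best = new TopBoundedHeap<>(3, Comparator.comparingDouble((Candidate c) -> c.contrast));
    for(double contrast : new double[] { 0.1, 0.9, 0.4, 0.7, 0.2 }) {
      best.add(new Candidate(contrast));
    }
    // UnorderedIter visits the survivors in heap layout order, not sorted order,
    // which is why calculateSubspaces re-sorts candidateList afterwards.
    for(Heap<Candidate>.UnorderedIter it = best.unorderedIter(); it.valid(); it.advance()) {
      System.out.println(it.get().contrast); // expected: 0.9, 0.7, 0.4 in some order
    }
  }
}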
Use of de.lmu.ifi.dbs.elki.utilities.datastructures.heap.Heap in project elki by elki-project.
The class AggarwalYuEvolutionary, method run.
/**
* Performs the evolutionary algorithm on the given database.
*
* @param database Database
* @param relation Relation
* @return Result
*/
public OutlierResult run(Database database, Relation<V> relation) {
  final int dbsize = relation.size();
  ArrayList<ArrayList<DBIDs>> ranges = buildRanges(relation);
  Heap<Individuum>.UnorderedIter individuums = (new EvolutionarySearch(relation, ranges, m, rnd.getSingleThreadedRandom())).run();
  WritableDoubleDataStore outlierScore = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_STATIC);
  // For each object, keep the minimum (most negative) sparsity coefficient
  // over all subspaces found by the evolutionary search.
  for(; individuums.valid(); individuums.advance()) {
    DBIDs ids = computeSubspaceForGene(individuums.get().getGene(), ranges);
    double sparsityC = sparsity(ids.size(), dbsize, k, phi);
    for(DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
      double prev = outlierScore.doubleValue(iter);
      if(Double.isNaN(prev) || sparsityC < prev) {
        outlierScore.putDouble(iter, sparsityC);
      }
    }
  }
  DoubleMinMax minmax = new DoubleMinMax();
  // Objects never covered by any subspace get a neutral score of 0.
  for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
    double val = outlierScore.doubleValue(iditer);
    if(Double.isNaN(val)) {
      outlierScore.putDouble(iditer, 0.0);
      val = 0.0;
    }
    minmax.put(val);
  }
  DoubleRelation scoreResult = new MaterializedDoubleRelation("AggarwalYuEvolutionary", "aggarwal-yu-outlier", outlierScore, relation.getDBIDs());
  // Inverted meta: lower (more negative) sparsity coefficients mean stronger outliers.
  OutlierScoreMeta meta = new InvertedOutlierScoreMeta(minmax.getMin(), minmax.getMax(), Double.NEGATIVE_INFINITY, 0.0);
  return new OutlierResult(meta, scoreResult);
}
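The scoring loop deliberately keeps the minimum coefficient per object: an object may fall into several sparse subspaces, and the sparsest one (most negative coefficient) determines its outlierness, which is why an inverted score meta is used. A minimal plain-Java sketch of that keep-the-minimum pattern, with a HashMap standing in for the WritableDoubleDataStore (the ids and coefficients are illustrative):

import java.util.HashMap;
import java.util.Map;

public class MinScoreExample {
  public static void main(String[] args) {
    // Hypothetical (objectId, sparsity coefficient) pairs, as if reported by
    // several abnormal subspaces; ids may repeat across subspaces.
    int[] ids = { 1, 2, 1, 3, 2 };
    double[] sparsity = { -1.5, -0.3, -2.0, -0.7, -1.1 };
    Map<Integer, Double> score = new HashMap<>();
    for(int i = 0; i < ids.length; i++) {
      // Same rule as the NaN check in run(): keep the smallest (most negative)
      // coefficient seen for each object.
      score.merge(ids[i], sparsity[i], Math::min);
    }
    System.out.println(score); // {1=-2.0, 2=-1.1, 3=-0.7}
  }
}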
Use of de.lmu.ifi.dbs.elki.utilities.datastructures.heap.Heap in project elki by elki-project.
The class XSplitter, method getSurfaceSums4Sorting.
/**
* Compute the surfaces of the <code>2 * (maxEntries - minEntries + 1)</code>
* split MBRs resulting for the sorting <code>entrySorting</code>.
*
* @param minEntries minimally allowed subgroup size
* @param maxEntries maximally allowed subgroup size for the first entry set
* @param entrySorting a permutation of the indices of {@link #entries}
* @param dim the dimension of the tree
* @return the sum of all first and second MBRs' surfaces for the tested entry
* distributions
*/
private double getSurfaceSums4Sorting(int minEntries, int maxEntries, int[] entrySorting, int dim) {
  // avoid multiple MBR calculations by updating min/max-logs for the two
  // collections' bounds:
  // the first entries' maximum upper bounds
  double[] pqUBFirst = new double[dim];
  Arrays.fill(pqUBFirst, Double.NEGATIVE_INFINITY);
  // maintain the second entries' upper bounds
  List<Heap<DoubleIntPair>> pqUBSecond = new ArrayList<>(dim);
  for(int i = 0; i < dim; i++) {
    // Descending heap
    pqUBSecond.add(new TopBoundedHeap<DoubleIntPair>(maxEntries, Collections.reverseOrder()));
  }
  // the first entries' minimum lower bounds
  double[] pqLBFirst = new double[dim];
  Arrays.fill(pqLBFirst, Double.POSITIVE_INFINITY);
  // maintain the second entries' minimum lower bounds
  List<Heap<DoubleIntPair>> pqLBSecond = new ArrayList<>(dim);
  for(int i = 0; i < dim; i++) {
    // Ascending heap
    pqLBSecond.add(new TopBoundedHeap<DoubleIntPair>(maxEntries));
  }
  // initialize bounds for first entry collection
  for(int index = 0; index < minEntries; index++) {
    add2MBR(entrySorting, pqUBFirst, pqLBFirst, index);
  }
  // mbr1 keeps references to the bound arrays, so later add2MBR calls on
  // pqLBFirst/pqUBFirst update it in place
  HyperBoundingBox mbr1 = new HyperBoundingBox(pqLBFirst, pqUBFirst);
  // fill bounding queues for the second entry collection
  double[] minSecond = new double[dim];
  double[] maxSecond = new double[dim];
  Arrays.fill(maxSecond, Double.NEGATIVE_INFINITY);
  Arrays.fill(minSecond, Double.POSITIVE_INFINITY);
  assert entrySorting.length - maxEntries == minEntries;
  // initialize min/max entries of the second collection's tail
  for(int index = maxEntries; index < entrySorting.length; index++) {
    add2MBR(entrySorting, maxSecond, minSecond, index);
  }
  for(int i = 0; i < dim; i++) {
    // sentinel with index entrySorting.length => never to be removed
    pqLBSecond.get(i).add(new DoubleIntPair(minSecond[i], entrySorting.length));
    pqUBSecond.get(i).add(new DoubleIntPair(maxSecond[i], entrySorting.length));
  }
  // add the entries to be removed later on
  for(int index = minEntries; index < maxEntries; index++) {
    add2MBR(entrySorting, pqUBSecond, pqLBSecond, index);
  }
  for(int i = 0; i < minSecond.length; i++) {
    minSecond[i] = pqLBSecond.get(i).peek().first;
    maxSecond[i] = pqUBSecond.get(i).peek().first;
  }
  ModifiableHyperBoundingBox mbr2 = new ModifiableHyperBoundingBox(minSecond, maxSecond);
  double surfaceSum = SpatialUtil.perimeter(mbr1) + SpatialUtil.perimeter(mbr2);
  // generate the other distributions and accumulate the surface sums
  for(int limit = minEntries; limit < maxEntries; limit++) {
    // extend first MBR by entry at position entrySorting[limit]:
    add2MBR(entrySorting, pqUBFirst, pqLBFirst, limit);
    // shrink second MBR by removing entry at position entrySorting[limit]:
    removeFromMBR(pqUBSecond, pqLBSecond, limit, mbr2);
    surfaceSum += SpatialUtil.perimeter(mbr1) + SpatialUtil.perimeter(mbr2);
  }
  return surfaceSum;
}
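The per-dimension heaps implement a lazy-deletion scheme: each bound is tagged with the sorting position it belongs to, and the sentinel added with index entrySorting.length guarantees the heaps never run empty, since it covers the immovable tail of the second group. Below is a minimal sketch of that idea in one dimension, using java.util.PriorityQueue instead of ELKI's Heap; the double[] pairs and the stale-head loop are illustrative (removeFromMBR's actual body is not shown above):

import java.util.Comparator;
import java.util.PriorityQueue;

public class BoundHeapExample {
  public static void main(String[] args) {
    // Lower bounds of the "second" group in one dimension, each tagged with
    // the sorting position it came from; position 5 plays the role of the
    // sentinel index entrySorting.length and is never removed.
    PriorityQueue<double[]> lb = new PriorityQueue<>(Comparator.comparingDouble((double[] p) -> p[0]));
    lb.add(new double[] { 0.8, 5 }); // sentinel: minimum of the immovable tail
    lb.add(new double[] { 0.2, 2 }); // removable entries
    lb.add(new double[] { 0.5, 3 });
    System.out.println(lb.peek()[0]); // 0.2: current lower bound
    // Move the entry at sorting position 2 into the first group: lazily discard
    // stale heads whose position now falls below the split limit.
    int limit = 3;
    while(lb.peek()[1] < limit) {
      lb.poll();
    }
    System.out.println(lb.peek()[0]); // 0.5: the bound shrinks without a rescan
  }
}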