Use of de.lmu.ifi.dbs.elki.database.ids.HashSetModifiableDBIDs in project elki by elki-project.
The class Segments, method recursivelyFill().
private void recursivelyFill(List<List<? extends Cluster<?>>> cs, int depth, SetDBIDs first, SetDBIDs second, int[] path, boolean objectsegment) {
  final int numclusterings = cs.size();
  Iterator<? extends Cluster<?>> iter = cs.get(depth).iterator();
  for (int cnum = 0; iter.hasNext(); cnum++) {
    Cluster<?> clust = iter.next();
    // Compute intersections with the new cluster:
    // nfirstp := intersection(first, cluster)
    // ndelta1, ndelta2 := asymmetric differences of first and cluster
    // nsecond := intersection(second, cluster)
    HashSetModifiableDBIDs nfirstp = DBIDUtil.newHashSet(first.size());
    HashSetModifiableDBIDs ndelta1 = DBIDUtil.newHashSet(first);
    HashSetModifiableDBIDs ndelta2 = DBIDUtil.newHashSet();
    HashSetModifiableDBIDs nsecond = DBIDUtil.newHashSet(second.size());
    for (DBIDIter iter2 = clust.getIDs().iter(); iter2.valid(); iter2.advance()) {
      if (ndelta1.remove(iter2)) {
        nfirstp.add(iter2);
      } else {
        ndelta2.add(iter2);
      }
      if (second.contains(iter2)) {
        nsecond.add(iter2);
      }
    }
    if (nsecond.size() <= 0) {
      // disjoint
      continue;
    }
    if (nfirstp.size() > 0) {
      path[depth] = cnum;
      if (depth < numclusterings - 1) {
        recursivelyFill(cs, depth + 1, nfirstp, nsecond, path, objectsegment);
      } else {
        // Add to results.
        // In fact, nfirstp should equal nsecond here.
        int selfpairs = DBIDUtil.intersectionSize(nfirstp, nsecond);
        if (objectsegment) {
          makeOrUpdateSegment(path, nfirstp, (nfirstp.size() * nsecond.size()) - selfpairs);
        } else {
          makeOrUpdateSegment(path, null, (nfirstp.size() * nsecond.size()) - selfpairs);
        }
      }
    }
    // Elements that were in first, but not in the cluster
    if (ndelta1.size() > 0) {
      path[depth] = Segment.UNCLUSTERED;
      if (depth < numclusterings - 1) {
        recursivelyFill(cs, depth + 1, ndelta1, nsecond, path, false);
      } else {
        // Add to results.
        int selfpairs = DBIDUtil.intersectionSize(ndelta1, nsecond);
        makeOrUpdateSegment(path, null, (ndelta1.size() * nsecond.size()) - selfpairs);
      }
    }
    // It used to work in revision 9236, eventually go back to this code!
    if (ndelta2.size() > 0 && objectsegment) {
      int[] npath = new int[path.length];
      Arrays.fill(npath, Segment.UNCLUSTERED);
      npath[depth] = cnum;
      if (depth < numclusterings - 1) {
        recursivelyFill(cs, depth + 1, ndelta2, nsecond, npath, false);
      } else {
        // Add to results.
        int selfpairs = DBIDUtil.intersectionSize(ndelta2, nsecond);
        makeOrUpdateSegment(npath, null, (ndelta2.size() * nsecond.size()) - selfpairs);
      }
    }
  }
}
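The core of recursivelyFill() is a set-splitting idiom: copy first into a scratch set, then walk the cluster once, moving shared IDs into the intersection and collecting the rest as differences. The standalone sketch below isolates that idiom; it is not part of ELKI, the class name SplitSketch is made up, and it assumes DBIDUtil.importInteger is available to create toy IDs.

import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.HashSetModifiableDBIDs;

public class SplitSketch {
  public static void main(String[] args) {
    // Toy data: first = {0..9}, cluster = {5..14}
    HashSetModifiableDBIDs first = DBIDUtil.newHashSet();
    HashSetModifiableDBIDs cluster = DBIDUtil.newHashSet();
    for (int i = 0; i < 10; i++) {
      first.add(DBIDUtil.importInteger(i));
    }
    for (int i = 5; i < 15; i++) {
      cluster.add(DBIDUtil.importInteger(i));
    }
    // Same idiom as recursivelyFill(): remove() is both membership test and transfer.
    HashSetModifiableDBIDs inBoth = DBIDUtil.newHashSet(first.size());
    HashSetModifiableDBIDs onlyFirst = DBIDUtil.newHashSet(first); // working copy
    HashSetModifiableDBIDs onlyCluster = DBIDUtil.newHashSet();
    for (DBIDIter it = cluster.iter(); it.valid(); it.advance()) {
      if (onlyFirst.remove(it)) {
        inBoth.add(it); // was in first and in the cluster
      } else {
        onlyCluster.add(it); // cluster member not in first
      }
    }
    // Expected sizes: 5 / 5 / 5
    System.out.println(inBoth.size() + " / " + onlyFirst.size() + " / " + onlyCluster.size());
  }
}

Because remove() reports whether the ID was present, each cluster member is hashed only once, instead of a contains() check followed by a separate remove().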
Use of de.lmu.ifi.dbs.elki.database.ids.HashSetModifiableDBIDs in project elki by elki-project.
The class PreDeConNeighborPredicate, method computeLocalModel().
@Override
protected PreDeConModel computeLocalModel(DBIDRef id, DoubleDBIDList neighbors, Relation<V> relation) {
  final int referenceSetSize = neighbors.size();
  mvSize.put(referenceSetSize);
  // Shouldn't happen, but guard against an empty reference set:
  if (referenceSetSize < 1) {
    LOG.warning("Empty reference set - should at least include the query point!");
    return new PreDeConModel(Integer.MAX_VALUE, DBIDUtil.EMPTYDBIDS);
  }
  V obj = relation.get(id);
  final int dim = obj.getDimensionality();
  // Per-dimension variances:
  double[] s = new double[dim];
  for (DBIDIter neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) {
    V o = relation.get(neighbor);
    for (int d = 0; d < dim; d++) {
      final double diff = obj.doubleValue(d) - o.doubleValue(d);
      s[d] += diff * diff;
    }
  }
  // Adjust for sample size
  for (int d = 0; d < dim; d++) {
    s[d] /= referenceSetSize;
    mvVar.put(s[d]);
  }
  // Preference weight vector
  double[] weights = new double[dim];
  int pdim = 0;
  for (int d = 0; d < dim; d++) {
    if (s[d] <= settings.delta) {
      weights[d] = settings.kappa;
      pdim++;
    } else {
      weights[d] = 1.;
    }
  }
  // Check which neighbors survive
  HashSetModifiableDBIDs survivors = DBIDUtil.newHashSet(referenceSetSize);
  for (DBIDIter neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) {
    V o = relation.get(neighbor);
    // Weighted Euclidean distance:
    double dev = 0.;
    for (int d = 0; d < dim; d++) {
      final double diff = obj.doubleValue(d) - o.doubleValue(d);
      dev += weights[d] * diff * diff;
    }
    // Note: epsilon was squared - this saves us the sqrt here:
    if (dev <= epsilon) {
      survivors.add(neighbor);
    }
  }
  return new PreDeConModel(pdim, survivors);
}
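The survivor loop keeps a neighbor only if its preference-weighted squared Euclidean distance stays within the already-squared epsilon, so no square root is needed. A minimal sketch of that distance, using plain double[] vectors instead of ELKI NumberVectors (the class and method names here are illustrative, not ELKI API):

public class WeightedDistanceSketch {
  // Preference-weighted squared Euclidean distance, as in the survivor test above.
  static double weightedSqDist(double[] a, double[] b, double[] weights) {
    double dev = 0.;
    for (int d = 0; d < a.length; d++) {
      final double diff = a[d] - b[d];
      dev += weights[d] * diff * diff;
    }
    return dev; // compare against a squared epsilon, no sqrt required
  }

  public static void main(String[] args) {
    double[] p = { 1., 2., 3. }, q = { 1.5, 2., 2. };
    // Dimension 0 has low variance, so it gets the (hypothetical) kappa weight of 100.
    double[] w = { 100., 1., 1. };
    System.out.println(weightedSqDist(p, q, w)); // 100*0.25 + 0 + 1 = 26.0
  }
}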
Use of de.lmu.ifi.dbs.elki.database.ids.HashSetModifiableDBIDs in project elki by elki-project.
The class MaterializeKNNAndRKNNPreprocessor, method affectedRkNN().
/**
 * Extracts the DBIDs from the given collections of distance-ID pairs, then removes the given IDs from the result.
 *
 * @param extract a list of collections of DoubleDBIDPair to extract from
 * @param remove the IDs to remove
 * @return the remaining DBIDs as an array
 */
protected ArrayDBIDs affectedRkNN(List<? extends Collection<DoubleDBIDPair>> extract, DBIDs remove) {
  HashSetModifiableDBIDs ids = DBIDUtil.newHashSet();
  for (Collection<DoubleDBIDPair> drps : extract) {
    for (DoubleDBIDPair drp : drps) {
      ids.add(drp);
    }
  }
  ids.removeDBIDs(remove);
  // Convert back to array
  return DBIDUtil.newArray(ids);
}
Use of de.lmu.ifi.dbs.elki.database.ids.HashSetModifiableDBIDs in project elki by elki-project.
The class MaterializeKNNAndRKNNPreprocessor, method affectedkNN().
/**
 * Extracts the DBIDs from the given kNN lists, then removes the given IDs from the result.
 *
 * @param extract a list of kNN lists to extract from
 * @param remove the IDs to remove
 * @return the remaining DBIDs as an array
 */
protected ArrayDBIDs affectedkNN(List<? extends KNNList> extract, DBIDs remove) {
  HashSetModifiableDBIDs ids = DBIDUtil.newHashSet();
  for (KNNList drps : extract) {
    for (DBIDIter iter = drps.iter(); iter.valid(); iter.advance()) {
      ids.add(iter);
    }
  }
  ids.removeDBIDs(remove);
  // Convert back to array
  return DBIDUtil.newArray(ids);
}
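affectedRkNN() and affectedkNN() share one extract-then-remove idiom: gather candidate DBIDs into a HashSetModifiableDBIDs (which also deduplicates them), bulk-remove the deleted objects with removeDBIDs(), and hand back an array view. A standalone sketch of that idiom, again with made-up toy IDs via DBIDUtil.importInteger:

import de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.HashSetModifiableDBIDs;

public class ExtractRemoveSketch {
  public static void main(String[] args) {
    HashSetModifiableDBIDs ids = DBIDUtil.newHashSet();
    HashSetModifiableDBIDs remove = DBIDUtil.newHashSet();
    for (int i = 0; i < 8; i++) {
      ids.add(DBIDUtil.importInteger(i)); // candidates extracted from some result lists
    }
    remove.add(DBIDUtil.importInteger(3));
    remove.add(DBIDUtil.importInteger(4));
    ids.removeDBIDs(remove); // drop the deleted objects in one call
    ArrayDBIDs affected = DBIDUtil.newArray(ids); // convert back to array form
    System.out.println(affected.size()); // 6
  }
}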
Use of de.lmu.ifi.dbs.elki.database.ids.HashSetModifiableDBIDs in project elki by elki-project.
The class HilOut, method run().
public OutlierResult run(Database database, Relation<O> relation) {
  distq = database.getDistanceQuery(relation, getDistanceFunction());
  d = RelationUtil.dimensionality(relation);
  WritableDoubleDataStore hilout_weight = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC);
  // Compute extent of the data set.
  double[] min;
  // Actually "length of edge"
  double diameter = 0;
  {
    double[][] hbbs = RelationUtil.computeMinMax(relation);
    min = hbbs[0];
    double[] max = hbbs[1];
    for (int i = 0; i < d; i++) {
      diameter = Math.max(diameter, max[i] - min[i]);
    }
    // Enlarge bounding box to have equal edge lengths.
    for (int i = 0; i < d; i++) {
      double diff = (diameter - (max[i] - min[i])) * .5;
      min[i] -= diff;
      max[i] += diff;
    }
    if (LOG.isVerbose()) {
      LOG.verbose("Rescaling dataset by " + (1 / diameter) + " to fit the unit cube.");
    }
  }
  // Initialization part
  capital_n_star = capital_n = relation.size();
  HilbertFeatures h = new HilbertFeatures(relation, min, diameter);
  FiniteProgress progressHilOut = LOG.isVerbose() ? new FiniteProgress("HilOut iterations", d + 1, LOG) : null;
  FiniteProgress progressTrueOut = LOG.isVerbose() ? new FiniteProgress("True outliers found", n, LOG) : null;
  // Main part: 1. Phase max. d+1 loops
  for (int j = 0; j <= d && n_star < n; j++) {
    // initialize (clear) out and wlb - not 100% clear in the paper
    h.out.clear();
    h.wlb.clear();
    // Initialize Hilbert values in pf according to current shift
    h.initialize(.5 * j / (d + 1));
    // scan the Data according to the current shift; build out and wlb
    scan(h, (int) (k * capital_n / (double) capital_n_star));
    // determine the true outliers (n_star)
    trueOutliers(h);
    if (progressTrueOut != null) {
      progressTrueOut.setProcessed(n_star, LOG);
    }
    // Build the top Set as out + wlb
    h.top.clear();
    HashSetModifiableDBIDs top_keys = DBIDUtil.newHashSet(h.out.size());
    for (ObjectHeap.UnsortedIter<HilFeature> iter = h.out.unsortedIter(); iter.valid(); iter.advance()) {
      HilFeature entry = iter.get();
      top_keys.add(entry.id);
      h.top.add(entry);
    }
    for (ObjectHeap.UnsortedIter<HilFeature> iter = h.wlb.unsortedIter(); iter.valid(); iter.advance()) {
      HilFeature entry = iter.get();
      if (!top_keys.contains(entry.id)) {
        // No need to update top_keys - discarded
        h.top.add(entry);
      }
    }
    LOG.incrementProcessed(progressHilOut);
  }
  // 2. Phase: Additional Scan if less than n true outliers determined
  if (n_star < n) {
    h.out.clear();
    h.wlb.clear();
    // TODO: reinitialize shift to 0?
    scan(h, capital_n);
  }
  if (progressHilOut != null) {
    progressHilOut.setProcessed(d, LOG);
    progressHilOut.ensureCompleted(LOG);
  }
  if (progressTrueOut != null) {
    progressTrueOut.setProcessed(n, LOG);
    progressTrueOut.ensureCompleted(LOG);
  }
  DoubleMinMax minmax = new DoubleMinMax();
  // Return weights in out
  if (tn == ScoreType.TopN) {
    minmax.put(0.0);
    for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
      hilout_weight.putDouble(iditer, 0.0);
    }
    for (ObjectHeap.UnsortedIter<HilFeature> iter = h.out.unsortedIter(); iter.valid(); iter.advance()) {
      HilFeature ent = iter.get();
      minmax.put(ent.ubound);
      hilout_weight.putDouble(ent.id, ent.ubound);
    }
  } else {
    // Return all weights in pf
    for (HilFeature ent : h.pf) {
      minmax.put(ent.ubound);
      hilout_weight.putDouble(ent.id, ent.ubound);
    }
  }
  DoubleRelation scoreResult = new MaterializedDoubleRelation("HilOut weight", "hilout-weight", hilout_weight, relation.getDBIDs());
  OutlierScoreMeta scoreMeta = new BasicOutlierScoreMeta(minmax.getMin(), minmax.getMax(), 0.0, Double.POSITIVE_INFINITY);
  OutlierResult result = new OutlierResult(scoreMeta, scoreResult);
  return result;
}
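Inside run(), top_keys is only a duplicate filter: h.top is a plain list of HilFeature entries, so a HashSetModifiableDBIDs of the IDs already taken from h.out decides which h.wlb entries still get added. The sketch below (standalone, not the HilOut class, with toy IDs via DBIDUtil.importInteger) shows the same contains()/add() filtering while merging two DBID sets into an ArrayModifiableDBIDs, which, unlike a hash set, would otherwise accept duplicates:

import de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.HashSetModifiableDBIDs;

public class DedupMergeSketch {
  public static void main(String[] args) {
    HashSetModifiableDBIDs out = DBIDUtil.newHashSet();
    HashSetModifiableDBIDs wlb = DBIDUtil.newHashSet();
    for (int i = 0; i < 5; i++) {
      out.add(DBIDUtil.importInteger(i)); // out = {0..4}
    }
    for (int i = 3; i < 8; i++) {
      wlb.add(DBIDUtil.importInteger(i)); // wlb = {3..7}
    }
    HashSetModifiableDBIDs seen = DBIDUtil.newHashSet(out.size());
    ArrayModifiableDBIDs top = DBIDUtil.newArray();
    for (DBIDIter it = out.iter(); it.valid(); it.advance()) {
      seen.add(it);
      top.add(it);
    }
    for (DBIDIter it = wlb.iter(); it.valid(); it.advance()) {
      if (!seen.contains(it)) { // skip entries already taken from "out"
        top.add(it);
      }
    }
    System.out.println(top.size()); // 8: union of out and wlb, no duplicates
  }
}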