Use of de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress in project elki by elki-project.
Class ALOCI, method run.
/**
 * Runs aLOCI on the given relation.
 * <p>
 * First builds {@code g} quadtrees over an equal-sided bounding box of the
 * data: one with an all-zero shift vector and the rest with random shift
 * vectors. Then every object is scored with the largest normalized MDEF value
 * found while walking the tree levels upward from level 0.
 *
 * @param database Database (not referenced in this method body)
 * @param relation Relation to process
 * @return Outlier result holding the per-object scores and their min/max
 */
public OutlierResult run(Database database, Relation<O> relation) {
  final int dim = RelationUtil.dimensionality(relation);
  final Random random = rnd.getSingleThreadedRandom();
  FiniteProgress progressPreproc = null;
  if (LOG.isVerbose()) {
    progressPreproc = new FiniteProgress("Build aLOCI quadtress", g, LOG);
  }

  // Extent of the data set, per dimension.
  final double[][] bounds = RelationUtil.computeMinMax(relation);
  final double[] min = bounds[0];
  final double[] max = bounds[1];
  // Longest side of the bounding box.
  double widest = 0;
  for (int d = 0; d < dim; d++) {
    widest = MathUtil.max(widest, max[d] - min[d]);
  }
  // Pad every dimension symmetrically so that all sides have equal length.
  for (int d = 0; d < dim; d++) {
    final double pad = (widest - (max[d] - min[d])) * .5;
    min[d] -= pad;
    max[d] += pad;
  }

  // First tree: zero shift vector.
  final List<ALOCIQuadTree> trees = new ArrayList<>(g);
  trees.add(new ALOCIQuadTree(min, max, new double[dim], nmin, relation));
  LOG.incrementProcessed(progressPreproc);
  /*
   * Remaining g-1 trees use a random shift in each dimension. The paper does
   * not describe this step clearly; this variant was chosen because it
   * achieves good results with the test data.
   */
  for (int t = 1; t < g; t++) {
    final double[] offset = new double[dim];
    for (int d = 0; d < dim; d++) {
      offset[d] = random.nextDouble() * (max[d] - min[d]);
    }
    trees.add(new ALOCIQuadTree(min, max, offset, nmin, relation));
    LOG.incrementProcessed(progressPreproc);
  }
  LOG.ensureCompleted(progressPreproc);

  // aLOCI main loop: score every object.
  FiniteProgress progressLOCI = null;
  if (LOG.isVerbose()) {
    progressLOCI = new FiniteProgress("Compute aLOCI scores", relation.size(), LOG);
  }
  final WritableDoubleDataStore mdef_norm = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC);
  final DoubleMinMax minmax = new DoubleMinMax();
  DBIDIter iditer = relation.iterDBIDs();
  while (iditer.valid()) {
    final O obj = relation.get(iditer);
    double best = 0;
    // Walk the levels until no tree offers a node at exactly this level.
    for (int level = 0;; level++) {
      // Closest C_i: only nodes that sit exactly on the current level count.
      Node ci = null;
      for (int i = 0; i < g; i++) {
        final Node cand = trees.get(i).findClosestNode(obj, level);
        // TODO: always use manhattan?
        if (cand.getLevel() == level && (ci == null || distFunc.distance(ci, obj) > distFunc.distance(cand, obj))) {
          ci = cand;
        }
      }
      if (ci == null) {
        // No tree has a node for this level: done with this object.
        break;
      }
      // Closest C_j, looked up around C_i at level (level - alpha). A
      // candidate on a lower level than the current choice is never taken.
      Node cj = null;
      for (int i = 0; i < g; i++) {
        final Node cand = trees.get(i).findClosestNode(ci, level - alpha);
        // TODO: allow higher levels or not?
        // TODO: always use manhattan?
        if (cj == null || (cand.getLevel() >= cj.getLevel() && distFunc.distance(cj, ci) > distFunc.distance(cand, ci))) {
          cj = cand;
        }
      }
      if (cj != null) {
        best = MathUtil.max(best, calculate_MDEF_norm(cj, ci));
      }
    }
    // Store the score and track the observed range.
    mdef_norm.putDouble(iditer, best);
    minmax.put(best);
    LOG.incrementProcessed(progressLOCI);
    iditer.advance();
  }
  LOG.ensureCompleted(progressLOCI);

  // Wrap scores and meta data into the result object.
  final DoubleRelation scoreResult = new MaterializedDoubleRelation("aLOCI normalized MDEF", "aloci-mdef-outlier", mdef_norm, relation.getDBIDs());
  final OutlierScoreMeta scoreMeta = new QuotientOutlierScoreMeta(minmax.getMin(), minmax.getMax(), 0.0, Double.POSITIVE_INFINITY);
  return new OutlierResult(scoreMeta, scoreResult);
}
Use of de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress in project elki by elki-project.
Class FlexibleLOF, method computeLRDs.
/**
 * Computes the local reachability density (LRD) of the specified objects.
 * <p>
 * For each object, the LRD is the number of its neighbors (the object itself
 * excluded) divided by the sum of their reachability distances, where a
 * reachability distance is the larger of the neighbor's list value and the
 * neighbor's own kNN distance. A non-positive sum yields positive infinity.
 *
 * @param knnq the precomputed neighborhood of the objects w.r.t. the
 *        reachability distance
 * @param ids the ids of the objects
 * @param lrds Reachability storage
 */
protected void computeLRDs(KNNQuery<O> knnq, DBIDs ids, WritableDoubleDataStore lrds) {
  FiniteProgress lrdsProgress = null;
  if (LOG.isVerbose()) {
    lrdsProgress = new FiniteProgress("LRD", ids.size(), LOG);
  }
  for (DBIDIter it = ids.iter(); it.valid(); it.advance()) {
    final KNNList knn = knnq.getKNNForDBID(it, kreach);
    double reachSum = 0.0;
    int used = 0;
    for (DoubleDBIDListIter nb = knn.iter(); nb.valid(); nb.advance()) {
      // The query object itself does not contribute.
      if (!DBIDUtil.equal(nb, it)) {
        final KNNList nbKnn = knnq.getKNNForDBID(nb, kreach);
        reachSum += MathUtil.max(nb.doubleValue(), nbKnn.getKNNDistance());
        used++;
      }
    }
    // Avoid division by 0
    final double lrd;
    if (reachSum > 0) {
      lrd = used / reachSum;
    } else {
      lrd = Double.POSITIVE_INFINITY;
    }
    lrds.putDouble(it, lrd);
    LOG.incrementProcessed(lrdsProgress);
  }
  LOG.ensureCompleted(lrdsProgress);
}
Use of de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress in project elki by elki-project.
Class LDOF, method run.
/**
 * Run the algorithm.
 * <p>
 * For every object, the score is the mean of the values stored in its kNN
 * list (presumably the distances from the object to its neighbors) divided by
 * the mean pairwise distance among those neighbors. The object itself is left
 * out of both means. A NaN or infinite quotient is replaced by 1.0.
 *
 * @param database Database to process
 * @param relation Relation to process
 * @return Outlier result
 */
public OutlierResult run(Database database, Relation<O> relation) {
  final DistanceQuery<O> distFunc = database.getDistanceQuery(relation, getDistanceFunction());
  final KNNQuery<O> knnQuery = database.getKNNQuery(distFunc, k);
  // Observed score range, needed for the result meta data.
  final DoubleMinMax ldofminmax = new DoubleMinMax();
  // Storage for the LDOF values.
  final WritableDoubleDataStore ldofs = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP);

  if (LOG.isVerbose()) {
    LOG.verbose("Computing LDOFs");
  }
  FiniteProgress progressLDOFs = null;
  if (LOG.isVerbose()) {
    progressLDOFs = new FiniteProgress("LDOF for objects", relation.size(), LOG);
  }

  // Reused accumulators: mean list value, and mean distance among neighbors.
  final Mean toNeighbors = new Mean();
  final Mean amongNeighbors = new Mean();
  for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
    final KNNList knn = knnQuery.getKNNForDBID(iditer, k);
    toNeighbors.reset();
    amongNeighbors.reset();
    final DoubleDBIDListIter outer = knn.iter();
    final DoubleDBIDListIter inner = knn.iter();
    while (outer.valid()) {
      // The query point itself is not one of its neighbors.
      if (!DBIDUtil.equal(outer, iditer)) {
        toNeighbors.put(outer.doubleValue());
        // Visit each unordered pair once: start right behind the outer position.
        inner.seek(outer.getOffset() + 1);
        while (inner.valid()) {
          if (!DBIDUtil.equal(inner, iditer)) {
            amongNeighbors.put(distFunc.distance(outer, inner));
          }
          inner.advance();
        }
      }
      outer.advance();
    }
    final double ratio = toNeighbors.getMean() / amongNeighbors.getMean();
    final double ldof = (Double.isNaN(ratio) || Double.isInfinite(ratio)) ? 1.0 : ratio;
    ldofs.putDouble(iditer, ldof);
    ldofminmax.put(ldof);
    LOG.incrementProcessed(progressLDOFs);
  }
  LOG.ensureCompleted(progressLDOFs);

  // Build result representation.
  final DoubleRelation scoreResult = new MaterializedDoubleRelation("LDOF Outlier Score", "ldof-outlier", ldofs, relation.getDBIDs());
  final OutlierScoreMeta scoreMeta = new QuotientOutlierScoreMeta(ldofminmax.getMin(), ldofminmax.getMax(), 0.0, Double.POSITIVE_INFINITY, LDOF_BASELINE);
  return new OutlierResult(scoreMeta, scoreResult);
}
Use of de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress in project elki by elki-project.
Class HiCS, method calculateSubspaces.
/**
 * Identifies high contrast subspaces in a given full-dimensional database.
 * <p>
 * Starts with all two-dimensional subspaces, then repeatedly joins pairs of
 * the retained candidates into subspaces of the next higher dimensionality
 * until no candidates remain. A candidate is dropped from the result again
 * when one of the subspaces joined in that round has a higher contrast.
 *
 * @param relation the relation the HiCS should be evaluated for
 * @param subspaceIndex Subspace indexes
 * @param random Random generator, handed on to {@code calculateContrast}
 * @return a set of high contrast subspaces
 */
private Set<HiCSSubspace> calculateSubspaces(Relation<? extends NumberVector> relation, ArrayList<ArrayDBIDs> subspaceIndex, Random random) {
  final int dbdim = RelationUtil.dimensionality(relation);
  FiniteProgress dprog = LOG.isVerbose() ? new FiniteProgress("Subspace dimensionality", dbdim, LOG) : null;
  if (dprog != null) {
    // The progress total is dbdim, which may be below 2; never report more
    // than the total.
    dprog.setProcessed(Math.min(2, dbdim), LOG);
  }
  TreeSet<HiCSSubspace> subspaceList = new TreeSet<>(HiCSSubspace.SORT_BY_SUBSPACE);
  TopBoundedHeap<HiCSSubspace> dDimensionalList = new TopBoundedHeap<>(cutoff, HiCSSubspace.SORT_BY_CONTRAST_ASC);
  FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Generating two-element subsets", (dbdim * (dbdim - 1)) >> 1, LOG) : null;
  // compute two-element sets of subspaces
  for (int i = 0; i < dbdim; i++) {
    for (int j = i + 1; j < dbdim; j++) {
      HiCSSubspace ts = new HiCSSubspace();
      ts.set(i);
      ts.set(j);
      calculateContrast(relation, ts, subspaceIndex, random);
      dDimensionalList.add(ts);
      LOG.incrementProcessed(prog);
    }
  }
  LOG.ensureCompleted(prog);
  IndefiniteProgress qprog = LOG.isVerbose() ? new IndefiniteProgress("Testing subspace candidates", LOG) : null;
  for (int d = 3; !dDimensionalList.isEmpty(); d++) {
    if (dprog != null) {
      // The loop runs one round past the last dimensionality that still
      // produced candidates, so d can reach dbdim + 1. Clamp to the progress
      // total so the progress is never set beyond it.
      dprog.setProcessed(Math.min(d, dbdim), LOG);
    }
    // result now contains all d-dimensional sets of subspaces
    ArrayList<HiCSSubspace> candidateList = new ArrayList<>(dDimensionalList.size());
    for (Heap<HiCSSubspace>.UnorderedIter it = dDimensionalList.unorderedIter(); it.valid(); it.advance()) {
      subspaceList.add(it.get());
      candidateList.add(it.get());
    }
    dDimensionalList.clear();
    // candidateList now contains the *m* best d-dimensional sets
    Collections.sort(candidateList, HiCSSubspace.SORT_BY_SUBSPACE);
    // TODO: optimize APRIORI style, by not even computing the bit set or?
    for (int i = 0; i < candidateList.size() - 1; i++) {
      for (int j = i + 1; j < candidateList.size(); j++) {
        HiCSSubspace set1 = candidateList.get(i);
        HiCSSubspace set2 = candidateList.get(j);
        HiCSSubspace joinedSet = new HiCSSubspace();
        joinedSet.or(set1);
        joinedSet.or(set2);
        // Only keep joins that grow the subspace to exactly d dimensions.
        if (joinedSet.cardinality() != d) {
          continue;
        }
        calculateContrast(relation, joinedSet, subspaceIndex, random);
        dDimensionalList.add(joinedSet);
        LOG.incrementProcessed(qprog);
      }
    }
    // Prune: drop a candidate from the result if any newly joined subspace
    // has a higher contrast.
    for (HiCSSubspace cand : candidateList) {
      for (Heap<HiCSSubspace>.UnorderedIter it = dDimensionalList.unorderedIter(); it.valid(); it.advance()) {
        if (it.get().contrast > cand.contrast) {
          subspaceList.remove(cand);
          break;
        }
      }
    }
  }
  LOG.setCompleted(qprog);
  if (dprog != null) {
    dprog.setProcessed(dbdim, LOG);
    dprog.ensureCompleted(LOG);
  }
  return subspaceList;
}
Use of de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress in project elki by elki-project.
Class DBOutlierDetection, method computeOutlierScores.
/**
 * Computes a binary outlier score (1 = outlier, 0 = not an outlier) for every
 * object of the relation.
 * <p>
 * With {@code m = floor(size * (1 - p))}, an object is flagged when fewer
 * than {@code m} objects lie within distance {@code d} of it. Depending on
 * which optimized query the database offers, this is decided via the kNN
 * distance for {@code m} neighbors, via the size of the range query result,
 * or via a linear scan that stops once {@code m} objects were counted.
 *
 * @param database Database used to obtain the queries
 * @param relation Relation to process
 * @param d Distance threshold of the neighborhood
 * @return Data store with the per-object scores
 */
@Override
protected DoubleDataStore computeOutlierScores(Database database, Relation<O> relation, double d) {
  final DistanceQuery<O> distFunc = database.getDistanceQuery(relation, getDistanceFunction());
  // Prefer kNN query if available, as this will usually stop earlier.
  final KNNQuery<O> knnQuery = database.getKNNQuery(distFunc, DatabaseQuery.HINT_OPTIMIZED_ONLY);
  RangeQuery<O> rangeQuery = null;
  if (knnQuery == null) {
    rangeQuery = database.getRangeQuery(distFunc, DatabaseQuery.HINT_OPTIMIZED_ONLY, d);
  }
  // maximum number of objects in the D-neighborhood of an outlier
  final int m = (int) Math.floor(distFunc.getRelation().size() * (1 - p));
  final WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(distFunc.getRelation().getDBIDs(), DataStoreFactory.HINT_STATIC);
  FiniteProgress prog = null;
  if (LOG.isVerbose()) {
    prog = new FiniteProgress("DBOutlier detection", distFunc.getRelation().size(), LOG);
  }
  for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
    final boolean outlier;
    if (knnQuery != null) {
      // Outlier if the kNN distance for m neighbors is more than d.
      outlier = knnQuery.getKNNForDBID(iditer, m).getKNNDistance() > d;
    } else if (rangeQuery != null) {
      // Outlier if fewer than m objects are returned for radius d.
      outlier = rangeQuery.getRangeForDBID(iditer, d).size() < m;
    } else {
      // Linear scan neighbors for each object, but stop early.
      int within = 0;
      for (DBIDIter other = relation.iterDBIDs(); other.valid(); other.advance()) {
        if (distFunc.distance(iditer, other) <= d && ++within >= m) {
          break;
        }
      }
      outlier = within < m;
    }
    scores.putDouble(iditer, outlier ? 1. : 0.);
    LOG.incrementProcessed(prog);
  }
  LOG.ensureCompleted(prog);
  return scores;
}
Aggregations