use of de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery in project elki by elki-project.
the class FlexibleLOF method run.
/**
* Performs the Generalized LOF algorithm on the given database by calling
* {@link #doRunInTime}.
*
* @param database Database to query
* @param relation Data to process
* @return LOF outlier result
*/
public OutlierResult run(Database database, Relation<O> relation) {
StepProgress stepprog = LOG.isVerbose() ? new StepProgress("LOF", 3) : null;
Pair<KNNQuery<O>, KNNQuery<O>> pair = getKNNQueries(database, relation, stepprog);
KNNQuery<O> kNNRefer = pair.getFirst();
KNNQuery<O> kNNReach = pair.getSecond();
return doRunInTime(relation.getDBIDs(), kNNRefer, kNNReach, stepprog).getResult();
}
use of de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery in project elki by elki-project.
the class OnlineLOF method getKNNAndRkNNQueries.
/**
* Get the kNN and rkNN queries for the algorithm.
*
* @param relation Data
* @param stepprog Progress logger
* @return the kNN and rkNN queries
*/
private Pair<Pair<KNNQuery<O>, KNNQuery<O>>, Pair<RKNNQuery<O>, RKNNQuery<O>>> getKNNAndRkNNQueries(Database database, Relation<O> relation, StepProgress stepprog) {
DistanceQuery<O> drefQ = database.getDistanceQuery(relation, referenceDistanceFunction);
// Use "HEAVY" flag, since this is an online algorithm
KNNQuery<O> kNNRefer = database.getKNNQuery(drefQ, krefer, DatabaseQuery.HINT_HEAVY_USE, DatabaseQuery.HINT_OPTIMIZED_ONLY, DatabaseQuery.HINT_NO_CACHE);
RKNNQuery<O> rkNNRefer = database.getRKNNQuery(drefQ, DatabaseQuery.HINT_HEAVY_USE, DatabaseQuery.HINT_OPTIMIZED_ONLY, DatabaseQuery.HINT_NO_CACHE);
// No optimized kNN query or RkNN query - use a preprocessor!
if (kNNRefer == null || rkNNRefer == null) {
if (stepprog != null) {
stepprog.beginStep(1, "Materializing neighborhood w.r.t. reference neighborhood distance function.", LOG);
}
MaterializeKNNAndRKNNPreprocessor<O> preproc = new MaterializeKNNAndRKNNPreprocessor<>(relation, referenceDistanceFunction, krefer);
kNNRefer = preproc.getKNNQuery(drefQ, krefer, DatabaseQuery.HINT_HEAVY_USE);
rkNNRefer = preproc.getRKNNQuery(drefQ, krefer, DatabaseQuery.HINT_HEAVY_USE);
// add as index
database.getHierarchy().add(relation, preproc);
} else {
if (stepprog != null) {
stepprog.beginStep(1, "Optimized neighborhood w.r.t. reference neighborhood distance function provided by database.", LOG);
}
}
DistanceQuery<O> dreachQ = database.getDistanceQuery(relation, reachabilityDistanceFunction);
KNNQuery<O> kNNReach = database.getKNNQuery(dreachQ, kreach, DatabaseQuery.HINT_HEAVY_USE, DatabaseQuery.HINT_OPTIMIZED_ONLY, DatabaseQuery.HINT_NO_CACHE);
RKNNQuery<O> rkNNReach = database.getRKNNQuery(dreachQ, DatabaseQuery.HINT_HEAVY_USE, DatabaseQuery.HINT_OPTIMIZED_ONLY, DatabaseQuery.HINT_NO_CACHE);
if (kNNReach == null || rkNNReach == null) {
if (stepprog != null) {
stepprog.beginStep(2, "Materializing neighborhood w.r.t. reachability distance function.", LOG);
}
ListParameterization config = new ListParameterization();
config.addParameter(AbstractMaterializeKNNPreprocessor.Factory.DISTANCE_FUNCTION_ID, reachabilityDistanceFunction);
config.addParameter(AbstractMaterializeKNNPreprocessor.Factory.K_ID, kreach);
MaterializeKNNAndRKNNPreprocessor<O> preproc = new MaterializeKNNAndRKNNPreprocessor<>(relation, reachabilityDistanceFunction, kreach);
kNNReach = preproc.getKNNQuery(dreachQ, kreach, DatabaseQuery.HINT_HEAVY_USE);
rkNNReach = preproc.getRKNNQuery(dreachQ, kreach, DatabaseQuery.HINT_HEAVY_USE);
// add as index
database.getHierarchy().add(relation, preproc);
}
Pair<KNNQuery<O>, KNNQuery<O>> kNNPair = new Pair<>(kNNRefer, kNNReach);
Pair<RKNNQuery<O>, RKNNQuery<O>> rkNNPair = new Pair<>(rkNNRefer, rkNNReach);
return new Pair<>(kNNPair, rkNNPair);
}
use of de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery in project elki by elki-project.
the class ComputeKNNOutlierScores method run.
@Override
public void run() {
final Database database = inputstep.getDatabase();
final Relation<O> relation = database.getRelation(distf.getInputTypeRestriction());
// Ensure we don't go beyond the relation size:
final int maxk = Math.min(this.maxk, relation.size() - 1);
// Get a KNN query.
final int lim = Math.min(maxk + 2, relation.size());
KNNQuery<O> knnq = QueryUtil.getKNNQuery(relation, distf, lim);
// Precompute kNN:
if (!(knnq instanceof PreprocessorKNNQuery)) {
MaterializeKNNPreprocessor<O> preproc = new MaterializeKNNPreprocessor<>(relation, distf, lim);
preproc.initialize();
relation.getHierarchy().add(relation, preproc);
}
// Test that we now get a proper index query
knnq = QueryUtil.getKNNQuery(relation, distf, lim);
if (!(knnq instanceof PreprocessorKNNQuery)) {
throw new AbortException("Not using preprocessor knn query -- KNN queries using class: " + knnq.getClass());
}
// Warn for some known slow methods and large k:
if (!isDisabled("LDOF") && maxk > 100) {
LOG.verbose("Note: LODF needs O(k^2) distance computations. Use -" + Parameterizer.DISABLE_ID.getName() + " LDOF to disable.");
}
if (!isDisabled("FastABOD") && maxk > 100) {
LOG.warning("Note: FastABOD needs quadratic memory. Use -" + Parameterizer.DISABLE_ID.getName() + " FastABOD to disable.");
}
if (!isDisabled("DWOF") && maxk > 100) {
LOG.warning("Note: DWOF needs O(k^2) distance computations. Use -" + Parameterizer.DISABLE_ID.getName() + " DWOF to disable.");
}
final DBIDs ids = relation.getDBIDs();
try (PrintStream fout = new PrintStream(outfile)) {
// Control: print the DBIDs in case we are seeing an odd iteration
//
fout.append("# Data set size: " + relation.size()).append(" data type: " + relation.getDataTypeInformation()).append(FormatUtil.NEWLINE);
// Label outlier result (reference)
writeResult(fout, ids, bylabel.run(database), new IdentityScaling(), "bylabel");
final int startk = (this.startk > 0) ? this.startk : this.stepk;
final int startkmin2 = (startk >= 2) ? startk : (startk + stepk);
final int startkmin3 = (startk >= 3) ? startk : (startkmin2 >= 3) ? startkmin2 : (startkmin2 + stepk);
// Output function:
BiConsumer<String, OutlierResult> out = (kstr, result) -> writeResult(fout, ids, result, scaling, kstr);
// KNN
runForEachK(//
"KNN", //
startk, //
stepk, //
maxk, k -> //
new KNNOutlier<O>(distf, k).run(database, relation), out);
// KNN Weight
runForEachK(//
"KNNW", //
startk, //
stepk, //
maxk, k -> //
new KNNWeightOutlier<O>(distf, k).run(database, relation), out);
// Run LOF
runForEachK(//
"LOF", //
startk, //
stepk, //
maxk, k -> //
new LOF<O>(k, distf).run(database, relation), out);
// Run Simplified-LOF
runForEachK(//
"SimplifiedLOF", //
startk, //
stepk, //
maxk, k -> //
new SimplifiedLOF<O>(k, distf).run(database, relation), out);
// LoOP
runForEachK(//
"LoOP", //
startk, //
stepk, //
maxk, k -> //
new LoOP<O>(k, k, distf, distf, 1.0).run(database, relation), out);
// LDOF
runForEachK(//
"LDOF", //
startkmin2, //
stepk, //
maxk, k -> //
new LDOF<O>(distf, k).run(database, relation), out);
// Run ODIN
runForEachK(//
"ODIN", //
startk, //
stepk, //
maxk, k -> //
new ODIN<O>(distf, k).run(database, relation), out);
// Run FastABOD
runForEachK(//
"FastABOD", //
startkmin3, //
stepk, //
maxk, k -> //
new FastABOD<O>(new PolynomialKernelFunction(2), k).run(database, relation), out);
// Run KDEOS with intrinsic dimensionality 2.
runForEachK(//
"KDEOS", //
startkmin2, //
stepk, //
maxk, k -> new //
KDEOS<O>(//
distf, //
k, //
k, //
GaussianKernelDensityFunction.KERNEL, //
0., 0.5 * GaussianKernelDensityFunction.KERNEL.canonicalBandwidth(), //
2).run(database, relation), out);
// Run LDF
runForEachK(//
"LDF", //
startk, //
stepk, //
maxk, k -> //
new LDF<O>(k, distf, GaussianKernelDensityFunction.KERNEL, 1., .1).run(database, relation), out);
// Run INFLO
runForEachK(//
"INFLO", //
startk, //
stepk, //
maxk, k -> //
new INFLO<O>(distf, 1.0, k).run(database, relation), out);
// Run COF
runForEachK(//
"COF", //
startk, //
stepk, //
maxk, k -> //
new COF<O>(k, distf).run(database, relation), out);
// Run simple Intrinsic dimensionality
runForEachK(//
"Intrinsic", //
startkmin2, //
stepk, //
maxk, k -> //
new IntrinsicDimensionalityOutlier<O>(distf, k, AggregatedHillEstimator.STATIC).run(database, relation), out);
// Run IDOS
runForEachK(//
"IDOS", //
startkmin2, //
stepk, //
maxk, k -> //
new IDOS<O>(distf, AggregatedHillEstimator.STATIC, k, k).run(database, relation), out);
// Run simple kernel-density LOF variant
runForEachK(//
"KDLOF", //
startkmin2, //
stepk, //
maxk, k -> //
new SimpleKernelDensityLOF<O>(k, distf, GaussianKernelDensityFunction.KERNEL).run(database, relation), out);
// Run DWOF (need pairwise distances, too)
runForEachK(//
"DWOF", //
startkmin2, //
stepk, //
maxk, k -> //
new DWOF<O>(distf, k, 1.1).run(database, relation), out);
// Run LIC
runForEachK(//
"LIC", //
startk, //
stepk, //
maxk, k -> //
new LocalIsolationCoefficient<O>(distf, k).run(database, relation), out);
// Run VOV (requires a vector field).
if (TypeUtil.DOUBLE_VECTOR_FIELD.isAssignableFromType(relation.getDataTypeInformation())) {
@SuppressWarnings("unchecked") final DistanceFunction<? super DoubleVector> df = (DistanceFunction<? super DoubleVector>) distf;
@SuppressWarnings("unchecked") final Relation<DoubleVector> rel = (Relation<DoubleVector>) (Relation<?>) relation;
runForEachK(//
"VOV", //
startk, //
stepk, //
maxk, k -> //
new VarianceOfVolume<DoubleVector>(k, df).run(database, rel), out);
}
// Run KNN DD
runForEachK(//
"KNNDD", //
startk, //
stepk, //
maxk, k -> //
new KNNDD<O>(distf, k).run(database, relation), out);
// Run KNN SOS
runForEachK(//
"KNNSOS", //
startk, //
stepk, //
maxk, k -> //
new KNNSOS<O>(distf, k).run(relation), out);
// Run ISOS
runForEachK(//
"ISOS", //
startkmin2, //
stepk, //
maxk, k -> //
new ISOS<O>(distf, k, AggregatedHillEstimator.STATIC).run(relation), out);
} catch (FileNotFoundException e) {
throw new AbortException("Cannot create output file.", e);
}
}
use of de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery in project elki by elki-project.
the class LoOP method run.
/**
* Performs the LoOP algorithm on the given database.
*
* @param database Database to process
* @param relation Relation to process
* @return Outlier result
*/
public OutlierResult run(Database database, Relation<O> relation) {
StepProgress stepprog = LOG.isVerbose() ? new StepProgress(5) : null;
Pair<KNNQuery<O>, KNNQuery<O>> pair = getKNNQueries(database, relation, stepprog);
KNNQuery<O> knnComp = pair.getFirst();
KNNQuery<O> knnReach = pair.getSecond();
// Assert we got something
if (knnComp == null) {
throw new AbortException("No kNN queries supported by database for comparison distance function.");
}
if (knnReach == null) {
throw new AbortException("No kNN queries supported by database for density estimation distance function.");
}
// FIXME: tie handling!
// Probabilistic distances
WritableDoubleDataStore pdists = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_DB);
LOG.beginStep(stepprog, 3, "Computing pdists");
computePDists(relation, knnReach, pdists);
// Compute PLOF values.
WritableDoubleDataStore plofs = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP);
LOG.beginStep(stepprog, 4, "Computing PLOF");
double nplof = computePLOFs(relation, knnComp, pdists, plofs);
// Normalize the outlier scores.
DoubleMinMax mm = new DoubleMinMax();
{
// compute LOOP_SCORE of each db object
LOG.beginStep(stepprog, 5, "Computing LoOP scores");
FiniteProgress progressLOOPs = LOG.isVerbose() ? new FiniteProgress("LoOP for objects", relation.size(), LOG) : null;
final double norm = 1. / (nplof * MathUtil.SQRT2);
for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
double loop = NormalDistribution.erf((plofs.doubleValue(iditer) - 1.) * norm);
plofs.putDouble(iditer, loop);
mm.put(loop);
LOG.incrementProcessed(progressLOOPs);
}
LOG.ensureCompleted(progressLOOPs);
}
LOG.setCompleted(stepprog);
// Build result representation.
DoubleRelation scoreResult = new MaterializedDoubleRelation("Local Outlier Probabilities", "loop-outlier", plofs, relation.getDBIDs());
OutlierScoreMeta scoreMeta = new ProbabilisticOutlierScore(mm.getMin(), mm.getMax(), 0.);
return new OutlierResult(scoreMeta, scoreResult);
}
use of de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery in project elki by elki-project.
the class KNNJoinTest method testLinearScan.
@Test
public void testLinearScan() {
Database db = AbstractSimpleAlgorithmTest.makeSimpleDatabase(dataset, shoulds);
Relation<NumberVector> relation = db.getRelation(TypeUtil.NUMBER_VECTOR_FIELD);
// Euclidean
{
DistanceQuery<NumberVector> dq = db.getDistanceQuery(relation, EuclideanDistanceFunction.STATIC);
KNNQuery<NumberVector> knnq = QueryUtil.getLinearScanKNNQuery(dq);
MeanVariance meansize = new MeanVariance();
for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
meansize.put(knnq.getKNNForDBID(iditer, 2).size());
}
org.junit.Assert.assertEquals("Euclidean mean 2NN", mean2nnEuclid, meansize.getMean(), 0.00001);
org.junit.Assert.assertEquals("Euclidean variance 2NN", var2nnEuclid, meansize.getSampleVariance(), 0.00001);
}
// Manhattan
{
DistanceQuery<NumberVector> dq = db.getDistanceQuery(relation, ManhattanDistanceFunction.STATIC);
KNNQuery<NumberVector> knnq = QueryUtil.getLinearScanKNNQuery(dq);
MeanVariance meansize = new MeanVariance();
for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
meansize.put(knnq.getKNNForDBID(iditer, 2).size());
}
org.junit.Assert.assertEquals("Manhattan mean 2NN", mean2nnManhattan, meansize.getMean(), 0.00001);
org.junit.Assert.assertEquals("Manhattan variance 2NN", var2nnManhattan, meansize.getSampleVariance(), 0.00001);
}
}
Aggregations