use of de.lmu.ifi.dbs.elki.utilities.scaling.IdentityScaling in project elki by elki-project.
the class ComputeKNNOutlierScores method run.
/**
 * Runs a fixed battery of kNN-based outlier detection methods for a range of
 * neighborhood sizes and writes every resulting score vector to the output
 * file.
 *
 * Steps, in order:
 * <ol>
 * <li>Fetch the relation matching the distance function's input type.</li>
 * <li>Make sure kNN queries are answered by a preprocessor; if not, a
 * {@code MaterializeKNNPreprocessor} is initialized and registered, and the
 * query is requested again. The run is aborted if the second query still is
 * not a {@code PreprocessorKNNQuery}.</li>
 * <li>Log a note for methods that are expensive for large k.</li>
 * <li>Write a header line and the by-label reference result, then call
 * {@code runForEachK} once per method.</li>
 * </ol>
 *
 * @throws AbortException if no preprocessor kNN query could be obtained, or
 *         if the output file cannot be created
 */
@Override
public void run() {
  final Database database = inputstep.getDatabase();
  final Relation<O> relation = database.getRelation(distf.getInputTypeRestriction());
  // Ensure we don't go beyond the relation size:
  final int maxk = Math.min(this.maxk, relation.size() - 1);

  // Get a KNN query; request two neighbors more than maxk, but never more
  // than there are objects in the relation.
  final int lim = Math.min(maxk + 2, relation.size());
  KNNQuery<O> knnq = QueryUtil.getKNNQuery(relation, distf, lim);
  // Precompute kNN if the query is not already backed by a preprocessor:
  if (!(knnq instanceof PreprocessorKNNQuery)) {
    MaterializeKNNPreprocessor<O> preproc = new MaterializeKNNPreprocessor<>(relation, distf, lim);
    preproc.initialize();
    relation.getHierarchy().add(relation, preproc);
  }
  // Test that we now get a proper index query
  knnq = QueryUtil.getKNNQuery(relation, distf, lim);
  if (!(knnq instanceof PreprocessorKNNQuery)) {
    throw new AbortException("Not using preprocessor knn query -- KNN queries using class: " + knnq.getClass());
  }

  // Warn for some known slow methods and large k:
  if (!isDisabled("LDOF") && maxk > 100) {
    LOG.verbose("Note: LDOF needs O(k^2) distance computations. Use -" + Parameterizer.DISABLE_ID.getName() + " LDOF to disable.");
  }
  if (!isDisabled("FastABOD") && maxk > 100) {
    LOG.warning("Note: FastABOD needs quadratic memory. Use -" + Parameterizer.DISABLE_ID.getName() + " FastABOD to disable.");
  }
  if (!isDisabled("DWOF") && maxk > 100) {
    LOG.warning("Note: DWOF needs O(k^2) distance computations. Use -" + Parameterizer.DISABLE_ID.getName() + " DWOF to disable.");
  }

  final DBIDs ids = relation.getDBIDs();
  try (PrintStream fout = new PrintStream(outfile)) {
    // Header line: data set size and data type of the relation.
    fout.append("# Data set size: " + relation.size()).append(" data type: " + relation.getDataTypeInformation()).append(FormatUtil.NEWLINE);
    // Label outlier result (reference); written unscaled.
    writeResult(fout, ids, bylabel.run(database), new IdentityScaling(), "bylabel");

    // First k to evaluate: the configured start, or the step size if no
    // positive start was given.
    final int startk = (this.startk > 0) ? this.startk : this.stepk;
    // Start values for the methods below that are not run with k < 2
    // (respectively k < 3): advance by stepk until the bound is reached.
    final int startkmin2 = (startk >= 2) ? startk : (startk + stepk);
    final int startkmin3 = (startk >= 3) ? startk : (startkmin2 >= 3) ? startkmin2 : (startkmin2 + stepk);

    // Output function: write each result with the configured scaling.
    BiConsumer<String, OutlierResult> out = (kstr, result) -> writeResult(fout, ids, result, scaling, kstr);

    // KNN
    runForEachK("KNN", startk, stepk, maxk, k -> new KNNOutlier<O>(distf, k).run(database, relation), out);
    // KNN Weight
    runForEachK("KNNW", startk, stepk, maxk, k -> new KNNWeightOutlier<O>(distf, k).run(database, relation), out);
    // Run LOF
    runForEachK("LOF", startk, stepk, maxk, k -> new LOF<O>(k, distf).run(database, relation), out);
    // Run Simplified-LOF
    runForEachK("SimplifiedLOF", startk, stepk, maxk, k -> new SimplifiedLOF<O>(k, distf).run(database, relation), out);
    // LoOP
    runForEachK("LoOP", startk, stepk, maxk, k -> new LoOP<O>(k, k, distf, distf, 1.0).run(database, relation), out);
    // LDOF
    runForEachK("LDOF", startkmin2, stepk, maxk, k -> new LDOF<O>(distf, k).run(database, relation), out);
    // Run ODIN
    runForEachK("ODIN", startk, stepk, maxk, k -> new ODIN<O>(distf, k).run(database, relation), out);
    // Run FastABOD
    runForEachK("FastABOD", startkmin3, stepk, maxk, k -> new FastABOD<O>(new PolynomialKernelFunction(2), k).run(database, relation), out);
    // Run KDEOS with intrinsic dimensionality 2.
    runForEachK("KDEOS", startkmin2, stepk, maxk, k -> new KDEOS<O>(distf, k, k, GaussianKernelDensityFunction.KERNEL, 0., 0.5 * GaussianKernelDensityFunction.KERNEL.canonicalBandwidth(), 2).run(database, relation), out);
    // Run LDF
    runForEachK("LDF", startk, stepk, maxk, k -> new LDF<O>(k, distf, GaussianKernelDensityFunction.KERNEL, 1., .1).run(database, relation), out);
    // Run INFLO
    runForEachK("INFLO", startk, stepk, maxk, k -> new INFLO<O>(distf, 1.0, k).run(database, relation), out);
    // Run COF
    runForEachK("COF", startk, stepk, maxk, k -> new COF<O>(k, distf).run(database, relation), out);
    // Run simple Intrinsic dimensionality
    runForEachK("Intrinsic", startkmin2, stepk, maxk, k -> new IntrinsicDimensionalityOutlier<O>(distf, k, AggregatedHillEstimator.STATIC).run(database, relation), out);
    // Run IDOS
    runForEachK("IDOS", startkmin2, stepk, maxk, k -> new IDOS<O>(distf, AggregatedHillEstimator.STATIC, k, k).run(database, relation), out);
    // Run simple kernel-density LOF variant
    runForEachK("KDLOF", startkmin2, stepk, maxk, k -> new SimpleKernelDensityLOF<O>(k, distf, GaussianKernelDensityFunction.KERNEL).run(database, relation), out);
    // Run DWOF (need pairwise distances, too)
    runForEachK("DWOF", startkmin2, stepk, maxk, k -> new DWOF<O>(distf, k, 1.1).run(database, relation), out);
    // Run LIC
    runForEachK("LIC", startk, stepk, maxk, k -> new LocalIsolationCoefficient<O>(distf, k).run(database, relation), out);
    // Run VOV (requires a vector field).
    if (TypeUtil.DOUBLE_VECTOR_FIELD.isAssignableFromType(relation.getDataTypeInformation())) {
      // The casts are guarded by the data type check above.
      @SuppressWarnings("unchecked")
      final DistanceFunction<? super DoubleVector> df = (DistanceFunction<? super DoubleVector>) distf;
      @SuppressWarnings("unchecked")
      final Relation<DoubleVector> rel = (Relation<DoubleVector>) (Relation<?>) relation;
      runForEachK("VOV", startk, stepk, maxk, k -> new VarianceOfVolume<DoubleVector>(k, df).run(database, rel), out);
    }
    // Run KNN DD
    runForEachK("KNNDD", startk, stepk, maxk, k -> new KNNDD<O>(distf, k).run(database, relation), out);
    // Run KNN SOS
    runForEachK("KNNSOS", startk, stepk, maxk, k -> new KNNSOS<O>(distf, k).run(relation), out);
    // Run ISOS
    runForEachK("ISOS", startkmin2, stepk, maxk, k -> new ISOS<O>(distf, k, AggregatedHillEstimator.STATIC).run(relation), out);
  } catch (FileNotFoundException e) {
    throw new AbortException("Cannot create output file.", e);
  }
}
use of de.lmu.ifi.dbs.elki.utilities.scaling.IdentityScaling in project elki by elki-project.
the class JudgeOutlierScores method computeScore.
/**
 * Evaluate a single outlier score result.
 *
 * Every score is first passed through the configured {@code scaling} and then
 * through an inner rescaling intended to bring the value into [0, 1]. The
 * "positive" score is the mean of {@code 1 - value} over {@code ids}; the
 * "negative" score is the mean of {@code value} over {@code outlierIds}.
 *
 * If either ID set is empty, the corresponding mean is a division of 0.0 by
 * zero and therefore NaN.
 *
 * @param ids Inlier IDs
 * @param outlierIds Outlier IDs
 * @param or Outlier Result to evaluate
 * @return Outlier score result holding a single row
 *         {@code {(posscore + negscore) / 2, posscore, negscore}}
 * @throws IllegalStateException declared in the signature; nothing in this
 *         method throws it directly, so it can only originate in a callee
 */
protected ScoreResult computeScore(DBIDs ids, DBIDs outlierIds, OutlierResult or) throws IllegalStateException {
  // Scalings that depend on the score distribution must see the result first.
  if (scaling instanceof OutlierScalingFunction) {
    ((OutlierScalingFunction) scaling).prepare(or);
  }

  // If we have useful (finite) min/max, use these to normalize to [0, 1].
  final double min = scaling.getMin();
  final double max = scaling.getMax();
  final ScalingFunction innerScaling;
  if (Double.isInfinite(min) || Double.isNaN(min) || Double.isInfinite(max) || Double.isNaN(max)) {
    innerScaling = new IdentityScaling();
    // TODO: does the outlier score give us this guarantee?
    LOG.warning("JudgeOutlierScores expects values between 0.0 and 1.0, but we don't have such a guarantee by the scaling function: min:" + min + " max:" + max);
  } else if (min == 0.0 && max == 1.0) {
    // Already in the expected range, nothing to do.
    innerScaling = new IdentityScaling();
  } else {
    // Map [min, max] onto [0, 1]: (x - min) / (max - min).
    // LinearScaling applies factor * x + shift, so the shift has to be
    // scaled by the factor as well (a plain -min is only right for min == 0
    // or max - min == 1).
    final double factor = 1.0 / (max - min);
    innerScaling = new LinearScaling(factor, -min * factor);
  }

  double posscore = 0.0;
  double negscore = 0.0;
  // Inliers: reward low (scaled) scores.
  for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
    double result = or.getScores().doubleValue(iter);
    result = innerScaling.getScaled(scaling.getScaled(result));
    posscore += (1.0 - result);
  }
  // Outliers: reward high (scaled) scores.
  for (DBIDIter iter = outlierIds.iter(); iter.valid(); iter.advance()) {
    double result = or.getScores().doubleValue(iter);
    result = innerScaling.getScaled(scaling.getScaled(result));
    negscore += result;
  }
  // Turn the sums into means.
  posscore /= ids.size();
  negscore /= outlierIds.size();
  LOG.verbose("Scores: " + posscore + " " + negscore);

  ArrayList<double[]> s = new ArrayList<>(1);
  s.add(new double[] { (posscore + negscore) * .5, posscore, negscore });
  return new ScoreResult(s);
}
Aggregations