Use of de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException in project elki by elki-project.
Class ComputeKNNOutlierScores, method run.
/**
 * Run a fixed battery of kNN-based outlier detection methods over a range of
 * k values and write every score vector to {@code outfile}.
 *
 * The kNN lists are materialized once (via a preprocessor attached to the
 * relation) so that all methods below share the same neighbor lists.
 *
 * @throws AbortException if no preprocessor-backed kNN query could be
 *         obtained, or if the output file cannot be created
 */
@Override
public void run() {
  final Database database = inputstep.getDatabase();
  final Relation<O> relation = database.getRelation(distf.getInputTypeRestriction());
  // Ensure we don't go beyond the relation size:
  final int maxk = Math.min(this.maxk, relation.size() - 1);

  // Request two neighbors more than maxk, but never more than there are objects.
  final int lim = Math.min(maxk + 2, relation.size());
  KNNQuery<O> knnq = QueryUtil.getKNNQuery(relation, distf, lim);

  // Precompute kNN, unless the query is already backed by a preprocessor:
  if (!(knnq instanceof PreprocessorKNNQuery)) {
    MaterializeKNNPreprocessor<O> preproc = new MaterializeKNNPreprocessor<>(relation, distf, lim);
    preproc.initialize();
    relation.getHierarchy().add(relation, preproc);
  }

  // Test that we now get a proper index query
  knnq = QueryUtil.getKNNQuery(relation, distf, lim);
  if (!(knnq instanceof PreprocessorKNNQuery)) {
    throw new AbortException("Not using preprocessor knn query -- KNN queries using class: " + knnq.getClass());
  }

  // Warn for some known slow methods and large k:
  if (!isDisabled("LDOF") && maxk > 100) {
    // The method name in this message previously read "LODF"; it must match
    // the name the user has to pass to the disable option.
    LOG.verbose("Note: LDOF needs O(k^2) distance computations. Use -" + Parameterizer.DISABLE_ID.getName() + " LDOF to disable.");
  }
  if (!isDisabled("FastABOD") && maxk > 100) {
    LOG.warning("Note: FastABOD needs quadratic memory. Use -" + Parameterizer.DISABLE_ID.getName() + " FastABOD to disable.");
  }
  if (!isDisabled("DWOF") && maxk > 100) {
    LOG.warning("Note: DWOF needs O(k^2) distance computations. Use -" + Parameterizer.DISABLE_ID.getName() + " DWOF to disable.");
  }

  final DBIDs ids = relation.getDBIDs();
  try (PrintStream fout = new PrintStream(outfile)) {
    // Header comment line: data set size and data type of the relation.
    fout.append("# Data set size: " + relation.size()).append(" data type: " + relation.getDataTypeInformation()).append(FormatUtil.NEWLINE);

    // Label outlier result (reference), written unscaled.
    writeResult(fout, ids, bylabel.run(database), new IdentityScaling(), "bylabel");

    // Without an explicit start value, begin at the step size.
    final int startk = (this.startk > 0) ? this.startk : this.stepk;
    // Start values for methods that are run with k >= 2 or k >= 3 only:
    // advance by one step if the regular start value is too small.
    // NOTE(review): a single step is not guaranteed to reach 2 resp. 3.
    final int startkmin2 = (startk >= 2) ? startk : (startk + stepk);
    final int startkmin3 = (startk >= 3) ? startk : (startkmin2 >= 3) ? startkmin2 : (startkmin2 + stepk);

    // Output function: writes one result, using the configured scaling.
    BiConsumer<String, OutlierResult> out = (kstr, result) -> writeResult(fout, ids, result, scaling, kstr);

    // KNN
    runForEachK("KNN", startk, stepk, maxk, //
        k -> new KNNOutlier<O>(distf, k).run(database, relation), out);
    // KNN Weight
    runForEachK("KNNW", startk, stepk, maxk, //
        k -> new KNNWeightOutlier<O>(distf, k).run(database, relation), out);
    // Run LOF
    runForEachK("LOF", startk, stepk, maxk, //
        k -> new LOF<O>(k, distf).run(database, relation), out);
    // Run Simplified-LOF
    runForEachK("SimplifiedLOF", startk, stepk, maxk, //
        k -> new SimplifiedLOF<O>(k, distf).run(database, relation), out);
    // LoOP
    runForEachK("LoOP", startk, stepk, maxk, //
        k -> new LoOP<O>(k, k, distf, distf, 1.0).run(database, relation), out);
    // LDOF
    runForEachK("LDOF", startkmin2, stepk, maxk, //
        k -> new LDOF<O>(distf, k).run(database, relation), out);
    // Run ODIN
    runForEachK("ODIN", startk, stepk, maxk, //
        k -> new ODIN<O>(distf, k).run(database, relation), out);
    // Run FastABOD
    runForEachK("FastABOD", startkmin3, stepk, maxk, //
        k -> new FastABOD<O>(new PolynomialKernelFunction(2), k).run(database, relation), out);
    // Run KDEOS with intrinsic dimensionality 2.
    runForEachK("KDEOS", startkmin2, stepk, maxk, //
        k -> new KDEOS<O>(distf, k, k, GaussianKernelDensityFunction.KERNEL, //
            0., 0.5 * GaussianKernelDensityFunction.KERNEL.canonicalBandwidth(), //
            2).run(database, relation), out);
    // Run LDF
    runForEachK("LDF", startk, stepk, maxk, //
        k -> new LDF<O>(k, distf, GaussianKernelDensityFunction.KERNEL, 1., .1).run(database, relation), out);
    // Run INFLO
    runForEachK("INFLO", startk, stepk, maxk, //
        k -> new INFLO<O>(distf, 1.0, k).run(database, relation), out);
    // Run COF
    runForEachK("COF", startk, stepk, maxk, //
        k -> new COF<O>(k, distf).run(database, relation), out);
    // Run simple Intrinsic dimensionality
    runForEachK("Intrinsic", startkmin2, stepk, maxk, //
        k -> new IntrinsicDimensionalityOutlier<O>(distf, k, AggregatedHillEstimator.STATIC).run(database, relation), out);
    // Run IDOS
    runForEachK("IDOS", startkmin2, stepk, maxk, //
        k -> new IDOS<O>(distf, AggregatedHillEstimator.STATIC, k, k).run(database, relation), out);
    // Run simple kernel-density LOF variant
    runForEachK("KDLOF", startkmin2, stepk, maxk, //
        k -> new SimpleKernelDensityLOF<O>(k, distf, GaussianKernelDensityFunction.KERNEL).run(database, relation), out);
    // Run DWOF (need pairwise distances, too)
    runForEachK("DWOF", startkmin2, stepk, maxk, //
        k -> new DWOF<O>(distf, k, 1.1).run(database, relation), out);
    // Run LIC
    runForEachK("LIC", startk, stepk, maxk, //
        k -> new LocalIsolationCoefficient<O>(distf, k).run(database, relation), out);
    // Run VOV (requires a vector field).
    if (TypeUtil.DOUBLE_VECTOR_FIELD.isAssignableFromType(relation.getDataTypeInformation())) {
      // The casts are guarded by the type check above.
      @SuppressWarnings("unchecked")
      final DistanceFunction<? super DoubleVector> df = (DistanceFunction<? super DoubleVector>) distf;
      @SuppressWarnings("unchecked")
      final Relation<DoubleVector> rel = (Relation<DoubleVector>) (Relation<?>) relation;
      runForEachK("VOV", startk, stepk, maxk, //
          k -> new VarianceOfVolume<DoubleVector>(k, df).run(database, rel), out);
    }
    // Run KNN DD
    runForEachK("KNNDD", startk, stepk, maxk, //
        k -> new KNNDD<O>(distf, k).run(database, relation), out);
    // Run KNN SOS
    runForEachK("KNNSOS", startk, stepk, maxk, //
        k -> new KNNSOS<O>(distf, k).run(relation), out);
    // Run ISOS
    runForEachK("ISOS", startkmin2, stepk, maxk, //
        k -> new ISOS<O>(distf, k, AggregatedHillEstimator.STATIC).run(relation), out);
  } catch (FileNotFoundException e) {
    throw new AbortException("Cannot create output file.", e);
  }
}
Use of de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException in project elki by elki-project.
Class EvaluatePrecomputedOutlierScores, method processRow.
/**
 * Evaluate one score vector and append the resulting measures as a CSV row.
 *
 * The first row processed must carry the label {@code bylabel}; it is stored
 * as the reference of positive objects and produces no output. Every later
 * row must be labeled {@code <prefix>-<k>} with an integer {@code k}.
 *
 * @param fout Output stream the CSV row is appended to
 * @param vec Score vector to evaluate
 * @param label Label of the row
 * @throws AbortException if the first row is not the {@code bylabel}
 *         reference, or if the label cannot be split into prefix and k
 */
private void processRow(PrintStream fout, NumberVector vec, String label) {
  if (checkForNaNs(vec)) {
    LOG.warning("NaN value encountered in vector " + label);
    return;
  }
  if (positive == null) {
    // Plain string comparison; no regular expression is needed here.
    if (!"bylabel".equals(label)) {
      throw new AbortException("No 'by label' reference outlier found, which is needed for evaluation!");
    }
    positive = new VectorNonZero(vec);
    endcg = NDCGEvaluation.STATIC.expected(positive.numPositive(), positive.getDimensionality());
    return;
  }

  // Split the label into method prefix and k before doing any evaluation, so
  // that a malformed label fails early and with a helpful message.
  final int p = label.lastIndexOf('-');
  if (p < 0) {
    throw new AbortException("Cannot parse label '" + label + "': expected the form <prefix>-<k>.");
  }
  final String prefix = label.substring(0, p);
  final int k;
  try {
    k = Integer.parseInt(label.substring(p + 1));
  } catch (NumberFormatException e) {
    throw new AbortException("Cannot parse label '" + label + "': the part after the last '-' is not an integer.", e);
  }

  // Labels matching the 'reverse' pattern are ranked in increasing order.
  AbstractVectorIter iter = reverse.matcher(label).find() ? new IncreasingVectorIter(vec) : new DecreasingVectorIter(vec);
  // Fraction of positive objects, used for the chance adjustment below.
  double rate = positive.numPositive() / (double) positive.getDimensionality();
  // The iterator is rewound with seek(0) before each evaluation.
  double auc = ROCEvaluation.STATIC.evaluate(positive, iter.seek(0));
  double avep = AveragePrecisionEvaluation.STATIC.evaluate(positive, iter.seek(0));
  double rprecision = PrecisionAtKEvaluation.RPRECISION.evaluate(positive, iter.seek(0));
  double maxf1 = MaximumF1Evaluation.STATIC.evaluate(positive, iter.seek(0));
  double dcg = DCGEvaluation.STATIC.evaluate(positive, iter.seek(0));
  double ndcg = NDCGEvaluation.STATIC.evaluate(positive, iter.seek(0));
  // Adjusted measures: (value - expected) / (1 - expected).
  double adjauc = 2 * auc - 1;
  double adjrprecision = (rprecision - rate) / (1 - rate);
  double adjavep = (avep - rate) / (1 - rate);
  double adjmaxf1 = (maxf1 - rate) / (1 - rate);
  double adjdcg = (ndcg - endcg) / (1 - endcg);

  // Write CSV
  if (name != null) {
    fout.append('"').append(name).append("\",");
  }
  fout.append('"').append(prefix).append('"');
  fout.append(',').append(Integer.toString(k));
  fout.append(',').append(Double.toString(auc));
  fout.append(',').append(Double.toString(avep));
  fout.append(',').append(Double.toString(rprecision));
  fout.append(',').append(Double.toString(maxf1));
  fout.append(',').append(Double.toString(dcg));
  fout.append(',').append(Double.toString(ndcg));
  fout.append(',').append(Double.toString(adjauc));
  fout.append(',').append(Double.toString(adjavep));
  fout.append(',').append(Double.toString(adjrprecision));
  fout.append(',').append(Double.toString(adjmaxf1));
  fout.append(',').append(Double.toString(adjdcg));
  fout.append('\n');
}
Use of de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException in project elki by elki-project.
Class EvaluatePrecomputedOutlierScores, method run.
/**
 * Read the score vectors from {@code infile}, evaluate each row and append
 * the results to {@code outfile}.
 *
 * The output file is opened in append mode and locked while writing; the
 * header is only written if the file is still empty.
 *
 * @throws AbortException on I/O errors, or if the input does not consist of
 *         exactly one vector column and one label column
 */
@Override
public void run() {
  try (FileInputStream rawIn = new FileInputStream(infile);
      InputStream in = new BufferedInputStream(FileUtil.tryGzipInput(rawIn));
      FileOutputStream rawOut = new FileOutputStream(outfile, true);
      PrintStream out = new PrintStream(rawOut);
      FileChannel channel = rawOut.getChannel()) {
    // Attach the parser to the input.
    parser.initStream(in);
    // Hold a lock on the output file for the duration of the run.
    channel.lock();
    if (channel.position() != 0L) {
      LOG.info("Appending to existing output " + outfile);
    } else {
      writeHeader(out);
    }

    // Column indexes of the label and the vector column; -1 while unknown.
    int labelCol = -1, vectorCol = -1;
    for (BundleStreamSource.Event ev = parser.nextEvent(); //
        ev != BundleStreamSource.Event.END_OF_STREAM; //
        ev = parser.nextEvent()) {
      switch(ev) {
      case META_CHANGED: {
        // Column layout changed: locate the two expected columns again.
        final BundleMeta meta = parser.getMeta();
        labelCol = -1;
        vectorCol = -1;
        for (int col = 0; col < meta.size(); col++) {
          final SimpleTypeInformation<?> type = meta.get(col);
          if (TypeUtil.NUMBER_VECTOR_VARIABLE_LENGTH.isAssignableFromType(type)) {
            if (vectorCol >= 0) {
              throw new AbortException("More than one vector column.");
            }
            vectorCol = col;
            continue;
          }
          if (TypeUtil.GUESSED_LABEL.isAssignableFromType(type)) {
            if (labelCol >= 0) {
              throw new AbortException("More than one label column.");
            }
            labelCol = col;
            continue;
          }
          throw new AbortException("Unexpected data column type: " + type);
        }
        break;
      }
      case NEXT_OBJECT: {
        if (labelCol < 0) {
          throw new AbortException("No label column available.");
        }
        if (vectorCol < 0) {
          throw new AbortException("No vector column available.");
        }
        final NumberVector scores = (NumberVector) parser.data(vectorCol);
        final String rowLabel = parser.data(labelCol).toString();
        processRow(out, scores, rowLabel);
        break;
      }
      default:
        // Any other event is ignored.
        break;
      }
    }
  } catch (IOException e) {
    throw new AbortException("IO error.", e);
  }
}
Use of de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException in project elki by elki-project.
Class EvaluateRetrievalPerformance, method run.
/**
 * Evaluate the retrieval performance of the distance function on a sample
 * of query objects.
 *
 * For every sampled object that has at least one matching object, the
 * average precision, the ROC AUC and the kNN performance for k = 1..maxk are
 * accumulated; the averages are logged as statistics and returned.
 *
 * @param database Database to run on (for kNN queries)
 * @param relation Relation for distance computations
 * @param lrelation Relation for class label comparison
 * @return Result holding the number of evaluated samples, the mean average
 *         precision, the mean ROC AUC and the averaged kNN performance
 * @throws AbortException if no sampled object had a match, or if the
 *         accumulated MAP or ROC value is NaN or negative
 */
public RetrievalPerformanceResult run(Database database, Relation<O> relation, Relation<?> lrelation) {
  final DistanceQuery<O> dq = database.getDistanceQuery(relation, getDistanceFunction());
  final DBIDs queries = DBIDUtil.randomSample(relation.getDBIDs(), sampling, random);

  // Reused buffers: matching objects, neighbor distances, label counts.
  final ModifiableDBIDs positives = DBIDUtil.newHashSet();
  final ModifiableDoubleDBIDList neighbors = DBIDUtil.newDistanceDBIDList(relation.size());
  final Object2IntOpenHashMap<Object> labelCounts = new Object2IntOpenHashMap<>();

  // Running sums; divided by the number of evaluated queries at the end.
  double apSum = 0., rocSum = 0.;
  final double[] knnSums = new double[maxk];
  int evaluated = 0;

  final FiniteProgress progress = LOG.isVerbose() ? new FiniteProgress("Processing query objects", queries.size(), LOG) : null;
  for (DBIDIter query = queries.iter(); query.valid(); query.advance()) {
    final Object queryLabel = lrelation.get(query);
    findMatches(positives, lrelation, queryLabel);
    if (positives.size() > 0) {
      computeDistances(neighbors, query, dq, relation);
      // The query object itself is only expected when includeSelf is set.
      final int expected = relation.size() - (includeSelf ? 0 : 1);
      if (neighbors.size() != expected) {
        LOG.warning("Neighbor list does not have the desired size: " + neighbors.size());
      }
      apSum += AveragePrecisionEvaluation.STATIC.evaluate(positives, neighbors);
      rocSum += ROCEvaluation.STATIC.evaluate(positives, neighbors);
      KNNEvaluator.STATIC.evaluateKNN(knnSums, neighbors, lrelation, labelCounts, queryLabel);
      ++evaluated;
    }
    LOG.incrementProcessed(progress);
  }
  LOG.ensureCompleted(progress);

  if (evaluated < 1) {
    throw new AbortException("No object matched - are labels parsed correctly?");
  }
  // Written as a negated ">=" so that NaN is rejected as well.
  if (!(apSum >= 0 && rocSum >= 0)) {
    throw new AbortException("NaN in MAP/ROC.");
  }

  final double meanAp = apSum / evaluated;
  final double meanRoc = rocSum / evaluated;
  LOG.statistics(new DoubleStatistic(PREFIX + ".map", meanAp));
  LOG.statistics(new DoubleStatistic(PREFIX + ".rocauc", meanRoc));
  LOG.statistics(new DoubleStatistic(PREFIX + ".samples", evaluated));
  // Turn the sums into averages in place; index i holds the value for k = i + 1.
  for (int i = 0; i < maxk; i++) {
    knnSums[i] /= evaluated;
    LOG.statistics(new DoubleStatistic(PREFIX + ".knn-" + (i + 1), knnSums[i]));
  }
  return new RetrievalPerformanceResult(evaluated, meanAp, meanRoc, knnSums);
}
Use of de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException in project elki by elki-project.
Class HopkinsStatisticClusteringTendency, method initializeDataExtends.
/**
 * Determine the box from which uniform samples are drawn.
 *
 * If both {@code minima} and {@code maxima} are configured (non-null and
 * non-empty), they define the box; each may have either one value per
 * dimension or a single value that is used for all dimensions. Otherwise
 * the box is the bounding box of the data.
 *
 * @param relation Data relation
 * @param dim Dimensionality
 * @param min Minima output array (preallocated!)
 * @param extend Data extend output array (preallocated!), receives
 *        maximum minus minimum for each dimension
 * @throws AbortException if the configured minima or maxima have a length
 *         other than 1 or {@code dim}
 */
protected void initializeDataExtends(Relation<NumberVector> relation, int dim, double[] min, double[] extend) {
  assert (min.length == dim && extend.length == dim);

  final boolean configured = minima != null && maxima != null && minima.length != 0 && maxima.length != 0;
  if (!configured) {
    // No usable user-specified bounds: take them from the data set.
    final double[][] bounds = RelationUtil.computeMinMax(relation);
    final double[] lo = bounds[0], hi = bounds[1];
    for (int d = 0; d < dim; d++) {
      min[d] = lo[d];
      extend[d] = hi[d] - lo[d];
    }
    return;
  }

  // Minima: one value per dimension, or a single value for all.
  if (minima.length != dim && minima.length != 1) {
    throw new AbortException("Invalid minima specified: expected " + dim + " got minima dimensionality: " + minima.length);
  }
  if (minima.length == dim) {
    System.arraycopy(minima, 0, min, 0, dim);
  } else {
    Arrays.fill(min, minima[0]);
  }

  // Maxima: same two options; the extend is relative to the minima set above.
  if (maxima.length != dim && maxima.length != 1) {
    throw new AbortException("Invalid maxima specified: expected " + dim + " got maxima dimensionality: " + maxima.length);
  }
  final boolean perDimension = maxima.length == dim;
  for (int d = 0; d < dim; d++) {
    final double upper = perDimension ? maxima[d] : maxima[0];
    extend[d] = upper - min[d];
  }
}
Aggregations