use of de.lmu.ifi.dbs.elki.utilities.documentation.Reference in project elki by elki-project.
the class OptionUtil method describeParameterizable.
/**
 * Format a description of a Parameterizable (including recursive options).
 *
 * The output consists of, in this order: the class name, the title and
 * description obtained from {@code DocumentationUtil} (each only when
 * non-empty), the literature reference (when one is present), and finally the
 * options collected by a {@code TrackParameters} wrapper while the class is
 * instantiated.
 *
 * Any exception raised along the way is logged and turned into a
 * "No description available" note appended to the buffer; it is never
 * propagated to the caller.
 *
 * @param buf Buffer to append to.
 * @param pcls Parameterizable class to describe
 * @param width Width
 * @param indent Text indent
 * @return Formatted description (the same buffer that was passed in)
 */
public static StringBuilder describeParameterizable(StringBuilder buf, Class<?> pcls, int width, String indent) {
  try {
    println(buf, width, "Description for class " + pcls.getName(), "");
    String title = DocumentationUtil.getTitle(pcls);
    if (title != null && title.length() > 0) {
      println(buf, width, title, "");
    }
    String desc = DocumentationUtil.getDescription(pcls);
    if (desc != null && desc.length() > 0) {
      println(buf, width, desc, " ");
    }
    Reference ref = DocumentationUtil.getReference(pcls);
    if (ref != null) {
      if (ref.prefix().length() > 0) {
        println(buf, width, ref.prefix(), "");
      }
      println(buf, width, ref.authors() + ":", "");
      println(buf, width, ref.title(), " ");
      println(buf, width, "in: " + ref.booktitle(), "");
      if (ref.url().length() > 0) {
        println(buf, width, "see also: " + ref.url(), "");
      }
    }
    SerializedParameterization config = new SerializedParameterization();
    TrackParameters track = new TrackParameters(config);
    // The created instance itself is not needed; the call is made so that the
    // tracker can report the options afterwards (presumably they are
    // registered while the class is being instantiated).
    ClassGenericsUtil.tryInstantiate(Object.class, pcls, track);
    Collection<TrackedParameter> options = track.getAllParameters();
    if (!options.isEmpty()) {
      OptionUtil.formatForConsole(buf, width, indent, options);
    }
    return buf;
  } catch (Exception e) {
    // Prefer the wrapped cause, but fall back to the exception itself:
    // getCause() is null for exceptions that do not wrap another one, and
    // logging null would discard the stack trace entirely.
    final Throwable cause = e.getCause();
    LoggingUtil.exception("Error instantiating class to describe.", cause != null ? cause : e);
    return buf.append("No description available: ").append(e);
  }
}
use of de.lmu.ifi.dbs.elki.utilities.documentation.Reference in project elki by elki-project.
the class SphereUtil method latlngMinDistRadFull.
/**
* Point to rectangle minimum distance.
*
* Previous version, only around for reference.
*
* The method distinguishes these cases, in this order:
* <ol>
* <li>Degenerate rectangle (min &gt;= max on both axes): haversine distance
* to the single point (rminlat, rminlng).</li>
* <li>Query longitude within [rminlng, rmaxlng]: the result is a plain
* latitude difference (or 0 when the latitude is inside as well).</li>
* <li>Otherwise the nearer of the two meridian edges (east via rminlng, west
* via rmaxlng) is chosen, and the result is either a cross-track distance to
* that meridian or a haversine distance to one of its two corners.</li>
* </ol>
*
* Complexity:
* <ul>
* <li>Trivial cases (on longitude slice): no trigonometric functions.</li>
* <li>Cross-track case: 10+2 trig</li>
* <li>Corner case: 10+3 trig, 1 sqrt</li>
* </ul>
*
* Reference:
* <p>
* Erich Schubert, Arthur Zimek and Hans-Peter Kriegel<br />
* Geodetic Distance Queries on R-Trees for Indexing Geographic Data<br />
* 13th Int. Symposium on Advances in Spatial and Temporal Databases
* </p>
*
* All angles appear to be in radians (method name suffix "Rad", and the
* TWOPI / HALFPI arithmetic below) -- callers should confirm.
*
* @param plat Latitude of query point.
* @param plng Longitude of query point.
* @param rminlat Min latitude of rectangle.
* @param rminlng Min longitude of rectangle.
* @param rmaxlat Max latitude of rectangle.
* @param rmaxlng Max longitude of rectangle.
* @return Distance in radians
*/
@//
Reference(//
authors = "Erich Schubert, Arthur Zimek and Hans-Peter Kriegel", //
title = "Geodetic Distance Queries on R-Trees for Indexing Geographic Data", //
booktitle = "13th Int. Symposium on Advances in Spatial and Temporal Databases", url = "http://dx.doi.org/10.1007/978-3-642-40235-7_9")
public static double latlngMinDistRadFull(double plat, double plng, double rminlat, double rminlng, double rmaxlat, double rmaxlng) {
// Degenerate rectangles (no extent in either direction): treat as a point.
if ((rminlat >= rmaxlat) && (rminlng >= rmaxlng)) {
return haversineFormulaRad(rminlat, rminlng, plat, plng);
}
// The simplest case is when the query point is in the same "slice",
// i.e. its longitude lies within the rectangle's longitude range:
if (rminlng <= plng && plng <= rmaxlng) {
return // Latitude also within the rectangle: the point is inside.
(rminlat <= plat && plat <= rmaxlat) ? // Inside
0 : // Otherwise the point is south of, or north of, the rectangle:
(plat < rminlat) ? rminlat - plat : plat - rmaxlat;
}
// Determine whether going east or west is shorter.
// Eastward longitude offset to the min edge; a negative difference is
// wrapped around by adding TWOPI.
double lngE = rminlng - plng;
lngE = lngE >= 0 ? lngE : lngE + TWOPI;
// Westward longitude offset to the max edge. Note that, like lngE, a
// negative difference is wrapped by adding TWOPI here; the sign is only
// flipped later, when the sine is taken in the west branch.
double lngW = plng - rmaxlng;
lngW = lngW >= 0 ? lngW : lngW + TWOPI;
// Compute sine and cosine values we will certainly need below:
// sinAndCos returns the sine; tmp is the out-parameter used to return the
// cosine, so tmp.value must be read before the next sinAndCos call.
final DoubleWrapper tmp = new DoubleWrapper();
final double slatQ = sinAndCos(plat, tmp), clatQ = tmp.value;
final double slatN = sinAndCos(rmaxlat, tmp), clatN = tmp.value;
final double slatS = sinAndCos(rminlat, tmp), clatS = tmp.value;
// Head east, to min edge (ties between east and west go east):
if (lngE <= lngW) {
final double slngD = sinAndCos(lngE, tmp), clngD = tmp.value;
// Bearing to south
// atan2(slngD * clatS, clatQ * slatS - slatQ * clatS * clngD);
// Bearing from south
final double bs = atan2(slngD * clatQ, clatS * slatQ - slatS * clatQ * clngD);
// Bearing to north
// atan2(slngD * clatN, clatQ * slatN - slatQ * clatN * clngD);
// Bearing from north
final double bn = atan2(slngD * clatQ, clatN * slatQ - slatN * clatQ * clngD);
// Bearings from the two corners lie on opposite sides of HALFPI: the
// cross-track distance to the edge's meridian is returned.
if (bs < HALFPI && bn > HALFPI) {
// Radians from south pole = abs(ATD)
final double radFromS = -HALFPI - plat;
// Cross-track-distance to longitude line.
return asin(sin(radFromS) * -slngD);
}
// Otherwise pick one of the two corners, based on how far each bearing
// deviates from HALFPI.
if (bs - HALFPI < HALFPI - bn) {
// Haversine to north corner.
final double slatN2 = sin((plat - rmaxlat) * .5);
final double slon = sin(lngE * .5);
final double aN = slatN2 * slatN2 + slon * slon * clatQ * clatN;
return 2 * atan2(sqrt(aN), sqrt(1 - aN));
} else {
// Haversine to south corner.
final double slatS2 = sin((plat - rminlat) * .5);
final double slon = sin(lngE * .5);
final double aS = slatS2 * slatS2 + slon * slon * clatQ * clatS;
return 2 * atan2(sqrt(aS), sqrt(1 - aS));
}
} else {
// Head west, to max edge. The sine is negated here, which mirrors the
// east branch (the comparisons below use -HALFPI accordingly).
final double slngD = -sinAndCos(lngW, tmp), clngD = tmp.value;
// Bearing to south
// atan2(slngD * clatS, clatQ * slatS - slatQ * clatS * clngD);
// Bearing from south
final double bs = atan2(slngD * clatQ, clatS * slatQ - slatS * clatQ * clngD);
// Bearing to north
// atan2(slngD * clatN, clatQ * slatN - slatQ * clatN * clngD);
// Bearing from north
final double bn = atan2(slngD * clatQ, clatN * slatQ - slatN * clatQ * clngD);
// Bearings from the two corners lie on opposite sides of -HALFPI: the
// cross-track distance to the edge's meridian is returned.
if (bs > -HALFPI && bn < -HALFPI) {
// Radians from south = abs(ATD) = distance from pole
final double radFromS = -HALFPI - plat;
// Cross-track-distance to longitude line.
return asin(sin(radFromS) * slngD);
}
// Otherwise pick one of the two corners, based on how far each bearing
// deviates from -HALFPI.
if (-HALFPI - bs < bn + HALFPI) {
// Haversine to north corner.
final double slatN2 = sin((plat - rmaxlat) * .5);
final double slon = sin(lngW * .5);
final double aN = slatN2 * slatN2 + slon * slon * clatQ * clatN;
return 2 * atan2(sqrt(aN), sqrt(1 - aN));
} else {
// Haversine to south corner.
final double slatS2 = sin((plat - rminlat) * .5);
final double slon = sin(lngW * .5);
final double aS = slatS2 * slatS2 + slon * slon * clatQ * clatS;
return 2 * atan2(sqrt(aS), sqrt(1 - aS));
}
}
}
use of de.lmu.ifi.dbs.elki.utilities.documentation.Reference in project elki by elki-project.
the class ComputeKNNOutlierScores method run.
/**
 * Run the configured kNN-based outlier detection methods over a range of k
 * values and write their scores to the output file.
 *
 * Steps:
 * <ol>
 * <li>Make sure a materialized (preprocessor-backed) kNN query is available
 * for the relation; abort otherwise.</li>
 * <li>Emit notes for methods that are known to be expensive when the
 * effective maximum k exceeds 100.</li>
 * <li>Write a header line and the label-based reference result.</li>
 * <li>Hand every method to {@code runForEachK} together with its starting k
 * (some methods use a raised starting k of at least 2 or 3).</li>
 * </ol>
 *
 * @throws AbortException when no preprocessor kNN query could be obtained, or
 *         when the output file cannot be created.
 */
@Override
public void run() {
  final Database database = inputstep.getDatabase();
  final Relation<O> relation = database.getRelation(distf.getInputTypeRestriction());
  // Ensure we don't go beyond the relation size:
  final int maxk = Math.min(this.maxk, relation.size() - 1);
  // Get a KNN query (two neighbors more than maxk, capped at the relation size).
  final int lim = Math.min(maxk + 2, relation.size());
  KNNQuery<O> knnq = QueryUtil.getKNNQuery(relation, distf, lim);
  // Precompute kNN, unless a preprocessor-backed query already exists:
  if (!(knnq instanceof PreprocessorKNNQuery)) {
    MaterializeKNNPreprocessor<O> preproc = new MaterializeKNNPreprocessor<>(relation, distf, lim);
    preproc.initialize();
    relation.getHierarchy().add(relation, preproc);
  }
  // Test that we now get a proper index query
  knnq = QueryUtil.getKNNQuery(relation, distf, lim);
  if (!(knnq instanceof PreprocessorKNNQuery)) {
    throw new AbortException("Not using preprocessor knn query -- KNN queries using class: " + knnq.getClass());
  }
  // Warn for some known slow methods and large k:
  if (!isDisabled("LDOF") && maxk > 100) {
    // The message previously misspelled the method as "LODF"; the name must
    // match the key the user has to pass to the disable option.
    LOG.verbose("Note: LDOF needs O(k^2) distance computations. Use -" + Parameterizer.DISABLE_ID.getName() + " LDOF to disable.");
  }
  if (!isDisabled("FastABOD") && maxk > 100) {
    LOG.warning("Note: FastABOD needs quadratic memory. Use -" + Parameterizer.DISABLE_ID.getName() + " FastABOD to disable.");
  }
  if (!isDisabled("DWOF") && maxk > 100) {
    LOG.warning("Note: DWOF needs O(k^2) distance computations. Use -" + Parameterizer.DISABLE_ID.getName() + " DWOF to disable.");
  }
  final DBIDs ids = relation.getDBIDs();
  try (PrintStream fout = new PrintStream(outfile)) {
    // Header line: data set size and data type, as a control for the reader.
    fout.append("# Data set size: " + relation.size()) //
        .append(" data type: " + relation.getDataTypeInformation()) //
        .append(FormatUtil.NEWLINE);
    // Label outlier result (reference), written unscaled.
    writeResult(fout, ids, bylabel.run(database), new IdentityScaling(), "bylabel");
    // Starting values of k; a non-positive startk falls back to the step size.
    final int startk = (this.startk > 0) ? this.startk : this.stepk;
    // Raised starting values for methods that are started at k >= 2 / k >= 3.
    final int startkmin2 = (startk >= 2) ? startk : (startk + stepk);
    final int startkmin3 = (startk >= 3) ? startk : (startkmin2 >= 3) ? startkmin2 : (startkmin2 + stepk);
    // Output function:
    BiConsumer<String, OutlierResult> out = (kstr, result) -> writeResult(fout, ids, result, scaling, kstr);
    // KNN
    runForEachK("KNN", startk, stepk, maxk, //
        k -> new KNNOutlier<O>(distf, k).run(database, relation), out);
    // KNN Weight
    runForEachK("KNNW", startk, stepk, maxk, //
        k -> new KNNWeightOutlier<O>(distf, k).run(database, relation), out);
    // Run LOF
    runForEachK("LOF", startk, stepk, maxk, //
        k -> new LOF<O>(k, distf).run(database, relation), out);
    // Run Simplified-LOF
    runForEachK("SimplifiedLOF", startk, stepk, maxk, //
        k -> new SimplifiedLOF<O>(k, distf).run(database, relation), out);
    // LoOP
    runForEachK("LoOP", startk, stepk, maxk, //
        k -> new LoOP<O>(k, k, distf, distf, 1.0).run(database, relation), out);
    // LDOF
    runForEachK("LDOF", startkmin2, stepk, maxk, //
        k -> new LDOF<O>(distf, k).run(database, relation), out);
    // Run ODIN
    runForEachK("ODIN", startk, stepk, maxk, //
        k -> new ODIN<O>(distf, k).run(database, relation), out);
    // Run FastABOD
    runForEachK("FastABOD", startkmin3, stepk, maxk, //
        k -> new FastABOD<O>(new PolynomialKernelFunction(2), k).run(database, relation), out);
    // Run KDEOS with intrinsic dimensionality 2.
    runForEachK("KDEOS", startkmin2, stepk, maxk, //
        k -> new KDEOS<O>(distf, k, k, GaussianKernelDensityFunction.KERNEL, //
            0., 0.5 * GaussianKernelDensityFunction.KERNEL.canonicalBandwidth(), //
            2).run(database, relation), out);
    // Run LDF
    runForEachK("LDF", startk, stepk, maxk, //
        k -> new LDF<O>(k, distf, GaussianKernelDensityFunction.KERNEL, 1., .1).run(database, relation), out);
    // Run INFLO
    runForEachK("INFLO", startk, stepk, maxk, //
        k -> new INFLO<O>(distf, 1.0, k).run(database, relation), out);
    // Run COF
    runForEachK("COF", startk, stepk, maxk, //
        k -> new COF<O>(k, distf).run(database, relation), out);
    // Run simple Intrinsic dimensionality
    runForEachK("Intrinsic", startkmin2, stepk, maxk, //
        k -> new IntrinsicDimensionalityOutlier<O>(distf, k, AggregatedHillEstimator.STATIC).run(database, relation), out);
    // Run IDOS
    runForEachK("IDOS", startkmin2, stepk, maxk, //
        k -> new IDOS<O>(distf, AggregatedHillEstimator.STATIC, k, k).run(database, relation), out);
    // Run simple kernel-density LOF variant
    runForEachK("KDLOF", startkmin2, stepk, maxk, //
        k -> new SimpleKernelDensityLOF<O>(k, distf, GaussianKernelDensityFunction.KERNEL).run(database, relation), out);
    // Run DWOF (need pairwise distances, too)
    runForEachK("DWOF", startkmin2, stepk, maxk, //
        k -> new DWOF<O>(distf, k, 1.1).run(database, relation), out);
    // Run LIC
    runForEachK("LIC", startk, stepk, maxk, //
        k -> new LocalIsolationCoefficient<O>(distf, k).run(database, relation), out);
    // Run VOV (requires a vector field).
    if (TypeUtil.DOUBLE_VECTOR_FIELD.isAssignableFromType(relation.getDataTypeInformation())) {
      // The casts are guarded by the data type check above.
      @SuppressWarnings("unchecked")
      final DistanceFunction<? super DoubleVector> df = (DistanceFunction<? super DoubleVector>) distf;
      @SuppressWarnings("unchecked")
      final Relation<DoubleVector> rel = (Relation<DoubleVector>) (Relation<?>) relation;
      runForEachK("VOV", startk, stepk, maxk, //
          k -> new VarianceOfVolume<DoubleVector>(k, df).run(database, rel), out);
    }
    // Run KNN DD
    runForEachK("KNNDD", startk, stepk, maxk, //
        k -> new KNNDD<O>(distf, k).run(database, relation), out);
    // Run KNN SOS
    runForEachK("KNNSOS", startk, stepk, maxk, //
        k -> new KNNSOS<O>(distf, k).run(relation), out);
    // Run ISOS
    runForEachK("ISOS", startkmin2, stepk, maxk, //
        k -> new ISOS<O>(distf, k, AggregatedHillEstimator.STATIC).run(relation), out);
  } catch (FileNotFoundException e) {
    throw new AbortException("Cannot create output file.", e);
  }
}
use of de.lmu.ifi.dbs.elki.utilities.documentation.Reference in project elki by elki-project.
the class SURFINGDependenceMeasure method dependence.
/**
 * Dependence score of two attributes, based on how the per-point neighbor
 * distances deviate from their mean.
 *
 * For every index, the squared Euclidean distances (in the 2-d space spanned
 * by the two attributes) to all indexes -- including itself -- are pushed
 * into a heap, and the square root of the heap's top element is recorded as
 * that point's distance value. The result is the summed absolute deviation
 * from the mean distance, divided by twice the mean times the number of
 * points strictly below the mean; it is 0 when no point is below the mean.
 *
 * NOTE(review): the heap is filled with an unbounded {@code add}, with k only
 * passed to the constructor; whether {@code peek()} then yields the intended
 * k-nearest-neighbor distance depends on {@code DoubleMinHeap} semantics --
 * please verify.
 *
 * @param adapter1 Array adapter for the first attribute
 * @param data1 Data of the first attribute
 * @param adapter2 Array adapter for the second attribute
 * @param data2 Data of the second attribute
 * @return Dependence score
 */
@Reference(authors = "Christian Baumgartner, Claudia Plant, Karin Kailing, Hans-Peter Kriegel, and Peer Kröger", //
    title = "Subspace Selection for Clustering High-Dimensional Data", //
    booktitle = "IEEE International Conference on Data Mining, 2004", //
    url = "http://dx.doi.org/10.1109/ICDM.2004.10112")
@Override
public <A, B> double dependence(NumberArrayAdapter<?, A> adapter1, A data1, NumberArrayAdapter<?, B> adapter2, B data2) {
  final int len = size(adapter1, data1, adapter2, data2);
  // One tenth of the data size, but at least 1.
  final int k = Math.max(1, len / 10);
  final double[] pointDists = new double[len];
  final DoubleMinHeap sqDists = new DoubleMinHeap(k);

  // First pass: per-point distance value, and their running sum.
  double distSum = 0.;
  for (int p = 0; p < len; ++p) {
    final double px = adapter1.getDouble(data1, p);
    final double py = adapter2.getDouble(data2, p);
    sqDists.clear();
    for (int q = 0; q < len; ++q) {
      final double qx = adapter1.getDouble(data1, q);
      final double qy = adapter2.getDouble(data2, q);
      final double deltaX = px - qx;
      final double deltaY = py - qy;
      // Squared Euclidean.
      sqDists.add(deltaX * deltaX + deltaY * deltaY);
    }
    // Back to (non-squared) Euclidean.
    final double dist = FastMath.sqrt(sqDists.peek());
    pointDists[p] = dist;
    distSum += dist;
  }
  final double mean = distSum / len;

  // Second pass: absolute deviation from the mean, and count below the mean.
  double deviation = 0.;
  int numBelow = 0;
  for (final double dist : pointDists) {
    deviation += Math.abs(mean - dist);
    if (dist < mean) {
      numBelow++;
    }
  }
  if (numBelow <= 0) {
    return 0;
  }
  return deviation / (2. * mean * numBelow);
}
use of de.lmu.ifi.dbs.elki.utilities.documentation.Reference in project elki by elki-project.
the class RANSACCovarianceMatrixBuilder method processIds.
/**
 * Build a covariance matrix from the largest consensus set found by RANSAC.
 *
 * In each iteration, {@code dim + 1} objects are sampled, a covariance model
 * is built from them, and every object in {@code ids} whose squared distance
 * under the inverted sample covariance matrix is below the 0.85 chi-squared
 * quantile (with {@code dim} degrees of freedom) is counted as support. The
 * largest support set seen so far is kept.
 *
 * The loop stops early as soon as the best support set covers all of
 * {@code ids}, since no later iteration can improve on that.
 *
 * @param ids Object ids to process
 * @param relation Relation to read the vectors from
 * @return Covariance matrix of the best support set, or of all {@code ids}
 *         when the best support set has at most {@code dim} members
 */
@Reference(title = "Random sample consensus: a paradigm for model fitting with applications to image analysis and automated cartography", //
    authors = "M.A. Fischler, R.C. Bolles", //
    booktitle = "Communications of the ACM, Vol. 24 Issue 6", //
    url = "http://dx.doi.org/10.1145/358669.358692")
@Override
public double[][] processIds(DBIDs ids, Relation<? extends NumberVector> relation) {
  final int dim = RelationUtil.dimensionality(relation);
  ModifiableDBIDs best = DBIDUtil.newHashSet(), support = DBIDUtil.newHashSet();
  final double threshold = ChiSquaredDistribution.quantile(0.85, dim);
  CovarianceMatrix cv = new CovarianceMatrix(dim);
  Random random = rnd.getSingleThreadedRandom();
  for (int i = 0; i < iterations; i++) {
    // Fit a model to a minimal random sample.
    DBIDs sample = DBIDUtil.randomSample(ids, dim + 1, random);
    cv.reset();
    for (DBIDIter it = sample.iter(); it.valid(); it.advance()) {
      cv.put(relation.get(it));
    }
    double[] centroid = cv.getMeanVector();
    double[][] p = inverse(cv.destroyToSampleMatrix());
    // Collect all objects that agree with this model.
    support.clear();
    for (DBIDIter id = ids.iter(); id.valid(); id.advance()) {
      double[] vec = minusEquals(relation.get(id).toArray(), centroid);
      double sqlen = transposeTimesTimes(vec, p, vec);
      if (sqlen < threshold) {
        support.add(id);
      }
    }
    // Keep the larger set as "best"; the other set is recycled as scratch
    // space for the next iteration.
    if (support.size() > best.size()) {
      ModifiableDBIDs swap = best;
      best = support;
      support = swap;
    }
    // Must test "best" here: after the swap above, "support" refers to the
    // previous (smaller) best set, so testing it would miss the early exit
    // in exactly the iteration that reached full consensus.
    if (best.size() >= ids.size()) {
      // Can't get better than this!
      break;
    }
  }
  // Fall back to regular PCA if too few samples.
  return CovarianceMatrix.make(relation, best.size() > dim ? best : ids).destroyToSampleMatrix();
}
Aggregations