use of de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress in project elki by elki-project.
the class DistanceQuantileSampler method run.
/**
 * Run the distance quantile sampler.
 *
 * Draws random pairs of distinct objects, computes their distance, and feeds
 * the accepted distances into a heap via {@code heap.add(dist, qsize)}. The
 * heap top is afterwards reported as the quantile distance, both in the
 * statistics log and in the returned result.
 *
 * @param database Database (not used by this method; the distance query is
 *        obtained from the relation)
 * @param rel Relation to sample object pairs from
 * @return Distances sample: a single row with a single "Distance" column
 *         holding the heap top
 * @throws AbortException if the relation contains fewer than two objects, or
 *         if the sample size exceeds {@link Integer#MAX_VALUE}
 */
public CollectionResult<double[]> run(Database database, Relation<O> rel) {
  DistanceQuery<O> dq = rel.getDistanceQuery(getDistanceFunction());
  final int size = rel.size();
  // Pairs are drawn as (x, y) with 0 <= y < x < size, which needs at least two
  // objects. Without this check, Random.nextInt(size - 1) fails with a bare
  // IllegalArgumentException (absolute sample size), or nothing is sampled and
  // the heap top is read without ever having been filled (relative size).
  if (size < 2) {
    throw new AbortException("At least two objects are required to sample distances.");
  }
  // NOTE(review): size * size / 2 slightly overestimates the number of
  // distinct pairs, size * (size - 1) / 2; kept to preserve sample sizes.
  final long pairs = (size * (long) size) >> 1;

  // Values of at most 1 are a fraction of all pairs, larger ones an absolute count.
  final long ssize = sampling <= 1 ? (long) Math.ceil(sampling * pairs) : (long) sampling;
  if (ssize > Integer.MAX_VALUE) {
    throw new AbortException("Sampling size too large.");
  }
  final int qsize = quantile <= 0 ? 1 : (int) Math.ceil(quantile * ssize);

  DoubleMaxHeap heap = new DoubleMaxHeap(qsize);

  ArrayDBIDs ids = DBIDUtil.ensureArray(rel.getDBIDs());
  DBIDArrayIter i1 = ids.iter(), i2 = ids.iter();
  Random r = rand.getSingleThreadedRandom();

  FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Sampling", (int) ssize, LOG) : null;
  for (long i = 0; i < ssize; i++) {
    // x in [1, size - 1], y in [0, x - 1]: two distinct positions.
    final int x = r.nextInt(size - 1) + 1, y = r.nextInt(x);
    final double dist = dq.distance(i1.seek(x), i2.seek(y));
    // Count every drawn pair, including the ones discarded below, so that the
    // progress matches the number of loop iterations.
    LOG.incrementProcessed(prog);
    // Skip NaN, and/or zeros.
    if (Double.isNaN(dist) || (nozeros && dist < Double.MIN_NORMAL)) {
      continue;
    }
    heap.add(dist, qsize);
  }

  LOG.statistics(new DoubleStatistic(PREFIX + ".quantile", quantile));
  LOG.statistics(new LongStatistic(PREFIX + ".samplesize", ssize));
  final double qdist = heap.peek();
  LOG.statistics(new DoubleStatistic(PREFIX + ".distance", qdist));
  LOG.ensureCompleted(prog);

  Collection<String> header = Arrays.asList(new String[] { "Distance" });
  Collection<double[]> data = Arrays.asList(new double[][] { new double[] { qdist } });
  return new CollectionResult<double[]>("Distances sample", "distance-sample", data, header);
}
use of de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress in project elki by elki-project.
the class DistanceStatisticsWithClasses method exactMinMax.
/**
 * Determine the smallest and the largest distance by evaluating the distance
 * for every ordered pair of distinct objects in the relation.
 *
 * @param relation Relation to process
 * @param distFunc Distance function
 * @return Exact maximum and minimum
 */
private DoubleMinMax exactMinMax(Relation<O> relation, DistanceQuery<O> distFunc) {
  FiniteProgress prog = null;
  if (LOG.isVerbose()) {
    prog = new FiniteProgress("Exact fitting distance computations", relation.size(), LOG);
  }
  final DoubleMinMax range = new DoubleMinMax();
  // Exhaustive scan: each object against each other object.
  for (DBIDIter outer = relation.iterDBIDs(); outer.valid(); outer.advance()) {
    DBIDIter inner = relation.iterDBIDs();
    while (inner.valid()) {
      // An object is never compared to itself.
      if (!DBIDUtil.equal(outer, inner)) {
        range.put(distFunc.distance(outer, inner));
      }
      inner.advance();
    }
    // Progress is counted per completed outer object.
    LOG.incrementProcessed(prog);
  }
  LOG.ensureCompleted(prog);
  return range;
}
use of de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress in project elki by elki-project.
the class EvaluateRankingQuality method run.
/**
 * Evaluate the ranking quality of the distance function with respect to the
 * label-based clusters of the database.
 *
 * The objects are clustered by label, and for every cluster the mean vector
 * and covariance matrix are computed. The members of each cluster are then
 * sorted by their Mahalanobis distance to the cluster mean; for every member,
 * the complete neighbor ranking is evaluated against the cluster, and the
 * score is added to a histogram at the relative position of the member
 * within its sorted cluster.
 *
 * @param database Database to process
 * @return Histogram result; each row contains the bin center, the count, the
 *         mean and the sample variance of the bin
 */
@Override
public HistogramResult run(Database database) {
  final Relation<V> relation = database.getRelation(getInputTypeRestriction()[0]);
  final int total = relation.size();
  final DistanceQuery<V> distQuery = database.getDistanceQuery(relation, getDistanceFunction());
  // k = relation size: every query returns a ranking of the whole relation.
  final KNNQuery<V> knnQuery = database.getKNNQuery(distQuery, total);

  if (LOG.isVerbose()) {
    LOG.verbose("Preprocessing clusters...");
  }
  // Partition the data by label.
  final ByLabelOrAllInOneClustering labelClustering = new ByLabelOrAllInOneClustering();
  final Collection<Cluster<Model>> clusters = labelClustering.run(database).getAllClusters();

  // Per-cluster mean vector and covariance matrix.
  final int numClusters = clusters.size();
  final HashMap<Cluster<?>, double[]> means = new HashMap<>(numClusters);
  final HashMap<Cluster<?>, double[][]> covariances = new HashMap<>(numClusters);
  for (Cluster<?> cluster : clusters) {
    final CovarianceMatrix cm = CovarianceMatrix.make(relation, cluster.getIDs());
    // The mean must be read before the matrix object is destroyed.
    means.put(cluster, cm.getMeanVector());
    covariances.put(cluster, cm.destroyToPopulationMatrix());
  }

  final MeanVarianceStaticHistogram hist = new MeanVarianceStaticHistogram(numbins, 0.0, 1.0);

  if (LOG.isVerbose()) {
    LOG.verbose("Processing points...");
  }
  FiniteProgress rocloop = null;
  if (LOG.isVerbose()) {
    rocloop = new FiniteProgress("Computing ROC AUC values", total, LOG);
  }

  final ROCEvaluation roc = new ROCEvaluation();
  for (Cluster<?> cluster : clusters) {
    final int csize = cluster.size();
    final double[] mean = means.get(cluster);
    final double[][] cov = covariances.get(cluster);

    // Order the cluster members by their distance to the cluster mean.
    final ModifiableDoubleDBIDList byDistance = DBIDUtil.newDistanceDBIDList(csize);
    for (DBIDIter member = cluster.getIDs().iter(); member.valid(); member.advance()) {
      byDistance.add(mahalanobisDistance(cov, relation.get(member).toArray(), mean), member);
    }
    byDistance.sort();

    DBIDArrayIter ranked = byDistance.iter();
    while (ranked.valid()) {
      final KNNList neighbors = knnQuery.getKNNForDBID(ranked, total);
      final double score = EvaluateClustering.evaluateRanking(roc, cluster, neighbors);
      // Histogram position: relative rank of the member within its cluster.
      hist.put(((double) ranked.getOffset()) / csize, score);
      LOG.incrementProcessed(rocloop);
      ranked.advance();
    }
  }
  LOG.ensureCompleted(rocloop);

  // Flatten the histogram into one double array per bin.
  final Collection<double[]> rows = new ArrayList<>(total);
  for (ObjHistogram.Iter<MeanVariance> bin = hist.iter(); bin.valid(); bin.advance()) {
    final MeanVariance stats = bin.getValue();
    rows.add(new double[] { bin.getCenter(), stats.getCount(), stats.getMean(), stats.getSampleVariance() });
  }
  return new HistogramResult("Ranking Quality Histogram", "ranking-histogram", rows);
}
use of de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress in project elki by elki-project.
the class RangeQuerySelectivity method run.
/**
 * Measure the result size of range queries with the configured radius on a
 * random sample of the relation, and report the mean and sample standard
 * deviation (absolute, and divided by the relation size) to the statistics
 * log.
 *
 * @param database Database to obtain the distance and range queries from
 * @param relation Relation to query
 * @return always {@code null}; the outcome is only written to the statistics
 *         log
 */
public Result run(Database database, Relation<V> relation) {
  final DistanceQuery<V> distQuery = database.getDistanceQuery(relation, getDistanceFunction());
  final RangeQuery<V> rangeQuery = database.getRangeQuery(distQuery, radius);
  final DBIDs ids = DBIDUtil.randomSample(relation.getDBIDs(), sampling, random);
  final MeanVariance numres = new MeanVariance();

  FiniteProgress prog = null;
  if (LOG.isVerbose()) {
    prog = new FiniteProgress("Performing range queries", ids.size(), LOG);
  }
  // One range query per sampled object; only the result size is recorded.
  DBIDIter iter = ids.iter();
  while (iter.valid()) {
    numres.put(rangeQuery.getRangeForDBID(iter, radius).size());
    LOG.incrementProcessed(prog);
    iter.advance();
  }
  LOG.ensureCompleted(prog);

  final String prefix = getClass().getName();
  final double mean = numres.getMean();
  LOG.statistics(new DoubleStatistic(prefix + ".mean", mean));
  final double stddev = numres.getSampleStddev();
  LOG.statistics(new DoubleStatistic(prefix + ".std", stddev));
  // Normalized variants: relative to the size of the full relation.
  final int total = relation.size();
  LOG.statistics(new DoubleStatistic(prefix + ".norm.mean", mean / total));
  LOG.statistics(new DoubleStatistic(prefix + ".norm.std", stddev / total));
  LOG.statistics(new LongStatistic(prefix + ".samplesize", ids.size()));
  return null;
}
use of de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress in project elki by elki-project.
the class KNNDistancesSampler method run.
/**
 * Compute the kNN distance for a random sample of the objects in the
 * relation, and wrap the collected distances in a
 * {@link KNNDistanceOrderResult}.
 *
 * @param database Database
 * @param relation Relation
 * @return Result
 */
public KNNDistanceOrderResult run(Database database, Relation<O> relation) {
  final DistanceQuery<O> distanceQuery = database.getDistanceQuery(relation, getDistanceFunction());
  // Queries use k + 1 neighbors - presumably because the query object itself
  // is part of its own neighbor list (TODO confirm).
  final KNNQuery<O> knnQuery = database.getKNNQuery(distanceQuery, k + 1);

  // Values of at most 1 are a fraction of the relation, larger values are an
  // absolute number of objects.
  final int size;
  if (sample <= 1.) {
    size = (int) Math.ceil(relation.size() * sample);
  }
  else {
    size = (int) sample;
  }
  final DBIDs sampleIds = DBIDUtil.randomSample(relation.getDBIDs(), size, rnd);

  FiniteProgress prog = null;
  if (LOG.isVerbose()) {
    prog = new FiniteProgress("Sampling kNN distances", size, LOG);
  }
  final double[] knnDistances = new double[size];
  int pos = 0;
  DBIDIter iditer = sampleIds.iter();
  while (iditer.valid()) {
    knnDistances[pos] = knnQuery.getKNNForDBID(iditer, k + 1).getKNNDistance();
    LOG.incrementProcessed(prog);
    iditer.advance();
    pos++;
  }
  LOG.ensureCompleted(prog);
  return new KNNDistanceOrderResult(knnDistances, k);
}
Aggregations