use of de.lmu.ifi.dbs.elki.logging.statistics.LongStatistic in project elki by elki-project.
the class DistanceQuantileSampler method run.
/**
 * Run the distance quantile sampler.
 *
 * Draws random pairs of distinct objects, computes their distance, and feeds
 * it into a max-heap limited to {@code qsize} entries; the heap top is then
 * reported as the estimated quantile distance. (This assumes that
 * {@code DoubleMaxHeap.add(value, max)} keeps only the {@code max} smallest
 * values - confirm against the heap implementation.)
 *
 * The sample size is {@code sampling} itself if it is larger than 1, and
 * otherwise that fraction of {@code size * size / 2}.
 *
 * @param database Database (not used by this method)
 * @param rel Relation to sample distances from
 * @return Distances sample, a single row holding the estimated quantile distance
 * @throws AbortException if the sample size exceeds the int range, or if
 *         samples are requested from a relation with fewer than two objects
 */
public CollectionResult<double[]> run(Database database, Relation<O> rel) {
  DistanceQuery<O> dq = rel.getDistanceQuery(getDistanceFunction());
  final int size = rel.size();
  // Widen to long before multiplying, so the product cannot overflow int.
  final long pairs = (size * (long) size) >> 1;
  final long ssize = sampling <= 1 ? (long) Math.ceil(sampling * pairs) : (long) sampling;
  if (ssize > Integer.MAX_VALUE) {
    throw new AbortException("Sampling size too large.");
  }
  // Random.nextInt(size - 1) below needs a positive bound. Fail with a
  // descriptive message instead of a bare IllegalArgumentException.
  if (ssize > 0 && size < 2) {
    throw new AbortException("Distance sampling requires at least two objects, but the relation contains " + size + ".");
  }
  final int qsize = quantile <= 0 ? 1 : (int) Math.ceil(quantile * ssize);
  DoubleMaxHeap heap = new DoubleMaxHeap(qsize);
  ArrayDBIDs ids = DBIDUtil.ensureArray(rel.getDBIDs());
  DBIDArrayIter i1 = ids.iter(), i2 = ids.iter();
  Random r = rand.getSingleThreadedRandom();
  FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Sampling", (int) ssize, LOG) : null;
  for (long i = 0; i < ssize; i++) {
    // x in [1, size - 1], y in [0, x - 1]: always two different offsets.
    int x = r.nextInt(size - 1) + 1, y = r.nextInt(x);
    double dist = dq.distance(i1.seek(x), i2.seek(y));
    // Skip NaN (the only value not equal to itself), and optionally zeros.
    // Skipped samples are not redrawn and are not counted as progress.
    if (dist != dist || (nozeros && dist < Double.MIN_NORMAL)) {
      continue;
    }
    heap.add(dist, qsize);
    LOG.incrementProcessed(prog);
  }
  // NOTE(review): if every sample was skipped (or ssize is 0) the heap is
  // empty here; what peek() returns then depends on DoubleMaxHeap - confirm.
  final double qdist = heap.peek();
  LOG.statistics(new DoubleStatistic(PREFIX + ".quantile", quantile));
  LOG.statistics(new LongStatistic(PREFIX + ".samplesize", ssize));
  LOG.statistics(new DoubleStatistic(PREFIX + ".distance", qdist));
  LOG.ensureCompleted(prog);
  Collection<String> header = Arrays.asList("Distance");
  Collection<double[]> data = Arrays.asList(new double[][] { { qdist } });
  return new CollectionResult<double[]>("Distances sample", "distance-sample", data, header);
}
use of de.lmu.ifi.dbs.elki.logging.statistics.LongStatistic in project elki by elki-project.
the class RangeQuerySelectivity method run.
/**
 * Measure the result sizes of range queries on a random sample of objects.
 *
 * One range query with the configured radius is issued per sampled object.
 * Mean and sample standard deviation of the result sizes are written to the
 * statistics log, both as absolute values and divided by the relation size,
 * followed by the number of sampled objects.
 *
 * @param database Database to obtain the distance and range queries from
 * @param relation Relation to query
 * @return always {@code null}; the output goes to the statistics log only
 */
public Result run(Database database, Relation<V> relation) {
  final DistanceQuery<V> distances = database.getDistanceQuery(relation, getDistanceFunction());
  final RangeQuery<V> ranges = database.getRangeQuery(distances, radius);
  final MeanVariance resultSizes = new MeanVariance();
  final DBIDs sample = DBIDUtil.randomSample(relation.getDBIDs(), sampling, random);

  final FiniteProgress progress = LOG.isVerbose() ? new FiniteProgress("Performing range queries", sample.size(), LOG) : null;
  DBIDIter it = sample.iter();
  while (it.valid()) {
    resultSizes.put(ranges.getRangeForDBID(it, radius).size());
    LOG.incrementProcessed(progress);
    it.advance();
  }
  LOG.ensureCompleted(progress);

  // Statistics are keyed by the runtime class name.
  final String key = this.getClass().getName();
  final double mean = resultSizes.getMean();
  LOG.statistics(new DoubleStatistic(key + ".mean", mean));
  final double stddev = resultSizes.getSampleStddev();
  LOG.statistics(new DoubleStatistic(key + ".std", stddev));
  // Normalized values: relative to the total number of objects.
  final int total = relation.size();
  LOG.statistics(new DoubleStatistic(key + ".norm.mean", mean / total));
  LOG.statistics(new DoubleStatistic(key + ".norm.std", stddev / total));
  LOG.statistics(new LongStatistic(key + ".samplesize", sample.size()));
  return null;
}
use of de.lmu.ifi.dbs.elki.logging.statistics.LongStatistic in project elki by elki-project.
the class INFLO method run.
/**
 * Compute the INFLO outlier score of every object in the relation.
 *
 * Works in three logged steps: obtain the nearest-neighbor query, build the
 * reverse neighbor lists (collecting pruned objects along the way), and
 * compute the scores.
 *
 * @param database Database to process
 * @param relation Relation to process
 * @return Outlier result
 */
public OutlierResult run(Database database, Relation<O> relation) {
  final StepProgress steps = LOG.isVerbose() ? new StepProgress("INFLO", 3) : null;

  // Step 1: nearest neighbors.
  LOG.beginStep(steps, 1, "Materializing nearest-neighbor sets.");
  final KNNQuery<O> knnQuery = DatabaseUtil.precomputedKNNQuery(database, relation, getDistanceFunction(), kplus1);

  // Step 2: reverse nearest neighbors, minus the kNN.
  LOG.beginStep(steps, 2, "Materialize reverse NN.");
  final ModifiableDBIDs prunedIds = DBIDUtil.newHashSet();
  final WritableDataStore<ModifiableDBIDs> reverseNeighbors = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_TEMP | DataStoreFactory.HINT_HOT, ModifiableDBIDs.class);
  // Every object starts out with an empty reverse neighbor list.
  DBIDIter it = relation.iterDBIDs();
  while (it.valid()) {
    reverseNeighbors.put(it, DBIDUtil.newArray());
    it.advance();
  }
  computeNeighborhoods(relation, knnQuery, prunedIds, reverseNeighbors);

  // Step 3: the actual INFLO scores, tracking their minimum and maximum.
  LOG.beginStep(steps, 3, "Compute INFLO scores.");
  final DoubleMinMax scoreRange = new DoubleMinMax();
  final WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC);
  // Beware: this call modifies the kNN, by adding the reverse neighbors!
  computeINFLO(relation, prunedIds, knnQuery, reverseNeighbors, scores, scoreRange);
  LOG.setCompleted(steps);
  LOG.statistics(new LongStatistic(INFLO.class.getName() + ".pruned", prunedIds.size()));

  // Package scores and score metadata into the result.
  final DoubleRelation scoreRelation = new MaterializedDoubleRelation("Influence Outlier Score", "inflo-outlier", scores, relation.getDBIDs());
  final OutlierScoreMeta meta = new QuotientOutlierScoreMeta(scoreRange.getMin(), scoreRange.getMax(), 0., Double.POSITIVE_INFINITY, 1.);
  return new OutlierResult(meta, scoreRelation);
}
use of de.lmu.ifi.dbs.elki.logging.statistics.LongStatistic in project elki by elki-project.
the class KMeansElkan method run.
/**
* Cluster the relation with k-means, maintaining one upper bound and k lower
* bounds per object (the "Elkan bounds" below) that are handed to the
* assignment helpers.
*
* The loop ends when an iteration changes no assignment, or after
* {@code maxiter} iterations if {@code maxiter} is positive.
*
* @param database Database, passed on to the initializer
* @param relation Relation to cluster
* @return Clustering with one top-level cluster per non-empty cluster; an
*         empty clustering if the relation contains no objects
*/
@Override
public Clustering<KMeansModel> run(Database database, Relation<V> relation) {
// Nothing to cluster: return an empty result.
if (relation.size() <= 0) {
return new Clustering<>("k-Means Clustering", "kmeans-clustering");
}
// Choose initial means
if (LOG.isStatistics()) {
LOG.statistics(new StringStatistic(KEY + ".initialization", initializer.toString()));
}
double[][] means = initializer.chooseInitialMeans(database, relation, k, getDistanceFunction());
// Setup cluster assignment store
List<ModifiableDBIDs> clusters = new ArrayList<>();
for (int i = 0; i < k; i++) {
// Argument is twice the average cluster size (presumably a capacity hint).
clusters.add(DBIDUtil.newHashSet((int) (relation.size() * 2. / k)));
}
// The trailing -1 is presumably the default value, i.e. "not assigned yet" - confirm.
WritableIntegerDataStore assignment = DataStoreUtil.makeIntegerStorage(relation.getDBIDs(), DataStoreFactory.HINT_TEMP | DataStoreFactory.HINT_HOT, -1);
// Elkan bounds
// One upper bound per object (created with +infinity), and an array of k
// lower bounds per object.
WritableDoubleDataStore upper = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_TEMP | DataStoreFactory.HINT_HOT, Double.POSITIVE_INFINITY);
WritableDataStore<double[]> lower = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_TEMP | DataStoreFactory.HINT_HOT, double[].class);
for (DBIDIter it = relation.iterDBIDs(); it.valid(); it.advance()) {
// Filled with 0.
lower.put(it, new double[k]);
}
// Storage for updated means:
// Dimensionality is taken from the first mean, so at least one mean must exist.
final int dim = means[0].length;
double[][] sums = new double[k][dim];
// Cluster separation
double[] sep = new double[k];
// Cluster distances
double[][] cdist = new double[k][k];
IndefiniteProgress prog = LOG.isVerbose() ? new IndefiniteProgress("K-Means iteration", LOG) : null;
// Reassignment counter, only allocated when statistics logging is enabled.
LongStatistic rstat = LOG.isStatistics() ? new LongStatistic(this.getClass().getName() + ".reassignments") : null;
int iteration = 0;
// maxiter <= 0 means no iteration limit.
for (; maxiter <= 0 || iteration < maxiter; iteration++) {
LOG.incrementProcessed(prog);
// Number of assignments changed in this iteration, as returned by the helpers.
int changed;
if (iteration == 0) {
// First pass uses the dedicated initial assignment helper.
changed = initialAssignToNearestCluster(relation, means, sums, clusters, assignment, upper, lower);
} else {
// #1
// NOTE(review): "#1" presumably refers to a step number of the published
// algorithm - confirm. sep and cdist are refreshed from the current means
// before they are used for the assignment.
recomputeSeperation(means, sep, cdist);
changed = assignToNearestCluster(relation, means, sums, clusters, assignment, sep, cdist, upper, lower);
}
if (rstat != null) {
rstat.setLong(changed);
LOG.statistics(rstat);
}
// Stop if no cluster assignment changed.
// Note that break skips the loop increment, so the final (unchanged)
// iteration is not included in the iteration count logged below.
if (changed == 0) {
break;
}
// Recompute means.
// Scale each sum by 1 / cluster size; empty clusters use factor 1.
// (timesEquals presumably multiplies the array in place - confirm.)
for (int i = 0; i < k; i++) {
final int s = clusters.get(i).size();
timesEquals(sums[i], s > 0 ? 1. / s : 1.);
}
// Overwrites sep
// At this point sums holds the new means and means still the old ones.
maxMoved(means, sums, sep);
updateBounds(relation, assignment, upper, lower, sep);
for (int i = 0; i < k; i++) {
final int s = clusters.get(i).size();
// Adopt the new mean.
System.arraycopy(sums[i], 0, means[i], 0, dim);
// Restore to sum for next iteration
timesEquals(sums[i], s > 0 ? s : 1.);
}
}
LOG.setCompleted(prog);
if (LOG.isStatistics()) {
LOG.statistics(new LongStatistic(KEY + ".iterations", iteration));
}
// The bounds are only needed during the iterations.
upper.destroy();
lower.destroy();
// Wrap result
double totalvariance = 0.;
Clustering<KMeansModel> result = new Clustering<>("k-Means Clustering", "kmeans-clustering");
for (int i = 0; i < clusters.size(); i++) {
DBIDs ids = clusters.get(i);
// Empty clusters are not reported.
if (ids.size() == 0) {
continue;
}
double[] mean = means[i];
// Stays 0 unless varstat is set.
double varsum = 0.;
if (varstat) {
DoubleVector mvec = DoubleVector.wrap(mean);
// NOTE(review): this adds up the values returned by the distance function;
// it is a variance sum only if that function yields squared distances - confirm.
for (DBIDIter it = ids.iter(); it.valid(); it.advance()) {
varsum += distanceFunction.distance(mvec, relation.get(it));
}
totalvariance += varsum;
}
KMeansModel model = new KMeansModel(mean, varsum);
result.addToplevelCluster(new Cluster<>(ids, model));
}
if (LOG.isStatistics() && varstat) {
LOG.statistics(new DoubleStatistic(this.getClass().getName() + ".variance-sum", totalvariance));
}
return result;
}
use of de.lmu.ifi.dbs.elki.logging.statistics.LongStatistic in project elki by elki-project.
the class KMeansHybridLloydMacQueen method run.
/**
 * Cluster the relation with k-means, alternating between a MacQueen pass and
 * a Lloyd pass.
 *
 * Each round first calls {@code macQueenIterate}, then
 * {@code assignToNearestCluster} followed by a recomputation of the means.
 * The loop ends as soon as either pass reports no change, or once the
 * iteration counter (advanced by 2 per completed round) reaches a positive
 * {@code maxiter}.
 *
 * @param database Database, passed on to the initializer
 * @param relation Relation to cluster
 * @return Clustering with one top-level cluster per non-empty cluster; an
 *         empty clustering if the relation contains no objects
 */
@Override
public Clustering<KMeansModel> run(Database database, Relation<V> relation) {
  if (relation.size() <= 0) {
    return new Clustering<>("k-Means Clustering", "kmeans-clustering");
  }
  // Initial cluster centers.
  if (LOG.isStatistics()) {
    LOG.statistics(new StringStatistic(KEY + ".initialization", initializer.toString()));
  }
  double[][] centers = initializer.chooseInitialMeans(database, relation, k, getDistanceFunction());

  // Per-cluster member sets, and the object to cluster index mapping.
  final List<ModifiableDBIDs> members = new ArrayList<>();
  for (int c = 0; c < k; c++) {
    members.add(DBIDUtil.newHashSet((int) (relation.size() * 2. / k)));
  }
  final WritableIntegerDataStore assignment = DataStoreUtil.makeIntegerStorage(relation.getDBIDs(), DataStoreFactory.HINT_TEMP | DataStoreFactory.HINT_HOT, -1);
  final double[] variances = new double[k];

  final IndefiniteProgress progress = LOG.isVerbose() ? new IndefiniteProgress("K-Means iteration", LOG) : null;
  final DoubleStatistic varianceStat = LOG.isStatistics() ? new DoubleStatistic(this.getClass().getName() + ".variance-sum") : null;

  int iteration = 0;
  while (maxiter <= 0 || iteration < maxiter) {
    // First half of the round: MacQueen.
    LOG.incrementProcessed(progress);
    final boolean macQueenChanged = macQueenIterate(relation, centers, members, assignment, variances);
    logVarstat(varianceStat, variances);
    if (!macQueenChanged) {
      break;
    }

    // Second half of the round: Lloyd.
    LOG.incrementProcessed(progress);
    final boolean lloydChanged = assignToNearestCluster(relation, centers, members, assignment, variances);
    logVarstat(varianceStat, variances);
    // Converged when no object switched its cluster.
    if (!lloydChanged) {
      break;
    }
    // Update the centers from the new assignment.
    centers = means(members, centers, relation);

    iteration += 2;
  }
  LOG.setCompleted(progress);
  if (LOG.isStatistics()) {
    LOG.statistics(new LongStatistic(KEY + ".iterations", iteration));
  }

  // Build the result, leaving out empty clusters.
  final Clustering<KMeansModel> clustering = new Clustering<>("k-Means Clustering", "kmeans-clustering");
  for (int c = 0; c < members.size(); c++) {
    final DBIDs ids = members.get(c);
    if (ids.size() != 0) {
      clustering.addToplevelCluster(new Cluster<>(ids, new KMeansModel(centers[c], variances[c])));
    }
  }
  return clustering;
}
Aggregations