Use of de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress in project elki by elki-project.
Class DBSCAN, method runDBSCAN.
/**
 * Run DBSCAN over the relation: every object that is not yet contained in
 * {@code processedIDs} is handed to {@code expandCluster}.
 *
 * @param relation Data relation
 * @param rangeQuery Range query class
 */
protected void runDBSCAN(Relation<O> relation, RangeQuery<O> rangeQuery) {
  final int total = relation.size();
  // Progress loggers are only created when verbose logging is enabled.
  FiniteProgress objprog = null;
  if (LOG.isVerbose()) {
    objprog = new FiniteProgress("Processing objects", total, LOG);
  }
  IndefiniteProgress clusprog = null;
  if (LOG.isVerbose()) {
    clusprog = new IndefiniteProgress("Number of clusters", LOG);
  }
  processedIDs = DBIDUtil.newHashSet(total);
  // Seed buffer passed to every expandCluster call.
  ArrayModifiableDBIDs seeds = DBIDUtil.newArray();
  DBIDIter cursor = relation.iterDBIDs();
  while (cursor.valid()) {
    if (!processedIDs.contains(cursor)) {
      expandCluster(relation, rangeQuery, cursor, seeds, objprog, clusprog);
    }
    if (objprog != null && clusprog != null) {
      objprog.setProcessed(processedIDs.size(), LOG);
      clusprog.setProcessed(resultList.size(), LOG);
    }
    // Stop early once every object has been processed.
    if (processedIDs.size() == total) {
      break;
    }
    cursor.advance();
  }
  // Finish progress logging
  LOG.ensureCompleted(objprog);
  LOG.setCompleted(clusprog);
}
Use of de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress in project elki by elki-project.
Class EvaluateRetrievalPerformance, method run.
/**
 * Run the retrieval evaluation on a random sample of the relation.
 *
 * For every sampled query object that has at least one match in
 * {@code posn}, the average precision, ROC and kNN evaluations are
 * accumulated; the sums are then averaged over the number of such objects
 * and logged as statistics.
 *
 * @param database Database to run on (for kNN queries)
 * @param relation Relation for distance computations
 * @param lrelation Relation for class label comparison
 * @return Result holding the number of evaluated samples, the averaged MAP
 *         and ROC values, and the averaged kNN performance array
 * @throws AbortException if no query object was evaluated, or if the
 *         accumulated MAP/ROC sums are NaN or negative
 */
public RetrievalPerformanceResult run(Database database, Relation<O> relation, Relation<?> lrelation) {
  final DistanceQuery<O> distQuery = database.getDistanceQuery(relation, getDistanceFunction());
  final DBIDs ids = DBIDUtil.randomSample(relation.getDBIDs(), sampling, random);

  // Reused buffers: positive neighbors, distance list, label counters.
  ModifiableDBIDs posn = DBIDUtil.newHashSet();
  ModifiableDoubleDBIDList nlist = DBIDUtil.newDistanceDBIDList(relation.size());
  Object2IntOpenHashMap<Object> counters = new Object2IntOpenHashMap<>();

  // Running sums; divided by the sample count at the end.
  double map = 0.;
  double mroc = 0.;
  double[] knnperf = new double[maxk];
  int samples = 0;

  FiniteProgress objloop = null;
  if (LOG.isVerbose()) {
    objloop = new FiniteProgress("Processing query objects", ids.size(), LOG);
  }
  DBIDIter iter = ids.iter();
  while (iter.valid()) {
    final Object label = lrelation.get(iter);
    findMatches(posn, lrelation, label);
    if (posn.size() > 0) {
      computeDistances(nlist, iter, distQuery, relation);
      // The query object itself is only expected when includeSelf is set.
      final int expected = relation.size() - (includeSelf ? 0 : 1);
      if (nlist.size() != expected) {
        LOG.warning("Neighbor list does not have the desired size: " + nlist.size());
      }
      map += AveragePrecisionEvaluation.STATIC.evaluate(posn, nlist);
      mroc += ROCEvaluation.STATIC.evaluate(posn, nlist);
      KNNEvaluator.STATIC.evaluateKNN(knnperf, nlist, lrelation, counters, label);
      ++samples;
    }
    LOG.incrementProcessed(objloop);
    iter.advance();
  }
  LOG.ensureCompleted(objloop);

  if (samples < 1) {
    throw new AbortException("No object matched - are labels parsed correctly?");
  }
  // Written as a negated comparison so that NaN is rejected as well.
  if (!(map >= 0 && mroc >= 0)) {
    throw new AbortException("NaN in MAP/ROC.");
  }
  map /= samples;
  mroc /= samples;
  LOG.statistics(new DoubleStatistic(PREFIX + ".map", map));
  LOG.statistics(new DoubleStatistic(PREFIX + ".rocauc", mroc));
  LOG.statistics(new DoubleStatistic(PREFIX + ".samples", samples));
  for (int i = 0; i < maxk; i++) {
    knnperf[i] /= samples;
    LOG.statistics(new DoubleStatistic(PREFIX + ".knn-" + (i + 1), knnperf[i]));
  }
  return new RetrievalPerformanceResult(samples, map, mroc, knnperf);
}
Use of de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress in project elki by elki-project.
Class RankingQualityHistogram, method run.
/**
 * Process a database: evaluate a ranking quality value for every object
 * with respect to its (label-based) cluster, and collect the values in a
 * histogram over [0, 1].
 *
 * @param database Database to process
 * @param relation Relation to process
 * @return Histogram of ranking qualities; mean and sample variance of the
 *         values are attached as a header line
 */
public HistogramResult run(Database database, Relation<O> relation) {
  // The relation size is used as kNN size and as normalization constant.
  final int size = relation.size();
  final DistanceQuery<O> distanceQuery = database.getDistanceQuery(relation, getDistanceFunction());
  final KNNQuery<O> knnQuery = database.getKNNQuery(distanceQuery, size);
  if (LOG.isVerbose()) {
    LOG.verbose("Preprocessing clusters...");
  }
  // Cluster by labels
  Collection<Cluster<Model>> split = (new ByLabelOrAllInOneClustering()).run(database).getAllClusters();
  DoubleStaticHistogram hist = new DoubleStaticHistogram(numbins, 0.0, 1.0);
  if (LOG.isVerbose()) {
    LOG.verbose("Processing points...");
  }
  FiniteProgress progress = LOG.isVerbose() ? new FiniteProgress("Computing ROC AUC values", size, LOG) : null;
  ROCEvaluation roc = new ROCEvaluation();
  MeanVariance mv = new MeanVariance();
  // Each object contributes the same weight to the histogram.
  final double weight = 1. / size;
  // Evaluate the full neighbor ranking of every object against its cluster.
  for (Cluster<?> clus : split) {
    for (DBIDIter iter = clus.getIDs().iter(); iter.valid(); iter.advance()) {
      KNNList knn = knnQuery.getKNNForDBID(iter, size);
      double quality = EvaluateClustering.evaluateRanking(roc, clus, knn);
      mv.put(quality);
      hist.increment(quality, weight);
      LOG.incrementProcessed(progress);
    }
  }
  LOG.ensureCompleted(progress);
  // Transform Histogram into a Double Vector array. The list receives one
  // entry per histogram bin, so numbins (not the relation size) is the
  // appropriate initial capacity.
  Collection<double[]> res = new ArrayList<>(numbins);
  for (DoubleStaticHistogram.Iter iter = hist.iter(); iter.valid(); iter.advance()) {
    res.add(new double[] { iter.getCenter(), iter.getValue() });
  }
  HistogramResult result = new HistogramResult("Ranking Quality Histogram", "ranking-histogram", res);
  result.addHeader("Mean: " + mv.getMean() + " Variance: " + mv.getSampleVariance());
  return result;
}
Use of de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress in project elki by elki-project.
Class DOC, method runDOC.
/**
 * Performs a single run of DOC, finding a single cluster.
 *
 * For each of the {@code n} seed points, {@code m} random samples of size
 * {@code r} are drawn; each sample determines a set of relevant dimensions
 * and thereby a cluster candidate. The candidate with the highest quality
 * that has at least {@code minClusterSize} members is returned.
 *
 * @param database Database context
 * @param relation used to get actual values for DBIDs.
 * @param S The set of points we're working on.
 * @param d Dimensionality of the data set we're currently working on.
 * @param n Number of outer iterations (seed points).
 * @param m Number of inner iterations (per seed point).
 * @param r Size of random samples.
 * @param minClusterSize Minimum size a cluster must have to be accepted.
 * @return a cluster, if one is found, else <code>null</code>.
 */
protected Cluster<SubspaceModel> runDOC(Database database, Relation<V> relation, ArrayModifiableDBIDs S, final int d, int n, int m, int r, int minClusterSize) {
  // Best cluster for the current run.
  DBIDs C = null;
  // Relevant attributes for the best cluster.
  long[] D = null;
  // Quality of the best cluster.
  double quality = Double.NEGATIVE_INFINITY;
  // Inform the user about the progress in the current iteration.
  FiniteProgress iprogress = LOG.isVerbose() ? new FiniteProgress("Iteration progress for current cluster", m * n, LOG) : null;
  Random random = rnd.getSingleThreadedRandom();
  DBIDArrayIter iter = S.iter();
  for (int i = 0; i < n; ++i) {
    // Pick a random seed point.
    iter.seek(random.nextInt(S.size()));
    for (int j = 0; j < m; ++j) {
      // Choose a set of random points.
      DBIDs randomSet = DBIDUtil.randomSample(S, r, random);
      // Initialize cluster info.
      long[] nD = BitsUtil.zero(d);
      // Test each dimension and mark the relevant ones.
      for (int k = 0; k < d; ++k) {
        if (dimensionIsRelevant(k, relation, randomSet)) {
          BitsUtil.setI(nD, k);
        }
      }
      if (BitsUtil.cardinality(nD) > 0) {
        DBIDs nC = findNeighbors(iter, nD, S, relation);
        if (LOG.isDebuggingFiner()) {
          LOG.finer("Testing a cluster candidate, |C| = " + nC.size() + ", |D| = " + BitsUtil.cardinality(nD));
        }
        // Is the cluster large enough?
        if (nC.size() < minClusterSize) {
          // Too small. No 'continue' here: the progress counter below
          // must still be incremented for this iteration.
          if (LOG.isDebuggingFiner()) {
            LOG.finer("... but it's too small.");
          }
        } else {
          // Better cluster than before?
          double nQuality = computeClusterQuality(nC.size(), BitsUtil.cardinality(nD));
          if (nQuality > quality) {
            if (LOG.isDebuggingFiner()) {
              LOG.finer("... and it's the best so far: " + nQuality + " vs. " + quality);
            }
            C = nC;
            D = nD;
            quality = nQuality;
          } else {
            if (LOG.isDebuggingFiner()) {
              LOG.finer("... but we already have a better one.");
            }
          }
        }
      }
      // Count every inner iteration, including rejected candidates, so
      // that the progress actually reaches m * n.
      LOG.incrementProcessed(iprogress);
    }
  }
  LOG.ensureCompleted(iprogress);
  return (C != null) ? makeCluster(relation, C, D) : null;
}
Use of de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress in project elki by elki-project.
Class FastOPTICS, method run.
/**
 * Run the algorithm.
 *
 * Prepares the reachability storage and the index-derived density and
 * neighborhood information, then expands the cluster order from every
 * object that has not been processed yet.
 *
 * @param db Database
 * @param rel Relation
 * @return the cluster order that was built
 */
public ClusterOrder run(Database db, Relation<V> rel) {
  final DBIDs ids = rel.getDBIDs();
  final DistanceQuery<V> dq = db.getDistanceQuery(rel, EuclideanDistanceFunction.STATIC);

  // Reachability distances start out as UNDEFINED_DISTANCE.
  final int hints = DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP;
  reachDist = DataStoreUtil.makeDoubleStorage(ids, hints, UNDEFINED_DISTANCE);

  // Let the index do the projections, then read densities and neighbors.
  index.computeSetsBounds(rel, minPts, ids);
  inverseDensities = index.computeAverageDistInSet();
  neighs = index.getNeighs();

  // Compute the ordering as for OPTICS.
  FiniteProgress prog = null;
  if (LOG.isVerbose()) {
    prog = new FiniteProgress("FastOPTICS clustering", ids.size(), LOG);
  }
  processed = DBIDUtil.newHashSet(ids.size());
  order = new ClusterOrder(ids, "FastOPTICS Cluster Order", "fast-optics");
  for (DBIDIter it = ids.iter(); it.valid(); it.advance()) {
    if (processed.contains(it)) {
      // Already processed.
      continue;
    }
    expandClusterOrder(DBIDUtil.deref(it), order, dq, prog);
  }
  index.logStatistics();
  LOG.ensureCompleted(prog);
  return order;
}
Aggregations