use of de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress in project elki by elki-project.
the class SNE method optimizeSNE.
/**
 * Perform the actual SNE optimization.
 *
 * @param pij Initial affinity matrix
 * @param sol Solution output array (preinitialized)
 */
protected void optimizeSNE(AffinityMatrix pij, double[][] sol) {
  final int size = pij.size();
  if (size * 3L * dim > 0x7FFF_FFFAL) {
    throw new AbortException("Memory exceeds Java array size limit.");
  }
  // Meta information on each point; joined for memory locality.
  // Gradient, Momentum, and learning rate
  // For performance, we use a flat memory layout!
  double[] meta = new double[size * 3 * dim];
  final int dim3 = dim * 3;
  for (int off = 2 * dim; off < meta.length; off += dim3) {
    // Initial learning rate
    Arrays.fill(meta, off, off + dim, 1.);
  }
  // Affinity matrix in projected space
  double[][] qij = new double[size][size];
  FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Iterative Optimization", iterations, LOG) : null;
  Duration timer = LOG.isStatistics() ? LOG.newDuration(this.getClass().getName() + ".runtime.optimization").begin() : null;
  // Optimize
  for (int it = 0; it < iterations; it++) {
    double qij_sum = computeQij(qij, sol);
    computeGradient(pij, qij, qij_sum, sol, meta);
    updateSolution(sol, meta, it);
    LOG.incrementProcessed(prog);
  }
  LOG.ensureCompleted(prog);
  if (timer != null) {
    LOG.statistics(timer.end());
  }
}
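Every snippet in this collection follows the same null-guarded pattern: the FiniteProgress is only allocated when verbose logging is enabled (otherwise it stays null and the Logging helpers silently do nothing), incremented once per completed unit of work, and finished with ensureCompleted. A minimal self-contained sketch of that pattern; the class name and loop body are illustrative, not part of ELKI:

import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;

public class ProgressPatternDemo {
  private static final Logging LOG = Logging.getLogger(ProgressPatternDemo.class);

  public static void main(String[] args) {
    final int total = 1000;
    // Allocate only when verbose logging is on; the LOG helpers accept null.
    FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Demo loop", total, LOG) : null;
    for (int i = 0; i < total; i++) {
      // ... one unit of work ...
      LOG.incrementProcessed(prog);
    }
    LOG.ensureCompleted(prog);
  }
}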
use of de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress in project elki by elki-project.
the class TSNE method optimizetSNE.
/**
 * Perform the actual tSNE optimization.
 *
 * @param pij Initial affinity matrix
 * @param sol Solution output array (preinitialized)
 */
protected void optimizetSNE(AffinityMatrix pij, double[][] sol) {
  final int size = pij.size();
  if (size * 3L * dim > 0x7FFF_FFFAL) {
    throw new AbortException("Memory exceeds Java array size limit.");
  }
  // Meta information on each point; joined for memory locality.
  // Gradient, Momentum, and learning rate
  // For performance, we use a flat memory layout!
  double[] meta = new double[size * 3 * dim];
  final int dim3 = dim * 3;
  for (int off = 2 * dim; off < meta.length; off += dim3) {
    // Initial learning rate
    Arrays.fill(meta, off, off + dim, 1.);
  }
  // Affinity matrix in projected space
  double[][] qij = new double[size][size];
  FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Iterative Optimization", iterations, LOG) : null;
  Duration timer = LOG.isStatistics() ? LOG.newDuration(this.getClass().getName() + ".runtime.optimization").begin() : null;
  // Optimize
  for (int it = 0; it < iterations; it++) {
    double qij_sum = computeQij(qij, sol);
    computeGradient(pij, qij, qij_sum, sol, meta);
    updateSolution(sol, meta, it);
    // Undo early exaggeration
    if (it == EARLY_EXAGGERATION_ITERATIONS) {
      pij.scale(1. / EARLY_EXAGGERATION);
    }
    LOG.incrementProcessed(prog);
  }
  LOG.ensureCompleted(prog);
  if (timer != null) {
    LOG.statistics(timer.end());
  }
}
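The only difference to optimizeSNE above is early exaggeration: pij is assumed to arrive scaled up by an exaggeration factor, which strengthens the attractive forces during the first iterations, and the loop undoes the scaling once iteration EARLY_EXAGGERATION_ITERATIONS is reached. A sketch of that schedule under those assumptions; the constants and the scale helper are illustrative placeholders (the original t-SNE paper suggests a factor of 4 for roughly the first 50 iterations), not necessarily ELKI's values:

public class EarlyExaggerationSketch {
  // Hypothetical constants, not taken from ELKI.
  static final double EARLY_EXAGGERATION = 4.;
  static final int EARLY_EXAGGERATION_ITERATIONS = 50;

  static void optimize(double[][] pij, int iterations) {
    scale(pij, EARLY_EXAGGERATION); // exaggerate attraction before the loop
    for (int it = 0; it < iterations; it++) {
      // ... compute qij, gradient, and update the solution ...
      if (it == EARLY_EXAGGERATION_ITERATIONS) {
        scale(pij, 1. / EARLY_EXAGGERATION); // restore the true affinities
      }
    }
  }

  // Multiply every affinity by a constant factor.
  static void scale(double[][] m, double f) {
    for (double[] row : m) {
      for (int j = 0; j < row.length; j++) {
        row[j] *= f;
      }
    }
  }
}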
use of de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress in project elki by elki-project.
the class AveragePrecisionAtK method run.
/**
 * Run the algorithm
 *
 * @param database Database to run on (for kNN queries)
 * @param relation Relation for distance computations
 * @param lrelation Relation for class label comparison
 * @return Vectors containing, for each k, the mean, standard deviation,
 *         minimum, maximum, and sample count of the precision.
 */
public CollectionResult<double[]> run(Database database, Relation<O> relation, Relation<?> lrelation) {
  final DistanceQuery<O> distQuery = database.getDistanceQuery(relation, getDistanceFunction());
  final int qk = k + (includeSelf ? 0 : 1);
  final KNNQuery<O> knnQuery = database.getKNNQuery(distQuery, qk);
  MeanVarianceMinMax[] mvs = MeanVarianceMinMax.newArray(k);
  final DBIDs ids = DBIDUtil.randomSample(relation.getDBIDs(), sampling, random);
  FiniteProgress objloop = LOG.isVerbose() ? new FiniteProgress("Computing nearest neighbors", ids.size(), LOG) : null;
  // Evaluate the precision at each rank for the sampled objects.
  for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
    KNNList knn = knnQuery.getKNNForDBID(iter, qk);
    Object label = lrelation.get(iter);
    int positive = 0, i = 0;
    for (DBIDIter ri = knn.iter(); i < k && ri.valid(); ri.advance()) {
      if (!includeSelf && DBIDUtil.equal(iter, ri)) {
        // Do not increment i.
        continue;
      }
      positive += match(label, lrelation.get(ri)) ? 1 : 0;
      final double precision = positive / (double) (i + 1);
      mvs[i].put(precision);
      i++;
    }
    LOG.incrementProcessed(objloop);
  }
  LOG.ensureCompleted(objloop);
  // Transform the per-rank statistics into a vector array.
  Collection<double[]> res = new ArrayList<>(k);
  for (int i = 0; i < k; i++) {
    final MeanVarianceMinMax mv = mvs[i];
    final double std = mv.getCount() > 1. ? mv.getSampleStddev() : 0.;
    res.add(new double[] { i + 1, mv.getMean(), std, mv.getMin(), mv.getMax(), mv.getCount() });
  }
  return new CollectionResult<>("Average Precision", "average-precision", res);
}
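The inner loop realizes precision-at-k incrementally: after inspecting the neighbor at rank i + 1 (skipping the query point itself unless includeSelf is set), the precision is the number of label matches so far divided by i + 1. A standalone sketch of the same computation over a precomputed relevance ranking; class and method names are illustrative, not ELKI API:

import java.util.Arrays;

public class PrecisionAtKSketch {
  /** Precision at each cutoff 1..k of a ranked relevance list. */
  static double[] precisionAtK(boolean[] relevant, int k) {
    double[] prec = new double[k];
    int positive = 0;
    for (int i = 0; i < k && i < relevant.length; i++) {
      positive += relevant[i] ? 1 : 0;
      prec[i] = positive / (double) (i + 1);
    }
    return prec;
  }

  public static void main(String[] args) {
    boolean[] relevant = { true, false, true, true };
    // Prints [1.0, 0.5, 0.6666666666666666, 0.75]
    System.out.println(Arrays.toString(precisionAtK(relevant, 4)));
  }
}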
use of de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress in project elki by elki-project.
the class DistanceStatisticsWithClasses method run.
@Override
public HistogramResult run(Database database) {
  final Relation<O> relation = database.getRelation(getInputTypeRestriction()[0]);
  final DistanceQuery<O> distFunc = database.getDistanceQuery(relation, getDistanceFunction());
  final StepProgress stepprog = LOG.isVerbose() ? new StepProgress("Distance statistics", 2) : null;
  // determine binning ranges.
  DoubleMinMax gminmax = new DoubleMinMax();
  // Cluster by labels
  Collection<Cluster<Model>> split = (new ByLabelOrAllInOneClustering()).run(database).getAllClusters();
  // global in-cluster min/max
  DoubleMinMax giminmax = new DoubleMinMax();
  // global other-cluster min/max
  DoubleMinMax gominmax = new DoubleMinMax();
  // in-cluster distances
  MeanVariance mimin = new MeanVariance();
  MeanVariance mimax = new MeanVariance();
  MeanVariance midif = new MeanVariance();
  // other-cluster distances
  MeanVariance momin = new MeanVariance();
  MeanVariance momax = new MeanVariance();
  MeanVariance modif = new MeanVariance();
  // Histogram
  final ObjHistogram<long[]> histogram;
  LOG.beginStep(stepprog, 1, "Prepare histogram.");
  if (exact) {
    gminmax = exactMinMax(relation, distFunc);
    histogram = new LongArrayStaticHistogram(numbin, gminmax.getMin(), gminmax.getMax(), 2);
  } else if (sampling) {
    gminmax = sampleMinMax(relation, distFunc);
    histogram = new LongArrayStaticHistogram(numbin, gminmax.getMin(), gminmax.getMax(), 2);
  } else {
    histogram = new AbstractObjDynamicHistogram<long[]>(numbin) {
      @Override
      protected long[] downsample(Object[] data, int start, int end, int size) {
        long[] ret = new long[2];
        for (int i = start; i < end; i++) {
          long[] existing = (long[]) data[i];
          if (existing != null) {
            for (int c = 0; c < 2; c++) {
              ret[c] += existing[c];
            }
          }
        }
        return ret;
      }

      @Override
      protected long[] aggregate(long[] first, long[] second) {
        for (int c = 0; c < 2; c++) {
          first[c] += second[c];
        }
        return first;
      }

      @Override
      protected long[] cloneForCache(long[] data) {
        return data.clone();
      }

      @Override
      protected long[] makeObject() {
        return new long[2];
      }
    };
  }
  LOG.beginStep(stepprog, 2, "Build histogram.");
  final FiniteProgress progress = LOG.isVerbose() ? new FiniteProgress("Distance computations", relation.size(), LOG) : null;
  // iterate per cluster
  final long[] incFirst = new long[] { 1L, 0L };
  final long[] incSecond = new long[] { 0L, 1L };
  for (Cluster<?> c1 : split) {
    for (DBIDIter id1 = c1.getIDs().iter(); id1.valid(); id1.advance()) {
      // in-cluster distances
      DoubleMinMax iminmax = new DoubleMinMax();
      for (DBIDIter iter2 = c1.getIDs().iter(); iter2.valid(); iter2.advance()) {
        // skip the point itself.
        if (DBIDUtil.equal(id1, iter2)) {
          continue;
        }
        double d = distFunc.distance(id1, iter2);
        histogram.putData(d, incFirst);
        iminmax.put(d);
      }
      // aggregate
      mimin.put(iminmax.getMin());
      mimax.put(iminmax.getMax());
      midif.put(iminmax.getDiff());
      // min/max
      giminmax.put(iminmax.getMin());
      giminmax.put(iminmax.getMax());
      // other-cluster distances
      DoubleMinMax ominmax = new DoubleMinMax();
      for (Cluster<?> c2 : split) {
        if (c2 == c1) {
          continue;
        }
        for (DBIDIter iter2 = c2.getIDs().iter(); iter2.valid(); iter2.advance()) {
          // skip the point itself (shouldn't happen though)
          if (DBIDUtil.equal(id1, iter2)) {
            continue;
          }
          double d = distFunc.distance(id1, iter2);
          histogram.putData(d, incSecond);
          ominmax.put(d);
        }
      }
      // aggregate
      momin.put(ominmax.getMin());
      momax.put(ominmax.getMax());
      modif.put(ominmax.getDiff());
      // min/max
      gominmax.put(ominmax.getMin());
      gominmax.put(ominmax.getMax());
      LOG.incrementProcessed(progress);
    }
  }
  LOG.ensureCompleted(progress);
  // Update values (only needed for sampling case).
  gminmax.put(gominmax);
  LOG.setCompleted(stepprog);
  // count the number of samples we have in the data
  long inum = 0;
  long onum = 0;
  for (ObjHistogram.Iter<long[]> iter = histogram.iter(); iter.valid(); iter.advance()) {
    inum += iter.getValue()[0];
    onum += iter.getValue()[1];
  }
  long bnum = inum + onum;
  Collection<double[]> binstat = new ArrayList<>(numbin);
  for (ObjHistogram.Iter<long[]> iter = histogram.iter(); iter.valid(); iter.advance()) {
    final long[] value = iter.getValue();
    final double icof = (inum == 0) ? 0 : ((double) value[0]) / inum / histogram.getBinsize();
    final double icaf = ((double) value[0]) / bnum / histogram.getBinsize();
    final double ocof = (onum == 0) ? 0 : ((double) value[1]) / onum / histogram.getBinsize();
    final double ocaf = ((double) value[1]) / bnum / histogram.getBinsize();
    binstat.add(new double[] { iter.getCenter(), icof, icaf, ocof, ocaf });
  }
  HistogramResult result = new HistogramResult("Distance Histogram", "distance-histogram", binstat);
  result.addHeader("Absolute minimum distance (abs): " + gminmax.getMin());
  result.addHeader("Absolute maximum distance (abs): " + gminmax.getMax());
  result.addHeader("In-Cluster minimum distance (abs, avg, stddev): " + giminmax.getMin() + " " + mimin.getMean() + " " + mimin.getSampleStddev());
  result.addHeader("In-Cluster maximum distance (abs, avg, stddev): " + giminmax.getMax() + " " + mimax.getMean() + " " + mimax.getSampleStddev());
  result.addHeader("Other-Cluster minimum distance (abs, avg, stddev): " + gominmax.getMin() + " " + momin.getMean() + " " + momin.getSampleStddev());
  result.addHeader("Other-Cluster maximum distance (abs, avg, stddev): " + gominmax.getMax() + " " + momax.getMean() + " " + momax.getSampleStddev());
  result.addHeader("Column description: bin center, in-cluster only frequency, in-cluster all frequency, other-cluster only frequency, other cluster all frequency");
  result.addHeader("In-cluster value count: " + inum + " other cluster value count: " + onum);
  return result;
}
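In the final loop, icof and ocof normalize each bin count by the in-cluster and other-cluster totals respectively, icaf and ocaf by the combined total, and all four divide by the bin width, turning raw counts into relative frequency densities that integrate to one over the distance range. A small sketch of that normalization; the class and helper names are hypothetical:

public class HistogramDensitySketch {
  /** Normalize bin counts to a frequency density: count / total / binWidth. */
  static double[] toDensity(long[] counts, double binWidth) {
    long total = 0;
    for (long c : counts) {
      total += c;
    }
    double[] density = new double[counts.length];
    for (int i = 0; i < counts.length; i++) {
      // Guard against an empty histogram, mirroring the (inum == 0) checks above.
      density[i] = total == 0 ? 0. : counts[i] / (double) total / binWidth;
    }
    return density;
  }
}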
use of de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress in project elki by elki-project.
the class KNNJoin method run.
/**
 * Inner run method. This returns a data store of kNN lists, and is used by
 * {@link de.lmu.ifi.dbs.elki.index.preprocessed.knn.KNNJoinMaterializeKNNPreprocessor}
 *
 * @param relation Data relation
 * @param ids Object IDs
 * @return Data store
 */
@SuppressWarnings("unchecked")
public WritableDataStore<KNNList> run(Relation<V> relation, DBIDs ids) {
  if (!(getDistanceFunction() instanceof SpatialPrimitiveDistanceFunction)) {
    throw new IllegalStateException("Distance Function must be an instance of " + SpatialPrimitiveDistanceFunction.class.getName());
  }
  Collection<SpatialIndexTree<N, E>> indexes = ResultUtil.filterResults(relation.getHierarchy(), relation, SpatialIndexTree.class);
  if (indexes.size() != 1) {
    throw new MissingPrerequisitesException("KNNJoin found " + indexes.size() + " spatial indexes, expected exactly one.");
  }
  // FIXME: Ensure we're looking at the right relation!
  SpatialIndexTree<N, E> index = indexes.iterator().next();
  SpatialPrimitiveDistanceFunction<V> distFunction = (SpatialPrimitiveDistanceFunction<V>) getDistanceFunction();
  // data pages
  List<E> ps_candidates = new ArrayList<>(index.getLeaves());
  // knn heaps
  List<List<KNNHeap>> heaps = new ArrayList<>(ps_candidates.size());
  // Initialize with the page self-pairing
  for (int i = 0; i < ps_candidates.size(); i++) {
    E pr_entry = ps_candidates.get(i);
    N pr = index.getNode(pr_entry);
    heaps.add(initHeaps(distFunction, pr));
  }
  // Build priority queue
  final int sqsize = ps_candidates.size() * (ps_candidates.size() - 1) >>> 1;
  ComparableMinHeap<Task> pq = new ComparableMinHeap<>(sqsize);
  if (LOG.isDebuggingFine()) {
    LOG.debugFine("Number of leaves: " + ps_candidates.size() + " so " + sqsize + " MBR computations.");
  }
  FiniteProgress mprogress = LOG.isVerbose() ? new FiniteProgress("Comparing leaf MBRs", sqsize, LOG) : null;
  for (int i = 0; i < ps_candidates.size(); i++) {
    E pr_entry = ps_candidates.get(i);
    N pr = index.getNode(pr_entry);
    List<KNNHeap> pr_heaps = heaps.get(i);
    double pr_knn_distance = computeStopDistance(pr_heaps);
    for (int j = i + 1; j < ps_candidates.size(); j++) {
      E ps_entry = ps_candidates.get(j);
      N ps = index.getNode(ps_entry);
      List<KNNHeap> ps_heaps = heaps.get(j);
      double ps_knn_distance = computeStopDistance(ps_heaps);
      double minDist = distFunction.minDist(pr_entry, ps_entry);
      // Resolve immediately:
      if (minDist <= 0.) {
        processDataPages(distFunction, pr_heaps, ps_heaps, pr, ps);
      } else if (minDist <= pr_knn_distance || minDist <= ps_knn_distance) {
        pq.add(new Task(minDist, i, j));
      }
      LOG.incrementProcessed(mprogress);
    }
  }
  LOG.ensureCompleted(mprogress);
  // Process the queue
  FiniteProgress qprogress = LOG.isVerbose() ? new FiniteProgress("Processing queue", pq.size(), LOG) : null;
  IndefiniteProgress fprogress = LOG.isVerbose() ? new IndefiniteProgress("Full comparisons", LOG) : null;
  while (!pq.isEmpty()) {
    Task task = pq.poll();
    List<KNNHeap> pr_heaps = heaps.get(task.i);
    List<KNNHeap> ps_heaps = heaps.get(task.j);
    double pr_knn_distance = computeStopDistance(pr_heaps);
    double ps_knn_distance = computeStopDistance(ps_heaps);
    boolean dor = task.mindist <= pr_knn_distance;
    boolean dos = task.mindist <= ps_knn_distance;
    if (dor || dos) {
      N pr = index.getNode(ps_candidates.get(task.i));
      N ps = index.getNode(ps_candidates.get(task.j));
      if (dor && dos) {
        processDataPages(distFunction, pr_heaps, ps_heaps, pr, ps);
      } else if (dor) {
        processDataPages(distFunction, pr_heaps, null, pr, ps);
      } else /* dos */ {
        processDataPages(distFunction, ps_heaps, null, ps, pr);
      }
      LOG.incrementProcessed(fprogress);
    }
    LOG.incrementProcessed(qprogress);
  }
  LOG.ensureCompleted(qprogress);
  LOG.setCompleted(fprogress);
  WritableDataStore<KNNList> knnLists = DataStoreUtil.makeStorage(ids, DataStoreFactory.HINT_STATIC, KNNList.class);
  FiniteProgress pageprog = LOG.isVerbose() ? new FiniteProgress("Number of processed data pages", ps_candidates.size(), LOG) : null;
  for (int i = 0; i < ps_candidates.size(); i++) {
    N pr = index.getNode(ps_candidates.get(i));
    List<KNNHeap> pr_heaps = heaps.get(i);
    // Finalize lists
    for (int j = 0; j < pr.getNumEntries(); j++) {
      knnLists.put(((LeafEntry) pr.getEntry(j)).getDBID(), pr_heaps.get(j).toKNNList());
    }
    // Forget heaps and pq
    heaps.set(i, null);
    LOG.incrementProcessed(pageprog);
  }
  LOG.ensureCompleted(pageprog);
  return knnLists;
}
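The queue phase only expands a page pair while it can still improve some candidate list: computeStopDistance presumably yields the worst current k-nearest-neighbor distance over all heaps of a page, and a pair is processed only if the minimum MBR distance does not exceed that bound for at least one side. A sketch of that pruning test under those assumptions; the names are hypothetical:

public class KNNJoinPruningSketch {
  /** Upper bound: the worst current kNN distance of any point in the page. */
  static double stopDistance(double[] worstKnnDistPerPoint) {
    double max = 0.;
    for (double d : worstKnnDistPerPoint) {
      max = d > max ? d : max;
    }
    return max;
  }

  /** A pair can be skipped once the minimum MBR distance exceeds both bounds. */
  static boolean needsProcessing(double minDist, double prStop, double psStop) {
    return minDist <= prStop || minDist <= psStop;
  }
}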