Use of de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress in project elki by elki-project.
The class KNNBenchmarkAlgorithm, method run.
/**
 * Run the algorithm.
 *
 * @param database Database
 * @param relation Relation
 * @return Null result
 */
public Result run(Database database, Relation<O> relation) {
  // Get a distance and kNN query instance.
  DistanceQuery<O> distQuery = database.getDistanceQuery(relation, getDistanceFunction());
  KNNQuery<O> knnQuery = database.getKNNQuery(distQuery, k);
  // No query set - use original database.
  if (queries == null) {
    final DBIDs sample = DBIDUtil.randomSample(relation.getDBIDs(), sampling, random);
    FiniteProgress prog = LOG.isVeryVerbose() ? new FiniteProgress("kNN queries", sample.size(), LOG) : null;
    int hash = 0;
    MeanVariance mv = new MeanVariance(), mvdist = new MeanVariance();
    for (DBIDIter iditer = sample.iter(); iditer.valid(); iditer.advance()) {
      KNNList knns = knnQuery.getKNNForDBID(iditer, k);
      int ichecksum = 0;
      for (DBIDIter it = knns.iter(); it.valid(); it.advance()) {
        ichecksum += DBIDUtil.asInteger(it);
      }
      hash = Util.mixHashCodes(hash, ichecksum);
      mv.put(knns.size());
      mvdist.put(knns.getKNNDistance());
      LOG.incrementProcessed(prog);
    }
    LOG.ensureCompleted(prog);
    if (LOG.isStatistics()) {
      LOG.statistics("Result hashcode: " + hash);
      LOG.statistics("Mean number of results: " + mv.getMean() + " +- " + mv.getNaiveStddev());
      if (mvdist.getCount() > 0) {
        LOG.statistics("Mean k-distance: " + mvdist.getMean() + " +- " + mvdist.getNaiveStddev());
      }
    }
  } else {
    // Separate query set.
    TypeInformation res = getDistanceFunction().getInputTypeRestriction();
    MultipleObjectsBundle bundle = queries.loadData();
    int col = -1;
    for (int i = 0; i < bundle.metaLength(); i++) {
      if (res.isAssignableFromType(bundle.meta(i))) {
        col = i;
        break;
      }
    }
    if (col < 0) {
      throw new IncompatibleDataException("No compatible data type in query input was found. Expected: " + res.toString());
    }
    // Random sampling is a bit of a hack, sorry.
    // But currently, we don't (yet) have an "integer random sample" function.
    DBIDRange sids = DBIDUtil.generateStaticDBIDRange(bundle.dataLength());
    final DBIDs sample = DBIDUtil.randomSample(sids, sampling, random);
    FiniteProgress prog = LOG.isVeryVerbose() ? new FiniteProgress("kNN queries", sample.size(), LOG) : null;
    int hash = 0;
    MeanVariance mv = new MeanVariance(), mvdist = new MeanVariance();
    for (DBIDIter iditer = sample.iter(); iditer.valid(); iditer.advance()) {
      int off = sids.binarySearch(iditer);
      assert (off >= 0);
      @SuppressWarnings("unchecked")
      O o = (O) bundle.data(off, col);
      KNNList knns = knnQuery.getKNNForObject(o, k);
      int ichecksum = 0;
      for (DBIDIter it = knns.iter(); it.valid(); it.advance()) {
        ichecksum += DBIDUtil.asInteger(it);
      }
      hash = Util.mixHashCodes(hash, ichecksum);
      mv.put(knns.size());
      mvdist.put(knns.getKNNDistance());
      LOG.incrementProcessed(prog);
    }
    LOG.ensureCompleted(prog);
    if (LOG.isStatistics()) {
      LOG.statistics("Result hashcode: " + hash);
      LOG.statistics("Mean number of results: " + mv.getMean() + " +- " + mv.getNaiveStddev());
      if (mvdist.getCount() > 0) {
        LOG.statistics("Mean k-distance: " + mvdist.getMean() + " +- " + mvdist.getNaiveStddev());
      }
    }
  }
  return null;
}
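Every snippet on this page uses the same FiniteProgress idiom: the progress object is allocated only when the matching log level is enabled, and it is passed to LOG.incrementProcessed and LOG.ensureCompleted unconditionally; as the code above demonstrates, those Logging methods tolerate a null progress, so the hot loop needs no explicit guard. A minimal standalone sketch of the pattern (the class name and workload size are placeholders, not ELKI code):

import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;

public class ProgressIdiomSketch {
  private static final Logging LOG = Logging.getLogger(ProgressIdiomSketch.class);

  public static void main(String[] args) {
    final int total = 1000; // placeholder workload size
    // Allocate the progress object only when verbose output is enabled;
    // passing null to incrementProcessed/ensureCompleted is a no-op,
    // so the loop body needs no null check.
    FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Demo task", total, LOG) : null;
    for (int i = 0; i < total; i++) {
      // ... per-item work goes here ...
      LOG.incrementProcessed(prog);
    }
    LOG.ensureCompleted(prog);
  }
}

This keeps the non-verbose fast path free of progress-tracking overhead beyond a null check inside the logging call.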
Use of de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress in project elki by elki-project.
The class CASH, method run.
/**
 * Run CASH on the relation.
 *
 * @param database Database
 * @param vrel Relation
 * @return Clustering result
 */
public Clustering<Model> run(Database database, Relation<V> vrel) {
  fulldatabase = preprocess(database, vrel);
  processedIDs = DBIDUtil.newHashSet(fulldatabase.size());
  noiseDim = dimensionality(fulldatabase);
  FiniteProgress progress = LOG.isVerbose() ? new FiniteProgress("CASH Clustering", fulldatabase.size(), LOG) : null;
  Clustering<Model> result = doRun(fulldatabase, progress);
  LOG.ensureCompleted(progress);
  if (LOG.isVerbose()) {
    StringBuilder msg = new StringBuilder(1000);
    for (Cluster<Model> c : result.getAllClusters()) {
      if (c.getModel() instanceof LinearEquationModel) {
        LinearEquationModel s = (LinearEquationModel) c.getModel();
        msg.append("\n Cluster: Dim: " + s.getLes().subspacedim() + " size: " + c.size());
      } else {
        msg.append("\n Cluster: " + c.getModel().getClass().getName() + " size: " + c.size());
      }
    }
    LOG.verbose(msg.toString());
  }
  return result;
}
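Two details of this snippet are worth noting. First, unlike the other examples here, the FiniteProgress is created in run() but incremented inside doRun(); LOG.ensureCompleted then closes the progress bar after the recursion returns. Second, the summary loop concatenates strings inside append(...), which builds an intermediate String per cluster; chaining the append calls writes directly into the builder. A behavior-identical sketch of the same loop body with chained appends:

// Equivalent summary loop using chained appends (no intermediate Strings):
for (Cluster<Model> c : result.getAllClusters()) {
  if (c.getModel() instanceof LinearEquationModel) {
    LinearEquationModel s = (LinearEquationModel) c.getModel();
    msg.append("\n Cluster: Dim: ").append(s.getLes().subspacedim()).append(" size: ").append(c.size());
  } else {
    msg.append("\n Cluster: ").append(c.getModel().getClass().getName()).append(" size: ").append(c.size());
  }
}

For a verbose-only summary this is a micro-optimization, but it matches the intent of preallocating the StringBuilder with a capacity of 1000.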
Use of de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress in project elki by elki-project.
The class BarnesHutTSNE, method optimizetSNE.
/**
 * Perform the actual tSNE optimization.
 *
 * @param pij Sparse initial affinity matrix
 * @param sol Solution output array (preinitialized)
 */
protected void optimizetSNE(AffinityMatrix pij, double[][] sol) {
  final int size = pij.size();
  if (size * 3L * dim > 0x7FFF_FFFAL) {
    throw new AbortException("Memory exceeds Java array size limit.");
  }
  // Meta information on each point; joined for memory locality.
  // Gradient, Momentum, and learning rate
  // For performance, we use a flat memory layout!
  double[] meta = new double[size * 3 * dim];
  final int dim3 = dim * 3;
  for (int off = 2 * dim; off < meta.length; off += dim3) {
    // Initial learning rate
    Arrays.fill(meta, off, off + dim, 1.);
  }
  FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Iterative Optimization", iterations, LOG) : null;
  Duration timer = LOG.isStatistics() ? LOG.newDuration(this.getClass().getName() + ".runtime.optimization").begin() : null;
  // Optimize
  for (int i = 0; i < iterations; i++) {
    computeGradient(pij, sol, meta);
    updateSolution(sol, meta, i);
    // Undo early exaggeration
    if (i == EARLY_EXAGGERATION_ITERATIONS) {
      pij.scale(1. / EARLY_EXAGGERATION);
    }
    LOG.incrementProcessed(prog);
  }
  LOG.ensureCompleted(prog);
  if (timer != null) {
    LOG.statistics(timer.end());
  }
}
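To make the flat layout concrete: each point owns a contiguous block of 3 * dim doubles in meta, in the order the comments state (gradient, then momentum, then per-dimension learning rate), which is why the initialization loop starts at offset 2 * dim and strides by dim3. A sketch of accessor helpers for that layout, assuming the slot order given in the comments (the helper names are illustrative, not ELKI code):

// Helper view of the flat layout: point i owns meta[i*3*dim .. (i+1)*3*dim),
// split into [gradient | momentum | learning rate], each dim entries wide.
static double gradientOf(double[] meta, int dim, int i, int d) {
  return meta[i * 3 * dim + d];
}

static double momentumOf(double[] meta, int dim, int i, int d) {
  return meta[i * 3 * dim + dim + d];
}

static double learningRateOf(double[] meta, int dim, int i, int d) {
  // This is the slice the Arrays.fill loop above initializes to 1.
  return meta[i * 3 * dim + 2 * dim + d];
}

Keeping all three quantities for a point adjacent in one array is the memory-locality trade mentioned in the comments: one cache line serves the whole per-point update.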
Use of de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress in project elki by elki-project.
The class GaussianAffinityMatrixBuilder, method computePij.
/**
 * Compute the pij from the distance matrix.
 *
 * @param dist Distance matrix.
 * @param sigma Kernel bandwidth sigma
 * @param initialScale Initial scale
 * @return Affinity matrix pij
 */
protected static double[][] computePij(double[][] dist, double sigma, double initialScale) {
  final int size = dist.length;
  final double msigmasq = -.5 / (sigma * sigma);
  double[][] pij = new double[size][size];
  FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Computing affinities", size, LOG) : null;
  Duration timer = LOG.isStatistics() ? LOG.newDuration(GaussianAffinityMatrixBuilder.class.getName() + ".runtime.pijmatrix").begin() : null;
  MeanVariance mv = LOG.isStatistics() ? new MeanVariance() : null;
  for (int i = 0; i < size; i++) {
    double logP = computeH(i, dist[i], pij[i], msigmasq);
    if (mv != null) {
      mv.put(FastMath.exp(logP));
    }
    LOG.incrementProcessed(prog);
  }
  LOG.ensureCompleted(prog);
  if (LOG.isStatistics()) {
    // timer != null, mv != null
    LOG.statistics(timer.end());
    LOG.statistics(new DoubleStatistic(GaussianAffinityMatrixBuilder.class.getName() + ".perplexity.average", mv.getMean()));
    LOG.statistics(new DoubleStatistic(GaussianAffinityMatrixBuilder.class.getName() + ".perplexity.stddev", mv.getSampleStddev()));
  }
  // Scale pij to have the desired sum EARLY_EXAGGERATION
  double sum = 0.;
  for (int i = 1; i < size; i++) {
    final double[] pij_i = pij[i];
    for (int j = 0; j < i; j++) {
      // Only iterate over half of the matrix, symmetrizing on the fly:
      sum += (pij_i[j] += pij[j][i]);
    }
  }
  // Scaling taken from original tSNE code:
  final double scale = initialScale / (2. * sum);
  for (int i = 1; i < size; i++) {
    final double[] pij_i = pij[i];
    for (int j = 0; j < i; j++) {
      pij_i[j] = pij[j][i] = MathUtil.max(pij_i[j] * scale, MIN_PIJ);
    }
  }
  return pij;
}
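The factor of two in the scale is easy to miss: the first loop accumulates sum over the strict lower triangle only, after folding pij[j][i] into pij[i][j], so (assuming a zero diagonal, as in tSNE) the full symmetric matrix has total mass 2 * sum. A minimal standalone sketch of that normalization, with placeholder names p and target, and without the MIN_PIJ clamping the original additionally applies:

// Symmetrize a square affinity matrix in-place and scale its total mass
// to `target`, touching only the strict lower triangle (diagonal assumed zero).
static void symmetrizeAndScale(double[][] p, double target) {
  final int n = p.length;
  double sum = 0.;
  for (int i = 1; i < n; i++) {
    for (int j = 0; j < i; j++) {
      sum += (p[i][j] += p[j][i]); // fold the upper triangle into the lower
    }
  }
  // Each off-diagonal value was counted once, so the symmetric mass is 2 * sum.
  final double scale = target / (2. * sum);
  for (int i = 1; i < n; i++) {
    for (int j = 0; j < i; j++) {
      p[i][j] = p[j][i] = p[i][j] * scale; // mirror back to the upper triangle
    }
  }
}

Visiting only half the matrix halves the work of both the summation and the rescaling pass.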
Use of de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress in project elki by elki-project.
The class NearestNeighborAffinityMatrixBuilder, method computePij.
/**
 * Compute the sparse pij using the nearest neighbors only.
 *
 * @param ids ID range
 * @param knnq kNN query
 * @param square Use squared distances
 * @param numberOfNeighbours Number of neighbors to get
 * @param pij Output array for the affinity values
 * @param indices Output array for the neighbor indexes
 * @param initialScale Initial scaling factor
 */
protected void computePij(DBIDRange ids, KNNQuery<?> knnq, boolean square, int numberOfNeighbours, double[][] pij, int[][] indices, double initialScale) {
  Duration timer = LOG.isStatistics() ? LOG.newDuration(this.getClass().getName() + ".runtime.neighborspijmatrix").begin() : null;
  final double logPerp = FastMath.log(perplexity);
  // Scratch arrays, resizable
  DoubleArray dists = new DoubleArray(numberOfNeighbours + 10);
  IntegerArray inds = new IntegerArray(numberOfNeighbours + 10);
  // Compute nearest-neighbor sparse affinity matrix
  FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Finding neighbors and optimizing perplexity", ids.size(), LOG) : null;
  MeanVariance mv = LOG.isStatistics() ? new MeanVariance() : null;
  for (DBIDArrayIter ix = ids.iter(); ix.valid(); ix.advance()) {
    dists.clear();
    inds.clear();
    KNNList neighbours = knnq.getKNNForDBID(ix, numberOfNeighbours + 1);
    convertNeighbors(ids, ix, square, neighbours, dists, inds);
    double beta = computeSigma(ix.getOffset(), dists, perplexity, logPerp,
        pij[ix.getOffset()] = new double[dists.size()]);
    if (mv != null) {
      // Record sigma; beta = 1 / (2 * sigma^2), hence sigma = sqrt(.5 / beta)
      mv.put(beta > 0 ? FastMath.sqrt(.5 / beta) : 0.);
    }
    indices[ix.getOffset()] = inds.toArray();
    LOG.incrementProcessed(prog);
  }
  LOG.ensureCompleted(prog);
  // Sum of the sparse affinity matrix:
  double sum = 0.;
  for (int i = 0; i < pij.length; i++) {
    final double[] pij_i = pij[i];
    for (int j = 0; j < pij_i.length; j++) {
      sum += pij_i[j];
    }
  }
  final double scale = initialScale / (2 * sum);
  for (int i = 0; i < pij.length; i++) {
    final double[] pij_i = pij[i];
    for (int offi = 0; offi < pij_i.length; offi++) {
      int j = indices[i][offi];
      assert (i != j);
      int offj = containsIndex(indices[j], i);
      if (offj >= 0) {
        // Found
        assert (indices[j][offj] == i);
        // Exploit symmetry:
        if (i < j) {
          // Symmetrize
          final double val = pij_i[offi] + pij[j][offj];
          pij_i[offi] = pij[j][offj] = MathUtil.max(val * scale, MIN_PIJ);
        }
      } else {
        // Not found
        // TODO: the original code produces a symmetric matrix
        // And it will now not sum to EARLY_EXAGGERATION anymore.
        pij_i[offi] = MathUtil.max(pij_i[offi] * scale, MIN_PIJ);
      }
    }
  }
  if (LOG.isStatistics()) {
    // timer != null, mv != null
    LOG.statistics(timer.end());
    LOG.statistics(new DoubleStatistic(NearestNeighborAffinityMatrixBuilder.class.getName() + ".sigma.average", mv.getMean()));
    LOG.statistics(new DoubleStatistic(NearestNeighborAffinityMatrixBuilder.class.getName() + ".sigma.stddev", mv.getSampleStddev()));
  }
}
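The containsIndex helper is not part of this excerpt; judging from its use above, it returns the offset of a value in a neighbor index array, or a negative value when absent. A hypothetical linear-scan sketch consistent with that contract (not the verbatim ELKI code); since the neighbor lists hold only about numberOfNeighbours entries, a scan is adequate here:

// Hypothetical sketch of the helper used above: return the offset of
// value `i` in `indices`, or -1 if it does not occur.
protected static int containsIndex(int[] indices, int i) {
  for (int off = 0; off < indices.length; off++) {
    if (indices[off] == i) {
      return off;
    }
  }
  return -1;
}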