use of de.lmu.ifi.dbs.elki.logging.statistics.Duration in project elki by elki-project.
the class NearestNeighborAffinityMatrixBuilder method computePij.
/**
* Compute the sparse pij using the nearest neighbors only.
*
* @param ids ID range
* @param knnq kNN query
* @param square Use squared distances
* @param numberOfNeighbours Number of neighbors to get
* @param pij Output of distances
* @param indices Output of indexes
* @param initialScale Initial scaling factor
*/
protected void computePij(DBIDRange ids, KNNQuery<?> knnq, boolean square, int numberOfNeighbours, double[][] pij, int[][] indices, double initialScale) {
Duration timer = LOG.isStatistics() ? LOG.newDuration(this.getClass().getName() + ".runtime.neighborspijmatrix").begin() : null;
final double logPerp = FastMath.log(perplexity);
// Scratch arrays, resizable
DoubleArray dists = new DoubleArray(numberOfNeighbours + 10);
IntegerArray inds = new IntegerArray(numberOfNeighbours + 10);
// Compute nearest-neighbor sparse affinity matrix
FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Finding neighbors and optimizing perplexity", ids.size(), LOG) : null;
MeanVariance mv = LOG.isStatistics() ? new MeanVariance() : null;
for (DBIDArrayIter ix = ids.iter(); ix.valid(); ix.advance()) {
dists.clear();
inds.clear();
KNNList neighbours = knnq.getKNNForDBID(ix, numberOfNeighbours + 1);
convertNeighbors(ids, ix, square, neighbours, dists, inds);
double beta = computeSigma(//
ix.getOffset(), //
dists, //
perplexity, //
logPerp, pij[ix.getOffset()] = new double[dists.size()]);
if (mv != null) {
// Sigma
mv.put(beta > 0 ? FastMath.sqrt(.5 / beta) : 0.);
}
indices[ix.getOffset()] = inds.toArray();
LOG.incrementProcessed(prog);
}
LOG.ensureCompleted(prog);
// Sum of the sparse affinity matrix:
double sum = 0.;
for (int i = 0; i < pij.length; i++) {
final double[] pij_i = pij[i];
for (int j = 0; j < pij_i.length; j++) {
sum += pij_i[j];
}
}
final double scale = initialScale / (2 * sum);
for (int i = 0; i < pij.length; i++) {
final double[] pij_i = pij[i];
for (int offi = 0; offi < pij_i.length; offi++) {
int j = indices[i][offi];
assert (i != j);
int offj = containsIndex(indices[j], i);
if (offj >= 0) {
// Found
assert (indices[j][offj] == i);
// Exploit symmetry:
if (i < j) {
// Symmetrize
final double val = pij_i[offi] + pij[j][offj];
pij_i[offi] = pij[j][offj] = MathUtil.max(val * scale, MIN_PIJ);
}
} else {
// Not found
// TODO: the original code produces a symmetric matrix
// And it will now not sum to EARLY_EXAGGERATION anymore.
pij_i[offi] = MathUtil.max(pij_i[offi] * scale, MIN_PIJ);
}
}
}
if (LOG.isStatistics()) {
// timer != null, mv != null
LOG.statistics(timer.end());
LOG.statistics(new DoubleStatistic(NearestNeighborAffinityMatrixBuilder.class.getName() + ".sigma.average", mv.getMean()));
LOG.statistics(new DoubleStatistic(NearestNeighborAffinityMatrixBuilder.class.getName() + ".sigma.stddev", mv.getSampleStddev()));
}
}
use of de.lmu.ifi.dbs.elki.logging.statistics.Duration in project elki by elki-project.
the class ClassifierHoldoutEvaluationTask method run.
@Override
public void run() {
Duration ptime = LOG.newDuration("evaluation.time.load").begin();
MultipleObjectsBundle allData = databaseConnection.loadData();
holdout.initialize(allData);
LOG.statistics(ptime.end());
Duration time = LOG.newDuration("evaluation.time.total").begin();
ArrayList<ClassLabel> labels = holdout.getLabels();
int[][] confusion = new int[labels.size()][labels.size()];
for (int p = 0; p < holdout.numberOfPartitions(); p++) {
TrainingAndTestSet partition = holdout.nextPartitioning();
// Load the data set into a database structure (for indexing)
Duration dur = LOG.newDuration(this.getClass().getName() + ".fold-" + (p + 1) + ".init.time").begin();
Database db = new StaticArrayDatabase(new MultipleObjectsBundleDatabaseConnection(partition.getTraining()), indexFactories);
db.initialize();
LOG.statistics(dur.end());
// Train the classifier
dur = LOG.newDuration(this.getClass().getName() + ".fold-" + (p + 1) + ".train.time").begin();
Relation<ClassLabel> lrel = db.getRelation(TypeUtil.CLASSLABEL);
algorithm.buildClassifier(db, lrel);
LOG.statistics(dur.end());
// Evaluate the test set
dur = LOG.newDuration(this.getClass().getName() + ".fold-" + (p + 1) + ".evaluation.time").begin();
// FIXME: this part is still a big hack, unfortunately!
MultipleObjectsBundle test = partition.getTest();
int lcol = AbstractHoldout.findClassLabelColumn(test);
int tcol = (lcol == 0) ? 1 : 0;
for (int i = 0, l = test.dataLength(); i < l; ++i) {
@SuppressWarnings("unchecked") O obj = (O) test.data(i, tcol);
ClassLabel truelbl = (ClassLabel) test.data(i, lcol);
ClassLabel predlbl = algorithm.classify(obj);
int pred = Collections.binarySearch(labels, predlbl);
int real = Collections.binarySearch(labels, truelbl);
confusion[pred][real]++;
}
LOG.statistics(dur.end());
}
LOG.statistics(time.end());
ConfusionMatrix m = new ConfusionMatrix(labels, confusion);
LOG.statistics(m.toString());
}
use of de.lmu.ifi.dbs.elki.logging.statistics.Duration in project elki by elki-project.
the class StaticArrayDatabase method initialize.
/**
* Initialize the database by getting the initial data from the database
* connection.
*/
@Override
public void initialize() {
if (databaseConnection != null) {
if (LOG.isDebugging()) {
LOG.debugFine("Loading data from database connection.");
}
MultipleObjectsBundle bundle = databaseConnection.loadData();
// Run at most once.
databaseConnection = null;
// Find DBIDs for bundle
{
DBIDs bids = bundle.getDBIDs();
if (bids instanceof ArrayStaticDBIDs) {
this.ids = (ArrayStaticDBIDs) bids;
} else if (bids == null) {
this.ids = DBIDUtil.generateStaticDBIDRange(bundle.dataLength());
} else {
this.ids = (ArrayStaticDBIDs) DBIDUtil.makeUnmodifiable(bids);
}
}
// Replace id representation (it would be nicer if we would not need
// DBIDView at all)
this.idrep = new DBIDView(this.ids);
relations.add(this.idrep);
getHierarchy().add(this, idrep);
DBIDArrayIter it = this.ids.iter();
int numrel = bundle.metaLength();
for (int i = 0; i < numrel; i++) {
SimpleTypeInformation<?> meta = bundle.meta(i);
@SuppressWarnings("unchecked") SimpleTypeInformation<Object> ometa = (SimpleTypeInformation<Object>) meta;
WritableDataStore<Object> store = DataStoreUtil.makeStorage(ids, DataStoreFactory.HINT_DB, ometa.getRestrictionClass());
for (it.seek(0); it.valid(); it.advance()) {
store.put(it, bundle.data(it.getOffset(), i));
}
Relation<?> relation = new MaterializedRelation<>(ometa, ids, null, store);
relations.add(relation);
getHierarchy().add(this, relation);
// Try to add indexes where appropriate
for (IndexFactory<?, ?> factory : indexFactories) {
if (factory.getInputTypeRestriction().isAssignableFromType(ometa)) {
@SuppressWarnings("unchecked") final IndexFactory<Object, ?> ofact = (IndexFactory<Object, ?>) factory;
@SuppressWarnings("unchecked") final Relation<Object> orep = (Relation<Object>) relation;
final Index index = ofact.instantiate(orep);
Duration duration = LOG.isStatistics() ? LOG.newDuration(index.getClass().getName() + ".construction").begin() : null;
index.initialize();
if (duration != null) {
LOG.statistics(duration.end());
}
getHierarchy().add(relation, index);
}
}
}
// fire insertion event
eventManager.fireObjectsInserted(ids);
}
}
use of de.lmu.ifi.dbs.elki.logging.statistics.Duration in project elki by elki-project.
the class ComputeKNNOutlierScores method runForEachK.
/**
* Iterate over the k range.
*
* @param prefix Prefix string
* @param startk Start k
* @param stepk Step k
* @param maxk Max k
* @param runner Runner to run
* @param out Output function
*/
private void runForEachK(String prefix, int startk, int stepk, int maxk, IntFunction<OutlierResult> runner, BiConsumer<String, OutlierResult> out) {
if (isDisabled(prefix)) {
LOG.verbose("Skipping (disabled): " + prefix);
// Disabled
return;
}
LOG.verbose("Running " + prefix);
final int digits = (int) FastMath.ceil(FastMath.log10(maxk + 1));
final String format = "%s-%0" + digits + "d";
for (int k = startk; k <= maxk; k += stepk) {
Duration time = LOG.newDuration(this.getClass().getCanonicalName() + "." + prefix + ".k" + k + ".runtime").begin();
OutlierResult result = runner.apply(k);
LOG.statistics(time.end());
if (result != null) {
out.accept(String.format(Locale.ROOT, format, prefix, k), result);
result.getHierarchy().removeSubtree(result);
}
}
}
use of de.lmu.ifi.dbs.elki.logging.statistics.Duration in project elki by elki-project.
the class GaussianAffinityMatrixBuilder method buildDistanceMatrix.
/**
* Build a distance matrix of squared distances.
*
* @param ids DBIDs
* @param dq Distance query
* @return Distance matrix
*/
protected double[][] buildDistanceMatrix(ArrayDBIDs ids, DistanceQuery<?> dq) {
final int size = ids.size();
double[][] dmat = new double[size][size];
final boolean square = !dq.getDistanceFunction().isSquared();
FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Computing distance matrix", (size * (size - 1)) >>> 1, LOG) : null;
Duration timer = LOG.isStatistics() ? LOG.newDuration(this.getClass().getName() + ".runtime.distancematrix").begin() : null;
DBIDArrayIter ix = ids.iter(), iy = ids.iter();
for (ix.seek(0); ix.valid(); ix.advance()) {
double[] dmat_x = dmat[ix.getOffset()];
for (iy.seek(ix.getOffset() + 1); iy.valid(); iy.advance()) {
final double dist = dq.distance(ix, iy);
dmat[iy.getOffset()][ix.getOffset()] = dmat_x[iy.getOffset()] = square ? (dist * dist) : dist;
}
if (prog != null) {
int row = ix.getOffset() + 1;
prog.setProcessed(row * size - ((row * (row + 1)) >>> 1), LOG);
}
}
LOG.ensureCompleted(prog);
if (timer != null) {
LOG.statistics(timer.end());
}
return dmat;
}
Aggregations