Use of de.lmu.ifi.dbs.elki.logging.statistics.Duration in project elki by elki-project.
The class IntrinsicNearestNeighborAffinityMatrixBuilder, method computePij.
/**
 * Compute the sparse pij using the nearest neighbors only.
 *
 * @param ids ID range
 * @param knnq kNN query
 * @param square Use squared distances
 * @param numberOfNeighbours Number of neighbors to get
 * @param pij Output matrix of affinity values
 * @param indices Output matrix of neighbor indexes
 * @param initialScale Initial scaling factor
 */
protected void computePij(DBIDRange ids, KNNQuery<?> knnq, boolean square, int numberOfNeighbours, double[][] pij, int[][] indices, double initialScale) {
  Duration timer = LOG.isStatistics() ? LOG.newDuration(this.getClass().getName() + ".runtime.neighborspijmatrix").begin() : null;
  final double logPerp = FastMath.log(perplexity);
  // Scratch arrays, resizable
  DoubleArray dists = new DoubleArray(numberOfNeighbours + 10);
  IntegerArray inds = new IntegerArray(numberOfNeighbours + 10);
  // Compute nearest-neighbor sparse affinity matrix
  FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Finding neighbors and optimizing perplexity", ids.size(), LOG) : null;
  MeanVariance mv = LOG.isStatistics() ? new MeanVariance() : null;
  // Mean of the intrinsic dimensionality estimates of the original data
  Mean mid = LOG.isStatistics() ? new Mean() : null;
  for (DBIDArrayIter ix = ids.iter(); ix.valid(); ix.advance()) {
    dists.clear();
    inds.clear();
    // Fetch one extra neighbor, as the query point itself is included.
    KNNList neighbours = knnq.getKNNForDBID(ix, numberOfNeighbours + 1);
    convertNeighbors(ids, ix, square, neighbours, dists, inds, mid);
    // Optimize beta (precision) for the target perplexity; allocates the output row.
    double beta = computeSigma(//
        ix.getOffset(), //
        dists, //
        perplexity, //
        logPerp, pij[ix.getOffset()] = new double[dists.size()]);
    if (mv != null) {
      // Sigma
      mv.put(beta > 0 ? FastMath.sqrt(.5 / beta) : 0.);
    }
    indices[ix.getOffset()] = inds.toArray();
    LOG.incrementProcessed(prog);
  }
  LOG.ensureCompleted(prog);
  if (mid != null) {
    LOG.statistics(new DoubleStatistic(getClass().getName() + ".average-original-id", mid.getMean()));
  }
  // Sum of the sparse affinity matrix, counting each symmetric pair once:
  double sum = 0.;
  for (int i = 0; i < pij.length; i++) {
    final double[] pij_i = pij[i];
    for (int offi = 0; offi < pij_i.length; offi++) {
      int j = indices[i][offi];
      if (j > i) {
        // Exploit symmetry: each pair is counted for the smaller index only.
        continue;
      }
      assert (i != j);
      int offj = containsIndex(indices[j], i);
      if (offj >= 0) {
        // Found
        sum += FastMath.sqrt(pij_i[offi] * pij[j][offj]);
      }
    }
  }
  final double scale = initialScale / (2 * sum);
  for (int i = 0; i < pij.length; i++) {
    final double[] pij_i = pij[i];
    for (int offi = 0; offi < pij_i.length; offi++) {
      int j = indices[i][offi];
      assert (i != j);
      int offj = containsIndex(indices[j], i);
      if (offj >= 0) {
        // Found
        assert (indices[j][offj] == i);
        // Exploit symmetry:
        if (i < j) {
          // Symmetrize by the geometric mean, clamped to a minimum value.
          final double val = FastMath.sqrt(pij_i[offi] * pij[j][offj]);
          pij_i[offi] = pij[j][offj] = MathUtil.max(val * scale, MIN_PIJ);
        }
      } else {
        // Not found, so zero.
        pij_i[offi] = 0;
      }
    }
  }
  if (LOG.isStatistics()) {
    // timer != null, mv != null
    LOG.statistics(timer.end());
    LOG.statistics(new DoubleStatistic(NearestNeighborAffinityMatrixBuilder.class.getName() + ".sigma.average", mv.getMean()));
    LOG.statistics(new DoubleStatistic(NearestNeighborAffinityMatrixBuilder.class.getName() + ".sigma.stddev", mv.getSampleStddev()));
  }
}
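Two details of the symmetrization are worth spelling out. The affinities are combined by the geometric mean, pij = max(scale * sqrt(p(j|i) * p(i|j)), MIN_PIJ), where scale = initialScale / (2 * sum); sum visits each unordered pair exactly once (the j > i branch skips duplicates), hence the factor 2. The containsIndex helper is not part of this excerpt; below is a minimal sketch of the contract it must fulfill, assuming unsorted neighbor index arrays (the actual ELKI helper may differ, e.g. by exploiting sorted arrays):

// Sketch only: return the offset of value i in is, or a negative value if absent.
protected static int containsIndex(int[] is, int i) {
  for (int j = 0; j < is.length; j++) {
    if (is[j] == i) {
      return j;
    }
  }
  return -1;
}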
Use of de.lmu.ifi.dbs.elki.logging.statistics.Duration in project elki by elki-project.
The class AbstractRangeQueryNeighborPredicate, method preprocess.
/**
 * Perform the preprocessing step.
 *
 * @param modelcls Class of models
 * @param relation Data relation
 * @param query Range query
 * @return Precomputed models
 */
public DataStore<M> preprocess(Class<? super M> modelcls, Relation<O> relation, RangeQuery<O> query) {
  WritableDataStore<M> storage = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP, modelcls);
  Duration time = getLogger().newDuration(this.getClass().getName() + ".preprocessing-time").begin();
  FiniteProgress progress = getLogger().isVerbose() ? new FiniteProgress(this.getClass().getName(), relation.size(), getLogger()) : null;
  for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
    DoubleDBIDList neighbors = query.getRangeForDBID(iditer, epsilon);
    storage.put(iditer, computeLocalModel(iditer, neighbors, relation));
    getLogger().incrementProcessed(progress);
  }
  getLogger().ensureCompleted(progress);
  getLogger().statistics(time.end());
  return storage;
}
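Note the contrast with computePij above: here the Duration is created unconditionally, and LOG.statistics(time.end()) is relied upon to filter by log level. The HINT_HOT | HINT_TEMP flags request a store optimized for frequent access that need not be persisted; the same pattern works for primitive stores. A minimal sketch, assuming the usual DataStoreUtil.makeDoubleStorage(ids, hints) factory:

// Hedged sketch: a temporary, frequently-accessed store of per-object scores.
WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP);
for (DBIDIter it = relation.iterDBIDs(); it.valid(); it.advance()) {
  scores.putDouble(it, 0.); // placeholder value
}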
Use of de.lmu.ifi.dbs.elki.logging.statistics.Duration in project elki by elki-project.
The class COPACNeighborPredicate, method instantiate.
/**
 * Full instantiation method.
 *
 * @param database Database
 * @param relation Vector relation
 * @return Instance
 */
public COPACNeighborPredicate.Instance instantiate(Database database, Relation<V> relation) {
  DistanceQuery<V> dq = database.getDistanceQuery(relation, EuclideanDistanceFunction.STATIC);
  KNNQuery<V> knnq = database.getKNNQuery(dq, settings.k);
  WritableDataStore<COPACModel> storage = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP, COPACModel.class);
  Duration time = LOG.newDuration(this.getClass().getName() + ".preprocessing-time").begin();
  FiniteProgress progress = LOG.isVerbose() ? new FiniteProgress(this.getClass().getName(), relation.size(), LOG) : null;
  for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
    DoubleDBIDList ref = knnq.getKNNForDBID(iditer, settings.k);
    storage.put(iditer, computeLocalModel(iditer, ref, relation));
    LOG.incrementProcessed(progress);
  }
  LOG.ensureCompleted(progress);
  LOG.statistics(time.end());
  return new Instance(relation.getDBIDs(), storage);
}
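This method and the neighboring predicate snippets share the same timed-preprocessing skeleton. Stripped of the model computation, the pattern assembled from the calls above is:

Duration time = LOG.newDuration(this.getClass().getName() + ".preprocessing-time").begin();
FiniteProgress progress = LOG.isVerbose() ? new FiniteProgress(this.getClass().getName(), relation.size(), LOG) : null;
for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
  // compute and store the per-object model here
  LOG.incrementProcessed(progress);
}
LOG.ensureCompleted(progress);
LOG.statistics(time.end()); // end() finishes the timer and returns it for logging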
Use of de.lmu.ifi.dbs.elki.logging.statistics.Duration in project elki by elki-project.
The class ERiCNeighborPredicate, method instantiate.
/**
 * Full instantiation method.
 *
 * @param database Database
 * @param relation Relation
 * @return Instance
 */
public Instance instantiate(Database database, Relation<V> relation) {
  DistanceQuery<V> dq = database.getDistanceQuery(relation, EuclideanDistanceFunction.STATIC);
  KNNQuery<V> knnq = database.getKNNQuery(dq, settings.k);
  WritableDataStore<PCAFilteredResult> storage = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP, PCAFilteredResult.class);
  PCARunner pca = settings.pca;
  EigenPairFilter filter = settings.filter;
  Duration time = LOG.newDuration(this.getClass().getName() + ".preprocessing-time").begin();
  FiniteProgress progress = LOG.isVerbose() ? new FiniteProgress(this.getClass().getName(), relation.size(), LOG) : null;
  for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
    DoubleDBIDList ref = knnq.getKNNForDBID(iditer, settings.k);
    // Run PCA on the kNN of each point; the filter decides the strong eigenvectors.
    PCAResult pcares = pca.processQueryResult(ref, relation);
    storage.put(iditer, new PCAFilteredResult(pcares.getEigenPairs(), filter.filter(pcares.getEigenvalues()), 1., 0.));
    LOG.incrementProcessed(progress);
  }
  LOG.ensureCompleted(progress);
  LOG.statistics(time.end());
  return new Instance(relation.getDBIDs(), storage, relation);
}
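The stored local PCA models are later consumed pairwise when the predicate compares local subspaces. A hypothetical read-back sketch (the loop and usage are illustrative; only DataStore.get is taken from the code above):

for (DBIDIter it = relation.iterDBIDs(); it.valid(); it.advance()) {
  PCAFilteredResult localpca = storage.get(it); // fetch the precomputed model
  // ... compare localpca against a neighbor's model to decide the ERiC neighborhood
}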
Use of de.lmu.ifi.dbs.elki.logging.statistics.Duration in project elki by elki-project.
The class APRIORI, method run.
/**
 * Performs the APRIORI algorithm on the given database.
 *
 * @param relation the Relation to process
 * @return the frequent itemsets found by this APRIORI run
 */
public FrequentItemsetsResult run(Relation<BitVector> relation) {
  DBIDs ids = relation.getDBIDs();
  List<Itemset> solution = new ArrayList<>();
  final int size = ids.size();
  final int needed = getMinimumSupport(size);
  // TODO: we don't strictly require a vector field.
  // We could work with knowing just the maximum dimensionality beforehand.
  VectorFieldTypeInformation<BitVector> meta = RelationUtil.assumeVectorField(relation);
  if (size > 0) {
    final int dim = meta.getDimensionality();
    Duration timeone = LOG.newDuration(STAT + "1-items.time").begin();
    List<OneItemset> oneitems = buildFrequentOneItemsets(relation, dim, needed);
    LOG.statistics(timeone.end());
    if (LOG.isStatistics()) {
      LOG.statistics(new LongStatistic(STAT + "1-items.frequent", oneitems.size()));
      LOG.statistics(new LongStatistic(STAT + "1-items.transactions", ids.size()));
    }
    if (LOG.isDebuggingFine()) {
      LOG.debugFine(debugDumpCandidates(new StringBuilder(), oneitems, meta));
    }
    if (minlength <= 1) {
      solution.addAll(oneitems);
    }
    if (oneitems.size() >= 2 && maxlength >= 2) {
      Duration timetwo = LOG.newDuration(STAT + "2-items.time").begin();
      ArrayModifiableDBIDs survivors = DBIDUtil.newArray(ids.size());
      List<? extends Itemset> candidates = buildFrequentTwoItemsets(oneitems, relation, dim, needed, ids, survivors);
      // Continue with the reduced set of transactions.
      ids = survivors;
      LOG.statistics(timetwo.end());
      if (LOG.isStatistics()) {
        LOG.statistics(new LongStatistic(STAT + "2-items.frequent", candidates.size()));
        LOG.statistics(new LongStatistic(STAT + "2-items.transactions", ids.size()));
      }
      if (LOG.isDebuggingFine()) {
        LOG.debugFine(debugDumpCandidates(new StringBuilder(), candidates, meta));
      }
      if (minlength <= 2) {
        solution.addAll(candidates);
      }
      // Generate and count longer itemsets level by level.
      for (int length = 3; length <= maxlength && candidates.size() >= length; length++) {
        Duration timel = LOG.newDuration(STAT + length + "-items.time").begin();
        // Join to get the new candidates
        candidates = aprioriGenerate(candidates, length, dim);
        if (LOG.isDebuggingFinest()) {
          LOG.debugFinest(debugDumpCandidates(new StringBuilder().append("Before pruning: "), candidates, meta));
        }
        survivors = DBIDUtil.newArray(ids.size());
        candidates = frequentItemsets(candidates, relation, needed, ids, survivors, length);
        // Continue with the reduced set of transactions.
        ids = survivors;
        LOG.statistics(timel.end());
        if (LOG.isStatistics()) {
          LOG.statistics(new LongStatistic(STAT + length + "-items.frequent", candidates.size()));
          LOG.statistics(new LongStatistic(STAT + length + "-items.transactions", ids.size()));
        }
        if (LOG.isDebuggingFine()) {
          LOG.debugFine(debugDumpCandidates(new StringBuilder(), candidates, meta));
        }
        solution.addAll(candidates);
      }
    }
  }
  return new FrequentItemsetsResult("APRIORI", "apriori", solution, meta, size);
}
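getMinimumSupport is not shown in this excerpt; it typically converts the configured minimum support into an absolute transaction count. A plausible sketch, assuming a minsupp field that holds either a relative frequency (below 1) or an absolute count:

protected int getMinimumSupport(int size) {
  // Assumption: values below 1 are relative support, larger values are absolute counts.
  return (int) (minsupp < 1. ? Math.ceil(minsupp * size) : minsupp);
}

A Duration is created per level with a distinct key (STAT + length + "-items.time"), so the statistics log reports the time spent on each candidate length separately.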