Use of de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress in project elki by elki-project.
Class VisualizePairwiseGainMatrix, method run().
@Override
public void run() {
  final Database database = inputstep.getDatabase();
  ResultHierarchy hier = database.getHierarchy();
  Relation<NumberVector> relation = database.getRelation(TypeUtil.NUMBER_VECTOR_FIELD);
  final Relation<String> labels = DatabaseUtil.guessLabelRepresentation(database);
  final DBID firstid = DBIDUtil.deref(labels.iterDBIDs());
  final String firstlabel = labels.get(firstid);
  if (!firstlabel.matches(".*by.?label.*")) {
    throw new AbortException("No 'by label' reference outlier found, which is needed for weighting!");
  }
  relation = GreedyEnsembleExperiment.applyPrescaling(prescaling, relation, firstid);
  // Dimensionality and reference vector
  final int dim = RelationUtil.dimensionality(relation);
  final NumberVector refvec = relation.get(firstid);
  // Build the truth vector
  VectorNonZero pos = new VectorNonZero(refvec);
  ArrayModifiableDBIDs ids = DBIDUtil.newArray(relation.getDBIDs());
  ids.remove(firstid);
  ids.sort();
  final int size = ids.size();
  double[][] data = new double[size][size];
  DoubleMinMax minmax = new DoubleMinMax(), commax = new DoubleMinMax();
  {
    FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Computing ensemble gain.", size * (size + 1) >> 1, LOG) : null;
    // Vote combination buffer.
    double[] buf = new double[2];
    int a = 0;
    for (DBIDIter id = ids.iter(); id.valid(); id.advance(), a++) {
      final NumberVector veca = relation.get(id);
      // Direct AUC score:
      {
        double auc = ROCEvaluation.computeROCAUC(pos, new DecreasingVectorIter(veca));
        data[a][a] = auc;
        // minmax.put(auc);
        LOG.incrementProcessed(prog);
      }
      // Compare to others, exploiting symmetry
      DBIDArrayIter id2 = ids.iter();
      id2.seek(a + 1);
      for (int b = a + 1; b < size; b++, id2.advance()) {
        final NumberVector vecb = relation.get(id2);
        double[] combined = new double[dim];
        for (int d = 0; d < dim; d++) {
          buf[0] = veca.doubleValue(d);
          buf[1] = vecb.doubleValue(d);
          combined[d] = voting.combine(buf);
        }
        double auc = ROCEvaluation.computeROCAUC(pos, new DecreasingVectorIter(DoubleVector.wrap(combined)));
        // logger.verbose(auc + " " + labels.get(ids.get(a)) + " " +
        // labels.get(ids.get(b)));
        data[a][b] = auc;
        data[b][a] = auc;
        commax.put(data[a][b]);
        // minmax.put(auc);
        LOG.incrementProcessed(prog);
      }
    }
    LOG.ensureCompleted(prog);
  }
  // Convert each pairwise AUC into the relative gain over the better of the two single members.
  for (int a = 0; a < size; a++) {
    for (int b = a + 1; b < size; b++) {
      double ref = Math.max(data[a][a], data[b][b]);
      data[a][b] = (data[a][b] - ref) / (1 - ref);
      data[b][a] = (data[b][a] - ref) / (1 - ref);
      // logger.verbose(data[a][b] + " " + labels.get(ids.get(a)) + " " +
      // labels.get(ids.get(b)));
      minmax.put(data[a][b]);
    }
  }
  for (int a = 0; a < size; a++) {
    data[a][a] = 0;
  }
  LOG.verbose("Gain: " + minmax.toString() + " AUC: " + commax.toString());
  boolean hasneg = (minmax.getMin() < -1E-3);
  LinearScaling scale;
  if (!hasneg) {
    scale = LinearScaling.fromMinMax(0., minmax.getMax());
  } else {
    scale = LinearScaling.fromMinMax(0.0, Math.max(minmax.getMax(), -minmax.getMin()));
  }
  // Note: the fixed scale below overrides the adaptive scaling computed above.
  scale = LinearScaling.fromMinMax(0., .5);
  BufferedImage img = new BufferedImage(size, size, BufferedImage.TYPE_INT_RGB);
  for (int x = 0; x < size; x++) {
    for (int y = x; y < size; y++) {
      double val = data[x][y];
      val = Math.max(-1, Math.min(1., scale.getScaled(val)));
      // Compute color: green channel for positive gain, red channel for negative gain.
      final int col;
      {
        if (val >= 0) {
          int ival = 0xFF & (int) (255 * val);
          col = 0xff000000 | (ival << 8);
        } else {
          int ival = 0xFF & (int) (255 * -val);
          col = 0xff000000 | (ival << 16);
        }
      }
      img.setRGB(x, y, col);
      img.setRGB(y, x, col);
    }
  }
  SimilarityMatrix smat = new ComputeSimilarityMatrixImage.SimilarityMatrix(img, relation, ids);
  hier.add(database, smat);
  VisualizerContext context = vispar.newContext(hier, smat);
  // Attach visualizers to results
  SimilarityMatrixVisualizer factory = new SimilarityMatrixVisualizer();
  factory.processNewResult(context, database);
  VisualizationTree.findVis(context).filter(VisualizationTask.class).forEach(task -> {
    if (task.getFactory() == factory) {
      showVisualization(context, factory, task);
    }
  });
}
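Every snippet on this page follows the same progress-reporting idiom: a FiniteProgress is allocated only when verbose logging is enabled (otherwise the variable stays null), LOG.incrementProcessed(prog) is called once per completed step, and LOG.ensureCompleted(prog) closes the progress at the end; both Logging methods accept a null progress, which is exactly what the snippets rely on. A minimal standalone sketch of that pattern, with an illustrative class name and loop body that are not part of the ELKI sources:

import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;

public class ProgressSketch {
  private static final Logging LOG = Logging.getLogger(ProgressSketch.class);

  public void process(int size) {
    // Only allocate the progress when verbose logging is active.
    FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Processing items", size, LOG) : null;
    for (int i = 0; i < size; i++) {
      // ... per-item work would go here ...
      LOG.incrementProcessed(prog); // null-safe: does nothing when prog is null
    }
    LOG.ensureCompleted(prog); // null-safe: marks the progress as complete
  }
}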
Use of de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress in project elki by elki-project.
Class SharedNearestNeighborPreprocessor, method initialize().
@Override
public void initialize() {
  if (getLogger().isVerbose()) {
    getLogger().verbose("Assigning nearest neighbor lists to database objects");
  }
  storage = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP, ArrayDBIDs.class);
  KNNQuery<O> knnquery = QueryUtil.getKNNQuery(relation, distanceFunction, numberOfNeighbors);
  FiniteProgress progress = getLogger().isVerbose() ? new FiniteProgress("assigning nearest neighbor lists", relation.size(), getLogger()) : null;
  for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
    ArrayModifiableDBIDs neighbors = DBIDUtil.newArray(numberOfNeighbors);
    DBIDs kNN = knnquery.getKNNForDBID(iditer, numberOfNeighbors);
    for (DBIDIter iter = kNN.iter(); iter.valid(); iter.advance()) {
      // if(!id.equals(nid)) {
      neighbors.add(iter);
      // Size limitation to exactly numberOfNeighbors
      if (neighbors.size() >= numberOfNeighbors) {
        break;
      }
    }
    neighbors.sort();
    storage.put(iditer, neighbors);
    getLogger().incrementProcessed(progress);
  }
  getLogger().ensureCompleted(progress);
}
Use of de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress in project elki by elki-project.
Class SpatialApproximationMaterializeKNNPreprocessor, method preprocess().
@Override
protected void preprocess() {
  DistanceQuery<O> distanceQuery = relation.getDistanceQuery(distanceFunction);
  SpatialIndexTree<N, E> index = getSpatialIndex(relation);
  storage = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC, KNNList.class);
  MeanVariance pagesize = new MeanVariance();
  MeanVariance ksize = new MeanVariance();
  final Logging log = getLogger();
  if (log.isVerbose()) {
    log.verbose("Approximating nearest neighbor lists to database objects");
  }
  List<E> leaves = index.getLeaves();
  FiniteProgress progress = log.isVerbose() ? new FiniteProgress("Processing leaf nodes", leaves.size(), log) : null;
  for (E leaf : leaves) {
    N node = index.getNode(leaf);
    int size = node.getNumEntries();
    pagesize.put(size);
    if (log.isDebuggingFinest()) {
      log.debugFinest("NumEntries = " + size);
    }
    // Collect the ids in this node.
    ArrayModifiableDBIDs ids = DBIDUtil.newArray(size);
    for (int i = 0; i < size; i++) {
      ids.add(((LeafEntry) node.getEntry(i)).getDBID());
    }
    Object2DoubleOpenHashMap<DBIDPair> cache = new Object2DoubleOpenHashMap<>((size * size * 3) >> 3);
    // Use NaN as the "not cached" sentinel, so the d == d test below works.
    cache.defaultReturnValue(Double.NaN);
    for (DBIDIter id = ids.iter(); id.valid(); id.advance()) {
      KNNHeap kNN = DBIDUtil.newHeap(k);
      for (DBIDIter id2 = ids.iter(); id2.valid(); id2.advance()) {
        DBIDPair key = DBIDUtil.newPair(id, id2);
        double d = cache.removeDouble(key);
        if (d == d) {
          // Not NaN: consume the previously computed distance.
          kNN.insert(d, id2);
        } else {
          // Compute the distance and store it for the reverse pair.
          d = distanceQuery.distance(id, id2);
          kNN.insert(d, id2);
          // put it into the cache, but with the keys reversed
          key = DBIDUtil.newPair(id2, id);
          cache.put(key, d);
        }
      }
      ksize.put(kNN.size());
      storage.put(id, kNN.toKNNList());
    }
    if (log.isDebugging() && cache.size() > 0) {
      log.warning("Cache should be empty after each run, but still has " + cache.size() + " elements.");
    }
    log.incrementProcessed(progress);
  }
  log.ensureCompleted(progress);
  if (log.isVerbose()) {
    log.verbose("Average page size = " + pagesize.getMean() + " +- " + pagesize.getSampleStddev());
    log.verbose("On average, " + ksize.getMean() + " +- " + ksize.getSampleStddev() + " neighbors returned.");
  }
}
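The inner loop above computes each pairwise distance only once: a pair is looked up and removed with removeDouble, the test d == d tells a cached value apart from a NaN miss, and a freshly computed distance is stored under the reversed key so that the opposite iteration order consumes it later. The following sketch isolates that symmetric-cache idea outside of ELKI; the class name, key encoding, and function parameter are hypothetical:

import java.util.function.ToDoubleBiFunction;
import it.unimi.dsi.fastutil.objects.Object2DoubleOpenHashMap;

// Caches a symmetric function f(a, b) so that each unordered pair is evaluated once.
public class SymmetricCacheSketch {
  private final Object2DoubleOpenHashMap<String> cache = new Object2DoubleOpenHashMap<>();

  public SymmetricCacheSketch() {
    cache.defaultReturnValue(Double.NaN); // missing keys read back as NaN
  }

  public double get(String a, String b, ToDoubleBiFunction<String, String> f) {
    double d = cache.removeDouble(a + "|" + b); // consume a cached value, if any
    if (d == d) {
      // Not NaN: the pair was already computed in the other order.
      return d;
    }
    d = f.applyAsDouble(a, b);
    cache.put(b + "|" + a, d); // store under the reversed key for the later lookup
    return d;
  }
}

As in the preprocessor, this only pays off when every unordered pair is visited exactly twice, once in each order; removing entries on consumption keeps the cache from growing beyond the pairs still pending.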
Use of de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress in project elki by elki-project.
Class DeLiClu, method run().
public ClusterOrder run(Database database, Relation<NV> relation) {
  Collection<DeLiCluTreeIndex<NV>> indexes = ResultUtil.filterResults(database.getHierarchy(), relation, DeLiCluTreeIndex.class);
  if (indexes.size() != 1) {
    throw new MissingPrerequisitesException("DeLiClu found " + indexes.size() + " DeLiCluTree indexes. DeLiClu needs a special index to operate, therefore you need to add this index to your database.");
  }
  DeLiCluTreeIndex<NV> index = indexes.iterator().next();
  if (!(getDistanceFunction() instanceof SpatialPrimitiveDistanceFunction<?>)) {
    throw new IllegalArgumentException("Distance Function must be an instance of " + SpatialPrimitiveDistanceFunction.class.getName());
  }
  @SuppressWarnings("unchecked")
  SpatialPrimitiveDistanceFunction<NV> distFunction = (SpatialPrimitiveDistanceFunction<NV>) getDistanceFunction();
  // first do the knn-Join
  if (LOG.isVerbose()) {
    LOG.verbose("knnJoin...");
  }
  Relation<KNNList> knns = knnJoin.run(relation);
  DBIDs ids = relation.getDBIDs();
  final int size = ids.size();
  FiniteProgress progress = LOG.isVerbose() ? new FiniteProgress("DeLiClu", size, LOG) : null;
  ClusterOrder clusterOrder = new ClusterOrder(ids, "DeLiClu Clustering", "deliclu-clustering");
  heap = new UpdatableHeap<>();
  // add start object to cluster order and (root, root) to priority queue
  DBID startID = DBIDUtil.deref(ids.iter());
  clusterOrder.add(startID, Double.POSITIVE_INFINITY, null);
  int numHandled = 1;
  index.setHandled(startID, relation.get(startID));
  SpatialDirectoryEntry rootEntry = (SpatialDirectoryEntry) index.getRootEntry();
  SpatialObjectPair spatialObjectPair = new SpatialObjectPair(0., rootEntry, rootEntry, true);
  heap.add(spatialObjectPair);
  while (numHandled < size) {
    if (heap.isEmpty()) {
      throw new AbortException("DeLiClu heap was empty when it shouldn't have been.");
    }
    SpatialObjectPair dataPair = heap.poll();
    if (dataPair.isExpandable) {
      // pair of nodes
      expandNodes(index, distFunction, dataPair, knns);
    } else {
      // pair of objects: set handled
      LeafEntry e1 = (LeafEntry) dataPair.entry1;
      LeafEntry e2 = (LeafEntry) dataPair.entry2;
      final DBID e1id = e1.getDBID();
      IndexTreePath<DeLiCluEntry> path = index.setHandled(e1id, relation.get(e1id));
      if (path == null) {
        throw new RuntimeException("snh: parent(" + e1id + ") = null!!!");
      }
      // add to cluster order
      clusterOrder.add(e1id, dataPair.distance, e2.getDBID());
      numHandled++;
      // reinsert expanded leafs
      reinsertExpanded(distFunction, index, path, knns);
      if (progress != null) {
        progress.setProcessed(numHandled, LOG);
      }
    }
  }
  LOG.ensureCompleted(progress);
  return clusterOrder;
}
Use of de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress in project elki by elki-project.
Class PerplexityAffinityMatrixBuilder, method computePij().
/**
* Compute the pij from the distance matrix.
*
* @param dist Distance matrix.
* @param perplexity Desired perplexity
* @param initialScale Initial scale
* @return Affinity matrix pij
*/
protected static double[][] computePij(double[][] dist, double perplexity, double initialScale) {
  final int size = dist.length;
  final double logPerp = FastMath.log(perplexity);
  double[][] pij = new double[size][size];
  FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Optimizing perplexities", size, LOG) : null;
  Duration timer = LOG.isStatistics() ? LOG.newDuration(PerplexityAffinityMatrixBuilder.class.getName() + ".runtime.pijmatrix").begin() : null;
  MeanVariance mv = LOG.isStatistics() ? new MeanVariance() : null;
  for (int i = 0; i < size; i++) {
    double beta = computePi(i, dist[i], pij[i], perplexity, logPerp);
    if (mv != null) {
      // Sigma
      mv.put(beta > 0 ? FastMath.sqrt(.5 / beta) : 0.);
    }
    LOG.incrementProcessed(prog);
  }
  LOG.ensureCompleted(prog);
  if (LOG.isStatistics()) {
    // timer != null, mv != null
    LOG.statistics(timer.end());
    LOG.statistics(new DoubleStatistic(PerplexityAffinityMatrixBuilder.class.getName() + ".sigma.average", mv.getMean()));
    LOG.statistics(new DoubleStatistic(PerplexityAffinityMatrixBuilder.class.getName() + ".sigma.stddev", mv.getSampleStddev()));
  }
  // Scale pij to have the desired sum EARLY_EXAGGERATION
  double sum = 0.;
  for (int i = 1; i < size; i++) {
    final double[] pij_i = pij[i];
    for (int j = 0; j < i; j++) {
      // Only iterate over half the matrix, and symmetrize on the fly.
      sum += (pij_i[j] += pij[j][i]);
    }
  }
  // Scaling taken from original tSNE code:
  final double scale = initialScale / (2. * sum);
  for (int i = 1; i < size; i++) {
    final double[] pij_i = pij[i];
    for (int j = 0; j < i; j++) {
      pij_i[j] = pij[j][i] = MathUtil.max(pij_i[j] * scale, MIN_PIJ);
    }
  }
  return pij;
}
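In formulas, the statistics block treats the value returned by computePi as a precision, reporting sigma_i = sqrt(1 / (2 beta_i)) per point, and the two loops afterwards symmetrize and globally rescale the conditional affinities written into row i. The summary below follows the code above together with the standard t-SNE formulation; the per-row conditional form is stated as an assumption rather than read off the snippet:

p_{j|i} \propto \exp(-\beta_i \, d_{ij}), \qquad \text{with } \beta_i \text{ chosen so that } H(p_{\cdot|i}) = \log(\text{perplexity}),

p_{ij} = p_{ji} = \max\!\left( \frac{\text{initialScale}}{2 \sum_{k<l} \bigl(p_{l|k} + p_{k|l}\bigr)} \, \bigl(p_{j|i} + p_{i|j}\bigr), \; \text{MIN\_PIJ} \right).

Up to the MIN_PIJ clamping, the off-diagonal entries of the returned matrix therefore sum to initialScale.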