Use of de.lmu.ifi.dbs.elki.database.ids.DBIDs in project elki by elki-project.
The run method of class DeLiClu.
/**
 * Runs DeLiClu on the given relation and builds the resulting cluster order.
 *
 * The method first performs the kNN join, seeds the cluster order with the
 * first object of the relation, and then repeatedly polls the heap of
 * spatial object pairs: expandable pairs are expanded, object pairs are
 * appended to the cluster order, until every object has been handled.
 *
 * @param database Database whose result hierarchy is searched for the index
 * @param relation Relation to process
 * @return the cluster order
 * @throws MissingPrerequisitesException if not exactly one DeLiCluTreeIndex
 *         is found for the relation
 * @throws IllegalArgumentException if the distance function is not a
 *         SpatialPrimitiveDistanceFunction
 * @throws AbortException if the heap runs empty before all objects were
 *         handled
 */
public ClusterOrder run(Database database, Relation<NV> relation) {
  // Exactly one DeLiClu tree index must be available for this relation.
  final Collection<DeLiCluTreeIndex<NV>> candidates = ResultUtil.filterResults(database.getHierarchy(), relation, DeLiCluTreeIndex.class);
  if (candidates.size() != 1) {
    throw new MissingPrerequisitesException("DeLiClu found " + candidates.size() + " DeLiCluTree indexes. DeLiClu needs a special index to operate, therefore you need to add this index to your database.");
  }
  final DeLiCluTreeIndex<NV> index = candidates.iterator().next();

  // The distance function has to work on spatial primitives.
  if (!(getDistanceFunction() instanceof SpatialPrimitiveDistanceFunction<?>)) {
    throw new IllegalArgumentException("Distance Function must be an instance of " + SpatialPrimitiveDistanceFunction.class.getName());
  }
  @SuppressWarnings("unchecked")
  final SpatialPrimitiveDistanceFunction<NV> distFunction = (SpatialPrimitiveDistanceFunction<NV>) getDistanceFunction();

  // Step 1: the kNN join.
  if (LOG.isVerbose()) {
    LOG.verbose("knnJoin...");
  }
  final Relation<KNNList> knns = knnJoin.run(relation);

  final DBIDs ids = relation.getDBIDs();
  final int size = ids.size();
  FiniteProgress progress = null;
  if (LOG.isVerbose()) {
    progress = new FiniteProgress("DeLiClu", size, LOG);
  }
  final ClusterOrder order = new ClusterOrder(ids, "DeLiClu Clustering", "deliclu-clustering");
  heap = new UpdatableHeap<>();

  // Step 2: seed the cluster order with the start object ...
  final DBID startID = DBIDUtil.deref(ids.iter());
  order.add(startID, Double.POSITIVE_INFINITY, null);
  int handled = 1;
  index.setHandled(startID, relation.get(startID));
  // ... and the priority queue with the pair (root, root).
  final SpatialDirectoryEntry rootEntry = (SpatialDirectoryEntry) index.getRootEntry();
  heap.add(new SpatialObjectPair(0., rootEntry, rootEntry, true));

  // Step 3: process pairs until all objects are part of the cluster order.
  while (handled < size) {
    if (heap.isEmpty()) {
      throw new AbortException("DeLiClu heap was empty when it shouldn't have been.");
    }
    final SpatialObjectPair next = heap.poll();
    if (next.isExpandable) {
      // Pair of nodes: expand it and fetch the next pair.
      expandNodes(index, distFunction, next, knns);
      continue;
    }
    // Pair of objects: mark the first object as handled.
    final LeafEntry first = (LeafEntry) next.entry1;
    final LeafEntry second = (LeafEntry) next.entry2;
    final DBID firstId = first.getDBID();
    final IndexTreePath<DeLiCluEntry> path = index.setHandled(firstId, relation.get(firstId));
    if (path == null) {
      throw new RuntimeException("snh: parent(" + firstId + ") = null!!!");
    }
    // Append to the cluster order, with the second object as predecessor.
    order.add(firstId, next.distance, second.getDBID());
    handled++;
    // Reinsert expanded leafs.
    reinsertExpanded(distFunction, index, path, knns);
    if (progress != null) {
      progress.setProcessed(handled, LOG);
    }
  }
  LOG.ensureCompleted(progress);
  return order;
}
Use of de.lmu.ifi.dbs.elki.database.ids.DBIDs in project elki by elki-project.
The run method of class TSNE.
/**
 * Computes the t-SNE projection of a relation.
 *
 * An affinity matrix is computed for the input (using EARLY_EXAGGERATION),
 * a random initial solution is optimized via optimizetSNE, and the resulting
 * rows are wrapped as DoubleVector objects in a new materialized relation
 * over the DBIDs of the input relation.
 *
 * @param relation Input relation
 * @return relation holding the projected vectors
 */
public Relation<DoubleVector> run(Relation<O> relation) {
  final AffinityMatrix affinities = affinity.computeAffinityMatrix(relation, EARLY_EXAGGERATION);
  // Random starting point, one row per entry of the affinity matrix.
  final double[][] embedding = randomInitialSolution(affinities.size(), dim, random.getSingleThreadedRandom());
  // Reset the distance counter before optimizing, then report it.
  projectedDistances.setLong(0L);
  optimizetSNE(affinities, embedding);
  LOG.statistics(projectedDistances);
  // Remove the original (unprojected) data unless configured otherwise.
  removePreviousRelation(relation);
  // Convert the solution into the output data format.
  final DBIDs ids = relation.getDBIDs();
  final WritableDataStore<DoubleVector> store = DataStoreFactory.FACTORY.makeStorage(ids, DataStoreFactory.HINT_DB | DataStoreFactory.HINT_SORTED, DoubleVector.class);
  final VectorFieldTypeInformation<DoubleVector> outType = new VectorFieldTypeInformation<>(DoubleVector.FACTORY, dim);
  DBIDArrayIter cursor = affinities.iterDBIDs();
  while (cursor.valid()) {
    // The iterator offset selects the matching row of the solution.
    store.put(cursor, DoubleVector.wrap(embedding[cursor.getOffset()]));
    cursor.advance();
  }
  return new MaterializedRelation<>("tSNE", "t-SNE", outType, store, ids);
}
Use of de.lmu.ifi.dbs.elki.database.ids.DBIDs in project elki by elki-project.
The run method of class AveragePrecisionAtK.
/**
 * Evaluates the precision of the nearest neighbors at every rank 1..k.
 *
 * For each object of a random sample, the nearest neighbors are retrieved
 * and, rank by rank, the fraction of neighbors whose label matches the
 * label of the query object is recorded.
 *
 * @param database Database to run on (for kNN queries)
 * @param relation Relation for distance computations
 * @param lrelation Relation for class label comparison
 * @return One row per rank, laid out as
 *         {rank, mean, sample standard deviation (0 if at most one value),
 *         minimum, maximum, count}.
 */
public CollectionResult<double[]> run(Database database, Relation<O> relation, Relation<?> lrelation) {
  final DistanceQuery<O> distQuery = database.getDistanceQuery(relation, getDistanceFunction());
  // Request one extra neighbor when the query object itself is skipped.
  final int qk = includeSelf ? k : k + 1;
  final KNNQuery<O> knnQuery = database.getKNNQuery(distQuery, qk);
  final MeanVarianceMinMax[] perRank = MeanVarianceMinMax.newArray(k);
  final DBIDs sample = DBIDUtil.randomSample(relation.getDBIDs(), sampling, random);

  FiniteProgress objloop = null;
  if (LOG.isVerbose()) {
    objloop = new FiniteProgress("Computing nearest neighbors", sample.size(), LOG);
  }
  for (DBIDIter query = sample.iter(); query.valid(); query.advance()) {
    final KNNList neighbors = knnQuery.getKNNForDBID(query, qk);
    final Object queryLabel = lrelation.get(query);
    int hits = 0, rank = 0;
    for (DBIDIter nb = neighbors.iter(); rank < k && nb.valid(); nb.advance()) {
      // The query object itself does not consume a rank when excluded.
      if (includeSelf || !DBIDUtil.equal(query, nb)) {
        if (match(queryLabel, lrelation.get(nb))) {
          hits++;
        }
        perRank[rank].put(hits / (double) (rank + 1));
        rank++;
      }
    }
    LOG.incrementProcessed(objloop);
  }
  LOG.ensureCompleted(objloop);

  // Turn the per-rank statistics into plain double arrays.
  final Collection<double[]> rows = new ArrayList<>(k);
  for (int rank = 0; rank < k; rank++) {
    final MeanVarianceMinMax stats = perRank[rank];
    double std = 0.;
    if (stats.getCount() > 1.) {
      std = stats.getSampleStddev();
    }
    rows.add(new double[] { rank + 1, stats.getMean(), std, stats.getMin(), stats.getMax(), stats.getCount() });
  }
  return new CollectionResult<>("Average Precision", "average-precision", rows);
}
Use of de.lmu.ifi.dbs.elki.database.ids.DBIDs in project elki by elki-project.
The run method of class KNNJoin.
/**
 * Joins each object of the given relation with its k-nearest neighbors and
 * exposes the neighbor lists as a relation.
 *
 * The join itself is delegated to the {@code run(relation, ids)} overload,
 * called with all DBIDs of the relation.
 *
 * @param relation Relation to process
 * @return relation of kNN lists over the DBIDs of the input relation
 */
public Relation<KNNList> run(Relation<V> relation) {
  final DBIDs allIds = relation.getDBIDs();
  final WritableDataStore<KNNList> neighborStore = run(relation, allIds);
  // Wrap the data store so that it can be used as a relation.
  return new MaterializedRelation<>("k nearest neighbors", "kNNs", TypeUtil.KNNLIST, neighborStore, allIds);
}
Use of de.lmu.ifi.dbs.elki.database.ids.DBIDs in project elki by elki-project.
The run method of class KNNBenchmarkAlgorithm.
/**
 * Run the algorithm.
 *
 * Executes kNN queries either for a random sample of the relation itself
 * (when no query set is configured) or for a random sample of the objects
 * loaded from the separate query source. For every query, the neighbor IDs
 * are folded into a hash code and the result size and k-distance are
 * aggregated; the aggregates are logged as statistics.
 *
 * @param database Database
 * @param relation Relation
 * @return Null result
 * @throws IncompatibleDataException if the query source contains no column
 *         compatible with the distance function's input type
 */
public Result run(Database database, Relation<O> relation) {
  // Get a distance and kNN query instance.
  DistanceQuery<O> distQuery = database.getDistanceQuery(relation, getDistanceFunction());
  KNNQuery<O> knnQuery = database.getKNNQuery(distQuery, k);
  // No query set - use original database.
  if (queries == null) {
    final DBIDs sample = DBIDUtil.randomSample(relation.getDBIDs(), sampling, random);
    FiniteProgress prog = LOG.isVeryVerbose() ? new FiniteProgress("kNN queries", sample.size(), LOG) : null;
    int hash = 0;
    MeanVariance mv = new MeanVariance(), mvdist = new MeanVariance();
    for (DBIDIter iditer = sample.iter(); iditer.valid(); iditer.advance()) {
      KNNList knns = knnQuery.getKNNForDBID(iditer, k);
      hash = Util.mixHashCodes(hash, neighborChecksum(knns));
      mv.put(knns.size());
      mvdist.put(knns.getKNNDistance());
      LOG.incrementProcessed(prog);
    }
    LOG.ensureCompleted(prog);
    logQueryStatistics(hash, mv, mvdist);
  } else {
    // Separate query set.
    TypeInformation res = getDistanceFunction().getInputTypeRestriction();
    MultipleObjectsBundle bundle = queries.loadData();
    // Find the first column that the distance function can process.
    int col = -1;
    for (int i = 0; i < bundle.metaLength(); i++) {
      if (res.isAssignableFromType(bundle.meta(i))) {
        col = i;
        break;
      }
    }
    if (col < 0) {
      throw new IncompatibleDataException("No compatible data type in query input was found. Expected: " + res.toString());
    }
    // Random sampling is a bit of hack, sorry.
    // But currently, we don't (yet) have an "integer random sample" function.
    DBIDRange sids = DBIDUtil.generateStaticDBIDRange(bundle.dataLength());
    final DBIDs sample = DBIDUtil.randomSample(sids, sampling, random);
    FiniteProgress prog = LOG.isVeryVerbose() ? new FiniteProgress("kNN queries", sample.size(), LOG) : null;
    int hash = 0;
    MeanVariance mv = new MeanVariance(), mvdist = new MeanVariance();
    for (DBIDIter iditer = sample.iter(); iditer.valid(); iditer.advance()) {
      // Map the sampled ID back to its row offset in the bundle.
      int off = sids.binarySearch(iditer);
      assert (off >= 0);
      @SuppressWarnings("unchecked")
      O o = (O) bundle.data(off, col);
      KNNList knns = knnQuery.getKNNForObject(o, k);
      hash = Util.mixHashCodes(hash, neighborChecksum(knns));
      mv.put(knns.size());
      mvdist.put(knns.getKNNDistance());
      LOG.incrementProcessed(prog);
    }
    LOG.ensureCompleted(prog);
    logQueryStatistics(hash, mv, mvdist);
  }
  return null;
}

/**
 * Sums the integer representations of all neighbor IDs of one kNN result.
 *
 * @param knns kNN result
 * @return sum of the integer IDs (int arithmetic, may wrap around)
 */
private static int neighborChecksum(KNNList knns) {
  int ichecksum = 0;
  for (DBIDIter it = knns.iter(); it.valid(); it.advance()) {
    ichecksum += DBIDUtil.asInteger(it);
  }
  return ichecksum;
}

/**
 * Logs the aggregated benchmark results, if statistics logging is enabled.
 *
 * @param hash combined hash code of all query results
 * @param mv aggregate of the result sizes
 * @param mvdist aggregate of the k-distances
 */
private void logQueryStatistics(int hash, MeanVariance mv, MeanVariance mvdist) {
  if (!LOG.isStatistics()) {
    return;
  }
  LOG.statistics("Result hashcode: " + hash);
  LOG.statistics("Mean number of results: " + mv.getMean() + " +- " + mv.getNaiveStddev());
  // The k-distance line is only meaningful if at least one query was run.
  if (mvdist.getCount() > 0) {
    LOG.statistics("Mean k-distance: " + mvdist.getMean() + " +- " + mvdist.getNaiveStddev());
  }
}
Aggregations