use of de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDList in project elki by elki-project.
the class RangeQueryBenchmarkAlgorithm method run.
/**
 * Run the algorithm, with a separate radius relation.
 *
 * @param database Database
 * @param relation Relation
 * @param radrel Radius relation
 * @return Null result
 */
public Result run(Database database, Relation<O> relation, Relation<NumberVector> radrel) {
  if (queries != null) {
    throw new AbortException("This 'run' method will not use the given query set!");
  }
  // Get a distance and range query instance.
  DistanceQuery<O> distQuery = database.getDistanceQuery(relation, getDistanceFunction());
  RangeQuery<O> rangeQuery = database.getRangeQuery(distQuery);
  final DBIDs sample = DBIDUtil.randomSample(relation.getDBIDs(), sampling, random);
  FiniteProgress prog = LOG.isVeryVerbose() ? new FiniteProgress("Range queries", sample.size(), LOG) : null;
  int hash = 0;
  MeanVariance mv = new MeanVariance();
  for (DBIDIter iditer = sample.iter(); iditer.valid(); iditer.advance()) {
    double r = radrel.get(iditer).doubleValue(0);
    DoubleDBIDList rres = rangeQuery.getRangeForDBID(iditer, r);
    int ichecksum = 0;
    for (DBIDIter it = rres.iter(); it.valid(); it.advance()) {
      ichecksum += DBIDUtil.asInteger(it);
    }
    hash = Util.mixHashCodes(hash, ichecksum);
    mv.put(rres.size());
    LOG.incrementProcessed(prog);
  }
  LOG.ensureCompleted(prog);
  if (LOG.isStatistics()) {
    LOG.statistics("Result hashcode: " + hash);
    LOG.statistics("Mean number of results: " + mv.getMean() + " +- " + mv.getNaiveStddev());
  }
  return null;
}
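The pattern above reduces each range query result to an order-insensitive checksum of the returned IDs, mixes the per-query checksums into a single hash, and feeds the result sizes into a MeanVariance. Below is a minimal plain-Java sketch of that bookkeeping, independent of the ELKI API; the sample result lists and the 31-based hash mixing are invented stand-ins for the DBID lists and Util.mixHashCodes above.

import java.util.Arrays;
import java.util.List;

public class ResultSummarySketch {
  public static void main(String[] args) {
    // Invented query results: each array holds the IDs returned by one range query.
    List<int[]> results = Arrays.asList(new int[] { 1, 2, 3 }, new int[] { 2, 4 }, new int[] { 5 });
    int hash = 0;
    double sum = 0., sumsq = 0.;
    for (int[] res : results) {
      int checksum = 0;
      for (int id : res) {
        checksum += id; // order-insensitive per-query checksum
      }
      hash = hash * 31 + checksum; // stand-in for Util.mixHashCodes
      sum += res.length;
      sumsq += (double) res.length * res.length;
    }
    double mean = sum / results.size();
    double stddev = Math.sqrt(sumsq / results.size() - mean * mean); // naive stddev, as in MeanVariance
    System.out.println("Result hashcode: " + hash);
    System.out.println("Mean number of results: " + mean + " +- " + stddev);
  }
}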
use of de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDList in project elki by elki-project.
the class RangeQueryBenchmarkAlgorithm method run.
/**
 * Run the algorithm, with a separate query set.
 *
 * @param database Database
 * @param relation Relation
 * @return Null result
 */
public Result run(Database database, Relation<O> relation) {
  if (queries == null) {
    throw new AbortException("A query set is required for this 'run' method.");
  }
  // Get a distance and range query instance.
  DistanceQuery<O> distQuery = database.getDistanceQuery(relation, getDistanceFunction());
  RangeQuery<O> rangeQuery = database.getRangeQuery(distQuery);
  NumberVector.Factory<O> ofactory = RelationUtil.getNumberVectorFactory(relation);
  int dim = RelationUtil.dimensionality(relation);
  // Separate query set.
  TypeInformation res = VectorFieldTypeInformation.typeRequest(NumberVector.class, dim + 1, dim + 1);
  MultipleObjectsBundle bundle = queries.loadData();
  int col = -1;
  for (int i = 0; i < bundle.metaLength(); i++) {
    if (res.isAssignableFromType(bundle.meta(i))) {
      col = i;
      break;
    }
  }
  if (col < 0) {
    StringBuilder buf = new StringBuilder();
    buf.append("No compatible data type in query input was found. Expected: ");
    buf.append(res.toString());
    buf.append(" have: ");
    for (int i = 0; i < bundle.metaLength(); i++) {
      if (i > 0) {
        buf.append(' ');
      }
      buf.append(bundle.meta(i).toString());
    }
    throw new IncompatibleDataException(buf.toString());
  }
  // Random sampling is a bit of a hack, sorry.
  // But currently, we don't (yet) have an "integer random sample" function.
  DBIDRange sids = DBIDUtil.generateStaticDBIDRange(bundle.dataLength());
  final DBIDs sample = DBIDUtil.randomSample(sids, sampling, random);
  FiniteProgress prog = LOG.isVeryVerbose() ? new FiniteProgress("Range queries", sample.size(), LOG) : null;
  int hash = 0;
  MeanVariance mv = new MeanVariance();
  double[] buf = new double[dim];
  for (DBIDIter iditer = sample.iter(); iditer.valid(); iditer.advance()) {
    int off = sids.binarySearch(iditer);
    assert (off >= 0);
    NumberVector o = (NumberVector) bundle.data(off, col);
    for (int i = 0; i < dim; i++) {
      buf[i] = o.doubleValue(i);
    }
    O v = ofactory.newNumberVector(buf);
    double r = o.doubleValue(dim);
    DoubleDBIDList rres = rangeQuery.getRangeForObject(v, r);
    int ichecksum = 0;
    for (DBIDIter it = rres.iter(); it.valid(); it.advance()) {
      ichecksum += DBIDUtil.asInteger(it);
    }
    hash = Util.mixHashCodes(hash, ichecksum);
    mv.put(rres.size());
    LOG.incrementProcessed(prog);
  }
  LOG.ensureCompleted(prog);
  if (LOG.isStatistics()) {
    LOG.statistics("Result hashcode: " + hash);
    LOG.statistics("Mean number of results: " + mv.getMean() + " +- " + mv.getNaiveStddev());
  }
  return null;
}
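The type request with dimensionality dim + 1 assumes each row of the external query file carries dim query coordinates followed by the query radius in the last column, which is exactly how the loop above splits each row (buf receives the first dim values, r the value at index dim). A small plain-Java sketch of that row convention, with invented sample rows:

import java.util.Arrays;

public class QueryRowSketch {
  public static void main(String[] args) {
    int dim = 2;
    // Each row: dim query coordinates followed by the query radius.
    double[][] rows = { { 0.5, 0.5, 0.10 }, { 1.0, 2.0, 0.25 } };
    for (double[] row : rows) {
      double[] query = Arrays.copyOf(row, dim); // first dim values: the query vector
      double radius = row[dim];                 // last value: the query radius
      System.out.println("query=" + Arrays.toString(query) + " radius=" + radius);
    }
  }
}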
use of de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDList in project elki by elki-project.
the class LOCI method precomputeInterestingRadii.
/**
 * Preprocessing step: determine the radii of interest for each point.
 *
 * @param ids IDs to process
 * @param rangeQuery Range query
 * @param interestingDistances Distances of interest
 */
protected void precomputeInterestingRadii(DBIDs ids, RangeQuery<O> rangeQuery, WritableDataStore<DoubleIntArrayList> interestingDistances) {
  FiniteProgress progressPreproc = LOG.isVerbose() ? new FiniteProgress("LOCI preprocessing", ids.size(), LOG) : null;
  for (DBIDIter iditer = ids.iter(); iditer.valid(); iditer.advance()) {
    DoubleDBIDList neighbors = rangeQuery.getRangeForDBID(iditer, rmax);
    // build list of critical distances
    DoubleIntArrayList cdist = new DoubleIntArrayList(neighbors.size() << 1);
    {
      int i = 0;
      DoubleDBIDListIter ni = neighbors.iter();
      while (ni.valid()) {
        final double curdist = ni.doubleValue();
        ++i;
        ni.advance();
        // Skip, if tied to the next object:
        if (ni.valid() && curdist == ni.doubleValue()) {
          continue;
        }
        cdist.append(curdist, i);
        // Scale radius, and reinsert
        if (alpha != 1.) {
          final double ri = curdist / alpha;
          if (ri <= rmax) {
            cdist.append(ri, Integer.MIN_VALUE);
          }
        }
      }
    }
    cdist.sort();
    // fill the gaps to have fast lookups of number of neighbors at a given
    // distance.
    int lastk = 0;
    for (int i = 0, size = cdist.size(); i < size; i++) {
      final int k = cdist.getInt(i);
      if (k == Integer.MIN_VALUE) {
        cdist.setValue(i, lastk);
      } else {
        lastk = k;
      }
    }
    // TODO: shrink the list, removing duplicate radii?
    interestingDistances.put(iditer, cdist);
    LOG.incrementProcessed(progressPreproc);
  }
  LOG.ensureCompleted(progressPreproc);
}
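For clarity, here is a plain-Java sketch of the critical-distance list built above: (radius, neighbor count) pairs, scaled radii r / alpha appended as placeholders, a sort by radius, and a gap-filling pass that replaces each placeholder with the last seen count. It uses a List of double[] with a negative count as the placeholder marker in place of DoubleIntArrayList and Integer.MIN_VALUE; the sample distances and the alpha and rmax values are invented.

import java.util.ArrayList;
import java.util.Comparator;
import java.util.List;

public class CriticalDistanceSketch {
  public static void main(String[] args) {
    double[] neighborDist = { 0.1, 0.4, 0.4, 0.9 }; // sorted neighbor distances (invented)
    double alpha = 0.5, rmax = 1.0;
    List<double[]> cdist = new ArrayList<>(); // entries: { radius, count }; count < 0 marks a placeholder
    for (int i = 0; i < neighborDist.length; i++) {
      double d = neighborDist[i];
      if (i + 1 < neighborDist.length && d == neighborDist[i + 1]) {
        continue; // skip ties, keeping only the largest count per distance
      }
      cdist.add(new double[] { d, i + 1 });
      double scaled = d / alpha; // reinsert the scaled radius, if still within rmax
      if (alpha != 1. && scaled <= rmax) {
        cdist.add(new double[] { scaled, -1 });
      }
    }
    cdist.sort(Comparator.comparingDouble(a -> a[0]));
    int lastk = 0;
    for (double[] e : cdist) {
      if (e[1] < 0) {
        e[1] = lastk; // fill the gap: neighbor count is unchanged at this radius
      } else {
        lastk = (int) e[1];
      }
    }
    for (double[] e : cdist) {
      System.out.println("r=" + e[0] + " -> " + (int) e[1] + " neighbors");
    }
  }
}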
use of de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDList in project elki by elki-project.
the class NaiveMeanShiftClustering method run.
/**
 * Run the mean-shift clustering algorithm.
 *
 * @param database Database
 * @param relation Data relation
 * @return Clustering result
 */
public Clustering<MeanModel> run(Database database, Relation<V> relation) {
  final DistanceQuery<V> distq = database.getDistanceQuery(relation, getDistanceFunction());
  final RangeQuery<V> rangeq = database.getRangeQuery(distq);
  final NumberVector.Factory<V> factory = RelationUtil.getNumberVectorFactory(relation);
  final int dim = RelationUtil.dimensionality(relation);
  // Stopping threshold
  final double threshold = bandwidth * 1E-10;
  // Result store:
  ArrayList<Pair<V, ModifiableDBIDs>> clusters = new ArrayList<>();
  ModifiableDBIDs noise = DBIDUtil.newArray();
  FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Mean-shift clustering", relation.size(), LOG) : null;
  for (DBIDIter iter = relation.iterDBIDs(); iter.valid(); iter.advance()) {
    // Initial position:
    V position = relation.get(iter);
    iterations: for (int j = 1; j <= MAXITER; j++) {
      // Compute new position:
      V newvec = null;
      {
        DoubleDBIDList neigh = rangeq.getRangeForObject(position, bandwidth);
        boolean okay = (neigh.size() > 1) || (neigh.size() >= 1 && j > 1);
        if (okay) {
          Centroid newpos = new Centroid(dim);
          for (DoubleDBIDListIter niter = neigh.iter(); niter.valid(); niter.advance()) {
            final double weight = kernel.density(niter.doubleValue() / bandwidth);
            newpos.put(relation.get(niter), weight);
          }
          newvec = factory.newNumberVector(newpos.getArrayRef());
          // TODO: detect 0 weight!
        }
        if (!okay) {
          noise.add(iter);
          break iterations;
        }
      }
      // Test if we are close to one of the known clusters:
      double bestd = Double.POSITIVE_INFINITY;
      Pair<V, ModifiableDBIDs> bestp = null;
      for (Pair<V, ModifiableDBIDs> pair : clusters) {
        final double merged = distq.distance(newvec, pair.first);
        if (merged < bestd) {
          bestd = merged;
          bestp = pair;
        }
      }
      // Check for convergence:
      double delta = distq.distance(position, newvec);
      if (bestd < 10 * threshold || bestd * 2 < delta) {
        bestp.second.add(iter);
        break iterations;
      }
      if (j == MAXITER) {
        LOG.warning("No convergence after " + MAXITER + " iterations. Distance: " + delta);
      }
      if (Double.isNaN(delta)) {
        LOG.warning("Encountered NaN distance. Invalid center vector? " + newvec.toString());
        break iterations;
      }
      if (j == MAXITER || delta < threshold) {
        if (LOG.isDebuggingFine()) {
          LOG.debugFine("New cluster:" + newvec + " delta: " + delta + " threshold: " + threshold + " bestd: " + bestd);
        }
        ArrayModifiableDBIDs cids = DBIDUtil.newArray();
        cids.add(iter);
        clusters.add(new Pair<V, ModifiableDBIDs>(newvec, cids));
        break iterations;
      }
      position = newvec;
    }
    LOG.incrementProcessed(prog);
  }
  LOG.ensureCompleted(prog);
  ArrayList<Cluster<MeanModel>> cs = new ArrayList<>(clusters.size());
  for (Pair<V, ModifiableDBIDs> pair : clusters) {
    cs.add(new Cluster<>(pair.second, new MeanModel(pair.first.toArray())));
  }
  if (noise.size() > 0) {
    cs.add(new Cluster<MeanModel>(noise, true));
  }
  Clustering<MeanModel> c = new Clustering<>("Mean-shift Clustering", "mean-shift-clustering", cs);
  return c;
}
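For intuition, the following is a one-dimensional plain-Java sketch of the update performed in the inner loop above: shift a point to the kernel-weighted mean of its neighbors within the bandwidth until the shift falls below the stopping threshold. The data set, the Epanechnikov-style kernel, and the iteration cap are assumptions for illustration only, not the kernel.density() or convergence handling of the ELKI class.

public class MeanShift1DSketch {
  // Epanechnikov-style kernel on the normalized distance u = d / bandwidth (an assumption).
  static double kernel(double u) {
    return Math.abs(u) < 1 ? 0.75 * (1 - u * u) : 0;
  }

  public static void main(String[] args) {
    double[] data = { 1.0, 1.2, 1.1, 5.0, 5.2 }; // invented sample
    double bandwidth = 1.0, threshold = bandwidth * 1E-10;
    double position = 1.0; // start at the first data point
    for (int j = 1; j <= 1000; j++) {
      double wsum = 0., psum = 0.;
      for (double x : data) {
        double d = Math.abs(x - position);
        if (d <= bandwidth) {
          double w = kernel(d / bandwidth);
          wsum += w;
          psum += w * x;
        }
      }
      if (wsum == 0.) {
        break; // no neighbors within the bandwidth
      }
      double newpos = psum / wsum; // kernel-weighted mean of the neighborhood
      double delta = Math.abs(newpos - position);
      position = newpos;
      if (delta < threshold) {
        break; // converged
      }
    }
    System.out.println("Mode estimate: " + position);
  }
}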
use of de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDList in project elki by elki-project.
the class AbstractRangeQueryNeighborPredicate method preprocess.
/**
 * Perform the preprocessing step.
 *
 * @param modelcls Class of models
 * @param relation Data relation
 * @param query Range query
 * @return Precomputed models
 */
public DataStore<M> preprocess(Class<? super M> modelcls, Relation<O> relation, RangeQuery<O> query) {
  WritableDataStore<M> storage = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP, modelcls);
  Duration time = getLogger().newDuration(this.getClass().getName() + ".preprocessing-time").begin();
  FiniteProgress progress = getLogger().isVerbose() ? new FiniteProgress(this.getClass().getName(), relation.size(), getLogger()) : null;
  for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
    DoubleDBIDList neighbors = query.getRangeForDBID(iditer, epsilon);
    storage.put(iditer, computeLocalModel(iditer, neighbors, relation));
    getLogger().incrementProcessed(progress);
  }
  getLogger().ensureCompleted(progress);
  getLogger().statistics(time.end());
  return storage;
}
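The method above is the generic precompute-and-cache pattern: one epsilon range query per object, with the neighborhood condensed into a local model that later neighbor-predicate evaluations reuse. A tiny plain-Java sketch of that pattern, using a linear-scan range query and the neighbor count as a stand-in for the model type M; all data is invented.

import java.util.HashMap;
import java.util.Map;

public class PrecomputeSketch {
  public static void main(String[] args) {
    double[][] points = { { 0, 0 }, { 0.1, 0 }, { 5, 5 } }; // invented sample
    double epsilon = 1.0;
    // One precomputed "local model" per object; here the model is simply the
    // neighbor count, standing in for whatever computeLocalModel() derives.
    Map<Integer, Integer> storage = new HashMap<>();
    for (int i = 0; i < points.length; i++) {
      int count = 0;
      for (double[] q : points) { // naive epsilon range query by linear scan
        double dx = q[0] - points[i][0], dy = q[1] - points[i][1];
        if (Math.sqrt(dx * dx + dy * dy) <= epsilon) {
          count++;
        }
      }
      storage.put(i, count);
    }
    System.out.println(storage); // {0=2, 1=2, 2=1}
  }
}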