Usage example of de.lmu.ifi.dbs.elki.database.ids.DBIDIter from the ELKI project: the run method of the AveragePrecisionAtK class.
/**
 * Run the algorithm: evaluate the mean average precision at each rank 1..k.
 *
 * @param database Database to run on (for kNN queries)
 * @param relation Relation for distance computations
 * @param lrelation Relation for class label comparison
 * @return Vectors containing mean and standard deviation.
 */
public CollectionResult<double[]> run(Database database, Relation<O> relation, Relation<?> lrelation) {
  final DistanceQuery<O> distQuery = database.getDistanceQuery(relation, getDistanceFunction());
  // Request one extra neighbor when the query point itself is to be skipped.
  final int numneigh = k + (includeSelf ? 0 : 1);
  final KNNQuery<O> knnQuery = database.getKNNQuery(distQuery, numneigh);
  MeanVarianceMinMax[] stats = MeanVarianceMinMax.newArray(k);
  final DBIDs sample = DBIDUtil.randomSample(relation.getDBIDs(), sampling, random);
  FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Computing nearest neighbors", sample.size(), LOG) : null;
  for(DBIDIter qit = sample.iter(); qit.valid(); qit.advance()) {
    KNNList neighbors = knnQuery.getKNNForDBID(qit, numneigh);
    Object qlabel = lrelation.get(qit);
    int hits = 0, rank = 0;
    for(DBIDIter nit = neighbors.iter(); rank < k && nit.valid(); nit.advance()) {
      // Skip the query point itself without consuming a rank position.
      if(!includeSelf && DBIDUtil.equal(qit, nit)) {
        continue;
      }
      if(match(qlabel, lrelation.get(nit))) {
        hits++;
      }
      // Precision at this rank: fraction of matches among the first (rank+1) neighbors.
      stats[rank].put(hits / (double) (rank + 1));
      rank++;
    }
    LOG.incrementProcessed(prog);
  }
  LOG.ensureCompleted(prog);
  // Emit one row per rank: rank, mean, stddev, min, max, count.
  Collection<double[]> res = new ArrayList<>(k);
  for(int i = 0; i < k; i++) {
    final MeanVarianceMinMax mv = stats[i];
    // Sample stddev needs at least two observations; report 0 otherwise.
    final double std = mv.getCount() > 1. ? mv.getSampleStddev() : 0.;
    res.add(new double[] { i + 1, mv.getMean(), std, mv.getMin(), mv.getMax(), mv.getCount() });
  }
  return new CollectionResult<>("Average Precision", "average-precision", res);
}
Usage example of de.lmu.ifi.dbs.elki.database.ids.DBIDIter from the ELKI project: the run method of the DummyAlgorithm class.
/**
 * Run the algorithm: issue a 10NN Euclidean query for every object, discarding
 * all results. Useful only as a load generator / API demonstration.
 *
 * @param database Database
 * @param relation Relation
 * @return Null result
 */
public Result run(Database database, Relation<O> relation) {
  // Euclidean distance is hardcoded here; only use this class when that is acceptable.
  DistanceQuery<O> distQuery = database.getDistanceQuery(relation, EuclideanDistanceFunction.STATIC);
  KNNQuery<O> knnQuery = database.getKNNQuery(distQuery, 10);
  for(DBIDIter it = relation.iterDBIDs(); it.valid(); it.advance()) {
    // Fetch the object itself, then its 10 nearest neighbors; both results are discarded.
    relation.get(it);
    knnQuery.getKNNForDBID(it, 10);
  }
  // This algorithm intentionally produces no result.
  return null;
}
Usage example of de.lmu.ifi.dbs.elki.database.ids.DBIDIter from the ELKI project: the run method of the KNNBenchmarkAlgorithm class.
/**
 * Run the kNN benchmark: issue k nearest neighbor queries either for a random
 * sample of the database itself, or for a separately loaded query data set,
 * and log checksum and statistics of the results.
 *
 * @param database Database
 * @param relation Relation
 * @return Null result
 */
public Result run(Database database, Relation<O> relation) {
  // Get a distance and kNN query instance.
  DistanceQuery<O> distQuery = database.getDistanceQuery(relation, getDistanceFunction());
  KNNQuery<O> knnQuery = database.getKNNQuery(distQuery, k);
  if(queries == null) {
    // No query set - use (a sample of) the original database as queries.
    final DBIDs sample = DBIDUtil.randomSample(relation.getDBIDs(), sampling, random);
    FiniteProgress prog = LOG.isVeryVerbose() ? new FiniteProgress("kNN queries", sample.size(), LOG) : null;
    int hash = 0;
    MeanVariance mv = new MeanVariance(), mvdist = new MeanVariance();
    for(DBIDIter iditer = sample.iter(); iditer.valid(); iditer.advance()) {
      KNNList knns = knnQuery.getKNNForDBID(iditer, k);
      hash = accumulateResult(knns, hash, mv, mvdist);
      LOG.incrementProcessed(prog);
    }
    LOG.ensureCompleted(prog);
    logStatistics(hash, mv, mvdist);
  }
  else {
    // Separate query set: find a compatible column in the query bundle.
    TypeInformation res = getDistanceFunction().getInputTypeRestriction();
    MultipleObjectsBundle bundle = queries.loadData();
    int col = -1;
    for(int i = 0; i < bundle.metaLength(); i++) {
      if(res.isAssignableFromType(bundle.meta(i))) {
        col = i;
        break;
      }
    }
    if(col < 0) {
      throw new IncompatibleDataException("No compatible data type in query input was found. Expected: " + res.toString());
    }
    // Random sampling is a bit of hack, sorry.
    // But currently, we don't (yet) have an "integer random sample" function.
    DBIDRange sids = DBIDUtil.generateStaticDBIDRange(bundle.dataLength());
    final DBIDs sample = DBIDUtil.randomSample(sids, sampling, random);
    FiniteProgress prog = LOG.isVeryVerbose() ? new FiniteProgress("kNN queries", sample.size(), LOG) : null;
    int hash = 0;
    MeanVariance mv = new MeanVariance(), mvdist = new MeanVariance();
    for(DBIDIter iditer = sample.iter(); iditer.valid(); iditer.advance()) {
      // Map the sampled DBID back to the bundle row it stands for.
      int off = sids.binarySearch(iditer);
      assert (off >= 0);
      @SuppressWarnings("unchecked")
      O o = (O) bundle.data(off, col);
      KNNList knns = knnQuery.getKNNForObject(o, k);
      hash = accumulateResult(knns, hash, mv, mvdist);
      LOG.incrementProcessed(prog);
    }
    LOG.ensureCompleted(prog);
    logStatistics(hash, mv, mvdist);
  }
  return null;
}

/**
 * Fold one kNN result into the running checksum and statistics.
 *
 * @param knns kNN result list
 * @param hash Current checksum
 * @param mv Statistics on result set sizes
 * @param mvdist Statistics on k-distances
 * @return Updated checksum
 */
private int accumulateResult(KNNList knns, int hash, MeanVariance mv, MeanVariance mvdist) {
  int ichecksum = 0;
  for(DBIDIter it = knns.iter(); it.valid(); it.advance()) {
    ichecksum += DBIDUtil.asInteger(it);
  }
  mv.put(knns.size());
  mvdist.put(knns.getKNNDistance());
  return Util.mixHashCodes(hash, ichecksum);
}

/**
 * Log the benchmark summary, if statistics logging is enabled.
 *
 * @param hash Result checksum
 * @param mv Statistics on result set sizes
 * @param mvdist Statistics on k-distances
 */
private void logStatistics(int hash, MeanVariance mv, MeanVariance mvdist) {
  if(LOG.isStatistics()) {
    LOG.statistics("Result hashcode: " + hash);
    LOG.statistics("Mean number of results: " + mv.getMean() + " +- " + mv.getNaiveStddev());
    if(mvdist.getCount() > 0) {
      LOG.statistics("Mean k-distance: " + mvdist.getMean() + " +- " + mvdist.getNaiveStddev());
    }
  }
}
Usage example of de.lmu.ifi.dbs.elki.database.ids.DBIDIter from the ELKI project: the buildDB method of the CASH class.
/**
 * Builds a dim-1 dimensional database where the objects are projected into
 * the specified subspace.
 *
 * @param dim the dimensionality of the database
 * @param basis the basis defining the subspace
 * @param ids the ids for the new database
 * @param relation the database storing the parameterization functions
 * @return a dim-1 dimensional database where the objects are projected into
 *         the specified subspace
 */
private MaterializedRelation<ParameterizationFunction> buildDB(int dim, double[][] basis, DBIDs ids, Relation<ParameterizationFunction> relation) {
  ProxyDatabase proxy = new ProxyDatabase(ids);
  SimpleTypeInformation<ParameterizationFunction> type = new SimpleTypeInformation<>(ParameterizationFunction.class);
  WritableDataStore<ParameterizationFunction> projected = DataStoreUtil.makeStorage(ids, DataStoreFactory.HINT_HOT, ParameterizationFunction.class);
  // Project every function onto the given basis.
  for(DBIDIter it = ids.iter(); it.valid(); it.advance()) {
    projected.put(it, project(basis, relation.get(it)));
  }
  if(LOG.isDebugging()) {
    LOG.debugFine("db fuer dim " + (dim - 1) + ": " + ids.size());
  }
  // Wrap the projected data into a relation registered with the proxy database.
  MaterializedRelation<ParameterizationFunction> result = new MaterializedRelation<>(type, ids, null, projected);
  proxy.addRelation(result);
  return result;
}
Usage example of de.lmu.ifi.dbs.elki.database.ids.DBIDIter from the ELKI project: the run method of the KMeansOutlierDetection class.
/**
 * Run the outlier detection algorithm: cluster the data, then score every
 * object by its distance to its cluster prototype.
 *
 * @param database Database
 * @param relation Relation
 * @return Outlier detection result
 */
public OutlierResult run(Database database, Relation<O> relation) {
  DistanceFunction<? super O> df = clusterer.getDistanceFunction();
  DistanceQuery<O> distq = database.getDistanceQuery(relation, df);
  // TODO: improve ELKI api to ensure we're using the same DBIDs!
  Clustering<?> clustering = clusterer.run(database, relation);
  WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_DB);
  DoubleMinMax minmax = new DoubleMinMax();
  @SuppressWarnings("unchecked")
  NumberVector.Factory<O> factory = (NumberVector.Factory<O>) RelationUtil.assumeVectorField(relation).getFactory();
  for(Cluster<?> cluster : clustering.getAllClusters()) {
    // FIXME: use a primitive distance function on number vectors instead.
    O center = factory.newNumberVector(ModelUtil.getPrototype(cluster.getModel(), relation));
    // Distance to the cluster prototype is the outlier score.
    for(DBIDIter it = cluster.getIDs().iter(); it.valid(); it.advance()) {
      final double d = distq.distance(center, it);
      scores.put(it, d);
      minmax.put(d);
    }
  }
  // Build result representation.
  DoubleRelation scoreResult = new MaterializedDoubleRelation("KMeans outlier scores", "kmeans-outlier", scores, relation.getDBIDs());
  OutlierScoreMeta meta = new BasicOutlierScoreMeta(minmax.getMin(), minmax.getMax(), 0., Double.POSITIVE_INFINITY, 0.);
  return new OutlierResult(meta, scoreResult);
}
Aggregations