Example usage of de.lmu.ifi.dbs.elki.data.NumberVector in the ELKI project (elki-project/elki): the quality method of the class WithinClusterMeanDistanceQualityMeasure.
@Override
public <V extends NumberVector> double quality(Clustering<? extends MeanModel> clustering, NumberVectorDistanceFunction<? super V> distanceFunction, Relation<V> relation) {
  // Accumulate the mean pairwise distance of each cluster.
  double sumOfClusterMeans = 0;
  for(Cluster<? extends MeanModel> cluster : clustering.getAllClusters()) {
    final DBIDs members = cluster.getIDs();
    // Sum distances over all ordered pairs, including self-pairs (distance 0).
    double pairwiseSum = 0;
    for(DBIDIter outer = members.iter(); outer.valid(); outer.advance()) {
      final NumberVector first = relation.get(outer);
      for(DBIDIter inner = members.iter(); inner.valid(); inner.advance()) {
        pairwiseSum += distanceFunction.distance(first, relation.get(inner));
      }
    }
    // Normalize by n^2 ordered pairs to get the mean pairwise distance.
    final int n = members.size();
    sumOfClusterMeans += pairwiseSum / (n * n);
  }
  // Average over all clusters.
  return sumOfClusterMeans / clustering.getAllClusters().size();
}
Example usage of de.lmu.ifi.dbs.elki.data.NumberVector in the ELKI project (elki-project/elki): the computePerDimensionVariances method of the class SOD.
/**
 * Compute the per-dimension variances for the given neighborhood and center.
 *
 * @param relation Data relation
 * @param center Center vector
 * @param neighborhood Neighbors
 * @return Per-dimension variances.
 */
private static double[] computePerDimensionVariances(Relation<? extends NumberVector> relation, double[] center, DBIDs neighborhood) {
  final int dim = center.length;
  // Accumulate squared deviations from the center, per dimension.
  final double[] sqDevSums = new double[dim];
  for(DBIDIter it = neighborhood.iter(); it.valid(); it.advance()) {
    final NumberVector vec = relation.get(it);
    for(int d = 0; d < dim; d++) {
      final double delta = vec.doubleValue(d) - center[d];
      sqDevSums[d] += delta * delta;
    }
  }
  // Divide by the neighborhood size (in place) to obtain the variances.
  VMath.times(sqDevSums, 1. / neighborhood.size());
  return sqDevSums;
}
Example usage of de.lmu.ifi.dbs.elki.data.NumberVector in the ELKI project (elki-project/elki): the generateHashFunctions method of the class AbstractProjectedHashFunctionFamily.
@Override
public ArrayList<? extends LocalitySensitiveHashFunction<? super NumberVector>> generateHashFunctions(Relation<? extends NumberVector> relation, int l) {
  final int dim = RelationUtil.dimensionality(relation);
  // Build l hash functions, each from a fresh random projection matrix.
  ArrayList<LocalitySensitiveHashFunction<? super NumberVector>> functions = new ArrayList<>(l);
  final Random rnd = random.getSingleThreadedRandom();
  int remaining = l;
  while(remaining-- > 0) {
    RandomProjectionFamily.Projection matrix = proj.generateProjection(dim, k);
    functions.add(new MultipleProjectionsLocalitySensitiveHashFunction(matrix, width, rnd));
  }
  return functions;
}
Example usage of de.lmu.ifi.dbs.elki.data.NumberVector in the ELKI project (elki-project/elki): the generateHashFunctions method of the class CosineHashFunctionFamily.
@Override
public ArrayList<? extends LocalitySensitiveHashFunction<? super NumberVector>> generateHashFunctions(Relation<? extends NumberVector> relation, int l) {
  final int dim = RelationUtil.dimensionality(relation);
  // One cosine LSH function per random projection, l in total.
  ArrayList<LocalitySensitiveHashFunction<? super NumberVector>> functions = new ArrayList<>(l);
  for(int remaining = l; remaining > 0; remaining--) {
    functions.add(new CosineLocalitySensitiveHashFunction(proj.generateProjection(dim, k)));
  }
  return functions;
}
Example usage of de.lmu.ifi.dbs.elki.data.NumberVector in the ELKI project (elki-project/elki): the run method of the class RangeQueryBenchmarkAlgorithm.
/**
 * Run the range-query benchmark, using a separate query set loaded from the
 * configured {@code queries} input connection.
 *
 * Each query record is expected to have dim+1 values: the first dim values
 * form the query vector, the last value is the query radius. A subset of the
 * queries (controlled by {@code sampling}) is executed; per-query result
 * sizes are aggregated, and an order-independent hash over the result DBIDs
 * is logged so runs can be compared for consistency.
 *
 * @param database Database
 * @param relation Relation to query
 * @return Null result (this algorithm only logs statistics)
 */
public Result run(Database database, Relation<O> relation) {
if (queries == null) {
throw new AbortException("A query set is required for this 'run' method.");
}
// Get a distance and kNN query instance.
DistanceQuery<O> distQuery = database.getDistanceQuery(relation, getDistanceFunction());
RangeQuery<O> rangeQuery = database.getRangeQuery(distQuery);
NumberVector.Factory<O> ofactory = RelationUtil.getNumberVectorFactory(relation);
int dim = RelationUtil.dimensionality(relation);
// Separate query set: require vectors of exactly dim+1 values
// (query point plus radius in the last position).
TypeInformation res = VectorFieldTypeInformation.typeRequest(NumberVector.class, dim + 1, dim + 1);
MultipleObjectsBundle bundle = queries.loadData();
// Find the first bundle column with a compatible type.
int col = -1;
for (int i = 0; i < bundle.metaLength(); i++) {
if (res.isAssignableFromType(bundle.meta(i))) {
col = i;
break;
}
}
if (col < 0) {
// No usable column: report the expected and available types.
StringBuilder buf = new StringBuilder();
buf.append("No compatible data type in query input was found. Expected: ");
buf.append(res.toString());
buf.append(" have: ");
for (int i = 0; i < bundle.metaLength(); i++) {
if (i > 0) {
buf.append(' ');
}
buf.append(bundle.meta(i).toString());
}
throw new IncompatibleDataException(buf.toString());
}
// Random sampling is a bit of hack, sorry.
// But currently, we don't (yet) have an "integer random sample" function.
// Generate synthetic DBIDs covering the query bundle, then sample from them.
DBIDRange sids = DBIDUtil.generateStaticDBIDRange(bundle.dataLength());
final DBIDs sample = DBIDUtil.randomSample(sids, sampling, random);
FiniteProgress prog = LOG.isVeryVerbose() ? new FiniteProgress("kNN queries", sample.size(), LOG) : null;
int hash = 0;
MeanVariance mv = new MeanVariance();
double[] buf = new double[dim];
for (DBIDIter iditer = sample.iter(); iditer.valid(); iditer.advance()) {
// Map the sampled DBID back to its row offset in the bundle.
int off = sids.binarySearch(iditer);
assert (off >= 0);
NumberVector o = (NumberVector) bundle.data(off, col);
// Split the record: first dim values are the query vector ...
for (int i = 0; i < dim; i++) {
buf[i] = o.doubleValue(i);
}
O v = ofactory.newNumberVector(buf);
// ... and the last value is the query radius.
double r = o.doubleValue(dim);
DoubleDBIDList rres = rangeQuery.getRangeForObject(v, r);
// Checksum the result IDs (sum is order-independent within one query).
int ichecksum = 0;
for (DBIDIter it = rres.iter(); it.valid(); it.advance()) {
ichecksum += DBIDUtil.asInteger(it);
}
hash = Util.mixHashCodes(hash, ichecksum);
mv.put(rres.size());
LOG.incrementProcessed(prog);
}
LOG.ensureCompleted(prog);
if (LOG.isStatistics()) {
LOG.statistics("Result hashcode: " + hash);
LOG.statistics("Mean number of results: " + mv.getMean() + " +- " + mv.getNaiveStddev());
}
return null;
}
Aggregations