Search in sources :

Example 36 with NumberVector

use of de.lmu.ifi.dbs.elki.data.NumberVector in project elki by elki-project.

the class WithinClusterMeanDistanceQualityMeasure method quality.

/**
 * Compute the within-cluster mean distance quality of a clustering.
 *
 * For every cluster, the distances of all ordered pairs of members are summed
 * (self-pairs included, every unordered pair counted twice) and divided by the
 * squared cluster size. The per-cluster values are then averaged over all
 * clusters.
 *
 * Note: a cluster without members contributes 0/0 = NaN, which propagates to
 * the result.
 *
 * @param clustering Clustering to evaluate
 * @param distanceFunction Distance function used for the pairwise distances
 * @param relation Relation providing the vectors of the cluster members
 * @param <V> Vector type of the relation
 * @return Mean over all clusters of the average pairwise member distance
 */
@Override
public <V extends NumberVector> double quality(Clustering<? extends MeanModel> clustering, NumberVectorDistanceFunction<? super V> distanceFunction, Relation<V> relation) {
    double clusterDistanceSum = 0;
    for (Cluster<? extends MeanModel> cluster : clustering.getAllClusters()) {
        DBIDs ids = cluster.getIDs();
        // Compute sum of pairwise distances:
        double clusterPairwiseDistanceSum = 0;
        for (DBIDIter iter1 = ids.iter(); iter1.valid(); iter1.advance()) {
            NumberVector obj1 = relation.get(iter1);
            for (DBIDIter iter2 = ids.iter(); iter2.valid(); iter2.advance()) {
                clusterPairwiseDistanceSum += distanceFunction.distance(obj1, relation.get(iter2));
            }
        }
        // Multiply in double precision: size * size as an int product
        // overflows once a cluster has more than 46340 members.
        final double numPairs = (double) ids.size() * ids.size();
        clusterDistanceSum += clusterPairwiseDistanceSum / numPairs;
    }
    return clusterDistanceSum / clustering.getAllClusters().size();
}
Also used : NumberVector(de.lmu.ifi.dbs.elki.data.NumberVector) DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter)

Example 37 with NumberVector

use of de.lmu.ifi.dbs.elki.data.NumberVector in project elki by elki-project.

the class SOD method computePerDimensionVariances.

/**
 * Compute the per-dimension variances for the given neighborhood and center.
 *
 * For each dimension, the squared deviations of all neighbors from the given
 * center are accumulated and then divided by the neighborhood size.
 *
 * Note: for an empty neighborhood the scaling factor is infinite, so the
 * returned entries are NaN.
 *
 * @param relation Data relation
 * @param center Center vector; its length determines the number of dimensions
 * @param neighborhood Neighbors
 * @return Per-dimension variances.
 */
private static double[] computePerDimensionVariances(Relation<? extends NumberVector> relation, double[] center, DBIDs neighborhood) {
    final int dim = center.length;
    double[] variances = new double[dim];
    // Accumulate the squared deviations from the center, per dimension.
    for (DBIDIter iter = neighborhood.iter(); iter.valid(); iter.advance()) {
        NumberVector databaseObject = relation.get(iter);
        for (int d = 0; d < dim; d++) {
            final double deviation = databaseObject.doubleValue(d) - center[d];
            variances[d] += deviation * deviation;
        }
    }
    // Normalize in place. Calling a scaling helper and dropping its return
    // value only works if that helper mutates its argument; doing the scaling
    // explicitly guarantees the returned array really holds the variances.
    final double scale = 1. / neighborhood.size();
    for (int d = 0; d < dim; d++) {
        variances[d] *= scale;
    }
    return variances;
}
Also used : NumberVector(de.lmu.ifi.dbs.elki.data.NumberVector) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter)

Example 38 with NumberVector

use of de.lmu.ifi.dbs.elki.data.NumberVector in project elki by elki-project.

the class AbstractProjectedHashFunctionFamily method generateHashFunctions.

/**
 * Build {@code l} locality sensitive hash functions for the given relation.
 *
 * Each hash function receives its own projection, generated by {@code proj}
 * for the dimensionality of the relation and {@code k}, together with the
 * configured {@code width} and one random generator shared by all functions.
 *
 * @param relation Relation whose dimensionality is used for the projections
 * @param l Number of hash functions to generate
 * @return List of the generated hash functions
 */
@Override
public ArrayList<? extends LocalitySensitiveHashFunction<? super NumberVector>> generateHashFunctions(Relation<? extends NumberVector> relation, int l) {
    final int dimensionality = RelationUtil.dimensionality(relation);
    final ArrayList<LocalitySensitiveHashFunction<? super NumberVector>> functions = new ArrayList<>(l);
    // One generator instance is handed to every hash function.
    final Random generator = random.getSingleThreadedRandom();
    for (int remaining = l; remaining > 0; remaining--) {
        final RandomProjectionFamily.Projection projection = proj.generateProjection(dimensionality, k);
        final MultipleProjectionsLocalitySensitiveHashFunction function = new MultipleProjectionsLocalitySensitiveHashFunction(projection, width, generator);
        functions.add(function);
    }
    return functions;
}
Also used : Random(java.util.Random) NumberVector(de.lmu.ifi.dbs.elki.data.NumberVector) LocalitySensitiveHashFunction(de.lmu.ifi.dbs.elki.index.lsh.hashfunctions.LocalitySensitiveHashFunction) MultipleProjectionsLocalitySensitiveHashFunction(de.lmu.ifi.dbs.elki.index.lsh.hashfunctions.MultipleProjectionsLocalitySensitiveHashFunction) ArrayList(java.util.ArrayList) RandomProjectionFamily(de.lmu.ifi.dbs.elki.data.projection.random.RandomProjectionFamily)

Example 39 with NumberVector

use of de.lmu.ifi.dbs.elki.data.NumberVector in project elki by elki-project.

the class CosineHashFunctionFamily method generateHashFunctions.

/**
 * Build {@code l} cosine locality sensitive hash functions for the given
 * relation.
 *
 * Each hash function wraps its own projection, generated by {@code proj} for
 * the dimensionality of the relation and {@code k}.
 *
 * @param relation Relation whose dimensionality is used for the projections
 * @param l Number of hash functions to generate
 * @return List of the generated hash functions
 */
@Override
public ArrayList<? extends LocalitySensitiveHashFunction<? super NumberVector>> generateHashFunctions(Relation<? extends NumberVector> relation, int l) {
    final int dimensionality = RelationUtil.dimensionality(relation);
    final ArrayList<LocalitySensitiveHashFunction<? super NumberVector>> functions = new ArrayList<>(l);
    for (int remaining = l; remaining > 0; remaining--) {
        final RandomProjectionFamily.Projection generated = proj.generateProjection(dimensionality, k);
        final CosineLocalitySensitiveHashFunction function = new CosineLocalitySensitiveHashFunction(generated);
        functions.add(function);
    }
    return functions;
}
Also used : NumberVector(de.lmu.ifi.dbs.elki.data.NumberVector) LocalitySensitiveHashFunction(de.lmu.ifi.dbs.elki.index.lsh.hashfunctions.LocalitySensitiveHashFunction) CosineLocalitySensitiveHashFunction(de.lmu.ifi.dbs.elki.index.lsh.hashfunctions.CosineLocalitySensitiveHashFunction) ArrayList(java.util.ArrayList) RandomProjectionFamily(de.lmu.ifi.dbs.elki.data.projection.random.RandomProjectionFamily) LessEqualConstraint(de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.LessEqualConstraint)

Example 40 with NumberVector

use of de.lmu.ifi.dbs.elki.data.NumberVector in project elki by elki-project.

the class RangeQueryBenchmarkAlgorithm method run.

/**
 * Run the algorithm, with a separate query set.
 *
 * The query set must contain a number vector column with one more dimension
 * than the relation: the first {@code dim} values form the query vector, and
 * the value at index {@code dim} is used as the query radius. A random sample
 * of the query set is executed as range queries; a hash code over the result
 * ids and the mean result size are logged as statistics.
 *
 * @param database Database
 * @param relation Relation
 * @return Null result
 * @throws AbortException if no query set was configured
 * @throws IncompatibleDataException if the query set has no compatible column
 */
public Result run(Database database, Relation<O> relation) {
    if (queries == null) {
        throw new AbortException("A query set is required for this 'run' method.");
    }
    // Get a distance and range query instance.
    DistanceQuery<O> distQuery = database.getDistanceQuery(relation, getDistanceFunction());
    RangeQuery<O> rangeQuery = database.getRangeQuery(distQuery);
    NumberVector.Factory<O> ofactory = RelationUtil.getNumberVectorFactory(relation);
    int dim = RelationUtil.dimensionality(relation);
    // Separate query set: vectors with one extra dimension (the radius).
    TypeInformation res = VectorFieldTypeInformation.typeRequest(NumberVector.class, dim + 1, dim + 1);
    MultipleObjectsBundle bundle = queries.loadData();
    // Find the first column of the bundle matching the requested type.
    int col = -1;
    for (int i = 0; i < bundle.metaLength(); i++) {
        if (res.isAssignableFromType(bundle.meta(i))) {
            col = i;
            break;
        }
    }
    if (col < 0) {
        // Report the expected type and all types actually present.
        StringBuilder msg = new StringBuilder();
        msg.append("No compatible data type in query input was found. Expected: ");
        msg.append(res.toString());
        msg.append(" have: ");
        for (int i = 0; i < bundle.metaLength(); i++) {
            if (i > 0) {
                msg.append(' ');
            }
            msg.append(bundle.meta(i).toString());
        }
        throw new IncompatibleDataException(msg.toString());
    }
    // Random sampling is a bit of hack, sorry.
    // But currently, we don't (yet) have an "integer random sample" function.
    DBIDRange sids = DBIDUtil.generateStaticDBIDRange(bundle.dataLength());
    final DBIDs sample = DBIDUtil.randomSample(sids, sampling, random);
    FiniteProgress prog = LOG.isVeryVerbose() ? new FiniteProgress("Range queries", sample.size(), LOG) : null;
    int hash = 0;
    MeanVariance mv = new MeanVariance();
    // Reused buffer for the query vector (without the radius column).
    double[] coords = new double[dim];
    for (DBIDIter iditer = sample.iter(); iditer.valid(); iditer.advance()) {
        // Map the sampled id back to its row offset in the bundle.
        int off = sids.binarySearch(iditer);
        assert (off >= 0);
        NumberVector o = (NumberVector) bundle.data(off, col);
        for (int i = 0; i < dim; i++) {
            coords[i] = o.doubleValue(i);
        }
        O v = ofactory.newNumberVector(coords);
        // The extra last dimension holds the query radius.
        double r = o.doubleValue(dim);
        DoubleDBIDList rres = rangeQuery.getRangeForObject(v, r);
        // Sum of the result ids, mixed into the running hash code.
        int ichecksum = 0;
        for (DBIDIter it = rres.iter(); it.valid(); it.advance()) {
            ichecksum += DBIDUtil.asInteger(it);
        }
        hash = Util.mixHashCodes(hash, ichecksum);
        mv.put(rres.size());
        LOG.incrementProcessed(prog);
    }
    LOG.ensureCompleted(prog);
    if (LOG.isStatistics()) {
        LOG.statistics("Result hashcode: " + hash);
        LOG.statistics("Mean number of results: " + mv.getMean() + " +- " + mv.getNaiveStddev());
    }
    return null;
}
Also used : DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) MultipleObjectsBundle(de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle) VectorFieldTypeInformation(de.lmu.ifi.dbs.elki.data.type.VectorFieldTypeInformation) TypeInformation(de.lmu.ifi.dbs.elki.data.type.TypeInformation) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) MeanVariance(de.lmu.ifi.dbs.elki.math.MeanVariance) NumberVector(de.lmu.ifi.dbs.elki.data.NumberVector) IncompatibleDataException(de.lmu.ifi.dbs.elki.utilities.exceptions.IncompatibleDataException) DBIDRange(de.lmu.ifi.dbs.elki.database.ids.DBIDRange) DoubleDBIDList(de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDList) AbortException(de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException)

Aggregations

NumberVector (de.lmu.ifi.dbs.elki.data.NumberVector)85 DBIDIter (de.lmu.ifi.dbs.elki.database.ids.DBIDIter)40 ArrayList (java.util.ArrayList)16 LongStatistic (de.lmu.ifi.dbs.elki.logging.statistics.LongStatistic)9 DoubleVector (de.lmu.ifi.dbs.elki.data.DoubleVector)8 MultipleObjectsBundle (de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle)8 AbortException (de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException)8 Database (de.lmu.ifi.dbs.elki.database.Database)7 DBIDs (de.lmu.ifi.dbs.elki.database.ids.DBIDs)7 DoubleStatistic (de.lmu.ifi.dbs.elki.logging.statistics.DoubleStatistic)7 Random (java.util.Random)7 Test (org.junit.Test)7 VectorFieldTypeInformation (de.lmu.ifi.dbs.elki.data.type.VectorFieldTypeInformation)5 FiniteProgress (de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress)5 MeanVariance (de.lmu.ifi.dbs.elki.math.MeanVariance)5 EvaluationResult (de.lmu.ifi.dbs.elki.result.EvaluationResult)5 MeasurementGroup (de.lmu.ifi.dbs.elki.result.EvaluationResult.MeasurementGroup)5 List (java.util.List)5 SparseNumberVector (de.lmu.ifi.dbs.elki.data.SparseNumberVector)4 RandomProjectionFamily (de.lmu.ifi.dbs.elki.data.projection.random.RandomProjectionFamily)4