Search in sources:

Example 46 with NumberVector

use of de.lmu.ifi.dbs.elki.data.NumberVector in project elki by elki-project.

The class CBLOF, method computeCBLOFs.

/**
 * Assign a CBLOF score to every member of every given cluster.
 *
 * For each large cluster, a representative vector is obtained through
 * {@code ModelUtil.getPrototypeOrCentroid} and each member is scored against
 * it. Members of small clusters are scored afterwards, with the collected
 * representatives of all large clusters passed along. Every score is handed
 * to {@code storeCBLOFScore} together with the score store and the min/max
 * tracker.
 *
 * @param relation Data to process
 * @param distance The distance function
 * @param cblofs CBLOF scores
 * @param cblofMinMax Minimum/maximum score tracker
 * @param largeClusters Large clusters output
 * @param smallClusters Small clusters output
 */
private void computeCBLOFs(Relation<O> relation, NumberVectorDistanceFunction<? super O> distance, WritableDoubleDataStore cblofs, DoubleMinMax cblofMinMax, List<? extends Cluster<MeanModel>> largeClusters, List<? extends Cluster<MeanModel>> smallClusters) {
    // Representatives of the large clusters, in iteration order; needed
    // again below when scoring the small clusters.
    final List<NumberVector> centers = new ArrayList<>(largeClusters.size());
    for (Cluster<MeanModel> cluster : largeClusters) {
        final NumberVector center = ModelUtil.getPrototypeOrCentroid(cluster.getModel(), relation, cluster.getIDs());
        centers.add(center);
        // Score each member of this large cluster against its own center.
        DBIDIter it = cluster.getIDs().iter();
        while (it.valid()) {
            final double score = computeLargeClusterCBLOF(relation.get(it), distance, center, cluster);
            storeCBLOFScore(cblofs, cblofMinMax, score, it);
            it.advance();
        }
    }
    for (Cluster<MeanModel> cluster : smallClusters) {
        // Small-cluster members are scored using all large-cluster centers.
        DBIDIter it = cluster.getIDs().iter();
        while (it.valid()) {
            final double score = computeSmallClusterCBLOF(relation.get(it), distance, centers, cluster);
            storeCBLOFScore(cblofs, cblofMinMax, score, it);
            it.advance();
        }
    }
}
Also used : NumberVector(de.lmu.ifi.dbs.elki.data.NumberVector) ArrayList(java.util.ArrayList) MeanModel(de.lmu.ifi.dbs.elki.data.model.MeanModel) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter)

Example 47 with NumberVector

use of de.lmu.ifi.dbs.elki.data.NumberVector in project elki by elki-project.

The class Centroid, method make.

/**
 * Build a centroid as the per-dimension arithmetic mean of the selected
 * vectors of a relation.
 *
 * If {@code ids} yields no objects, the freshly constructed centroid is
 * returned as is, without normalization and without setting its weight sum.
 *
 * @param relation Relation to use
 * @param ids IDs to use
 * @return Centroid
 */
public static Centroid make(Relation<? extends NumberVector> relation, DBIDs ids) {
    final int dim = RelationUtil.dimensionality(relation);
    final Centroid result = new Centroid(dim);
    final double[] sums = result.elements;
    int n = 0;
    // Accumulate coordinate sums directly in the centroid's storage.
    DBIDIter it = ids.iter();
    while (it.valid()) {
        final NumberVector vec = relation.get(it);
        for (int d = 0; d < dim; d++) {
            sums[d] += vec.doubleValue(d);
        }
        ++n;
        it.advance();
    }
    if (n > 0) {
        // Turn sums into means, and record how many objects contributed.
        for (int d = 0; d < dim; d++) {
            sums[d] /= n;
        }
        result.wsum = n;
    }
    return result;
}
Also used : NumberVector(de.lmu.ifi.dbs.elki.data.NumberVector) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter)

Example 48 with NumberVector

use of de.lmu.ifi.dbs.elki.data.NumberVector in project elki by elki-project.

The class CovarianceMatrix, method make.

/**
 * Static constructor that processes every object of a relation.
 *
 * Works in two passes: first the per-dimension mean is computed, then the
 * products of the mean-centered coordinates are summed up. Only the upper
 * triangle is accumulated; the lower triangle is mirrored from it at the
 * end. For an empty relation the freshly constructed object is returned
 * unchanged.
 *
 * @param relation Relation to use.
 * @return Covariance matrix
 */
public static CovarianceMatrix make(Relation<? extends NumberVector> relation) {
    final int dim = RelationUtil.dimensionality(relation);
    final CovarianceMatrix cov = new CovarianceMatrix(dim);
    final double[] mu = cov.mean;
    int n = 0;
    // First pass: sum up the coordinates and count the objects.
    for (DBIDIter it = relation.iterDBIDs(); it.valid(); it.advance()) {
        final NumberVector obj = relation.get(it);
        for (int d = 0; d < dim; d++) {
            mu[d] += obj.doubleValue(d);
        }
        n++;
    }
    if (n == 0) {
        return cov;
    }
    // Sums become means.
    for (int d = 0; d < dim; d++) {
        mu[d] /= n;
    }
    // Second pass: accumulate products of deviations from the mean.
    // The object's own scratch array holds the centered vector.
    final double[] dev = cov.nmea;
    final double[][] mat = cov.elements;
    for (DBIDIter it = relation.iterDBIDs(); it.valid(); it.advance()) {
        final NumberVector obj = relation.get(it);
        for (int d = 0; d < dim; d++) {
            dev[d] = obj.doubleValue(d) - mu[d];
        }
        for (int r = 0; r < dim; r++) {
            final double[] row = mat[r];
            final double dr = dev[r];
            // Upper triangle (including the diagonal) only.
            for (int c = r; c < dim; c++) {
                row[c] += dr * dev[c];
            }
        }
    }
    // Mirror the upper triangle into the lower one.
    for (int r = 0; r < dim; r++) {
        final double[] row = mat[r];
        for (int c = r + 1; c < dim; c++) {
            mat[c][r] = row[c];
        }
    }
    cov.wsum = n;
    return cov;
}
Also used : NumberVector(de.lmu.ifi.dbs.elki.data.NumberVector) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter)

Example 49 with NumberVector

use of de.lmu.ifi.dbs.elki.data.NumberVector in project elki by elki-project.

The class CovarianceMatrix, method make.

/**
 * Static constructor that processes a selection of objects of a relation.
 *
 * Only the objects referenced by {@code ids} are read. Works in two passes:
 * first the per-dimension mean is computed, then the products of the
 * mean-centered coordinates are summed up. Only the upper triangle is
 * accumulated; the lower triangle is mirrored from it at the end. If
 * {@code ids} yields no objects, the freshly constructed object is returned
 * unchanged.
 *
 * @param relation Relation to use.
 * @param ids IDs to add
 * @return Covariance matrix
 */
public static CovarianceMatrix make(Relation<? extends NumberVector> relation, DBIDs ids) {
    final int dim = RelationUtil.dimensionality(relation);
    final CovarianceMatrix cov = new CovarianceMatrix(dim);
    final double[] mu = cov.mean;
    int n = 0;
    // First pass: sum up the coordinates and count the selected objects.
    for (DBIDIter it = ids.iter(); it.valid(); it.advance()) {
        final NumberVector obj = relation.get(it);
        for (int d = 0; d < dim; d++) {
            mu[d] += obj.doubleValue(d);
        }
        n++;
    }
    if (n == 0) {
        return cov;
    }
    // Sums become means.
    for (int d = 0; d < dim; d++) {
        mu[d] /= n;
    }
    // Second pass: accumulate products of deviations from the mean.
    // The object's own scratch array holds the centered vector.
    final double[] dev = cov.nmea;
    final double[][] mat = cov.elements;
    for (DBIDIter it = ids.iter(); it.valid(); it.advance()) {
        final NumberVector obj = relation.get(it);
        for (int d = 0; d < dim; d++) {
            dev[d] = obj.doubleValue(d) - mu[d];
        }
        for (int r = 0; r < dim; r++) {
            final double[] row = mat[r];
            final double dr = dev[r];
            // Upper triangle (including the diagonal) only.
            for (int c = r; c < dim; c++) {
                row[c] += dr * dev[c];
            }
        }
    }
    // Mirror the upper triangle into the lower one.
    for (int r = 0; r < dim; r++) {
        final double[] row = mat[r];
        for (int c = r + 1; c < dim; c++) {
            mat[c][r] = row[c];
        }
    }
    cov.wsum = n;
    return cov;
}
Also used : NumberVector(de.lmu.ifi.dbs.elki.data.NumberVector) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter)

Example 50 with NumberVector

use of de.lmu.ifi.dbs.elki.data.NumberVector in project elki by elki-project.

The class SpatialPrimitiveDistanceFunctionTest, method testSpatialDistanceConsistencyPositive.

/**
 * Randomized check over a fixed list of spatial distance functions, using
 * non-negative coordinates only.
 *
 * Each iteration draws a random point, a random bounding box with ordered
 * bounds, and a second point placed between the box bounds in every
 * dimension, then hands all three to {@code compareDistances} for each
 * distance function. The random generator uses a fixed seed, so runs are
 * reproducible.
 */
@Test
public void testSpatialDistanceConsistencyPositive() {
    final Random random = new Random(1);
    final int dim = 7;
    final int iters = 10000;
    List<SpatialPrimitiveDistanceFunction<? super NumberVector>> functions = new ArrayList<>();
    functions.add(EuclideanDistanceFunction.STATIC);
    functions.add(ManhattanDistanceFunction.STATIC);
    functions.add(MaximumDistanceFunction.STATIC);
    functions.add(MinimumDistanceFunction.STATIC);
    functions.add(new LPNormDistanceFunction(3));
    functions.add(new LPNormDistanceFunction(.5));
    functions.add(CanberraDistanceFunction.STATIC);
    functions.add(HistogramIntersectionDistanceFunction.STATIC);
    functions.add(SquaredEuclideanDistanceFunction.STATIC);
    functions.add(ArcCosineDistanceFunction.STATIC);
    functions.add(CosineDistanceFunction.STATIC);
    // Backing arrays are refilled in place on every iteration; the vectors
    // and the bounding box below are built once on top of them.
    final double[] point = new double[dim];
    final double[] lower = new double[dim];
    final double[] upper = new double[dim];
    final double[] inside = new double[dim];
    DoubleVector v1 = DoubleVector.wrap(point);
    ModifiableHyperBoundingBox mbr = new ModifiableHyperBoundingBox(lower, upper);
    DoubleVector v2 = DoubleVector.wrap(inside);
    for (int run = 0; run < iters; run++) {
        for (int k = 0; k < dim; k++) {
            // Draw order matters for reproducibility: point, two bounds, mix.
            point[k] = random.nextDouble() * 2E4;
            final double a = random.nextDouble() * 2E4;
            final double b = random.nextDouble() * 2E4;
            // Store the two draws so that lower <= upper.
            if (a > b) {
                lower[k] = b;
                upper[k] = a;
            } else {
                lower[k] = a;
                upper[k] = b;
            }
            // Convex combination of the bounds, hence within the box.
            final double mix = random.nextDouble();
            inside[k] = mix * lower[k] + (1 - mix) * upper[k];
        }
        for (SpatialPrimitiveDistanceFunction<? super NumberVector> fn : functions) {
            compareDistances(v1, mbr, v2, fn);
        }
    }
}
Also used : LPNormDistanceFunction(de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.LPNormDistanceFunction) Random(java.util.Random) NumberVector(de.lmu.ifi.dbs.elki.data.NumberVector) ArrayList(java.util.ArrayList) DoubleVector(de.lmu.ifi.dbs.elki.data.DoubleVector) ModifiableHyperBoundingBox(de.lmu.ifi.dbs.elki.data.ModifiableHyperBoundingBox) Test(org.junit.Test)

Aggregations

NumberVector (de.lmu.ifi.dbs.elki.data.NumberVector)85 DBIDIter (de.lmu.ifi.dbs.elki.database.ids.DBIDIter)40 ArrayList (java.util.ArrayList)16 LongStatistic (de.lmu.ifi.dbs.elki.logging.statistics.LongStatistic)9 DoubleVector (de.lmu.ifi.dbs.elki.data.DoubleVector)8 MultipleObjectsBundle (de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle)8 AbortException (de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException)8 Database (de.lmu.ifi.dbs.elki.database.Database)7 DBIDs (de.lmu.ifi.dbs.elki.database.ids.DBIDs)7 DoubleStatistic (de.lmu.ifi.dbs.elki.logging.statistics.DoubleStatistic)7 Random (java.util.Random)7 Test (org.junit.Test)7 VectorFieldTypeInformation (de.lmu.ifi.dbs.elki.data.type.VectorFieldTypeInformation)5 FiniteProgress (de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress)5 MeanVariance (de.lmu.ifi.dbs.elki.math.MeanVariance)5 EvaluationResult (de.lmu.ifi.dbs.elki.result.EvaluationResult)5 MeasurementGroup (de.lmu.ifi.dbs.elki.result.EvaluationResult.MeasurementGroup)5 List (java.util.List)5 SparseNumberVector (de.lmu.ifi.dbs.elki.data.SparseNumberVector)4 RandomProjectionFamily (de.lmu.ifi.dbs.elki.data.projection.random.RandomProjectionFamily)4