Search in sources :

Example 56 with NumberVector

use of de.lmu.ifi.dbs.elki.data.NumberVector in project elki by elki-project.

the class ORCLUS method projectedEnergy.

/**
 * Computes the projected energy of the union of two clusters.
 *
 * The two clusters are first merged via {@code union}. Every member of the
 * merged cluster, as well as the merged cluster's centroid, is then mapped
 * through {@code project} with respect to the merged cluster, and the energy
 * is the mean squared Euclidean distance between the projected members and
 * the projected centroid.
 *
 * @param relation the relation holding the objects
 * @param c_i the first cluster
 * @param c_j the second cluster
 * @param i the index of cluster c_i in the cluster list
 * @param j the index of cluster c_j in the cluster list
 * @param dim the dimensionality of the clusters
 * @return the projected energy of the union cluster, bundled with both
 *         indices and the union cluster itself
 */
private ProjectedEnergy projectedEnergy(Relation<V> relation, ORCLUSCluster c_i, ORCLUSCluster c_j, int i, int j, int dim) {
    final NumberVectorDistanceFunction<? super V> sqEuclidean = SquaredEuclideanDistanceFunction.STATIC;
    // All projections below are taken relative to the merged cluster.
    final ORCLUSCluster merged = union(relation, c_i, c_j, dim);
    double total = 0.;
    final NumberVector centroidProjection = DoubleVector.wrap(project(merged, merged.centroid));
    DBIDIter member = merged.objectIDs.iter();
    while (member.valid()) {
        final double[] coordinates = relation.get(member).toArray();
        final NumberVector memberProjection = DoubleVector.wrap(project(merged, coordinates));
        total += sqEuclidean.distance(memberProjection, centroidProjection);
        member.advance();
    }
    // Mean over all members of the merged cluster.
    final double energy = total / merged.objectIDs.size();
    return new ProjectedEnergy(i, j, merged, energy);
}
Also used : NumberVector(de.lmu.ifi.dbs.elki.data.NumberVector) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter)

Example 57 with NumberVector

use of de.lmu.ifi.dbs.elki.data.NumberVector in project elki by elki-project.

the class AbstractKMeans method macQueenIterate.

/**
 * Performs one MacQueen-style pass over the relation.
 *
 * For every object, the closest of the first {@code k} means is determined
 * using the distance function returned by {@code getDistanceFunction()}
 * ({@code k} is a member declared outside this method). The distance to that
 * mean is accumulated into {@code varsum}, and the object is then handed to
 * {@code updateMeanAndAssignment}, so that updates happen incrementally while
 * the pass is still running.
 *
 * @param relation Relation
 * @param means Means
 * @param clusters Clusters
 * @param assignment Current cluster assignment
 * @param varsum Variance sum output; reset to zero at the start of the pass
 * @return true when at least one call to {@code updateMeanAndAssignment}
 *         reported a change
 */
protected boolean macQueenIterate(Relation<? extends NumberVector> relation, double[][] means, List<ModifiableDBIDs> clusters, WritableIntegerDataStore assignment, double[] varsum) {
    Arrays.fill(varsum, 0.);
    final NumberVectorDistanceFunction<?> distance = getDistanceFunction();
    boolean anyChange = false;
    DBIDIter it = relation.iterDBIDs();
    while (it.valid()) {
        final NumberVector point = relation.get(it);
        // Linear scan for the nearest mean; ties keep the lowest index.
        int nearest = 0;
        double nearestDist = Double.POSITIVE_INFINITY;
        for (int c = 0; c < k; c++) {
            // The mean is wrapped on every use, since the incremental update
            // below may modify the means during this pass.
            final double d = distance.distance(point, DoubleVector.wrap(means[c]));
            if (d < nearestDist) {
                nearestDist = d;
                nearest = c;
            }
        }
        varsum[nearest] += nearestDist;
        // Always invoke the update, regardless of earlier changes.
        if (updateMeanAndAssignment(clusters, means, nearest, point, it, assignment)) {
            anyChange = true;
        }
        it.advance();
    }
    return anyChange;
}
Also used : NumberVector(de.lmu.ifi.dbs.elki.data.NumberVector) SparseNumberVector(de.lmu.ifi.dbs.elki.data.SparseNumberVector)

Example 58 with NumberVector

use of de.lmu.ifi.dbs.elki.data.NumberVector in project elki by elki-project.

the class AbstractKMeans method assignToNearestCluster.

/**
 * Reassigns every object of the relation to its nearest mean.
 *
 * All cluster id collections are cleared and {@code varsum} is zeroed first.
 * Then each object is added to the collection belonging to the closest mean
 * (according to {@code getDistanceFunction()}), its distance to that mean is
 * added to the matching {@code varsum} entry, and the index of that mean is
 * stored in {@code assignment}.
 *
 * @param relation the database to cluster
 * @param means a list of k means
 * @param clusters cluster assignment; cleared and refilled by this method
 * @param assignment Current cluster assignment; updated in place
 * @param varsum Variance sum output
 * @return true when the value previously stored in {@code assignment}
 *         differed from the new cluster index for at least one object
 */
protected boolean assignToNearestCluster(Relation<? extends NumberVector> relation, double[][] means, List<? extends ModifiableDBIDs> clusters, WritableIntegerDataStore assignment, double[] varsum) {
    assert (k == means.length);
    // Start from empty clusters and zeroed variance sums.
    Arrays.fill(varsum, 0.);
    for (ModifiableDBIDs members : clusters) {
        members.clear();
    }
    final NumberVectorDistanceFunction<?> distance = getDistanceFunction();
    boolean anyReassigned = false;
    DBIDIter it = relation.iterDBIDs();
    while (it.valid()) {
        final NumberVector point = relation.get(it);
        // Linear scan for the nearest mean; ties keep the lowest index.
        int nearest = 0;
        double nearestDist = Double.POSITIVE_INFINITY;
        for (int c = 0; c < k; c++) {
            final double d = distance.distance(point, DoubleVector.wrap(means[c]));
            if (d < nearestDist) {
                nearestDist = d;
                nearest = c;
            }
        }
        varsum[nearest] += nearestDist;
        clusters.get(nearest).add(it);
        // putInt returns a value that is compared against the new index.
        final int previous = assignment.putInt(it, nearest);
        if (previous != nearest) {
            anyReassigned = true;
        }
        it.advance();
    }
    return anyReassigned;
}
Also used : NumberVector(de.lmu.ifi.dbs.elki.data.NumberVector) SparseNumberVector(de.lmu.ifi.dbs.elki.data.SparseNumberVector)

Example 59 with NumberVector

use of de.lmu.ifi.dbs.elki.data.NumberVector in project elki by elki-project.

the class EM method assignProbabilitiesToInstances.

/**
 * Assigns cluster membership probabilities to every instance and computes
 * the expectation value of the current mixture of distributions.
 *
 * For each instance, the log density under every model is estimated and
 * bounded from below by {@code MIN_LOGLIKELIHOOD}. The per-instance value
 * {@code logP} is obtained from {@code logSumExp} over these log densities;
 * the stored probabilities are {@code exp(logDensity - logP)}.
 *
 * The returned value is the sum of {@code logP} over all instances divided
 * by the size of the relation.
 *
 * @param relation the database used for assignment to instances
 * @param models Cluster models
 * @param probClusterIGivenX Output storage for cluster probabilities
 * @return the expectation value of the current mixture of distributions
 */
public static double assignProbabilitiesToInstances(Relation<? extends NumberVector> relation, List<? extends EMClusterModel<?>> models, WritableDataStore<double[]> probClusterIGivenX) {
    final int numModels = models.size();
    double logLikelihoodSum = 0.;
    DBIDIter it = relation.iterDBIDs();
    while (it.valid()) {
        final NumberVector instance = relation.get(it);
        final double[] memberships = new double[numModels];
        for (int m = 0; m < numModels; m++) {
            final double logDensity = models.get(m).estimateLogDensity(instance);
            // Kept as a comparison: a NaN density fails the test and is
            // therefore replaced by the floor as well.
            if (logDensity > MIN_LOGLIKELIHOOD) {
                memberships[m] = logDensity;
            } else {
                memberships[m] = MIN_LOGLIKELIHOOD;
            }
        }
        final double logP = logSumExp(memberships);
        // Convert the log densities into normalized probabilities in place.
        for (int m = 0; m < numModels; m++) {
            final double shifted = memberships[m] - logP;
            memberships[m] = FastMath.exp(shifted);
        }
        probClusterIGivenX.put(it, memberships);
        logLikelihoodSum += logP;
        it.advance();
    }
    return logLikelihoodSum / relation.size();
}
Also used : NumberVector(de.lmu.ifi.dbs.elki.data.NumberVector) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter)

Example 60 with NumberVector

use of de.lmu.ifi.dbs.elki.data.NumberVector in project elki by elki-project.

the class BestOfMultipleKMeans method run.

/**
 * Runs the inner k-means algorithm {@code trials} times and keeps the
 * candidate that the quality measure rates best.
 *
 * The best cost starts out as NaN, so whether the first candidate is
 * accepted depends on how {@code qualityMeasure.isBetter} treats NaN.
 *
 * @param database Database passed through to the inner algorithm
 * @param relation Relation to cluster
 * @return the best clustering found, or {@code null} when no candidate was
 *         ever rated better (for example when no trial is executed)
 * @throws AbortException when the inner algorithm's distance function is not
 *         a {@code PrimitiveDistanceFunction}
 */
@Override
public Clustering<M> run(Database database, Relation<V> relation) {
    final boolean primitive = innerkMeans.getDistanceFunction() instanceof PrimitiveDistanceFunction;
    if (!primitive) {
        throw new AbortException("K-Means results can only be evaluated for primitive distance functions, got: " + innerkMeans.getDistanceFunction().getClass());
    }
    @SuppressWarnings("unchecked")
    final NumberVectorDistanceFunction<? super NumberVector> distance = (NumberVectorDistanceFunction<? super NumberVector>) innerkMeans.getDistanceFunction();
    Clustering<M> winner = null;
    double winnerCost = Double.NaN;
    // Progress is only tracked when verbose logging is enabled.
    final FiniteProgress progress;
    if (LOG.isVerbose()) {
        progress = new FiniteProgress("K-means iterations", trials, LOG);
    } else {
        progress = null;
    }
    int trial = 0;
    while (trial < trials) {
        final Clustering<M> candidate = innerkMeans.run(database, relation);
        final double cost = qualityMeasure.quality(candidate, distance, relation);
        if (LOG.isVerbose()) {
            LOG.verbose("Cost of candidate " + trial + ": " + cost);
        }
        if (qualityMeasure.isBetter(cost, winnerCost)) {
            winner = candidate;
            winnerCost = cost;
        }
        LOG.incrementProcessed(progress);
        trial++;
    }
    LOG.ensureCompleted(progress);
    return winner;
}
Also used : NumberVector(de.lmu.ifi.dbs.elki.data.NumberVector) NumberVectorDistanceFunction(de.lmu.ifi.dbs.elki.distance.distancefunction.NumberVectorDistanceFunction) FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) PrimitiveDistanceFunction(de.lmu.ifi.dbs.elki.distance.distancefunction.PrimitiveDistanceFunction) AbortException(de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException)

Aggregations

NumberVector (de.lmu.ifi.dbs.elki.data.NumberVector)85 DBIDIter (de.lmu.ifi.dbs.elki.database.ids.DBIDIter)40 ArrayList (java.util.ArrayList)16 LongStatistic (de.lmu.ifi.dbs.elki.logging.statistics.LongStatistic)9 DoubleVector (de.lmu.ifi.dbs.elki.data.DoubleVector)8 MultipleObjectsBundle (de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle)8 AbortException (de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException)8 Database (de.lmu.ifi.dbs.elki.database.Database)7 DBIDs (de.lmu.ifi.dbs.elki.database.ids.DBIDs)7 DoubleStatistic (de.lmu.ifi.dbs.elki.logging.statistics.DoubleStatistic)7 Random (java.util.Random)7 Test (org.junit.Test)7 VectorFieldTypeInformation (de.lmu.ifi.dbs.elki.data.type.VectorFieldTypeInformation)5 FiniteProgress (de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress)5 MeanVariance (de.lmu.ifi.dbs.elki.math.MeanVariance)5 EvaluationResult (de.lmu.ifi.dbs.elki.result.EvaluationResult)5 MeasurementGroup (de.lmu.ifi.dbs.elki.result.EvaluationResult.MeasurementGroup)5 List (java.util.List)5 SparseNumberVector (de.lmu.ifi.dbs.elki.data.SparseNumberVector)4 RandomProjectionFamily (de.lmu.ifi.dbs.elki.data.projection.random.RandomProjectionFamily)4