Search in sources :

Example 6 with NumberVector

use of de.lmu.ifi.dbs.elki.data.NumberVector in project elki by elki-project.

the class ScatterData method initializeData.

/**
 * Build the vertex buffer for the scatter plot: scan all relations for
 * vector fields, compute a per-dimension linear scale for each of them, then
 * allocate an OpenGL array buffer and fill it with one record per object
 * (all scaled vector coordinates, followed by one extra "class" column).
 *
 * Updates the fields {@code length}, {@code dim}, {@code vecOffset},
 * {@code classOffset}, {@code stride} and {@code vbos[0]}.
 *
 * NOTE(review): {@code vecOffset} is recorded as a count of floats (the value
 * of {@code dim} when the first vector relation is seen), whereas
 * {@code classOffset} is stored in bytes ({@code (dim - 1) * SIZE_FLOAT}) -
 * confirm that consumers expect these different units.
 *
 * @param gl GL context used to create, map and fill the vertex buffer
 */
public void initializeData(GL2 gl) {
    length = ids.size();
    dim = 0;
    // Negative offsets mean "not assigned yet".
    vecOffset = -1;
    classOffset = -1;
    // Scan relations for dimensionalities:
    int[] dims = new int[relations.size()];
    LinearScale[][] scales = new LinearScale[relations.size()][];
    // Parallel to "relations"; holds null for relations that are not vector fields.
    ArrayList<Relation<? extends NumberVector>> vrels = new ArrayList<>(relations.size());
    for (int r = 0; r < relations.size(); r++) {
        Relation<?> rel = relations.get(r);
        final SimpleTypeInformation<?> type = rel.getDataTypeInformation();
        if (type instanceof VectorFieldTypeInformation) {
            // The cast is guarded by the type information check above.
            @SuppressWarnings("unchecked") final Relation<? extends NumberVector> vrel = (Relation<? extends NumberVector>) rel;
            final int d = ((VectorFieldTypeInformation<?>) type).getDimensionality();
            dims[r] = d;
            // One scale per dimension, spanning the observed minimum and maximum.
            LinearScale[] rscales = new LinearScale[d];
            double[][] minmax = RelationUtil.computeMinMax(vrel);
            for (int i = 0; i < d; i++) {
                rscales[i] = new LinearScale(minmax[0][i], minmax[1][i]);
            }
            scales[r] = rscales;
            vrels.add(vrel);
            // Remember where the first vector relation starts within a record.
            if (vecOffset < 0) {
                vecOffset = dim;
            }
            dim += d;
        } else {
            // FIXME: handle other relation types!
            dims[r] = 0;
            vrels.add(null);
        }
    }
    // classOffset is still -1 at this point in this method, so one extra
    // column is always reserved for the class value.
    if (classOffset < 0) {
        ++dim;
    }
    // Diagnostic output (emitted at warning level).
    LOG.warning("Dimensionalities: " + FormatUtil.format(dims));
    // Initialize vertex buffer handles:
    // The handle is expected to be unassigned (-1) before the buffer is generated.
    assert (vbos[0] == -1);
    gl.glGenBuffers(1, vbos, 0);
    gl.glBindBuffer(GL.GL_ARRAY_BUFFER, vbos[0]);
    // Allocate length * dim floats, plus three extra floats of padding.
    gl.glBufferData(GL.GL_ARRAY_BUFFER, // 
    length * dim * SIZE_FLOAT + // safety padding
    3 * SIZE_FLOAT, null, GL2.GL_STATIC_DRAW);
    // Map the buffer into client memory and view it as floats in native byte order.
    ByteBuffer vbytebuffer = gl.glMapBuffer(GL.GL_ARRAY_BUFFER, GL2.GL_WRITE_ONLY);
    FloatBuffer vertices = vbytebuffer.order(ByteOrder.nativeOrder()).asFloatBuffer();
    Random rnd = new Random();
    // Write one record per object: scaled coordinates of every vector
    // relation, in relation order, then the class column.
    for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
        for (int r = 0; r < dims.length; r++) {
            // Skip relations that contribute no columns.
            if (dims[r] <= 0) {
                continue;
            }
            final Relation<? extends NumberVector> vrel = vrels.get(r);
            LinearScale[] rscales = scales[r];
            if (vrel != null) {
                NumberVector vec = vrel.get(iter);
                for (int d = 0; d < dims[r]; d++) {
                    // vertices.put( rnd.nextFloat());
                    // Presumably getScaled maps into [0, 1], so this yields [-1, 1] - TODO confirm.
                    vertices.put((float) rscales[d].getScaled(vec.doubleValue(d)) * 2.f - 1.f);
                }
            }
        }
        if (classOffset < 0) {
            // Random integer in [0, 30), stored as float; presumably a
            // placeholder until real class labels are available - confirm.
            vertices.put(rnd.nextInt(30));
        }
    }
    // Size of one record in bytes.
    stride = dim * SIZE_FLOAT;
    if (classOffset < 0) {
        // The class value is the last float of each record (offset in bytes).
        classOffset = (dim - 1) * SIZE_FLOAT;
    }
    // Sanity check: exactly length * dim floats should have been written.
    if (vertices.position() != length * dim) {
        LOG.warning("Size mismatch: " + vertices.position() + " expected: " + length * dim, new Throwable());
    }
    vertices.flip();
    // Release the mapping and unbind the buffer.
    gl.glUnmapBuffer(GL.GL_ARRAY_BUFFER);
    gl.glBindBuffer(GL.GL_ARRAY_BUFFER, 0);
    LOG.warning("Size: " + length + " dim: " + dim + " " + vecOffset + " " + classOffset);
}
Also used : LinearScale(de.lmu.ifi.dbs.elki.math.scales.LinearScale) ArrayList(java.util.ArrayList) FloatBuffer(java.nio.FloatBuffer) ByteBuffer(java.nio.ByteBuffer) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) Relation(de.lmu.ifi.dbs.elki.database.relation.Relation) Random(java.util.Random) NumberVector(de.lmu.ifi.dbs.elki.data.NumberVector) VectorFieldTypeInformation(de.lmu.ifi.dbs.elki.data.type.VectorFieldTypeInformation)

Example 7 with NumberVector

use of de.lmu.ifi.dbs.elki.data.NumberVector in project elki by elki-project.

the class KMeansPlusPlusInitialMeans method chooseInitialMeans.

/**
 * Choose {@code k} initial means by weighted random sampling: the first mean
 * is drawn uniformly at random, every further mean is drawn with probability
 * proportional to its current weight. The weights are maintained by
 * {@code initialWeights} and {@code updateWeights} (presumably the squared
 * distance to the nearest mean chosen so far, as in k-means++ - confirm
 * against those helpers).
 *
 * Exactly {@code k} means are returned for {@code k >= 1}; in particular
 * {@code k == 1} yields only the uniformly sampled first mean.
 *
 * @param database Database used to obtain the distance query
 * @param relation Relation to choose the means from
 * @param k Number of means to choose
 * @param distanceFunction Distance function used for weighting
 * @param <T> Vector type
 * @return The chosen means, as returned by {@code unboxVectors}
 * @throws AbortException if the data set does not contain more than {@code k}
 *         objects
 */
@Override
public <T extends NumberVector> double[][] chooseInitialMeans(Database database, Relation<T> relation, int k, NumberVectorDistanceFunction<? super T> distanceFunction) {
    DBIDs ids = relation.getDBIDs();
    // Validate before allocating the temporary weight storage, so that
    // nothing needs to be cleaned up on this failure path.
    if (ids.size() <= k) {
        throw new AbortException("Don't use k-means with k >= data set size.");
    }
    DistanceQuery<T> distQ = database.getDistanceQuery(relation, distanceFunction);
    WritableDoubleDataStore weights = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP, 0.);
    try {
        // Choose first mean
        List<NumberVector> means = new ArrayList<>(k);
        Random random = rnd.getSingleThreadedRandom();
        DBIDRef first = DBIDUtil.randomSample(ids, random);
        T firstvec = relation.get(first);
        means.add(firstvec);
        // Initialize weights
        double weightsum = initialWeights(weights, ids, firstvec, distQ);
        // Checking the size up front ensures that no second mean is added
        // when only one was requested.
        while (means.size() < k) {
            if (weightsum > Double.MAX_VALUE) {
                LoggingUtil.warning("Could not choose a reasonable mean for k-means++ - too many data points, too large squared distances?");
            }
            if (weightsum < Double.MIN_NORMAL) {
                LoggingUtil.warning("Could not choose a reasonable mean for k-means++ - to few data points?");
            }
            // Walk the cumulative weights until the random threshold is reached.
            double r = random.nextDouble() * weightsum, s = 0.;
            DBIDIter it = ids.iter();
            for (; s < r && it.valid(); it.advance()) {
                s += weights.doubleValue(it);
            }
            if (!it.valid()) {
                // Rare case, but happens due to floating math:
                // decrease the weight sum and retry.
                weightsum -= (r - s);
                continue;
            }
            // Add new mean:
            final T newmean = relation.get(it);
            means.add(newmean);
            if (means.size() >= k) {
                // Done; skip the weight update that nobody would use.
                break;
            }
            // Update weights; the chosen object must not be drawn again.
            weights.putDouble(it, 0.);
            weightsum = updateWeights(weights, ids, newmean, distQ);
        }
        return unboxVectors(means);
    } finally {
        // Explicitly destroy temporary data, also when an exception occurred.
        weights.destroy();
    }
}
Also used : Random(java.util.Random) WritableDoubleDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore) NumberVector(de.lmu.ifi.dbs.elki.data.NumberVector) DBIDRef(de.lmu.ifi.dbs.elki.database.ids.DBIDRef) ArrayModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs) DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) ArrayList(java.util.ArrayList) AbortException(de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter)

Example 8 with NumberVector

use of de.lmu.ifi.dbs.elki.data.NumberVector in project elki by elki-project.

the class EM method recomputeCovarianceMatrices.

/**
 * Update all cluster models from the current object-to-cluster
 * probabilities.
 *
 * Every model is notified via {@code beginEStep}; if at least one model
 * requests it, a first pass over the data is performed, followed by the main
 * update pass, and finally each model is finalized with its cluster weight.
 *
 * @param relation Vector data
 * @param probClusterIGivenX Object probabilities
 * @param models Cluster models to update
 * @param prior MAP prior (use 0 for MLE)
 */
public static void recomputeCovarianceMatrices(Relation<? extends NumberVector> relation, WritableDataStore<double[]> probClusterIGivenX, List<? extends EMClusterModel<?>> models, double prior) {
    final int k = models.size();
    // Start the step on every model, and find out whether any needs two passes.
    boolean twoPass = false;
    for (EMClusterModel<?> model : models) {
        model.beginEStep();
        if (model.needsTwoPass()) {
            twoPass = true;
        }
    }
    // Optional first pass, only executed when some model asked for it.
    if (twoPass) {
        DBIDIter it = relation.iterDBIDs();
        while (it.valid()) {
            final double[] probs = probClusterIGivenX.get(it);
            final NumberVector vec = relation.get(it);
            for (int c = 0; c < probs.length; c++) {
                final double p = probs[c];
                // Negligible probabilities are not passed to the model.
                if (p > 1e-10) {
                    models.get(c).firstPassE(vec, p);
                }
            }
            it.advance();
        }
        for (EMClusterModel<?> model : models) {
            model.finalizeFirstPassE();
        }
    }
    // Main pass: update the models and accumulate the weight of each cluster.
    final double[] weightSums = new double[k];
    DBIDIter it = relation.iterDBIDs();
    while (it.valid()) {
        final double[] probs = probClusterIGivenX.get(it);
        final NumberVector vec = relation.get(it);
        for (int c = 0; c < probs.length; c++) {
            final double p = probs[c];
            if (p > 1e-10) {
                models.get(c).updateE(vec, p);
            }
            // The weight sum includes negligible probabilities, too.
            weightSums[c] += p;
        }
        it.advance();
    }
    // Finalize each model with its weight: MLE without prior, MAP otherwise.
    for (int c = 0; c < models.size(); c++) {
        final EMClusterModel<?> model = models.get(c);
        final double weight;
        if (prior <= 0.) {
            weight = weightSums[c] / relation.size();
        } else {
            weight = (weightSums[c] + prior - 1) / (relation.size() + prior * k - k);
        }
        model.finalizeEStep(weight, prior);
    }
}
Also used : NumberVector(de.lmu.ifi.dbs.elki.data.NumberVector) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter)

Example 9 with NumberVector

use of de.lmu.ifi.dbs.elki.data.NumberVector in project elki by elki-project.

the class AbstractKMeans method denseMeans.

/**
 * Compute the mean vector of each cluster.
 *
 * For an empty cluster, the previous mean is reused (the very same array,
 * not a copy).
 *
 * @param clusters the clusters to compute the means
 * @param means the recent means
 * @param relation the database containing the vectors
 * @return the mean vectors of the given clusters in the given database
 */
private static double[][] denseMeans(List<? extends DBIDs> clusters, double[][] means, Relation<? extends NumberVector> relation) {
    final int k = means.length;
    final double[][] result = new double[k][];
    for (int c = 0; c < k; c++) {
        final DBIDs members = clusters.get(c);
        if (!members.isEmpty()) {
            final DBIDIter it = members.iter();
            // Start the sum with the first member
            // (assumes toArray() yields an array that is safe to modify - TODO confirm).
            final double[] sum = relation.get(it).toArray();
            it.advance();
            // Add up the remaining members.
            while (it.valid()) {
                final NumberVector vec = relation.get(it);
                for (int d = 0; d < sum.length; d++) {
                    sum[d] += vec.doubleValue(d);
                }
                it.advance();
            }
            // Scale the sum by 1/size to obtain the mean.
            result[c] = timesEquals(sum, 1.0 / members.size());
        } else {
            // Keep degenerated means as-is for now.
            result[c] = means[c];
        }
    }
    return result;
}
Also used : NumberVector(de.lmu.ifi.dbs.elki.data.NumberVector) SparseNumberVector(de.lmu.ifi.dbs.elki.data.SparseNumberVector)

Example 10 with NumberVector

use of de.lmu.ifi.dbs.elki.data.NumberVector in project elki by elki-project.

the class RelationUtil method computeMinMax.

/**
 * Compute the per-dimension minimum and maximum over all vectors of a
 * relation.
 *
 * Dimensions in which no value was smaller (respectively larger) than the
 * initial sentinel keep {@code Double.MAX_VALUE} as minimum and
 * {@code -Double.MAX_VALUE} as maximum; this is in particular the result for
 * an empty relation.
 *
 * @param relation the relation storing the vectors
 * @return array {@code { minimums, maximums }} describing the bounding
 *         hyperrectangle
 */
public static double[][] computeMinMax(Relation<? extends NumberVector> relation) {
    final int dim = RelationUtil.dimensionality(relation);
    final double[] lower = new double[dim];
    final double[] upper = new double[dim];
    // Sentinel values that any regular finite value will replace.
    for (int d = 0; d < dim; d++) {
        lower[d] = Double.MAX_VALUE;
        upper[d] = -Double.MAX_VALUE;
    }
    DBIDIter it = relation.iterDBIDs();
    while (it.valid()) {
        final NumberVector vec = relation.get(it);
        for (int d = 0; d < dim; d++) {
            final double val = vec.doubleValue(d);
            if (val < lower[d]) {
                lower[d] = val;
            }
            if (val > upper[d]) {
                upper[d] = val;
            }
        }
        it.advance();
    }
    return new double[][] { lower, upper };
}
Also used : NumberVector(de.lmu.ifi.dbs.elki.data.NumberVector) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter)

Aggregations

NumberVector (de.lmu.ifi.dbs.elki.data.NumberVector)85 DBIDIter (de.lmu.ifi.dbs.elki.database.ids.DBIDIter)40 ArrayList (java.util.ArrayList)16 LongStatistic (de.lmu.ifi.dbs.elki.logging.statistics.LongStatistic)9 DoubleVector (de.lmu.ifi.dbs.elki.data.DoubleVector)8 MultipleObjectsBundle (de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle)8 AbortException (de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException)8 Database (de.lmu.ifi.dbs.elki.database.Database)7 DBIDs (de.lmu.ifi.dbs.elki.database.ids.DBIDs)7 DoubleStatistic (de.lmu.ifi.dbs.elki.logging.statistics.DoubleStatistic)7 Random (java.util.Random)7 Test (org.junit.Test)7 VectorFieldTypeInformation (de.lmu.ifi.dbs.elki.data.type.VectorFieldTypeInformation)5 FiniteProgress (de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress)5 MeanVariance (de.lmu.ifi.dbs.elki.math.MeanVariance)5 EvaluationResult (de.lmu.ifi.dbs.elki.result.EvaluationResult)5 MeasurementGroup (de.lmu.ifi.dbs.elki.result.EvaluationResult.MeasurementGroup)5 List (java.util.List)5 SparseNumberVector (de.lmu.ifi.dbs.elki.data.SparseNumberVector)4 RandomProjectionFamily (de.lmu.ifi.dbs.elki.data.projection.random.RandomProjectionFamily)4