Use of de.lmu.ifi.dbs.elki.data.NumberVector in project elki by elki-project.
The class ScatterData, method initializeData.
/**
 * Build the vertex buffer for the scatter plot and upload it via the given GL
 * context.
 *
 * For every object in {@code ids}, one record of {@code dim} floats is
 * written: the attributes of all vector field relations (rescaled with a
 * per-attribute {@link LinearScale} built from the observed minimum and
 * maximum, then mapped with {@code * 2 - 1}), followed by one extra float that
 * currently holds a random integer below 30 as a placeholder class value.
 *
 * Side effects: sets {@code length}, {@code dim}, {@code vecOffset},
 * {@code classOffset} and {@code stride}, and stores the generated buffer
 * handle in {@code vbos[0]}.
 *
 * @param gl GL context used to create, map and fill the buffer
 */
public void initializeData(GL2 gl) {
  length = ids.size();
  dim = 0;
  vecOffset = -1;
  classOffset = -1;

  // Pass 1: inspect every relation, collecting dimensionality and scales.
  final int numRelations = relations.size();
  final int[] dims = new int[numRelations];
  final LinearScale[][] scales = new LinearScale[numRelations][];
  final ArrayList<Relation<? extends NumberVector>> vrels = new ArrayList<>(numRelations);
  for (int r = 0; r < numRelations; r++) {
    final Relation<?> candidate = relations.get(r);
    final SimpleTypeInformation<?> type = candidate.getDataTypeInformation();
    if (!(type instanceof VectorFieldTypeInformation)) {
      // FIXME: handle other relation types!
      // Keep a null placeholder so that indexes stay aligned with "relations".
      dims[r] = 0;
      vrels.add(null);
      continue;
    }
    @SuppressWarnings("unchecked")
    final Relation<? extends NumberVector> vectorRelation = (Relation<? extends NumberVector>) candidate;
    final int d = ((VectorFieldTypeInformation<?>) type).getDimensionality();
    dims[r] = d;
    // One scale per attribute, spanning the observed value range.
    final LinearScale[] attributeScales = new LinearScale[d];
    final double[][] minmax = RelationUtil.computeMinMax(vectorRelation);
    for (int i = 0; i < d; i++) {
      attributeScales[i] = new LinearScale(minmax[0][i], minmax[1][i]);
    }
    scales[r] = attributeScales;
    vrels.add(vectorRelation);
    // The first vector relation determines where the vector data starts.
    if (vecOffset < 0) {
      vecOffset = dim;
    }
    dim += d;
  }
  // Reserve one additional float per object for the class value.
  if (classOffset < 0) {
    ++dim;
  }
  LOG.warning("Dimensionalities: " + FormatUtil.format(dims));

  // Initialize vertex buffer handles:
  assert (vbos[0] == -1);
  gl.glGenBuffers(1, vbos, 0);
  gl.glBindBuffer(GL.GL_ARRAY_BUFFER, vbos[0]);
  gl.glBufferData(GL.GL_ARRAY_BUFFER, //
      length * dim * SIZE_FLOAT + // safety padding
          3 * SIZE_FLOAT, null, GL2.GL_STATIC_DRAW);
  final ByteBuffer mapped = gl.glMapBuffer(GL.GL_ARRAY_BUFFER, GL2.GL_WRITE_ONLY);
  final FloatBuffer vertices = mapped.order(ByteOrder.nativeOrder()).asFloatBuffer();

  // Pass 2: write one interleaved record per object.
  final Random rnd = new Random();
  for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
    for (int r = 0; r < dims.length; r++) {
      final int rdim = dims[r];
      final Relation<? extends NumberVector> vrel = rdim > 0 ? vrels.get(r) : null;
      if (vrel == null) {
        continue;
      }
      final LinearScale[] attributeScales = scales[r];
      final NumberVector vec = vrel.get(iter);
      for (int d = 0; d < rdim; d++) {
        final float scaled = (float) attributeScales[d].getScaled(vec.doubleValue(d));
        vertices.put(scaled * 2.f - 1.f);
      }
    }
    // Placeholder class value, stored as float.
    if (classOffset < 0) {
      vertices.put(rnd.nextInt(30));
    }
  }

  // Record layout, in bytes; the class value is the last float of a record.
  stride = dim * SIZE_FLOAT;
  if (classOffset < 0) {
    classOffset = (dim - 1) * SIZE_FLOAT;
  }
  // Sanity check: we must have written exactly one record per object.
  if (vertices.position() != length * dim) {
    LOG.warning("Size mismatch: " + vertices.position() + " expected: " + length * dim, new Throwable());
  }
  vertices.flip();
  gl.glUnmapBuffer(GL.GL_ARRAY_BUFFER);
  gl.glBindBuffer(GL.GL_ARRAY_BUFFER, 0);
  LOG.warning("Size: " + length + " dim: " + dim + " " + vecOffset + " " + classOffset);
}
Use of de.lmu.ifi.dbs.elki.data.NumberVector in project elki by elki-project.
The class KMeansPlusPlusInitialMeans, method chooseInitialMeans.
/**
 * Choose initial means by k-means++ style weighted sampling.
 *
 * The first mean is drawn uniformly at random. Every further mean is drawn
 * with probability proportional to the weight currently stored for each
 * object (maintained by {@code initialWeights} and {@code updateWeights};
 * presumably the squared distance to the nearest mean chosen so far - confirm
 * against these helpers).
 *
 * @param database Database to obtain the distance query from
 * @param relation Data relation to sample from
 * @param k Number of means to choose; must be smaller than the data set size
 * @param distanceFunction Distance function
 * @return Initial means, as arrays
 * @throws AbortException if the relation does not contain more than k objects
 */
@Override
public <T extends NumberVector> double[][] chooseInitialMeans(Database database, Relation<T> relation, int k, NumberVectorDistanceFunction<? super T> distanceFunction) {
  DistanceQuery<T> distQ = database.getDistanceQuery(relation, distanceFunction);
  DBIDs ids = relation.getDBIDs();
  // Validate before allocating the temporary storage, so that it is not left
  // undestroyed when we abort.
  if (ids.size() <= k) {
    throw new AbortException("Don't use k-means with k >= data set size.");
  }
  WritableDoubleDataStore weights = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP, 0.);
  // Choose first mean
  List<NumberVector> means = new ArrayList<>(k);
  Random random = rnd.getSingleThreadedRandom();
  DBIDRef first = DBIDUtil.randomSample(ids, random);
  T firstvec = relation.get(first);
  means.add(firstvec);
  // Initialize weights
  double weightsum = initialWeights(weights, ids, firstvec, distQ);
  // Checking the size up front ensures that k = 1 yields exactly one mean.
  while (means.size() < k) {
    if (weightsum > Double.MAX_VALUE) {
      LoggingUtil.warning("Could not choose a reasonable mean for k-means++ - too many data points, too large squared distances?");
    }
    if (weightsum < Double.MIN_NORMAL) {
      LoggingUtil.warning("Could not choose a reasonable mean for k-means++ - to few data points?");
    }
    double r = random.nextDouble() * weightsum, s = 0.;
    DBIDIter it = ids.iter();
    // Walk the cumulative weights. Stop WITHOUT advancing, so that the
    // iterator still references the sampled object. The negated comparison
    // also terminates on NaN and on r == 0 (all weights zero).
    while (it.valid()) {
      s += weights.doubleValue(it);
      if (!(s < r)) {
        break;
      }
      it.advance();
    }
    if (!it.valid()) {
      // Rare case, but happens due to floating math: the stored weights sum
      // up to less than weightsum. Here r > s, so this decreases weightsum.
      weightsum -= (r - s);
      // Retry
      continue;
    }
    // Add new mean:
    final T newmean = relation.get(it);
    means.add(newmean);
    if (means.size() >= k) {
      // Done; no need to update the weights once more.
      break;
    }
    // Update weights: the chosen object must not be drawn again.
    weights.putDouble(it, 0.);
    weightsum = updateWeights(weights, ids, newmean, distQ);
  }
  // Explicitly destroy temporary data.
  weights.destroy();
  return unboxVectors(means);
}
Use of de.lmu.ifi.dbs.elki.data.NumberVector in project elki by elki-project.
The class EM, method recomputeCovarianceMatrices.
/**
 * Update all cluster models from the current object-to-cluster
 * probabilities.
 *
 * Each object is fed to every model whose assignment probability exceeds
 * 1e-10. Models that request it receive an additional first pass over the
 * data. Finally, each model is finalized with its new cluster weight.
 *
 * @param relation Vector data
 * @param probClusterIGivenX Object probabilities, one array entry per model
 * @param models Cluster models to update
 * @param prior MAP prior (use 0 for MLE)
 */
public static void recomputeCovarianceMatrices(Relation<? extends NumberVector> relation, WritableDataStore<double[]> probClusterIGivenX, List<? extends EMClusterModel<?>> models, double prior) {
  final int k = models.size();
  // Reset all models, and find out whether any of them needs two passes.
  boolean twoPass = false;
  for (EMClusterModel<?> model : models) {
    model.beginEStep();
    twoPass |= model.needsTwoPass();
  }
  // First pass, only for two-pass models.
  if (twoPass) {
    DBIDIter it = relation.iterDBIDs();
    while (it.valid()) {
      final double[] probs = probClusterIGivenX.get(it);
      final NumberVector vec = relation.get(it);
      for (int c = 0; c < probs.length; c++) {
        final double p = probs[c];
        if (p > 1e-10) {
          models.get(c).firstPassE(vec, p);
        }
      }
      it.advance();
    }
    for (EMClusterModel<?> model : models) {
      model.finalizeFirstPassE();
    }
  }
  // Main pass: update the models, and sum up the probabilities per cluster.
  final double[] wsum = new double[k];
  DBIDIter it = relation.iterDBIDs();
  while (it.valid()) {
    final double[] probs = probClusterIGivenX.get(it);
    final NumberVector vec = relation.get(it);
    for (int c = 0; c < probs.length; c++) {
      final double p = probs[c];
      if (p > 1e-10) {
        models.get(c).updateE(vec, p);
      }
      // Negligible probabilities still count towards the cluster weight.
      wsum[c] += p;
    }
    it.advance();
  }
  // Finalize every model with its cluster weight.
  for (int c = 0; c < models.size(); c++) {
    final EMClusterModel<?> model = models.get(c);
    final double weight;
    if (prior <= 0.) {
      // MLE
      weight = wsum[c] / relation.size();
    }
    else {
      // MAP
      weight = (wsum[c] + prior - 1) / (relation.size() + prior * k - k);
    }
    model.finalizeEStep(weight, prior);
  }
}
Use of de.lmu.ifi.dbs.elki.data.NumberVector in project elki by elki-project.
The class AbstractKMeans, method denseMeans.
/**
 * Compute the new mean vector of every cluster.
 *
 * An empty cluster keeps its previous mean (the very same array is reused,
 * not a copy).
 *
 * @param clusters the cluster members, one entry per mean
 * @param means the previous means
 * @param relation the relation containing the vectors
 * @return the new mean vectors, in the order of the clusters
 */
private static double[][] denseMeans(List<? extends DBIDs> clusters, double[][] means, Relation<? extends NumberVector> relation) {
  final int k = means.length;
  final double[][] result = new double[k][];
  for (int c = 0; c < k; c++) {
    final DBIDs members = clusters.get(c);
    if (members.isEmpty()) {
      // Keep degenerated means as-is for now.
      result[c] = means[c];
    }
    else {
      final DBIDIter it = members.iter();
      // Start the sum with a copy of the first member.
      final double[] sum = relation.get(it).toArray();
      it.advance();
      // Add all remaining members.
      while (it.valid()) {
        final NumberVector vec = relation.get(it);
        for (int j = 0; j < sum.length; j++) {
          sum[j] += vec.doubleValue(j);
        }
        it.advance();
      }
      // Normalize the sum by the cluster size.
      result[c] = timesEquals(sum, 1.0 / members.size());
    }
  }
  return result;
}
Use of de.lmu.ifi.dbs.elki.data.NumberVector in project elki by elki-project.
The class RelationUtil, method computeMinMax.
/**
 * Compute the bounding box of a vector relation, i.e., the smallest and the
 * largest value observed in each dimension.
 *
 * For a relation without objects, the minima remain at
 * {@code Double.MAX_VALUE} and the maxima at {@code -Double.MAX_VALUE}.
 *
 * @param relation the relation storing the vectors
 * @return array of two vectors: the minima (index 0) and the maxima (index 1)
 */
public static double[][] computeMinMax(Relation<? extends NumberVector> relation) {
  final int dim = RelationUtil.dimensionality(relation);
  final double[] mins = new double[dim];
  final double[] maxs = new double[dim];
  // Sentinel values, replaced by the first value seen.
  java.util.Arrays.fill(mins, Double.MAX_VALUE);
  java.util.Arrays.fill(maxs, -Double.MAX_VALUE);
  DBIDIter iditer = relation.iterDBIDs();
  while (iditer.valid()) {
    final NumberVector vec = relation.get(iditer);
    for (int d = 0; d < dim; d++) {
      final double v = vec.doubleValue(d);
      // Strict comparisons: NaN values never replace a bound.
      if (v < mins[d]) {
        mins[d] = v;
      }
      if (v > maxs[d]) {
        maxs[d] = v;
      }
    }
    iditer.advance();
  }
  return new double[][] { mins, maxs };
}
Aggregations