use of de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs in project elki by elki-project.
the class KMeansBatchedLloyd method run.
/**
 * Run k-means where the data set is randomly split into {@code blocks}
 * batches, and the means are updated after each batch has been reassigned.
 *
 * Iterations continue until a full pass over all batches changes no
 * assignment, or until {@code maxiter} passes were made (a non-positive
 * {@code maxiter} disables the limit).
 *
 * @param database Database, handed to the initializer
 * @param relation Relation holding the vectors to cluster
 * @return Clustering with one top-level cluster per non-empty cluster
 */
@Override
public Clustering<KMeansModel> run(Database database, Relation<V> relation) {
  final int dimensionality = RelationUtil.dimensionality(relation);
  // Report the initialization strategy, then let it choose the starting means.
  if (LOG.isStatistics()) {
    LOG.statistics(new StringStatistic(KEY + ".initializer", initializer.toString()));
  }
  final double[][] centers = initializer.chooseInitialMeans(database, relation, k, getDistanceFunction());

  // One id set per cluster, presized to twice the average cluster size.
  final List<ModifiableDBIDs> members = new ArrayList<>();
  while (members.size() < k) {
    members.add(DBIDUtil.newHashSet((int) (relation.size() * 2. / k)));
  }
  // Cluster index per object; -1 is the initial value for every object.
  final WritableIntegerDataStore assignment = DataStoreUtil.makeIntegerStorage(relation.getDBIDs(), DataStoreFactory.HINT_TEMP | DataStoreFactory.HINT_HOT, -1);
  final ArrayDBIDs[] batches = DBIDUtil.randomSplit(relation.getDBIDs(), blocks, random);

  // Scratch buffers that are cleared before every batch.
  final double[][] shift = new double[k][dimensionality];
  final int[] sizeDelta = new int[k];
  final double[] varsum = new double[k];

  IndefiniteProgress iterProgress = null;
  if (LOG.isVerbose()) {
    iterProgress = new IndefiniteProgress("K-Means iteration", LOG);
  }
  DoubleStatistic varstat = null;
  if (LOG.isStatistics()) {
    varstat = new DoubleStatistic(this.getClass().getName() + ".variance-sum");
  }

  int iteration = 0;
  while (maxiter <= 0 || iteration < maxiter) {
    LOG.incrementProcessed(iterProgress);
    boolean changed = false;
    FiniteProgress batchProgress = null;
    if (LOG.isVerbose()) {
      batchProgress = new FiniteProgress("Batch", batches.length, LOG);
    }
    for (ArrayDBIDs batch : batches) {
      // Reset the scratch space for this batch.
      for (double[] row : shift) {
        Arrays.fill(row, 0.);
      }
      Arrays.fill(sizeDelta, 0);
      // NOTE(review): varsum is cleared before every batch, so the values
      // logged and stored in the models below were last reset before the
      // final batch - confirm whether per-iteration totals were intended.
      Arrays.fill(varsum, 0.);
      // Always reassign the batch; only remember whether anything moved.
      if (assignToNearestCluster(relation, batch, centers, shift, sizeDelta, members, assignment, varsum)) {
        changed = true;
      }
      // Fold the changes of this batch into the means right away.
      updateMeans(centers, shift, members, sizeDelta);
      LOG.incrementProcessed(batchProgress);
    }
    LOG.ensureCompleted(batchProgress);
    logVarstat(varstat, varsum);
    // Converged: a full pass over all batches changed no assignment.
    if (!changed) {
      break;
    }
    iteration++;
  }
  LOG.setCompleted(iterProgress);
  if (LOG.isStatistics()) {
    LOG.statistics(new LongStatistic(KEY + ".iterations", iteration));
  }

  // Wrap the non-empty clusters into the result.
  final Clustering<KMeansModel> result = new Clustering<>("k-Means Clustering", "kmeans-clustering");
  for (int c = 0; c < members.size(); c++) {
    final DBIDs ids = members.get(c);
    if (ids.size() != 0) {
      final KMeansModel model = new KMeansModel(centers[c], varsum[c]);
      result.addToplevelCluster(new Cluster<>(ids, model));
    }
  }
  return result;
}
use of de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs in project elki by elki-project.
the class LinearScanRKNNQuery method getRKNNForBulkDBIDs.
/**
 * Reverse k-nearest-neighbor lookup for several query objects by linear scan.
 *
 * The kNN of every object in the relation is requested in one bulk call; an
 * object is then added to the result list of each query id that occurs in
 * its kNN list, with the distance taken from that kNN entry.
 *
 * @param ids Query objects
 * @param k Number of neighbors requested from the kNN query
 * @return One sorted list per query object, in the iteration order of {@code ids}
 */
@Override
public List<? extends DoubleDBIDList> getRKNNForBulkDBIDs(ArrayDBIDs ids, int k) {
  // Start with one empty result list per query object.
  final List<ModifiableDoubleDBIDList> results = new ArrayList<>(ids.size());
  while (results.size() < ids.size()) {
    results.add(DBIDUtil.newDistanceDBIDList());
  }

  final ArrayDBIDs everything = DBIDUtil.ensureArray(relation.getDBIDs());
  final List<? extends KNNList> neighborhoods = knnQuery.getKNNForBulkDBIDs(everything, k);

  // pos indexes into neighborhoods, in step with the candidate iterator.
  int pos = 0;
  for (DBIDIter candidate = everything.iter(); candidate.valid(); candidate.advance(), pos++) {
    final KNNList neighborhood = neighborhoods.get(pos);
    for (DoubleDBIDListIter neighbor = neighborhood.iter(); neighbor.valid(); neighbor.advance()) {
      // Every query id is compared (no early exit), so ids occurring more
      // than once in the query each receive the candidate.
      int slot = 0;
      for (DBIDIter query = ids.iter(); query.valid(); query.advance(), slot++) {
        if (!DBIDUtil.equal(neighbor, query)) {
          continue;
        }
        results.get(slot).add(neighbor.doubleValue(), candidate);
      }
    }
  }

  for (ModifiableDoubleDBIDList list : results) {
    list.sort();
  }
  return results;
}
use of de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs in project elki by elki-project.
the class SigmoidOutlierScalingFunction method prepare.
/**
 * Fit the sigmoid parameters to the scores of the given outlier result and
 * store them in {@code Afinal} and {@code Bfinal}.
 *
 * Alternates between an E-step, which labels each object by the sign of
 * {@code a * score + b}, and an M-step, which refits {@code (a, b)} with
 * {@code MStepLevenbergMarquardt}. The loop ends when an E-step changes no
 * label, or with a warning once more than 100 M-steps have been performed.
 *
 * @param or Outlier result providing the scores
 */
@Override
public void prepare(OutlierResult or) {
  // Starting point: unit slope and the negated mean score as offset.
  final MeanVariance stats = new MeanVariance();
  final DoubleRelation scores = or.getScores();
  for (DBIDIter id = scores.iterDBIDs(); id.valid(); id.advance()) {
    stats.put(scores.doubleValue(id));
  }
  double a = 1.0;
  double b = -stats.getMean();

  final ArrayDBIDs ids = DBIDUtil.ensureArray(or.getScores().getDBIDs());
  final DBIDArrayIter cursor = ids.iter();
  // One bit per object: set when the object currently has a positive target.
  final long[] labels = BitsUtil.zero(ids.size());

  int rounds = 0;
  while (true) {
    // E-Step: relabel every object, remembering whether any label flipped.
    boolean flipped = false;
    cursor.seek(0);
    for (int pos = 0; pos < ids.size(); pos++, cursor.advance()) {
      final double score = or.getScores().doubleValue(cursor);
      final boolean positive = a * score + b > 0;
      if (positive != BitsUtil.get(labels, pos)) {
        if (positive) {
          BitsUtil.setI(labels, pos);
        }
        else {
          BitsUtil.clearI(labels, pos);
        }
        flipped = true;
      }
    }
    // Stable labeling: the current parameters are final.
    if (!flipped) {
      break;
    }
    // M-Step
    // Implementation based on:
    // H.-T. Lin, C.-J. Lin, R. C. Weng:
    // A Note on Platt's Probabilistic Outputs for Support Vector Machines
    final double[] refit = MStepLevenbergMarquardt(a, b, ids, labels, or.getScores());
    a = refit[0];
    b = refit[1];
    if (++rounds > 100) {
      LOG.warning("Max iterations met in sigmoid fitting.");
      break;
    }
  }
  Afinal = a;
  Bfinal = b;
  LOG.debugFine("A = " + Afinal + " B = " + Bfinal);
}
use of de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs in project elki by elki-project.
the class PerplexityAffinityMatrixBuilder method computeAffinityMatrix.
/**
 * Compute a dense affinity matrix for all objects of the relation.
 *
 * The pairwise distance matrix is built with the configured distance
 * function and passed to {@code computePij} together with the configured
 * perplexity and the given initial scale.
 *
 * @param relation Relation to compute the affinities for
 * @param initialScale Initial scale, forwarded to {@code computePij}
 * @return Dense affinity matrix over the array of object ids
 */
@Override
public <T extends O> AffinityMatrix computeAffinityMatrix(Relation<T> relation, double initialScale) {
  final DistanceQuery<T> distances = relation.getDistanceQuery(distanceFunction);
  // Array-based ids are passed to both the distance matrix and the result.
  final ArrayDBIDs objects = DBIDUtil.ensureArray(relation.getDBIDs());
  // Distances first, then the desired affinities derived from them.
  return new DenseAffinityMatrix(computePij(buildDistanceMatrix(objects, distances), perplexity, initialScale), objects);
}
use of de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs in project elki by elki-project.
the class DistanceQuantileSampler method run.
/**
 * Run the distance quantile sampler.
 *
 * Draws random pairs of distinct objects, computes their distance, and feeds
 * the distances into a max-heap bounded to the quantile size; the value on
 * top of that heap is reported as statistic and returned.
 *
 * @param database Database (not used by this method)
 * @param rel Relation to sample object pairs from
 * @return Single-row result containing the sampled quantile distance
 * @throws AbortException if the relation has fewer than two objects, or if
 *         the requested sample size exceeds {@link Integer#MAX_VALUE}
 */
public CollectionResult<double[]> run(Database database, Relation<O> rel) {
  DistanceQuery<O> dq = rel.getDistanceQuery(getDistanceFunction());
  final int size = rel.size();
  // Without two objects there is no pair to draw: r.nextInt(size - 1) below
  // would be called with a non-positive bound, and with a relative sample
  // size the loop would not run at all, leaving the heap without elements.
  if (size < 2) {
    throw new AbortException("At least two objects are required to sample distances.");
  }
  long pairs = (size * (long) size) >> 1;
  // Values up to 1 are a fraction of the pairs, larger values an absolute count.
  final long ssize = sampling <= 1 ? (long) Math.ceil(sampling * pairs) : (long) sampling;
  if (ssize > Integer.MAX_VALUE) {
    throw new AbortException("Sampling size too large.");
  }
  final int qsize = quantile <= 0 ? 1 : (int) Math.ceil(quantile * ssize);
  DoubleMaxHeap heap = new DoubleMaxHeap(qsize);
  ArrayDBIDs ids = DBIDUtil.ensureArray(rel.getDBIDs());
  DBIDArrayIter i1 = ids.iter(), i2 = ids.iter();
  Random r = rand.getSingleThreadedRandom();
  FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Sampling", (int) ssize, LOG) : null;
  for (long i = 0; i < ssize; i++) {
    // x is in [1, size - 1] and y in [0, x - 1], so the two objects differ.
    int x = r.nextInt(size - 1) + 1, y = r.nextInt(x);
    double dist = dq.distance(i1.seek(x), i2.seek(y));
    // Skip NaN, and/or zeros. Skipped draws are not replaced and do not
    // advance the progress; ensureCompleted below closes the progress.
    if (Double.isNaN(dist) || (nozeros && dist < Double.MIN_NORMAL)) {
      continue;
    }
    heap.add(dist, qsize);
    LOG.incrementProcessed(prog);
  }
  final double result = heap.peek();
  LOG.statistics(new DoubleStatistic(PREFIX + ".quantile", quantile));
  LOG.statistics(new LongStatistic(PREFIX + ".samplesize", ssize));
  LOG.statistics(new DoubleStatistic(PREFIX + ".distance", result));
  LOG.ensureCompleted(prog);
  Collection<String> header = Arrays.asList(new String[] { "Distance" });
  Collection<double[]> data = Arrays.asList(new double[][] { new double[] { result } });
  return new CollectionResult<double[]>("Distances sample", "distance-sample", data, header);
}
Aggregations