Use of de.lmu.ifi.dbs.elki.database.datastore.WritableIntegerDataStore in project elki by elki-project.
The class ParallelLloydKMeans, method run:
@Override
public Clustering<KMeansModel> run(Database database, Relation<V> relation) {
  DBIDs ids = relation.getDBIDs();
  // Choose initial means
  double[][] means = initializer.chooseInitialMeans(database, relation, k, getDistanceFunction());
  // Store for current cluster assignment.
  WritableIntegerDataStore assignment = DataStoreUtil.makeIntegerStorage(relation.getDBIDs(), DataStoreFactory.HINT_TEMP | DataStoreFactory.HINT_HOT, -1);
  double[] varsum = new double[k];
  KMeansProcessor<V> kmm = new KMeansProcessor<>(relation, distanceFunction, assignment, varsum);
  IndefiniteProgress prog = LOG.isVerbose() ? new IndefiniteProgress("K-Means iteration", LOG) : null;
  for (int iteration = 0; maxiter <= 0 || iteration < maxiter; iteration++) {
    LOG.incrementProcessed(prog);
    kmm.nextIteration(means);
    ParallelExecutor.run(ids, kmm);
    // Stop if no cluster assignment changed.
    if (!kmm.changed()) {
      break;
    }
    means = kmm.getMeans();
  }
  LOG.setCompleted(prog);
  // Wrap result
  ArrayModifiableDBIDs[] clusters = ClusteringAlgorithmUtil.partitionsFromIntegerLabels(ids, assignment, k);
  Clustering<KMeansModel> result = new Clustering<>("k-Means Clustering", "kmeans-clustering");
  for (int i = 0; i < clusters.length; i++) {
    DBIDs cids = clusters[i];
    if (cids.size() == 0) {
      continue;
    }
    KMeansModel model = new KMeansModel(means[i], varsum[i]);
    result.addToplevelCluster(new Cluster<>(cids, model));
  }
  return result;
}
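The pattern above — fill a WritableIntegerDataStore with cluster labels, then turn the labels into DBID partitions — also works standalone. A minimal sketch, reusing only calls that appear in the snippet; the class wrapper, method name, and round-robin labeling are illustrative, and the import paths assume the ELKI 0.7.x package layout:

import de.lmu.ifi.dbs.elki.algorithm.clustering.ClusteringAlgorithmUtil;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
import de.lmu.ifi.dbs.elki.database.datastore.WritableIntegerDataStore;
import de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;

class AssignmentSketch {
  static ArrayModifiableDBIDs[] toPartitions(DBIDs ids, int k) {
    // -1 marks "not yet assigned"; HINT_TEMP | HINT_HOT requests fast, short-lived storage.
    WritableIntegerDataStore assignment = DataStoreUtil.makeIntegerStorage(ids, DataStoreFactory.HINT_TEMP | DataStoreFactory.HINT_HOT, -1);
    int c = 0;
    for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
      // Illustrative round-robin labels; a real algorithm writes its cluster indices here.
      assignment.putInt(iter, c);
      c = (c + 1) % k;
    }
    // Convert the integer labels into one DBID set per cluster, as the method above does.
    return ClusteringAlgorithmUtil.partitionsFromIntegerLabels(ids, assignment, k);
  }
}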
Use of de.lmu.ifi.dbs.elki.database.datastore.WritableIntegerDataStore in project elki by elki-project.
The class DWOF, method run:
/**
* Performs the Generalized DWOF_SCORE algorithm on the given database by
* calling all the other methods in the proper order.
*
* @param database Database to query
* @param relation Data to process
* @return new OutlierResult instance
*/
public OutlierResult run(Database database, Relation<O> relation) {
  final DBIDs ids = relation.getDBIDs();
  DistanceQuery<O> distFunc = database.getDistanceQuery(relation, getDistanceFunction());
  // Get k nearest neighbor and range queries on the relation.
  KNNQuery<O> knnq = database.getKNNQuery(distFunc, k, DatabaseQuery.HINT_HEAVY_USE);
  RangeQuery<O> rnnQuery = database.getRangeQuery(distFunc, DatabaseQuery.HINT_HEAVY_USE);
  StepProgress stepProg = LOG.isVerbose() ? new StepProgress("DWOF", 2) : null;
  // DWOF output score storage.
  WritableDoubleDataStore dwofs = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_DB | DataStoreFactory.HINT_HOT, 0.);
  if (stepProg != null) {
    stepProg.beginStep(1, "Initializing objects' Radii", LOG);
  }
  WritableDoubleDataStore radii = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_TEMP | DataStoreFactory.HINT_HOT, 0.);
  // Find an initial radius for each object:
  initializeRadii(ids, knnq, distFunc, radii);
  WritableIntegerDataStore oldSizes = DataStoreUtil.makeIntegerStorage(ids, DataStoreFactory.HINT_HOT, 1);
  WritableIntegerDataStore newSizes = DataStoreUtil.makeIntegerStorage(ids, DataStoreFactory.HINT_HOT, 1);
  int countUnmerged = relation.size();
  if (stepProg != null) {
    stepProg.beginStep(2, "Clustering-Evaluating Cycles.", LOG);
  }
  IndefiniteProgress clusEvalProgress = LOG.isVerbose() ? new IndefiniteProgress("Evaluating DWOFs", LOG) : null;
  while (countUnmerged > 0) {
    LOG.incrementProcessed(clusEvalProgress);
    // Increase radii
    for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
      radii.putDouble(iter, radii.doubleValue(iter) * delta);
    }
    // Stores the clustering label for each object.
    WritableDataStore<ModifiableDBIDs> labels = DataStoreUtil.makeStorage(ids, DataStoreFactory.HINT_TEMP, ModifiableDBIDs.class);
    // Cluster objects based on the current radius.
    clusterData(ids, rnnQuery, radii, labels);
    // Simple reference swap: the previous "new" sizes become the old sizes.
    WritableIntegerDataStore temp = newSizes;
    newSizes = oldSizes;
    oldSizes = temp;
    // Update the cluster size count for each object.
    countUnmerged = updateSizes(ids, labels, newSizes);
    labels.destroy();
    // Update DWOF scores.
    for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
      double newScore = (newSizes.intValue(iter) > 0) ? ((double) (oldSizes.intValue(iter) - 1) / (double) newSizes.intValue(iter)) : 0.0;
      dwofs.putDouble(iter, dwofs.doubleValue(iter) + newScore);
    }
  }
  LOG.setCompleted(clusEvalProgress);
  LOG.setCompleted(stepProg);
  // Build result representation.
  DoubleMinMax minmax = new DoubleMinMax();
  for (DBIDIter iter = relation.iterDBIDs(); iter.valid(); iter.advance()) {
    minmax.put(dwofs.doubleValue(iter));
  }
  OutlierScoreMeta meta = new InvertedOutlierScoreMeta(minmax.getMin(), minmax.getMax(), 0.0, Double.POSITIVE_INFINITY);
  DoubleRelation rel = new MaterializedDoubleRelation("Dynamic-Window Outlier Factors", "dwof-outlier", dwofs, ids);
  return new OutlierResult(meta, rel);
}
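The score accumulation at the end of the loop is the heart of the integer-store usage here: read the two size buffers with intValue, grow the running DWOF score with putDouble. Pulled out as a self-contained sketch (class and method names are illustrative; import paths assume the ELKI 0.7.x layout):

import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
import de.lmu.ifi.dbs.elki.database.datastore.WritableIntegerDataStore;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;

class DwofScoreSketch {
  // One score-update pass: score(o) += (|oldCluster(o)| - 1) / |newCluster(o)|.
  static void updateScores(DBIDs ids, WritableIntegerDataStore oldSizes, WritableIntegerDataStore newSizes, WritableDoubleDataStore dwofs) {
    for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
      int cur = newSizes.intValue(iter);
      double inc = cur > 0 ? (oldSizes.intValue(iter) - 1) / (double) cur : 0.;
      dwofs.putDouble(iter, dwofs.doubleValue(iter) + inc);
    }
  }
}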
Use of de.lmu.ifi.dbs.elki.database.datastore.WritableIntegerDataStore in project elki by elki-project.
The class COP, method run:
/**
* Process a single relation.
*
* @param relation Relation to process
* @return Outlier detection result
*/
public OutlierResult run(Relation<V> relation) {
  final DBIDs ids = relation.getDBIDs();
  KNNQuery<V> knnQuery = QueryUtil.getKNNQuery(relation, getDistanceFunction(), k + 1);
  final int dim = RelationUtil.dimensionality(relation);
  if (k <= dim + 1) {
    LOG.warning("PCA is underspecified with a too low k! k should be much larger than " + dim);
  }
  WritableDoubleDataStore cop_score = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_STATIC);
  WritableDataStore<double[]> cop_err_v = null;
  WritableIntegerDataStore cop_dim = null;
  if (models) {
    cop_err_v = DataStoreUtil.makeStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_STATIC, double[].class);
    cop_dim = DataStoreUtil.makeIntegerStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_STATIC, -1);
  }
  // Compute neighbors of each database object.
  FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Correlation Outlier Probabilities", relation.size(), LOG) : null;
  for (DBIDIter id = ids.iter(); id.valid(); id.advance()) {
    KNNList neighbors = knnQuery.getKNNForDBID(id, k + 1);
    ModifiableDBIDs nids = DBIDUtil.newHashSet(neighbors);
    // Do not use the query object itself.
    nids.remove(id);
    double[] centroid = Centroid.make(relation, nids).getArrayRef();
    double[] relative = minusEquals(relation.get(id).toArray(), centroid);
    PCAResult pcares = pca.processIds(nids, relation);
    double[][] evecs = pcares.getEigenvectors();
    double[] projected = transposeTimes(evecs, relative);
    double[] evs = pcares.getEigenvalues();
    double min = Double.POSITIVE_INFINITY;
    int vdim = dim;
    switch(dist) {
    case CHISQUARED: {
      double sqdevs = 0;
      for (int d = 0; d < dim; d++) {
        // Scale with Stddev
        double dev = projected[d];
        // Accumulate
        sqdevs += dev * dev / evs[d];
        // Evaluate
        double score = 1 - ChiSquaredDistribution.cdf(sqdevs, d + 1);
        if (score < min) {
          min = score;
          vdim = d + 1;
        }
      }
      break;
    }
    case GAMMA: {
      double[][] dists = new double[dim][nids.size()];
      int j = 0;
      double[] srel = new double[dim];
      for (DBIDIter s = nids.iter(); s.valid() && j < nids.size(); s.advance()) {
        V vec = relation.get(s);
        for (int d = 0; d < dim; d++) {
          srel[d] = vec.doubleValue(d) - centroid[d];
        }
        double[] serr = transposeTimes(evecs, srel);
        double sqdist = 0.0;
        for (int d = 0; d < dim; d++) {
          double serrd = serr[d];
          sqdist += serrd * serrd / evs[d];
          dists[d][j] = sqdist;
        }
        j++;
      }
      double sqdevs = 0;
      for (int d = 0; d < dim; d++) {
        // Scale with Stddev
        final double dev = projected[d];
        // Accumulate
        sqdevs += dev * dev / evs[d];
        // Sort, so we can trim the top 15% below.
        Arrays.sort(dists[d]);
        // Evaluate
        double score = 1 - GammaChoiWetteEstimator.STATIC.estimate(dists[d], SHORTENED_ARRAY).cdf(sqdevs);
        if (score < min) {
          min = score;
          vdim = d + 1;
        }
      }
      break;
    }
    }
    // Normalize the value
    final double prob = expect * (1 - min) / (expect + min);
    // Construct the error vector:
    for (int d = vdim; d < dim; d++) {
      projected[d] = 0.;
    }
    double[] ev = timesEquals(times(evecs, projected), -1 * prob);
    cop_score.putDouble(id, prob);
    if (models) {
      cop_err_v.put(id, ev);
      cop_dim.putInt(id, dim + 1 - vdim);
    }
    LOG.incrementProcessed(prog);
  }
  LOG.ensureCompleted(prog);
  // Combine results.
  DoubleRelation scoreResult = new MaterializedDoubleRelation("Correlation Outlier Probabilities", COP_SCORES, cop_score, ids);
  OutlierScoreMeta scoreMeta = new ProbabilisticOutlierScore();
  OutlierResult result = new OutlierResult(scoreMeta, scoreResult);
  if (models) {
    result.addChildResult(new MaterializedRelation<>("Local Dimensionality", COP_DIM, TypeUtil.INTEGER, cop_dim, ids));
    result.addChildResult(new MaterializedRelation<>("Error vectors", COP_ERRORVEC, TypeUtil.DOUBLE_ARRAY, cop_err_v, ids));
  }
  return result;
}
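Because cop_dim is allocated with default -1 and only written when models is enabled, a consumer can distinguish written entries from untouched ones. A minimal sketch of reading the store back (hypothetical helper, assuming the read-only IntegerDataStore interface from the same datastore package):

import de.lmu.ifi.dbs.elki.database.datastore.IntegerDataStore;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;

class CopDimSketch {
  // Count objects that actually received a local dimensionality;
  // -1 is the allocation-time default used above.
  static int countAssigned(DBIDs ids, IntegerDataStore cop_dim) {
    int n = 0;
    for (DBIDIter id = ids.iter(); id.valid(); id.advance()) {
      if (cop_dim.intValue(id) >= 0) {
        n++;
      }
    }
    return n;
  }
}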
Use of de.lmu.ifi.dbs.elki.database.datastore.WritableIntegerDataStore in project elki by elki-project.
The class UKMeans, method run:
/**
* Run the clustering.
*
* @param database the Database
* @param relation the Relation
* @return Clustering result
*/
public Clustering<?> run(final Database database, final Relation<DiscreteUncertainObject> relation) {
  if (relation.size() <= 0) {
    return new Clustering<>("Uk-Means Clustering", "ukmeans-clustering");
  }
  // Choose initial means randomly
  DBIDs sampleids = DBIDUtil.randomSample(relation.getDBIDs(), k, rnd);
  List<double[]> means = new ArrayList<>(k);
  for (DBIDIter iter = sampleids.iter(); iter.valid(); iter.advance()) {
    means.add(ArrayLikeUtil.toPrimitiveDoubleArray(relation.get(iter).getCenterOfMass()));
  }
  // Setup cluster assignment store
  List<ModifiableDBIDs> clusters = new ArrayList<>();
  for (int i = 0; i < k; i++) {
    clusters.add(DBIDUtil.newHashSet((int) (relation.size() * 2. / k)));
  }
  WritableIntegerDataStore assignment = DataStoreUtil.makeIntegerStorage(relation.getDBIDs(), DataStoreFactory.HINT_TEMP | DataStoreFactory.HINT_HOT, -1);
  double[] varsum = new double[k];
  IndefiniteProgress prog = LOG.isVerbose() ? new IndefiniteProgress("UK-Means iteration", LOG) : null;
  DoubleStatistic varstat = LOG.isStatistics() ? new DoubleStatistic(this.getClass().getName() + ".variance-sum") : null;
  int iteration = 0;
  for (; maxiter <= 0 || iteration < maxiter; iteration++) {
    LOG.incrementProcessed(prog);
    boolean changed = assignToNearestCluster(relation, means, clusters, assignment, varsum);
    logVarstat(varstat, varsum);
    // Stop if no cluster assignment changed.
    if (!changed) {
      break;
    }
    // Recompute means.
    means = means(clusters, means, relation);
  }
  LOG.setCompleted(prog);
  if (LOG.isStatistics()) {
    LOG.statistics(new LongStatistic(KEY + ".iterations", iteration));
  }
  // Wrap result
  Clustering<KMeansModel> result = new Clustering<>("Uk-Means Clustering", "ukmeans-clustering");
  for (int i = 0; i < clusters.size(); i++) {
    DBIDs ids = clusters.get(i);
    if (ids.isEmpty()) {
      continue;
    }
    result.addToplevelCluster(new Cluster<>(ids, new KMeansModel(means.get(i), varsum[i])));
  }
  return result;
}
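assignToNearestCluster is where the assignment store earns its keep: comparing the stored label against the newly computed one is what makes the changed flag cheap. A sketch of one plausible per-object update under that contract (class and method names are illustrative, not the project's helper; -1 is the unassigned default from the allocation above):

import java.util.List;
import de.lmu.ifi.dbs.elki.database.datastore.WritableIntegerDataStore;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs;

class UpdateSketch {
  // Move one object to cluster newA; the stored label (-1 = unassigned)
  // tells us whether anything actually changed.
  static boolean updateAssignment(DBIDIter iter, List<ModifiableDBIDs> clusters, WritableIntegerDataStore assignment, int newA) {
    final int oldA = assignment.intValue(iter);
    if (oldA == newA) {
      return false;
    }
    clusters.get(newA).add(iter);
    assignment.putInt(iter, newA);
    if (oldA >= 0) {
      clusters.get(oldA).remove(iter);
    }
    return true;
  }
}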
Use of de.lmu.ifi.dbs.elki.database.datastore.WritableIntegerDataStore in project elki by elki-project.
The class KMeansBatchedLloyd, method run:
@Override
public Clustering<KMeansModel> run(Database database, Relation<V> relation) {
  final int dim = RelationUtil.dimensionality(relation);
  // Choose initial means
  if (LOG.isStatistics()) {
    LOG.statistics(new StringStatistic(KEY + ".initializer", initializer.toString()));
  }
  double[][] means = initializer.chooseInitialMeans(database, relation, k, getDistanceFunction());
  // Setup cluster assignment store
  List<ModifiableDBIDs> clusters = new ArrayList<>();
  for (int i = 0; i < k; i++) {
    clusters.add(DBIDUtil.newHashSet((int) (relation.size() * 2. / k)));
  }
  WritableIntegerDataStore assignment = DataStoreUtil.makeIntegerStorage(relation.getDBIDs(), DataStoreFactory.HINT_TEMP | DataStoreFactory.HINT_HOT, -1);
  ArrayDBIDs[] parts = DBIDUtil.randomSplit(relation.getDBIDs(), blocks, random);
  double[][] meanshift = new double[k][dim];
  int[] changesize = new int[k];
  double[] varsum = new double[k];
  IndefiniteProgress prog = LOG.isVerbose() ? new IndefiniteProgress("K-Means iteration", LOG) : null;
  DoubleStatistic varstat = LOG.isStatistics() ? new DoubleStatistic(this.getClass().getName() + ".variance-sum") : null;
  int iteration = 0;
  for (; maxiter <= 0 || iteration < maxiter; iteration++) {
    LOG.incrementProcessed(prog);
    boolean changed = false;
    FiniteProgress pprog = LOG.isVerbose() ? new FiniteProgress("Batch", parts.length, LOG) : null;
    for (int p = 0; p < parts.length; p++) {
      // Initialize new means scratch space.
      for (int i = 0; i < k; i++) {
        Arrays.fill(meanshift[i], 0.);
      }
      Arrays.fill(changesize, 0);
      Arrays.fill(varsum, 0.);
      changed |= assignToNearestCluster(relation, parts[p], means, meanshift, changesize, clusters, assignment, varsum);
      // Recompute means.
      updateMeans(means, meanshift, clusters, changesize);
      LOG.incrementProcessed(pprog);
    }
    LOG.ensureCompleted(pprog);
    logVarstat(varstat, varsum);
    // Stop if no cluster assignment changed.
    if (!changed) {
      break;
    }
  }
  LOG.setCompleted(prog);
  if (LOG.isStatistics()) {
    LOG.statistics(new LongStatistic(KEY + ".iterations", iteration));
  }
  // Wrap result
  Clustering<KMeansModel> result = new Clustering<>("k-Means Clustering", "kmeans-clustering");
  for (int i = 0; i < clusters.size(); i++) {
    DBIDs ids = clusters.get(i);
    if (ids.size() == 0) {
      continue;
    }
    KMeansModel model = new KMeansModel(means[i], varsum[i]);
    result.addToplevelCluster(new Cluster<>(ids, model));
  }
  return result;
}
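In the batched variant, updateMeans folds the shift accumulated per block into the means instead of recomputing every mean from scratch. A back-of-the-envelope illustration of that incremental idea (this is not the project's updateMeans; the size bookkeeping and weighting are assumptions made for the sketch):

class MeanShiftSketch {
  /**
   * Fold an accumulated coordinate shift into the running means.
   *
   * @param means current cluster means, k x dim
   * @param meanshift accumulated coordinate sums of points that moved, k x dim
   * @param size current cluster sizes; updated in place
   * @param changesize net membership change per cluster in this batch
   */
  static void applyShift(double[][] means, double[][] meanshift, int[] size, int[] changesize) {
    for (int i = 0; i < means.length; i++) {
      final int newsize = size[i] + changesize[i];
      if (newsize <= 0) {
        continue; // cluster emptied; keep the old mean as a placeholder
      }
      for (int d = 0; d < means[i].length; d++) {
        // Old weighted sum plus the accumulated shift, renormalized by the new size.
        means[i][d] = (means[i][d] * size[i] + meanshift[i][d]) / newsize;
      }
      size[i] = newsize;
    }
  }
}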