use of de.lmu.ifi.dbs.elki.database.datastore.WritableIntegerDataStore in project elki by elki-project.
the class KMediansLloyd method run.
@Override
public Clustering<MeanModel> run(Database database, Relation<V> relation) {
if (relation.size() <= 0) {
return new Clustering<>("k-Medians Clustering", "kmedians-clustering");
}
// Choose initial medians
if (LOG.isStatistics()) {
LOG.statistics(new StringStatistic(KEY + ".initialization", initializer.toString()));
}
double[][] medians = initializer.chooseInitialMeans(database, relation, k, getDistanceFunction());
// Setup cluster assignment store
List<ModifiableDBIDs> clusters = new ArrayList<>();
for (int i = 0; i < k; i++) {
clusters.add(DBIDUtil.newHashSet((int) (relation.size() * 2. / k)));
}
WritableIntegerDataStore assignment = DataStoreUtil.makeIntegerStorage(relation.getDBIDs(), DataStoreFactory.HINT_TEMP | DataStoreFactory.HINT_HOT, -1);
double[] distsum = new double[k];
IndefiniteProgress prog = LOG.isVerbose() ? new IndefiniteProgress("K-Medians iteration", LOG) : null;
int iteration = 0;
for (; maxiter <= 0 || iteration < maxiter; iteration++) {
LOG.incrementProcessed(prog);
boolean changed = assignToNearestCluster(relation, medians, clusters, assignment, distsum);
// Stop if no cluster assignment changed.
if (!changed) {
break;
}
// Recompute medians.
medians = medians(clusters, medians, relation);
}
LOG.setCompleted(prog);
if (LOG.isStatistics()) {
LOG.statistics(new LongStatistic(KEY + ".iterations", iteration));
}
// Wrap result
Clustering<MeanModel> result = new Clustering<>("k-Medians Clustering", "kmedians-clustering");
for (int i = 0; i < clusters.size(); i++) {
MeanModel model = new MeanModel(medians[i]);
result.addToplevelCluster(new Cluster<>(clusters.get(i), model));
}
return result;
}
use of de.lmu.ifi.dbs.elki.database.datastore.WritableIntegerDataStore in project elki by elki-project.
the class KMedoidsPAM method run.
/**
* Run k-medoids
*
* @param database Database
* @param relation relation to use
* @return result
*/
public Clustering<MedoidModel> run(Database database, Relation<V> relation) {
if (relation.size() <= 0) {
return new Clustering<>("PAM Clustering", "pam-clustering");
}
if (k > 0x7FFF) {
throw new NotImplementedException("PAM supports at most " + 0x7FFF + " clusters.");
}
DistanceQuery<V> distQ = DatabaseUtil.precomputedDistanceQuery(database, relation, getDistanceFunction(), LOG);
DBIDs ids = relation.getDBIDs();
// Choose initial medoids
if (LOG.isStatistics()) {
LOG.statistics(new StringStatistic(KEY + ".initialization", initializer.toString()));
}
ArrayModifiableDBIDs medoids = DBIDUtil.newArray(initializer.chooseInitialMedoids(k, ids, distQ));
if (medoids.size() != k) {
throw new AbortException("Initializer " + initializer.toString() + " did not return " + k + " means, but " + medoids.size());
}
// Setup cluster assignment store
WritableIntegerDataStore assignment = DataStoreUtil.makeIntegerStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP, -1);
run(distQ, ids, medoids, assignment);
ArrayModifiableDBIDs[] clusters = ClusteringAlgorithmUtil.partitionsFromIntegerLabels(ids, assignment, k);
// Wrap result
Clustering<MedoidModel> result = new Clustering<>("PAM Clustering", "pam-clustering");
for (DBIDArrayIter it = medoids.iter(); it.valid(); it.advance()) {
result.addToplevelCluster(new Cluster<>(clusters[it.getOffset()], new MedoidModel(DBIDUtil.deref(it))));
}
return result;
}
use of de.lmu.ifi.dbs.elki.database.datastore.WritableIntegerDataStore in project elki by elki-project.
the class SingleAssignmentKMeans method run.
@Override
public Clustering<KMeansModel> run(Database database, Relation<V> relation) {
if (relation.size() <= 0) {
return new Clustering<>("k-Means Assignment", "kmeans-assignment");
}
// Choose initial means
if (LOG.isStatistics()) {
LOG.statistics(new StringStatistic(KEY + ".initialization", initializer.toString()));
}
double[][] means = initializer.chooseInitialMeans(database, relation, k, getDistanceFunction());
// Setup cluster assignment store
List<ModifiableDBIDs> clusters = new ArrayList<>();
for (int i = 0; i < k; i++) {
clusters.add(DBIDUtil.newHashSet((int) (relation.size() * 2. / k)));
}
WritableIntegerDataStore assignment = DataStoreUtil.makeIntegerStorage(relation.getDBIDs(), DataStoreFactory.HINT_TEMP | DataStoreFactory.HINT_HOT, -1);
double[] varsum = new double[k];
assignToNearestCluster(relation, means, clusters, assignment, varsum);
// Wrap result
Clustering<KMeansModel> result = new Clustering<>("Nearest Centroid Clustering", "nearest-center-clustering");
for (int i = 0; i < clusters.size(); i++) {
KMeansModel model = new KMeansModel(means[i], varsum[i]);
result.addToplevelCluster(new Cluster<>(clusters.get(i), model));
}
return result;
}
use of de.lmu.ifi.dbs.elki.database.datastore.WritableIntegerDataStore in project elki by elki-project.
the class SimpleCOP method run.
public OutlierResult run(Database database, Relation<V> data) throws IllegalStateException {
KNNQuery<V> knnQuery = QueryUtil.getKNNQuery(data, getDistanceFunction(), k + 1);
DBIDs ids = data.getDBIDs();
WritableDoubleDataStore cop_score = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_STATIC);
WritableDataStore<double[]> cop_err_v = DataStoreUtil.makeStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_STATIC, double[].class);
WritableDataStore<double[][]> cop_datav = DataStoreUtil.makeStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_STATIC, double[][].class);
WritableIntegerDataStore cop_dim = DataStoreUtil.makeIntegerStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_STATIC, -1);
WritableDataStore<CorrelationAnalysisSolution<?>> cop_sol = DataStoreUtil.makeStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_STATIC, CorrelationAnalysisSolution.class);
{
// compute neighbors of each db object
FiniteProgress progressLocalPCA = LOG.isVerbose() ? new FiniteProgress("Correlation Outlier Probabilities", data.size(), LOG) : null;
double sqrt2 = MathUtil.SQRT2;
for (DBIDIter id = data.iterDBIDs(); id.valid(); id.advance()) {
KNNList neighbors = knnQuery.getKNNForDBID(id, k + 1);
ModifiableDBIDs nids = DBIDUtil.newArray(neighbors);
nids.remove(id);
// TODO: do we want to use the query point as centroid?
CorrelationAnalysisSolution<V> depsol = dependencyDerivator.generateModel(data, nids);
double stddev = depsol.getStandardDeviation();
double distance = depsol.distance(data.get(id));
double prob = NormalDistribution.erf(distance / (stddev * sqrt2));
cop_score.putDouble(id, prob);
cop_err_v.put(id, times(depsol.errorVector(data.get(id)), -1));
double[][] datav = depsol.dataProjections(data.get(id));
cop_datav.put(id, datav);
cop_dim.putInt(id, depsol.getCorrelationDimensionality());
cop_sol.put(id, depsol);
LOG.incrementProcessed(progressLocalPCA);
}
LOG.ensureCompleted(progressLocalPCA);
}
// combine results.
DoubleRelation scoreResult = new MaterializedDoubleRelation("Original Correlation Outlier Probabilities", "origcop-outlier", cop_score, ids);
OutlierScoreMeta scoreMeta = new ProbabilisticOutlierScore();
OutlierResult result = new OutlierResult(scoreMeta, scoreResult);
// extra results
result.addChildResult(new MaterializedRelation<>("Local Dimensionality", COP.COP_DIM, TypeUtil.INTEGER, cop_dim, ids));
result.addChildResult(new MaterializedRelation<>("Error vectors", COP.COP_ERRORVEC, TypeUtil.DOUBLE_ARRAY, cop_err_v, ids));
result.addChildResult(new MaterializedRelation<>("Data vectors", "cop-datavec", TypeUtil.MATRIX, cop_datav, ids));
result.addChildResult(new MaterializedRelation<>("Correlation analysis", "cop-sol", new SimpleTypeInformation<CorrelationAnalysisSolution<?>>(CorrelationAnalysisSolution.class), cop_sol, ids));
return result;
}
use of de.lmu.ifi.dbs.elki.database.datastore.WritableIntegerDataStore in project elki by elki-project.
the class OPTICSOF method run.
/**
* Perform OPTICS-based outlier detection.
*
* @param database Database
* @param relation Relation
* @return Outlier detection result
*/
public OutlierResult run(Database database, Relation<O> relation) {
DistanceQuery<O> distQuery = database.getDistanceQuery(relation, getDistanceFunction());
KNNQuery<O> knnQuery = database.getKNNQuery(distQuery, minpts);
RangeQuery<O> rangeQuery = database.getRangeQuery(distQuery);
DBIDs ids = relation.getDBIDs();
// FIXME: implicit preprocessor.
WritableDataStore<KNNList> nMinPts = DataStoreUtil.makeStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP, KNNList.class);
WritableDoubleDataStore coreDistance = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP);
WritableIntegerDataStore minPtsNeighborhoodSize = DataStoreUtil.makeIntegerStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP, -1);
for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
KNNList minptsNeighbours = knnQuery.getKNNForDBID(iditer, minpts);
double d = minptsNeighbours.getKNNDistance();
nMinPts.put(iditer, minptsNeighbours);
coreDistance.putDouble(iditer, d);
minPtsNeighborhoodSize.put(iditer, rangeQuery.getRangeForDBID(iditer, d).size());
}
// Pass 2
WritableDataStore<List<Double>> reachDistance = DataStoreUtil.makeStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP, List.class);
WritableDoubleDataStore lrds = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP);
for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
List<Double> core = new ArrayList<>();
double lrd = 0;
// TODO: optimize for double distances
for (DoubleDBIDListIter neighbor = nMinPts.get(iditer).iter(); neighbor.valid(); neighbor.advance()) {
double coreDist = coreDistance.doubleValue(neighbor);
double dist = distQuery.distance(iditer, neighbor);
double rd = MathUtil.max(coreDist, dist);
lrd = rd + lrd;
core.add(rd);
}
lrd = minPtsNeighborhoodSize.intValue(iditer) / lrd;
reachDistance.put(iditer, core);
lrds.putDouble(iditer, lrd);
}
// Pass 3
DoubleMinMax ofminmax = new DoubleMinMax();
WritableDoubleDataStore ofs = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_STATIC);
for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
double of = 0;
for (DBIDIter neighbor = nMinPts.get(iditer).iter(); neighbor.valid(); neighbor.advance()) {
double lrd = lrds.doubleValue(iditer);
double lrdN = lrds.doubleValue(neighbor);
of = of + lrdN / lrd;
}
of = of / minPtsNeighborhoodSize.intValue(iditer);
ofs.putDouble(iditer, of);
// update minimum and maximum
ofminmax.put(of);
}
// Build result representation.
DoubleRelation scoreResult = new MaterializedDoubleRelation("OPTICS Outlier Scores", "optics-outlier", ofs, relation.getDBIDs());
OutlierScoreMeta scoreMeta = new QuotientOutlierScoreMeta(ofminmax.getMin(), ofminmax.getMax(), 0.0, Double.POSITIVE_INFINITY, 1.0);
return new OutlierResult(scoreMeta, scoreResult);
}
Aggregations