use of de.lmu.ifi.dbs.elki.logging.statistics.DoubleStatistic in project elki by elki-project.
the class OutlierRankingEvaluation method evaluateOutlierResult.
/**
 * Evaluate one outlier ranking against the given set of positive objects.
 *
 * The measures ROC AUC, average precision, R-precision, maximum F1, DCG and
 * NDCG are added to the group "Evaluation measures"; values derived from them
 * are added to the group "Adjusted for chance". When statistics logging is
 * enabled, every value is additionally logged below the prefix {@code key}.
 *
 * @param size Total object count (presumably the data set size); used as
 *        the denominator of the positive rate
 * @param positiveids Ids of the objects regarded as positive
 * @param or Outlier result whose scores are evaluated
 * @return Evaluation result the measures were added to
 */
private EvaluationResult evaluateOutlierResult(int size, SetDBIDs positiveids, OutlierResult or) {
  final EvaluationResult res = EvaluationResult.findOrCreate(or.getHierarchy(), or, "Evaluation of ranking", "ranking-evaluation");
  final DBIDsTest positives = new DBIDsTest(positiveids);
  final int numPositive = positiveids.size();
  // Share of positive objects among all objects.
  final double positiveRate = numPositive / (double) size;

  // Plain measures. A fresh score adapter is created for every evaluation.
  final MeasurementGroup plain = res.findOrCreateGroup("Evaluation measures");
  final double rocauc = ROCEvaluation.STATIC.evaluate(positives, new OutlierScoreAdapter(or));
  // ROC AUC is the only measure that is skipped when already present.
  if (!plain.hasMeasure("ROC AUC")) {
    plain.addMeasure("ROC AUC", rocauc, 0., 1., .5, false);
  }
  final double avep = AveragePrecisionEvaluation.STATIC.evaluate(positives, new OutlierScoreAdapter(or));
  plain.addMeasure("Average Precision", avep, 0., 1., positiveRate, false);
  final double rprec = PrecisionAtKEvaluation.RPRECISION.evaluate(positives, new OutlierScoreAdapter(or));
  plain.addMeasure("R-Precision", rprec, 0., 1., positiveRate, false);
  final double maxf1 = MaximumF1Evaluation.STATIC.evaluate(positives, new OutlierScoreAdapter(or));
  plain.addMeasure("Maximum F1", maxf1, 0., 1., positiveRate, false);
  final double maxdcg = DCGEvaluation.maximum(numPositive);
  final double dcg = DCGEvaluation.STATIC.evaluate(positives, new OutlierScoreAdapter(or));
  plain.addMeasure("DCG", dcg, 0., maxdcg, DCGEvaluation.STATIC.expected(numPositive, size), false);
  final double ndcg = NDCGEvaluation.STATIC.evaluate(positives, new OutlierScoreAdapter(or));
  plain.addMeasure("NDCG", ndcg, 0., 1., NDCGEvaluation.STATIC.expected(numPositive, size), false);

  // Derived values: AUC is mapped via 2*x-1, the precision based measures via
  // (x - rate) / (1 - rate).
  final double adjauc = 2 * rocauc - 1;
  final double adjavep = (avep - positiveRate) / (1 - positiveRate);
  final double adjrprec = (rprec - positiveRate) / (1 - positiveRate);
  final double adjmaxf1 = (maxf1 - positiveRate) / (1 - positiveRate);

  final MeasurementGroup adjusted = res.findOrCreateGroup("Adjusted for chance");
  adjusted.addMeasure("Adjusted AUC", adjauc, 0., 1., 0., false);
  adjusted.addMeasure("Adjusted AveP", adjavep, 0., 1., 0., false);
  adjusted.addMeasure("Adjusted R-Prec", adjrprec, 0., 1., 0., false);
  adjusted.addMeasure("Adjusted Max F1", adjmaxf1, 0., 1., 0., false);
  // NDCG is adjusted with its own expected value instead of the positive rate.
  final double endcg = NDCGEvaluation.STATIC.expected(numPositive, size);
  final double adjndcg = (ndcg - endcg) / (1. - endcg);
  adjusted.addMeasure("Adjusted DCG", adjndcg, 0., 1., 0., false);

  if (!LOG.isStatistics()) {
    return res;
  }
  LOG.statistics(new DoubleStatistic(key + ".rocauc", rocauc));
  LOG.statistics(new DoubleStatistic(key + ".rocauc.adjusted", adjauc));
  LOG.statistics(new DoubleStatistic(key + ".precision.average", avep));
  LOG.statistics(new DoubleStatistic(key + ".precision.average.adjusted", adjavep));
  LOG.statistics(new DoubleStatistic(key + ".precision.r", rprec));
  LOG.statistics(new DoubleStatistic(key + ".precision.r.adjusted", adjrprec));
  LOG.statistics(new DoubleStatistic(key + ".f1.maximum", maxf1));
  LOG.statistics(new DoubleStatistic(key + ".f1.maximum.adjusted", adjmaxf1));
  LOG.statistics(new DoubleStatistic(key + ".dcg", dcg));
  LOG.statistics(new DoubleStatistic(key + ".dcg.normalized", ndcg));
  LOG.statistics(new DoubleStatistic(key + ".dcg.adjusted", adjndcg));
  return res;
}
use of de.lmu.ifi.dbs.elki.logging.statistics.DoubleStatistic in project elki by elki-project.
the class KMeansElkan method run.
/**
 * Run the clustering on the given relation.
 *
 * An empty relation yields an empty clustering. Otherwise the initial means
 * are chosen by the configured initializer and then refined iteratively,
 * until an iteration changes no assignment or {@code maxiter} iterations
 * have been performed (a {@code maxiter} of zero or less disables the limit).
 *
 * @param database Database, passed on to the initializer
 * @param relation Relation containing the objects to cluster
 * @return Clustering with one top-level cluster for every non-empty cluster
 */
@Override
public Clustering<KMeansModel> run(Database database, Relation<V> relation) {
// Nothing to cluster: return an empty result.
if (relation.size() <= 0) {
return new Clustering<>("k-Means Clustering", "kmeans-clustering");
}
// Choose initial means
if (LOG.isStatistics()) {
LOG.statistics(new StringStatistic(KEY + ".initialization", initializer.toString()));
}
double[][] means = initializer.chooseInitialMeans(database, relation, k, getDistanceFunction());
// Setup cluster assignment store
List<ModifiableDBIDs> clusters = new ArrayList<>();
for (int i = 0; i < k; i++) {
// Initial capacity hint of twice the average cluster size.
clusters.add(DBIDUtil.newHashSet((int) (relation.size() * 2. / k)));
}
// Integer value per object, created with -1 as last argument (presumably the
// default meaning "not assigned yet").
WritableIntegerDataStore assignment = DataStoreUtil.makeIntegerStorage(relation.getDBIDs(), DataStoreFactory.HINT_TEMP | DataStoreFactory.HINT_HOT, -1);
// Elkan bounds
// One upper bound per object (created with positive infinity) and one array
// of k lower bounds per object.
WritableDoubleDataStore upper = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_TEMP | DataStoreFactory.HINT_HOT, Double.POSITIVE_INFINITY);
WritableDataStore<double[]> lower = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_TEMP | DataStoreFactory.HINT_HOT, double[].class);
for (DBIDIter it = relation.iterDBIDs(); it.valid(); it.advance()) {
// Filled with 0.
lower.put(it, new double[k]);
}
// Storage for updated means:
final int dim = means[0].length;
double[][] sums = new double[k][dim];
// Cluster separation
double[] sep = new double[k];
// Cluster distances
double[][] cdist = new double[k][k];
IndefiniteProgress prog = LOG.isVerbose() ? new IndefiniteProgress("K-Means iteration", LOG) : null;
// Reassignment statistic; only allocated when statistics logging is enabled.
LongStatistic rstat = LOG.isStatistics() ? new LongStatistic(this.getClass().getName() + ".reassignments") : null;
int iteration = 0;
// maxiter <= 0 disables the iteration limit.
for (; maxiter <= 0 || iteration < maxiter; iteration++) {
LOG.incrementProcessed(prog);
int changed;
if (iteration == 0) {
// First pass uses the dedicated initial assignment (no sep / cdist yet).
changed = initialAssignToNearestCluster(relation, means, sums, clusters, assignment, upper, lower);
} else {
// #1
// Later passes: refresh sep and cdist from the current means first.
recomputeSeperation(means, sep, cdist);
changed = assignToNearestCluster(relation, means, sums, clusters, assignment, sep, cdist, upper, lower);
}
if (rstat != null) {
rstat.setLong(changed);
LOG.statistics(rstat);
}
// Stop if no cluster assignment changed.
if (changed == 0) {
break;
}
// Recompute means.
// Each sums[i] is scaled by 1/size (factor 1 for an empty cluster, which
// avoids a division by zero); presumably timesEquals scales in place.
for (int i = 0; i < k; i++) {
final int s = clusters.get(i).size();
timesEquals(sums[i], s > 0 ? 1. / s : 1.);
}
// Overwrites sep
// From here on sep no longer holds the separation, but the output of
// maxMoved, which is what updateBounds receives.
maxMoved(means, sums, sep);
updateBounds(relation, assignment, upper, lower, sep);
for (int i = 0; i < k; i++) {
final int s = clusters.get(i).size();
// sums[i] currently holds the new mean; copy it over the old one.
System.arraycopy(sums[i], 0, means[i], 0, dim);
// Restore to sum for next iteration
timesEquals(sums[i], s > 0 ? s : 1.);
}
}
LOG.setCompleted(prog);
if (LOG.isStatistics()) {
LOG.statistics(new LongStatistic(KEY + ".iterations", iteration));
}
// The bounds are not used after the iterations.
upper.destroy();
lower.destroy();
// Wrap result
double totalvariance = 0.;
Clustering<KMeansModel> result = new Clustering<>("k-Means Clustering", "kmeans-clustering");
for (int i = 0; i < clusters.size(); i++) {
DBIDs ids = clusters.get(i);
// Empty clusters are not reported.
if (ids.size() == 0) {
continue;
}
double[] mean = means[i];
double varsum = 0.;
// Only when varstat is set: sum the distance function values between the
// mean and every member. Otherwise the model is created with 0.
if (varstat) {
DoubleVector mvec = DoubleVector.wrap(mean);
for (DBIDIter it = ids.iter(); it.valid(); it.advance()) {
varsum += distanceFunction.distance(mvec, relation.get(it));
}
totalvariance += varsum;
}
KMeansModel model = new KMeansModel(mean, varsum);
result.addToplevelCluster(new Cluster<>(ids, model));
}
// The total over all clusters is logged only if it was actually computed.
if (LOG.isStatistics() && varstat) {
LOG.statistics(new DoubleStatistic(this.getClass().getName() + ".variance-sum", totalvariance));
}
return result;
}
use of de.lmu.ifi.dbs.elki.logging.statistics.DoubleStatistic in project elki by elki-project.
the class KMeansHybridLloydMacQueen method run.
/**
 * Run the clustering, alternating one MacQueen pass and one Lloyd pass.
 *
 * An empty relation yields an empty clustering. The alternation ends as soon
 * as either pass reports that no assignment changed, or when the iteration
 * counter (advanced by two per completed round) reaches {@code maxiter};
 * a {@code maxiter} of zero or less disables the limit.
 *
 * @param database Database, passed on to the initializer
 * @param relation Relation containing the objects to cluster
 * @return Clustering with one top-level cluster for every non-empty cluster
 */
@Override
public Clustering<KMeansModel> run(Database database, Relation<V> relation) {
  if (relation.size() <= 0) {
    return new Clustering<>("k-Means Clustering", "kmeans-clustering");
  }
  // Report the initializer, then let it pick the starting centers.
  if (LOG.isStatistics()) {
    LOG.statistics(new StringStatistic(KEY + ".initialization", initializer.toString()));
  }
  double[][] centers = initializer.chooseInitialMeans(database, relation, k, getDistanceFunction());

  // One id set per cluster, sized for twice the average cluster size.
  List<ModifiableDBIDs> clusters = new ArrayList<>();
  for (int c = 0; c < k; c++) {
    clusters.add(DBIDUtil.newHashSet((int) (relation.size() * 2. / k)));
  }
  WritableIntegerDataStore assignment = DataStoreUtil.makeIntegerStorage(relation.getDBIDs(), DataStoreFactory.HINT_TEMP | DataStoreFactory.HINT_HOT, -1);
  double[] varsum = new double[k];

  // Progress and statistics objects exist only when the respective logging
  // is enabled.
  IndefiniteProgress prog = null;
  if (LOG.isVerbose()) {
    prog = new IndefiniteProgress("K-Means iteration", LOG);
  }
  DoubleStatistic varstat = null;
  if (LOG.isStatistics()) {
    varstat = new DoubleStatistic(this.getClass().getName() + ".variance-sum");
  }

  int iteration = 0;
  while (maxiter <= 0 || iteration < maxiter) {
    // MacQueen half of the round.
    LOG.incrementProcessed(prog);
    final boolean macQueenChanged = macQueenIterate(relation, centers, clusters, assignment, varsum);
    logVarstat(varstat, varsum);
    if (!macQueenChanged) {
      break;
    }
    // Lloyd half of the round.
    LOG.incrementProcessed(prog);
    final boolean lloydChanged = assignToNearestCluster(relation, centers, clusters, assignment, varsum);
    logVarstat(varstat, varsum);
    if (!lloydChanged) {
      break;
    }
    centers = means(clusters, centers, relation);
    // Only a fully completed round advances the counter.
    iteration += 2;
  }
  LOG.setCompleted(prog);
  if (LOG.isStatistics()) {
    LOG.statistics(new LongStatistic(KEY + ".iterations", iteration));
  }

  // Build the result from all non-empty clusters.
  Clustering<KMeansModel> result = new Clustering<>("k-Means Clustering", "kmeans-clustering");
  for (int c = 0; c < clusters.size(); c++) {
    DBIDs members = clusters.get(c);
    if (members.size() != 0) {
      KMeansModel model = new KMeansModel(centers[c], varsum[c]);
      result.addToplevelCluster(new Cluster<>(members, model));
    }
  }
  return result;
}
use of de.lmu.ifi.dbs.elki.logging.statistics.DoubleStatistic in project elki by elki-project.
the class KMeansMacQueen method run.
/**
 * Run the clustering using repeated MacQueen passes.
 *
 * An empty relation yields an empty clustering. Passes are repeated until a
 * pass reports that no assignment changed, or until {@code maxiter} passes
 * were completed; a {@code maxiter} of zero or less disables the limit.
 *
 * @param database Database, passed on to the initializer
 * @param relation Relation containing the objects to cluster
 * @return Clustering with one top-level cluster for every non-empty cluster
 */
@Override
public Clustering<KMeansModel> run(Database database, Relation<V> relation) {
  if (relation.size() <= 0) {
    return new Clustering<>("k-Means Clustering", "kmeans-clustering");
  }
  // Report the initializer, then let it pick the starting centers.
  if (LOG.isStatistics()) {
    LOG.statistics(new StringStatistic(KEY + ".initialization", initializer.toString()));
  }
  double[][] centers = initializer.chooseInitialMeans(database, relation, k, getDistanceFunction());

  // One id set per cluster, sized for twice the average cluster size.
  List<ModifiableDBIDs> clusters = new ArrayList<>();
  for (int c = 0; c < k; c++) {
    clusters.add(DBIDUtil.newHashSet((int) (relation.size() * 2. / k)));
  }
  WritableIntegerDataStore assignment = DataStoreUtil.makeIntegerStorage(relation.getDBIDs(), DataStoreFactory.HINT_TEMP | DataStoreFactory.HINT_HOT, -1);
  double[] varsum = new double[k];

  // Progress and statistics objects exist only when the respective logging
  // is enabled.
  IndefiniteProgress prog = null;
  if (LOG.isVerbose()) {
    prog = new IndefiniteProgress("K-Means iteration", LOG);
  }
  DoubleStatistic varstat = null;
  if (LOG.isStatistics()) {
    varstat = new DoubleStatistic(this.getClass().getName() + ".variance-sum");
  }

  // Iterate MacQueen; a pass without changes ends the loop before the
  // counter is advanced.
  int iteration = 0;
  while (maxiter <= 0 || iteration < maxiter) {
    LOG.incrementProcessed(prog);
    final boolean anyChange = macQueenIterate(relation, centers, clusters, assignment, varsum);
    logVarstat(varstat, varsum);
    if (!anyChange) {
      break;
    }
    iteration++;
  }
  LOG.setCompleted(prog);
  if (LOG.isStatistics()) {
    LOG.statistics(new LongStatistic(KEY + ".iterations", iteration));
  }

  // Build the result from all non-empty clusters.
  Clustering<KMeansModel> result = new Clustering<>("k-Means Clustering", "kmeans-clustering");
  for (int c = 0; c < clusters.size(); c++) {
    DBIDs members = clusters.get(c);
    if (members.size() != 0) {
      KMeansModel model = new KMeansModel(centers[c], varsum[c]);
      result.addToplevelCluster(new Cluster<>(members, model));
    }
  }
  return result;
}
use of de.lmu.ifi.dbs.elki.logging.statistics.DoubleStatistic in project elki by elki-project.
the class KMeansSort method run.
/**
 * Run the clustering, recomputing the cluster separation before every
 * assignment pass.
 *
 * An empty relation yields an empty clustering. Passes are repeated until a
 * pass reports that no assignment changed, or until {@code maxiter} passes
 * were completed; a {@code maxiter} of zero or less disables the limit.
 *
 * @param database Database, passed on to the initializer
 * @param relation Relation containing the objects to cluster
 * @return Clustering with one top-level cluster for every non-empty cluster
 */
@Override
public Clustering<KMeansModel> run(Database database, Relation<V> relation) {
  if (relation.size() <= 0) {
    return new Clustering<>("k-Means Clustering", "kmeans-clustering");
  }
  // Report the initializer, then let it pick the starting centers.
  if (LOG.isStatistics()) {
    LOG.statistics(new StringStatistic(KEY + ".initialization", initializer.toString()));
  }
  double[][] centers = initializer.chooseInitialMeans(database, relation, k, getDistanceFunction());

  // One id set per cluster, sized for twice the average cluster size.
  List<ModifiableDBIDs> clusters = new ArrayList<>();
  for (int c = 0; c < k; c++) {
    clusters.add(DBIDUtil.newHashSet((int) (relation.size() * 2. / k)));
  }
  WritableIntegerDataStore assignment = DataStoreUtil.makeIntegerStorage(relation.getDBIDs(), DataStoreFactory.HINT_TEMP | DataStoreFactory.HINT_HOT, -1);
  double[] varsum = new double[k];
  // Pairwise cluster distances, and for every cluster an int array with one
  // slot per other cluster.
  double[][] centerDist = new double[k][k];
  int[][] centerOrder = new int[k][k - 1];

  // Progress and statistics objects exist only when the respective logging
  // is enabled.
  IndefiniteProgress prog = null;
  if (LOG.isVerbose()) {
    prog = new IndefiniteProgress("K-Means iteration", LOG);
  }
  DoubleStatistic varstat = null;
  if (LOG.isStatistics()) {
    varstat = new DoubleStatistic(this.getClass().getName() + ".variance-sum");
  }
  LongStatistic diststat = null;
  if (LOG.isStatistics()) {
    diststat = new LongStatistic(KEY + ".distance-computations");
  }

  int iteration = 0;
  while (maxiter <= 0 || iteration < maxiter) {
    LOG.incrementProcessed(prog);
    recomputeSeperation(centers, centerDist, centerOrder, diststat);
    final boolean anyChange = assignToNearestCluster(relation, centers, clusters, assignment, varsum, centerDist, centerOrder, diststat);
    logVarstat(varstat, varsum);
    if (LOG.isStatistics()) {
      LOG.statistics(diststat);
    }
    // A pass without changes ends the loop before the counter is advanced.
    if (!anyChange) {
      break;
    }
    centers = means(clusters, centers, relation);
    iteration++;
  }
  LOG.setCompleted(prog);
  if (LOG.isStatistics()) {
    LOG.statistics(new LongStatistic(KEY + ".iterations", iteration));
  }

  // Build the result from all non-empty clusters.
  Clustering<KMeansModel> result = new Clustering<>("k-Means Clustering", "kmeans-clustering");
  for (int c = 0; c < clusters.size(); c++) {
    DBIDs members = clusters.get(c);
    if (members.size() != 0) {
      KMeansModel model = new KMeansModel(centers[c], varsum[c]);
      result.addToplevelCluster(new Cluster<>(members, model));
    }
  }
  return result;
}
Aggregations