use of de.lmu.ifi.dbs.elki.result.HistogramResult in project elki by elki-project.
the class HistogramVisualization method makeVisualization.
/**
 * Render a histogram result as an SVG step plot, one outline per data series.
 * <p>
 * Every row of the result is read as {@code [x, y1, ..., yk]}: the first entry
 * is the bin center, the remaining entries are the bin heights of the
 * individual series. Bins are assumed to be equally spaced; the bin width is
 * derived from the x range and the number of rows.
 *
 * @param context Visualizer context, used to obtain the style library
 * @param task Visualization task providing the histogram result
 * @param plot Plot that owns the SVG document and the CSS classes
 * @param width Available width
 * @param height Available height
 * @param proj Projection (not used by this implementation)
 * @return Static visualization wrapping the generated SVG layer
 */
@Override
public Visualization makeVisualization(VisualizerContext context, VisualizationTask task, VisualizationPlot plot, double width, double height, Projection proj) {
  HistogramResult curve = task.getResult();
  final StyleLibrary style = context.getStyleLibrary();
  final double sizex = StyleLibrary.SCALE;
  final double sizey = StyleLibrary.SCALE * height / width;
  final double margin = style.getSize(StyleLibrary.MARGIN);
  Element layer = SVGUtil.svgElement(plot.getDocument(), SVGConstants.SVG_G_TAG);
  final String transform = SVGUtil.makeMarginTransform(width, height, sizex, sizey, margin);
  SVGUtil.setAtt(layer, SVGConstants.SVG_TRANSFORM_ATTRIBUTE, transform);

  final int size = curve.size();
  if (size == 0) {
    // No rows: there is neither a series count nor a value range to derive
    // scales from, so return the empty layer instead of failing on a
    // negative array size below.
    return new StaticVisualizationInstance(context, task, plot, width, height, layer);
  }

  // find maximum, determine step size
  int dim = -1;
  DoubleMinMax xminmax = new DoubleMinMax();
  DoubleMinMax yminmax = new DoubleMinMax();
  for (double[] point : curve) {
    xminmax.put(point[0]);
    dim = Math.max(dim, point.length);
    for (int i = 1; i < point.length; i++) {
      yminmax.put(point[i]);
    }
  }
  // Minimum should always start at 0 for histograms
  yminmax.put(0.0);
  // remove one dimension which are the x values.
  dim = dim - 1;

  final double range = xminmax.getMax() - xminmax.getMin();
  double binwidth = (size > 1) ? range / (size - 1) : 0.;
  if (!(binwidth > 0.)) {
    // A single bin (or identical bin centers) gives no spacing to infer a
    // width from; use a unit width so the x scale does not degenerate.
    binwidth = 1.;
  }

  // The x scale is kept marginally narrower than the outermost bin edges
  // (factor .49999 instead of .5).
  LinearScale xscale = new LinearScale(xminmax.getMin() - binwidth * .49999, xminmax.getMax() + binwidth * .49999);
  LinearScale yscale = new LinearScale(yminmax.getMin(), yminmax.getMax());

  // Every outline starts on the baseline at the left edge of the first bin.
  SVGPath[] path = new SVGPath[dim];
  for (int i = 0; i < dim; i++) {
    path[i] = new SVGPath(sizex * xscale.getScaled(xminmax.getMin() - binwidth * .5), sizey);
  }

  // draw curves: one horizontal segment per bin, from its left to its right edge.
  for (double[] point : curve) {
    final double left = sizex * (xscale.getScaled(point[0] - binwidth * .5));
    final double right = sizex * (xscale.getScaled(point[0] + binwidth * .5));
    for (int d = 0; d < dim; d++) {
      // SVG y grows downwards, hence the inversion.
      final double y = sizey * (1 - yscale.getScaled(point[d + 1]));
      path[d].lineTo(left, y);
      path[d].lineTo(right, y);
    }
  }

  // close all histograms by returning to the baseline at the right edge.
  for (int i = 0; i < dim; i++) {
    path[i].lineTo(sizex * xscale.getScaled(xminmax.getMax() + binwidth * .5), sizey);
  }

  // add axes
  try {
    SVGSimpleLinearAxis.drawAxis(plot, layer, yscale, 0, sizey, 0, 0, SVGSimpleLinearAxis.LabelStyle.LEFTHAND, style);
    SVGSimpleLinearAxis.drawAxis(plot, layer, xscale, 0, sizey, sizex, sizey, SVGSimpleLinearAxis.LabelStyle.RIGHTHAND, style);
  } catch (CSSNamingConflict e) {
    LoggingUtil.exception(e);
  }

  // Setup line styles and insert lines.
  ColorLibrary cl = style.getColorSet(StyleLibrary.PLOT);
  for (int d = 0; d < dim; d++) {
    CSSClass csscls = new CSSClass(this, SERIESID + "_" + d);
    csscls.setStatement(SVGConstants.SVG_FILL_ATTRIBUTE, SVGConstants.SVG_NONE_VALUE);
    csscls.setStatement(SVGConstants.SVG_STROKE_ATTRIBUTE, cl.getColor(d));
    csscls.setStatement(SVGConstants.SVG_STROKE_WIDTH_ATTRIBUTE, style.getLineWidth(StyleLibrary.PLOT));
    plot.addCSSClassOrLogError(csscls);

    Element line = path[d].makeElement(plot);
    line.setAttribute(SVGConstants.SVG_CLASS_ATTRIBUTE, csscls.getName());
    layer.appendChild(line);
  }
  return new StaticVisualizationInstance(context, task, plot, width, height, layer);
}
use of de.lmu.ifi.dbs.elki.result.HistogramResult in project elki by elki-project.
the class DistanceStatisticsWithClasses method run.
/**
 * Compute a histogram of pairwise distances, separated into distances to
 * objects of the same cluster and distances to objects of other clusters.
 * <p>
 * Clusters are obtained from {@link ByLabelOrAllInOneClustering}. Each result
 * row is {@code [bin center, in-cluster only frequency, in-cluster all
 * frequency, other-cluster only frequency, other-cluster all frequency]}; the
 * headers carry the observed minimum / maximum distances and their per-object
 * mean and standard deviation.
 *
 * @param database Database to process
 * @return Distance histogram with statistics headers
 */
@Override
public HistogramResult run(Database database) {
  final Relation<O> relation = database.getRelation(getInputTypeRestriction()[0]);
  final DistanceQuery<O> distFunc = database.getDistanceQuery(relation, getDistanceFunction());

  final StepProgress stepprog = LOG.isVerbose() ? new StepProgress("Distance statistics", 2) : null;

  // determine binning ranges.
  DoubleMinMax gminmax = new DoubleMinMax();

  // Cluster by labels
  Collection<Cluster<Model>> split = (new ByLabelOrAllInOneClustering()).run(database).getAllClusters();

  // global in-cluster min/max
  DoubleMinMax giminmax = new DoubleMinMax();
  // global other-cluster min/max
  DoubleMinMax gominmax = new DoubleMinMax();
  // in-cluster distances
  MeanVariance mimin = new MeanVariance();
  MeanVariance mimax = new MeanVariance();
  MeanVariance midif = new MeanVariance();
  // other-cluster distances
  MeanVariance momin = new MeanVariance();
  MeanVariance momax = new MeanVariance();
  MeanVariance modif = new MeanVariance();
  // Histogram; each bin holds { in-cluster count, other-cluster count }.
  final ObjHistogram<long[]> histogram;

  LOG.beginStep(stepprog, 1, "Prepare histogram.");
  if (exact) {
    gminmax = exactMinMax(relation, distFunc);
    histogram = new LongArrayStaticHistogram(numbin, gminmax.getMin(), gminmax.getMax(), 2);
  } else if (sampling) {
    gminmax = sampleMinMax(relation, distFunc);
    histogram = new LongArrayStaticHistogram(numbin, gminmax.getMin(), gminmax.getMax(), 2);
  } else {
    // Range not known in advance: use a histogram that adapts its range.
    histogram = new AbstractObjDynamicHistogram<long[]>(numbin) {
      @Override
      protected long[] downsample(Object[] data, int start, int end, int size) {
        long[] ret = new long[2];
        for (int i = start; i < end; i++) {
          long[] existing = (long[]) data[i];
          if (existing != null) {
            for (int c = 0; c < 2; c++) {
              ret[c] += existing[c];
            }
          }
        }
        return ret;
      }

      @Override
      protected long[] aggregate(long[] first, long[] second) {
        for (int c = 0; c < 2; c++) {
          first[c] += second[c];
        }
        return first;
      }

      @Override
      protected long[] cloneForCache(long[] data) {
        return data.clone();
      }

      @Override
      protected long[] makeObject() {
        return new long[2];
      }
    };
  }

  LOG.beginStep(stepprog, 2, "Build histogram.");
  final FiniteProgress progress = LOG.isVerbose() ? new FiniteProgress("Distance computations", relation.size(), LOG) : null;
  // iterate per cluster
  final long[] incFirst = new long[] { 1L, 0L };
  final long[] incSecond = new long[] { 0L, 1L };
  for (Cluster<?> c1 : split) {
    for (DBIDIter id1 = c1.getIDs().iter(); id1.valid(); id1.advance()) {
      // in-cluster distances
      DoubleMinMax iminmax = new DoubleMinMax();
      boolean hasInCluster = false;
      for (DBIDIter iter2 = c1.getIDs().iter(); iter2.valid(); iter2.advance()) {
        // skip the point itself.
        if (DBIDUtil.equal(id1, iter2)) {
          continue;
        }
        double d = distFunc.distance(id1, iter2);
        histogram.putData(d, incFirst);
        iminmax.put(d);
        hasInCluster = true;
      }
      // Aggregate only when at least one distance was seen; an untouched
      // min/max tracker (singleton cluster) would otherwise feed its
      // placeholder values into the statistics.
      if (hasInCluster) {
        mimin.put(iminmax.getMin());
        mimax.put(iminmax.getMax());
        midif.put(iminmax.getDiff());
        // min/max
        giminmax.put(iminmax.getMin());
        giminmax.put(iminmax.getMax());
      }

      // other-cluster distances
      DoubleMinMax ominmax = new DoubleMinMax();
      boolean hasOtherCluster = false;
      for (Cluster<?> c2 : split) {
        if (c2 == c1) {
          continue;
        }
        for (DBIDIter iter2 = c2.getIDs().iter(); iter2.valid(); iter2.advance()) {
          // skip the point itself (shouldn't happen though)
          if (DBIDUtil.equal(id1, iter2)) {
            continue;
          }
          double d = distFunc.distance(id1, iter2);
          histogram.putData(d, incSecond);
          ominmax.put(d);
          hasOtherCluster = true;
        }
      }
      // Same guard as above; empty when there is only a single cluster.
      if (hasOtherCluster) {
        momin.put(ominmax.getMin());
        momax.put(ominmax.getMax());
        modif.put(ominmax.getDiff());
        // min/max
        gominmax.put(ominmax.getMin());
        gominmax.put(ominmax.getMax());
      }
      LOG.incrementProcessed(progress);
    }
  }
  LOG.ensureCompleted(progress);
  // Update the global range from everything actually observed. This matters
  // when the range was only sampled or not precomputed at all; both the
  // in-cluster and the other-cluster distances belong to the global range.
  gminmax.put(giminmax);
  gminmax.put(gominmax);

  LOG.setCompleted(stepprog);

  // count the number of samples we have in the data
  long inum = 0;
  long onum = 0;
  for (ObjHistogram.Iter<long[]> iter = histogram.iter(); iter.valid(); iter.advance()) {
    inum += iter.getValue()[0];
    onum += iter.getValue()[1];
  }
  long bnum = inum + onum;

  Collection<double[]> binstat = new ArrayList<>(numbin);
  for (ObjHistogram.Iter<long[]> iter = histogram.iter(); iter.valid(); iter.advance()) {
    final long[] value = iter.getValue();
    // Frequencies are normalized by sample count and bin size; a zero
    // denominator yields 0 instead of NaN.
    final double icof = (inum == 0) ? 0 : ((double) value[0]) / inum / histogram.getBinsize();
    final double icaf = (bnum == 0) ? 0 : ((double) value[0]) / bnum / histogram.getBinsize();
    final double ocof = (onum == 0) ? 0 : ((double) value[1]) / onum / histogram.getBinsize();
    final double ocaf = (bnum == 0) ? 0 : ((double) value[1]) / bnum / histogram.getBinsize();
    binstat.add(new double[] { iter.getCenter(), icof, icaf, ocof, ocaf });
  }
  HistogramResult result = new HistogramResult("Distance Histogram", "distance-histogram", binstat);

  result.addHeader("Absolute minimum distance (abs): " + gminmax.getMin());
  result.addHeader("Absolute maximum distance (abs): " + gminmax.getMax());
  result.addHeader("In-Cluster minimum distance (abs, avg, stddev): " + giminmax.getMin() + " " + mimin.getMean() + " " + sampleStddevOrNaN(mimin));
  result.addHeader("In-Cluster maximum distance (abs, avg, stddev): " + giminmax.getMax() + " " + mimax.getMean() + " " + sampleStddevOrNaN(mimax));
  result.addHeader("Other-Cluster minimum distance (abs, avg, stddev): " + gominmax.getMin() + " " + momin.getMean() + " " + sampleStddevOrNaN(momin));
  result.addHeader("Other-Cluster maximum distance (abs, avg, stddev): " + gominmax.getMax() + " " + momax.getMean() + " " + sampleStddevOrNaN(momax));
  result.addHeader("Column description: bin center, in-cluster only frequency, in-cluster all frequency, other-cluster only frequency, other cluster all frequency");
  result.addHeader("In-cluster value count: " + inum + " other cluster value count: " + onum);
  return result;
}

/**
 * Sample standard deviation of an aggregate, or NaN when it holds fewer than
 * two values (a sample deviation is undefined in that case).
 *
 * @param mv Aggregate to read
 * @return Sample standard deviation, or {@code Double.NaN}
 */
private static double sampleStddevOrNaN(MeanVariance mv) {
  return mv.getCount() > 1 ? mv.getSampleStddev() : Double.NaN;
}
use of de.lmu.ifi.dbs.elki.result.HistogramResult in project elki by elki-project.
the class RankingQualityHistogram method run.
/**
 * Process a database: evaluate, for every object, how well its complete
 * distance ranking of the data set separates the object's own cluster from
 * the rest, and collect these quality values in a histogram over [0, 1].
 * <p>
 * Clusters are obtained from {@link ByLabelOrAllInOneClustering}. Each object
 * contributes a weight of {@code 1 / relation.size()} to its bin. Each result
 * row is {@code [bin center, accumulated weight]}; mean and sample variance
 * of the quality values are attached as a header.
 *
 * @param database Database to process
 * @param relation Relation to process
 * @return Histogram of ranking qualities
 */
public HistogramResult run(Database database, Relation<O> relation) {
  final int size = relation.size();
  final DistanceQuery<O> distanceQuery = database.getDistanceQuery(relation, getDistanceFunction());
  // Full rankings are needed, so request all objects as neighbors.
  final KNNQuery<O> knnQuery = database.getKNNQuery(distanceQuery, size);

  if (LOG.isVerbose()) {
    LOG.verbose("Preprocessing clusters...");
  }
  // Cluster by labels
  Collection<Cluster<Model>> split = (new ByLabelOrAllInOneClustering()).run(database).getAllClusters();

  DoubleStaticHistogram hist = new DoubleStaticHistogram(numbins, 0.0, 1.0);

  if (LOG.isVerbose()) {
    LOG.verbose("Processing points...");
  }
  FiniteProgress progress = LOG.isVerbose() ? new FiniteProgress("Computing ROC AUC values", size, LOG) : null;

  ROCEvaluation roc = new ROCEvaluation();
  MeanVariance mv = new MeanVariance();
  // Per-object contribution to the histogram.
  final double weight = 1. / size;
  // sort neighbors
  for (Cluster<?> clus : split) {
    for (DBIDIter iter = clus.getIDs().iter(); iter.valid(); iter.advance()) {
      KNNList knn = knnQuery.getKNNForDBID(iter, size);
      // Presumably a ROC AUC value (see the progress label) -- confirm
      // against EvaluateClustering.evaluateRanking.
      double result = EvaluateClustering.evaluateRanking(roc, clus, knn);

      mv.put(result);
      hist.increment(result, weight);

      LOG.incrementProcessed(progress);
    }
  }
  LOG.ensureCompleted(progress);

  // Transform Histogram into a Double Vector array; one row per bin, so the
  // list is sized by the bin count rather than by the relation size.
  Collection<double[]> res = new ArrayList<>(numbins);
  for (DoubleStaticHistogram.Iter iter = hist.iter(); iter.valid(); iter.advance()) {
    res.add(new double[] { iter.getCenter(), iter.getValue() });
  }
  HistogramResult result = new HistogramResult("Ranking Quality Histogram", "ranking-histogram", res);
  result.addHeader("Mean: " + mv.getMean() + " Variance: " + mv.getSampleVariance());
  return result;
}
use of de.lmu.ifi.dbs.elki.result.HistogramResult in project elki by elki-project.
the class ComputeOutlierHistogram method evaluateOutlierResult.
/**
 * Evaluate a single outlier result as histogram.
 * <p>
 * Objects whose label matches {@code positiveClassName} are treated as
 * outliers (positives), all remaining scored objects as negatives. Each
 * result row is {@code [bin center, negative frequency, positive frequency]}.
 * With {@code splitfreq} the two classes are normalized separately, otherwise
 * both are normalized by the total number of scored objects.
 *
 * @param database Database to process
 * @param or Outlier result
 * @return Result
 */
public HistogramResult evaluateOutlierResult(Database database, OutlierResult or) {
  if (scaling instanceof OutlierScalingFunction) {
    OutlierScalingFunction oscaling = (OutlierScalingFunction) scaling;
    oscaling.prepare(or);
  }

  ModifiableDBIDs ids = DBIDUtil.newHashSet(or.getScores().getDBIDs());
  DBIDs outlierIds = DatabaseUtil.getObjectsByLabelMatch(database, positiveClassName);
  // Histogram entries are pairs: the first component accumulates the weight
  // of non-outliers, the second component the weight of outliers.
  // If we have useful (finite) min/max, use these for binning.
  double min = scaling.getMin();
  double max = scaling.getMax();
  final ObjHistogram<DoubleDoublePair> hist;
  if (Double.isInfinite(min) || Double.isNaN(min) || Double.isInfinite(max) || Double.isNaN(max)) {
    // No usable range from the scaling: use a histogram that adapts its range.
    hist = new AbstractObjDynamicHistogram<DoubleDoublePair>(bins) {
      @Override
      public DoubleDoublePair aggregate(DoubleDoublePair first, DoubleDoublePair second) {
        first.first += second.first;
        first.second += second.second;
        return first;
      }

      @Override
      protected DoubleDoublePair makeObject() {
        return new DoubleDoublePair(0., 0.);
      }

      @Override
      protected DoubleDoublePair cloneForCache(DoubleDoublePair data) {
        return new DoubleDoublePair(data.first, data.second);
      }

      @Override
      protected DoubleDoublePair downsample(Object[] data, int start, int end, int size) {
        DoubleDoublePair sum = new DoubleDoublePair(0, 0);
        for (int i = start; i < end; i++) {
          DoubleDoublePair p = (DoubleDoublePair) data[i];
          if (p != null) {
            sum.first += p.first;
            sum.second += p.second;
          }
        }
        return sum;
      }
    };
  } else {
    hist = new AbstractObjStaticHistogram<DoubleDoublePair>(bins, min, max) {
      @Override
      protected DoubleDoublePair makeObject() {
        return new DoubleDoublePair(0., 0.);
      }

      @Override
      public void putData(double coord, DoubleDoublePair data) {
        DoubleDoublePair exist = get(coord);
        exist.first += data.first;
        exist.second += data.second;
      }
    };
  }

  // Number of scored objects, taken before the outliers are removed.
  final double total = ids.size();
  // From here on, ids contains the non-outliers only.
  ids.removeDBIDs(outlierIds);

  // Per-object increments for the two classes.
  DoubleDoublePair negative, positive;
  if (!splitfreq) {
    negative = new DoubleDoublePair(1. / total, 0);
    positive = new DoubleDoublePair(0, 1. / total);
  } else {
    // Normalize by the number of non-outliers that are actually iterated
    // below; this stays correct even when some labeled outliers have no
    // score and therefore were never part of ids.
    negative = new DoubleDoublePair(1. / ids.size(), 0);
    positive = new DoubleDoublePair(0, 1. / outlierIds.size());
  }

  // fill histogram with the scaled scores of the non-outliers; the range
  // test also rejects NaN.
  for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
    double result = or.getScores().doubleValue(iter);
    result = scaling.getScaled(result);
    if (result > Double.NEGATIVE_INFINITY && result < Double.POSITIVE_INFINITY) {
      hist.putData(result, negative);
    }
  }
  // fill histogram with the scaled scores of the outliers
  for (DBIDIter iter = outlierIds.iter(); iter.valid(); iter.advance()) {
    double result = or.getScores().doubleValue(iter);
    result = scaling.getScaled(result);
    if (result > Double.NEGATIVE_INFINITY && result < Double.POSITIVE_INFINITY) {
      hist.putData(result, positive);
    }
  }

  Collection<double[]> collHist = new ArrayList<>(hist.getNumBins());
  for (ObjHistogram.Iter<DoubleDoublePair> iter = hist.iter(); iter.valid(); iter.advance()) {
    DoubleDoublePair data = iter.getValue();
    collHist.add(new double[] { iter.getCenter(), data.first, data.second });
  }
  return new HistogramResult("Outlier Score Histogram", "outlier-histogram", collHist);
}
use of de.lmu.ifi.dbs.elki.result.HistogramResult in project elki by elki-project.
the class EvaluateRankingQuality method run.
/**
 * Evaluate ranking quality as a function of how central an object is within
 * its cluster.
 * <p>
 * Clusters are obtained from {@link ByLabelOrAllInOneClustering}. For each
 * cluster, the members are sorted by the value of {@code mahalanobisDistance}
 * computed from the cluster's covariance matrix and mean vector. Each
 * member's relative position in that order (offset / cluster size) selects
 * the histogram bin, and the ranking quality of the member's full neighbor
 * ranking is added to that bin. Each result row is
 * {@code [bin center, count, mean, sample variance]}.
 *
 * @param database Database to process
 * @return Histogram of ranking quality by relative position in the cluster
 */
@Override
public HistogramResult run(Database database) {
  final Relation<V> relation = database.getRelation(getInputTypeRestriction()[0]);
  final int size = relation.size();
  final DistanceQuery<V> distQuery = database.getDistanceQuery(relation, getDistanceFunction());
  // Full rankings are needed, so request all objects as neighbors.
  final KNNQuery<V> knnQuery = database.getKNNQuery(distQuery, size);

  if (LOG.isVerbose()) {
    LOG.verbose("Preprocessing clusters...");
  }
  // Cluster by labels
  Collection<Cluster<Model>> split = (new ByLabelOrAllInOneClustering()).run(database).getAllClusters();

  // Compute cluster averages and covariance matrix
  HashMap<Cluster<?>, double[]> averages = new HashMap<>(split.size());
  HashMap<Cluster<?>, double[][]> covmats = new HashMap<>(split.size());
  for (Cluster<?> clus : split) {
    CovarianceMatrix covmat = CovarianceMatrix.make(relation, clus.getIDs());
    // Read the mean before the covariance object is consumed by the
    // destroy* call.
    averages.put(clus, covmat.getMeanVector());
    covmats.put(clus, covmat.destroyToPopulationMatrix());
  }

  MeanVarianceStaticHistogram hist = new MeanVarianceStaticHistogram(numbins, 0.0, 1.0);

  if (LOG.isVerbose()) {
    LOG.verbose("Processing points...");
  }
  FiniteProgress rocloop = LOG.isVerbose() ? new FiniteProgress("Computing ROC AUC values", size, LOG) : null;

  ROCEvaluation roc = new ROCEvaluation();
  // sort neighbors
  for (Cluster<?> clus : split) {
    ModifiableDoubleDBIDList cmem = DBIDUtil.newDistanceDBIDList(clus.size());
    double[] av = averages.get(clus);
    double[][] covm = covmats.get(clus);

    for (DBIDIter iter = clus.getIDs().iter(); iter.valid(); iter.advance()) {
      double d = mahalanobisDistance(covm, relation.get(iter).toArray(), av);
      cmem.add(d, iter);
    }
    cmem.sort();

    for (DBIDArrayIter it = cmem.iter(); it.valid(); it.advance()) {
      KNNList knn = knnQuery.getKNNForDBID(it, size);
      double result = EvaluateClustering.evaluateRanking(roc, clus, knn);
      // Relative position within the sorted cluster, in [0, 1).
      hist.put(((double) it.getOffset()) / clus.size(), result);

      LOG.incrementProcessed(rocloop);
    }
  }
  LOG.ensureCompleted(rocloop);

  // Transform Histogram into a Double Vector array; one row per bin, so the
  // list is sized by the bin count rather than by the relation size.
  Collection<double[]> res = new ArrayList<>(numbins);
  for (ObjHistogram.Iter<MeanVariance> iter = hist.iter(); iter.valid(); iter.advance()) {
    res.add(new double[] { iter.getCenter(), iter.getValue().getCount(), iter.getValue().getMean(), iter.getValue().getSampleVariance() });
  }
  return new HistogramResult("Ranking Quality Histogram", "ranking-histogram", res);
}
Aggregations