use of de.lmu.ifi.dbs.elki.math.DoubleMinMax in project elki by elki-project.
the class VisualizePairwiseGainMatrix method run.
@Override
public void run() {
final Database database = inputstep.getDatabase();
ResultHierarchy hier = database.getHierarchy();
Relation<NumberVector> relation = database.getRelation(TypeUtil.NUMBER_VECTOR_FIELD);
final Relation<String> labels = DatabaseUtil.guessLabelRepresentation(database);
final DBID firstid = DBIDUtil.deref(labels.iterDBIDs());
final String firstlabel = labels.get(firstid);
if (!firstlabel.matches(".*by.?label.*")) {
throw new AbortException("No 'by label' reference outlier found, which is needed for weighting!");
}
relation = GreedyEnsembleExperiment.applyPrescaling(prescaling, relation, firstid);
// Dimensionality and reference vector
final int dim = RelationUtil.dimensionality(relation);
final NumberVector refvec = relation.get(firstid);
// Build the truth vector
VectorNonZero pos = new VectorNonZero(refvec);
ArrayModifiableDBIDs ids = DBIDUtil.newArray(relation.getDBIDs());
ids.remove(firstid);
ids.sort();
final int size = ids.size();
double[][] data = new double[size][size];
DoubleMinMax minmax = new DoubleMinMax(), commax = new DoubleMinMax();
{
FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Computing ensemble gain.", size * (size + 1) >> 1, LOG) : null;
// Vote combination buffer.
double[] buf = new double[2];
int a = 0;
for (DBIDIter id = ids.iter(); id.valid(); id.advance(), a++) {
final NumberVector veca = relation.get(id);
// Direct AUC score:
{
double auc = ROCEvaluation.computeROCAUC(pos, new DecreasingVectorIter(veca));
data[a][a] = auc;
// minmax.put(auc);
LOG.incrementProcessed(prog);
}
// Compare to others, exploiting symmetry
DBIDArrayIter id2 = ids.iter();
id2.seek(a + 1);
for (int b = a + 1; b < size; b++, id2.advance()) {
final NumberVector vecb = relation.get(id2);
double[] combined = new double[dim];
for (int d = 0; d < dim; d++) {
buf[0] = veca.doubleValue(d);
buf[1] = vecb.doubleValue(d);
combined[d] = voting.combine(buf);
}
double auc = ROCEvaluation.computeROCAUC(pos, new DecreasingVectorIter(DoubleVector.wrap(combined)));
// logger.verbose(auc + " " + labels.get(ids.get(a)) + " " +
// labels.get(ids.get(b)));
data[a][b] = auc;
data[b][a] = auc;
commax.put(data[a][b]);
// minmax.put(auc);
LOG.incrementProcessed(prog);
}
}
LOG.ensureCompleted(prog);
}
for (int a = 0; a < size; a++) {
for (int b = a + 1; b < size; b++) {
double ref = Math.max(data[a][a], data[b][b]);
data[a][b] = (data[a][b] - ref) / (1 - ref);
data[b][a] = (data[b][a] - ref) / (1 - ref);
// logger.verbose(data[a][b] + " " + labels.get(ids.get(a)) + " " +
// labels.get(ids.get(b)));
minmax.put(data[a][b]);
}
}
for (int a = 0; a < size; a++) {
data[a][a] = 0;
}
LOG.verbose("Gain: " + minmax.toString() + " AUC: " + commax.toString());
boolean hasneg = (minmax.getMin() < -1E-3);
LinearScaling scale;
if (!hasneg) {
scale = LinearScaling.fromMinMax(0., minmax.getMax());
} else {
scale = LinearScaling.fromMinMax(0.0, Math.max(minmax.getMax(), -minmax.getMin()));
}
scale = LinearScaling.fromMinMax(0., .5);
BufferedImage img = new BufferedImage(size, size, BufferedImage.TYPE_INT_RGB);
for (int x = 0; x < size; x++) {
for (int y = x; y < size; y++) {
double val = data[x][y];
val = Math.max(-1, Math.min(1., scale.getScaled(val)));
// Compute color:
final int col;
{
if (val >= 0) {
int ival = 0xFF & (int) (255 * val);
col = 0xff000000 | (ival << 8);
} else {
int ival = 0xFF & (int) (255 * -val);
col = 0xff000000 | (ival << 16);
}
}
img.setRGB(x, y, col);
img.setRGB(y, x, col);
}
}
SimilarityMatrix smat = new ComputeSimilarityMatrixImage.SimilarityMatrix(img, relation, ids);
hier.add(database, smat);
VisualizerContext context = vispar.newContext(hier, smat);
// Attach visualizers to results
SimilarityMatrixVisualizer factory = new SimilarityMatrixVisualizer();
factory.processNewResult(context, database);
VisualizationTree.findVis(context).filter(VisualizationTask.class).forEach(task -> {
if (task.getFactory() == factory) {
showVisualization(context, factory, task);
}
});
}
use of de.lmu.ifi.dbs.elki.math.DoubleMinMax in project elki by elki-project.
the class SilhouetteOutlierDetection method run.
@Override
public OutlierResult run(Database database) {
Relation<O> relation = database.getRelation(getDistanceFunction().getInputTypeRestriction());
DistanceQuery<O> dq = database.getDistanceQuery(relation, getDistanceFunction());
// TODO: improve ELKI api to ensure we're using the same DBIDs!
Clustering<?> c = clusterer.run(database);
WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_DB);
DoubleMinMax mm = new DoubleMinMax();
List<? extends Cluster<?>> clusters = c.getAllClusters();
for (Cluster<?> cluster : clusters) {
if (cluster.size() <= 1 || cluster.isNoise()) {
switch(noiseOption) {
case IGNORE_NOISE:
case TREAT_NOISE_AS_SINGLETONS:
// As suggested in Rousseeuw, we use 0 for singletons.
for (DBIDIter iter = cluster.getIDs().iter(); iter.valid(); iter.advance()) {
scores.put(iter, 0.);
}
mm.put(0.);
continue;
case MERGE_NOISE:
// Treat as cluster below
break;
}
}
ArrayDBIDs ids = DBIDUtil.ensureArray(cluster.getIDs());
// temporary storage.
double[] as = new double[ids.size()];
DBIDArrayIter it1 = ids.iter(), it2 = ids.iter();
for (it1.seek(0); it1.valid(); it1.advance()) {
// a: In-cluster distances
// Already computed distances
double a = as[it1.getOffset()];
for (it2.seek(it1.getOffset() + 1); it2.valid(); it2.advance()) {
final double dist = dq.distance(it1, it2);
a += dist;
as[it2.getOffset()] += dist;
}
a /= (ids.size() - 1);
// b: other clusters:
double min = Double.POSITIVE_INFINITY;
for (Cluster<?> ocluster : clusters) {
if (ocluster == /* yes, reference identity */
cluster) {
continue;
}
if (ocluster.isNoise()) {
switch(noiseOption) {
case IGNORE_NOISE:
continue;
case MERGE_NOISE:
// No special treatment
break;
case TREAT_NOISE_AS_SINGLETONS:
// Treat noise cluster as singletons:
for (DBIDIter it3 = ocluster.getIDs().iter(); it3.valid(); it3.advance()) {
double dist = dq.distance(it1, it3);
if (dist < min) {
min = dist;
}
}
continue;
}
}
final DBIDs oids = ocluster.getIDs();
double b = 0.;
for (DBIDIter it3 = oids.iter(); it3.valid(); it3.advance()) {
b += dq.distance(it1, it3);
}
b /= oids.size();
if (b < min) {
min = b;
}
}
final double score = (min - a) / Math.max(min, a);
scores.put(it1, score);
mm.put(score);
}
}
// Build result representation.
DoubleRelation scoreResult = new MaterializedDoubleRelation("Silhouette Coefficients", "silhouette-outlier", scores, relation.getDBIDs());
OutlierScoreMeta scoreMeta = new InvertedOutlierScoreMeta(mm.getMin(), mm.getMax(), -1., 1., .5);
return new OutlierResult(scoreMeta, scoreResult);
}
use of de.lmu.ifi.dbs.elki.math.DoubleMinMax in project elki by elki-project.
the class OutRankS1 method run.
@Override
public OutlierResult run(Database database) {
DBIDs ids = database.getRelation(TypeUtil.ANY).getDBIDs();
// Run the primary algorithm
Clustering<? extends SubspaceModel> clustering = clusteralg.run(database);
WritableDoubleDataStore score = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT);
for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
score.putDouble(iter, 0);
}
int maxdim = 0, maxsize = 0;
// Find maximum dimensionality and cluster size
for (Cluster<? extends SubspaceModel> cluster : clustering.getAllClusters()) {
maxsize = Math.max(maxsize, cluster.size());
maxdim = Math.max(maxdim, BitsUtil.cardinality(cluster.getModel().getDimensions()));
}
// Iterate over all clusters:
DoubleMinMax minmax = new DoubleMinMax();
for (Cluster<? extends SubspaceModel> cluster : clustering.getAllClusters()) {
double relsize = cluster.size() / (double) maxsize;
double reldim = BitsUtil.cardinality(cluster.getModel().getDimensions()) / (double) maxdim;
// Process objects in the cluster
for (DBIDIter iter = cluster.getIDs().iter(); iter.valid(); iter.advance()) {
double newscore = score.doubleValue(iter) + alpha * relsize + (1 - alpha) * reldim;
score.putDouble(iter, newscore);
minmax.put(newscore);
}
}
DoubleRelation scoreResult = new MaterializedDoubleRelation("OutRank-S1", "OUTRANK_S1", score, ids);
OutlierScoreMeta meta = new InvertedOutlierScoreMeta(minmax.getMin(), minmax.getMax(), 0, Double.POSITIVE_INFINITY);
OutlierResult res = new OutlierResult(meta, scoreResult);
res.addChildResult(clustering);
return res;
}
use of de.lmu.ifi.dbs.elki.math.DoubleMinMax in project elki by elki-project.
the class AddSingleScale method run.
/**
* Add scales to a single vector relation.
*
* @param rel Relation
* @return Scales
*/
private ScalesResult run(Relation<? extends NumberVector> rel) {
final int dim = RelationUtil.dimensionality(rel);
LinearScale[] scales = new LinearScale[dim];
if (minmax == null) {
DoubleMinMax mm = new DoubleMinMax();
for (DBIDIter iditer = rel.iterDBIDs(); iditer.valid(); iditer.advance()) {
NumberVector vec = rel.get(iditer);
for (int d = 0; d < dim; d++) {
final double val = vec.doubleValue(d);
if (val != val) {
// NaN
continue;
}
mm.put(val);
}
}
LinearScale scale = new LinearScale(mm.getMin(), mm.getMax());
for (int i = 0; i < dim; i++) {
scales[i] = scale;
}
} else {
// Use predefined.
LinearScale scale = new LinearScale(minmax[0], minmax[1]);
for (int i = 0; i < dim; i++) {
scales[i] = scale;
}
}
ScalesResult res = new ScalesResult(scales);
return res;
}
use of de.lmu.ifi.dbs.elki.math.DoubleMinMax in project elki by elki-project.
the class DistanceStatisticsWithClasses method run.
@Override
public HistogramResult run(Database database) {
final Relation<O> relation = database.getRelation(getInputTypeRestriction()[0]);
final DistanceQuery<O> distFunc = database.getDistanceQuery(relation, getDistanceFunction());
final StepProgress stepprog = LOG.isVerbose() ? new StepProgress("Distance statistics", 2) : null;
// determine binning ranges.
DoubleMinMax gminmax = new DoubleMinMax();
// Cluster by labels
Collection<Cluster<Model>> split = (new ByLabelOrAllInOneClustering()).run(database).getAllClusters();
// global in-cluster min/max
DoubleMinMax giminmax = new DoubleMinMax();
// global other-cluster min/max
DoubleMinMax gominmax = new DoubleMinMax();
// in-cluster distances
MeanVariance mimin = new MeanVariance();
MeanVariance mimax = new MeanVariance();
MeanVariance midif = new MeanVariance();
// other-cluster distances
MeanVariance momin = new MeanVariance();
MeanVariance momax = new MeanVariance();
MeanVariance modif = new MeanVariance();
// Histogram
final ObjHistogram<long[]> histogram;
LOG.beginStep(stepprog, 1, "Prepare histogram.");
if (exact) {
gminmax = exactMinMax(relation, distFunc);
histogram = new LongArrayStaticHistogram(numbin, gminmax.getMin(), gminmax.getMax(), 2);
} else if (sampling) {
gminmax = sampleMinMax(relation, distFunc);
histogram = new LongArrayStaticHistogram(numbin, gminmax.getMin(), gminmax.getMax(), 2);
} else {
histogram = new AbstractObjDynamicHistogram<long[]>(numbin) {
@Override
protected long[] downsample(Object[] data, int start, int end, int size) {
long[] ret = new long[2];
for (int i = start; i < end; i++) {
long[] existing = (long[]) data[i];
if (existing != null) {
for (int c = 0; c < 2; c++) {
ret[c] += existing[c];
}
}
}
return ret;
}
@Override
protected long[] aggregate(long[] first, long[] second) {
for (int c = 0; c < 2; c++) {
first[c] += second[c];
}
return first;
}
@Override
protected long[] cloneForCache(long[] data) {
return data.clone();
}
@Override
protected long[] makeObject() {
return new long[2];
}
};
}
LOG.beginStep(stepprog, 2, "Build histogram.");
final FiniteProgress progress = LOG.isVerbose() ? new FiniteProgress("Distance computations", relation.size(), LOG) : null;
// iterate per cluster
final long[] incFirst = new long[] { 1L, 0L };
final long[] incSecond = new long[] { 0L, 1L };
for (Cluster<?> c1 : split) {
for (DBIDIter id1 = c1.getIDs().iter(); id1.valid(); id1.advance()) {
// in-cluster distances
DoubleMinMax iminmax = new DoubleMinMax();
for (DBIDIter iter2 = c1.getIDs().iter(); iter2.valid(); iter2.advance()) {
// skip the point itself.
if (DBIDUtil.equal(id1, iter2)) {
continue;
}
double d = distFunc.distance(id1, iter2);
histogram.putData(d, incFirst);
iminmax.put(d);
}
// aggregate
mimin.put(iminmax.getMin());
mimax.put(iminmax.getMax());
midif.put(iminmax.getDiff());
// min/max
giminmax.put(iminmax.getMin());
giminmax.put(iminmax.getMax());
// other-cluster distances
DoubleMinMax ominmax = new DoubleMinMax();
for (Cluster<?> c2 : split) {
if (c2 == c1) {
continue;
}
for (DBIDIter iter2 = c2.getIDs().iter(); iter2.valid(); iter2.advance()) {
// skip the point itself (shouldn't happen though)
if (DBIDUtil.equal(id1, iter2)) {
continue;
}
double d = distFunc.distance(id1, iter2);
histogram.putData(d, incSecond);
ominmax.put(d);
}
}
// aggregate
momin.put(ominmax.getMin());
momax.put(ominmax.getMax());
modif.put(ominmax.getDiff());
// min/max
gominmax.put(ominmax.getMin());
gominmax.put(ominmax.getMax());
LOG.incrementProcessed(progress);
}
}
LOG.ensureCompleted(progress);
// Update values (only needed for sampling case).
gminmax.put(gominmax);
LOG.setCompleted(stepprog);
// count the number of samples we have in the data
long inum = 0;
long onum = 0;
for (ObjHistogram.Iter<long[]> iter = histogram.iter(); iter.valid(); iter.advance()) {
inum += iter.getValue()[0];
onum += iter.getValue()[1];
}
long bnum = inum + onum;
Collection<double[]> binstat = new ArrayList<>(numbin);
for (ObjHistogram.Iter<long[]> iter = histogram.iter(); iter.valid(); iter.advance()) {
final long[] value = iter.getValue();
final double icof = (inum == 0) ? 0 : ((double) value[0]) / inum / histogram.getBinsize();
final double icaf = ((double) value[0]) / bnum / histogram.getBinsize();
final double ocof = (onum == 0) ? 0 : ((double) value[1]) / onum / histogram.getBinsize();
final double ocaf = ((double) value[1]) / bnum / histogram.getBinsize();
binstat.add(new double[] { iter.getCenter(), icof, icaf, ocof, ocaf });
}
HistogramResult result = new HistogramResult("Distance Histogram", "distance-histogram", binstat);
result.addHeader("Absolute minimum distance (abs): " + gminmax.getMin());
result.addHeader("Absolute maximum distance (abs): " + gminmax.getMax());
result.addHeader("In-Cluster minimum distance (abs, avg, stddev): " + giminmax.getMin() + " " + mimin.getMean() + " " + mimin.getSampleStddev());
result.addHeader("In-Cluster maximum distance (abs, avg, stddev): " + giminmax.getMax() + " " + mimax.getMean() + " " + mimax.getSampleStddev());
result.addHeader("Other-Cluster minimum distance (abs, avg, stddev): " + gominmax.getMin() + " " + momin.getMean() + " " + momin.getSampleStddev());
result.addHeader("Other-Cluster maximum distance (abs, avg, stddev): " + gominmax.getMax() + " " + momax.getMean() + " " + momax.getSampleStddev());
result.addHeader("Column description: bin center, in-cluster only frequency, in-cluster all frequency, other-cluster only frequency, other cluster all frequency");
result.addHeader("In-cluster value count: " + inum + " other cluster value count: " + onum);
return result;
}
Aggregations