Use of de.lmu.ifi.dbs.elki.utilities.scaling.LinearScaling in project elki by elki-project.
The class VisualizePairwiseGainMatrix, method run(): computes ROC AUC scores of individual and pairwise-combined outlier rankings against a "by label" reference, converts them into gains, and renders the gain matrix as a red/green image.
@Override
public void run() {
  final Database database = inputstep.getDatabase();
  ResultHierarchy hier = database.getHierarchy();
  Relation<NumberVector> relation = database.getRelation(TypeUtil.NUMBER_VECTOR_FIELD);
  final Relation<String> labels = DatabaseUtil.guessLabelRepresentation(database);
  final DBID firstid = DBIDUtil.deref(labels.iterDBIDs());
  final String firstlabel = labels.get(firstid);
  if (!firstlabel.matches(".*by.?label.*")) {
    throw new AbortException("No 'by label' reference outlier found, which is needed for weighting!");
  }
  relation = GreedyEnsembleExperiment.applyPrescaling(prescaling, relation, firstid);
  // Dimensionality and reference vector
  final int dim = RelationUtil.dimensionality(relation);
  final NumberVector refvec = relation.get(firstid);
  // Build the truth vector
  VectorNonZero pos = new VectorNonZero(refvec);
  ArrayModifiableDBIDs ids = DBIDUtil.newArray(relation.getDBIDs());
  ids.remove(firstid);
  ids.sort();
  final int size = ids.size();
  double[][] data = new double[size][size];
  DoubleMinMax minmax = new DoubleMinMax(), commax = new DoubleMinMax();
  {
    FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Computing ensemble gain.", size * (size + 1) >> 1, LOG) : null;
    // Vote combination buffer.
    double[] buf = new double[2];
    int a = 0;
    for (DBIDIter id = ids.iter(); id.valid(); id.advance(), a++) {
      final NumberVector veca = relation.get(id);
      // Direct AUC score:
      {
        double auc = ROCEvaluation.computeROCAUC(pos, new DecreasingVectorIter(veca));
        data[a][a] = auc;
        // minmax.put(auc);
        LOG.incrementProcessed(prog);
      }
      // Compare to others, exploiting symmetry
      DBIDArrayIter id2 = ids.iter();
      id2.seek(a + 1);
      for (int b = a + 1; b < size; b++, id2.advance()) {
        final NumberVector vecb = relation.get(id2);
        double[] combined = new double[dim];
        for (int d = 0; d < dim; d++) {
          buf[0] = veca.doubleValue(d);
          buf[1] = vecb.doubleValue(d);
          combined[d] = voting.combine(buf);
        }
        double auc = ROCEvaluation.computeROCAUC(pos, new DecreasingVectorIter(DoubleVector.wrap(combined)));
        // logger.verbose(auc + " " + labels.get(ids.get(a)) + " " + labels.get(ids.get(b)));
        data[a][b] = auc;
        data[b][a] = auc;
        commax.put(data[a][b]);
        // minmax.put(auc);
        LOG.incrementProcessed(prog);
      }
    }
    LOG.ensureCompleted(prog);
  }
  for (int a = 0; a < size; a++) {
    for (int b = a + 1; b < size; b++) {
      double ref = Math.max(data[a][a], data[b][b]);
      data[a][b] = (data[a][b] - ref) / (1 - ref);
      data[b][a] = (data[b][a] - ref) / (1 - ref);
      // logger.verbose(data[a][b] + " " + labels.get(ids.get(a)) + " " + labels.get(ids.get(b)));
      minmax.put(data[a][b]);
    }
  }
  for (int a = 0; a < size; a++) {
    data[a][a] = 0;
  }
  LOG.verbose("Gain: " + minmax.toString() + " AUC: " + commax.toString());
  boolean hasneg = (minmax.getMin() < -1E-3);
  LinearScaling scale;
  if (!hasneg) {
    scale = LinearScaling.fromMinMax(0., minmax.getMax());
  } else {
    scale = LinearScaling.fromMinMax(0.0, Math.max(minmax.getMax(), -minmax.getMin()));
  }
  // Note: the adaptive scale chosen above is immediately replaced by a fixed [0, 0.5] scaling.
  scale = LinearScaling.fromMinMax(0., .5);
  BufferedImage img = new BufferedImage(size, size, BufferedImage.TYPE_INT_RGB);
  for (int x = 0; x < size; x++) {
    for (int y = x; y < size; y++) {
      double val = data[x][y];
      val = Math.max(-1, Math.min(1., scale.getScaled(val)));
      // Compute color:
      final int col;
      {
        if (val >= 0) {
          int ival = 0xFF & (int) (255 * val);
          col = 0xff000000 | (ival << 8);
        } else {
          int ival = 0xFF & (int) (255 * -val);
          col = 0xff000000 | (ival << 16);
        }
      }
      img.setRGB(x, y, col);
      img.setRGB(y, x, col);
    }
  }
  SimilarityMatrix smat = new ComputeSimilarityMatrixImage.SimilarityMatrix(img, relation, ids);
  hier.add(database, smat);
  VisualizerContext context = vispar.newContext(hier, smat);
  // Attach visualizers to results
  SimilarityMatrixVisualizer factory = new SimilarityMatrixVisualizer();
  factory.processNewResult(context, database);
  VisualizationTree.findVis(context).filter(VisualizationTask.class).forEach(task -> {
    if (task.getFactory() == factory) {
      showVisualization(context, factory, task);
    }
  });
}
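To make the color mapping at the end of run() easier to follow, here is a minimal standalone sketch of the same scaling and channel encoding. It assumes only that LinearScaling.fromMinMax(min, max) maps [min, max] linearly onto [0, 1], which is how the code above uses it; the class name GainColorDemo and the sample gain values are invented for illustration.

import de.lmu.ifi.dbs.elki.utilities.scaling.LinearScaling;

public class GainColorDemo {
  public static void main(String[] args) {
    // Assumption: fromMinMax(min, max) maps [min, max] linearly onto [0, 1].
    LinearScaling scale = LinearScaling.fromMinMax(0., .5);
    // Made-up gain values; negative gain means the combination is worse than the best member.
    double[] gains = { -0.2, 0.0, 0.1, 0.25, 0.5 };
    for (double g : gains) {
      // Clamp to [-1, 1], as in the rendering loop above.
      double val = Math.max(-1, Math.min(1., scale.getScaled(g)));
      final int col;
      if (val >= 0) {
        int ival = 0xFF & (int) (255 * val);
        col = 0xff000000 | (ival << 8); // positive gain -> green channel
      } else {
        int ival = 0xFF & (int) (255 * -val);
        col = 0xff000000 | (ival << 16); // negative gain -> red channel
      }
      System.out.println(String.format("gain %+5.2f -> scaled %+5.2f -> ARGB 0x%08X", g, val, col));
    }
  }
}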
Use of de.lmu.ifi.dbs.elki.utilities.scaling.LinearScaling in project elki by elki-project.
The class JudgeOutlierScores, method computeScore(): maps outlier scores into [0, 1] (using a LinearScaling when the scaling function reports a finite range) and averages them separately over inliers and outliers.
/**
 * Evaluate a single outlier score result.
 *
 * @param ids Inlier IDs
 * @param outlierIds Outlier IDs
 * @param or Outlier Result to evaluate
 * @return Outlier score result
 * @throws IllegalStateException
 */
protected ScoreResult computeScore(DBIDs ids, DBIDs outlierIds, OutlierResult or) throws IllegalStateException {
  if (scaling instanceof OutlierScalingFunction) {
    OutlierScalingFunction oscaling = (OutlierScalingFunction) scaling;
    oscaling.prepare(or);
  }
  final ScalingFunction innerScaling;
  // If we have useful (finite) min/max, use these for binning.
  double min = scaling.getMin();
  double max = scaling.getMax();
  if (Double.isInfinite(min) || Double.isNaN(min) || Double.isInfinite(max) || Double.isNaN(max)) {
    innerScaling = new IdentityScaling();
    // TODO: does the outlier score give us this guarantee?
    LOG.warning("JudgeOutlierScores expects values between 0.0 and 1.0, but we don't have such a guarantee by the scaling function: min:" + min + " max:" + max);
  } else {
    if (min == 0.0 && max == 1.0) {
      innerScaling = new IdentityScaling();
    } else {
      innerScaling = new LinearScaling(1.0 / (max - min), -min);
    }
  }
  double posscore = 0.0;
  double negscore = 0.0;
  // fill histogram with values of each object
  for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
    double result = or.getScores().doubleValue(iter);
    result = innerScaling.getScaled(scaling.getScaled(result));
    posscore += (1.0 - result);
  }
  for (DBIDIter iter = outlierIds.iter(); iter.valid(); iter.advance()) {
    double result = or.getScores().doubleValue(iter);
    result = innerScaling.getScaled(scaling.getScaled(result));
    negscore += result;
  }
  posscore /= ids.size();
  negscore /= outlierIds.size();
  LOG.verbose("Scores: " + posscore + " " + negscore);
  ArrayList<double[]> s = new ArrayList<>(1);
  s.add(new double[] { (posscore + negscore) * .5, posscore, negscore });
  return new ScoreResult(s);
}
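The quantity returned by computeScore() is the mean of two averages: (1 - scaled score) over the inlier IDs and the scaled score over the outlier IDs, after the scores have been mapped into [0, 1]. Below is a minimal sketch of that bookkeeping, using plain arrays of already-normalized scores in place of the OutlierResult and scaling machinery; the class name and score values are hypothetical.

public class JudgeScoresSketch {
  public static void main(String[] args) {
    // Hypothetical, already [0, 1]-scaled outlier scores.
    double[] inlierScores = { 0.1, 0.2, 0.05, 0.15 }; // should be low
    double[] outlierScores = { 0.9, 0.7, 0.95 };      // should be high
    double posscore = 0.0, negscore = 0.0;
    for (double s : inlierScores) {
      posscore += (1.0 - s); // reward low scores on inliers
    }
    for (double s : outlierScores) {
      negscore += s;         // reward high scores on outliers
    }
    posscore /= inlierScores.length;
    negscore /= outlierScores.length;
    // Combined score as in ScoreResult: the mean of both components.
    System.out.println("pos=" + posscore + " neg=" + negscore + " combined=" + (posscore + negscore) * .5);
  }
}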
Use of de.lmu.ifi.dbs.elki.utilities.scaling.LinearScaling in project elki by elki-project.
The class ComputeSimilarityMatrixImage, method computeSimilarityMatrixImage(): computes all pairwise distances, normalizes them to [0, 1] with a LinearScaling built from the observed range, and renders them as a grayscale image.
/**
 * Compute the actual similarity image.
 *
 * @param relation Relation
 * @param iter DBID iterator
 * @return result object
 */
private SimilarityMatrix computeSimilarityMatrixImage(Relation<O> relation, DBIDIter iter) {
  ArrayModifiableDBIDs order = DBIDUtil.newArray(relation.size());
  for (; iter.valid(); iter.advance()) {
    order.add(iter);
  }
  if (order.size() != relation.size()) {
    throw new IllegalStateException("Iterable result doesn't match database size - incomplete ordering?");
  }
  DistanceQuery<O> dq = distanceFunction.instantiate(relation);
  final int size = order.size();
  // When the logging is in the outer loop, it's just 2*size (providing enough resolution)
  // size * (size + 1);
  final int ltotal = 2 * size;
  FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Similarity Matrix Image", ltotal, LOG) : null;
  // Note: we assume that we have an efficient distance cache available,
  // since we are using 2*O(n*n) distance computations.
  DoubleMinMax minmax = new DoubleMinMax();
  {
    DBIDArrayIter id1 = order.iter();
    DBIDArrayIter id2 = order.iter();
    for (; id1.valid(); id1.advance()) {
      id2.seek(id1.getOffset());
      for (; id2.valid(); id2.advance()) {
        final double dist = dq.distance(id1, id2);
        if (!Double.isNaN(dist) && !Double.isInfinite(dist)) /* && dist > 0.0 */ {
          if (!skipzero || dist > 0.0) {
            minmax.put(dist);
          }
        }
      }
      LOG.incrementProcessed(prog);
    }
  }
  double zoom = minmax.getMax() - minmax.getMin();
  if (zoom > 0.0) {
    zoom = 1. / zoom;
  }
  LinearScaling scale = new LinearScaling(zoom, -minmax.getMin() * zoom);
  BufferedImage img = new BufferedImage(size, size, BufferedImage.TYPE_INT_RGB);
  {
    DBIDArrayIter id1 = order.iter();
    DBIDArrayIter id2 = order.iter();
    for (int x = 0; x < size && id1.valid(); x++, id1.advance()) {
      id2.seek(id1.getOffset());
      for (int y = x; y < size && id2.valid(); y++, id2.advance()) {
        double ddist = dq.distance(id1, id2);
        if (ddist > 0.0) {
          ddist = scale.getScaled(ddist);
        }
        // Apply extra scaling
        if (scaling != null) {
          ddist = scaling.getScaled(ddist);
        }
        int dist = 0xFF & (int) (255 * ddist);
        int col = 0xff000000 | (dist << 16) | (dist << 8) | dist;
        img.setRGB(x, y, col);
        img.setRGB(y, x, col);
      }
      LOG.incrementProcessed(prog);
    }
  }
  LOG.ensureCompleted(prog);
  return new SimilarityMatrix(img, relation, order);
}
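The grayscale image above relies on a LinearScaling built from the observed distance range. The following sketch isolates that step, assuming the two-argument constructor LinearScaling(factor, shift) evaluates factor * d + shift, which is the reading under which the code above maps the minimum distance to 0 and the maximum to 1; the class name and distance values are made up for illustration.

import de.lmu.ifi.dbs.elki.utilities.scaling.LinearScaling;

public class DistanceToGrayDemo {
  public static void main(String[] args) {
    // Hypothetical observed distance range.
    double min = 0.5, max = 4.5;
    double zoom = 1. / (max - min);
    // As in computeSimilarityMatrixImage: maps min -> 0 and max -> 1,
    // assuming getScaled(d) evaluates factor * d + shift.
    LinearScaling scale = new LinearScaling(zoom, -min * zoom);
    for (double dist : new double[] { 0.5, 1.0, 2.5, 4.5 }) {
      double scaled = scale.getScaled(dist);
      int gray = 0xFF & (int) (255 * scaled);
      int col = 0xff000000 | (gray << 16) | (gray << 8) | gray; // grayscale ARGB pixel
      System.out.println(String.format("dist %.2f -> %.2f -> gray %3d (0x%08X)", dist, scaled, gray, col));
    }
  }
}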
Use of de.lmu.ifi.dbs.elki.utilities.scaling.LinearScaling in project elki by elki-project.
The class AutomaticEvaluation, method autoEvaluateOutliers(): identifies the minority class label of a reference clustering and runs the standard outlier evaluations (ranking measures, ROC, precision@k, precision-recall, histogram) against it.
protected void autoEvaluateOutliers(ResultHierarchy hier, Result newResult) {
  Collection<OutlierResult> outliers = ResultUtil.filterResults(hier, newResult, OutlierResult.class);
  if (LOG.isDebugging()) {
    LOG.debug("Number of new outlier results: " + outliers.size());
  }
  if (!outliers.isEmpty()) {
    Database db = ResultUtil.findDatabase(hier);
    ensureClusteringResult(db, db);
    Collection<Clustering<?>> clusterings = ResultUtil.filterResults(hier, db, Clustering.class);
    if (clusterings.isEmpty()) {
      LOG.warning("Could not find a clustering result, even after running 'ensureClusteringResult'?!?");
      return;
    }
    Clustering<?> basec = clusterings.iterator().next();
    // Find minority class label
    int min = Integer.MAX_VALUE;
    int total = 0;
    String label = null;
    if (basec.getAllClusters().size() > 1) {
      for (Cluster<?> c : basec.getAllClusters()) {
        final int csize = c.getIDs().size();
        total += csize;
        if (csize < min) {
          min = csize;
          label = c.getName();
        }
      }
    }
    if (label == null) {
      LOG.warning("Could not evaluate outlier results, as I could not find a minority label.");
      return;
    }
    if (min == 1) {
      LOG.warning("The minority class label had a single object. Try using 'ClassLabelFilter' to identify the class label column.");
    }
    if (min > 0.05 * total) {
      LOG.warning("The minority class I discovered (labeled '" + label + "') has " + (min * 100. / total) + "% of objects. Outlier classes should be more rare!");
    }
    LOG.verbose("Evaluating using minority class: " + label);
    Pattern pat = Pattern.compile("^" + Pattern.quote(label) + "$");
    // Evaluate rankings.
    new OutlierRankingEvaluation(pat).processNewResult(hier, newResult);
    // Compute ROC curve
    new OutlierROCCurve(pat).processNewResult(hier, newResult);
    // Compute Precision at k
    new OutlierPrecisionAtKCurve(pat, min << 1).processNewResult(hier, newResult);
    // Compute precision-recall curve
    new OutlierPrecisionRecallCurve(pat).processNewResult(hier, newResult);
    // Compute outlier histogram
    new ComputeOutlierHistogram(pat, 50, new LinearScaling(), false).processNewResult(hier, newResult);
  }
}
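The evaluation step hinges on identifying the smallest cluster of the reference clustering and turning its name into an exact-match pattern via Pattern.quote. Below is a standalone sketch of that selection logic, using a plain map of hypothetical cluster names and sizes instead of an ELKI Clustering.

import java.util.LinkedHashMap;
import java.util.Map;
import java.util.regex.Pattern;

public class MinorityLabelSketch {
  public static void main(String[] args) {
    // Hypothetical cluster sizes from a "by label" clustering.
    Map<String, Integer> clusterSizes = new LinkedHashMap<>();
    clusterSizes.put("no", 950);
    clusterSizes.put("yes", 50);

    int min = Integer.MAX_VALUE, total = 0;
    String label = null;
    if (clusterSizes.size() > 1) {
      for (Map.Entry<String, Integer> e : clusterSizes.entrySet()) {
        total += e.getValue();
        if (e.getValue() < min) {
          min = e.getValue();
          label = e.getKey();
        }
      }
    }
    if (label == null) {
      System.err.println("No minority label found - cannot evaluate.");
      return;
    }
    if (min > 0.05 * total) {
      System.err.println("Warning: minority class covers " + (min * 100. / total) + "% of the data.");
    }
    // Exact-match pattern, as used to configure the outlier evaluators above.
    Pattern pat = Pattern.compile("^" + Pattern.quote(label) + "$");
    System.out.println("Minority label: " + label + ", pattern: " + pat.pattern());
  }
}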