use of de.lmu.ifi.dbs.elki.utilities.scaling.outlier.OutlierScalingFunction in project elki by elki-project.
the class BubbleVisualization method makeVisualization.
/**
 * Create the visualization instance for the given task.
 *
 * If the configured scaling is an {@link OutlierScalingFunction}, it is
 * prepared on the task's outlier result before the instance is constructed.
 *
 * @param context Visualizer context
 * @param task Visualization task, also supplies the outlier result
 * @param plot Plot to draw into
 * @param width Width
 * @param height Height
 * @param proj Projection
 * @return New visualization instance
 */
@Override
public Visualization makeVisualization(VisualizerContext context, VisualizationTask task, VisualizationPlot plot, double width, double height, Projection proj) {
  // instanceof already evaluates to false for null, so no separate null test
  // is needed here.
  if (settings.scaling instanceof OutlierScalingFunction) {
    final OutlierResult outlierResult = task.getResult();
    final OutlierScalingFunction outlierScaling = (OutlierScalingFunction) settings.scaling;
    outlierScaling.prepare(outlierResult);
  }
  return new Instance(context, task, plot, width, height, proj);
}
use of de.lmu.ifi.dbs.elki.utilities.scaling.outlier.OutlierScalingFunction in project elki by elki-project.
the class ExternalDoubleOutlierScore method run.
/**
 * Load outlier scores from the configured external file and wrap them as an
 * outlier result for the given relation.
 *
 * Every non-comment line is split into tokens. A token matching the ID pattern
 * supplies the object id (parsed from the end of the match to the end of the
 * token), a token matching the score pattern supplies the score in the same
 * way. Lines with neither are logged and skipped; lines with only one of the
 * two abort. Afterwards all stored scores are replaced by their scaled values.
 *
 * @param database Database to use
 * @param relation Relation to use
 * @return Result
 */
public OutlierResult run(Database database, Relation<?> relation) {
  WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC);
  DoubleMinMax rawRange = new DoubleMinMax();
  try (InputStream in = FileUtil.tryGzipInput(new FileInputStream(file)); //
      TokenizedReader reader = CSVReaderFormat.DEFAULT_FORMAT.makeReader()) {
    Tokenizer tokenizer = reader.getTokenizer();
    CharSequence buf = reader.getBuffer();
    // Both matchers are bound to the reader's buffer once; only their region
    // is moved from token to token below.
    Matcher idMatcher = idpattern.matcher(buf);
    Matcher scoreMatcher = scorepattern.matcher(buf);
    reader.reset(in);
    while (reader.nextLineExceptComments()) {
      Integer id = null;
      // NaN doubles as the "no score seen on this line yet" marker.
      double score = Double.NaN;
      // The tokenizer has been positioned by nextLineExceptComments().
      while (tokenizer.valid()) {
        final int tokenStart = tokenizer.getStart();
        final int tokenEnd = tokenizer.getEnd();
        idMatcher.region(tokenStart, tokenEnd);
        scoreMatcher.region(tokenStart, tokenEnd);
        final boolean isId = idMatcher.find();
        final boolean isScore = scoreMatcher.find();
        if (isId && isScore) {
          throw new AbortException("ID pattern and score pattern both match value: " + tokenizer.getSubstring());
        }
        // At most one of the two flags is set from here on.
        if (isId) {
          if (id != null) {
            throw new AbortException("ID pattern matched twice: previous value " + id + " second value: " + tokenizer.getSubstring());
          }
          id = ParseUtil.parseIntBase10(buf, idMatcher.end(), tokenEnd);
        } else if (isScore) {
          if (!Double.isNaN(score)) {
            throw new AbortException("Score pattern matched twice: previous value " + score + " second value: " + tokenizer.getSubstring());
          }
          score = ParseUtil.parseDouble(buf, scoreMatcher.end(), tokenEnd);
        }
        tokenizer.advance();
      }
      final boolean hasId = (id != null);
      final boolean hasScore = !Double.isNaN(score);
      if (hasId != hasScore) {
        throw new AbortException("Line matched only ID or only SCORE patterns: " + reader.getLineNumber());
      }
      if (!hasId) {
        LOG.warning("Line did not match either ID nor score nor comment: " + reader.getLineNumber());
        continue;
      }
      scores.putDouble(DBIDUtil.importInteger(id), score);
      rawRange.put(score);
    }
  } catch (IOException e) {
    throw new AbortException("Could not load outlier scores: " + e.getMessage() + " when loading " + file, e);
  }
  // Meta data describing the unscaled scores; only used to prepare scaling.
  final OutlierScoreMeta rawMeta;
  if (inverted) {
    rawMeta = new InvertedOutlierScoreMeta(rawRange.getMin(), rawRange.getMax());
  } else {
    rawMeta = new BasicOutlierScoreMeta(rawRange.getMin(), rawRange.getMax());
  }
  DoubleRelation scoresult = new MaterializedDoubleRelation("External Outlier", "external-outlier", scores, relation.getDBIDs());
  final OutlierResult rawResult = new OutlierResult(rawMeta, scoresult);
  // Apply scaling
  if (scaling instanceof OutlierScalingFunction) {
    ((OutlierScalingFunction) scaling).prepare(rawResult);
  }
  // Scaled values overwrite the raw ones in the store backing scoresult.
  DoubleMinMax scaledRange = new DoubleMinMax();
  for (DBIDIter it = relation.iterDBIDs(); it.valid(); it.advance()) {
    final double scaled = scaling.getScaled(scoresult.doubleValue(it));
    scores.putDouble(it, scaled);
    scaledRange.put(scaled);
  }
  // NOTE(review): the final meta is always a BasicOutlierScoreMeta, so the
  // "inverted" flag is not reflected in the returned result - confirm that
  // this is intended for every scaling function.
  final OutlierScoreMeta scaledMeta = new BasicOutlierScoreMeta(scaledRange.getMin(), scaledRange.getMax());
  return new OutlierResult(scaledMeta, scoresult);
}
use of de.lmu.ifi.dbs.elki.utilities.scaling.outlier.OutlierScalingFunction in project elki by elki-project.
the class ComputeKNNOutlierScores method writeResult.
/**
 * Write one output line: the label followed by the (optionally scaled) score
 * of every object in {@code ids}, separated by single spaces.
 *
 * @param out Output stream
 * @param ids DBIDs
 * @param result Outlier result
 * @param scaling Scaling function, may be {@code null} to write raw scores
 * @param label Identification label
 */
void writeResult(PrintStream out, DBIDs ids, OutlierResult result, ScalingFunction scaling, String label) {
  // Outlier scaling functions are prepared on the result before use.
  if (scaling instanceof OutlierScalingFunction) {
    final OutlierScalingFunction outlierScaling = (OutlierScalingFunction) scaling;
    outlierScaling.prepare(result);
  }
  out.append(label);
  final DoubleRelation scores = result.getScores();
  for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
    final double raw = scores.doubleValue(iter);
    final double value;
    if (scaling == null) {
      value = raw;
    } else {
      value = scaling.getScaled(raw);
    }
    out.append(' ');
    out.append(Double.toString(value));
  }
  out.append(FormatUtil.NEWLINE);
}
use of de.lmu.ifi.dbs.elki.utilities.scaling.outlier.OutlierScalingFunction in project elki by elki-project.
the class OutlierThresholdClustering method split.
/**
 * Partition the objects of an outlier result into clusters by comparing their
 * (optionally scaled) score against the configured thresholds.
 *
 * An object is placed into the bucket of the first threshold that its score is
 * strictly below; if there is none, it goes into the last bucket. Bucket 0 is
 * named "Inlier", bucket i is named after threshold i-1.
 *
 * @param or Outlier result to split
 * @return Clustering with one top-level cluster per bucket
 */
private Clustering<Model> split(OutlierResult or) {
  final DoubleRelation scores = or.getScores();
  if (scaling instanceof OutlierScalingFunction) {
    final OutlierScalingFunction outlierScaling = (OutlierScalingFunction) scaling;
    outlierScaling.prepare(or);
  }
  // One bucket more than there are thresholds.
  final int numBuckets = threshold.length + 1;
  ArrayList<ModifiableDBIDs> buckets = new ArrayList<>(numBuckets);
  for (int b = 0; b < numBuckets; b++) {
    buckets.add(DBIDUtil.newHashSet());
  }
  for (DBIDIter iter = scores.getDBIDs().iter(); iter.valid(); iter.advance()) {
    final double raw = scores.doubleValue(iter);
    final double score = (scaling != null) ? scaling.getScaled(raw) : raw;
    int bucket = 0;
    // Written as !(score < t) rather than score >= t so that a NaN score
    // walks past every threshold and ends up in the last bucket.
    while (bucket < threshold.length && !(score < threshold[bucket])) {
      bucket++;
    }
    buckets.get(bucket).add(iter);
  }
  Clustering<Model> c = new Clustering<>("Outlier threshold clustering", "threshold-clustering");
  // Bucket 0 is the inlier cluster; its last constructor flag is false.
  c.addToplevelCluster(new Cluster<>("Inlier", buckets.get(0), false));
  for (int b = 1; b < numBuckets; b++) {
    String name = "Outlier_" + threshold[b - 1];
    c.addToplevelCluster(new Cluster<>(name, buckets.get(b), true));
  }
  return c;
}
use of de.lmu.ifi.dbs.elki.utilities.scaling.outlier.OutlierScalingFunction in project elki by elki-project.
the class ComputeOutlierHistogram method evaluateOutlierResult.
/**
 * Evaluate a single outlier result as histogram.
 *
 * Each bin accumulates a pair of weights: the first component for objects
 * that are not in the label-matched set, the second component for the objects
 * returned by the label match on {@code positiveClassName}. Scores are scaled
 * before binning and non-finite scaled scores are left out. Each output row
 * is {@code { bin center, first component, second component }}.
 *
 * @param database Database to process
 * @param or Outlier result
 * @return Result
 */
public HistogramResult evaluateOutlierResult(Database database, OutlierResult or) {
  if (scaling instanceof OutlierScalingFunction) {
    ((OutlierScalingFunction) scaling).prepare(or);
  }
  ModifiableDBIDs ids = DBIDUtil.newHashSet(or.getScores().getDBIDs());
  DBIDs outlierIds = DatabaseUtil.getObjectsByLabelMatch(database, positiveClassName);
  // Use a fixed-range histogram only when the scaling reports usable
  // (finite, non-NaN) bounds; otherwise fall back to a dynamic histogram.
  final double lo = scaling.getMin();
  final double hi = scaling.getMax();
  final boolean boundedRange = !(Double.isInfinite(lo) || Double.isNaN(lo) || Double.isInfinite(hi) || Double.isNaN(hi));
  final ObjHistogram<DoubleDoublePair> hist;
  if (boundedRange) {
    hist = new AbstractObjStaticHistogram<DoubleDoublePair>(bins, lo, hi) {
      @Override
      protected DoubleDoublePair makeObject() {
        return new DoubleDoublePair(0., 0.);
      }

      @Override
      public void putData(double coord, DoubleDoublePair data) {
        // Add component-wise onto the pair already stored for this bin.
        DoubleDoublePair bin = get(coord);
        bin.first += data.first;
        bin.second += data.second;
      }
    };
  } else {
    hist = new AbstractObjDynamicHistogram<DoubleDoublePair>(bins) {
      @Override
      public DoubleDoublePair aggregate(DoubleDoublePair first, DoubleDoublePair second) {
        first.first += second.first;
        first.second += second.second;
        return first;
      }

      @Override
      protected DoubleDoublePair makeObject() {
        return new DoubleDoublePair(0., 0.);
      }

      @Override
      protected DoubleDoublePair cloneForCache(DoubleDoublePair data) {
        return new DoubleDoublePair(data.first, data.second);
      }

      @Override
      protected DoubleDoublePair downsample(Object[] data, int start, int end, int size) {
        // Sum up all non-null pairs in [start, end).
        DoubleDoublePair total = new DoubleDoublePair(0, 0);
        for (int pos = start; pos < end; pos++) {
          DoubleDoublePair entry = (DoubleDoublePair) data[pos];
          if (entry == null) {
            continue;
          }
          total.first += entry.first;
          total.second += entry.second;
        }
        return total;
      }
    };
  }
  // Per-object weights. These must be computed while ids still contains the
  // label-matched objects, i.e. before removeDBIDs() below.
  final int total = ids.size();
  final double negWeight, posWeight;
  if (splitfreq) {
    // Normalize each group by its own size.
    negWeight = 1. / (total - outlierIds.size());
    posWeight = 1. / outlierIds.size();
  } else {
    // Normalize both groups by the overall number of objects.
    negWeight = 1. / total;
    posWeight = 1. / total;
  }
  final DoubleDoublePair negative = new DoubleDoublePair(negWeight, 0);
  final DoubleDoublePair positive = new DoubleDoublePair(0, posWeight);
  ids.removeDBIDs(outlierIds);
  // Remaining (not label-matched) objects contribute to the first component.
  for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
    final double scaled = scaling.getScaled(or.getScores().doubleValue(iter));
    if (!(Double.isInfinite(scaled) || Double.isNaN(scaled))) {
      hist.putData(scaled, negative);
    }
  }
  // Label-matched objects contribute to the second component.
  for (DBIDIter iter = outlierIds.iter(); iter.valid(); iter.advance()) {
    final double scaled = scaling.getScaled(or.getScores().doubleValue(iter));
    if (!(Double.isInfinite(scaled) || Double.isNaN(scaled))) {
      hist.putData(scaled, positive);
    }
  }
  // Flatten the histogram into rows.
  Collection<double[]> collHist = new ArrayList<>(hist.getNumBins());
  for (ObjHistogram.Iter<DoubleDoublePair> bin = hist.iter(); bin.valid(); bin.advance()) {
    DoubleDoublePair weights = bin.getValue();
    collHist.add(new double[] { bin.getCenter(), weights.first, weights.second });
  }
  return new HistogramResult("Outlier Score Histogram", "outlier-histogram", collHist);
}
Aggregations