Use of de.lmu.ifi.dbs.elki.database.ids.DBIDs in project elki by elki-project.
The class HiCS, method run.
/**
 * Perform HiCS on a given database.
 *
 * @param relation the relation to process
 * @return The aggregated resulting scores that were assigned by the given
 *         outlier detection algorithm
 */
public OutlierResult run(Relation<V> relation) {
  final DBIDs ids = relation.getDBIDs();
  ArrayList<ArrayDBIDs> subspaceIndex = buildOneDimIndexes(relation);
  Set<HiCSSubspace> subspaces = calculateSubspaces(relation, subspaceIndex, rnd.getSingleThreadedRandom());
  if (LOG.isVerbose()) {
    LOG.verbose("Number of high-contrast subspaces: " + subspaces.size());
  }
  List<DoubleRelation> results = new ArrayList<>();
  FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Calculating Outlier scores for high Contrast subspaces", subspaces.size(), LOG) : null;
  // TODO: use a materialized relation instead of SubspaceEuclideanDistanceFunction?
  for (HiCSSubspace dimset : subspaces) {
    if (LOG.isVerbose()) {
      LOG.verbose("Performing outlier detection in subspace " + dimset);
    }
    ProxyDatabase pdb = new ProxyDatabase(ids);
    pdb.addRelation(new ProjectedView<>(relation, new NumericalFeatureSelection<V>(dimset)));
    // run the configured outlier algorithm and collect the result
    OutlierResult result = outlierAlgorithm.run(pdb);
    results.add(result.getScores());
    LOG.incrementProcessed(prog);
  }
  LOG.ensureCompleted(prog);
  WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC);
  DoubleMinMax minmax = new DoubleMinMax();
  for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
    double sum = 0.0;
    for (DoubleRelation r : results) {
      final double s = r.doubleValue(iditer);
      if (!Double.isNaN(s)) {
        sum += s;
      }
    }
    scores.putDouble(iditer, sum);
    minmax.put(sum);
  }
  OutlierScoreMeta meta = new BasicOutlierScoreMeta(minmax.getMin(), minmax.getMax());
  DoubleRelation scoreres = new MaterializedDoubleRelation("HiCS", "HiCS-outlier", scores, relation.getDBIDs());
  return new OutlierResult(meta, scoreres);
}
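The final loop above aggregates the per-subspace results by summing, for every object, the scores it received in each high-contrast subspace, skipping NaN entries. The following standalone sketch (hypothetical class and method names, not part of the ELKI API) shows the same NaN-tolerant aggregation on plain arrays, one row per subspace and one column per object:

// Hypothetical illustration of the NaN-tolerant score aggregation used above.
public class ScoreAggregationSketch {
  /** Sum scores over subspaces for each object, ignoring NaN entries. */
  static double[] aggregate(double[][] subspaceScores, int numObjects) {
    double[] sum = new double[numObjects];
    for (double[] scores : subspaceScores) {
      for (int i = 0; i < numObjects; i++) {
        if (!Double.isNaN(scores[i])) {
          sum[i] += scores[i];
        }
      }
    }
    return sum;
  }

  public static void main(String[] args) {
    double[][] scores = { { 1.2, 0.8, Double.NaN }, { 0.5, 2.0, 1.0 } };
    // prints [1.7, 2.8, 1.0]
    System.out.println(java.util.Arrays.toString(aggregate(scores, 3)));
  }
}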
Use of de.lmu.ifi.dbs.elki.database.ids.DBIDs in project elki by elki-project.
The class HiCS, method calculateContrast.
/**
 * Calculates the actual contrast of a given subspace.
 *
 * @param relation Relation to process
 * @param subspace Subspace
 * @param subspaceIndex Subspace indexes
 * @param random Random generator
 */
private void calculateContrast(Relation<? extends NumberVector> relation, HiCSSubspace subspace, ArrayList<ArrayDBIDs> subspaceIndex, Random random) {
  final int card = subspace.cardinality();
  final double alpha1 = FastMath.pow(alpha, (1.0 / card));
  final int windowsize = (int) (relation.size() * alpha1);
  final FiniteProgress prog = LOG.isDebugging() ? new FiniteProgress("Monte-Carlo iterations", m, LOG) : null;
  int retries = 0;
  double deviationSum = 0.0;
  for (int i = 0; i < m; i++) {
    // Choose a random set bit.
    int chosen = -1;
    for (int tmp = random.nextInt(card); tmp >= 0; tmp--) {
      chosen = subspace.nextSetBit(chosen + 1);
    }
    // initialize sample
    DBIDs conditionalSample = relation.getDBIDs();
    for (int j = subspace.nextSetBit(0); j >= 0; j = subspace.nextSetBit(j + 1)) {
      if (j == chosen) {
        continue;
      }
      ArrayDBIDs sortedIndices = subspaceIndex.get(j);
      ArrayModifiableDBIDs indexBlock = DBIDUtil.newArray(windowsize);
      // initialize index block
      DBIDArrayIter iter = sortedIndices.iter();
      iter.seek(random.nextInt(relation.size() - windowsize));
      for (int k = 0; k < windowsize; k++, iter.advance()) {
        // select index block
        indexBlock.add(iter);
      }
      conditionalSample = DBIDUtil.intersection(conditionalSample, indexBlock);
    }
    if (conditionalSample.size() < 10) {
      retries++;
      if (LOG.isDebugging()) {
        LOG.debug("Sample size very small. Retry no. " + retries);
      }
      if (retries >= MAX_RETRIES) {
        LOG.warning("Too many retries, for small samples: " + retries);
      } else {
        i--;
        continue;
      }
    }
    // Project conditional set
    double[] sampleValues = new double[conditionalSample.size()];
    {
      int l = 0;
      for (DBIDIter iter = conditionalSample.iter(); iter.valid(); iter.advance()) {
        sampleValues[l] = relation.get(iter).doubleValue(chosen);
        l++;
      }
    }
    // Project full set
    double[] fullValues = new double[relation.size()];
    {
      int l = 0;
      for (DBIDIter iter = subspaceIndex.get(chosen).iter(); iter.valid(); iter.advance()) {
        fullValues[l] = relation.get(iter).doubleValue(chosen);
        l++;
      }
    }
    double contrast = statTest.deviation(fullValues, sampleValues);
    if (Double.isNaN(contrast)) {
      i--;
      LOG.warning("Contrast was NaN");
      continue;
    }
    deviationSum += contrast;
    LOG.incrementProcessed(prog);
  }
  LOG.ensureCompleted(prog);
  subspace.contrast = deviationSum / m;
}
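Each Monte-Carlo iteration above slices every dimension of the subspace except the randomly chosen one down to a window of roughly an alpha^(1/d) fraction of the data, then measures how much the chosen attribute's distribution in the slice deviates from its distribution over the full data (statTest.deviation). The sketch below (hypothetical class, not ELKI code) illustrates the adaptive slice size and a two-sample Kolmogorov-Smirnov statistic, which is one of the tests HiCS can use; the deviation measure is configurable in the original:

// Hypothetical sketch: adaptive slice size and a simple two-sample KS statistic.
public class ContrastSketch {
  /** Fraction of data kept per conditioned dimension: n * alpha^(1/d). */
  static int sliceSize(int n, double alpha, int dimensionality) {
    return (int) (n * Math.pow(alpha, 1.0 / dimensionality));
  }

  /** Two-sample Kolmogorov-Smirnov statistic: maximum ECDF difference. */
  static double ksDeviation(double[] full, double[] sample) {
    double[] a = full.clone(), b = sample.clone();
    java.util.Arrays.sort(a);
    java.util.Arrays.sort(b);
    int i = 0, j = 0;
    double max = 0.0;
    while (i < a.length && j < b.length) {
      double x = Math.min(a[i], b[j]);
      while (i < a.length && a[i] <= x) { i++; } // advance past ties in the full set
      while (j < b.length && b[j] <= x) { j++; } // advance past ties in the sample
      double d = Math.abs((double) i / a.length - (double) j / b.length);
      max = d > max ? d : max;
    }
    return max;
  }

  public static void main(String[] args) {
    System.out.println(sliceSize(1000, 0.1, 3)); // ~464 objects per slice
    System.out.println(ksDeviation(new double[] { 1, 2, 3, 4, 5 }, new double[] { 4, 5, 6 })); // 0.6
  }
}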
Use of de.lmu.ifi.dbs.elki.database.ids.DBIDs in project elki by elki-project.
The class ParallelSimplifiedLOF, method run.
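/**
 * Run the parallel Simplified-LOF algorithm.
 *
 * @param database Database to query
 * @param relation Data to process
 * @return Simplified-LOF outlier result
 */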
public OutlierResult run(Database database, Relation<O> relation) {
  DBIDs ids = relation.getDBIDs();
  DistanceQuery<O> distq = database.getDistanceQuery(relation, getDistanceFunction());
  KNNQuery<O> knnq = database.getKNNQuery(distq, k + 1);
  // Phase one: KNN and k-dist
  WritableDataStore<KNNList> knns = DataStoreUtil.makeStorage(ids, DataStoreFactory.HINT_DB, KNNList.class);
  {
    // Compute kNN
    KNNProcessor<O> knnm = new KNNProcessor<>(k + 1, knnq);
    SharedObject<KNNList> knnv = new SharedObject<>();
    WriteDataStoreProcessor<KNNList> storek = new WriteDataStoreProcessor<>(knns);
    knnm.connectKNNOutput(knnv);
    storek.connectInput(knnv);
    ParallelExecutor.run(ids, knnm, storek);
  }
  // Phase two: simplified-lrd
  WritableDoubleDataStore lrds = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_DB);
  {
    SimplifiedLRDProcessor lrdm = new SimplifiedLRDProcessor(knns);
    SharedDouble lrdv = new SharedDouble();
    WriteDoubleDataStoreProcessor storelrd = new WriteDoubleDataStoreProcessor(lrds);
    lrdm.connectOutput(lrdv);
    storelrd.connectInput(lrdv);
    ParallelExecutor.run(ids, lrdm, storelrd);
  }
  // Phase three: Simplified-LOF
  WritableDoubleDataStore lofs = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_DB);
  DoubleMinMax minmax;
  {
    LOFProcessor lofm = new LOFProcessor(knns, lrds, true);
    SharedDouble lofv = new SharedDouble();
    DoubleMinMaxProcessor mmm = new DoubleMinMaxProcessor();
    WriteDoubleDataStoreProcessor storelof = new WriteDoubleDataStoreProcessor(lofs);
    lofm.connectOutput(lofv);
    mmm.connectInput(lofv);
    storelof.connectInput(lofv);
    ParallelExecutor.run(ids, lofm, storelof, mmm);
    minmax = mmm.getMinMax();
  }
  DoubleRelation scoreres = new MaterializedDoubleRelation("Simplified Local Outlier Factor", "simplified-lof-outlier", lofs, ids);
  OutlierScoreMeta meta = new BasicOutlierScoreMeta(minmax.getMin(), minmax.getMax(), 0.0, Double.POSITIVE_INFINITY, 1.0);
  return new OutlierResult(meta, scoreres);
}
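The three parallel phases above correspond to the usual definition of Simplified-LOF: kNN search, then lrd(p) = 1 / mean distance to the k nearest neighbors, then LOF(p) = mean lrd of the neighbors divided by lrd(p). A compact sequential sketch of the same three phases (hypothetical class with a naive O(n^2) neighbor search, not the ELKI parallel processor framework):

// Hypothetical sequential sketch of the three Simplified-LOF phases above.
import java.util.Arrays;
import java.util.Comparator;

public class SimplifiedLOFSketch {
  public static double[] simplifiedLOF(double[][] points, int k) {
    int n = points.length;
    int[][] knn = new int[n][];
    double[] lrd = new double[n];
    // Phase one: naive kNN, excluding the query point itself (assumes n > k).
    for (int i = 0; i < n; i++) {
      Integer[] idx = new Integer[n];
      for (int j = 0; j < n; j++) { idx[j] = j; }
      final int q = i;
      Arrays.sort(idx, Comparator.comparingDouble(j -> dist(points[q], points[j])));
      knn[i] = new int[k];
      for (int j = 0; j < k; j++) { knn[i][j] = idx[j + 1]; } // idx[0] is the point itself
    }
    // Phase two: simplified lrd = 1 / mean distance to the k nearest neighbors.
    for (int i = 0; i < n; i++) {
      double sum = 0;
      for (int j : knn[i]) { sum += dist(points[i], points[j]); }
      lrd[i] = k / sum;
    }
    // Phase three: LOF = mean neighbor lrd / own lrd.
    double[] lof = new double[n];
    for (int i = 0; i < n; i++) {
      double sum = 0;
      for (int j : knn[i]) { sum += lrd[j]; }
      lof[i] = sum / (k * lrd[i]);
    }
    return lof;
  }

  static double dist(double[] a, double[] b) {
    double s = 0;
    for (int d = 0; d < a.length; d++) { s += (a[d] - b[d]) * (a[d] - b[d]); }
    return Math.sqrt(s);
  }
}

The ELKI version wires each phase as processors communicating through shared variables so that ParallelExecutor can run them over chunks of the DBID range; the arithmetic per object is the same.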
Use of de.lmu.ifi.dbs.elki.database.ids.DBIDs in project elki by elki-project.
The class SimpleKernelDensityLOF, method run.
/**
 * Run the naive kernel density LOF algorithm.
 *
 * @param database Database to query
 * @param relation Data to process
 * @return LOF outlier result
 */
public OutlierResult run(Database database, Relation<O> relation) {
  StepProgress stepprog = LOG.isVerbose() ? new StepProgress("KernelDensityLOF", 3) : null;
  final int dim = RelationUtil.dimensionality(relation);
  DBIDs ids = relation.getDBIDs();
  LOG.beginStep(stepprog, 1, "Materializing neighborhoods w.r.t. distance function.");
  KNNQuery<O> knnq = DatabaseUtil.precomputedKNNQuery(database, relation, getDistanceFunction(), k);
  // Compute densities
  LOG.beginStep(stepprog, 2, "Computing densities.");
  WritableDoubleDataStore dens = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP);
  FiniteProgress densProgress = LOG.isVerbose() ? new FiniteProgress("Densities", ids.size(), LOG) : null;
  for (DBIDIter it = ids.iter(); it.valid(); it.advance()) {
    final KNNList neighbors = knnq.getKNNForDBID(it, k);
    int count = 0;
    double sum = 0.0;
    // Fast version for double distances
    for (DoubleDBIDListIter neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) {
      if (DBIDUtil.equal(neighbor, it)) {
        continue;
      }
      double max = knnq.getKNNForDBID(neighbor, k).getKNNDistance();
      if (max == 0) {
        sum = Double.POSITIVE_INFINITY;
        break;
      }
      final double v = neighbor.doubleValue() / max;
      sum += kernel.density(v) / MathUtil.powi(max, dim);
      count++;
    }
    final double density = count > 0 ? sum / count : 0.;
    dens.putDouble(it, density);
    LOG.incrementProcessed(densProgress);
  }
  LOG.ensureCompleted(densProgress);
  // compute LOF_SCORE of each db object
  LOG.beginStep(stepprog, 3, "Computing KLOFs.");
  WritableDoubleDataStore lofs = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_STATIC);
  // track the maximum value for normalization.
  DoubleMinMax lofminmax = new DoubleMinMax();
  FiniteProgress progressLOFs = LOG.isVerbose() ? new FiniteProgress("KLOF_SCORE for objects", ids.size(), LOG) : null;
  for (DBIDIter it = ids.iter(); it.valid(); it.advance()) {
    final double lrdp = dens.doubleValue(it);
    final double lof;
    if (lrdp > 0) {
      final KNNList neighbors = knnq.getKNNForDBID(it, k);
      double sum = 0.0;
      int count = 0;
      for (DBIDIter neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) {
        // skip the point itself
        if (DBIDUtil.equal(neighbor, it)) {
          continue;
        }
        sum += dens.doubleValue(neighbor);
        count++;
      }
      lof = (lrdp == Double.POSITIVE_INFINITY) ? (sum == Double.POSITIVE_INFINITY ? 1 : 0.) : sum / (count * lrdp);
    } else {
      lof = 1.0;
    }
    lofs.putDouble(it, lof);
    // update minimum and maximum
    lofminmax.put(lof);
    LOG.incrementProcessed(progressLOFs);
  }
  LOG.ensureCompleted(progressLOFs);
  LOG.setCompleted(stepprog);
  // Build result representation.
  DoubleRelation scoreResult = new MaterializedDoubleRelation("Kernel Density Local Outlier Factor", "kernel-density-slof-outlier", lofs, ids);
  OutlierScoreMeta scoreMeta = new QuotientOutlierScoreMeta(lofminmax.getMin(), lofminmax.getMax(), 0.0, Double.POSITIVE_INFINITY, 1.0);
  OutlierResult result = new OutlierResult(scoreMeta, scoreResult);
  return result;
}
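The per-point density in step 2 is a neighbor-bandwidth kernel density estimate: each distance to a neighbor is scaled by that neighbor's k-distance (the local variable max), passed through the kernel, and normalized by max^dim; duplicates (max == 0) give infinite density. A minimal sketch of that estimate, assuming an Epanechnikov kernel (hypothetical helper class; in ELKI the kernel function is configurable):

// Hypothetical sketch of the per-point density estimate used in step 2 above.
public class KernelDensitySketch {
  /** Epanechnikov kernel on the normalized distance u = d(p, o) / kdist(o). */
  static double epanechnikov(double u) {
    return Math.abs(u) < 1 ? 0.75 * (1 - u * u) : 0.0;
  }

  /**
   * Density of a point given its distances to the neighbors and the neighbors'
   * k-distances (bandwidths), in dim dimensions.
   */
  static double density(double[] neighborDist, double[] neighborKDist, int dim) {
    double sum = 0.0;
    for (int i = 0; i < neighborDist.length; i++) {
      double h = neighborKDist[i];
      if (h == 0) {
        return Double.POSITIVE_INFINITY; // duplicate points: infinite density
      }
      sum += epanechnikov(neighborDist[i] / h) / Math.pow(h, dim);
    }
    return neighborDist.length > 0 ? sum / neighborDist.length : 0.0;
  }
}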
Use of de.lmu.ifi.dbs.elki.database.ids.DBIDs in project elki by elki-project.
The class TrimmedMeanApproach, method run.
/**
 * Run the algorithm.
 *
 * @param database Database
 * @param nrel Neighborhood relation
 * @param relation Data relation (one-dimensional!)
 * @return Outlier detection result
 */
public OutlierResult run(Database database, Relation<N> nrel, Relation<? extends NumberVector> relation) {
  assert (RelationUtil.dimensionality(relation) == 1) : "TrimmedMean can only process one-dimensional data sets.";
  final NeighborSetPredicate npred = getNeighborSetPredicateFactory().instantiate(database, nrel);
  WritableDoubleDataStore errors = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_TEMP);
  WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC);
  FiniteProgress progress = LOG.isVerbose() ? new FiniteProgress("Computing trimmed means", relation.size(), LOG) : null;
  for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
    DBIDs neighbors = npred.getNeighborDBIDs(iditer);
    int num = 0;
    double[] values = new double[neighbors.size()];
    // collect the neighborhood values
    for (DBIDIter iter = neighbors.iter(); iter.valid(); iter.advance()) {
      values[num] = relation.get(iter).doubleValue(0);
      num++;
    }
    // calculate the local trimmed mean and error term
    final double tm;
    if (num > 0) {
      int left = (int) Math.floor(p * (num - 1));
      int right = (int) Math.floor((1 - p) * (num - 1));
      Arrays.sort(values, 0, num);
      Mean mean = new Mean();
      for (int i = left; i <= right; i++) {
        mean.put(values[i]);
      }
      tm = mean.getMean();
    } else {
      tm = relation.get(iditer).doubleValue(0);
    }
    // Error: deviation from trimmed mean
    errors.putDouble(iditer, relation.get(iditer).doubleValue(0) - tm);
    LOG.incrementProcessed(progress);
  }
  LOG.ensureCompleted(progress);
  if (LOG.isVerbose()) {
    LOG.verbose("Computing median error.");
  }
  double median_dev_from_median;
  {
    // calculate the median error
    double[] ei = new double[relation.size()];
    {
      int i = 0;
      for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
        ei[i] = errors.doubleValue(iditer);
        i++;
      }
    }
    double median_i = QuickSelect.median(ei);
    // Update to deviation from median
    for (int i = 0; i < ei.length; i++) {
      ei[i] = Math.abs(ei[i] - median_i);
    }
    // Again, extract median
    median_dev_from_median = QuickSelect.median(ei);
  }
  if (LOG.isVerbose()) {
    LOG.verbose("Normalizing scores.");
  }
  // calculate score
  DoubleMinMax minmax = new DoubleMinMax();
  for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
    double score = Math.abs(errors.doubleValue(iditer)) * 0.6745 / median_dev_from_median;
    scores.putDouble(iditer, score);
    minmax.put(score);
  }
  // Build result representation.
  DoubleRelation scoreResult = new MaterializedDoubleRelation("TrimmedMean", "Trimmed Mean Score", scores, relation.getDBIDs());
  OutlierScoreMeta scoreMeta = new BasicOutlierScoreMeta(minmax.getMin(), minmax.getMax(), 0.0, Double.POSITIVE_INFINITY, 0);
  OutlierResult or = new OutlierResult(scoreMeta, scoreResult);
  or.addChildResult(npred);
  return or;
}
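The normalization in the last loop is a robust z-score: |error| * 0.6745 / MAD, where MAD is the median absolute deviation of the errors from their median and 0.6745 is the standard normal quartile that makes the MAD a consistent estimate of the standard deviation. A compact standalone sketch of the trimmed mean and this normalization (hypothetical class, not the ELKI API):

// Hypothetical sketch of the trimmed mean and MAD-based score normalization above.
import java.util.Arrays;

public class TrimmedMeanSketch {
  /** Mean of the values with the lowest and highest p-fraction discarded. */
  static double trimmedMean(double[] values, double p) {
    double[] v = values.clone();
    Arrays.sort(v);
    int left = (int) Math.floor(p * (v.length - 1));
    int right = (int) Math.floor((1 - p) * (v.length - 1));
    double sum = 0;
    for (int i = left; i <= right; i++) {
      sum += v[i];
    }
    return sum / (right - left + 1);
  }

  /** Median of a copy of the values. */
  static double median(double[] values) {
    double[] v = values.clone();
    Arrays.sort(v);
    int n = v.length;
    return n % 2 == 1 ? v[n / 2] : 0.5 * (v[n / 2 - 1] + v[n / 2]);
  }

  /** Robust scores: |e_i| * 0.6745 / MAD, as in the normalization loop above. */
  static double[] robustScores(double[] errors) {
    double med = median(errors);
    double[] dev = new double[errors.length];
    for (int i = 0; i < errors.length; i++) {
      dev[i] = Math.abs(errors[i] - med);
    }
    double mad = median(dev);
    double[] scores = new double[errors.length];
    for (int i = 0; i < errors.length; i++) {
      scores[i] = Math.abs(errors[i]) * 0.6745 / mad;
    }
    return scores;
  }
}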