Search in sources :

Example 61 with DBIDs

use of de.lmu.ifi.dbs.elki.database.ids.DBIDs in project elki by elki-project.

the class AggarwalYuEvolutionary method run.

/**
 * Performs the evolutionary algorithm on the given database.
 *
 * @param database Database
 * @param relation Relation
 * @return Result
 */
public OutlierResult run(Database database, Relation<V> relation) {
    final int dbsize = relation.size();
    ArrayList<ArrayList<DBIDs>> ranges = buildRanges(relation);
    Heap<Individuum>.UnorderedIter individuums = (new EvolutionarySearch(relation, ranges, m, rnd.getSingleThreadedRandom())).run();
    WritableDoubleDataStore outlierScore = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_STATIC);
    for (; individuums.valid(); individuums.advance()) {
        DBIDs ids = computeSubspaceForGene(individuums.get().getGene(), ranges);
        double sparsityC = sparsity(ids.size(), dbsize, k, phi);
        for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
            double prev = outlierScore.doubleValue(iter);
            if (Double.isNaN(prev) || sparsityC < prev) {
                outlierScore.putDouble(iter, sparsityC);
            }
        }
    }
    DoubleMinMax minmax = new DoubleMinMax();
    for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
        double val = outlierScore.doubleValue(iditer);
        if (Double.isNaN(val)) {
            outlierScore.putDouble(iditer, 0.0);
            val = 0.0;
        }
        minmax.put(val);
    }
    DoubleRelation scoreResult = new MaterializedDoubleRelation("AggarwalYuEvolutionary", "aggarwal-yu-outlier", outlierScore, relation.getDBIDs());
    OutlierScoreMeta meta = new InvertedOutlierScoreMeta(minmax.getMin(), minmax.getMax(), Double.NEGATIVE_INFINITY, 0.0);
    return new OutlierResult(meta, scoreResult);
}
Also used : WritableDoubleDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore) DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) IntArrayList(it.unimi.dsi.fastutil.ints.IntArrayList) OutlierResult(de.lmu.ifi.dbs.elki.result.outlier.OutlierResult) InvertedOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.InvertedOutlierScoreMeta) DoubleRelation(de.lmu.ifi.dbs.elki.database.relation.DoubleRelation) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation) Heap(de.lmu.ifi.dbs.elki.utilities.datastructures.heap.Heap) TopBoundedHeap(de.lmu.ifi.dbs.elki.utilities.datastructures.heap.TopBoundedHeap) OutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta) InvertedOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.InvertedOutlierScoreMeta) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) DoubleMinMax(de.lmu.ifi.dbs.elki.math.DoubleMinMax) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation)

Example 62 with DBIDs

use of de.lmu.ifi.dbs.elki.database.ids.DBIDs in project elki by elki-project.

the class ComputeOutlierHistogram method evaluateOutlierResult.

/**
 * Evaluate a single outlier result as histogram.
 *
 * @param database Database to process
 * @param or Outlier result
 * @return Result
 */
public HistogramResult evaluateOutlierResult(Database database, OutlierResult or) {
    if (scaling instanceof OutlierScalingFunction) {
        OutlierScalingFunction oscaling = (OutlierScalingFunction) scaling;
        oscaling.prepare(or);
    }
    ModifiableDBIDs ids = DBIDUtil.newHashSet(or.getScores().getDBIDs());
    DBIDs outlierIds = DatabaseUtil.getObjectsByLabelMatch(database, positiveClassName);
    // first value for outliers, second for each object
    // If we have useful (finite) min/max, use these for binning.
    double min = scaling.getMin();
    double max = scaling.getMax();
    final ObjHistogram<DoubleDoublePair> hist;
    if (Double.isInfinite(min) || Double.isNaN(min) || Double.isInfinite(max) || Double.isNaN(max)) {
        hist = new AbstractObjDynamicHistogram<DoubleDoublePair>(bins) {

            @Override
            public DoubleDoublePair aggregate(DoubleDoublePair first, DoubleDoublePair second) {
                first.first += second.first;
                first.second += second.second;
                return first;
            }

            @Override
            protected DoubleDoublePair makeObject() {
                return new DoubleDoublePair(0., 0.);
            }

            @Override
            protected DoubleDoublePair cloneForCache(DoubleDoublePair data) {
                return new DoubleDoublePair(data.first, data.second);
            }

            @Override
            protected DoubleDoublePair downsample(Object[] data, int start, int end, int size) {
                DoubleDoublePair sum = new DoubleDoublePair(0, 0);
                for (int i = start; i < end; i++) {
                    DoubleDoublePair p = (DoubleDoublePair) data[i];
                    if (p != null) {
                        sum.first += p.first;
                        sum.second += p.second;
                    }
                }
                return sum;
            }
        };
    } else {
        hist = new AbstractObjStaticHistogram<DoubleDoublePair>(bins, min, max) {

            @Override
            protected DoubleDoublePair makeObject() {
                return new DoubleDoublePair(0., 0.);
            }

            @Override
            public void putData(double coord, DoubleDoublePair data) {
                DoubleDoublePair exist = get(coord);
                exist.first += data.first;
                exist.second += data.second;
            }
        };
    }
    // first fill histogram only with values of outliers
    DoubleDoublePair negative, positive;
    if (!splitfreq) {
        negative = new DoubleDoublePair(1. / ids.size(), 0);
        positive = new DoubleDoublePair(0, 1. / ids.size());
    } else {
        negative = new DoubleDoublePair(1. / (ids.size() - outlierIds.size()), 0);
        positive = new DoubleDoublePair(0, 1. / outlierIds.size());
    }
    ids.removeDBIDs(outlierIds);
    // fill histogram with values of each object
    for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
        double result = or.getScores().doubleValue(iter);
        result = scaling.getScaled(result);
        if (result > Double.NEGATIVE_INFINITY && result < Double.POSITIVE_INFINITY) {
            hist.putData(result, negative);
        }
    }
    for (DBIDIter iter = outlierIds.iter(); iter.valid(); iter.advance()) {
        double result = or.getScores().doubleValue(iter);
        result = scaling.getScaled(result);
        if (result > Double.NEGATIVE_INFINITY && result < Double.POSITIVE_INFINITY) {
            hist.putData(result, positive);
        }
    }
    Collection<double[]> collHist = new ArrayList<>(hist.getNumBins());
    for (ObjHistogram.Iter<DoubleDoublePair> iter = hist.iter(); iter.valid(); iter.advance()) {
        DoubleDoublePair data = iter.getValue();
        collHist.add(new double[] { iter.getCenter(), data.first, data.second });
    }
    return new HistogramResult("Outlier Score Histogram", "outlier-histogram", collHist);
}
Also used : ObjHistogram(de.lmu.ifi.dbs.elki.utilities.datastructures.histogram.ObjHistogram) HistogramResult(de.lmu.ifi.dbs.elki.result.HistogramResult) OutlierScalingFunction(de.lmu.ifi.dbs.elki.utilities.scaling.outlier.OutlierScalingFunction) DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) ModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs) ArrayList(java.util.ArrayList) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) DoubleDoublePair(de.lmu.ifi.dbs.elki.utilities.pairs.DoubleDoublePair) ModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs)

Example 63 with DBIDs

use of de.lmu.ifi.dbs.elki.database.ids.DBIDs in project elki by elki-project.

the class AbstractAggarwalYuOutlier method computeSubspace.

/**
 * Method to get the ids in the given subspace.
 *
 * @param subspace Subspace to process
 * @param ranges List of DBID ranges
 * @return ids
 */
protected DBIDs computeSubspace(ArrayList<IntIntPair> subspace, ArrayList<ArrayList<DBIDs>> ranges) {
    HashSetModifiableDBIDs ids = DBIDUtil.newHashSet(ranges.get(subspace.get(0).first).get(subspace.get(0).second));
    // intersect all selected dimensions
    for (int i = 1; i < subspace.size(); i++) {
        DBIDs current = ranges.get(subspace.get(i).first).get(subspace.get(i).second - GENE_OFFSET);
        ids.retainAll(current);
        if (ids.size() == 0) {
            break;
        }
    }
    return ids;
}
Also used : HashSetModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.HashSetModifiableDBIDs) ArrayModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs) DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) HashSetModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.HashSetModifiableDBIDs)

Example 64 with DBIDs

use of de.lmu.ifi.dbs.elki.database.ids.DBIDs in project elki by elki-project.

the class SOD method run.

/**
 * Performs the SOD algorithm on the given database.
 *
 * @param relation Data relation to process
 * @return Outlier result
 */
public OutlierResult run(Relation<V> relation) {
    SimilarityQuery<V> snnInstance = similarityFunction.instantiate(relation);
    FiniteProgress progress = LOG.isVerbose() ? new FiniteProgress("Assigning Subspace Outlier Degree", relation.size(), LOG) : null;
    final WritableDoubleDataStore sod_scores = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC);
    WritableDataStore<SODModel> sod_models = null;
    if (models) {
        // Models requested
        sod_models = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC, SODModel.class);
    }
    DoubleMinMax minmax = new DoubleMinMax();
    for (DBIDIter iter = relation.iterDBIDs(); iter.valid(); iter.advance()) {
        LOG.incrementProcessed(progress);
        DBIDs neighborhood = getNearestNeighbors(relation, snnInstance, iter);
        double[] center;
        long[] weightVector;
        double sod;
        if (neighborhood.size() > 0) {
            center = Centroid.make(relation, neighborhood).getArrayRef();
            // Note: per-dimension variances; no covariances.
            double[] variances = computePerDimensionVariances(relation, center, neighborhood);
            double expectationOfVariance = Mean.of(variances);
            weightVector = BitsUtil.zero(variances.length);
            for (int d = 0; d < variances.length; d++) {
                if (variances[d] < alpha * expectationOfVariance) {
                    BitsUtil.setI(weightVector, d);
                }
            }
            sod = subspaceOutlierDegree(relation.get(iter), center, weightVector);
        } else {
            center = relation.get(iter).toArray();
            weightVector = null;
            sod = 0.;
        }
        if (sod_models != null) {
            sod_models.put(iter, new SODModel(center, weightVector));
        }
        sod_scores.putDouble(iter, sod);
        minmax.put(sod);
    }
    LOG.ensureCompleted(progress);
    // combine results.
    OutlierScoreMeta meta = new BasicOutlierScoreMeta(minmax.getMin(), minmax.getMax());
    OutlierResult sodResult = new OutlierResult(meta, new MaterializedDoubleRelation("Subspace Outlier Degree", "sod-outlier", sod_scores, relation.getDBIDs()));
    if (sod_models != null) {
        Relation<SODModel> models = new MaterializedRelation<>("Subspace Outlier Model", "sod-outlier", new SimpleTypeInformation<>(SODModel.class), sod_models, relation.getDBIDs());
        sodResult.addChildResult(models);
    }
    return sodResult;
}
Also used : WritableDoubleDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore) FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) ArrayModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs) DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) OutlierResult(de.lmu.ifi.dbs.elki.result.outlier.OutlierResult) BasicOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta) OutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta) BasicOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) MaterializedRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation) DoubleMinMax(de.lmu.ifi.dbs.elki.math.DoubleMinMax) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation)

Example 65 with DBIDs

use of de.lmu.ifi.dbs.elki.database.ids.DBIDs in project elki by elki-project.

the class MixtureModelOutlierScalingFunction method prepare.

@Override
public void prepare(OutlierResult or) {
    // Initial parameters - are these defaults sounds?
    MeanVariance mv = new MeanVariance();
    DoubleRelation scores = or.getScores();
    for (DBIDIter id = scores.iterDBIDs(); id.valid(); id.advance()) {
        double val = scores.doubleValue(id);
        if (!Double.isNaN(val) && !Double.isInfinite(val)) {
            mv.put(val);
        }
    }
    double curMu = mv.getMean() * 2.;
    if (curMu == 0) {
        curMu = Double.MIN_NORMAL;
    }
    double curSigma = Math.max(mv.getSampleStddev(), Double.MIN_NORMAL);
    double curLambda = Math.min(1.0 / curMu, Double.MAX_VALUE);
    double curAlpha = 0.05;
    DBIDs ids = scores.getDBIDs();
    // TODO: stop condition!
    int iter = 0;
    // " lambda = " + curLambda + " alpha = " + curAlpha);
    while (true) {
        // E and M-Steps
        // Sum of weights for both distributions
        double otisum = 0.0, itisum = 0.0;
        // Weighted sum for both distributions
        double owsum = 0.0, iwsum = 0.0;
        // Weighted deviation from previous mean (Gaussian only)
        double osqsum = 0.0;
        for (DBIDIter it = ids.iter(); it.valid(); it.advance()) {
            double val = scores.doubleValue(it);
            // E-Step: estimate outlier probability
            double ti = calcPosterior(val, curAlpha, curMu, curSigma, curLambda);
            // M-Step
            otisum += ti;
            itisum += 1 - ti;
            owsum += ti * val;
            iwsum += (1 - ti) * val;
            // (val - curMu) * (val - curMu);
            osqsum += ti * val * val;
        }
        if (otisum <= 0.0 || owsum <= 0.0) {
            LOG.warning("MixtureModel Outlier Scaling converged to extreme.");
            break;
        }
        double newMu = owsum / otisum;
        double newSigma = Math.max(FastMath.sqrt(osqsum / otisum - newMu * newMu), Double.MIN_NORMAL);
        double newLambda = Math.min(itisum / iwsum, Double.MAX_VALUE);
        double newAlpha = otisum / ids.size();
        // converged?
        if (// 
        Math.abs(newMu - curMu) < DELTA && // 
        Math.abs(newSigma - curSigma) < DELTA && // 
        Math.abs(newLambda - curLambda) < DELTA && Math.abs(newAlpha - curAlpha) < DELTA) {
            break;
        }
        if (newSigma <= 0.0 || newAlpha <= 0.0) {
            LOG.warning("MixtureModel Outlier Scaling converged to extreme.");
            break;
        }
        // LOG.debugFine("iter #"+iter+" mu = " + newMu + " sigma = " +
        // newSigma + " lambda = " + newLambda + " alpha = " + newAlpha);
        curMu = newMu;
        curSigma = newSigma;
        curLambda = newLambda;
        curAlpha = newAlpha;
        iter++;
        if (iter > 100) {
            LOG.warning("Max iterations met in mixture model fitting.");
            break;
        }
    }
    mu = curMu;
    sigma = curSigma;
    lambda = curLambda;
    alpha = curAlpha;
// LOG.debugFine("mu = " + mu + " sigma = " + sigma + " lambda = " +
// lambda + " alpha = " + alpha);
}
Also used : MeanVariance(de.lmu.ifi.dbs.elki.math.MeanVariance) DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) DoubleRelation(de.lmu.ifi.dbs.elki.database.relation.DoubleRelation) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter)

Aggregations

DBIDs (de.lmu.ifi.dbs.elki.database.ids.DBIDs)139 DBIDIter (de.lmu.ifi.dbs.elki.database.ids.DBIDIter)77 OutlierResult (de.lmu.ifi.dbs.elki.result.outlier.OutlierResult)45 WritableDoubleDataStore (de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore)44 ModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs)40 DoubleRelation (de.lmu.ifi.dbs.elki.database.relation.DoubleRelation)39 MaterializedDoubleRelation (de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation)38 OutlierScoreMeta (de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta)38 ArrayList (java.util.ArrayList)35 DoubleMinMax (de.lmu.ifi.dbs.elki.math.DoubleMinMax)34 FiniteProgress (de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress)29 ArrayModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs)25 Clustering (de.lmu.ifi.dbs.elki.data.Clustering)23 KNNList (de.lmu.ifi.dbs.elki.database.ids.KNNList)22 ArrayDBIDs (de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs)19 BasicOutlierScoreMeta (de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta)18 WritableIntegerDataStore (de.lmu.ifi.dbs.elki.database.datastore.WritableIntegerDataStore)16 DBIDArrayIter (de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter)15 IndefiniteProgress (de.lmu.ifi.dbs.elki.logging.progress.IndefiniteProgress)14 DoubleStatistic (de.lmu.ifi.dbs.elki.logging.statistics.DoubleStatistic)14