Search in sources :

Example 36 with DoubleRelation

use of de.lmu.ifi.dbs.elki.database.relation.DoubleRelation in project elki by elki-project.

the class OutlierThresholdClustering method split.

/**
 * Partitions the objects of an outlier result into clusters by comparing
 * their (optionally scaled) scores against the configured thresholds.
 *
 * An object is assigned to the first bucket whose threshold is strictly
 * greater than its score; objects whose score is not below any threshold
 * end up in the last bucket. Bucket 0 is named "Inlier", every following
 * bucket is named after the threshold that precedes it.
 *
 * @param or Outlier result providing the scores
 * @return Clustering with one top-level cluster per bucket
 */
private Clustering<Model> split(OutlierResult or) {
    DoubleRelation scores = or.getScores();
    // Scaling functions specific to outlier scores need to see the result first.
    if (scaling instanceof OutlierScalingFunction) {
        ((OutlierScalingFunction) scaling).prepare(or);
    }
    // One bucket more than there are thresholds.
    final int buckets = threshold.length + 1;
    ArrayList<ModifiableDBIDs> members = new ArrayList<>(buckets);
    while (members.size() < buckets) {
        members.add(DBIDUtil.newHashSet());
    }
    for (DBIDIter it = scores.getDBIDs().iter(); it.valid(); it.advance()) {
        final double raw = scores.doubleValue(it);
        final double score = (scaling != null) ? scaling.getScaled(raw) : raw;
        // Advance until the first threshold that exceeds the score.
        int bucket = 0;
        while (bucket < threshold.length && !(score < threshold[bucket])) {
            bucket++;
        }
        members.get(bucket).add(it);
    }
    Clustering<Model> result = new Clustering<>("Outlier threshold clustering", "threshold-clustering");
    // The first bucket is created with the flag unset, all others with it set.
    result.addToplevelCluster(new Cluster<>("Inlier", members.get(0), false));
    for (int b = 1; b < buckets; b++) {
        result.addToplevelCluster(new Cluster<>("Outlier_" + threshold[b - 1], members.get(b), true));
    }
    return result;
}
Also used : OutlierScalingFunction(de.lmu.ifi.dbs.elki.utilities.scaling.outlier.OutlierScalingFunction) ArrayList(java.util.ArrayList) Clustering(de.lmu.ifi.dbs.elki.data.Clustering) DoubleRelation(de.lmu.ifi.dbs.elki.database.relation.DoubleRelation) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) Model(de.lmu.ifi.dbs.elki.data.model.Model) ModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs)

Example 37 with DoubleRelation

use of de.lmu.ifi.dbs.elki.database.relation.DoubleRelation in project elki by elki-project.

the class AggarwalYuEvolutionary method run.

/**
 * Runs the evolutionary search on the given relation and turns the
 * resulting individuals into per-object outlier scores.
 *
 * Every object covered by an individual's subspace receives that
 * individual's sparsity coefficient, unless it already holds a smaller
 * one. Scores that are NaN afterwards are replaced by 0.0.
 *
 * @param database Database
 * @param relation Relation
 * @return Result
 */
public OutlierResult run(Database database, Relation<V> relation) {
    final int dbsize = relation.size();
    ArrayList<ArrayList<DBIDs>> ranges = buildRanges(relation);
    EvolutionarySearch search = new EvolutionarySearch(relation, ranges, m, rnd.getSingleThreadedRandom());
    Heap<Individuum>.UnorderedIter candidates = search.run();
    WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_STATIC);
    while (candidates.valid()) {
        DBIDs covered = computeSubspaceForGene(candidates.get().getGene(), ranges);
        final double coefficient = sparsity(covered.size(), dbsize, k, phi);
        for (DBIDIter member = covered.iter(); member.valid(); member.advance()) {
            final double current = scores.doubleValue(member);
            // Keep the smallest coefficient seen for this object.
            if (Double.isNaN(current) || coefficient < current) {
                scores.putDouble(member, coefficient);
            }
        }
        candidates.advance();
    }
    // Replace NaN scores by 0.0 and track the observed value range.
    DoubleMinMax range = new DoubleMinMax();
    for (DBIDIter obj = relation.iterDBIDs(); obj.valid(); obj.advance()) {
        double score = scores.doubleValue(obj);
        if (Double.isNaN(score)) {
            score = 0.0;
            scores.putDouble(obj, score);
        }
        range.put(score);
    }
    DoubleRelation scoreResult = new MaterializedDoubleRelation("AggarwalYuEvolutionary", "aggarwal-yu-outlier", scores, relation.getDBIDs());
    OutlierScoreMeta meta = new InvertedOutlierScoreMeta(range.getMin(), range.getMax(), Double.NEGATIVE_INFINITY, 0.0);
    return new OutlierResult(meta, scoreResult);
}
Also used : WritableDoubleDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore) DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) IntArrayList(it.unimi.dsi.fastutil.ints.IntArrayList) OutlierResult(de.lmu.ifi.dbs.elki.result.outlier.OutlierResult) InvertedOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.InvertedOutlierScoreMeta) DoubleRelation(de.lmu.ifi.dbs.elki.database.relation.DoubleRelation) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation) Heap(de.lmu.ifi.dbs.elki.utilities.datastructures.heap.Heap) TopBoundedHeap(de.lmu.ifi.dbs.elki.utilities.datastructures.heap.TopBoundedHeap) OutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta) InvertedOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.InvertedOutlierScoreMeta) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) DoubleMinMax(de.lmu.ifi.dbs.elki.math.DoubleMinMax) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation)

Example 38 with DoubleRelation

use of de.lmu.ifi.dbs.elki.database.relation.DoubleRelation in project elki by elki-project.

the class SigmoidOutlierScalingFunction method prepare.

/**
 * Fits the two sigmoid parameters to the scores of the given outlier
 * result by alternating between labeling and parameter estimation.
 *
 * Starting from a = 1 and b = -mean(score), each round marks every object
 * with a * score + b &gt; 0 in a bit set and then re-estimates (a, b) via
 * {@code MStepLevenbergMarquardt}. The loop stops as soon as a round does
 * not change any label, or after more than 100 parameter updates. The
 * final parameters are stored in {@code Afinal} and {@code Bfinal}.
 *
 * @param or Outlier result to fit the scaling to
 */
@Override
public void prepare(OutlierResult or) {
    // Initial parameters - are these defaults sound?
    DoubleRelation scores = or.getScores();
    MeanVariance stats = new MeanVariance();
    for (DBIDIter id = scores.iterDBIDs(); id.valid(); id.advance()) {
        stats.put(scores.doubleValue(id));
    }
    double a = 1.0;
    double b = -stats.getMean();
    ArrayDBIDs ids = DBIDUtil.ensureArray(or.getScores().getDBIDs());
    DBIDArrayIter it = ids.iter();
    // One label bit per object, initially all unset.
    long[] t = BitsUtil.zero(ids.size());
    int iter = 0;
    while (true) {
        // E-Step: relabel every object, remembering whether any label flipped.
        boolean flipped = false;
        it.seek(0);
        for (int i = 0; i < ids.size(); i++, it.advance()) {
            final boolean positive = a * or.getScores().doubleValue(it) + b > 0;
            if (positive != BitsUtil.get(t, i)) {
                if (positive) {
                    BitsUtil.setI(t, i);
                } else {
                    BitsUtil.clearI(t, i);
                }
                flipped = true;
            }
        }
        if (!flipped) {
            // Labels are stable, so the parameters will not change anymore.
            break;
        }
        // M-Step
        // Implementation based on:
        // H.-T. Lin, C.-J. Lin, R. C. Weng:
        // A Note on Platt's Probabilistic Outputs for Support Vector Machines
        final double[] updated = MStepLevenbergMarquardt(a, b, ids, t, or.getScores());
        a = updated[0];
        b = updated[1];
        if (++iter > 100) {
            LOG.warning("Max iterations met in sigmoid fitting.");
            break;
        }
    }
    Afinal = a;
    Bfinal = b;
    LOG.debugFine("A = " + Afinal + " B = " + Bfinal);
}
Also used : MeanVariance(de.lmu.ifi.dbs.elki.math.MeanVariance) ArrayDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs) DBIDArrayIter(de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter) DoubleRelation(de.lmu.ifi.dbs.elki.database.relation.DoubleRelation) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter)

Example 39 with DoubleRelation

use of de.lmu.ifi.dbs.elki.database.relation.DoubleRelation in project elki by elki-project.

the class MixtureModelOutlierScalingFunction method prepare.

/**
 * Estimates the four parameters mu, sigma, lambda and alpha from the scores
 * of the given outlier result using an iterative E/M scheme, and stores
 * them in the corresponding fields.
 *
 * Each round computes a posterior weight per object via
 * {@code calcPosterior} and re-estimates all four parameters from the
 * weighted sums. Iteration ends when all parameters change by less than
 * {@code DELTA}, when an estimate degenerates, or after more than 100
 * parameter updates. In the degenerate and convergence cases the values of
 * the previous round are kept.
 *
 * NOTE(review): presumably (mu, sigma) describe a Gaussian outlier
 * component, lambda the rate of an exponential inlier component, and alpha
 * the mixing weight - confirm against calcPosterior.
 *
 * @param or Outlier result to fit the scaling to
 */
@Override
public void prepare(OutlierResult or) {
    // Initial parameters - are these defaults sound?
    // Only finite, non-NaN scores contribute to the initial statistics.
    MeanVariance mv = new MeanVariance();
    DoubleRelation scores = or.getScores();
    for (DBIDIter id = scores.iterDBIDs(); id.valid(); id.advance()) {
        double val = scores.doubleValue(id);
        if (!Double.isNaN(val) && !Double.isInfinite(val)) {
            mv.put(val);
        }
    }
    // Start mu at twice the mean; avoid an exact zero because 1 / curMu is used below.
    double curMu = mv.getMean() * 2.;
    if (curMu == 0) {
        curMu = Double.MIN_NORMAL;
    }
    // Clamp sigma away from zero and lambda away from infinity.
    double curSigma = Math.max(mv.getSampleStddev(), Double.MIN_NORMAL);
    double curLambda = Math.min(1.0 / curMu, Double.MAX_VALUE);
    double curAlpha = 0.05;
    DBIDs ids = scores.getDBIDs();
    // TODO: stop condition!
    int iter = 0;
    // (A debug statement printing the initial mu, sigma, lambda and alpha
    // used to be here.)
    while (true) {
        // E and M-Steps
        // Sum of weights for both distributions
        double otisum = 0.0, itisum = 0.0;
        // Weighted sum for both distributions
        double owsum = 0.0, iwsum = 0.0;
        // Weighted deviation from previous mean (Gaussian only)
        double osqsum = 0.0;
        // NOTE(review): unlike the initialization above, this loop does not
        // skip NaN or infinite scores - confirm calcPosterior copes with them.
        for (DBIDIter it = ids.iter(); it.valid(); it.advance()) {
            double val = scores.doubleValue(it);
            // E-Step: estimate outlier probability
            double ti = calcPosterior(val, curAlpha, curMu, curSigma, curLambda);
            // M-Step
            otisum += ti;
            itisum += 1 - ti;
            owsum += ti * val;
            iwsum += (1 - ti) * val;
            // Accumulates the weighted raw second moment; the variance is
            // derived from it below. Former alternative weight term:
            // (val - curMu) * (val - curMu);
            osqsum += ti * val * val;
        }
        // Without positive weight (or weighted sum) the estimates below are undefined.
        if (otisum <= 0.0 || owsum <= 0.0) {
            LOG.warning("MixtureModel Outlier Scaling converged to extreme.");
            break;
        }
        // Weighted mean of the scores.
        double newMu = owsum / otisum;
        // Standard deviation from the weighted second moment minus the squared mean, clamped away from zero.
        double newSigma = Math.max(FastMath.sqrt(osqsum / otisum - newMu * newMu), Double.MIN_NORMAL);
        // Ratio of complementary weight to complementary weighted sum, clamped to a finite value.
        double newLambda = Math.min(itisum / iwsum, Double.MAX_VALUE);
        // Average posterior weight over all objects.
        double newAlpha = otisum / ids.size();
        // converged?
        if (// all four parameters moved by less than DELTA
        Math.abs(newMu - curMu) < DELTA && // 
        Math.abs(newSigma - curSigma) < DELTA && // 
        Math.abs(newLambda - curLambda) < DELTA && Math.abs(newAlpha - curAlpha) < DELTA) {
            break;
        }
        // NOTE(review): newSigma is at least Double.MIN_NORMAL (or NaN) and
        // otisum is positive at this point, so this check looks unreachable
        // for non-empty ids - confirm.
        if (newSigma <= 0.0 || newAlpha <= 0.0) {
            LOG.warning("MixtureModel Outlier Scaling converged to extreme.");
            break;
        }
        // LOG.debugFine("iter #"+iter+" mu = " + newMu + " sigma = " +
        // newSigma + " lambda = " + newLambda + " alpha = " + newAlpha);
        // Accept the new estimates for the next round.
        curMu = newMu;
        curSigma = newSigma;
        curLambda = newLambda;
        curAlpha = newAlpha;
        iter++;
        if (iter > 100) {
            LOG.warning("Max iterations met in mixture model fitting.");
            break;
        }
    }
    // Publish the last accepted estimates.
    mu = curMu;
    sigma = curSigma;
    lambda = curLambda;
    alpha = curAlpha;
// LOG.debugFine("mu = " + mu + " sigma = " + sigma + " lambda = " +
// lambda + " alpha = " + alpha);
}
Also used : MeanVariance(de.lmu.ifi.dbs.elki.math.MeanVariance) DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) DoubleRelation(de.lmu.ifi.dbs.elki.database.relation.DoubleRelation) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter)

Example 40 with DoubleRelation

use of de.lmu.ifi.dbs.elki.database.relation.DoubleRelation in project elki by elki-project.

the class OutlierGammaScaling method prepare.

/**
 * Derives the gamma parameters from the pre-scaled scores of the given
 * outlier result.
 *
 * Mean and sample variance are computed over all pre-scaled scores that
 * are neither NaN nor infinite. From these, {@code k} is set to
 * mean&sup2; / variance and {@code theta} to variance / mean;
 * {@code atmean} caches the regularized gamma P value at the mean.
 *
 * @param or Outlier result to fit the scaling to
 */
@Override
public void prepare(OutlierResult or) {
    meta = or.getOutlierMeta();
    DoubleRelation scores = or.getScores();
    MeanVariance stats = new MeanVariance();
    for (DBIDIter it = scores.iterDBIDs(); it.valid(); it.advance()) {
        final double scaled = preScale(scores.doubleValue(it));
        // Non-finite values would corrupt the moments, so leave them out.
        if (Double.isNaN(scaled) || Double.isInfinite(scaled)) {
            continue;
        }
        stats.put(scaled);
    }
    final double mean = stats.getMean();
    final double var = stats.getSampleVariance();
    k = (mean * mean) / var;
    theta = var / mean;
    atmean = GammaDistribution.regularizedGammaP(k, mean / theta);
}
Also used : MeanVariance(de.lmu.ifi.dbs.elki.math.MeanVariance) DoubleRelation(de.lmu.ifi.dbs.elki.database.relation.DoubleRelation) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter)

Aggregations

DoubleRelation (de.lmu.ifi.dbs.elki.database.relation.DoubleRelation): 89; DBIDIter (de.lmu.ifi.dbs.elki.database.ids.DBIDIter): 72; MaterializedDoubleRelation (de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation): 70; OutlierResult (de.lmu.ifi.dbs.elki.result.outlier.OutlierResult): 70; OutlierScoreMeta (de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta): 70; WritableDoubleDataStore (de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore): 69; DoubleMinMax (de.lmu.ifi.dbs.elki.math.DoubleMinMax): 65; DBIDs (de.lmu.ifi.dbs.elki.database.ids.DBIDs): 38; BasicOutlierScoreMeta (de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta): 34; FiniteProgress (de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress): 21; KNNList (de.lmu.ifi.dbs.elki.database.ids.KNNList): 18; MeanVariance (de.lmu.ifi.dbs.elki.math.MeanVariance): 17; InvertedOutlierScoreMeta (de.lmu.ifi.dbs.elki.result.outlier.InvertedOutlierScoreMeta): 14; ProbabilisticOutlierScore (de.lmu.ifi.dbs.elki.result.outlier.ProbabilisticOutlierScore): 13; QuotientOutlierScoreMeta (de.lmu.ifi.dbs.elki.result.outlier.QuotientOutlierScoreMeta): 13; DoubleDBIDListIter (de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter): 11; StepProgress (de.lmu.ifi.dbs.elki.logging.progress.StepProgress): 11; NeighborSetPredicate (de.lmu.ifi.dbs.elki.algorithm.outlier.spatial.neighborhood.NeighborSetPredicate): 9; ArrayDBIDs (de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs): 8; Mean (de.lmu.ifi.dbs.elki.math.Mean): 8