Search in sources :

Example 6 with NormalDistribution

use of de.lmu.ifi.dbs.elki.math.statistics.distribution.NormalDistribution in project elki by elki-project.

the class GeneratorXMLDatabaseConnection method processElementNormal.

/**
 * Process a 'normal' Element in the XML stream.
 *
 * Reads the optional mean and standard deviation attributes (defaulting to
 * 0.0 and 1.0 when absent or empty), adds a matching normal distribution
 * generator to the cluster, and warns about any child elements.
 *
 * @param cluster Cluster that receives the new generator
 * @param cur Current document node
 */
private void processElementNormal(GeneratorSingleCluster cluster, Node cur) {
    final Element elem = (Element) cur;
    // Mean attribute; falls back to 0.0 when missing or empty.
    final String meanAttr = elem.getAttribute(ATTR_MEAN);
    final double mean = (meanAttr == null || meanAttr.isEmpty()) ? 0.0 : ParseUtil.parseDouble(meanAttr);
    // Standard deviation attribute; falls back to 1.0 when missing or empty.
    final String stddevAttr = elem.getAttribute(ATTR_STDDEV);
    final double stddev = (stddevAttr == null || stddevAttr.isEmpty()) ? 1.0 : ParseUtil.parseDouble(stddevAttr);

    // *** New normal distribution generator
    final Random rnd = cluster.getNewRandomGenerator();
    final Distribution normal = new NormalDistribution(mean, stddev, rnd);
    cluster.addGenerator(normal);

    // TODO: check for unknown attributes.
    // No child elements are expected here; report any that are found.
    for (XMLNodeIterator children = new XMLNodeIterator(cur.getFirstChild()); children.hasNext();) {
        final Node sub = children.next();
        if (sub.getNodeType() != Node.ELEMENT_NODE) {
            continue;
        }
        LOG.warning("Unknown element in XML specification file: " + sub.getNodeName());
    }
}
Also used : Random(java.util.Random) NormalDistribution(de.lmu.ifi.dbs.elki.math.statistics.distribution.NormalDistribution) Element(org.w3c.dom.Element) Distribution(de.lmu.ifi.dbs.elki.math.statistics.distribution.Distribution) NormalDistribution(de.lmu.ifi.dbs.elki.math.statistics.distribution.NormalDistribution) UniformDistribution(de.lmu.ifi.dbs.elki.math.statistics.distribution.UniformDistribution) GammaDistribution(de.lmu.ifi.dbs.elki.math.statistics.distribution.GammaDistribution) HaltonUniformDistribution(de.lmu.ifi.dbs.elki.math.statistics.distribution.HaltonUniformDistribution) Node(org.w3c.dom.Node) XMLNodeIterator(de.lmu.ifi.dbs.elki.utilities.xml.XMLNodeIterator)

Example 7 with NormalDistribution

use of de.lmu.ifi.dbs.elki.math.statistics.distribution.NormalDistribution in project elki by elki-project.

the class TrivialGeneratedOutlier method run.

/**
 * Run the algorithm: score every object against the generator models found
 * in the model relation.
 *
 * For each generator, the vector is mapped back through the generator's
 * transformation (if any), standardized per dimension, and the sum of squared
 * standardized deviations is passed to the chi-squared CDF. The smallest such
 * value over all generators is the raw score, which is then adjusted using
 * the {@code expect} field when that is below 1.
 *
 * @param models Model relation
 * @param vecs Vector relation
 * @param labels Label relation (not read by this method)
 * @return Outlier result
 */
public OutlierResult run(Relation<Model> models, Relation<NumberVector> vecs, Relation<?> labels) {
    WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(models.getDBIDs(), DataStoreFactory.HINT_HOT);

    // Collect the distinct generator models referenced by the data set.
    HashSet<GeneratorSingleCluster> generators = new HashSet<>();
    for (DBIDIter it = models.iterDBIDs(); it.valid(); it.advance()) {
        final Model candidate = models.get(it);
        if (candidate instanceof GeneratorSingleCluster) {
            generators.add((GeneratorSingleCluster) candidate);
        }
    }
    if (generators.isEmpty()) {
        LOG.warning("No generator models found for dataset - all points will be considered outliers.");
    }

    // Fail early if any generator uses a non-normal distribution.
    for (GeneratorSingleCluster cluster : generators) {
        for (int d = 0; d < cluster.getDim(); d++) {
            final Distribution axis = cluster.getDistribution(d);
            if (axis instanceof NormalDistribution) {
                continue;
            }
            throw new AbortException("TrivialGeneratedOutlier currently only supports normal distributions, got: " + axis);
        }
    }

    // Score each object.
    for (DBIDIter it = models.iterDBIDs(); it.valid(); it.advance()) {
        final double[] raw = vecs.get(it).toArray();
        double score = 1.;
        for (GeneratorSingleCluster cluster : generators) {
            // Transform backwards
            final double[] local = cluster.getTransformation() != null ? cluster.getTransformation().applyInverse(raw) : raw;
            double sqsum = 0.0;
            int count = 0;
            for (int d = 0; d < local.length; d++) {
                final Distribution axis = cluster.getDistribution(d);
                if (!(axis instanceof NormalDistribution)) {
                    throw new AbortException("TrivialGeneratedOutlier currently only supports normal distributions, got: " + axis);
                }
                final NormalDistribution normal = (NormalDistribution) axis;
                final double z = (local[d] - normal.getMean()) / normal.getStddev();
                sqsum += z * z;
                ++count;
            }
            // The squared distances are ChiSquared distributed
            score = count > 0 ? Math.min(score, ChiSquaredDistribution.cdf(sqsum, count)) : 0.;
        }
        if (expect < 1) {
            score = expect * score / (1 - score + expect);
        }
        scores.putDouble(it, score);
    }

    OutlierScoreMeta meta = new ProbabilisticOutlierScore(0., 1.);
    DoubleRelation scoreres = new MaterializedDoubleRelation("Model outlier scores", "model-outlier", scores, models.getDBIDs());
    return new OutlierResult(meta, scoreres);
}
Also used : WritableDoubleDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore) GeneratorSingleCluster(de.lmu.ifi.dbs.elki.data.synthetic.bymodel.GeneratorSingleCluster) OutlierResult(de.lmu.ifi.dbs.elki.result.outlier.OutlierResult) ProbabilisticOutlierScore(de.lmu.ifi.dbs.elki.result.outlier.ProbabilisticOutlierScore) DoubleRelation(de.lmu.ifi.dbs.elki.database.relation.DoubleRelation) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation) OutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) NormalDistribution(de.lmu.ifi.dbs.elki.math.statistics.distribution.NormalDistribution) Distribution(de.lmu.ifi.dbs.elki.math.statistics.distribution.Distribution) NormalDistribution(de.lmu.ifi.dbs.elki.math.statistics.distribution.NormalDistribution) ChiSquaredDistribution(de.lmu.ifi.dbs.elki.math.statistics.distribution.ChiSquaredDistribution) Model(de.lmu.ifi.dbs.elki.data.model.Model) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation) HashSet(java.util.HashSet) AbortException(de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException)

Example 8 with NormalDistribution

use of de.lmu.ifi.dbs.elki.math.statistics.distribution.NormalDistribution in project elki by elki-project.

the class NormalLevenbergMarquardtKDEEstimator method estimate.

/**
 * Estimate a normal distribution by fitting a Gaussian curve to a kernel
 * density estimate of the data with the Levenberg-Marquardt method.
 *
 * The fit starts from the sample median (as mean) and the sample standard
 * deviation. The data must contain at least one value; an empty input fails
 * with an index-out-of-bounds error when the median is read.
 *
 * @param data Data container
 * @param adapter Adapter used to read the size and the values of {@code data}
 * @param <A> Data container type
 * @return Normal distribution built from the first two fitted parameters
 */
@Override
public <A> NormalDistribution estimate(A data, NumberArrayAdapter<?, A> adapter) {
    // We first need the basic parameters:
    final int len = adapter.size(data);
    MeanVariance mv = new MeanVariance();
    // X positions of samples
    double[] x = new double[len];
    for (int i = 0; i < len; i++) {
        x[i] = adapter.getDouble(data, i);
        mv.put(x[i]);
    }
    // Sort our copy.
    Arrays.sort(x);
    // Median of the sorted copy: for odd len both indices select the middle
    // element, for even len they select the two central elements. (The previous
    // indices len>>1 and (len+1)>>1 were shifted up by one and read past the
    // end of the array for len == 1.)
    double median = (x[(len - 1) >> 1] + x[len >> 1]) * .5;
    // Height = density, via KDE.
    // NOTE(review): 1e-6 is presumably a minimum bandwidth/epsilon - confirm
    // against KernelDensityEstimator.
    KernelDensityEstimator de = new KernelDensityEstimator(x, GaussianKernelDensityFunction.KERNEL, 1e-6);
    double[] y = de.getDensity();
    // Weights:
    double[] s = new double[len];
    Arrays.fill(s, 1.0);
    // Initial parameter estimate:
    double[] params = { median, mv.getSampleStddev(), 1 };
    // Only the first two parameters are marked for fitting; the third
    // (initialized to 1) is flagged as not to be fitted.
    boolean[] dofit = { true, true, false };
    LevenbergMarquardtMethod fit = new LevenbergMarquardtMethod(GaussianFittingFunction.STATIC, params, dofit, x, y, s);
    fit.run();
    double[] ps = fit.getParams();
    return new NormalDistribution(ps[0], ps[1]);
}
Also used : MeanVariance(de.lmu.ifi.dbs.elki.math.MeanVariance) NormalDistribution(de.lmu.ifi.dbs.elki.math.statistics.distribution.NormalDistribution) LevenbergMarquardtMethod(de.lmu.ifi.dbs.elki.math.linearalgebra.fitting.LevenbergMarquardtMethod) KernelDensityEstimator(de.lmu.ifi.dbs.elki.math.statistics.KernelDensityEstimator)

Example 9 with NormalDistribution

use of de.lmu.ifi.dbs.elki.math.statistics.distribution.NormalDistribution in project elki by elki-project.

the class NormalLevenbergMarquardtKDEEstimatorTest method testEstimator.

/**
 * Regression test: estimate normal distributions from three data sets of the
 * "norm.ascii.gz" resource and check mean and standard deviation via
 * {@code assertStat}.
 */
@Test
public void testEstimator() {
    final NormalLevenbergMarquardtKDEEstimator est = instantiate(NormalLevenbergMarquardtKDEEstimator.class, NormalDistribution.class);
    load("norm.ascii.gz");

    // Data set "random_01_01": checked against mean 0.1, stddev 0.1.
    final double[] narrow = this.data.get("random_01_01");
    final NormalDistribution narrowDist = est.estimate(narrow, DoubleArrayAdapter.STATIC);
    assertStat("mean", narrowDist.getMean(), 0.1, -0.01151765753759501);
    assertStat("stddev", narrowDist.getStddev(), 0.1, -0.009054202061335156);

    // Data set "random_0_1": checked against mean 0, stddev 1.
    final double[] standard = this.data.get("random_0_1");
    final NormalDistribution standardDist = est.estimate(standard, DoubleArrayAdapter.STATIC);
    assertStat("mean", standardDist.getMean(), 0., 0.048356873915244764);
    assertStat("stddev", standardDist.getStddev(), 1., 0.0812973063260789);

    // Data set "random_1_3": checked against mean 1, stddev 3.
    final double[] wide = this.data.get("random_1_3");
    final NormalDistribution wideDist = est.estimate(wide, DoubleArrayAdapter.STATIC);
    assertStat("mean", wideDist.getMean(), 1., -0.5780094891472563);
    assertStat("stddev", wideDist.getStddev(), 3., 0.7945863404180376);
}
Also used : NormalDistribution(de.lmu.ifi.dbs.elki.math.statistics.distribution.NormalDistribution) Test(org.junit.Test)

Example 10 with NormalDistribution

use of de.lmu.ifi.dbs.elki.math.statistics.distribution.NormalDistribution in project elki by elki-project.

the class NormalMOMEstimatorTest method testEstimator.

/**
 * Regression test: estimate normal distributions from three data sets of the
 * "norm.ascii.gz" resource and check mean and standard deviation via
 * {@code assertStat}.
 */
@Test
public void testEstimator() {
    final NormalMOMEstimator est = instantiate(NormalMOMEstimator.class, NormalDistribution.class);
    load("norm.ascii.gz");

    // Data set "random_01_01": checked against mean 0.1, stddev 0.1.
    final double[] narrow = this.data.get("random_01_01");
    final NormalDistribution narrowDist = est.estimate(narrow, DoubleArrayAdapter.STATIC);
    assertStat("mean", narrowDist.getMean(), 0.1, -0.01038465173940939);
    assertStat("stddev", narrowDist.getStddev(), 0.1, -0.009183157199219943);

    // Data set "random_0_1": checked against mean 0, stddev 1.
    final double[] standard = this.data.get("random_0_1");
    final NormalDistribution standardDist = est.estimate(standard, DoubleArrayAdapter.STATIC);
    assertStat("mean", standardDist.getMean(), 0., -0.022789814366585885);
    assertStat("stddev", standardDist.getStddev(), 1., -0.022483751299652854);

    // Data set "random_1_3": checked against mean 1, stddev 3.
    final double[] wide = this.data.get("random_1_3");
    final NormalDistribution wideDist = est.estimate(wide, DoubleArrayAdapter.STATIC);
    assertStat("mean", wideDist.getMean(), 1., -0.41895505881424644);
    assertStat("stddev", wideDist.getStddev(), 3., 0.1904775336995228);
}
Also used : NormalDistribution(de.lmu.ifi.dbs.elki.math.statistics.distribution.NormalDistribution) Test(org.junit.Test)

Aggregations

NormalDistribution (de.lmu.ifi.dbs.elki.math.statistics.distribution.NormalDistribution)11 Test (org.junit.Test)8 Random (java.util.Random)5 Distribution (de.lmu.ifi.dbs.elki.math.statistics.distribution.Distribution)3 UniformDistribution (de.lmu.ifi.dbs.elki.math.statistics.distribution.UniformDistribution)2 NormalMOMEstimator (de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator.NormalMOMEstimator)2 ListParameterization (de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.ListParameterization)2 DoubleVector (de.lmu.ifi.dbs.elki.data.DoubleVector)1 IntegerVector (de.lmu.ifi.dbs.elki.data.IntegerVector)1 Model (de.lmu.ifi.dbs.elki.data.model.Model)1 GeneratorSingleCluster (de.lmu.ifi.dbs.elki.data.synthetic.bymodel.GeneratorSingleCluster)1 FieldTypeInformation (de.lmu.ifi.dbs.elki.data.type.FieldTypeInformation)1 WritableDoubleDataStore (de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore)1 DBIDIter (de.lmu.ifi.dbs.elki.database.ids.DBIDIter)1 DoubleRelation (de.lmu.ifi.dbs.elki.database.relation.DoubleRelation)1 MaterializedDoubleRelation (de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation)1 AbstractDataSourceTest (de.lmu.ifi.dbs.elki.datasource.AbstractDataSourceTest)1 MultipleObjectsBundle (de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle)1 ReplaceNaNWithRandomFilter (de.lmu.ifi.dbs.elki.datasource.filter.cleaning.ReplaceNaNWithRandomFilter)1 MeanVariance (de.lmu.ifi.dbs.elki.math.MeanVariance)1