Search in sources :

Example 1 with DistributionEstimator

Use of de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator.DistributionEstimator in the project elki by elki-project.

The method findBestFit of the class AttributeWiseCDFNormalization.

/**
 * Find the best fitting distribution.
 *
 * If exactly one estimator is configured, its estimate is returned directly
 * without any goodness-of-fit test. Otherwise every estimator is tried: the
 * fitted distribution's CDF is evaluated on the column, the values are sorted,
 * and the fit is scored with {@code KolmogorovSmirnovTest.simpleTest}. The
 * distribution with the lowest score wins.
 *
 * An estimator is skipped when fitting throws an {@link ArithmeticException}
 * or when the fitted CDF yields a NaN or infinite value.
 *
 * @param col Column of table
 * @param adapter Adapter for accessing the data
 * @param d Dimension
 * @param test Scratch space for testing goodness of fit; its contents are
 *        overwritten (and sorted) by this method
 * @return Best fit distribution, or {@code null} if several estimators are
 *         configured and none of them produced a usable fit
 */
protected Distribution findBestFit(final List<V> col, Adapter adapter, int d, double[] test) {
    if (estimators.size() == 1) {
        // Nothing to compare against: skip the goodness-of-fit test entirely.
        return estimators.get(0).estimate(col, adapter);
    }
    Distribution best = null;
    double bestq = Double.POSITIVE_INFINITY;
    trials: for (DistributionEstimator<?> est : estimators) {
        try {
            Distribution dist = est.estimate(col, adapter);
            for (int i = 0; i < test.length; i++) {
                test[i] = dist.cdf(col.get(i).doubleValue(d));
                // A non-finite CDF value makes this fit unusable; try the next estimator.
                if (Double.isNaN(test[i])) {
                    LOG.warning("Got NaN after fitting " + est.toString() + ": " + dist.toString());
                    continue trials;
                }
                if (Double.isInfinite(test[i])) {
                    LOG.warning("Got infinite value after fitting " + est.toString() + ": " + dist.toString());
                    continue trials;
                }
            }
            // The values are sorted before being handed to the test.
            Arrays.sort(test);
            double q = KolmogorovSmirnovTest.simpleTest(test);
            if (LOG.isVeryVerbose()) {
                LOG.veryverbose("Estimator " + est.toString() + " (" + dist.toString() + ") has maximum deviation " + q + " for dimension " + d);
            }
            // Accept the first successful fit unconditionally, then only strict improvements.
            if (best == null || q < bestq) {
                best = dist;
                bestq = q;
            }
        } catch (ArithmeticException e) {
            // Fitting failed for this estimator; carry on with the next one.
            if (LOG.isVeryVerbose()) {
                LOG.veryverbose("Fitting distribution " + est + " failed: " + e.getMessage());
            }
        }
    }
    if (best == null) {
        // Every estimator failed; avoid dereferencing null while logging.
        LOG.warning("No distribution could be fitted for dimension " + d);
    } else if (LOG.isVerbose()) {
        LOG.verbose("Best fit for dimension " + d + ": " + best.toString());
    }
    return best;
}
Also used : Distribution(de.lmu.ifi.dbs.elki.math.statistics.distribution.Distribution) UniformDistribution(de.lmu.ifi.dbs.elki.math.statistics.distribution.UniformDistribution) DistributionEstimator(de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator.DistributionEstimator)

Aggregations

Distribution (de.lmu.ifi.dbs.elki.math.statistics.distribution.Distribution): 1, UniformDistribution (de.lmu.ifi.dbs.elki.math.statistics.distribution.UniformDistribution): 1, DistributionEstimator (de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator.DistributionEstimator): 1