Search in sources :

Example 21 with MeanVariance

use of de.lmu.ifi.dbs.elki.math.MeanVariance in project elki by elki-project.

the class RankingQualityHistogram method run.

/**
 * Build a histogram of per-object ranking qualities.
 * <p>
 * The data is first partitioned by {@link ByLabelOrAllInOneClustering}. For
 * every member of every resulting cluster, a kNN query with
 * {@code k = relation.size()} is issued, the returned ranking is scored
 * against the member's own cluster, and the score is added to the histogram
 * with weight {@code 1 / relation.size()}.
 *
 * @param database Database to process
 * @param relation Relation to process
 * @return Histogram of ranking qualities; its header carries the mean and
 *         sample variance of the individual scores
 */
public HistogramResult run(Database database, Relation<O> relation) {
    final DistanceQuery<O> distanceQuery = database.getDistanceQuery(relation, getDistanceFunction());
    final KNNQuery<O> knnQuery = database.getKNNQuery(distanceQuery, relation.size());

    if (LOG.isVerbose()) {
        LOG.verbose("Preprocessing clusters...");
    }
    // Partition the objects according to their labels.
    final ByLabelOrAllInOneClustering labelClustering = new ByLabelOrAllInOneClustering();
    final Collection<Cluster<Model>> clusters = labelClustering.run(database).getAllClusters();
    final DoubleStaticHistogram histogram = new DoubleStaticHistogram(numbins, 0.0, 1.0);

    if (LOG.isVerbose()) {
        LOG.verbose("Processing points...");
    }
    // Progress is only tracked in verbose mode; the LOG helpers below are
    // handed null otherwise.
    FiniteProgress progress = null;
    if (LOG.isVerbose()) {
        progress = new FiniteProgress("Computing ROC AUC values", relation.size(), LOG);
    }
    final ROCEvaluation roc = new ROCEvaluation();
    final MeanVariance stats = new MeanVariance();

    // Score the neighbor ranking of each object against its own cluster.
    for (Cluster<?> cluster : clusters) {
        DBIDIter member = cluster.getIDs().iter();
        while (member.valid()) {
            final KNNList ranking = knnQuery.getKNNForDBID(member, relation.size());
            final double quality = EvaluateClustering.evaluateRanking(roc, cluster, ranking);
            stats.put(quality);
            histogram.increment(quality, 1. / relation.size());
            LOG.incrementProcessed(progress);
            member.advance();
        }
    }
    LOG.ensureCompleted(progress);

    // Export the histogram as (bin center, bin value) pairs.
    final Collection<double[]> bins = new ArrayList<>(relation.size());
    DoubleStaticHistogram.Iter bin = histogram.iter();
    while (bin.valid()) {
        final double[] entry = { bin.getCenter(), bin.getValue() };
        bins.add(entry);
        bin.advance();
    }

    final HistogramResult output = new HistogramResult("Ranking Quality Histogram", "ranking-histogram", bins);
    output.addHeader("Mean: " + stats.getMean() + " Variance: " + stats.getSampleVariance());
    return output;
}
Also used : ROCEvaluation(de.lmu.ifi.dbs.elki.evaluation.scores.ROCEvaluation) HistogramResult(de.lmu.ifi.dbs.elki.result.HistogramResult) FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) ArrayList(java.util.ArrayList) Cluster(de.lmu.ifi.dbs.elki.data.Cluster) ByLabelOrAllInOneClustering(de.lmu.ifi.dbs.elki.algorithm.clustering.trivial.ByLabelOrAllInOneClustering) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) MeanVariance(de.lmu.ifi.dbs.elki.math.MeanVariance) KNNList(de.lmu.ifi.dbs.elki.database.ids.KNNList) DoubleStaticHistogram(de.lmu.ifi.dbs.elki.utilities.datastructures.histogram.DoubleStaticHistogram)

Example 22 with MeanVariance

use of de.lmu.ifi.dbs.elki.math.MeanVariance in project elki by elki-project.

the class LogNormalLevenbergMarquardtKDEEstimator method estimate.

/**
 * Estimate a log-normal distribution by fitting a Gaussian curve to a kernel
 * density estimate of the log-transformed data.
 * <p>
 * The fit is started from the median and the sample standard deviation of
 * the logarithms; only the first two parameters are optimized, the third is
 * held at 1.
 *
 * @param data Data container
 * @param adapter Adapter used to read the size and the values of {@code data}
 * @param <A> Data container type
 * @return Log-normal distribution built from the two fitted parameters, with
 *         a shift of 0
 * @throws ArithmeticException if a value is not strictly positive (this also
 *         rejects NaN, because the check is {@code !(val > 0)})
 */
@Override
public <A> LogNormalDistribution estimate(A data, NumberArrayAdapter<?, A> adapter) {
    // We first need the basic parameters:
    final int len = adapter.size(data);
    MeanVariance mv = new MeanVariance();
    // X positions of samples (in log space)
    double[] x = new double[len];
    for (int i = 0; i < len; i++) {
        final double val = adapter.getDouble(data, i);
        if (!(val > 0)) {
            throw new ArithmeticException("Cannot fit logNormal to a data set which includes non-positive values: " + val);
        }
        x[i] = FastMath.log(val);
        mv.put(x[i]);
    }
    // Sort our copy.
    Arrays.sort(x);
    // Median of the sorted log values: the middle element for odd len, the
    // mean of the two middle elements for even len. The two indexes are
    // (len - 1) / 2 and len / 2; using len / 2 and (len + 1) / 2 instead is
    // shifted one position upwards and reads past the array for len == 1.
    double median = (x[(len - 1) >> 1] + x[len >> 1]) * .5;
    // Height = density, via KDE.
    KernelDensityEstimator de = new KernelDensityEstimator(x, GaussianKernelDensityFunction.KERNEL, 1e-6);
    double[] y = de.getDensity();
    // Weights: every sample is weighted equally.
    double[] s = new double[len];
    Arrays.fill(s, 1.0);
    // Initial parameter estimate:
    double[] params = { median, mv.getSampleStddev(), 1 };
    // Fit the first two parameters only; the third stays at its initial value.
    boolean[] dofit = { true, true, false };
    LevenbergMarquardtMethod fit = new LevenbergMarquardtMethod(GaussianFittingFunction.STATIC, params, dofit, x, y, s);
    fit.run();
    double[] ps = fit.getParams();
    return new LogNormalDistribution(ps[0], ps[1], 0.);
}
Also used : MeanVariance(de.lmu.ifi.dbs.elki.math.MeanVariance) LevenbergMarquardtMethod(de.lmu.ifi.dbs.elki.math.linearalgebra.fitting.LevenbergMarquardtMethod) KernelDensityEstimator(de.lmu.ifi.dbs.elki.math.statistics.KernelDensityEstimator) LogNormalDistribution(de.lmu.ifi.dbs.elki.math.statistics.distribution.LogNormalDistribution)

Example 23 with MeanVariance

use of de.lmu.ifi.dbs.elki.math.MeanVariance in project elki by elki-project.

the class WelchTTest method deviation.

/**
 * Compare two samples with Welch's t-test.
 * <p>
 * Mean and variance are accumulated separately for each sample; the test
 * statistic, the degrees of freedom and the p-value are then obtained from
 * the corresponding helper methods of this class.
 *
 * @param sample1 first sample
 * @param sample2 second sample
 * @return one minus the p-value computed for the two samples
 */
@Override
public double deviation(double[] sample1, double[] sample2) {
    final MeanVariance first = new MeanVariance();
    final MeanVariance second = new MeanVariance();
    for (int i = 0; i < sample1.length; i++) {
        first.put(sample1[i]);
    }
    for (int i = 0; i < sample2.length; i++) {
        second.put(sample2[i]);
    }
    final double statistic = calculateTestStatistic(first, second);
    final int degreesOfFreedom = calculateDOF(first, second);
    final double pValue = calculatePValue(statistic, degreesOfFreedom);
    return 1 - pValue;
}
Also used : MeanVariance(de.lmu.ifi.dbs.elki.math.MeanVariance)

Example 24 with MeanVariance

use of de.lmu.ifi.dbs.elki.math.MeanVariance in project elki by elki-project.

the class OUTRES method outresScore.

/**
 * Recursive core of OUTRES, evaluated once per object.
 * <p>
 * Starting at dimension {@code s}, each dimension not yet contained in
 * {@code subspace} is temporarily added. If the object has more than two
 * neighbors there and the extended subspace passes the relevance test, the
 * object's density is compared to the densities of its neighbors, the score
 * is possibly reduced, and the method recurses with the next start dimension.
 * The dimension is removed from {@code subspace} again before the next one is
 * tried, so the bitmask is back in its input state when the method returns.
 *
 * @param s start dimension
 * @param subspace Current subspace (modified in place during the call)
 * @param id Current object ID
 * @param kernel Kernel
 * @return Score
 */
public double outresScore(final int s, long[] subspace, DBIDRef id, KernelDensityEstimator kernel) {
    // The score starts at 1.0 and is only ever multiplied.
    double result = 1.0;
    final SubspaceEuclideanDistanceFunction distance = new SubspaceEuclideanDistanceFunction(subspace);
    final MeanVariance densityStats = new MeanVariance();
    for (int dim = s; dim < kernel.dim; dim++) {
        // Dimensions that are already part of the subspace are skipped.
        if (!BitsUtil.get(subspace, dim)) {
            BitsUtil.setI(subspace, dim);
            distance.setSelectedDimensions(subspace);
            final double eps = kernel.adjustedEps(kernel.dim);
            // Query twice the radius, so that the neighbors of neighbors are
            // available as well (subspace Euclidean distance is metric).
            final double window = eps * 2.;
            final RangeQuery<V> rangeQuery = QueryUtil.getRangeQuery(kernel.relation, distance, window);
            final DoubleDBIDList candidates = rangeQuery.getRangeForDBID(id, window);
            final DoubleDBIDList neighbors = refineRange(candidates, eps);
            // Enough neighbors, and the subspace is relevant for them?
            if (neighbors.size() > 2 && relevantSubspace(subspace, neighbors, kernel)) {
                final double ownDensity = kernel.subspaceDensity(subspace, neighbors);
                // Collect mean and standard deviation of the neighbors' densities.
                densityStats.reset();
                DoubleDBIDListIter it = neighbors.iter();
                while (it.valid()) {
                    final DoubleDBIDList secondLevel = subsetNeighborhoodQuery(candidates, it, distance, eps, kernel);
                    densityStats.put(kernel.subspaceDensity(subspace, secondLevel));
                    it.advance();
                }
                final double meanDensity = densityStats.getMean();
                final double stddevDensity = densityStats.getSampleStddev();
                final double deviation = (meanDensity - ownDensity) / (2. * stddevDensity);
                // Only a high deviation changes the score.
                if (deviation >= 1) {
                    result = result * (ownDensity / deviation);
                }
                // Descend into the higher-dimensional subspaces.
                result = result * outresScore(dim + 1, subspace, id, kernel);
            }
            // Undo the temporary extension of the subspace.
            BitsUtil.clearI(subspace, dim);
        }
    }
    return result;
}
Also used : MeanVariance(de.lmu.ifi.dbs.elki.math.MeanVariance) DoubleDBIDListIter(de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter) SubspaceEuclideanDistanceFunction(de.lmu.ifi.dbs.elki.distance.distancefunction.subspace.SubspaceEuclideanDistanceFunction) ModifiableDoubleDBIDList(de.lmu.ifi.dbs.elki.database.ids.ModifiableDoubleDBIDList) DoubleDBIDList(de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDList)

Example 25 with MeanVariance

use of de.lmu.ifi.dbs.elki.math.MeanVariance in project elki by elki-project.

the class SigmoidOutlierScalingFunction method prepare.

/**
 * Fit the two sigmoid parameters to the given scores.
 * <p>
 * Starting from {@code a = 1} and {@code b = -mean} (mean taken over all
 * non-infinite values), the method alternates between assigning each value
 * to the set of entries with {@code a * val + b > 0} and re-estimating
 * {@code a} and {@code b} via {@code MStepLevenbergMarquardt}. It stops when
 * the assignment no longer changes, or after the iteration limit has been
 * exceeded. The result is stored in {@code Afinal} and {@code Bfinal}.
 *
 * @param array Data container
 * @param adapter Adapter used to read the size and the values of {@code array}
 * @param <A> Data container type
 */
@Override
public <A> void prepare(A array, NumberArrayAdapter<?, A> adapter) {
    // Initial parameters - are these defaults sound?
    final MeanVariance stats = new MeanVariance();
    final int size = adapter.size(array);
    for (int i = 0; i < size; i++) {
        final double val = adapter.getDouble(array, i);
        if (Double.isInfinite(val)) {
            continue;
        }
        stats.put(val);
    }
    double a = 1.0;
    double b = -stats.getMean();
    final long[] assigned = BitsUtil.zero(size);
    int rounds = 0;
    while (true) {
        // E-Step: flag exactly those entries whose linear score is positive.
        boolean changed = false;
        for (int i = 0; i < size; i++) {
            final double val = adapter.getDouble(array, i);
            final boolean positive = a * val + b > 0;
            if (positive != BitsUtil.get(assigned, i)) {
                if (positive) {
                    BitsUtil.setI(assigned, i);
                } else {
                    BitsUtil.clearI(assigned, i);
                }
                changed = true;
            }
        }
        // Stable assignment: converged.
        if (!changed) {
            break;
        }
        // M-Step
        // Implementation based on:<br />
        // H.-T. Lin, C.-J. Lin, R. C. Weng:<br />
        // A Note on Platt’s Probabilistic Outputs for Support Vector Machines
        final double[] updated = MStepLevenbergMarquardt(a, b, assigned, array, adapter);
        a = updated[0];
        b = updated[1];
        if (++rounds > 100) {
            LOG.warning("Max iterations met in sigmoid fitting.");
            break;
        }
    }
    Afinal = a;
    Bfinal = b;
    LOG.debugFine("A = " + Afinal + " B = " + Bfinal);
}
Also used : MeanVariance(de.lmu.ifi.dbs.elki.math.MeanVariance)

Aggregations

MeanVariance (de.lmu.ifi.dbs.elki.math.MeanVariance)61 DBIDIter (de.lmu.ifi.dbs.elki.database.ids.DBIDIter)32 DoubleRelation (de.lmu.ifi.dbs.elki.database.relation.DoubleRelation)17 FiniteProgress (de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress)17 DoubleMinMax (de.lmu.ifi.dbs.elki.math.DoubleMinMax)15 DBIDs (de.lmu.ifi.dbs.elki.database.ids.DBIDs)13 WritableDoubleDataStore (de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore)9 KNNList (de.lmu.ifi.dbs.elki.database.ids.KNNList)9 MaterializedDoubleRelation (de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation)9 OutlierResult (de.lmu.ifi.dbs.elki.result.outlier.OutlierResult)9 OutlierScoreMeta (de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta)9 MultipleObjectsBundle (de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle)8 DoubleStatistic (de.lmu.ifi.dbs.elki.logging.statistics.DoubleStatistic)8 DoubleVector (de.lmu.ifi.dbs.elki.data.DoubleVector)7 Mean (de.lmu.ifi.dbs.elki.math.Mean)7 NumberVector (de.lmu.ifi.dbs.elki.data.NumberVector)6 DoubleDBIDListIter (de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter)6 ArrayDBIDs (de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs)5 DBIDArrayIter (de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter)5 AbstractDataSourceTest (de.lmu.ifi.dbs.elki.datasource.AbstractDataSourceTest)5