Search in sources:

Example 1 with MeanVariance

use of de.lmu.ifi.dbs.elki.math.MeanVariance in project elki by elki-project.

the class AttributeWiseVarianceNormalizationTest method testNaNParameters.

/**
 * Runs the filter with default parameters on a data file containing NaN and
 * infinite values, and checks that these values are handled correctly: over
 * the finite values, every column must have mean 0 and variance 1.
 */
@Test
public void testNaNParameters() {
    final String inputFile = UNITTEST + "nan-test-1.csv";
    AttributeWiseVarianceNormalization<DoubleVector> normalizer = new ELKIBuilder<AttributeWiseVarianceNormalization<DoubleVector>>(AttributeWiseVarianceNormalization.class).build();
    MultipleObjectsBundle result = readBundle(inputFile, normalizer);
    // The first column has to be a number vector field; the cast below relies on it.
    assertTrue("Test file not as expected", TypeUtil.NUMBER_VECTOR_FIELD.isAssignableFromType(result.meta(0)));
    final FieldTypeInformation vectorField = (FieldTypeInformation) result.meta(0);
    final int dim = vectorField.getDimensionality();
    // One accumulator per column, fed with the finite values only.
    final MeanVariance[] stats = MeanVariance.newArray(dim);
    for (int r = 0; r < result.dataLength(); r++) {
        final DoubleVector vec = get(result, r, 0, DoubleVector.class);
        for (int c = 0; c < dim; c++) {
            final double value = vec.doubleValue(c);
            if (Double.isNaN(value) || Double.isInfinite(value)) {
                // NaN and +-Infinity must not contribute to the statistics.
                continue;
            }
            stats[c].put(value);
        }
    }
    // The normalized data must have mean 0 and variance 1 in each column.
    for (int c = 0; c < dim; c++) {
        final MeanVariance columnStats = stats[c];
        assertEquals("Mean not as expected", 0., columnStats.getMean(), 1e-15);
        assertEquals("Variance not as expected", 1., columnStats.getNaiveVariance(), 1e-15);
    }
}
Also used : MeanVariance(de.lmu.ifi.dbs.elki.math.MeanVariance) MultipleObjectsBundle(de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle) FieldTypeInformation(de.lmu.ifi.dbs.elki.data.type.FieldTypeInformation) DoubleVector(de.lmu.ifi.dbs.elki.data.DoubleVector) AbstractDataSourceTest(de.lmu.ifi.dbs.elki.datasource.AbstractDataSourceTest) Test(org.junit.Test)

Example 2 with MeanVariance

use of de.lmu.ifi.dbs.elki.math.MeanVariance in project elki by elki-project.

the class LMCLUS method findAndEvaluateThreshold.

/**
 * Evaluate the histogram to find a suitable threshold.
 *
 * For every bin index, cumulative sums, means and naive standard deviations
 * are collected in a forward pass (bins 0..i) and a backward pass (bins
 * i..n-1). From these a criterion value {@code jt[i]} is computed per bin.
 * Each local minimum of {@code jt} is scored by the product of its depth and
 * a discriminability term; the best scoring bin is returned.
 *
 * NOTE(review): {@code jt[1]} is read unconditionally, so a histogram with
 * fewer than two bins would raise an ArrayIndexOutOfBoundsException - confirm
 * that callers guarantee at least two bins.
 *
 * NOTE(review): if no local minimum is found, {@code bestpos} stays at -1
 * (the iterator is then positioned at -1) and the returned goodness is
 * {@code Double.NEGATIVE_INFINITY}.
 *
 * @param histogram Histogram to evaluate
 * @return Array of length two: the right boundary of the best bin (as
 *         reported by the histogram iterator) and the goodness of that bin
 */
private double[] findAndEvaluateThreshold(DoubleDynamicHistogram histogram) {
    int n = histogram.getNumBins();
    // Per-bin statistics: index 1 = forward (left) side, index 2 = backward
    // (right) side. p = cumulative bin values, mu = mean, sigma = naive
    // standard deviation.
    double[] p1 = new double[n];
    double[] p2 = new double[n];
    double[] mu1 = new double[n];
    double[] mu2 = new double[n];
    double[] sigma1 = new double[n];
    double[] sigma2 = new double[n];
    // Criterion value per bin, searched for local minima below.
    double[] jt = new double[n];
    // Forward pass
    {
        MeanVariance mv = new MeanVariance();
        DoubleHistogram.Iter forward = histogram.iter();
        for (int i = 0; forward.valid(); i++, forward.advance()) {
            // Running sum of the bin values up to and including bin i.
            p1[i] = forward.getValue() + ((i > 0) ? p1[i - 1] : 0);
            // Presumably adds the bin index i with the bin value as weight -
            // TODO confirm against MeanVariance.put(double, double).
            mv.put(i, forward.getValue());
            mu1[i] = mv.getMean();
            sigma1[i] = mv.getNaiveStddev();
        }
    }
    // Backwards pass
    {
        MeanVariance mv = new MeanVariance();
        DoubleHistogram.Iter backwards = histogram.iter();
        // Seek to last
        backwards.seek(histogram.getNumBins() - 1);
        for (int j = n - 1; backwards.valid(); j--, backwards.retract()) {
            // Running sum of the bin values from bin j to the last bin.
            p2[j] = backwards.getValue() + ((j + 1 < n) ? p2[j + 1] : 0);
            mv.put(j, backwards.getValue());
            mu2[j] = mv.getMean();
            sigma2[j] = mv.getNaiveStddev();
        }
    }
    // Criterion value for splitting at bin i, combining both sides.
    for (int i = 0; i < n; i++) {
        jt[i] = 1.0 + 2 * (p1[i] * (FastMath.log(sigma1[i]) - FastMath.log(p1[i])) + p2[i] * (FastMath.log(sigma2[i]) - FastMath.log(p2[i])));
    }
    int bestpos = -1;
    double bestgoodness = Double.NEGATIVE_INFINITY;
    // Discrete derivative of jt; a local minimum is where it changes from
    // non-positive to non-negative.
    double devPrev = jt[1] - jt[0];
    for (int i = 1; i < jt.length - 1; i++) {
        double devCur = jt[i + 1] - jt[i];
        // Local minimum found - calculate depth
        if (devCur >= 0 && devPrev <= 0) {
            // Stays at +infinity if neither search below finds a candidate.
            double lowestMaxima = Double.POSITIVE_INFINITY;
            // Search to the left for the first j with jt[j - 1] < jt[j].
            for (int j = i - 1; j > 0; j--) {
                if (jt[j - 1] < jt[j]) {
                    lowestMaxima = Math.min(lowestMaxima, jt[j]);
                    break;
                }
            }
            // Search to the right for the first j with jt[j + 1] < jt[j].
            // NOTE(review): the bound is j < n - 2, so j = n - 2 is never
            // examined although jt[n - 1] exists - confirm this is intended.
            for (int j = i + 1; j < n - 2; j++) {
                if (jt[j + 1] < jt[j]) {
                    lowestMaxima = Math.min(lowestMaxima, jt[j]);
                    break;
                }
            }
            // Depth of the minimum relative to the lower of the two maxima.
            double localDepth = lowestMaxima - jt[i];
            // Squared difference of the means over the sum of the variances.
            final double mud = mu1[i] - mu2[i];
            double discriminability = mud * mud / (sigma1[i] * sigma1[i] + sigma2[i] * sigma2[i]);
            if (Double.isNaN(discriminability)) {
                // Replace NaN (e.g. from 0 / 0) by a negative value.
                discriminability = -1;
            }
            double goodness = localDepth * discriminability;
            // Strict comparison: on ties, the earliest bin is kept.
            if (goodness > bestgoodness) {
                bestgoodness = goodness;
                bestpos = i;
            }
        }
        devPrev = devCur;
    }
    // Report the right boundary of the best bin as the threshold position.
    Iter iter = histogram.iter();
    iter.seek(bestpos);
    return new double[] { iter.getRight(), bestgoodness };
}
Also used : MeanVariance(de.lmu.ifi.dbs.elki.math.MeanVariance) Iter(de.lmu.ifi.dbs.elki.utilities.datastructures.histogram.DoubleStaticHistogram.Iter) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter)

Example 3 with MeanVariance

use of de.lmu.ifi.dbs.elki.math.MeanVariance in project elki by elki-project.

the class ExpGammaExpMOMEstimator method estimate.

/**
 * Estimates the distribution from the mean and variance of the exponentiated
 * data values.
 *
 * @param data Data collection
 * @param adapter Adapter used to read the size and the values of the data
 * @return Result of {@code estimateFromExpMeanVariance} on the collected
 *         statistics
 */
@Override
public <A> ExpGammaDistribution estimate(A data, NumberArrayAdapter<?, A> adapter) {
    final MeanVariance expStats = new MeanVariance();
    // Accumulate exp(x) for every element of the data.
    for (int pos = 0, size = adapter.size(data); pos < size; pos++) {
        final double value = adapter.getDouble(data, pos);
        expStats.put(FastMath.exp(value));
    }
    return estimateFromExpMeanVariance(expStats);
}
Also used : MeanVariance(de.lmu.ifi.dbs.elki.math.MeanVariance)

Example 4 with MeanVariance

use of de.lmu.ifi.dbs.elki.math.MeanVariance in project elki by elki-project.

the class LogMeanVarianceEstimator method estimate.

/**
 * Estimates the distribution from the mean and variance of the logarithms of
 * the shifted data values.
 *
 * Each value is shifted by the offset obtained from
 * {@code LogMOMDistributionEstimator.min}; shifted values that are NaN,
 * infinite, or not strictly positive are ignored.
 *
 * @param data Data collection
 * @param adapter Adapter used to read the size and the values of the data
 * @return Result of {@code estimateFromLogMeanVariance} on the collected
 *         statistics and the offset
 * @throws ArithmeticException if the count of usable values is not greater
 *         than one
 */
@Override
default <A> D estimate(A data, NumberArrayAdapter<?, A> adapter) {
    final int size = adapter.size(data);
    final double offset = LogMOMDistributionEstimator.min(data, adapter, 0., 1e-10);
    final MeanVariance logStats = new MeanVariance();
    for (int pos = 0; pos < size; pos++) {
        final double shifted = adapter.getDouble(data, pos) - offset;
        // Only finite, strictly positive values are used; the comparisons
        // are false for NaN, so NaN is skipped as well.
        if (shifted > 0. && shifted < Double.POSITIVE_INFINITY) {
            logStats.put(FastMath.log(shifted));
        }
    }
    if (logStats.getCount() > 1.) {
        return estimateFromLogMeanVariance(logStats, offset);
    }
    throw new ArithmeticException("Too small sample size to estimate variance.");
}
Also used : MeanVariance(de.lmu.ifi.dbs.elki.math.MeanVariance)

Example 5 with MeanVariance

use of de.lmu.ifi.dbs.elki.math.MeanVariance in project elki by elki-project.

the class PartitionApproximationMaterializeKNNPreprocessor method preprocess.

/**
 * Fills the kNN storage with approximate neighbor lists.
 *
 * The object IDs are randomly split into {@code partitions} parts, and the
 * neighbors of each object are searched only within its own part. Inside a
 * part, each computed distance is cached under the reversed ID pair, so that
 * it can be consumed (and removed) when the reversed pair is visited.
 */
@Override
protected void preprocess() {
    final DistanceQuery<O> dq = relation.getDistanceQuery(distanceFunction);
    storage = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC, KNNList.class);
    // Statistics on the sizes of the produced neighbor lists.
    final MeanVariance listSizes = new MeanVariance();
    if (LOG.isVerbose()) {
        LOG.verbose("Approximating nearest neighbor lists to database objects");
    }
    // Randomly assign the IDs to the partitions.
    final ArrayDBIDs[] chunks = DBIDUtil.randomSplit(relation.getDBIDs(), partitions, rnd);
    FiniteProgress prog = null;
    if (LOG.isVerbose()) {
        prog = new FiniteProgress("Processing partitions", partitions, LOG);
    }
    for (int p = 0; p < partitions; p++) {
        final ArrayDBIDs members = chunks[p];
        final int count = members.size();
        // Distance cache for this partition; NaN signals "not cached".
        final Object2DoubleOpenHashMap<DBIDPair> known = new Object2DoubleOpenHashMap<>((count * count * 3) >> 3);
        known.defaultReturnValue(Double.NaN);
        for (DBIDIter query = members.iter(); query.valid(); query.advance()) {
            final KNNHeap heap = DBIDUtil.newHeap(k);
            for (DBIDIter cand = members.iter(); cand.valid(); cand.advance()) {
                // Take a previously stored distance out of the cache, if any.
                double dist = known.removeDouble(DBIDUtil.newPair(query, cand));
                if (Double.isNaN(dist)) {
                    // Cache miss: compute the distance and remember it under
                    // the reversed key for the later visit of (cand, query).
                    dist = dq.distance(query, cand);
                    known.put(DBIDUtil.newPair(cand, query), dist);
                }
                heap.insert(dist, cand);
            }
            listSizes.put(heap.size());
            storage.put(query, heap.toKNNList());
        }
        if (LOG.isDebugging() && known.size() > 0) {
            LOG.warning("Cache should be empty after each run, but still has " + known.size() + " elements.");
        }
        LOG.incrementProcessed(prog);
    }
    LOG.ensureCompleted(prog);
    if (LOG.isVerbose()) {
        LOG.verbose("On average, " + listSizes.getMean() + " +- " + listSizes.getSampleStddev() + " neighbors returned.");
    }
}
Also used : Object2DoubleOpenHashMap(it.unimi.dsi.fastutil.objects.Object2DoubleOpenHashMap) FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) KNNHeap(de.lmu.ifi.dbs.elki.database.ids.KNNHeap) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) MeanVariance(de.lmu.ifi.dbs.elki.math.MeanVariance) DBIDPair(de.lmu.ifi.dbs.elki.database.ids.DBIDPair) KNNList(de.lmu.ifi.dbs.elki.database.ids.KNNList) ArrayDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs)

Aggregations

MeanVariance (de.lmu.ifi.dbs.elki.math.MeanVariance)61 DBIDIter (de.lmu.ifi.dbs.elki.database.ids.DBIDIter)32 DoubleRelation (de.lmu.ifi.dbs.elki.database.relation.DoubleRelation)17 FiniteProgress (de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress)17 DoubleMinMax (de.lmu.ifi.dbs.elki.math.DoubleMinMax)15 DBIDs (de.lmu.ifi.dbs.elki.database.ids.DBIDs)13 WritableDoubleDataStore (de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore)9 KNNList (de.lmu.ifi.dbs.elki.database.ids.KNNList)9 MaterializedDoubleRelation (de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation)9 OutlierResult (de.lmu.ifi.dbs.elki.result.outlier.OutlierResult)9 OutlierScoreMeta (de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta)9 MultipleObjectsBundle (de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle)8 DoubleStatistic (de.lmu.ifi.dbs.elki.logging.statistics.DoubleStatistic)8 DoubleVector (de.lmu.ifi.dbs.elki.data.DoubleVector)7 Mean (de.lmu.ifi.dbs.elki.math.Mean)7 NumberVector (de.lmu.ifi.dbs.elki.data.NumberVector)6 DoubleDBIDListIter (de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter)6 ArrayDBIDs (de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs)5 DBIDArrayIter (de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter)5 AbstractDataSourceTest (de.lmu.ifi.dbs.elki.datasource.AbstractDataSourceTest)5