Search in sources :

Example 1 with MeanVarianceMinMax

use of de.lmu.ifi.dbs.elki.math.MeanVarianceMinMax in project elki by elki-project.

the class AveragePrecisionAtK method run.

/**
 * Evaluate the precision at each neighbor rank 1..k over a sample of objects.
 *
 * For every sampled object, its nearest neighbors are visited in the order
 * returned by the kNN query; at each rank, the fraction of neighbors seen so
 * far whose label matches the query object's label is recorded.
 *
 * @param database Database to run on (for kNN queries)
 * @param relation Relation for distance computations
 * @param lrelation Relation for class label comparison
 * @return One row per rank: rank, mean, standard deviation, minimum, maximum
 *         and number of observations of the precision at that rank.
 */
public CollectionResult<double[]> run(Database database, Relation<O> relation, Relation<?> lrelation) {
    final DistanceQuery<O> distQuery = database.getDistanceQuery(relation, getDistanceFunction());
    // One extra neighbor is requested when the query object itself is to be skipped.
    final int qk = includeSelf ? k : k + 1;
    final KNNQuery<O> knnQuery = database.getKNNQuery(distQuery, qk);
    final MeanVarianceMinMax[] perRank = MeanVarianceMinMax.newArray(k);
    final DBIDs sample = DBIDUtil.randomSample(relation.getDBIDs(), sampling, random);
    final FiniteProgress progress = LOG.isVerbose() ? new FiniteProgress("Computing nearest neighbors", sample.size(), LOG) : null;
    for (DBIDIter query = sample.iter(); query.valid(); query.advance()) {
        final KNNList neighbors = knnQuery.getKNNForDBID(query, qk);
        final Object queryLabel = lrelation.get(query);
        int hits = 0;
        int rank = 0;
        final DBIDIter neighbor = neighbors.iter();
        while (rank < k && neighbor.valid()) {
            final boolean skipSelf = !includeSelf && DBIDUtil.equal(query, neighbor);
            if (!skipSelf) {
                if (match(queryLabel, lrelation.get(neighbor))) {
                    hits++;
                }
                // The rank only advances for neighbors that are actually counted.
                perRank[rank].put(hits / (double) (rank + 1));
                rank++;
            }
            neighbor.advance();
        }
        LOG.incrementProcessed(progress);
    }
    LOG.ensureCompleted(progress);
    // Flatten the per-rank statistics into plain double rows.
    final Collection<double[]> rows = new ArrayList<>(k);
    for (int rank = 0; rank < k; rank++) {
        final MeanVarianceMinMax stats = perRank[rank];
        final double stddev;
        if (stats.getCount() > 1.) {
            stddev = stats.getSampleStddev();
        } else {
            // Sample standard deviation is not requested for fewer than two observations.
            stddev = 0.;
        }
        rows.add(new double[] { rank + 1, stats.getMean(), stddev, stats.getMin(), stats.getMax(), stats.getCount() });
    }
    return new CollectionResult<>("Average Precision", "average-precision", rows);
}
Also used : DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) ArrayList(java.util.ArrayList) MeanVarianceMinMax(de.lmu.ifi.dbs.elki.math.MeanVarianceMinMax) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) CollectionResult(de.lmu.ifi.dbs.elki.result.CollectionResult) KNNList(de.lmu.ifi.dbs.elki.database.ids.KNNList)

Example 2 with MeanVarianceMinMax

use of de.lmu.ifi.dbs.elki.math.MeanVarianceMinMax in project elki by elki-project.

the class InMemoryIDistanceIndex method logStatistics.

/**
 * Log the parent statistics, then the minimum, mean and maximum size over
 * all entries of {@code index}.
 */
@Override
public void logStatistics() {
    super.logStatistics();
    final MeanVarianceMinMax sizes = new MeanVarianceMinMax();
    int pos = 0;
    while (pos < index.length) {
        sizes.put(index[pos].size());
        pos++;
    }
    // Common key prefix of the three statistics below.
    final String prefix = InMemoryIDistanceIndex.class.getName() + ".size.";
    final LongStatistic smallest = new LongStatistic(prefix + "min", (int) sizes.getMin());
    LOG.statistics(smallest);
    final DoubleStatistic average = new DoubleStatistic(prefix + "mean", sizes.getMean());
    LOG.statistics(average);
    final LongStatistic largest = new LongStatistic(prefix + "max", (int) sizes.getMax());
    LOG.statistics(largest);
}
Also used : DoubleStatistic(de.lmu.ifi.dbs.elki.logging.statistics.DoubleStatistic) LongStatistic(de.lmu.ifi.dbs.elki.logging.statistics.LongStatistic) MeanVarianceMinMax(de.lmu.ifi.dbs.elki.math.MeanVarianceMinMax)

Example 3 with MeanVarianceMinMax

use of de.lmu.ifi.dbs.elki.math.MeanVarianceMinMax in project elki by elki-project.

the class SqrtStandardDeviationScaling method prepare.

/**
 * Estimate the scaling parameters {@code min}, {@code mean} and
 * {@code factor} from the scores of an outlier result.
 *
 * The mean and the deviation are computed on {@code sqrt(score - min)}, so
 * {@code min} has to be fixed before any score is transformed. It is taken
 * from {@code pmin} when given, otherwise it is the smallest raw score.
 *
 * @param or Outlier result providing the scores
 */
@Override
public void prepare(OutlierResult or) {
    final DoubleRelation scores = or.getScores();
    // First pass: fix the minimum. Previously the transform below ran with
    // whatever value min held before this call, and without pmin the minimum
    // was taken from the already transformed values.
    if (pmin != null) {
        min = pmin;
    } else {
        double lowest = Double.POSITIVE_INFINITY;
        for (DBIDIter id = scores.iterDBIDs(); id.valid(); id.advance()) {
            lowest = Math.min(lowest, scores.doubleValue(id));
        }
        min = lowest;
    }
    if (pmean == null) {
        // Mean and sample standard deviation of the transformed scores.
        MeanVarianceMinMax mv = new MeanVarianceMinMax();
        for (DBIDIter id = scores.iterDBIDs(); id.valid(); id.advance()) {
            final double val = scores.doubleValue(id);
            mv.put((val <= min) ? 0 : FastMath.sqrt(val - min));
        }
        mean = mv.getMean();
        factor = plambda * mv.getSampleStddev() * MathUtil.SQRT2;
    } else {
        // Mean is given: use the root mean squared deviation from it.
        mean = pmean;
        double sqsum = 0;
        int cnt = 0;
        for (DBIDIter id = scores.iterDBIDs(); id.valid(); id.advance()) {
            double val = scores.doubleValue(id);
            val = (val <= min) ? 0 : FastMath.sqrt(val - min);
            sqsum += (val - mean) * (val - mean);
            cnt += 1;
        }
        factor = plambda * FastMath.sqrt(sqsum / cnt) * MathUtil.SQRT2;
    }
}
Also used : DoubleRelation(de.lmu.ifi.dbs.elki.database.relation.DoubleRelation) MeanVarianceMinMax(de.lmu.ifi.dbs.elki.math.MeanVarianceMinMax) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter)

Example 4 with MeanVarianceMinMax

use of de.lmu.ifi.dbs.elki.math.MeanVarianceMinMax in project elki by elki-project.

the class SqrtStandardDeviationScaling method prepare.

/**
 * Estimate the scaling parameters {@code min}, {@code mean} and
 * {@code factor} from the values of an array-like object.
 *
 * The mean and the deviation are computed on {@code sqrt(value - min)}, so
 * {@code min} has to be fixed before any value is transformed. It is taken
 * from {@code pmin} when given, otherwise it is the smallest raw value.
 *
 * @param array Data to process
 * @param adapter Adapter used to read the size and values of {@code array}
 * @param <A> Array type
 */
@Override
public <A> void prepare(A array, NumberArrayAdapter<?, A> adapter) {
    final int size = adapter.size(array);
    // First pass: fix the minimum. Previously the transform below ran with
    // whatever value min held before this call, and without pmin the minimum
    // was taken from the already transformed values.
    if (pmin != null) {
        min = pmin;
    } else {
        double lowest = Double.POSITIVE_INFINITY;
        for (int i = 0; i < size; i++) {
            lowest = Math.min(lowest, adapter.getDouble(array, i));
        }
        min = lowest;
    }
    if (pmean == null) {
        // Mean and sample standard deviation of the transformed values.
        MeanVarianceMinMax mv = new MeanVarianceMinMax();
        for (int i = 0; i < size; i++) {
            final double val = adapter.getDouble(array, i);
            mv.put((val <= min) ? 0 : FastMath.sqrt(val - min));
        }
        mean = mv.getMean();
        factor = plambda * mv.getSampleStddev() * MathUtil.SQRT2;
    } else {
        // Mean is given: use the root mean squared deviation from it.
        mean = pmean;
        Mean sqsum = new Mean();
        for (int i = 0; i < size; i++) {
            double val = adapter.getDouble(array, i);
            val = (val <= min) ? 0 : FastMath.sqrt(val - min);
            sqsum.put((val - mean) * (val - mean));
        }
        factor = plambda * FastMath.sqrt(sqsum.getMean()) * MathUtil.SQRT2;
    }
}
Also used : Mean(de.lmu.ifi.dbs.elki.math.Mean) MeanVarianceMinMax(de.lmu.ifi.dbs.elki.math.MeanVarianceMinMax)

Example 5 with MeanVarianceMinMax

use of de.lmu.ifi.dbs.elki.math.MeanVarianceMinMax in project elki by elki-project.

the class InstanceLogRankNormalizationTest method defaultParameters.

/**
 * Run the filter with its default parameters and verify, row by row, the
 * minimum, maximum, mean and variance of the normalized values.
 */
@Test
public void defaultParameters() {
    InstanceLogRankNormalization<DoubleVector> filter = new ELKIBuilder<>(InstanceLogRankNormalization.class).build();
    MultipleObjectsBundle bundle = readBundle(UNITTEST + "normalization-test-1.csv", filter);
    final int dim = getFieldDimensionality(bundle, 0, TypeUtil.NUMBER_VECTOR_FIELD);
    // Reference statistics, built from the values a row without ties should
    // contain after normalization.
    MeanVariance reference = new MeanVariance();
    for (int rank = 0; rank < dim; rank++) {
        final double fraction = rank / (double) (dim - 1);
        reference.put(Math.log1p(fraction) * MathUtil.ONE_BY_LOG2);
    }
    final double wantMean = reference.getMean();
    final double wantVariance = reference.getNaiveVariance();
    // Apart from ties, each row must span exactly 0 to 1 and agree with the
    // reference mean and variance.
    MeanVarianceMinMax rowStats = new MeanVarianceMinMax();
    final int rows = bundle.dataLength();
    for (int r = 0; r < rows; r++) {
        DoubleVector vec = get(bundle, r, 0, DoubleVector.class);
        for (int c = 0; c < dim; c++) {
            rowStats.put(vec.doubleValue(c));
        }
        assertEquals("Min value is not 0", 0., rowStats.getMin(), 0);
        assertEquals("Max value is not 1", 1., rowStats.getMax(), 0);
        assertEquals("Mean value is not as expected", wantMean, rowStats.getMean(), 1e-14);
        assertEquals("Variance is not as expected", wantVariance, rowStats.getNaiveVariance(), 1e-14);
        // Start from a clean accumulator for the next row.
        rowStats.reset();
    }
}
Also used : MeanVariance(de.lmu.ifi.dbs.elki.math.MeanVariance) MultipleObjectsBundle(de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle) DoubleVector(de.lmu.ifi.dbs.elki.data.DoubleVector) MeanVarianceMinMax(de.lmu.ifi.dbs.elki.math.MeanVarianceMinMax) AbstractDataSourceTest(de.lmu.ifi.dbs.elki.datasource.AbstractDataSourceTest) Test(org.junit.Test)

Aggregations

MeanVarianceMinMax (de.lmu.ifi.dbs.elki.math.MeanVarianceMinMax)5 DBIDIter (de.lmu.ifi.dbs.elki.database.ids.DBIDIter)2 DoubleVector (de.lmu.ifi.dbs.elki.data.DoubleVector)1 DBIDs (de.lmu.ifi.dbs.elki.database.ids.DBIDs)1 KNNList (de.lmu.ifi.dbs.elki.database.ids.KNNList)1 DoubleRelation (de.lmu.ifi.dbs.elki.database.relation.DoubleRelation)1 AbstractDataSourceTest (de.lmu.ifi.dbs.elki.datasource.AbstractDataSourceTest)1 MultipleObjectsBundle (de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle)1 FiniteProgress (de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress)1 DoubleStatistic (de.lmu.ifi.dbs.elki.logging.statistics.DoubleStatistic)1 LongStatistic (de.lmu.ifi.dbs.elki.logging.statistics.LongStatistic)1 Mean (de.lmu.ifi.dbs.elki.math.Mean)1 MeanVariance (de.lmu.ifi.dbs.elki.math.MeanVariance)1 CollectionResult (de.lmu.ifi.dbs.elki.result.CollectionResult)1 ArrayList (java.util.ArrayList)1 Test (org.junit.Test)1