Use of de.lmu.ifi.dbs.elki.math.MeanVarianceMinMax in project elki by elki-project.
The class AveragePrecisionAtK, method run.
/**
 * Run the algorithm
 *
 * @param database Database to run on (for kNN queries)
 * @param relation Relation for distance computations
 * @param lrelation Relation for class label comparison
 * @return Vectors containing mean and standard deviation.
 */
public CollectionResult<double[]> run(Database database, Relation<O> relation, Relation<?> lrelation) {
  final DistanceQuery<O> distQuery = database.getDistanceQuery(relation, getDistanceFunction());
  final int qk = k + (includeSelf ? 0 : 1);
  final KNNQuery<O> knnQuery = database.getKNNQuery(distQuery, qk);
  MeanVarianceMinMax[] mvs = MeanVarianceMinMax.newArray(k);
  final DBIDs ids = DBIDUtil.randomSample(relation.getDBIDs(), sampling, random);
  FiniteProgress objloop = LOG.isVerbose() ? new FiniteProgress("Computing nearest neighbors", ids.size(), LOG) : null;
  // For each sampled object: query its neighbors and update the per-rank precision statistics.
  for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
    KNNList knn = knnQuery.getKNNForDBID(iter, qk);
    Object label = lrelation.get(iter);
    int positive = 0, i = 0;
    for (DBIDIter ri = knn.iter(); i < k && ri.valid(); ri.advance()) {
      if (!includeSelf && DBIDUtil.equal(iter, ri)) {
        // Do not increment i.
        continue;
      }
      positive += match(label, lrelation.get(ri)) ? 1 : 0;
      final double precision = positive / (double) (i + 1);
      mvs[i].put(precision);
      i++;
    }
    LOG.incrementProcessed(objloop);
  }
  LOG.ensureCompleted(objloop);
  // Transform the per-rank statistics into a collection of result rows.
  Collection<double[]> res = new ArrayList<>(k);
  for (int i = 0; i < k; i++) {
    final MeanVarianceMinMax mv = mvs[i];
    final double std = mv.getCount() > 1. ? mv.getSampleStddev() : 0.;
    res.add(new double[] { i + 1, mv.getMean(), std, mv.getMin(), mv.getMax(), mv.getCount() });
  }
  return new CollectionResult<>("Average Precision", "average-precision", res);
}
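The result rows above are just MeanVarianceMinMax summaries, one per rank. The following minimal sketch (not part of AveragePrecisionAtK; the class name, the precision values, and the setup are made up for illustration) shows the same accumulation and read-out pattern in isolation, using only methods that appear in the snippet above.

// Sketch: accumulate precision values for one rank and read the summary statistics.
import de.lmu.ifi.dbs.elki.math.MeanVarianceMinMax;

public class PrecisionStatsSketch {
  public static void main(String[] args) {
    // Hypothetical precision@3 values observed for a handful of query objects.
    double[] precisions = { 1.0, 2. / 3., 1. / 3., 1.0, 2. / 3. };
    MeanVarianceMinMax mv = new MeanVarianceMinMax();
    for (double p : precisions) {
      mv.put(p);
    }
    // Same fields as one result row above: mean, sample stddev, min, max, count.
    double std = mv.getCount() > 1. ? mv.getSampleStddev() : 0.;
    System.out.println(mv.getMean() + " " + std + " " + mv.getMin() + " " + mv.getMax() + " " + mv.getCount());
  }
}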
Use of de.lmu.ifi.dbs.elki.math.MeanVarianceMinMax in project elki by elki-project.
The class InMemoryIDistanceIndex, method logStatistics.
@Override
public void logStatistics() {
  super.logStatistics();
  MeanVarianceMinMax mm = new MeanVarianceMinMax();
  for (int i = 0; i < index.length; i++) {
    mm.put(index[i].size());
  }
  LOG.statistics(new LongStatistic(InMemoryIDistanceIndex.class.getName() + ".size.min", (int) mm.getMin()));
  LOG.statistics(new DoubleStatistic(InMemoryIDistanceIndex.class.getName() + ".size.mean", mm.getMean()));
  LOG.statistics(new LongStatistic(InMemoryIDistanceIndex.class.getName() + ".size.max", (int) mm.getMax()));
}
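The same min/mean/max summary can be reproduced outside the index. The sketch below (class name, partition sizes, and output format are assumptions for illustration, not the index internals) mirrors the pattern of logStatistics() on a plain int[].

// Sketch: summarizing hypothetical partition sizes the way logStatistics() does.
import de.lmu.ifi.dbs.elki.math.MeanVarianceMinMax;

public class PartitionSizeStatsSketch {
  public static void main(String[] args) {
    int[] partitionSizes = { 120, 87, 203, 95, 151 }; // hypothetical bucket sizes
    MeanVarianceMinMax mm = new MeanVarianceMinMax();
    for (int s : partitionSizes) {
      mm.put(s);
    }
    System.out.println("min=" + (int) mm.getMin() + " mean=" + mm.getMean() + " max=" + (int) mm.getMax());
  }
}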
Use of de.lmu.ifi.dbs.elki.math.MeanVarianceMinMax in project elki by elki-project.
The class SqrtStandardDeviationScaling, method prepare.
@Override
public void prepare(OutlierResult or) {
  if (pmean == null) {
    MeanVarianceMinMax mv = new MeanVarianceMinMax();
    DoubleRelation scores = or.getScores();
    for (DBIDIter id = scores.iterDBIDs(); id.valid(); id.advance()) {
      double val = scores.doubleValue(id);
      val = (val <= min) ? 0 : FastMath.sqrt(val - min);
      mv.put(val);
    }
    min = (pmin == null) ? mv.getMin() : pmin;
    mean = mv.getMean();
    factor = plambda * mv.getSampleStddev() * MathUtil.SQRT2;
  } else {
    mean = pmean;
    double sqsum = 0;
    int cnt = 0;
    DoubleRelation scores = or.getScores();
    double mm = Double.POSITIVE_INFINITY;
    for (DBIDIter id = scores.iterDBIDs(); id.valid(); id.advance()) {
      double val = scores.doubleValue(id);
      mm = Math.min(mm, val);
      val = (val <= min) ? 0 : FastMath.sqrt(val - min);
      sqsum += (val - mean) * (val - mean);
      cnt += 1;
    }
    min = (pmin == null) ? mm : pmin;
    factor = plambda * FastMath.sqrt(sqsum / cnt) * MathUtil.SQRT2;
  }
}
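In the first branch (no user-specified mean), MeanVarianceMinMax supplies the mean and sample standard deviation of the sqrt-transformed scores, from which the scaling factor lambda * stddev * sqrt(2) is derived. A minimal sketch of that branch on a plain double[] follows; the class name, score values, and parameters are made up, and java.lang.Math stands in for ELKI's FastMath/MathUtil constants.

// Sketch: estimating mean and scaling factor from hypothetical outlier scores.
import de.lmu.ifi.dbs.elki.math.MeanVarianceMinMax;

public class SqrtStddevFactorSketch {
  public static void main(String[] args) {
    double[] scores = { 0.1, 0.4, 0.9, 1.6, 2.5 }; // hypothetical outlier scores
    double min = 0.1, lambda = 3.0;                // assumed parameters
    MeanVarianceMinMax mv = new MeanVarianceMinMax();
    for (double val : scores) {
      // Same transform as in prepare(): shift by min, then take the square root.
      mv.put(val <= min ? 0 : Math.sqrt(val - min));
    }
    double mean = mv.getMean();
    double factor = lambda * mv.getSampleStddev() * Math.sqrt(2);
    System.out.println("mean=" + mean + " factor=" + factor);
  }
}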
Use of de.lmu.ifi.dbs.elki.math.MeanVarianceMinMax in project elki by elki-project.
The class SqrtStandardDeviationScaling, method prepare (NumberArrayAdapter overload).
@Override
public <A> void prepare(A array, NumberArrayAdapter<?, A> adapter) {
  if (pmean == null) {
    MeanVarianceMinMax mv = new MeanVarianceMinMax();
    final int size = adapter.size(array);
    for (int i = 0; i < size; i++) {
      double val = adapter.getDouble(array, i);
      val = (val <= min) ? 0 : FastMath.sqrt(val - min);
      mv.put(val);
    }
    min = (pmin == null) ? mv.getMin() : pmin;
    mean = mv.getMean();
    factor = plambda * mv.getSampleStddev() * MathUtil.SQRT2;
  } else {
    mean = pmean;
    Mean sqsum = new Mean();
    double mm = Double.POSITIVE_INFINITY;
    final int size = adapter.size(array);
    for (int i = 0; i < size; i++) {
      double val = adapter.getDouble(array, i);
      mm = Math.min(mm, val);
      val = (val <= min) ? 0 : FastMath.sqrt(val - min);
      sqsum.put((val - mean) * (val - mean));
    }
    min = (pmin == null) ? mm : pmin;
    factor = plambda * FastMath.sqrt(sqsum.getMean()) * MathUtil.SQRT2;
  }
}
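The second branch uses a fixed, user-specified mean and accumulates squared deviations with the Mean class instead of a running sum. The sketch below isolates that branch; the class name, score values, and parameters are made up, and java.lang.Math again stands in for FastMath/MathUtil.

// Sketch: deriving the scaling factor around a user-specified mean.
import de.lmu.ifi.dbs.elki.math.Mean;

public class FixedMeanFactorSketch {
  public static void main(String[] args) {
    double[] scores = { 0.1, 0.4, 0.9, 1.6, 2.5 }; // hypothetical outlier scores
    double min = 0.1, pmean = 0.8, lambda = 3.0;   // assumed parameters
    Mean sqsum = new Mean();
    for (double val : scores) {
      double v = val <= min ? 0 : Math.sqrt(val - min);
      // Accumulate squared deviations from the fixed mean.
      sqsum.put((v - pmean) * (v - pmean));
    }
    double factor = lambda * Math.sqrt(sqsum.getMean()) * Math.sqrt(2);
    System.out.println("factor=" + factor);
  }
}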
Use of de.lmu.ifi.dbs.elki.math.MeanVarianceMinMax in project elki by elki-project.
The class InstanceLogRankNormalizationTest, method defaultParameters.
/**
 * Test with default parameters.
 */
@Test
public void defaultParameters() {
  String filename = UNITTEST + "normalization-test-1.csv";
  InstanceLogRankNormalization<DoubleVector> filter = new ELKIBuilder<>(InstanceLogRankNormalization.class).build();
  MultipleObjectsBundle bundle = readBundle(filename, filter);
  int dim = getFieldDimensionality(bundle, 0, TypeUtil.NUMBER_VECTOR_FIELD);
  // Compute the expected mean and variance.
  MeanVariance expected = new MeanVariance();
  for (int ii = 0; ii < dim; ii++) {
    expected.put(Math.log1p(ii / (double) (dim - 1)) * MathUtil.ONE_BY_LOG2);
  }
  // The smallest value (except for ties) must be mapped to 0 and the largest to 1;
  // and (again, except for ties) the mean and variance must match the expected
  // values of a uniform distribution computed above.
  MeanVarianceMinMax mms = new MeanVarianceMinMax();
  for (int row = 0; row < bundle.dataLength(); row++) {
    DoubleVector d = get(bundle, row, 0, DoubleVector.class);
    for (int col = 0; col < dim; col++) {
      mms.put(d.doubleValue(col));
    }
    assertEquals("Min value is not 0", 0., mms.getMin(), 0);
    assertEquals("Max value is not 1", 1., mms.getMax(), 0);
    assertEquals("Mean value is not as expected", expected.getMean(), mms.getMean(), 1e-14);
    assertEquals("Variance is not as expected", expected.getNaiveVariance(), mms.getNaiveVariance(), 1e-14);
    mms.reset();
  }
}
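The expected statistics in this test come from the target values log2(1 + i/(dim-1)) for i = 0..dim-1, summarized with MeanVariance. The sketch below recomputes them for an arbitrary dimensionality; the class name and the choice of dim are illustrative, and 1 / Math.log(2) stands in for MathUtil.ONE_BY_LOG2.

// Sketch: the expected per-row mean and variance of the log-rank targets.
import de.lmu.ifi.dbs.elki.math.MeanVariance;

public class LogRankExpectationSketch {
  public static void main(String[] args) {
    int dim = 10; // arbitrary dimensionality for illustration
    MeanVariance expected = new MeanVariance();
    for (int i = 0; i < dim; i++) {
      // log2(1 + i/(dim-1)), i.e. the value assigned to rank i by the normalization.
      expected.put(Math.log1p(i / (double) (dim - 1)) / Math.log(2));
    }
    System.out.println("mean=" + expected.getMean() + " var=" + expected.getNaiveVariance());
  }
}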