Use of de.lmu.ifi.dbs.elki.math.MeanVariance in project elki by elki-project.
From the class AttributeWiseVarianceNormalizationTest, method testNaNParameters.
/**
 * Test with default parameters, and correct handling of NaN and Inf.
 */
@Test
public void testNaNParameters() {
  String filename = UNITTEST + "nan-test-1.csv";
  AttributeWiseVarianceNormalization<DoubleVector> filter = //
      new ELKIBuilder<AttributeWiseVarianceNormalization<DoubleVector>>(AttributeWiseVarianceNormalization.class).build();
  MultipleObjectsBundle bundle = readBundle(filename, filter);
  // Ensure the first column contains the vectors.
  assertTrue("Test file not as expected", TypeUtil.NUMBER_VECTOR_FIELD.isAssignableFromType(bundle.meta(0)));
  // This cast is now safe (vector field):
  int dim = ((FieldTypeInformation) bundle.meta(0)).getDimensionality();
  // Verify that the resulting data has mean 0 and variance 1 in each column:
  MeanVariance[] mvs = MeanVariance.newArray(dim);
  for (int row = 0; row < bundle.dataLength(); row++) {
    DoubleVector d = get(bundle, row, 0, DoubleVector.class);
    for (int col = 0; col < dim; col++) {
      final double v = d.doubleValue(col);
      // Skip NaN and +-Inf; they must not distort the statistics.
      if (v > Double.NEGATIVE_INFINITY && v < Double.POSITIVE_INFINITY) {
        mvs[col].put(v);
      }
    }
  }
  for (int col = 0; col < dim; col++) {
    assertEquals("Mean not as expected", 0., mvs[col].getMean(), 1e-15);
    assertEquals("Variance not as expected", 1., mvs[col].getNaiveVariance(), 1e-15);
  }
}
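For context, MeanVariance accumulates count, mean, and sum of squared deviations incrementally, which is why the test needs only a single pass per column. A minimal standalone sketch of the same pattern (the values are arbitrary, for illustration only):

MeanVariance mv = new MeanVariance();
for (double v : new double[] { -1., 0., 1. }) {
  // Same finiteness guard as in the test above; NaN compares false to everything.
  if (v > Double.NEGATIVE_INFINITY && v < Double.POSITIVE_INFINITY) {
    mv.put(v); // one-argument put: unit weight
  }
}
double mean = mv.getMean(); // 0.0
double var = mv.getNaiveVariance(); // biased 1/n variance: 2/3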
Use of de.lmu.ifi.dbs.elki.math.MeanVariance in project elki by elki-project.
From the class LMCLUS, method findAndEvaluateThreshold.
/**
 * Evaluate the histogram to find a suitable threshold.
 *
 * @param histogram Histogram to evaluate
 * @return Position and goodness
 */
private double[] findAndEvaluateThreshold(DoubleDynamicHistogram histogram) {
  int n = histogram.getNumBins();
  double[] p1 = new double[n];
  double[] p2 = new double[n];
  double[] mu1 = new double[n];
  double[] mu2 = new double[n];
  double[] sigma1 = new double[n];
  double[] sigma2 = new double[n];
  double[] jt = new double[n];
  // Forward pass: cumulative weight, and weighted mean/stddev of the bin
  // positions, over all bins up to and including i.
  {
    MeanVariance mv = new MeanVariance();
    DoubleHistogram.Iter forward = histogram.iter();
    for (int i = 0; forward.valid(); i++, forward.advance()) {
      p1[i] = forward.getValue() + ((i > 0) ? p1[i - 1] : 0);
      mv.put(i, forward.getValue()); // weighted put: value i, weight = bin count
      mu1[i] = mv.getMean();
      sigma1[i] = mv.getNaiveStddev();
    }
  }
  // Backwards pass: the same statistics, over all bins from j to the end.
  {
    MeanVariance mv = new MeanVariance();
    DoubleHistogram.Iter backwards = histogram.iter();
    // Seek to the last bin, then iterate backwards.
    backwards.seek(histogram.getNumBins() - 1);
    for (int j = n - 1; backwards.valid(); j--, backwards.retract()) {
      p2[j] = backwards.getValue() + ((j + 1 < n) ? p2[j + 1] : 0);
      mv.put(j, backwards.getValue());
      mu2[j] = mv.getMean();
      sigma2[j] = mv.getNaiveStddev();
    }
  }
  // Threshold cost function (cf. minimum-error thresholding):
  for (int i = 0; i < n; i++) {
    jt[i] = 1.0 + 2 * (p1[i] * (FastMath.log(sigma1[i]) - FastMath.log(p1[i])) //
        + p2[i] * (FastMath.log(sigma2[i]) - FastMath.log(p2[i])));
  }
  int bestpos = -1;
  double bestgoodness = Double.NEGATIVE_INFINITY;
  double devPrev = jt[1] - jt[0];
  for (int i = 1; i < jt.length - 1; i++) {
    double devCur = jt[i + 1] - jt[i];
    // Local minimum found - calculate its depth:
    if (devCur >= 0 && devPrev <= 0) {
      // Find the nearest local maximum on either side of the minimum.
      double lowestMaxima = Double.POSITIVE_INFINITY;
      for (int j = i - 1; j > 0; j--) {
        if (jt[j - 1] < jt[j]) {
          lowestMaxima = Math.min(lowestMaxima, jt[j]);
          break;
        }
      }
      for (int j = i + 1; j < n - 2; j++) {
        if (jt[j + 1] < jt[j]) {
          lowestMaxima = Math.min(lowestMaxima, jt[j]);
          break;
        }
      }
      double localDepth = lowestMaxima - jt[i];
      final double mud = mu1[i] - mu2[i];
      double discriminability = mud * mud / (sigma1[i] * sigma1[i] + sigma2[i] * sigma2[i]);
      if (Double.isNaN(discriminability)) {
        discriminability = -1;
      }
      double goodness = localDepth * discriminability;
      if (goodness > bestgoodness) {
        bestgoodness = goodness;
        bestpos = i;
      }
    }
    devPrev = devCur;
  }
  DoubleHistogram.Iter iter = histogram.iter();
  iter.seek(bestpos);
  return new double[] { iter.getRight(), bestgoodness };
}
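Both passes rely on the two-argument MeanVariance.put(value, weight): the bin index is the value and the bin count is the weight, so mu1[i] and sigma1[i] are the weighted mean and standard deviation of bin positions up to bin i (and mu2/sigma2 the same from the right). A toy sketch of the weighted form, with arbitrary numbers:

MeanVariance mv = new MeanVariance();
mv.put(0, 3.0); // bin position 0, weight 3 (the bin count)
mv.put(1, 1.0); // bin position 1, weight 1
double mean = mv.getMean(); // weighted mean: (0*3 + 1*1) / 4 = 0.25
double sd = mv.getNaiveStddev(); // square root of the weighted 1/W variance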
Use of de.lmu.ifi.dbs.elki.math.MeanVariance in project elki by elki-project.
From the class ExpGammaExpMOMEstimator, method estimate.
@Override
public <A> ExpGammaDistribution estimate(A data, NumberArrayAdapter<?, A> adapter) {
  final int len = adapter.size(data);
  MeanVariance mv = new MeanVariance();
  for (int i = 0; i < len; i++) {
    // Accumulate the moments of exp(x): method of moments in the exponentiated domain.
    mv.put(FastMath.exp(adapter.getDouble(data, i)));
  }
  return estimateFromExpMeanVariance(mv);
}
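A hedged usage sketch of such an estimator: ELKI estimators are usually invoked through a NumberArrayAdapter, and DoubleArrayAdapter.STATIC adapts a plain double[]. The STATIC singleton on the estimator is an assumption here, following ELKI's usual convention for parameter-free estimators:

// Assumed convention: the estimator exposes a STATIC singleton instance.
double[] sample = { 0.1, 0.5, 0.9, 1.2 }; // arbitrary example data
ExpGammaDistribution dist = ExpGammaExpMOMEstimator.STATIC.estimate(sample, DoubleArrayAdapter.STATIC);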
Use of de.lmu.ifi.dbs.elki.math.MeanVariance in project elki by elki-project.
From the class LogMeanVarianceEstimator, method estimate.
@Override
default <A> D estimate(A data, NumberArrayAdapter<?, A> adapter) {
  final int len = adapter.size(data);
  // Shift the data by (slightly less than) its minimum, so logarithms are defined:
  double min = LogMOMDistributionEstimator.min(data, adapter, 0., 1e-10);
  MeanVariance mv = new MeanVariance();
  for (int i = 0; i < len; i++) {
    final double val = adapter.getDouble(data, i) - min;
    if (Double.isInfinite(val) || Double.isNaN(val) || val <= 0.) {
      continue; // skip values that are invalid in log space
    }
    mv.put(FastMath.log(val));
  }
  if (!(mv.getCount() > 1.)) {
    throw new ArithmeticException("Too small sample size to estimate variance.");
  }
  return estimateFromLogMeanVariance(mv, min);
}
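The essential pattern above - shift the data to be positive, then accumulate moments of the logarithms - can be illustrated standalone with just MeanVariance (the example data and the min stand-in are invented for illustration):

double[] data = { 1.0, 2.0, 4.0, 8.0 }; // arbitrary example
double min = 1.0 - 1e-10; // stand-in for LogMOMDistributionEstimator.min(...)
MeanVariance mv = new MeanVariance();
for (double x : data) {
  final double val = x - min;
  if (Double.isInfinite(val) || Double.isNaN(val) || val <= 0.) {
    continue; // invalid in log space
  }
  mv.put(FastMath.log(val));
}
// mv.getMean() and mv.getSampleStddev() now parameterize a log-space fit.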
Use of de.lmu.ifi.dbs.elki.math.MeanVariance in project elki by elki-project.
From the class PartitionApproximationMaterializeKNNPreprocessor, method preprocess.
@Override
protected void preprocess() {
  DistanceQuery<O> distanceQuery = relation.getDistanceQuery(distanceFunction);
  storage = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC, KNNList.class);
  MeanVariance ksize = new MeanVariance();
  if (LOG.isVerbose()) {
    LOG.verbose("Approximating nearest neighbor lists to database objects");
  }
  // Produce a random shuffling of the IDs:
  ArrayDBIDs[] parts = DBIDUtil.randomSplit(relation.getDBIDs(), partitions, rnd);
  FiniteProgress progress = LOG.isVerbose() ? new FiniteProgress("Processing partitions", partitions, LOG) : null;
  for (int part = 0; part < partitions; part++) {
    final ArrayDBIDs ids = parts[part];
    final int size = ids.size();
    // Cache for the symmetric distances; NaN marks "not yet computed".
    Object2DoubleOpenHashMap<DBIDPair> cache = new Object2DoubleOpenHashMap<>((size * size * 3) >> 3);
    cache.defaultReturnValue(Double.NaN);
    for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
      KNNHeap kNN = DBIDUtil.newHeap(k);
      for (DBIDIter iter2 = ids.iter(); iter2.valid(); iter2.advance()) {
        DBIDPair key = DBIDUtil.newPair(iter, iter2);
        double d = cache.removeDouble(key);
        if (d == d) {
          // Not NaN: consume the previously computed result.
          kNN.insert(d, iter2);
        } else {
          // Compute and store the new result.
          d = distanceQuery.distance(iter, iter2);
          kNN.insert(d, iter2);
          // Put it into the cache, but with the keys reversed.
          key = DBIDUtil.newPair(iter2, iter);
          cache.put(key, d);
        }
      }
      ksize.put(kNN.size());
      storage.put(iter, kNN.toKNNList());
    }
    if (LOG.isDebugging() && cache.size() > 0) {
      LOG.warning("Cache should be empty after each run, but still has " + cache.size() + " elements.");
    }
    LOG.incrementProcessed(progress);
  }
  LOG.ensureCompleted(progress);
  if (LOG.isVerbose()) {
    LOG.verbose("On average, " + ksize.getMean() + " +- " + ksize.getSampleStddev() + " neighbors returned.");
  }
}
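The cache logic exploits distance symmetry: each pair is computed once, stored under the reversed key, and consumed (removed) by the mirror lookup. Since NaN is the map's default return value, d == d is a branch-free "was it cached?" test, because NaN is the only double not equal to itself. A minimal sketch of the same idiom with plain String keys (hypothetical, independent of ELKI):

Object2DoubleOpenHashMap<String> cache = new Object2DoubleOpenHashMap<>();
cache.defaultReturnValue(Double.NaN); // sentinel for "missing"
cache.put("b->a", 0.5); // stored once, under the reversed key
double d = cache.removeDouble("b->a"); // consume on the mirror lookup
if (d == d) {
  // d is not NaN: the pair was cached, so reuse it.
}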