Use of de.lmu.ifi.dbs.elki.math.statistics.distribution.BetaDistribution in project elki by elki-project.
From the class AttributeWiseBetaNormalizationTest, method parameters.
/**
 * Runs the normalization with {@code alpha = 0.88} on a test data set and
 * checks, for every attribute, that roughly that fraction of the normalized
 * values falls below the {@code 0.88}-quantile of a {@code Beta(0.88, 0.88)}
 * distribution (absolute tolerance {@code 0.05}).
 */
@Test
public void parameters() {
  final double p = .88;
  // Configure the filter: alpha = p, candidate estimators = normal (MOM) and
  // uniform (min/max).
  AttributeWiseBetaNormalization<DoubleVector> filter = new ELKIBuilder<AttributeWiseBetaNormalization<DoubleVector>>(AttributeWiseBetaNormalization.class) //
      .with(AttributeWiseBetaNormalization.Parameterizer.ALPHA_ID, p) //
      .with(AttributeWiseBetaNormalization.Parameterizer.DISTRIBUTIONS_ID, Arrays.asList(NormalMOMEstimator.STATIC, UniformMinMaxEstimator.STATIC)) //
      .build();
  MultipleObjectsBundle bundle = readBundle(UNITTEST + "normally-distributed-data-1.csv", filter);
  int dim = getFieldDimensionality(bundle, 0, TypeUtil.NUMBER_VECTOR_FIELD);

  // Reference threshold: the p-quantile of a symmetric Beta(p, p).
  // NOTE(review): AttributeWiseBetaNormalization#filter appears to derive its
  // Beta shape parameter as alpha^(-1/sqrt(dim)) rather than alpha itself;
  // presumably the tolerance below absorbs the difference - confirm.
  final double threshold = new BetaDistribution(p, p).quantile(p);

  // Per attribute, count the finite values strictly below the threshold.
  final int rows = bundle.dataLength();
  int[] below = new int[dim];
  for (int i = 0; i < rows; i++) {
    DoubleVector vec = get(bundle, i, 0, DoubleVector.class);
    for (int j = 0; j < dim; j++) {
      final double value = vec.doubleValue(j);
      // Infinite values are skipped; NaN fails every comparison and is skipped too.
      final boolean finite = value > Double.NEGATIVE_INFINITY && value < Double.POSITIVE_INFINITY;
      if (finite && value < threshold) {
        below[j]++;
      }
    }
  }

  // The observed fraction is relative to all rows, including skipped values.
  for (int count : below) {
    final double fraction = count / (double) rows;
    assertEquals("p% of the values should be under the quantile", p, fraction, .05);
  }
}
Use of de.lmu.ifi.dbs.elki.math.statistics.distribution.BetaDistribution in project elki by elki-project.
From the class AttributeWiseBetaNormalization, method filter.
/**
 * Normalizes every number-vector column of the bundle in place.
 * <p>
 * For each such column, a distribution is chosen per dimension via
 * {@code findBestFit}. Each attribute value is then mapped through the CDF of
 * its dimension's distribution and afterwards through the quantile function of
 * a symmetric {@code Beta(p, p)} distribution, where
 * {@code p = alpha^(-1 / sqrt(dim))}. Columns that are not number-vector
 * fields are skipped.
 * <p>
 * As a side effect, {@code factory} and {@code dists} (declared outside this
 * method) are reassigned for every processed column.
 *
 * @param objects bundle to normalize; its vector columns are overwritten
 * @return the same bundle instance that was passed in
 */
@Override
public MultipleObjectsBundle filter(MultipleObjectsBundle objects) {
  final int size = objects.dataLength();
  if (size == 0) {
    // Nothing to fit or transform.
    return objects;
  }
  for (int col = 0; col < objects.metaLength(); col++) {
    SimpleTypeInformation<?> meta = (SimpleTypeInformation<?>) objects.meta(col);
    final List<?> rawColumn = (List<?>) objects.getColumn(col);
    if (TypeUtil.NUMBER_VECTOR_FIELD.isAssignableFromType(meta)) {
      @SuppressWarnings("unchecked")
      final List<V> vectors = (List<V>) rawColumn;
      @SuppressWarnings("unchecked")
      final VectorFieldTypeInformation<V> vectorMeta = (VectorFieldTypeInformation<V>) meta;
      // Factory used below to build the replacement vectors.
      factory = FilterUtil.guessFactory(vectorMeta);
      final int dim = vectorMeta.getDimensionality();

      // First pass: pick one distribution per dimension.
      dists = new ArrayList<>(dim);
      // Scratch buffer handed to findBestFit, one slot per object.
      double[] scratch = new double[vectors.size()];
      // The adapter is re-pointed at each dimension in turn; this kind of
      // filter needs fast random access.
      Adapter adapter = new Adapter();
      for (int d = 0; d < dim; d++) {
        adapter.dim = d;
        final Distribution best = findBestFit(vectors, adapter, d, scratch);
        if (LOG.isVerbose()) {
          LOG.verbose("Best fit for dimension " + d + ": " + best.toString());
        }
        dists.add(best);
      }

      // Symmetric Beta distribution that the CDF values are projected onto.
      double p = FastMath.pow(alpha, -1 / FastMath.sqrt(dim));
      BetaDistribution beta = new BetaDistribution(p, p);

      // Second pass: replace every vector by its projected counterpart.
      double[] projected = new double[dim];
      for (int i = 0; i < size; i++) {
        final V vec = vectors.get(i);
        for (int d = 0; d < dim; d++) {
          // TODO: when available, use logspace for better numerical precision!
          final double prob = dists.get(d).cdf(vec.doubleValue(d));
          projected[d] = beta.quantile(prob);
        }
        vectors.set(i, factory.newNumberVector(projected));
      }
    }
  }
  return objects;
}
Aggregations