use of de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle in project elki by elki-project.
the class AttributeWiseCDFNormalizationTest method defaultParameters.
/**
 * Test with default parameters.
 */
@Test
public void defaultParameters() {
  String filename = UNITTEST + "normally-distributed-data-1.csv";
  AttributeWiseCDFNormalization<DoubleVector> filter = //
      new ELKIBuilder<AttributeWiseCDFNormalization<DoubleVector>>(AttributeWiseCDFNormalization.class).with(//
          AttributeWiseCDFNormalization.Parameterizer.DISTRIBUTIONS_ID, //
          Arrays.asList(NormalMOMEstimator.STATIC, UniformMinMaxEstimator.STATIC)).build();
  MultipleObjectsBundle bundle = readBundle(filename, filter);
  int dim = getFieldDimensionality(bundle, 0, TypeUtil.NUMBER_VECTOR_FIELD);
  // In each dimension, we expect approximately 25% of the values in 0-0.25,
  // 25% in 0.25-0.5, 25% in 0.5-0.75, and 25% in 0.75-1.
  int[][] counts = new int[dim][4];
  final int size = bundle.dataLength();
  for (int row = 0; row < size; row++) {
    DoubleVector d = get(bundle, row, 0, DoubleVector.class);
    for (int col = 0; col < dim; col++) {
      final double val = d.doubleValue(col);
      // Clamp to the last bin, so a value of exactly 1.0 does not index out of bounds.
      int q = Math.min((int) (val * 4), 3);
      counts[col][q]++;
    }
  }
  for (int col = 0; col < dim; col++) {
    assertEquals("~25% of the values in each column should be between 0 and 0.25", .25, counts[col][0] / (double) size, .02);
    assertEquals("~25% of the values in each column should be between 0.25 and 0.5", .25, counts[col][1] / (double) size, .02);
    assertEquals("~25% of the values in each column should be between 0.5 and 0.75", .25, counts[col][2] / (double) size, .03);
    assertEquals("~25% of the values in each column should be between 0.75 and 1", .25, counts[col][3] / (double) size, .02);
  }
}
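The quartile counts follow from what CDF normalization does: each attribute value x is replaced by F(x), where F is the CDF of a distribution fitted to that column (here the parameter supplies a normal and a uniform estimator), so the output is approximately uniform on [0,1] and each quarter of the unit interval should receive about 25% of the values. A minimal standalone sketch of that idea, not ELKI's implementation (the erf-based Gaussian CDF and the synthetic column are assumptions):

import java.util.Random;

public class CdfNormalizationSketch {
  // Abramowitz-Stegun 7.1.26 approximation of the normal CDF.
  static double normalCdf(double x, double mu, double sigma) {
    double z = (x - mu) / (sigma * Math.sqrt(2));
    double t = 1 / (1 + 0.3275911 * Math.abs(z));
    double poly = ((((1.061405429 * t - 1.453152027) * t + 1.421413741) * t - 0.284496736) * t + 0.254829592) * t;
    double erf = 1 - poly * Math.exp(-z * z);
    return 0.5 * (1 + (z >= 0 ? erf : -erf));
  }

  public static void main(String[] args) {
    Random rnd = new Random(0);
    double[] column = new double[10000];
    for (int i = 0; i < column.length; i++) {
      column[i] = 3 + 2 * rnd.nextGaussian(); // synthetic, normally distributed column
    }
    // Method-of-moments fit: sample mean and standard deviation.
    double mean = 0, var = 0;
    for (double v : column) {
      mean += v;
    }
    mean /= column.length;
    for (double v : column) {
      var += (v - mean) * (v - mean);
    }
    double sd = Math.sqrt(var / column.length);
    // Map every value through the fitted CDF and bin the results by quartile.
    int[] bins = new int[4];
    for (double v : column) {
      double u = normalCdf(v, mean, sd);
      bins[Math.min((int) (u * 4), 3)]++;
    }
    for (int q = 0; q < 4; q++) {
      System.out.printf("bin %d: %.3f%n", q, bins[q] / (double) column.length); // each approx. 0.25
    }
  }
}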
use of de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle in project elki by elki-project.
the class AttributeWiseMADNormalizationTest method defaultParameters.
/**
 * Test with default parameters.
 */
@Test
public void defaultParameters() {
  String filename = UNITTEST + "normalization-test-1.csv";
  AttributeWiseMADNormalization<DoubleVector> filter = new ELKIBuilder<AttributeWiseMADNormalization<DoubleVector>>(AttributeWiseMADNormalization.class).build();
  MultipleObjectsBundle bundle = readBundle(filename, filter);
  int dim = getFieldDimensionality(bundle, 0, TypeUtil.NUMBER_VECTOR_FIELD);
  // Count how many values in each column are positive or negative, and how
  // many have an absolute value at least / below Phi^-1(0.75), the value the
  // MAD is rescaled to.
  int[][] counts = new int[dim][4];
  for (int row = 0; row < bundle.dataLength(); row++) {
    DoubleVector d = get(bundle, row, 0, DoubleVector.class);
    for (int col = 0; col < dim; col++) {
      final double val = d.doubleValue(col);
      counts[col][val > 0. ? 0 : 1]++;
      counts[col][Math.abs(val) >= NormalDistribution.PHIINV075 ? 2 : 3]++;
    }
  }
  // Verify that each column is split evenly around the median and the MAD:
  for (int col = 0; col < dim; col++) {
    assertEquals("~50% of the values in each column should be positive", .5, counts[col][0] / (double) bundle.dataLength(), 0.);
    assertEquals("~50% of the values in each column should be negative", .5, counts[col][1] / (double) bundle.dataLength(), 0.);
    assertEquals("~50% of the values in each column should be at or beyond +-Phi^-1(0.75)", .5, counts[col][2] / (double) bundle.dataLength(), 0.);
    assertEquals("~50% of the values in each column should be within +-Phi^-1(0.75)", .5, counts[col][3] / (double) bundle.dataLength(), 0.);
  }
}
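The threshold in the counters is Φ⁻¹(0.75) ≈ 0.6745 (NormalDistribution.PHIINV075): MAD normalization centers each column at its median and rescales it to be comparable to a z-score, roughly v' = (v - median) · Φ⁻¹(0.75) / MAD. Because the MAD is itself the median of the absolute deviations, half of the normalized values end up at or beyond ±Φ⁻¹(0.75), which is exactly what the test asserts. A minimal standalone sketch under that assumed formula (not ELKI's implementation):

import java.util.Arrays;

public class MadNormalizationSketch {
  static final double PHIINV075 = 0.6744897501960817; // Phi^-1(0.75)

  static double median(double[] sorted) {
    int n = sorted.length;
    return (n & 1) == 1 ? sorted[n / 2] : 0.5 * (sorted[n / 2 - 1] + sorted[n / 2]);
  }

  public static void main(String[] args) {
    double[] column = { 2.0, 3.5, 1.0, 4.0, 2.5, 3.0, 5.0, 0.5 };
    double[] sorted = column.clone();
    Arrays.sort(sorted);
    double med = median(sorted);
    // MAD = median of the absolute deviations from the median.
    double[] dev = new double[column.length];
    for (int i = 0; i < column.length; i++) {
      dev[i] = Math.abs(column[i] - med);
    }
    Arrays.sort(dev);
    double mad = median(dev);
    // Rescale so that the MAD corresponds to Phi^-1(0.75), like a robust z-score.
    int beyond = 0;
    for (double v : column) {
      double normalized = (v - med) * PHIINV075 / mad;
      if (Math.abs(normalized) >= PHIINV075) {
        beyond++;
      }
    }
    System.out.println(beyond + " of " + column.length + " values lie at or beyond the rescaled MAD"); // prints 4 of 8, i.e. half
  }
}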
use of de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle in project elki by elki-project.
the class AttributeWiseMeanNormalizationTest method defaultParameters.
/**
 * Test with default parameters.
 */
@Test
public void defaultParameters() {
  String filename = UNITTEST + "normalization-test-1.csv";
  AttributeWiseMeanNormalization<DoubleVector> filter = new ELKIBuilder<AttributeWiseMeanNormalization<DoubleVector>>(AttributeWiseMeanNormalization.class).build();
  MultipleObjectsBundle bundle = readBundle(filename, filter);
  int dim = getFieldDimensionality(bundle, 0, TypeUtil.NUMBER_VECTOR_FIELD);
  // We verify that all columns are rescaled to a mean of 1 (skipping non-finite values):
  Mean[] ms = Mean.newArray(dim);
  for (int row = 0; row < bundle.dataLength(); row++) {
    DoubleVector d = get(bundle, row, 0, DoubleVector.class);
    for (int col = 0; col < dim; col++) {
      final double val = d.doubleValue(col);
      if (val > Double.NEGATIVE_INFINITY && val < Double.POSITIVE_INFINITY) {
        ms[col].put(val);
      }
    }
  }
  for (int col = 0; col < dim; col++) {
    assertEquals("Mean is not 1", 1., ms[col].getMean(), 1e-14);
  }
}
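The expected mean of 1 corresponds to a pure rescaling, v' = v / mean(column), which leaves zero untouched and gives every column unit mean. A minimal standalone sketch of that transform, offered as an assumption about the filter's behavior rather than ELKI's actual code:

public class MeanNormalizationSketch {
  public static void main(String[] args) {
    double[] column = { 2.0, 4.0, 6.0, 8.0 };
    // Column mean.
    double mean = 0;
    for (double v : column) {
      mean += v;
    }
    mean /= column.length;
    // Divide by the mean so the column mean becomes 1 (zero stays zero).
    double check = 0;
    for (int i = 0; i < column.length; i++) {
      column[i] /= mean;
      check += column[i];
    }
    System.out.println("normalized mean = " + check / column.length); // prints 1.0 (up to floating-point rounding)
  }
}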
use of de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle in project elki by elki-project.
the class AttributeWiseMinMaxNormalizationTest method defaultParameters.
/**
 * Test with default parameters.
 */
@Test
public void defaultParameters() {
  String filename = UNITTEST + "normalization-test-1.csv";
  AttributeWiseMinMaxNormalization<DoubleVector> filter = new ELKIBuilder<AttributeWiseMinMaxNormalization<DoubleVector>>(AttributeWiseMinMaxNormalization.class).build();
  MultipleObjectsBundle bundle = readBundle(filename, filter);
  int dim = getFieldDimensionality(bundle, 0, TypeUtil.NUMBER_VECTOR_FIELD);
  // We verify that minimum and maximum values in each column are 0 and 1:
  DoubleMinMax[] mms = DoubleMinMax.newArray(dim);
  for (int row = 0; row < bundle.dataLength(); row++) {
    DoubleVector d = get(bundle, row, 0, DoubleVector.class);
    for (int col = 0; col < dim; col++) {
      final double val = d.doubleValue(col);
      if (val > Double.NEGATIVE_INFINITY && val < Double.POSITIVE_INFINITY) {
        mms[col].put(val);
      }
    }
  }
  for (int col = 0; col < dim; col++) {
    assertEquals("Minimum not as expected", 0., mms[col].getMin(), 0.);
    assertEquals("Maximum not as expected", 1., mms[col].getMax(), 0.);
  }
}
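Min-max normalization maps each column linearly onto the unit interval, v' = (v - min) / (max - min), so the filtered minimum is exactly 0 and the maximum exactly 1, which is what the DoubleMinMax trackers verify. A minimal standalone sketch (not ELKI's implementation):

public class MinMaxNormalizationSketch {
  public static void main(String[] args) {
    double[] column = { -3.0, 1.5, 7.0, 4.0 };
    // Column minimum and maximum.
    double min = Double.POSITIVE_INFINITY, max = Double.NEGATIVE_INFINITY;
    for (double v : column) {
      min = Math.min(min, v);
      max = Math.max(max, v);
    }
    // Linear rescaling onto [0, 1].
    for (int i = 0; i < column.length; i++) {
      column[i] = (column[i] - min) / (max - min);
    }
    System.out.println(java.util.Arrays.toString(column)); // [0.0, 0.45, 1.0, 0.7]
  }
}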
use of de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle in project elki by elki-project.
the class AttributeWiseVarianceNormalizationTest method defaultParameters.
/**
 * Test with default parameters.
 */
@Test
public void defaultParameters() {
  String filename = UNITTEST + "normalization-test-1.csv";
  AttributeWiseVarianceNormalization<DoubleVector> filter = new ELKIBuilder<AttributeWiseVarianceNormalization<DoubleVector>>(AttributeWiseVarianceNormalization.class).build();
  MultipleObjectsBundle bundle = readBundle(filename, filter);
  int dim = getFieldDimensionality(bundle, 0, TypeUtil.NUMBER_VECTOR_FIELD);
  // We verify that the resulting data has mean 0 and variance 1 in each
  // column:
  MeanVariance[] mvs = MeanVariance.newArray(dim);
  for (int row = 0; row < bundle.dataLength(); row++) {
    DoubleVector d = get(bundle, row, 0, DoubleVector.class);
    for (int col = 0; col < dim; col++) {
      final double v = d.doubleValue(col);
      if (v > Double.NEGATIVE_INFINITY && v < Double.POSITIVE_INFINITY) {
        mvs[col].put(v);
      }
    }
  }
  for (int col = 0; col < dim; col++) {
    assertEquals("Mean not as expected", 0., mvs[col].getMean(), 1e-14);
    assertEquals("Variance not as expected", 1., mvs[col].getNaiveVariance(), 1e-14);
  }
}
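Variance (z-score) normalization subtracts the column mean and divides by the column standard deviation, v' = (v - μ) / σ, so each filtered column should have mean 0 and variance 1. A minimal standalone sketch, using the population variance to match getNaiveVariance in the test (an assumption, not ELKI's code):

public class VarianceNormalizationSketch {
  public static void main(String[] args) {
    double[] column = { 1.0, 2.0, 4.0, 7.0 };
    // Column mean.
    double mean = 0;
    for (double v : column) {
      mean += v;
    }
    mean /= column.length;
    // Population variance (divide by n) and standard deviation.
    double var = 0;
    for (double v : column) {
      var += (v - mean) * (v - mean);
    }
    var /= column.length;
    double sd = Math.sqrt(var);
    // Z-score each value, then re-check mean and variance of the result.
    double m2 = 0, v2 = 0;
    for (int i = 0; i < column.length; i++) {
      column[i] = (column[i] - mean) / sd;
      m2 += column[i];
    }
    m2 /= column.length;
    for (double v : column) {
      v2 += (v - m2) * (v - m2);
    }
    v2 /= column.length;
    System.out.printf("mean=%.3f variance=%.3f%n", m2, v2); // prints mean close to 0, variance close to 1
  }
}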