Search in sources :

Example 1 with FieldTypeInformation

use of de.lmu.ifi.dbs.elki.data.type.FieldTypeInformation in project elki by elki-project.

the class ReplaceNaNWithRandomFilterTest method parameters.

/**
 * Test with standard normal distribution as parameter.
 * <p>
 * The same file is loaded twice, once through the filter and once without it.
 * Every (row, column) position that holds NaN in the unfiltered data is
 * recorded, and the filtered data is then required to hold a non-NaN value at
 * each of these positions.
 */
@Test
public void parameters() {
    String filename = UNITTEST + "nan-test-1.csv";
    // Seeded random generator, so that the replacement values are reproducible.
    ReplaceNaNWithRandomFilter filter = new ELKIBuilder<>(ReplaceNaNWithRandomFilter.class) //
        .with(ReplaceNaNWithRandomFilter.Parameterizer.REPLACEMENT_DISTRIBUTION, new NormalDistribution(0, 1, new Random(0L))) //
        .build();
    MultipleObjectsBundle filteredBundle = readBundle(filename, filter);
    // Load the test data again without a filter.
    MultipleObjectsBundle unfilteredBundle = readBundle(filename);
    // Ensure the first column holds the vectors in both bundles.
    assertTrue("Test file not as expected", TypeUtil.NUMBER_VECTOR_FIELD.isAssignableFromType(filteredBundle.meta(0)));
    assertTrue("Test file not as expected", TypeUtil.NUMBER_VECTOR_FIELD.isAssignableFromType(unfilteredBundle.meta(0)));
    // These casts are now safe (vector field). Each dimensionality must be read
    // from its own bundle, otherwise the comparison below is trivially true.
    int dimFiltered = ((FieldTypeInformation) filteredBundle.meta(0)).getDimensionality();
    int dimUnfiltered = ((FieldTypeInformation) unfilteredBundle.meta(0)).getDimensionality();
    assertEquals("Dimensionality expected equal", dimFiltered, dimUnfiltered);
    // Note the (row, column) indices of the NaN(s) in the unfiltered data.
    List<IntegerVector> nanPositions = new ArrayList<>();
    for (int row = 0; row < unfilteredBundle.dataLength(); row++) {
        Object obj = unfilteredBundle.data(row, 0);
        assertEquals("Unexpected data type", DoubleVector.class, obj.getClass());
        DoubleVector d = (DoubleVector) obj;
        for (int col = 0; col < dimUnfiltered; col++) {
            final double v = d.doubleValue(col);
            if (Double.isNaN(v)) {
                nanPositions.add(new IntegerVector(new int[] { row, col }));
            }
        }
    }
    // Verify that at least a single NaN exists in the unfiltered bundle;
    // otherwise the loop below would not check anything.
    assertFalse("NaN expected in unfiltered data", nanPositions.isEmpty());
    // Every recorded position must hold a non-NaN value after filtering.
    for (IntegerVector pos : nanPositions) {
        Object obj = filteredBundle.data(pos.intValue(0), 0);
        assertEquals("Unexpected data type", DoubleVector.class, obj.getClass());
        DoubleVector d = (DoubleVector) obj;
        final double v = d.doubleValue(pos.intValue(1));
        assertFalse("NaN not expected", Double.isNaN(v));
    }
}
Also used : ELKIBuilder(de.lmu.ifi.dbs.elki.utilities.ELKIBuilder) MultipleObjectsBundle(de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle) ArrayList(java.util.ArrayList) FieldTypeInformation(de.lmu.ifi.dbs.elki.data.type.FieldTypeInformation) IntegerVector(de.lmu.ifi.dbs.elki.data.IntegerVector) Random(java.util.Random) NormalDistribution(de.lmu.ifi.dbs.elki.math.statistics.distribution.NormalDistribution) ReplaceNaNWithRandomFilter(de.lmu.ifi.dbs.elki.datasource.filter.cleaning.ReplaceNaNWithRandomFilter) DoubleVector(de.lmu.ifi.dbs.elki.data.DoubleVector) Test(org.junit.Test) AbstractDataSourceTest(de.lmu.ifi.dbs.elki.datasource.AbstractDataSourceTest)

Example 2 with FieldTypeInformation

use of de.lmu.ifi.dbs.elki.data.type.FieldTypeInformation in project elki by elki-project.

the class AttributeWiseMinMaxNormalizationTest method testNaNParameters.

/**
 * Test with default parameters and for correct handling of NaN and Inf.
 * <p>
 * Only finite values are tracked; for each column, their minimum must be
 * exactly 0 and their maximum exactly 1.
 */
@Test
public void testNaNParameters() {
    final String path = UNITTEST + "nan-test-1.csv";
    final AttributeWiseMinMaxNormalization<DoubleVector> normalization = new ELKIBuilder<AttributeWiseMinMaxNormalization<DoubleVector>>(AttributeWiseMinMaxNormalization.class).build();
    final MultipleObjectsBundle normalized = readBundle(path, normalization);
    // The first column has to be a number vector field.
    final boolean vectorsFirst = TypeUtil.NUMBER_VECTOR_FIELD.isAssignableFromType(normalized.meta(0));
    assertTrue("Test file not as expected", vectorsFirst);
    // Safe cast, given the check above.
    final FieldTypeInformation fieldInfo = (FieldTypeInformation) normalized.meta(0);
    final int dimensionality = fieldInfo.getDimensionality();
    // One min/max tracker per column.
    final DoubleMinMax[] ranges = DoubleMinMax.newArray(dimensionality);
    for (int r = 0; r < normalized.dataLength(); r++) {
        final DoubleVector vec = get(normalized, r, 0, DoubleVector.class);
        for (int c = 0; c < dimensionality; c++) {
            final double x = vec.doubleValue(c);
            // Skip NaN and infinite values; only finite values are tracked.
            if (Double.isNaN(x) || Double.isInfinite(x)) {
                continue;
            }
            ranges[c].put(x);
        }
    }
    // Each column must span exactly [0, 1].
    for (int c = 0; c < dimensionality; c++) {
        final DoubleMinMax range = ranges[c];
        assertEquals("Minimum not as expected", 0., range.getMin(), 0.);
        assertEquals("Maximum not as expected", 1., range.getMax(), 0.);
    }
}
Also used : DoubleMinMax(de.lmu.ifi.dbs.elki.math.DoubleMinMax) MultipleObjectsBundle(de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle) FieldTypeInformation(de.lmu.ifi.dbs.elki.data.type.FieldTypeInformation) DoubleVector(de.lmu.ifi.dbs.elki.data.DoubleVector) AbstractDataSourceTest(de.lmu.ifi.dbs.elki.datasource.AbstractDataSourceTest) Test(org.junit.Test)

Example 3 with FieldTypeInformation

use of de.lmu.ifi.dbs.elki.data.type.FieldTypeInformation in project elki by elki-project.

the class AttributeWiseVarianceNormalizationTest method testNaNParameters.

/**
 * Test with default parameters and for correct handling of NaN and Inf.
 * <p>
 * Only finite values are aggregated; for each column, their mean must be 0
 * and their naive variance 1, both within a tolerance of 1e-15.
 */
@Test
public void testNaNParameters() {
    final String path = UNITTEST + "nan-test-1.csv";
    final AttributeWiseVarianceNormalization<DoubleVector> normalization = new ELKIBuilder<AttributeWiseVarianceNormalization<DoubleVector>>(AttributeWiseVarianceNormalization.class).build();
    final MultipleObjectsBundle normalized = readBundle(path, normalization);
    // The first column has to be a number vector field.
    final boolean vectorsFirst = TypeUtil.NUMBER_VECTOR_FIELD.isAssignableFromType(normalized.meta(0));
    assertTrue("Test file not as expected", vectorsFirst);
    // Safe cast, given the check above.
    final FieldTypeInformation fieldInfo = (FieldTypeInformation) normalized.meta(0);
    final int dimensionality = fieldInfo.getDimensionality();
    // One mean/variance aggregator per column.
    final MeanVariance[] stats = MeanVariance.newArray(dimensionality);
    for (int r = 0; r < normalized.dataLength(); r++) {
        final DoubleVector vec = get(normalized, r, 0, DoubleVector.class);
        for (int c = 0; c < dimensionality; c++) {
            final double x = vec.doubleValue(c);
            // Skip NaN and infinite values; only finite values are aggregated.
            if (Double.isNaN(x) || Double.isInfinite(x)) {
                continue;
            }
            stats[c].put(x);
        }
    }
    // Each column must have mean 0 and naive variance 1.
    for (int c = 0; c < dimensionality; c++) {
        final MeanVariance stat = stats[c];
        assertEquals("Mean not as expected", 0., stat.getMean(), 1e-15);
        assertEquals("Variance not as expected", 1., stat.getNaiveVariance(), 1e-15);
    }
}
Also used : MeanVariance(de.lmu.ifi.dbs.elki.math.MeanVariance) MultipleObjectsBundle(de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle) FieldTypeInformation(de.lmu.ifi.dbs.elki.data.type.FieldTypeInformation) DoubleVector(de.lmu.ifi.dbs.elki.data.DoubleVector) AbstractDataSourceTest(de.lmu.ifi.dbs.elki.datasource.AbstractDataSourceTest) Test(org.junit.Test)

Aggregations

DoubleVector (de.lmu.ifi.dbs.elki.data.DoubleVector)3 FieldTypeInformation (de.lmu.ifi.dbs.elki.data.type.FieldTypeInformation)3 AbstractDataSourceTest (de.lmu.ifi.dbs.elki.datasource.AbstractDataSourceTest)3 MultipleObjectsBundle (de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle)3 Test (org.junit.Test)3 IntegerVector (de.lmu.ifi.dbs.elki.data.IntegerVector)1 ReplaceNaNWithRandomFilter (de.lmu.ifi.dbs.elki.datasource.filter.cleaning.ReplaceNaNWithRandomFilter)1 DoubleMinMax (de.lmu.ifi.dbs.elki.math.DoubleMinMax)1 MeanVariance (de.lmu.ifi.dbs.elki.math.MeanVariance)1 NormalDistribution (de.lmu.ifi.dbs.elki.math.statistics.distribution.NormalDistribution)1 ELKIBuilder (de.lmu.ifi.dbs.elki.utilities.ELKIBuilder)1 ArrayList (java.util.ArrayList)1 Random (java.util.Random)1