Search in sources :

Example 6 with MultipleObjectsBundle

use of de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle in project elki by elki-project.

the class AttributeWiseVarianceNormalizationTest method testNaNParameters.

/**
 * Runs the filter with its default parameters on a file containing NaN and
 * infinite values, and checks that the finite values of every attribute end
 * up with mean 0 and variance 1.
 */
@Test
public void testNaNParameters() {
    final String path = UNITTEST + "nan-test-1.csv";
    AttributeWiseVarianceNormalization<DoubleVector> normalizer = new ELKIBuilder<AttributeWiseVarianceNormalization<DoubleVector>>(AttributeWiseVarianceNormalization.class).build();
    MultipleObjectsBundle data = readBundle(path, normalizer);
    // Column 0 of the bundle has to be a number vector field.
    assertTrue("Test file not as expected", TypeUtil.NUMBER_VECTOR_FIELD.isAssignableFromType(data.meta(0)));
    // The type check above makes this cast safe.
    final FieldTypeInformation fieldInfo = (FieldTypeInformation) data.meta(0);
    final int dim = fieldInfo.getDimensionality();
    // One accumulator per attribute; NaN and infinite entries are skipped.
    final MeanVariance[] stats = MeanVariance.newArray(dim);
    for (int row = 0; row < data.dataLength(); row++) {
        DoubleVector vec = get(data, row, 0, DoubleVector.class);
        for (int col = 0; col < dim; col++) {
            final double value = vec.doubleValue(col);
            if (Double.isNaN(value) || Double.isInfinite(value)) {
                continue;
            }
            stats[col].put(value);
        }
    }
    // Every attribute must now be standardized.
    for (int col = 0; col < dim; col++) {
        final MeanVariance mv = stats[col];
        assertEquals("Mean not as expected", 0., mv.getMean(), 1e-15);
        assertEquals("Variance not as expected", 1., mv.getNaiveVariance(), 1e-15);
    }
}
Also used : MeanVariance(de.lmu.ifi.dbs.elki.math.MeanVariance) MultipleObjectsBundle(de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle) FieldTypeInformation(de.lmu.ifi.dbs.elki.data.type.FieldTypeInformation) DoubleVector(de.lmu.ifi.dbs.elki.data.DoubleVector) AbstractDataSourceTest(de.lmu.ifi.dbs.elki.datasource.AbstractDataSourceTest) Test(org.junit.Test)

Example 7 with MultipleObjectsBundle

use of de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle in project elki by elki-project.

the class IntegerRankTieNormalizationTest method defaultParameters.

/**
 * Test with default parameters.
 *
 * For every column, the filtered values are sorted and split into runs of
 * equal values. For a run occupying the sorted positions {@code s} to
 * {@code i - 1}, the assertions require the value to be {@code s + i - 1}
 * and the parity of the value to differ from the parity of the run length.
 */
@Test
public void defaultParameters() {
    String filename = UNITTEST + "normalization-test-1.csv";
    IntegerRankTieNormalization filter = new ELKIBuilder<>(IntegerRankTieNormalization.class).build();
    MultipleObjectsBundle bundle = readBundle(filename, filter);
    int dim = getFieldDimensionality(bundle, 0, TypeUtil.NUMBER_VECTOR_FIELD);
    IntegerArray coldata = new IntegerArray(bundle.dataLength());
    for (int col = 0; col < dim; col++) {
        coldata.clear();
        // Extract the column:
        for (int row = 0; row < bundle.dataLength(); row++) {
            IntegerVector obj = get(bundle, row, 0, IntegerVector.class);
            coldata.add(obj.intValue(col));
        }
        // Sort values:
        coldata.sort();
        // Verify that the gap matches the frequency of each value.
        final int size = coldata.size;
        // The smallest value is used as an index; the entry found there must
        // carry the same value again.
        assertEquals("First value", coldata.get(0), coldata.get(coldata.get(0)));
        for (int i = 0; i < size; ) {
            // s: Start, i: end, v: value, f: frequency
            int s = i, v = coldata.get(i), f = 1;
            while (++i < size && v == coldata.get(i)) {
                f++;
            }
            // The value is odd if and only if the frequency is even, so the
            // two parity bits must differ. Compare them by value (XOR == 1)
            // instead of by reference on autoboxed Integers.
            assertEquals("Even/odd rule", 1, (f & 1) ^ (v & 1));
            assertEquals("Bad value at position " + s, s + i - 1, v);
            assertEquals("Bad frequency at position " + s, i - s, f);
        }
    }
}
Also used : IntegerVector(de.lmu.ifi.dbs.elki.data.IntegerVector) MultipleObjectsBundle(de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle) IntegerArray(de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike.IntegerArray) AbstractDataSourceTest(de.lmu.ifi.dbs.elki.datasource.AbstractDataSourceTest) Test(org.junit.Test)

Example 8 with MultipleObjectsBundle

use of de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle in project elki by elki-project.

the class LengthNormalizationTest method defaultParameters.

/**
 * Applies the filter with its default settings and checks that the squared
 * components of every resulting vector sum to 1.
 */
@Test
public void defaultParameters() {
    final String path = UNITTEST + "normalization-test-1.csv";
    LengthNormalization<DoubleVector> normalizer = new ELKIBuilder<>(LengthNormalization.class).build();
    MultipleObjectsBundle data = readBundle(path, normalizer);
    final int dim = getFieldDimensionality(data, 0, TypeUtil.NUMBER_VECTOR_FIELD);
    // A sum of squares of 1 is equivalent to a Euclidean length of 1.
    int row = 0;
    while (row < data.dataLength()) {
        DoubleVector vec = get(data, row, 0, DoubleVector.class);
        double sumOfSquares = 0.0;
        for (int col = 0; col < dim; col++) {
            final double x = vec.doubleValue(col);
            sumOfSquares += x * x;
        }
        assertEquals("Vector length is not 1", 1., sumOfSquares, 1e-15);
        row++;
    }
}
Also used : MultipleObjectsBundle(de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle) DoubleVector(de.lmu.ifi.dbs.elki.data.DoubleVector) AbstractDataSourceTest(de.lmu.ifi.dbs.elki.datasource.AbstractDataSourceTest) Test(org.junit.Test)

Example 9 with MultipleObjectsBundle

use of de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle in project elki by elki-project.

the class Log1PlusNormalizationTest method parameters.

/**
 * Test with a non-default boost parameter.
 *
 * The file is loaded once through the filter and once unfiltered; every
 * filtered value is then compared against
 * {@code log1p(|x| * b) / log1p(b)} computed from the unfiltered value
 * {@code x} and the boost {@code b}.
 */
@Test
public void parameters() {
    String filename = UNITTEST + "normalization-test-1.csv";
    // Use the value of b as the boost value.
    double b = 15.;
    ListParameterization config = new ListParameterization();
    config.addParameter(Log1PlusNormalization.Parameterizer.BOOST_ID, b);
    Log1PlusNormalization<DoubleVector> filter = ClassGenericsUtil.parameterizeOrAbort(Log1PlusNormalization.class, config);
    MultipleObjectsBundle filteredBundle = readBundle(filename, filter);
    // Load the test data again without a filter.
    MultipleObjectsBundle unfilteredBundle = readBundle(filename);
    // Check dimensionalities; the unfiltered data is the reference.
    int dim = getFieldDimensionality(filteredBundle, 0, TypeUtil.NUMBER_VECTOR_FIELD);
    assertEquals("Dimensionality changed", getFieldDimensionality(unfilteredBundle, 0, TypeUtil.NUMBER_VECTOR_FIELD), dim);
    // Verify that the filtered and unfiltered bundles have the same length.
    assertEquals("Length changed", unfilteredBundle.dataLength(), filteredBundle.dataLength());
    // Verify each filtered value by recomputing it from the unfiltered value.
    // The denominator does not depend on the value, so compute it once.
    final double norm = FastMath.log1p(b);
    for (int row = 0; row < filteredBundle.dataLength(); row++) {
        DoubleVector dFil = get(filteredBundle, row, 0, DoubleVector.class);
        DoubleVector dUnfil = get(unfilteredBundle, row, 0, DoubleVector.class);
        for (int col = 0; col < dim; col++) {
            final double vFil = dFil.doubleValue(col);
            final double vUnfil = dUnfil.doubleValue(col);
            // Expected (recomputed) value first, actual (filtered) value
            // second, so failure messages report them the right way round.
            assertEquals("Value not as expected", FastMath.log1p(Math.abs(vUnfil) * b) / norm, vFil, 1e-15);
        }
    }
}
Also used : MultipleObjectsBundle(de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle) DoubleVector(de.lmu.ifi.dbs.elki.data.DoubleVector) ListParameterization(de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.ListParameterization) AbstractDataSourceTest(de.lmu.ifi.dbs.elki.datasource.AbstractDataSourceTest) Test(org.junit.Test)

Example 10 with MultipleObjectsBundle

use of de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle in project elki by elki-project.

the class ByLabelFilterTest method parameters.

/**
 * Filters the test file by a label pattern and checks that the filter kept
 * exactly as many rows as carry that label in the unfiltered data.
 */
@Test
public void parameters() {
    final String label = "yes";
    final String path = UNITTEST + "label-selection-test-1.csv";
    ByLabelFilter filter = new ELKIBuilder<>(ByLabelFilter.class)
        .with(ByLabelFilter.Parameterizer.LABELFILTER_PATTERN_ID, label)
        .build();
    MultipleObjectsBundle selected = readBundle(path, filter);
    // Second pass over the same file, this time without any filter.
    MultipleObjectsBundle everything = readBundle(path);
    // Column 0 must be a vector field of equal dimensionality in both bundles.
    final int referenceDim = getFieldDimensionality(everything, 0, TypeUtil.NUMBER_VECTOR_FIELD);
    final int selectedDim = getFieldDimensionality(selected, 0, TypeUtil.NUMBER_VECTOR_FIELD);
    assertEquals("Dimensionality", referenceDim, selectedDim);
    // Count the unfiltered rows whose first label equals the search label.
    int matches = 0;
    for (int row = 0; row < everything.dataLength(); row++) {
        LabelList labels = get(everything, row, 1, LabelList.class);
        matches += labels.get(0).equals(label) ? 1 : 0;
    }
    assertTrue("Expected at least one match", matches > 0);
    assertEquals("Unexpected number of matches", matches, selected.dataLength());
}
Also used : ELKIBuilder(de.lmu.ifi.dbs.elki.utilities.ELKIBuilder) LabelList(de.lmu.ifi.dbs.elki.data.LabelList) MultipleObjectsBundle(de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle) AbstractDataSourceTest(de.lmu.ifi.dbs.elki.datasource.AbstractDataSourceTest) Test(org.junit.Test)

Aggregations

MultipleObjectsBundle (de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle)72 AbstractDataSourceTest (de.lmu.ifi.dbs.elki.datasource.AbstractDataSourceTest)37 Test (org.junit.Test)37 DoubleVector (de.lmu.ifi.dbs.elki.data.DoubleVector)27 ArrayList (java.util.ArrayList)19 AbortException (de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException)13 NumberVector (de.lmu.ifi.dbs.elki.data.NumberVector)10 ELKIBuilder (de.lmu.ifi.dbs.elki.utilities.ELKIBuilder)10 VectorFieldTypeInformation (de.lmu.ifi.dbs.elki.data.type.VectorFieldTypeInformation)9 MeanVariance (de.lmu.ifi.dbs.elki.math.MeanVariance)8 List (java.util.List)7 LabelList (de.lmu.ifi.dbs.elki.data.LabelList)5 SimpleTypeInformation (de.lmu.ifi.dbs.elki.data.type.SimpleTypeInformation)5 InputStreamDatabaseConnection (de.lmu.ifi.dbs.elki.datasource.InputStreamDatabaseConnection)5 InputStream (java.io.InputStream)5 ClassLabel (de.lmu.ifi.dbs.elki.data.ClassLabel)4 TypeInformation (de.lmu.ifi.dbs.elki.data.type.TypeInformation)4 DBIDIter (de.lmu.ifi.dbs.elki.database.ids.DBIDIter)4 DBIDs (de.lmu.ifi.dbs.elki.database.ids.DBIDs)4 FiniteProgress (de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress)4