Search in sources:

Example 1 with IntegerVector

use of de.lmu.ifi.dbs.elki.data.IntegerVector in project elki by elki-project.

the class ReplaceNaNWithRandomFilterTest method parameters.

/**
 * Test with standard normal distribution as parameter.
 *
 * Reads the same test file once through the filter and once without it,
 * records every (row, column) cell that is NaN in the unfiltered data, and
 * verifies that none of these cells is NaN in the filtered data.
 */
@Test
public void parameters() {
    String filename = UNITTEST + "nan-test-1.csv";
    ReplaceNaNWithRandomFilter filter = new ELKIBuilder<>(ReplaceNaNWithRandomFilter.class) //
        .with(ReplaceNaNWithRandomFilter.Parameterizer.REPLACEMENT_DISTRIBUTION, //
            new NormalDistribution(0, 1, new Random(0L))) //
        .build();
    MultipleObjectsBundle filteredBundle = readBundle(filename, filter);
    // Load the test data again without a filter.
    MultipleObjectsBundle unfilteredBundle = readBundle(filename);
    // Ensure the first column are the vectors.
    assertTrue("Test file not as expected", TypeUtil.NUMBER_VECTOR_FIELD.isAssignableFromType(filteredBundle.meta(0)));
    assertTrue("Test file not as expected", TypeUtil.NUMBER_VECTOR_FIELD.isAssignableFromType(unfilteredBundle.meta(0)));
    // This cast is now safe (vector field).
    // The filtered dimensionality must be read from the filtered bundle;
    // reading both values from the unfiltered bundle would make the
    // comparison below trivially true.
    int dimFiltered = ((FieldTypeInformation) filteredBundle.meta(0)).getDimensionality();
    int dimUnfiltered = ((FieldTypeInformation) unfilteredBundle.meta(0)).getDimensionality();
    assertEquals("Dimensionality expected equal", dimFiltered, dimUnfiltered);
    // Note the indices of the NaN(s) in the data, stored as {row, col} pairs.
    List<IntegerVector> nanCells = new ArrayList<>();
    for (int row = 0; row < unfilteredBundle.dataLength(); row++) {
        Object obj = unfilteredBundle.data(row, 0);
        assertEquals("Unexpected data type", DoubleVector.class, obj.getClass());
        DoubleVector d = (DoubleVector) obj;
        for (int col = 0; col < dimUnfiltered; col++) {
            final double v = d.doubleValue(col);
            if (Double.isNaN(v)) {
                nanCells.add(new IntegerVector(new int[] { row, col }));
            }
        }
    }
    // Verify that at least a single NaN exists in the unfiltered bundle;
    // otherwise the loop below would not check anything.
    assertTrue("NaN expected in unfiltered data", nanCells.size() > 0);
    // Every cell that was NaN before filtering must hold a number afterwards.
    for (IntegerVector cell : nanCells) {
        Object obj = filteredBundle.data(cell.intValue(0), 0);
        assertEquals("Unexpected data type", DoubleVector.class, obj.getClass());
        DoubleVector d = (DoubleVector) obj;
        final double v = d.doubleValue(cell.intValue(1));
        assertFalse("NaN not expected", Double.isNaN(v));
    }
}
Also used : ELKIBuilder(de.lmu.ifi.dbs.elki.utilities.ELKIBuilder) MultipleObjectsBundle(de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle) ArrayList(java.util.ArrayList) FieldTypeInformation(de.lmu.ifi.dbs.elki.data.type.FieldTypeInformation) IntegerVector(de.lmu.ifi.dbs.elki.data.IntegerVector) Random(java.util.Random) NormalDistribution(de.lmu.ifi.dbs.elki.math.statistics.distribution.NormalDistribution) ReplaceNaNWithRandomFilter(de.lmu.ifi.dbs.elki.datasource.filter.cleaning.ReplaceNaNWithRandomFilter) DoubleVector(de.lmu.ifi.dbs.elki.data.DoubleVector) Test(org.junit.Test) AbstractDataSourceTest(de.lmu.ifi.dbs.elki.datasource.AbstractDataSourceTest)

Example 2 with IntegerVector

use of de.lmu.ifi.dbs.elki.data.IntegerVector in project elki by elki-project.

the class IntegerRankTieNormalizationTest method defaultParameters.

/**
 * Test with default parameters.
 *
 * For every column of the filtered data, the values are collected and
 * sorted; then each run of equal values (a tie group) starting at sorted
 * position {@code s} and ending before position {@code i} is expected to
 * carry the value {@code s + i - 1}.
 */
@Test
public void defaultParameters() {
    String filename = UNITTEST + "normalization-test-1.csv";
    IntegerRankTieNormalization filter = new ELKIBuilder<>(IntegerRankTieNormalization.class).build();
    MultipleObjectsBundle bundle = readBundle(filename, filter);
    int dim = getFieldDimensionality(bundle, 0, TypeUtil.NUMBER_VECTOR_FIELD);
    IntegerArray coldata = new IntegerArray(bundle.dataLength());
    for (int col = 0; col < dim; col++) {
        coldata.clear();
        // Extract the column:
        for (int row = 0; row < bundle.dataLength(); row++) {
            IntegerVector obj = get(bundle, row, 0, IntegerVector.class);
            coldata.add(obj.intValue(col));
        }
        // Sort values:
        coldata.sort();
        // Verify that the gap matches the frequency of each value.
        final int size = coldata.size;
        // The smallest value must point at a position holding that same value.
        assertEquals("First value", coldata.get(0), coldata.get(coldata.get(0)));
        for (int i = 0; i < size; ) {
            // s: Start, i: end, v: value, f: frequency
            int s = i, v = coldata.get(i), f = 1;
            while (++i < size && v == coldata.get(i)) {
                f++;
            }
            // The value is odd if and only if the frequency is even, i.e. the
            // parities of f and v must differ. Compare by value: an identity
            // check on autoboxed ints would depend on Integer caching.
            assertEquals("Even/odd rule", 1, (f ^ v) & 1);
            assertEquals("Bad value at position " + s, s + i - 1, v);
            assertEquals("Bad frequency at position " + s, i - s, f);
        }
    }
}
Also used : IntegerVector(de.lmu.ifi.dbs.elki.data.IntegerVector) MultipleObjectsBundle(de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle) IntegerArray(de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike.IntegerArray) AbstractDataSourceTest(de.lmu.ifi.dbs.elki.datasource.AbstractDataSourceTest) Test(org.junit.Test)

Example 3 with IntegerVector

use of de.lmu.ifi.dbs.elki.data.IntegerVector in project elki by elki-project.

the class IntegerRankTieNormalization method filter.

/**
 * Builds a new bundle in which every number vector column is replaced by a
 * column of {@link IntegerVector}s; all other columns are passed through
 * unchanged.
 *
 * For each dimension the rows are sorted, and every group of tied rows
 * occupying sorted positions {@code first} (inclusive) to {@code next}
 * (exclusive) receives the common value {@code first + next - 1}.
 *
 * @param objects input bundle
 * @return newly created bundle with the replaced columns
 */
@Override
public MultipleObjectsBundle filter(MultipleObjectsBundle objects) {
    final int size = objects.dataLength();
    final MultipleObjectsBundle result = new MultipleObjectsBundle();
    // Row index permutation; it is re-sorted in place for every dimension.
    final int[] perm = new int[size];
    for (int idx = 0; idx < size; idx++) {
        perm[idx] = idx;
    }
    final Sorter sorter = new Sorter();
    for (int c = 0; c < objects.metaLength(); c++) {
        final SimpleTypeInformation<?> meta = objects.meta(c);
        final List<?> rawColumn = objects.getColumn(c);
        if (TypeUtil.NUMBER_VECTOR_FIELD.isAssignableFromType(meta)) {
            @SuppressWarnings("unchecked")
            final List<? extends NumberVector> vectors = (List<? extends NumberVector>) rawColumn;
            // Type information describing the replacement column.
            final int dim = ((VectorFieldTypeInformation<?>) meta).getDimensionality();
            final VectorFieldTypeInformation<IntegerVector> rankType = new VectorFieldTypeInformation<>(IntegerVector.STATIC, dim);
            // One output row per input row.
            final int[][] ranks = new int[size][dim];
            for (int d = 0; d < dim; d++) {
                // Order the rows by their value in dimension d.
                sorter.setup(vectors, d);
                IntegerArrayQuickSort.sort(perm, sorter);
                int first = 0;
                while (first < perm.length) {
                    final double value = vectors.get(perm[first]).doubleValue(d);
                    // Advance to the first row whose value is strictly larger;
                    // everything before it belongs to the same tie group.
                    int next = first + 1;
                    for (; next < perm.length; next++) {
                        if (value < vectors.get(perm[next]).doubleValue(d)) {
                            break;
                        }
                    }
                    // All members of the tie group share one value.
                    final int shared = first + next - 1;
                    for (int k = first; k < next; k++) {
                        ranks[perm[k]][d] = shared;
                    }
                    first = next;
                }
            }
            // Wrap the rows into vectors for the output column.
            final List<IntegerVector> rankColumn = new ArrayList<>(size);
            for (final int[] row : ranks) {
                rankColumn.add(new IntegerVector(row));
            }
            result.appendColumn(rankType, rankColumn);
        } else {
            // Not a number vector column: copy it over untouched.
            result.appendColumn(meta, rawColumn);
        }
    }
    return result;
}
Also used : MultipleObjectsBundle(de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle) ArrayList(java.util.ArrayList) IntegerVector(de.lmu.ifi.dbs.elki.data.IntegerVector) NumberVector(de.lmu.ifi.dbs.elki.data.NumberVector) VectorFieldTypeInformation(de.lmu.ifi.dbs.elki.data.type.VectorFieldTypeInformation) List(java.util.List)

Aggregations

IntegerVector (de.lmu.ifi.dbs.elki.data.IntegerVector)3 MultipleObjectsBundle (de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle)3 AbstractDataSourceTest (de.lmu.ifi.dbs.elki.datasource.AbstractDataSourceTest)2 ArrayList (java.util.ArrayList)2 Test (org.junit.Test)2 DoubleVector (de.lmu.ifi.dbs.elki.data.DoubleVector)1 NumberVector (de.lmu.ifi.dbs.elki.data.NumberVector)1 FieldTypeInformation (de.lmu.ifi.dbs.elki.data.type.FieldTypeInformation)1 VectorFieldTypeInformation (de.lmu.ifi.dbs.elki.data.type.VectorFieldTypeInformation)1 ReplaceNaNWithRandomFilter (de.lmu.ifi.dbs.elki.datasource.filter.cleaning.ReplaceNaNWithRandomFilter)1 NormalDistribution (de.lmu.ifi.dbs.elki.math.statistics.distribution.NormalDistribution)1 ELKIBuilder (de.lmu.ifi.dbs.elki.utilities.ELKIBuilder)1 IntegerArray (de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike.IntegerArray)1 List (java.util.List)1 Random (java.util.Random)1