Search in sources :

Example 66 with MultipleObjectsBundle

use of de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle in project elki by elki-project.

the class HellingerHistogramNormalizationTest method defaultParameters.

/**
 * Runs the filter built with default parameters over the normalization test
 * file and checks the sum of squared components of every resulting vector.
 */
@Test
public void defaultParameters() {
    final String path = UNITTEST + "normalization-test-1.csv";
    HellingerHistogramNormalization<DoubleVector> normalizer = new ELKIBuilder<>(HellingerHistogramNormalization.class).build();
    final MultipleObjectsBundle data = readBundle(path, normalizer);
    final int dimensionality = getFieldDimensionality(data, 0, TypeUtil.NUMBER_VECTOR_FIELD);
    // For each vector, the squared components must add up to 0.5
    // (within a tolerance of 1e-15).
    int r = 0;
    while (r < data.dataLength()) {
        final DoubleVector vec = get(data, r, 0, DoubleVector.class);
        double squaredSum = 0.;
        for (int c = 0; c < dimensionality; c++) {
            final double component = vec.doubleValue(c);
            squaredSum += component * component;
        }
        assertEquals("Sum of squares not as expected", .5, squaredSum, 1e-15);
        r++;
    }
}
Also used : MultipleObjectsBundle(de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle) DoubleVector(de.lmu.ifi.dbs.elki.data.DoubleVector) AbstractDataSourceTest(de.lmu.ifi.dbs.elki.datasource.AbstractDataSourceTest) Test(org.junit.Test)

Example 67 with MultipleObjectsBundle

use of de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle in project elki by elki-project.

the class InstanceLogRankNormalizationTest method defaultParameters.

/**
 * Runs the filter built with default parameters over the normalization test
 * file and compares per-row statistics against reference values.
 */
@Test
public void defaultParameters() {
    final String path = UNITTEST + "normalization-test-1.csv";
    InstanceLogRankNormalization<DoubleVector> normalizer = new ELKIBuilder<>(InstanceLogRankNormalization.class).build();
    final MultipleObjectsBundle data = readBundle(path, normalizer);
    final int dimensionality = getFieldDimensionality(data, 0, TypeUtil.NUMBER_VECTOR_FIELD);
    // Reference statistics: for every position i in [0, dim), accumulate
    // log1p(i / (dim - 1)) * ONE_BY_LOG2.
    final MeanVariance reference = new MeanVariance();
    final double maxRank = dimensionality - 1;
    for (int i = 0; i < dimensionality; i++) {
        reference.put(Math.log1p(i / maxRank) * MathUtil.ONE_BY_LOG2);
    }
    // Per row: the minimum must be exactly 0 and the maximum exactly 1, and
    // (ties aside) mean and naive variance must agree with the reference
    // statistics computed above.
    final MeanVarianceMinMax rowStats = new MeanVarianceMinMax();
    int r = 0;
    while (r < data.dataLength()) {
        final DoubleVector vec = get(data, r, 0, DoubleVector.class);
        for (int c = 0; c < dimensionality; c++) {
            rowStats.put(vec.doubleValue(c));
        }
        assertEquals("Min value is not 0", 0., rowStats.getMin(), 0);
        assertEquals("Max value is not 1", 1., rowStats.getMax(), 0);
        assertEquals("Mean value is not as expected", reference.getMean(), rowStats.getMean(), 1e-14);
        assertEquals("Variance is not as expected", reference.getNaiveVariance(), rowStats.getNaiveVariance(), 1e-14);
        // Clear the accumulator so the next row is evaluated on its own.
        rowStats.reset();
        r++;
    }
}
Also used : MeanVariance(de.lmu.ifi.dbs.elki.math.MeanVariance) MultipleObjectsBundle(de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle) DoubleVector(de.lmu.ifi.dbs.elki.data.DoubleVector) MeanVarianceMinMax(de.lmu.ifi.dbs.elki.math.MeanVarianceMinMax) AbstractDataSourceTest(de.lmu.ifi.dbs.elki.datasource.AbstractDataSourceTest) Test(org.junit.Test)

Example 68 with MultipleObjectsBundle

use of de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle in project elki by elki-project.

the class DropNaNFilterTest method defaultParameters.

/**
 * Loads the NaN test file once through the filter built with default
 * parameters and once unfiltered, then checks that the raw data contains a
 * NaN while the filtered data contains none.
 */
@Test
public void defaultParameters() {
    final String path = UNITTEST + "nan-test-1.csv";
    DropNaNFilter nanFilter = new ELKIBuilder<>(DropNaNFilter.class).build();
    final MultipleObjectsBundle cleaned = readBundle(path, nanFilter);
    // Second load of the same file, this time without any filter.
    final MultipleObjectsBundle raw = readBundle(path);
    // Dimensionality of the first column in each bundle.
    final int cleanedDim = getFieldDimensionality(cleaned, 0, TypeUtil.NUMBER_VECTOR_FIELD);
    final int rawDim = getFieldDimensionality(raw, 0, TypeUtil.NUMBER_VECTOR_FIELD);
    // The raw data must contain at least one NaN. Every row is still fetched
    // (and thereby type-checked by get); only the scan within a row stops at
    // the first NaN.
    boolean sawNaN = false;
    for (int r = 0; r < raw.dataLength(); r++) {
        final DoubleVector vec = get(raw, r, 0, DoubleVector.class);
        int c = 0;
        while (c < rawDim && !Double.isNaN(vec.doubleValue(c))) {
            c++;
        }
        if (c < rawDim) {
            sawNaN = true;
        }
    }
    assertTrue("NaN expected in unfiltered data", sawNaN);
    // The filtered data must not contain a single NaN.
    for (int r = 0; r < cleaned.dataLength(); r++) {
        final DoubleVector vec = get(cleaned, r, 0, DoubleVector.class);
        for (int c = 0; c < cleanedDim; c++) {
            final double value = vec.doubleValue(c);
            assertFalse("NaN not expected", Double.isNaN(value));
        }
    }
}
Also used : DropNaNFilter(de.lmu.ifi.dbs.elki.datasource.filter.cleaning.DropNaNFilter) MultipleObjectsBundle(de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle) DoubleVector(de.lmu.ifi.dbs.elki.data.DoubleVector) AbstractDataSourceTest(de.lmu.ifi.dbs.elki.datasource.AbstractDataSourceTest) Test(org.junit.Test)

Example 69 with MultipleObjectsBundle

use of de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle in project elki by elki-project.

the class DBIDRangeDatabaseConnection method loadData.

/**
 * Creates a bundle that carries only object IDs and no data columns.
 *
 * @return a new bundle whose DBIDs are the static range generated from
 *         {@code start} and {@code count}
 */
@Override
public MultipleObjectsBundle loadData() {
    final MultipleObjectsBundle idOnlyBundle = new MultipleObjectsBundle();
    // Only the ID range is set; no columns are appended here.
    idOnlyBundle.setDBIDs(DBIDFactory.FACTORY.generateStaticDBIDRange(start, count));
    return idOnlyBundle;
}
Also used : MultipleObjectsBundle(de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle)

Example 70 with MultipleObjectsBundle

use of de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle in project elki by elki-project.

the class InputStreamDatabaseConnection method loadData.

/**
 * Parses the input stream and runs the filters on the result.
 *
 * If the parser is a {@link StreamingParser}, the stream filters are invoked
 * directly on it and the outcome is collected into a bundle. Otherwise the
 * input is parsed completely first and the bundle filters are applied
 * afterwards. When statistics logging is enabled, the elapsed durations are
 * logged under the keys {@code <class name>.load}, {@code .parse} and
 * {@code .filter}.
 *
 * @return the parsed and filtered objects
 */
@Override
public MultipleObjectsBundle loadData() {
    if (LOG.isDebugging()) {
        LOG.debugFine("Invoking parsers.");
    }
    if (parser instanceof StreamingParser) {
        // Streaming parsers can hand objects to the stream filters right away.
        final StreamingParser streaming = (StreamingParser) parser;
        streaming.initStream(in);
        if (LOG.isDebugging()) {
            LOG.debugFine("Parsing as stream.");
        }
        Duration loadTimer = null;
        if (LOG.isStatistics()) {
            loadTimer = LOG.newDuration(this.getClass().getName() + ".load").begin();
        }
        // Parsing and filtering happen together while the stream is consumed.
        final MultipleObjectsBundle streamed = invokeStreamFilters(streaming).asMultipleObjectsBundle();
        parser.cleanup();
        if (loadTimer != null) {
            LOG.statistics(loadTimer.end());
        }
        return streamed;
    }
    // Non-streaming parsers: parse everything first, then filter.
    Duration parseTimer = null;
    if (LOG.isStatistics()) {
        parseTimer = LOG.newDuration(this.getClass().getName() + ".parse").begin();
    }
    final MultipleObjectsBundle parsed = parser.parse(in);
    parser.cleanup();
    if (parseTimer != null) {
        LOG.statistics(parseTimer.end());
    }
    // Apply the bundle filters to the parsed data.
    if (LOG.isDebugging()) {
        LOG.debugFine("Invoking filters.");
    }
    Duration filterTimer = null;
    if (LOG.isStatistics()) {
        filterTimer = LOG.newDuration(this.getClass().getName() + ".filter").begin();
    }
    final MultipleObjectsBundle filtered = invokeBundleFilters(parsed);
    if (filterTimer != null) {
        LOG.statistics(filterTimer.end());
    }
    return filtered;
}
Also used : StreamingParser(de.lmu.ifi.dbs.elki.datasource.parser.StreamingParser) MultipleObjectsBundle(de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle) Duration(de.lmu.ifi.dbs.elki.logging.statistics.Duration)

Aggregations

MultipleObjectsBundle (de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle)72 AbstractDataSourceTest (de.lmu.ifi.dbs.elki.datasource.AbstractDataSourceTest)37 Test (org.junit.Test)37 DoubleVector (de.lmu.ifi.dbs.elki.data.DoubleVector)27 ArrayList (java.util.ArrayList)19 AbortException (de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException)13 NumberVector (de.lmu.ifi.dbs.elki.data.NumberVector)10 ELKIBuilder (de.lmu.ifi.dbs.elki.utilities.ELKIBuilder)10 VectorFieldTypeInformation (de.lmu.ifi.dbs.elki.data.type.VectorFieldTypeInformation)9 MeanVariance (de.lmu.ifi.dbs.elki.math.MeanVariance)8 List (java.util.List)7 LabelList (de.lmu.ifi.dbs.elki.data.LabelList)5 SimpleTypeInformation (de.lmu.ifi.dbs.elki.data.type.SimpleTypeInformation)5 InputStreamDatabaseConnection (de.lmu.ifi.dbs.elki.datasource.InputStreamDatabaseConnection)5 InputStream (java.io.InputStream)5 ClassLabel (de.lmu.ifi.dbs.elki.data.ClassLabel)4 TypeInformation (de.lmu.ifi.dbs.elki.data.type.TypeInformation)4 DBIDIter (de.lmu.ifi.dbs.elki.database.ids.DBIDIter)4 DBIDs (de.lmu.ifi.dbs.elki.database.ids.DBIDs)4 FiniteProgress (de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress)4