Search in sources :

Example 61 with MultipleObjectsBundle

use of de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle in project elki by elki-project.

the class AttributeWiseCDFNormalizationTest method defaultParameters.

/**
 * Test with default parameters.
 * <p>
 * Builds an {@code AttributeWiseCDFNormalization} filter with
 * {@code NormalMOMEstimator} and {@code UniformMinMaxEstimator} as candidate
 * distribution estimators, loads the test file through it, and checks that
 * in every column the normalized values are spread roughly evenly over the
 * four quarters of the interval [0, 1].
 */
@Test
public void defaultParameters() {
    String filename = UNITTEST + "normally-distributed-data-1.csv";
    AttributeWiseCDFNormalization<DoubleVector> filter = new ELKIBuilder<AttributeWiseCDFNormalization<DoubleVector>>(AttributeWiseCDFNormalization.class)
        .with(AttributeWiseCDFNormalization.Parameterizer.DISTRIBUTIONS_ID, Arrays.asList(NormalMOMEstimator.STATIC, UniformMinMaxEstimator.STATIC))
        .build();
    MultipleObjectsBundle bundle = readBundle(filename, filter);
    int dim = getFieldDimensionality(bundle, 0, TypeUtil.NUMBER_VECTOR_FIELD);
    // We expect that approximately 25% of the values in each column are 0-0.25,
    // 25% in 0.25-0.5, 25% in 0.5-0.75, and 25% in 0.75-1.
    int[][] counts = new int[dim][4];
    final int size = bundle.dataLength();
    for (int row = 0; row < size; row++) {
        DoubleVector d = get(bundle, row, 0, DoubleVector.class);
        for (int col = 0; col < dim; col++) {
            final double val = d.doubleValue(col);
            // A value of exactly 1.0 belongs to the top quarter; without this
            // special case (int) (1.0 * 4) == 4 would index past the array.
            // Any other out-of-range value still fails loudly, as before.
            final int q = val == 1. ? 3 : (int) (val * 4);
            counts[col][q]++;
        }
    }
    for (int col = 0; col < dim; col++) {
        assertEquals("~25% of the values in each column should be between 0 and 0.25", .25, counts[col][0] / (double) size, .02);
        assertEquals("~25% of the values in each column should be between 0.25 and 0.5", .25, counts[col][1] / (double) size, .02);
        // NOTE(review): this quarter uses a wider tolerance (.03) than the
        // others - presumably tuned to the test data; confirm.
        assertEquals("~25% of the values in each column should be between 0.5 and 0.75", .25, counts[col][2] / (double) size, .03);
        assertEquals("~25% of the values in each column should be between 0.75 and 1", .25, counts[col][3] / (double) size, .02);
    }
}
Also used : ELKIBuilder(de.lmu.ifi.dbs.elki.utilities.ELKIBuilder) MultipleObjectsBundle(de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle) DoubleVector(de.lmu.ifi.dbs.elki.data.DoubleVector) AbstractDataSourceTest(de.lmu.ifi.dbs.elki.datasource.AbstractDataSourceTest) Test(org.junit.Test)

Example 62 with MultipleObjectsBundle

use of de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle in project elki by elki-project.

the class AttributeWiseMADNormalizationTest method defaultParameters.

/**
 * Test with default parameters.
 * <p>
 * Loads the test file through a default-configured
 * {@code AttributeWiseMADNormalization} filter and checks, per column, that
 * exactly half of the values are positive and that exactly half have an
 * absolute value of at least {@code NormalDistribution.PHIINV075}.
 */
@Test
public void defaultParameters() {
    final String filename = UNITTEST + "normalization-test-1.csv";
    final AttributeWiseMADNormalization<DoubleVector> filter = new ELKIBuilder<AttributeWiseMADNormalization<DoubleVector>>(AttributeWiseMADNormalization.class).build();
    final MultipleObjectsBundle bundle = readBundle(filename, filter);
    final int dim = getFieldDimensionality(bundle, 0, TypeUtil.NUMBER_VECTOR_FIELD);
    final int size = bundle.dataLength();
    // Per-column tallies: sign of the value, and whether its magnitude
    // reaches the PHIINV075 threshold.
    final int[] positive = new int[dim];
    final int[] notPositive = new int[dim];
    final int[] outer = new int[dim];
    final int[] inner = new int[dim];
    for (int r = 0; r < size; r++) {
        final DoubleVector vec = get(bundle, r, 0, DoubleVector.class);
        for (int c = 0; c < dim; c++) {
            final double x = vec.doubleValue(c);
            if (x > 0.) {
                positive[c]++;
            } else {
                notPositive[c]++;
            }
            if (Math.abs(x) >= NormalDistribution.PHIINV075) {
                outer[c]++;
            } else {
                inner[c]++;
            }
        }
    }
    // Each tally must be exactly half of the data set (tolerance 0).
    // NOTE(review): the messages speak of +-1 while the code thresholds at
    // PHIINV075 - confirm which wording is intended.
    final double total = size;
    for (int c = 0; c < dim; c++) {
        assertEquals("~50% of the values in each column should be positive", .5, positive[c] / total, 0.);
        assertEquals("~50% of the values in each column should be negative", .5, notPositive[c] / total, 0.);
        assertEquals("~50% of the values in each column should be > 1 or < -1", .5, outer[c] / total, 0.);
        assertEquals("~50% of the values in each column should be -1 to +1", .5, inner[c] / total, 0.);
    }
}
Also used : MultipleObjectsBundle(de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle) DoubleVector(de.lmu.ifi.dbs.elki.data.DoubleVector) AbstractDataSourceTest(de.lmu.ifi.dbs.elki.datasource.AbstractDataSourceTest) Test(org.junit.Test)

Example 63 with MultipleObjectsBundle

use of de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle in project elki by elki-project.

the class AttributeWiseMeanNormalizationTest method defaultParameters.

/**
 * Test with default parameters.
 * <p>
 * Loads the test file through a default-configured
 * {@code AttributeWiseMeanNormalization} filter and checks that the mean of
 * the finite values in every column is 1 (within 1e-14).
 */
@Test
public void defaultParameters() {
    final String filename = UNITTEST + "normalization-test-1.csv";
    final AttributeWiseMeanNormalization<DoubleVector> filter = new ELKIBuilder<AttributeWiseMeanNormalization<DoubleVector>>(AttributeWiseMeanNormalization.class).build();
    final MultipleObjectsBundle bundle = readBundle(filename, filter);
    final int dim = getFieldDimensionality(bundle, 0, TypeUtil.NUMBER_VECTOR_FIELD);
    final int size = bundle.dataLength();
    // One running mean per column.
    final Mean[] means = Mean.newArray(dim);
    for (int r = 0; r < size; r++) {
        final DoubleVector vec = get(bundle, r, 0, DoubleVector.class);
        for (int c = 0; c < dim; c++) {
            final double x = vec.doubleValue(c);
            // Only finite values contribute to the mean.
            if (Double.isNaN(x) || Double.isInfinite(x)) {
                continue;
            }
            means[c].put(x);
        }
    }
    // Every column is expected to have a mean of 1 after normalization.
    for (int c = 0; c < dim; c++) {
        assertEquals("Mean is not 1", 1., means[c].getMean(), 1e-14);
    }
}
Also used : Mean(de.lmu.ifi.dbs.elki.math.Mean) MultipleObjectsBundle(de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle) DoubleVector(de.lmu.ifi.dbs.elki.data.DoubleVector) AbstractDataSourceTest(de.lmu.ifi.dbs.elki.datasource.AbstractDataSourceTest) Test(org.junit.Test)

Example 64 with MultipleObjectsBundle

use of de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle in project elki by elki-project.

the class AttributeWiseMinMaxNormalizationTest method defaultParameters.

/**
 * Test with default parameters.
 * <p>
 * Loads the test file through a default-configured
 * {@code AttributeWiseMinMaxNormalization} filter and checks that the finite
 * values of every column span exactly the range 0 to 1.
 */
@Test
public void defaultParameters() {
    final String filename = UNITTEST + "normalization-test-1.csv";
    final AttributeWiseMinMaxNormalization<DoubleVector> filter = new ELKIBuilder<AttributeWiseMinMaxNormalization<DoubleVector>>(AttributeWiseMinMaxNormalization.class).build();
    final MultipleObjectsBundle bundle = readBundle(filename, filter);
    final int dim = getFieldDimensionality(bundle, 0, TypeUtil.NUMBER_VECTOR_FIELD);
    final int size = bundle.dataLength();
    // One min/max tracker per column.
    final DoubleMinMax[] ranges = DoubleMinMax.newArray(dim);
    for (int r = 0; r < size; r++) {
        final DoubleVector vec = get(bundle, r, 0, DoubleVector.class);
        for (int c = 0; c < dim; c++) {
            final double x = vec.doubleValue(c);
            // Only finite values are tracked.
            if (Double.isNaN(x) || Double.isInfinite(x)) {
                continue;
            }
            ranges[c].put(x);
        }
    }
    // The observed extremes must be exactly 0 and 1 (tolerance 0).
    for (int c = 0; c < dim; c++) {
        assertEquals("Minimum not as expected", 0., ranges[c].getMin(), 0.);
        assertEquals("Maximum not as expected", 1., ranges[c].getMax(), 0.);
    }
}
Also used : DoubleMinMax(de.lmu.ifi.dbs.elki.math.DoubleMinMax) MultipleObjectsBundle(de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle) DoubleVector(de.lmu.ifi.dbs.elki.data.DoubleVector) AbstractDataSourceTest(de.lmu.ifi.dbs.elki.datasource.AbstractDataSourceTest) Test(org.junit.Test)

Example 65 with MultipleObjectsBundle

use of de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle in project elki by elki-project.

the class AttributeWiseVarianceNormalizationTest method defaultParameters.

/**
 * Test with default parameters.
 * <p>
 * Loads the test file through a default-configured
 * {@code AttributeWiseVarianceNormalization} filter and checks that the
 * finite values of every column have mean 0 and naive variance 1 (both
 * within 1e-14).
 */
@Test
public void defaultParameters() {
    final String filename = UNITTEST + "normalization-test-1.csv";
    final AttributeWiseVarianceNormalization<DoubleVector> filter = new ELKIBuilder<AttributeWiseVarianceNormalization<DoubleVector>>(AttributeWiseVarianceNormalization.class).build();
    final MultipleObjectsBundle bundle = readBundle(filename, filter);
    final int dim = getFieldDimensionality(bundle, 0, TypeUtil.NUMBER_VECTOR_FIELD);
    final int size = bundle.dataLength();
    // One mean/variance accumulator per column.
    final MeanVariance[] stats = MeanVariance.newArray(dim);
    for (int r = 0; r < size; r++) {
        final DoubleVector vec = get(bundle, r, 0, DoubleVector.class);
        for (int c = 0; c < dim; c++) {
            final double x = vec.doubleValue(c);
            // Only finite values are accumulated.
            if (Double.isNaN(x) || Double.isInfinite(x)) {
                continue;
            }
            stats[c].put(x);
        }
    }
    // Normalized columns must be centered at 0 with unit (naive) variance.
    for (int c = 0; c < dim; c++) {
        assertEquals("Mean not as expected", 0., stats[c].getMean(), 1e-14);
        assertEquals("Variance not as expected", 1., stats[c].getNaiveVariance(), 1e-14);
    }
}
Also used : MeanVariance(de.lmu.ifi.dbs.elki.math.MeanVariance) MultipleObjectsBundle(de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle) DoubleVector(de.lmu.ifi.dbs.elki.data.DoubleVector) AbstractDataSourceTest(de.lmu.ifi.dbs.elki.datasource.AbstractDataSourceTest) Test(org.junit.Test)

Aggregations

MultipleObjectsBundle (de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle)72 AbstractDataSourceTest (de.lmu.ifi.dbs.elki.datasource.AbstractDataSourceTest)37 Test (org.junit.Test)37 DoubleVector (de.lmu.ifi.dbs.elki.data.DoubleVector)27 ArrayList (java.util.ArrayList)19 AbortException (de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException)13 NumberVector (de.lmu.ifi.dbs.elki.data.NumberVector)10 ELKIBuilder (de.lmu.ifi.dbs.elki.utilities.ELKIBuilder)10 VectorFieldTypeInformation (de.lmu.ifi.dbs.elki.data.type.VectorFieldTypeInformation)9 MeanVariance (de.lmu.ifi.dbs.elki.math.MeanVariance)8 List (java.util.List)7 LabelList (de.lmu.ifi.dbs.elki.data.LabelList)5 SimpleTypeInformation (de.lmu.ifi.dbs.elki.data.type.SimpleTypeInformation)5 InputStreamDatabaseConnection (de.lmu.ifi.dbs.elki.datasource.InputStreamDatabaseConnection)5 InputStream (java.io.InputStream)5 ClassLabel (de.lmu.ifi.dbs.elki.data.ClassLabel)4 TypeInformation (de.lmu.ifi.dbs.elki.data.type.TypeInformation)4 DBIDIter (de.lmu.ifi.dbs.elki.database.ids.DBIDIter)4 DBIDs (de.lmu.ifi.dbs.elki.database.ids.DBIDs)4 FiniteProgress (de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress)4