Search in sources :

Example 11 with MultipleObjectsBundle

use of de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle in project elki by elki-project.

the class SortByLabelFilterTest method defaultParameters.

/**
 * Builds a {@code SortByLabelFilter} without any explicit options and checks
 * that the rows of the resulting bundle are ordered by their first label,
 * ignoring case.
 */
@Test
public void defaultParameters() {
    final String inputFile = UNITTEST + "label-selection-test-1.csv";
    final SortByLabelFilter sorter = new ELKIBuilder<>(SortByLabelFilter.class).build();
    final MultipleObjectsBundle sorted = readBundle(inputFile, sorter);
    // Column 0 should hold the vectors and column 1 the labels. The returned
    // dimensionality is not needed; the helper is invoked for its check only
    // (presumably it fails the test when the column type does not match).
    getFieldDimensionality(sorted, 0, TypeUtil.NUMBER_VECTOR_FIELD);
    // Walk over every pair of neighboring rows and compare their first labels.
    for (int next = 1; next < sorted.dataLength(); next++) {
        final LabelList previousLabels = get(sorted, next - 1, 1, LabelList.class);
        final LabelList nextLabels = get(sorted, next, 1, LabelList.class);
        final boolean ordered = previousLabels.get(0).compareToIgnoreCase(nextLabels.get(0)) <= 0;
        assertTrue("Expected alphabetical order", ordered);
    }
}
Also used : LabelList(de.lmu.ifi.dbs.elki.data.LabelList) MultipleObjectsBundle(de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle) AbstractDataSourceTest(de.lmu.ifi.dbs.elki.datasource.AbstractDataSourceTest) Test(org.junit.Test)

Example 12 with MultipleObjectsBundle

use of de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle in project elki by elki-project.

the class ClassicMultidimensionalScalingTransformTest method parameters.

/**
 * Runs classic multidimensional scaling with an explicit target
 * dimensionality and the Euclidean distance, then checks two properties of
 * the result: pairwise distances match those of the untransformed data, and
 * the diagonal of the transformed covariance matrix matches the singular
 * values of the original covariance matrix.
 */
@Test
public void parameters() {
    int targetDim = 2;
    String inputFile = UNITTEST + "transformation-test-1.csv";
    ClassicMultidimensionalScalingTransform<DoubleVector, DoubleVector> mds = new ELKIBuilder<ClassicMultidimensionalScalingTransform<DoubleVector, DoubleVector>>(ClassicMultidimensionalScalingTransform.class) //
        .with(ClassicMultidimensionalScalingTransform.Parameterizer.DIM_ID, targetDim) //
        .with(ClassicMultidimensionalScalingTransform.Parameterizer.DISTANCE_ID, EuclideanDistanceFunction.class) //
        .build();
    MultipleObjectsBundle transformed = readBundle(inputFile, mds);
    // Second read of the same file, this time without any filter, as reference.
    MultipleObjectsBundle reference = readBundle(inputFile);
    int referenceDim = getFieldDimensionality(reference, 0, TypeUtil.NUMBER_VECTOR_FIELD);
    int transformedDim = getFieldDimensionality(transformed, 0, TypeUtil.NUMBER_VECTOR_FIELD);
    assertEquals("Dimensionality not as requested", targetDim, transformedDim);

    // One pass does two jobs: every vector is fed into the covariance
    // accumulator of its bundle, and every unordered pair of rows is compared
    // for equal Euclidean distance before and after the transform - O(n^2)!
    CovarianceMatrix referenceCov = new CovarianceMatrix(referenceDim);
    CovarianceMatrix transformedCov = new CovarianceMatrix(transformedDim);
    for (int i = 0; i < transformed.dataLength(); i++) {
        DoubleVector transformedA = get(transformed, i, 0, DoubleVector.class);
        DoubleVector referenceA = get(reference, i, 0, DoubleVector.class);
        referenceCov.put(referenceA);
        transformedCov.put(transformedA);
        for (int j = i + 1; j < transformed.dataLength(); j++) {
            DoubleVector transformedB = get(transformed, j, 0, DoubleVector.class);
            DoubleVector referenceB = get(reference, j, 0, DoubleVector.class);
            final double transformedDist = EuclideanDistanceFunction.STATIC.distance(transformedA, transformedB);
            final double referenceDist = EuclideanDistanceFunction.STATIC.distance(referenceA, referenceB);
            assertEquals("Expected same distance", referenceDist, transformedDist, 1e-11);
        }
    }

    // Decompose the covariance matrix of the reference data; its singular
    // values are expected on the diagonal of the transformed covariance matrix.
    double[][] referenceMatrix = referenceCov.destroyToPopulationMatrix();
    double[][] transformedMatrix = transformedCov.destroyToPopulationMatrix();
    double[] singularValues = new SingularValueDecomposition(referenceMatrix).getSingularValues();
    for (int k = 0; k < singularValues.length; k++) {
        assertEquals("Unexpected covariance", singularValues[k], transformedMatrix[k][k], 1e-11);
    }
}
Also used : EuclideanDistanceFunction(de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.EuclideanDistanceFunction) MultipleObjectsBundle(de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle) CovarianceMatrix(de.lmu.ifi.dbs.elki.math.linearalgebra.CovarianceMatrix) DoubleVector(de.lmu.ifi.dbs.elki.data.DoubleVector) SingularValueDecomposition(de.lmu.ifi.dbs.elki.math.linearalgebra.SingularValueDecomposition) AbstractDataSourceTest(de.lmu.ifi.dbs.elki.datasource.AbstractDataSourceTest) Test(org.junit.Test)

Example 13 with MultipleObjectsBundle

use of de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle in project elki by elki-project.

the class FastMultidimensionalScalingTransformTest method parameters.

/**
 * Runs the fast multidimensional scaling variant with an explicit target
 * dimensionality, a fixed random seed of 0 and the Euclidean distance, then
 * checks two properties of the result: pairwise distances match those of the
 * untransformed data, and the diagonal of the transformed covariance matrix
 * matches the singular values of the original covariance matrix.
 */
@Test
public void parameters() {
    int targetDim = 2;
    String inputFile = UNITTEST + "transformation-test-1.csv";
    FastMultidimensionalScalingTransform<DoubleVector, DoubleVector> mds = new ELKIBuilder<FastMultidimensionalScalingTransform<DoubleVector, DoubleVector>>(FastMultidimensionalScalingTransform.class) //
        .with(ClassicMultidimensionalScalingTransform.Parameterizer.DIM_ID, targetDim) //
        .with(FastMultidimensionalScalingTransform.Parameterizer.RANDOM_ID, 0L) //
        .with(ClassicMultidimensionalScalingTransform.Parameterizer.DISTANCE_ID, EuclideanDistanceFunction.class) //
        .build();
    MultipleObjectsBundle transformed = readBundle(inputFile, mds);
    // Second read of the same file, this time without any filter, as reference.
    MultipleObjectsBundle reference = readBundle(inputFile);
    int referenceDim = getFieldDimensionality(reference, 0, TypeUtil.NUMBER_VECTOR_FIELD);
    int transformedDim = getFieldDimensionality(transformed, 0, TypeUtil.NUMBER_VECTOR_FIELD);
    assertEquals("Dimensionality not as requested", targetDim, transformedDim);

    // One pass does two jobs: every vector is fed into the covariance
    // accumulator of its bundle, and every unordered pair of rows is compared
    // for equal Euclidean distance before and after the transform - O(n^2)!
    CovarianceMatrix referenceCov = new CovarianceMatrix(referenceDim);
    CovarianceMatrix transformedCov = new CovarianceMatrix(transformedDim);
    for (int i = 0; i < transformed.dataLength(); i++) {
        DoubleVector transformedA = get(transformed, i, 0, DoubleVector.class);
        DoubleVector referenceA = get(reference, i, 0, DoubleVector.class);
        referenceCov.put(referenceA);
        transformedCov.put(transformedA);
        for (int j = i + 1; j < transformed.dataLength(); j++) {
            DoubleVector transformedB = get(transformed, j, 0, DoubleVector.class);
            DoubleVector referenceB = get(reference, j, 0, DoubleVector.class);
            final double transformedDist = EuclideanDistanceFunction.STATIC.distance(transformedA, transformedB);
            final double referenceDist = EuclideanDistanceFunction.STATIC.distance(referenceA, referenceB);
            // Slightly looser tolerance than the classic MDS test uses.
            assertEquals("Expected same distance", referenceDist, transformedDist, 1e-10);
        }
    }

    // Decompose the covariance matrix of the reference data; its singular
    // values are expected on the diagonal of the transformed covariance matrix.
    double[][] referenceMatrix = referenceCov.destroyToPopulationMatrix();
    double[][] transformedMatrix = transformedCov.destroyToPopulationMatrix();
    double[] singularValues = new SingularValueDecomposition(referenceMatrix).getSingularValues();
    for (int k = 0; k < singularValues.length; k++) {
        assertEquals("Unexpected covariance", singularValues[k], transformedMatrix[k][k], 1e-8);
    }
}
Also used : EuclideanDistanceFunction(de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.EuclideanDistanceFunction) ELKIBuilder(de.lmu.ifi.dbs.elki.utilities.ELKIBuilder) MultipleObjectsBundle(de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle) DoubleVector(de.lmu.ifi.dbs.elki.data.DoubleVector) SingularValueDecomposition(de.lmu.ifi.dbs.elki.math.linearalgebra.SingularValueDecomposition) CovarianceMatrix(de.lmu.ifi.dbs.elki.math.linearalgebra.CovarianceMatrix) AbstractDataSourceTest(de.lmu.ifi.dbs.elki.datasource.AbstractDataSourceTest) Test(org.junit.Test)

Example 14 with MultipleObjectsBundle

use of de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle in project elki by elki-project.

the class HistogramJitterFilterTest method parameters.

/**
 * Applies the histogram jitter filter with a seed of 0 and a jitter amount of
 * 0.01, then checks that the shape of the data is unchanged and that most
 * values stay close to their original value.
 */
@Test
public void parameters() {
    String inputFile = UNITTEST + "transformation-test-1.csv";
    // Filter configuration: random seed and jitter amount.
    final double seed = 0.;
    final double jitter = .01;
    HistogramJitterFilter<DoubleVector> jitterFilter = new ELKIBuilder<>(HistogramJitterFilter.class) //
        .with(HistogramJitterFilter.Parameterizer.SEED_ID, seed) //
        .with(HistogramJitterFilter.Parameterizer.JITTER_ID, jitter) //
        .build();
    MultipleObjectsBundle jittered = readBundle(inputFile, jitterFilter);
    // Second read of the same file, this time without any filter, as reference.
    MultipleObjectsBundle reference = readBundle(inputFile);

    // Neither the dimensionality nor the number of rows may change.
    int dim = getFieldDimensionality(jittered, 0, TypeUtil.NUMBER_VECTOR_FIELD);
    int referenceDim = getFieldDimensionality(reference, 0, TypeUtil.NUMBER_VECTOR_FIELD);
    assertEquals("Dimensionality changed", dim, referenceDim);
    assertEquals("Test file interpreted incorrectly", jittered.dataLength(), reference.dataLength());

    // Both thresholds are fractions, not percentages: a value counts as close
    // when its relative deviation from the original is at most maxDeviation,
    // and roughly a share of minShare of all values has to be close.
    final double minShare = .9;
    final double maxDeviation = .1;
    int closeValues = 0;
    for (int r = 0; r < jittered.dataLength(); r++) {
        DoubleVector jitteredVec = get(jittered, r, 0, DoubleVector.class);
        DoubleVector referenceVec = get(reference, r, 0, DoubleVector.class);
        for (int d = 0; d < dim; d++) {
            final double after = jitteredVec.doubleValue(d);
            final double before = referenceVec.doubleValue(d);
            final double relativeDeviation = Math.abs((after / before) - 1.);
            if (relativeDeviation <= maxDeviation) {
                closeValues++;
            }
        }
    }
    // The observed share of close values may fall short of 1 by at most
    // (1 - minShare), which is passed as the comparison delta.
    final double observedShare = closeValues / (double) (dim * jittered.dataLength());
    assertEquals("Too many values have moved too much", 1., observedShare, 1. - minShare);
}
Also used : MultipleObjectsBundle(de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle) DoubleVector(de.lmu.ifi.dbs.elki.data.DoubleVector) AbstractDataSourceTest(de.lmu.ifi.dbs.elki.datasource.AbstractDataSourceTest) Test(org.junit.Test)

Example 15 with MultipleObjectsBundle

use of de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle in project elki by elki-project.

the class ExternalIDFilterTest method parameters.

/**
 * Configures the filter with the index of the column whose label is to be
 * extracted as external ID, then checks the column layout of the resulting
 * bundle: vectors first, external IDs second, label lists third.
 */
@Test
public void parameters() {
    final int idColumn = 2;
    final String inputFile = UNITTEST + "external-id-test-1.csv";
    ExternalIDFilter idFilter = new ELKIBuilder<>(ExternalIDFilter.class) //
        .with(ExternalIDFilter.Parameterizer.EXTERNALID_INDEX_ID, idColumn) //
        .build();
    MultipleObjectsBundle result = readBundle(inputFile, idFilter);

    // Column 0: the number vectors.
    final boolean vectorsFirst = TypeUtil.NUMBER_VECTOR_FIELD.isAssignableFromType(result.meta(0));
    assertTrue("Test file not as expected", vectorsFirst);

    // Column 1: the external IDs, checked on the first row, ...
    Object firstId = result.data(0, 1);
    assertEquals("Unexpected data type", ExternalID.class, firstId.getClass());
    // ... with exactly one entry per row of the bundle.
    assertEquals("Unexpected data length", result.dataLength(), result.getColumn(1).size());

    // Column 2: the remaining labels, again checked on the first row.
    Object firstLabels = result.data(0, 2);
    assertEquals("Unexpected data type", LabelList.class, firstLabels.getClass());
}
Also used : ELKIBuilder(de.lmu.ifi.dbs.elki.utilities.ELKIBuilder) MultipleObjectsBundle(de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle) AbstractDataSourceTest(de.lmu.ifi.dbs.elki.datasource.AbstractDataSourceTest) Test(org.junit.Test)

Aggregations

MultipleObjectsBundle (de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle)72 AbstractDataSourceTest (de.lmu.ifi.dbs.elki.datasource.AbstractDataSourceTest)37 Test (org.junit.Test)37 DoubleVector (de.lmu.ifi.dbs.elki.data.DoubleVector)27 ArrayList (java.util.ArrayList)19 AbortException (de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException)13 NumberVector (de.lmu.ifi.dbs.elki.data.NumberVector)10 ELKIBuilder (de.lmu.ifi.dbs.elki.utilities.ELKIBuilder)10 VectorFieldTypeInformation (de.lmu.ifi.dbs.elki.data.type.VectorFieldTypeInformation)9 MeanVariance (de.lmu.ifi.dbs.elki.math.MeanVariance)8 List (java.util.List)7 LabelList (de.lmu.ifi.dbs.elki.data.LabelList)5 SimpleTypeInformation (de.lmu.ifi.dbs.elki.data.type.SimpleTypeInformation)5 InputStreamDatabaseConnection (de.lmu.ifi.dbs.elki.datasource.InputStreamDatabaseConnection)5 InputStream (java.io.InputStream)5 ClassLabel (de.lmu.ifi.dbs.elki.data.ClassLabel)4 TypeInformation (de.lmu.ifi.dbs.elki.data.type.TypeInformation)4 DBIDIter (de.lmu.ifi.dbs.elki.database.ids.DBIDIter)4 DBIDs (de.lmu.ifi.dbs.elki.database.ids.DBIDs)4 FiniteProgress (de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress)4