Search in sources :

Example 41 with MultipleObjectsBundle

use of de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle in project elki by elki-project.

the class SplitNumberVectorFilterTest method parameters.

/**
 * Builds a {@code SplitNumberVectorFilter} with a list of selected attribute
 * indices and checks that the first object of the filtered bundle's first
 * column is a {@code DoubleVector} with one dimension per listed index.
 */
@Test
public void parameters() {
    final String selectedColumns = "0,1,2,3,4";
    // Number of indices listed in selectedColumns.
    final int expectedDimensionality = 5;
    final String dataFile = UNITTEST + "dimensionality-test-1.csv";

    SplitNumberVectorFilter<DoubleVector> splitFilter = new ELKIBuilder<>(SplitNumberVectorFilter.class)
        .with(SplitNumberVectorFilter.Parameterizer.SELECTED_ATTRIBUTES_ID, selectedColumns)
        .build();

    // Read the file once through the filter and once without any filter.
    MultipleObjectsBundle withFilter = readBundle(dataFile, splitFilter);
    MultipleObjectsBundle withoutFilter = readBundle(dataFile);

    // Sanity check: column 0 must be a number vector field in both bundles.
    assertTrue("Test file not as expected", TypeUtil.NUMBER_VECTOR_FIELD.isAssignableFromType(withFilter.meta(0)));
    assertTrue("Test file not as expected", TypeUtil.NUMBER_VECTOR_FIELD.isAssignableFromType(withoutFilter.meta(0)));

    // The first object of the filtered first column must be a DoubleVector
    // whose dimensionality equals the number of selected columns.
    final Object firstEntry = withFilter.data(0, 0);
    assertEquals("Unexpected data type", DoubleVector.class, firstEntry.getClass());
    assertEquals("Unexpected dimensionality", expectedDimensionality, ((DoubleVector) firstEntry).getDimensionality());
}
Also used : ELKIBuilder(de.lmu.ifi.dbs.elki.utilities.ELKIBuilder) MultipleObjectsBundle(de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle) DoubleVector(de.lmu.ifi.dbs.elki.data.DoubleVector) AbstractDataSourceTest(de.lmu.ifi.dbs.elki.datasource.AbstractDataSourceTest) Test(org.junit.Test)

Example 42 with MultipleObjectsBundle

use of de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle in project elki by elki-project.

the class ArffParserTest method dense.

/**
 * Parses the dense ARFF test file and verifies the column types, the number
 * of objects, the vector dimensionality, and that missing values in the last
 * object are represented as NaN.
 */
@Test
public void dense() throws IOException {
    final String arffFile = UNITTEST + "parsertest.arff";
    final Parser arffParser = new ELKIBuilder<>(ArffParser.class).build();

    MultipleObjectsBundle loaded;
    try (InputStream in = open(arffFile);
        InputStreamDatabaseConnection connection = new InputStreamDatabaseConnection(in, null, arffParser)) {
        loaded = connection.loadData();
    }

    // Expected column layout produced by the parser:
    // 0 = number vector field, 1 = class label, 2 = label list, 3 = external id.
    assertTrue("Test file not as expected", TypeUtil.NUMBER_VECTOR_FIELD.isAssignableFromType(loaded.meta(0)));
    assertTrue("Test file not as expected", TypeUtil.CLASSLABEL.isAssignableFromType(loaded.meta(1)));
    assertTrue("Test file not as expected", TypeUtil.LABELLIST.isAssignableFromType(loaded.meta(2)));
    assertTrue("Test file not as expected", TypeUtil.EXTERNALID.isAssignableFromType(loaded.meta(3)));

    // 11 objects are expected, the first vector having 4 dimensions.
    assertEquals("Length", 11, loaded.dataLength());
    final NumberVector firstVector = (NumberVector) loaded.data(0, 0);
    assertEquals("Length", 4, firstVector.getDimensionality());

    // In the dense format, missing values must show up as NaN; the object at
    // index 10 is expected to have them at dimensions 1 and 3.
    final NumberVector withMissing = (NumberVector) loaded.data(10, 0);
    assertTrue("Expected NaN for missing data", Double.isNaN(withMissing.doubleValue(1)));
    assertTrue("Expected NaN for missing data", Double.isNaN(withMissing.doubleValue(3)));

    // Concrete runtime classes of the first object's vector and class label.
    final Object vectorEntry = loaded.data(0, 0);
    assertEquals("Unexpected data type", DoubleVector.class, vectorEntry.getClass());
    final Object classEntry = loaded.data(0, 1);
    assertEquals("Unexpected data type", SimpleClassLabel.class, classEntry.getClass());
}
Also used : NumberVector(de.lmu.ifi.dbs.elki.data.NumberVector) InputStream(java.io.InputStream) MultipleObjectsBundle(de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle) InputStreamDatabaseConnection(de.lmu.ifi.dbs.elki.datasource.InputStreamDatabaseConnection) Test(org.junit.Test) AbstractDataSourceTest(de.lmu.ifi.dbs.elki.datasource.AbstractDataSourceTest)

Example 43 with MultipleObjectsBundle

use of de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle in project elki by elki-project.

the class LibSVMFormatParserTest method parameters.

/**
 * Parses the libSVM test file and verifies the column types, the number of
 * objects, the dimensionality of the first vector, and the concrete classes
 * of the first object's vector and labels.
 */
@Test
public void parameters() throws IOException {
    final String libsvmFile = UNITTEST + "parsertest.libsvm";
    final Parser libsvmParser = new ELKIBuilder<>(LibSVMFormatParser.class).build();

    MultipleObjectsBundle loaded;
    try (InputStream in = open(libsvmFile);
        InputStreamDatabaseConnection connection = new InputStreamDatabaseConnection(in, null, libsvmParser)) {
        loaded = connection.loadData();
    }

    // Expected column layout produced by the parser:
    // 0 = variable-length sparse vectors, 1 = label list.
    assertTrue("Test file not as expected", TypeUtil.SPARSE_VECTOR_VARIABLE_LENGTH.isAssignableFromType(loaded.meta(0)));
    assertTrue("Test file not as expected", TypeUtil.LABELLIST.isAssignableFromType(loaded.meta(1)));

    // 4 objects are expected, the first vector having 4 dimensions.
    assertEquals("Length", 4, loaded.dataLength());
    final SparseNumberVector firstVector = (SparseNumberVector) loaded.data(0, 0);
    assertEquals("Length", 4, firstVector.getDimensionality());

    // Concrete runtime classes of the first object's vector and label list.
    final Object vectorEntry = loaded.data(0, 0);
    assertEquals("Unexpected data type", SparseFloatVector.class, vectorEntry.getClass());
    final Object labelEntry = loaded.data(0, 1);
    assertEquals("Unexpected data type", LabelList.class, labelEntry.getClass());
}
Also used : InputStream(java.io.InputStream) MultipleObjectsBundle(de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle) InputStreamDatabaseConnection(de.lmu.ifi.dbs.elki.datasource.InputStreamDatabaseConnection) Test(org.junit.Test) AbstractDataSourceTest(de.lmu.ifi.dbs.elki.datasource.AbstractDataSourceTest)

Example 44 with MultipleObjectsBundle

use of de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle in project elki by elki-project.

the class SparseNumberVectorLabelParserTest method parameters.

/**
 * Parses the sparse test file with the vector type parameter set to
 * {@code SparseDoubleVector.Factory} and verifies the column types, the
 * number of objects, the dimensionality of the first vector, and the concrete
 * classes of the first object's vector and labels.
 */
@Test
public void parameters() throws IOException {
    final String sparseFile = UNITTEST + "parsertest.sparse";
    final Parser sparseParser = new ELKIBuilder<>(SparseNumberVectorLabelParser.class)
        .with(NumberVectorLabelParser.Parameterizer.VECTOR_TYPE_ID, SparseDoubleVector.Factory.class)
        .build();

    MultipleObjectsBundle loaded;
    try (InputStream in = open(sparseFile);
        InputStreamDatabaseConnection connection = new InputStreamDatabaseConnection(in, null, sparseParser)) {
        loaded = connection.loadData();
    }

    // Expected column layout produced by the parser:
    // 0 = variable-length sparse vectors, 1 = label list.
    assertTrue("Test file not as expected", TypeUtil.SPARSE_VECTOR_VARIABLE_LENGTH.isAssignableFromType(loaded.meta(0)));
    assertTrue("Test file not as expected", TypeUtil.LABELLIST.isAssignableFromType(loaded.meta(1)));

    // 3 objects are expected, the first vector having 4 dimensions.
    assertEquals("Length", 3, loaded.dataLength());
    final SparseNumberVector firstVector = (SparseNumberVector) loaded.data(0, 0);
    assertEquals("Length", 4, firstVector.getDimensionality());

    // Concrete runtime classes of the first object's vector and label list.
    final Object vectorEntry = loaded.data(0, 0);
    assertEquals("Unexpected data type", SparseDoubleVector.class, vectorEntry.getClass());
    final Object labelEntry = loaded.data(0, 1);
    assertEquals("Unexpected data type", LabelList.class, labelEntry.getClass());
}
Also used : ELKIBuilder(de.lmu.ifi.dbs.elki.utilities.ELKIBuilder) InputStream(java.io.InputStream) MultipleObjectsBundle(de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle) InputStreamDatabaseConnection(de.lmu.ifi.dbs.elki.datasource.InputStreamDatabaseConnection) Test(org.junit.Test) AbstractDataSourceTest(de.lmu.ifi.dbs.elki.datasource.AbstractDataSourceTest)

Example 45 with MultipleObjectsBundle

use of de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle in project elki by elki-project.

the class TermFrequencyParserTest method parameters.

/**
 * Parses the term-frequency test file with the normalize flag set and
 * verifies the column types, the number of objects, the dimensionality of
 * both vectors, and the concrete classes of the first object's vector and
 * labels.
 */
@Test
public void parameters() throws IOException {
    final String tfFile = UNITTEST + "parsertest.tf";
    final Parser tfParser = new ELKIBuilder<>(TermFrequencyParser.class)
        .with(TermFrequencyParser.Parameterizer.NORMALIZE_FLAG)
        .build();

    MultipleObjectsBundle loaded;
    try (InputStream in = open(tfFile);
        InputStreamDatabaseConnection connection = new InputStreamDatabaseConnection(in, null, tfParser)) {
        loaded = connection.loadData();
    }

    // Expected column layout produced by the parser:
    // 0 = variable-length sparse vectors, 1 = label list.
    assertTrue("Test file not as expected", TypeUtil.SPARSE_VECTOR_VARIABLE_LENGTH.isAssignableFromType(loaded.meta(0)));
    assertTrue("Test file not as expected", TypeUtil.LABELLIST.isAssignableFromType(loaded.meta(1)));

    // 2 objects are expected; their vectors differ in dimensionality (2 and 4).
    assertEquals("Length", 2, loaded.dataLength());
    final SparseNumberVector firstVector = (SparseNumberVector) loaded.data(0, 0);
    assertEquals("Length", 2, firstVector.getDimensionality());
    final SparseNumberVector secondVector = (SparseNumberVector) loaded.data(1, 0);
    assertEquals("Length", 4, secondVector.getDimensionality());

    // TODO: the map of words to columns is currently NOT kept.
    // Add this, and test this.

    // Concrete runtime classes of the first object's vector and label list.
    final Object vectorEntry = loaded.data(0, 0);
    assertEquals("Unexpected data type", SparseFloatVector.class, vectorEntry.getClass());
    final Object labelEntry = loaded.data(0, 1);
    assertEquals("Unexpected data type", LabelList.class, labelEntry.getClass());
}
Also used : ELKIBuilder(de.lmu.ifi.dbs.elki.utilities.ELKIBuilder) InputStream(java.io.InputStream) MultipleObjectsBundle(de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle) InputStreamDatabaseConnection(de.lmu.ifi.dbs.elki.datasource.InputStreamDatabaseConnection) Test(org.junit.Test) AbstractDataSourceTest(de.lmu.ifi.dbs.elki.datasource.AbstractDataSourceTest)

Aggregations

MultipleObjectsBundle (de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle)72 AbstractDataSourceTest (de.lmu.ifi.dbs.elki.datasource.AbstractDataSourceTest)37 Test (org.junit.Test)37 DoubleVector (de.lmu.ifi.dbs.elki.data.DoubleVector)27 ArrayList (java.util.ArrayList)19 AbortException (de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException)13 NumberVector (de.lmu.ifi.dbs.elki.data.NumberVector)10 ELKIBuilder (de.lmu.ifi.dbs.elki.utilities.ELKIBuilder)10 VectorFieldTypeInformation (de.lmu.ifi.dbs.elki.data.type.VectorFieldTypeInformation)9 MeanVariance (de.lmu.ifi.dbs.elki.math.MeanVariance)8 List (java.util.List)7 LabelList (de.lmu.ifi.dbs.elki.data.LabelList)5 SimpleTypeInformation (de.lmu.ifi.dbs.elki.data.type.SimpleTypeInformation)5 InputStreamDatabaseConnection (de.lmu.ifi.dbs.elki.datasource.InputStreamDatabaseConnection)5 InputStream (java.io.InputStream)5 ClassLabel (de.lmu.ifi.dbs.elki.data.ClassLabel)4 TypeInformation (de.lmu.ifi.dbs.elki.data.type.TypeInformation)4 DBIDIter (de.lmu.ifi.dbs.elki.database.ids.DBIDIter)4 DBIDs (de.lmu.ifi.dbs.elki.database.ids.DBIDs)4 FiniteProgress (de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress)4