Use of de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle in project elki by elki-project.
From the class SplitNumberVectorFilterTest, method parameters.
/**
 * Runs the split filter with an explicit list of selected attributes and
 * checks that the first bundle column holds vectors of the expected
 * dimensionality.
 */
@Test
public void parameters() {
  final String selectedColumns = "0,1,2,3,4";
  // Number of attribute indexes listed in selectedColumns.
  final int expectedDim = 5;
  final String path = UNITTEST + "dimensionality-test-1.csv";
  SplitNumberVectorFilter<DoubleVector> splitter = //
      new ELKIBuilder<>(SplitNumberVectorFilter.class).with(SplitNumberVectorFilter.Parameterizer.SELECTED_ATTRIBUTES_ID, selectedColumns).build();
  MultipleObjectsBundle split = readBundle(path, splitter);
  // Reference copy of the same file, read without any filter.
  MultipleObjectsBundle plain = readBundle(path);

  // Sanity check: column 0 must be a number vector field in both bundles.
  assertTrue("Test file not as expected", TypeUtil.NUMBER_VECTOR_FIELD.isAssignableFromType(split.meta(0)));
  assertTrue("Test file not as expected", TypeUtil.NUMBER_VECTOR_FIELD.isAssignableFromType(plain.meta(0)));

  // The first object of column 0 must be a DoubleVector with one dimension
  // per selected attribute.
  Object first = split.data(0, 0);
  assertEquals("Unexpected data type", DoubleVector.class, first.getClass());
  assertEquals("Unexpected dimensionality", expectedDim, ((DoubleVector) first).getDimensionality());
}
Use of de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle in project elki by elki-project.
From the class ArffParserTest, method dense.
/**
 * Parses the dense ARFF test file and verifies the column types, the number
 * of objects, the vector dimensionality and the handling of missing values.
 */
@Test
public void dense() throws IOException {
  final String path = UNITTEST + "parsertest.arff";
  Parser arffParser = new ELKIBuilder<>(ArffParser.class).build();
  MultipleObjectsBundle parsed;
  try (InputStream in = open(path);
      InputStreamDatabaseConnection connection = new InputStreamDatabaseConnection(in, null, arffParser)) {
    parsed = connection.loadData();
  }

  // Expected column layout of the parsed bundle:
  // 0 = number vectors, 1 = class label, 2 = label list, 3 = external id.
  assertTrue("Test file not as expected", TypeUtil.NUMBER_VECTOR_FIELD.isAssignableFromType(parsed.meta(0)));
  assertTrue("Test file not as expected", TypeUtil.CLASSLABEL.isAssignableFromType(parsed.meta(1)));
  assertTrue("Test file not as expected", TypeUtil.LABELLIST.isAssignableFromType(parsed.meta(2)));
  assertTrue("Test file not as expected", TypeUtil.EXTERNALID.isAssignableFromType(parsed.meta(3)));

  // Number of objects, and dimensionality of the first vector.
  assertEquals("Length", 11, parsed.dataLength());
  NumberVector firstVector = (NumberVector) parsed.data(0, 0);
  assertEquals("Length", 4, firstVector.getDimensionality());

  // In the dense format, missing values are expected to be read as NaN.
  NumberVector withMissing = (NumberVector) parsed.data(10, 0);
  double second = withMissing.doubleValue(1);
  assertTrue("Expected NaN for missing data", Double.isNaN(second));
  double fourth = withMissing.doubleValue(3);
  assertTrue("Expected NaN for missing data", Double.isNaN(fourth));

  // Concrete runtime classes of the first object in columns 0 and 1.
  assertEquals("Unexpected data type", DoubleVector.class, parsed.data(0, 0).getClass());
  assertEquals("Unexpected data type", SimpleClassLabel.class, parsed.data(0, 1).getClass());
}
Use of de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle in project elki by elki-project.
From the class LibSVMFormatParserTest, method parameters.
/**
 * Parses the libSVM test file with a default-configured parser and verifies
 * the column types, the number of objects and the vector dimensionality.
 */
@Test
public void parameters() throws IOException {
  final String path = UNITTEST + "parsertest.libsvm";
  Parser libsvmParser = new ELKIBuilder<>(LibSVMFormatParser.class).build();
  MultipleObjectsBundle parsed;
  try (InputStream in = open(path);
      InputStreamDatabaseConnection connection = new InputStreamDatabaseConnection(in, null, libsvmParser)) {
    parsed = connection.loadData();
  }

  // Expected column layout of the parsed bundle:
  // 0 = sparse vectors of variable length, 1 = label list.
  assertTrue("Test file not as expected", TypeUtil.SPARSE_VECTOR_VARIABLE_LENGTH.isAssignableFromType(parsed.meta(0)));
  assertTrue("Test file not as expected", TypeUtil.LABELLIST.isAssignableFromType(parsed.meta(1)));

  // Number of objects, and dimensionality of the first vector.
  assertEquals("Length", 4, parsed.dataLength());
  SparseNumberVector firstVector = (SparseNumberVector) parsed.data(0, 0);
  assertEquals("Length", 4, firstVector.getDimensionality());

  // Concrete runtime classes of the first object in each column.
  assertEquals("Unexpected data type", SparseFloatVector.class, parsed.data(0, 0).getClass());
  assertEquals("Unexpected data type", LabelList.class, parsed.data(0, 1).getClass());
}
Use of de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle in project elki by elki-project.
From the class SparseNumberVectorLabelParserTest, method parameters.
/**
 * Parses the sparse test file with the vector type set to the
 * SparseDoubleVector factory and verifies the column types, the number of
 * objects and the vector dimensionality.
 */
@Test
public void parameters() throws IOException {
  final String path = UNITTEST + "parsertest.sparse";
  Parser sparseParser = //
      new ELKIBuilder<>(SparseNumberVectorLabelParser.class).with(NumberVectorLabelParser.Parameterizer.VECTOR_TYPE_ID, //
          SparseDoubleVector.Factory.class).build();
  MultipleObjectsBundle parsed;
  try (InputStream in = open(path);
      InputStreamDatabaseConnection connection = new InputStreamDatabaseConnection(in, null, sparseParser)) {
    parsed = connection.loadData();
  }

  // Expected column layout of the parsed bundle:
  // 0 = sparse vectors of variable length, 1 = label list.
  assertTrue("Test file not as expected", TypeUtil.SPARSE_VECTOR_VARIABLE_LENGTH.isAssignableFromType(parsed.meta(0)));
  assertTrue("Test file not as expected", TypeUtil.LABELLIST.isAssignableFromType(parsed.meta(1)));

  // Number of objects, and dimensionality of the first vector.
  assertEquals("Length", 3, parsed.dataLength());
  SparseNumberVector firstVector = (SparseNumberVector) parsed.data(0, 0);
  assertEquals("Length", 4, firstVector.getDimensionality());

  // Concrete runtime classes of the first object in each column; the vector
  // class must be the one configured above.
  assertEquals("Unexpected data type", SparseDoubleVector.class, parsed.data(0, 0).getClass());
  assertEquals("Unexpected data type", LabelList.class, parsed.data(0, 1).getClass());
}
Use of de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle in project elki by elki-project.
From the class TermFrequencyParserTest, method parameters.
/**
 * Parses the term frequency test file with the normalize flag set and
 * verifies the column types, the number of objects and the dimensionality
 * of both vectors.
 */
@Test
public void parameters() throws IOException {
  final String path = UNITTEST + "parsertest.tf";
  Parser tfParser = //
      new ELKIBuilder<>(TermFrequencyParser.class).with(//
          TermFrequencyParser.Parameterizer.NORMALIZE_FLAG).build();
  MultipleObjectsBundle parsed;
  try (InputStream in = open(path);
      InputStreamDatabaseConnection connection = new InputStreamDatabaseConnection(in, null, tfParser)) {
    parsed = connection.loadData();
  }

  // Expected column layout of the parsed bundle:
  // 0 = sparse vectors of variable length, 1 = label list.
  assertTrue("Test file not as expected", TypeUtil.SPARSE_VECTOR_VARIABLE_LENGTH.isAssignableFromType(parsed.meta(0)));
  assertTrue("Test file not as expected", TypeUtil.LABELLIST.isAssignableFromType(parsed.meta(1)));

  // Number of objects, and dimensionality of each of the two vectors.
  assertEquals("Length", 2, parsed.dataLength());
  SparseNumberVector firstVector = (SparseNumberVector) parsed.data(0, 0);
  assertEquals("Length", 2, firstVector.getDimensionality());
  SparseNumberVector secondVector = (SparseNumberVector) parsed.data(1, 0);
  assertEquals("Length", 4, secondVector.getDimensionality());

  // TODO: the map of words to columns is currently NOT kept.
  // Add this, and test this.

  // Concrete runtime classes of the first object in each column.
  assertEquals("Unexpected data type", SparseFloatVector.class, parsed.data(0, 0).getClass());
  assertEquals("Unexpected data type", LabelList.class, parsed.data(0, 1).getClass());
}
Aggregations