Search in sources :

Example 1 with InputStreamDatabaseConnection

use of de.lmu.ifi.dbs.elki.datasource.InputStreamDatabaseConnection in project elki by elki-project.

the class ArffParserTest method sparse.

/**
 * Parses the sparse ARFF unit test file through an
 * {@link InputStreamDatabaseConnection} and verifies the layout and contents
 * of the loaded bundle.
 *
 * @throws IOException if the test file cannot be opened or read
 */
@Test
public void sparse() throws IOException {
    final String path = UNITTEST + "parsertest.sparse.arff";
    final Parser arff = new ELKIBuilder<>(ArffParser.class).build();
    final MultipleObjectsBundle loaded;
    try (InputStream in = open(path);
        InputStreamDatabaseConnection conn = new InputStreamDatabaseConnection(in, null, arff)) {
        loaded = conn.loadData();
    }
    // Column 0 is expected to be a number vector field ...
    final boolean vectorsInFirstColumn = TypeUtil.NUMBER_VECTOR_FIELD.isAssignableFromType(loaded.meta(0));
    assertTrue("Test file not as expected", vectorsInFirstColumn);
    // ... and column 1 the class labels.
    final boolean labelsInSecondColumn = TypeUtil.CLASSLABEL.isAssignableFromType(loaded.meta(1));
    assertTrue("Test file not as expected", labelsInSecondColumn);
    // Two rows, each vector having four dimensions.
    assertEquals("Length", 2, loaded.dataLength());
    final NumberVector firstRow = (NumberVector) loaded.data(0, 0);
    assertEquals("Length", 4, firstRow.getDimensionality());
    // In the sparse format, attributes that are not listed must read as 0.
    final NumberVector secondRow = (NumberVector) loaded.data(1, 0);
    for (int d : new int[] { 0, 2 }) {
        assertEquals("Not 0 for missing data", 0., secondRow.doubleValue(d), 0.);
    }
    // Concrete classes of the objects stored in the two columns.
    final Class<?> vectorClass = loaded.data(0, 0).getClass();
    assertEquals("Unexpected data type", SparseDoubleVector.class, vectorClass);
    final Class<?> labelClass = loaded.data(0, 1).getClass();
    assertEquals("Unexpected data type", SimpleClassLabel.class, labelClass);
}
Also used : NumberVector(de.lmu.ifi.dbs.elki.data.NumberVector) InputStream(java.io.InputStream) MultipleObjectsBundle(de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle) InputStreamDatabaseConnection(de.lmu.ifi.dbs.elki.datasource.InputStreamDatabaseConnection) Test(org.junit.Test) AbstractDataSourceTest(de.lmu.ifi.dbs.elki.datasource.AbstractDataSourceTest)

Example 2 with InputStreamDatabaseConnection

use of de.lmu.ifi.dbs.elki.datasource.InputStreamDatabaseConnection in project elki by elki-project.

the class MaterializedKNNAndRKNNPreprocessorTest method testPreprocessor.

/**
 * Loads the test data set into an updatable database, builds a
 * {@link MaterializeKNNAndRKNNPreprocessor} on it, and runs the
 * {@code testKNNQueries} / {@code testRKNNQueries} helpers (defined elsewhere
 * in this class; presumably they compare both query implementations) with the
 * linear-scan and the preprocessor-backed queries: once on the initial data,
 * once after inserting random vectors, and once after deleting them again.
 */
@Test
public void testPreprocessor() {
    UpdatableDatabase db;
    // get database
    try (InputStream is = AbstractSimpleAlgorithmTest.open(dataset)) {
        ListParameterization params = new ListParameterization();
        // Setup parser and data loading
        NumberVectorLabelParser<DoubleVector> parser = new NumberVectorLabelParser<>(DoubleVector.FACTORY);
        InputStreamDatabaseConnection dbc = new InputStreamDatabaseConnection(is, new ArrayList<>(), parser);
        // Hand the connection to the database via the parameterization.
        params.addParameter(AbstractDatabase.Parameterizer.DATABASE_CONNECTION_ID, dbc);
        db = ClassGenericsUtil.parameterizeOrAbort(HashmapDatabase.class, params);
        db.initialize();
    } catch (IOException e) {
        // Fail like fail(...) would, but keep the IOException as the cause so
        // the real I/O problem shows up in the test report.
        throw new AssertionError("Test data " + dataset + " not found.", e);
    }
    Relation<DoubleVector> rep = db.getRelation(TypeUtil.DOUBLE_VECTOR_FIELD);
    DistanceQuery<DoubleVector> distanceQuery = db.getDistanceQuery(rep, EuclideanDistanceFunction.STATIC);
    // verify data set size.
    assertEquals("Data set size doesn't match parameters.", shoulds, rep.size());
    // get linear queries
    LinearScanDistanceKNNQuery<DoubleVector> lin_knn_query = new LinearScanDistanceKNNQuery<>(distanceQuery);
    LinearScanRKNNQuery<DoubleVector> lin_rknn_query = new LinearScanRKNNQuery<>(distanceQuery, lin_knn_query, k);
    // get preprocessed queries
    MaterializeKNNAndRKNNPreprocessor<DoubleVector> preproc = new MaterializeKNNAndRKNNPreprocessor<>(rep, distanceQuery.getDistanceFunction(), k);
    KNNQuery<DoubleVector> preproc_knn_query = preproc.getKNNQuery(distanceQuery, k);
    RKNNQuery<DoubleVector> preproc_rknn_query = preproc.getRKNNQuery(distanceQuery);
    // add as index
    db.getHierarchy().add(rep, preproc);
    // The preprocessor must not hand back the plain linear-scan implementations.
    assertFalse("Preprocessor knn query class incorrect.", preproc_knn_query instanceof LinearScanDistanceKNNQuery);
    assertFalse("Preprocessor rknn query class incorrect.", preproc_rknn_query instanceof LinearScanRKNNQuery);
    // test queries
    testKNNQueries(rep, lin_knn_query, preproc_knn_query, k);
    testRKNNQueries(rep, lin_rknn_query, preproc_rknn_query, k);
    // also test partial queries, forward only
    testKNNQueries(rep, lin_knn_query, preproc_knn_query, k / 2);
    // insert new objects
    List<DoubleVector> insertions = new ArrayList<>();
    NumberVector.Factory<DoubleVector> factory = RelationUtil.getNumberVectorFactory(rep);
    int dim = RelationUtil.dimensionality(rep);
    Random random = new Random(seed);
    for (int i = 0; i < updatesize; i++) {
        insertions.add(VectorUtil.randomVector(factory, dim, random));
    }
    // Keep the ids returned by insert() so the same objects can be removed again.
    DBIDs insertedIds = db.insert(MultipleObjectsBundle.makeSimple(rep.getDataTypeInformation(), insertions));
    // test queries
    testKNNQueries(rep, lin_knn_query, preproc_knn_query, k);
    testRKNNQueries(rep, lin_rknn_query, preproc_rknn_query, k);
    // delete the objects that were just inserted
    db.delete(insertedIds);
    // test queries
    testKNNQueries(rep, lin_knn_query, preproc_knn_query, k);
    testRKNNQueries(rep, lin_rknn_query, preproc_rknn_query, k);
}
Also used : UpdatableDatabase(de.lmu.ifi.dbs.elki.database.UpdatableDatabase) ArrayList(java.util.ArrayList) NumberVectorLabelParser(de.lmu.ifi.dbs.elki.datasource.parser.NumberVectorLabelParser) Random(java.util.Random) ListParameterization(de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.ListParameterization) LinearScanDistanceKNNQuery(de.lmu.ifi.dbs.elki.database.query.knn.LinearScanDistanceKNNQuery) InputStream(java.io.InputStream) HashmapDatabase(de.lmu.ifi.dbs.elki.database.HashmapDatabase) ArrayDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs) DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) InputStreamDatabaseConnection(de.lmu.ifi.dbs.elki.datasource.InputStreamDatabaseConnection) IOException(java.io.IOException) MaterializeKNNAndRKNNPreprocessor(de.lmu.ifi.dbs.elki.index.preprocessed.knn.MaterializeKNNAndRKNNPreprocessor) NumberVector(de.lmu.ifi.dbs.elki.data.NumberVector) DoubleVector(de.lmu.ifi.dbs.elki.data.DoubleVector) LinearScanRKNNQuery(de.lmu.ifi.dbs.elki.database.query.rknn.LinearScanRKNNQuery) AbstractSimpleAlgorithmTest(de.lmu.ifi.dbs.elki.algorithm.AbstractSimpleAlgorithmTest) Test(org.junit.Test)

Example 3 with InputStreamDatabaseConnection

use of de.lmu.ifi.dbs.elki.datasource.InputStreamDatabaseConnection in project elki by elki-project.

the class TermFrequencyParserTest method testDBLPData.

/**
 * Loads the DBLP term-frequency test data into a static database and checks
 * the dense Euclidean, sparse Euclidean and arc-cosine distances between the
 * first three vectors: self-distance 0, symmetry, and known reference values.
 *
 * @throws IOException if the test data cannot be opened or closed
 */
@Test
public void testDBLPData() throws IOException {
    final Database db;
    // Open the stream in try-with-resources so it is closed even when setup
    // or initialization fails; the database is only queried after
    // initialize() has returned.
    try (InputStream is = AbstractSimpleAlgorithmTest.open(DBLP_DATA)) {
        // Setup parser and data loading
        TermFrequencyParser<SparseDoubleVector> parser = new TermFrequencyParser<>(false, SparseDoubleVector.FACTORY);
        InputStreamDatabaseConnection dbc = new InputStreamDatabaseConnection(is, null, parser);
        ListParameterization config = new ListParameterization();
        config.addParameter(AbstractDatabase.Parameterizer.DATABASE_CONNECTION_ID, dbc);
        db = ClassGenericsUtil.parameterizeOrAbort(StaticArrayDatabase.class, config);
        if (config.hasUnusedParameters()) {
            fail("Unused parameters: " + config.getRemainingParameters());
        }
        if (config.hasErrors()) {
            config.logAndClearReportedErrors();
            fail("Parameterization errors.");
        }
        db.initialize();
    }
    Relation<SparseNumberVector> rel = db.getRelation(TypeUtil.SPARSE_VECTOR_VARIABLE_LENGTH);
    // Get first three objects:
    DBIDIter iter = rel.iterDBIDs();
    SparseNumberVector v1 = rel.get(iter);
    iter.advance();
    SparseNumberVector v2 = rel.get(iter);
    iter.advance();
    SparseNumberVector v3 = rel.get(iter);
    // "Dense" euclidean distance:
    double euclid1_12 = EuclideanDistanceFunction.STATIC.distance(v1, v2);
    double euclid1_13 = EuclideanDistanceFunction.STATIC.distance(v1, v3);
    double euclid1_23 = EuclideanDistanceFunction.STATIC.distance(v2, v3);
    double euclid1_21 = EuclideanDistanceFunction.STATIC.distance(v2, v1);
    // Sparse euclidean distance:
    double euclid2_12 = SparseEuclideanDistanceFunction.STATIC.distance(v1, v2);
    double euclid2_13 = SparseEuclideanDistanceFunction.STATIC.distance(v1, v3);
    double euclid2_23 = SparseEuclideanDistanceFunction.STATIC.distance(v2, v3);
    double euclid2_21 = SparseEuclideanDistanceFunction.STATIC.distance(v2, v1);
    // (Auto-switching) angular distance:
    double arccos_12 = ArcCosineDistanceFunction.STATIC.distance(v1, v2);
    double arccos_13 = ArcCosineDistanceFunction.STATIC.distance(v1, v3);
    double arccos_23 = ArcCosineDistanceFunction.STATIC.distance(v2, v3);
    double arccos_21 = ArcCosineDistanceFunction.STATIC.distance(v2, v1);
    // Self-distances must be 0.
    assertEquals("Euclidean self-distance is not 0.", 0., EuclideanDistanceFunction.STATIC.distance(v1, v1), Double.MIN_VALUE);
    assertEquals("Sparse Euclidean self-distance is not 0.", 0., SparseEuclideanDistanceFunction.STATIC.distance(v1, v1), Double.MIN_VALUE);
    assertEquals("Arccos self-distance is not 0.", 0., ArcCosineDistanceFunction.STATIC.distance(v1, v1), Double.MIN_VALUE);
    // Distances must be symmetric.
    assertEquals("Euclidean distance not symmetric.", euclid1_12, euclid1_21, Double.MIN_VALUE);
    assertEquals("Sparse Euclidean distance not symmetric.", euclid2_12, euclid2_21, Double.MIN_VALUE);
    assertEquals("Arccos distance not symmetric.", arccos_12, arccos_21, Double.MIN_VALUE);
    // Reference values. The 1e-20 tolerance is far below double resolution at
    // these magnitudes, so these are effectively exact comparisons.
    assertEquals("Euclidean distance 1-2 not as expected.", 684.4165398352088, euclid1_12, 1e-20);
    assertEquals("Sparse Euclidean distance 1-2 not as expected.", 684.4165398352088, euclid2_12, 1e-20);
    assertEquals("Arccos distance 1-2 not as expected.", 0.1901934493141418, arccos_12, 1e-20);
    assertEquals("Euclidean distance 1-3 not as expected.", 654.9862593978594, euclid1_13, 1e-20);
    assertEquals("Sparse Euclidean distance 1-3 not as expected.", 654.9862593978594, euclid2_13, 1e-20);
    assertEquals("Arccos distance 1-3 not as expected.", 0.18654347641726046, arccos_13, 1e-20);
    assertEquals("Euclidean distance 2-3 not as expected.", 231.78653972998518, euclid1_23, 1e-20);
    assertEquals("Sparse Euclidean distance 2-3 not as expected.", 231.78653972998518, euclid2_23, 1e-20);
    assertEquals("Arccos distance 2-3 not as expected.", 0.11138352337990569, arccos_23, 1e-20);
}
Also used : InputStream(java.io.InputStream) Database(de.lmu.ifi.dbs.elki.database.Database) AbstractDatabase(de.lmu.ifi.dbs.elki.database.AbstractDatabase) StaticArrayDatabase(de.lmu.ifi.dbs.elki.database.StaticArrayDatabase) SparseNumberVector(de.lmu.ifi.dbs.elki.data.SparseNumberVector) InputStreamDatabaseConnection(de.lmu.ifi.dbs.elki.datasource.InputStreamDatabaseConnection) SparseDoubleVector(de.lmu.ifi.dbs.elki.data.SparseDoubleVector) ListParameterization(de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.ListParameterization) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) Test(org.junit.Test) AbstractSimpleAlgorithmTest(de.lmu.ifi.dbs.elki.algorithm.AbstractSimpleAlgorithmTest)

Example 4 with InputStreamDatabaseConnection

use of de.lmu.ifi.dbs.elki.datasource.InputStreamDatabaseConnection in project elki by elki-project.

the class ArffParserTest method dense.

/**
 * Parses the dense ARFF unit test file through an
 * {@link InputStreamDatabaseConnection} and verifies the layout and contents
 * of the loaded bundle.
 *
 * @throws IOException if the test file cannot be opened or read
 */
@Test
public void dense() throws IOException {
    final String path = UNITTEST + "parsertest.arff";
    final Parser arff = new ELKIBuilder<>(ArffParser.class).build();
    final MultipleObjectsBundle loaded;
    try (InputStream in = open(path);
        InputStreamDatabaseConnection conn = new InputStreamDatabaseConnection(in, null, arff)) {
        loaded = conn.loadData();
    }
    // Expected column layout: vectors, class label, label list, external id.
    final boolean vectorsInColumn0 = TypeUtil.NUMBER_VECTOR_FIELD.isAssignableFromType(loaded.meta(0));
    assertTrue("Test file not as expected", vectorsInColumn0);
    final boolean classLabelInColumn1 = TypeUtil.CLASSLABEL.isAssignableFromType(loaded.meta(1));
    assertTrue("Test file not as expected", classLabelInColumn1);
    final boolean labelListInColumn2 = TypeUtil.LABELLIST.isAssignableFromType(loaded.meta(2));
    assertTrue("Test file not as expected", labelListInColumn2);
    final boolean externalIdInColumn3 = TypeUtil.EXTERNALID.isAssignableFromType(loaded.meta(3));
    assertTrue("Test file not as expected", externalIdInColumn3);
    // Eleven rows, each vector having four dimensions.
    assertEquals("Length", 11, loaded.dataLength());
    final NumberVector firstRow = (NumberVector) loaded.data(0, 0);
    assertEquals("Length", 4, firstRow.getDimensionality());
    // In the dense format, missing values must read as NaN (checked on row 10).
    final NumberVector rowWithGaps = (NumberVector) loaded.data(10, 0);
    for (int d : new int[] { 1, 3 }) {
        assertTrue("Expected NaN for missing data", Double.isNaN(rowWithGaps.doubleValue(d)));
    }
    // Concrete classes of the objects stored in the first two columns.
    final Class<?> vectorClass = loaded.data(0, 0).getClass();
    assertEquals("Unexpected data type", DoubleVector.class, vectorClass);
    final Class<?> labelClass = loaded.data(0, 1).getClass();
    assertEquals("Unexpected data type", SimpleClassLabel.class, labelClass);
}
Also used : NumberVector(de.lmu.ifi.dbs.elki.data.NumberVector) InputStream(java.io.InputStream) MultipleObjectsBundle(de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle) InputStreamDatabaseConnection(de.lmu.ifi.dbs.elki.datasource.InputStreamDatabaseConnection) Test(org.junit.Test) AbstractDataSourceTest(de.lmu.ifi.dbs.elki.datasource.AbstractDataSourceTest)

Example 5 with InputStreamDatabaseConnection

use of de.lmu.ifi.dbs.elki.datasource.InputStreamDatabaseConnection in project elki by elki-project.

the class LibSVMFormatParserTest method parameters.

/**
 * Parses the libSVM unit test file through an
 * {@link InputStreamDatabaseConnection} and verifies the layout and contents
 * of the loaded bundle.
 *
 * @throws IOException if the test file cannot be opened or read
 */
@Test
public void parameters() throws IOException {
    final String path = UNITTEST + "parsertest.libsvm";
    final Parser libsvm = new ELKIBuilder<>(LibSVMFormatParser.class).build();
    final MultipleObjectsBundle loaded;
    try (InputStream in = open(path);
        InputStreamDatabaseConnection conn = new InputStreamDatabaseConnection(in, null, libsvm)) {
        loaded = conn.loadData();
    }
    // Column 0 is expected to hold variable-length sparse vectors ...
    final boolean sparseVectorsInColumn0 = TypeUtil.SPARSE_VECTOR_VARIABLE_LENGTH.isAssignableFromType(loaded.meta(0));
    assertTrue("Test file not as expected", sparseVectorsInColumn0);
    // ... and column 1 the label lists.
    final boolean labelListInColumn1 = TypeUtil.LABELLIST.isAssignableFromType(loaded.meta(1));
    assertTrue("Test file not as expected", labelListInColumn1);
    // Four rows; the first vector has four dimensions.
    assertEquals("Length", 4, loaded.dataLength());
    final SparseNumberVector firstRow = (SparseNumberVector) loaded.data(0, 0);
    assertEquals("Length", 4, firstRow.getDimensionality());
    // Concrete classes of the objects stored in the two columns.
    final Class<?> vectorClass = loaded.data(0, 0).getClass();
    assertEquals("Unexpected data type", SparseFloatVector.class, vectorClass);
    final Class<?> labelClass = loaded.data(0, 1).getClass();
    assertEquals("Unexpected data type", LabelList.class, labelClass);
}
Also used : InputStream(java.io.InputStream) MultipleObjectsBundle(de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle) InputStreamDatabaseConnection(de.lmu.ifi.dbs.elki.datasource.InputStreamDatabaseConnection) Test(org.junit.Test) AbstractDataSourceTest(de.lmu.ifi.dbs.elki.datasource.AbstractDataSourceTest)

Aggregations

InputStreamDatabaseConnection (de.lmu.ifi.dbs.elki.datasource.InputStreamDatabaseConnection): 8; InputStream (java.io.InputStream): 8; Test (org.junit.Test): 7; AbstractDataSourceTest (de.lmu.ifi.dbs.elki.datasource.AbstractDataSourceTest): 5; MultipleObjectsBundle (de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle): 5; NumberVector (de.lmu.ifi.dbs.elki.data.NumberVector): 3; ELKIBuilder (de.lmu.ifi.dbs.elki.utilities.ELKIBuilder): 3; AbstractSimpleAlgorithmTest (de.lmu.ifi.dbs.elki.algorithm.AbstractSimpleAlgorithmTest): 2; AbstractDatabase (de.lmu.ifi.dbs.elki.database.AbstractDatabase): 2; Database (de.lmu.ifi.dbs.elki.database.Database): 2; StaticArrayDatabase (de.lmu.ifi.dbs.elki.database.StaticArrayDatabase): 2; ListParameterization (de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.ListParameterization): 2; IOException (java.io.IOException): 2; ArrayList (java.util.ArrayList): 2; DoubleVector (de.lmu.ifi.dbs.elki.data.DoubleVector): 1; SparseDoubleVector (de.lmu.ifi.dbs.elki.data.SparseDoubleVector): 1; SparseNumberVector (de.lmu.ifi.dbs.elki.data.SparseNumberVector): 1; HashmapDatabase (de.lmu.ifi.dbs.elki.database.HashmapDatabase): 1; UpdatableDatabase (de.lmu.ifi.dbs.elki.database.UpdatableDatabase): 1; ArrayDBIDs (de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs): 1