Search in sources :

Example 96 with Database

use of de.lmu.ifi.dbs.elki.database.Database in project elki by elki-project.

the class SpacefillingKNNPreprocessorTest method testSubset.

/**
 * Builds a {@link SpacefillingKNNPreprocessor} configured with a random
 * subset projection on the simple test database and hands both its kNN query
 * and a linear scan kNN query to {@code testKNNQueries}, for k and for k / 2.
 */
@Test
public void testSubset() {
    final Database database = AbstractSimpleAlgorithmTest.makeSimpleDatabase(dataset, shoulds);
    final Relation<DoubleVector> relation = database.getRelation(TypeUtil.DOUBLE_VECTOR_FIELD);
    final DistanceQuery<DoubleVector> euclidean = database.getDistanceQuery(relation, EuclideanDistanceFunction.STATIC);

    // Reference query: plain linear scan.
    final LinearScanDistanceKNNQuery<DoubleVector> linearKnn = new LinearScanDistanceKNNQuery<>(euclidean);

    // Comma separated class names of the spatial sorters to use.
    final String curves = HilbertSpatialSorter.class.getName() //
        + "," + PeanoSpatialSorter.class.getName() //
        + "," + ZCurveSpatialSorter.class.getName() //
        + "," + BinarySplitSpatialSorter.class.getName();

    // Parameters of the preprocessor factory.
    final ListParameterization params = new ListParameterization();
    params.addParameter(SpacefillingKNNPreprocessor.Factory.Parameterizer.CURVES_ID, curves);
    params.addParameter(SpacefillingKNNPreprocessor.Factory.Parameterizer.DIM_ID, 7);
    params.addParameter(SpacefillingKNNPreprocessor.Factory.Parameterizer.PROJECTION_ID, RandomSubsetProjectionFamily.class);
    params.addParameter(SpacefillingKNNPreprocessor.Factory.Parameterizer.VARIANTS_ID, 10);
    params.addParameter(SpacefillingKNNPreprocessor.Factory.Parameterizer.WINDOW_ID, 5.0);
    params.addParameter(SpacefillingKNNPreprocessor.Factory.Parameterizer.RANDOM_ID, 0L);
    params.addParameter(RandomSubsetProjectionFamily.Parameterizer.RANDOM_ID, 0L);

    // Instantiate and initialize the preprocessor for the relation.
    final SpacefillingKNNPreprocessor.Factory<DoubleVector> factory = ClassGenericsUtil.parameterizeOrAbort(SpacefillingKNNPreprocessor.Factory.class, params);
    final SpacefillingKNNPreprocessor<DoubleVector> index = factory.instantiate(relation);
    index.initialize();
    // Register the preprocessor as a child of the relation in the hierarchy.
    database.getHierarchy().add(relation, index);

    // The preprocessor must provide its own query, not a linear scan.
    final KNNQuery<DoubleVector> indexKnn = index.getKNNQuery(euclidean, k);
    assertFalse("Preprocessor knn query class incorrect.", indexKnn instanceof LinearScanDistanceKNNQuery);

    // Compare with the full k ...
    testKNNQueries(relation, linearKnn, indexKnn, k);
    // ... and with a partial query of half the size.
    testKNNQueries(relation, linearKnn, indexKnn, k / 2);
}
Also used : SpacefillingKNNPreprocessor(de.lmu.ifi.dbs.elki.index.preprocessed.knn.SpacefillingKNNPreprocessor) LinearScanDistanceKNNQuery(de.lmu.ifi.dbs.elki.database.query.knn.LinearScanDistanceKNNQuery) Database(de.lmu.ifi.dbs.elki.database.Database) DoubleVector(de.lmu.ifi.dbs.elki.data.DoubleVector) HilbertSpatialSorter(de.lmu.ifi.dbs.elki.math.spacefillingcurves.HilbertSpatialSorter) ListParameterization(de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.ListParameterization) Test(org.junit.Test) AbstractSimpleAlgorithmTest(de.lmu.ifi.dbs.elki.algorithm.AbstractSimpleAlgorithmTest)

Example 97 with Database

use of de.lmu.ifi.dbs.elki.database.Database in project elki by elki-project.

the class SpacefillingMaterializeKNNPreprocessorTest method testPreprocessor.

/**
 * Builds a {@link SpacefillingMaterializeKNNPreprocessor} on the simple test
 * database and hands both its kNN query and a linear scan kNN query to
 * {@code testKNNQueries}, for k and for k / 2.
 */
@Test
public void testPreprocessor() {
    final Database database = AbstractSimpleAlgorithmTest.makeSimpleDatabase(dataset, shoulds);
    final Relation<DoubleVector> relation = database.getRelation(TypeUtil.DOUBLE_VECTOR_FIELD);
    final DistanceQuery<DoubleVector> euclidean = database.getDistanceQuery(relation, EuclideanDistanceFunction.STATIC);

    // Reference query: plain linear scan.
    final LinearScanDistanceKNNQuery<DoubleVector> linearKnn = new LinearScanDistanceKNNQuery<>(euclidean);

    // Comma separated class names of the spatial sorters to use.
    final String curves = HilbertSpatialSorter.class.getName() //
        + "," + PeanoSpatialSorter.class.getName() //
        + "," + ZCurveSpatialSorter.class.getName() //
        + "," + BinarySplitSpatialSorter.class.getName();

    // Parameters of the preprocessor factory.
    final ListParameterization params = new ListParameterization();
    params.addParameter(SpacefillingMaterializeKNNPreprocessor.Factory.Parameterizer.CURVES_ID, curves);
    params.addParameter(SpacefillingMaterializeKNNPreprocessor.Factory.K_ID, k);
    params.addParameter(SpacefillingMaterializeKNNPreprocessor.Factory.Parameterizer.VARIANTS_ID, 10);
    params.addParameter(SpacefillingMaterializeKNNPreprocessor.Factory.Parameterizer.WINDOW_ID, 1.0);
    params.addParameter(SpacefillingMaterializeKNNPreprocessor.Factory.Parameterizer.RANDOM_ID, 0L);

    // Instantiate and initialize the preprocessor for the relation.
    final SpacefillingMaterializeKNNPreprocessor.Factory<DoubleVector> factory = ClassGenericsUtil.parameterizeOrAbort(SpacefillingMaterializeKNNPreprocessor.Factory.class, params);
    final SpacefillingMaterializeKNNPreprocessor<DoubleVector> index = factory.instantiate(relation);
    index.initialize();
    // Register the preprocessor as a child of the relation in the hierarchy.
    database.getHierarchy().add(relation, index);

    // The preprocessor must provide its own query, not a linear scan.
    final KNNQuery<DoubleVector> indexKnn = index.getKNNQuery(euclidean, k);
    assertFalse("Preprocessor knn query class incorrect.", indexKnn instanceof LinearScanDistanceKNNQuery);

    // Compare with the full k ...
    testKNNQueries(relation, linearKnn, indexKnn, k);
    // ... and with a partial query of half the size.
    testKNNQueries(relation, linearKnn, indexKnn, k / 2);
}
Also used : SpacefillingMaterializeKNNPreprocessor(de.lmu.ifi.dbs.elki.index.preprocessed.knn.SpacefillingMaterializeKNNPreprocessor) LinearScanDistanceKNNQuery(de.lmu.ifi.dbs.elki.database.query.knn.LinearScanDistanceKNNQuery) Database(de.lmu.ifi.dbs.elki.database.Database) DoubleVector(de.lmu.ifi.dbs.elki.data.DoubleVector) HilbertSpatialSorter(de.lmu.ifi.dbs.elki.math.spacefillingcurves.HilbertSpatialSorter) ListParameterization(de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.ListParameterization) Test(org.junit.Test) AbstractSimpleAlgorithmTest(de.lmu.ifi.dbs.elki.algorithm.AbstractSimpleAlgorithmTest)

Example 98 with Database

use of de.lmu.ifi.dbs.elki.database.Database in project elki by elki-project.

the class KDDTask method run.

/**
 * Executes the task by running its steps in order: input, algorithms,
 * evaluation and output. The result hierarchy of the loaded database is kept
 * in {@code hier} and passed on to the evaluation and output steps.
 */
public void run() {
    // Step 1 - input: obtain the database and remember its hierarchy.
    final Database database = inputStep.getDatabase();
    hier = database.getHierarchy();

    // Step 2 - data mining: run the algorithms on the database.
    algorithmStep.runAlgorithms(database);

    // Attach the settings as a child of the database in the hierarchy.
    // TODO: this could be nicer
    final SettingsResult settingsResult = new SettingsResult(settings);
    hier.add(database, settingsResult);

    // Step 3 - evaluation.
    evaluationStep.runEvaluators(hier, database);

    // Step 4 - output / visualization.
    outputStep.runResultHandlers(hier, database);
}
Also used : Database(de.lmu.ifi.dbs.elki.database.Database) SettingsResult(de.lmu.ifi.dbs.elki.result.SettingsResult)

Example 99 with Database

use of de.lmu.ifi.dbs.elki.database.Database in project elki by elki-project.

the class AbstractSimpleAlgorithmTest method makeSimpleDatabase.

/**
 * Generate a simple database from a file.
 * <p>
 * The given parameter list is extended with the database connection class and
 * the opened input stream, and is then used to parameterize a
 * {@link StaticArrayDatabase}. The test fails if parameters remain unused or
 * if parameterization errors were reported.
 *
 * @param filename File to load
 * @param expectedSize Expected size in records; only checked when positive
 * @param params Extra parameters, must not be null; this list is modified
 * @return Initialized database
 * @throws AssertionError if the test data cannot be opened or closed (the
 *         {@link IOException} is attached as cause), or if any of the checks
 *         described above fails
 */
public static Database makeSimpleDatabase(String filename, int expectedSize, ListParameterization params) {
    assertNotNull("Params, if given, must not be null.", params);
    // Allow loading test data from resources.
    try (InputStream is = open(filename)) {
        params.addParameter(AbstractDatabase.Parameterizer.DATABASE_CONNECTION_ID, InputStreamDatabaseConnection.class);
        params.addParameter(InputStreamDatabaseConnection.Parameterizer.STREAM_ID, is);
        Database db = ClassGenericsUtil.parameterizeOrAbort(StaticArrayDatabase.class, params);
        // Ensure we have no unused parameters:
        if (params.hasUnusedParameters()) {
            fail("Unused parameters: " + params.getRemainingParameters());
        }
        // And report any parameterization errors:
        if (params.hasErrors()) {
            params.logAndClearReportedErrors();
            fail("Parameterization errors.");
        }
        db.initialize();
        // Check the relation has the expected size:
        Relation<?> rel = db.getRelation(TypeUtil.ANY);
        if (expectedSize > 0) {
            assertEquals("Database size does not match.", expectedSize, rel.size());
        }
        return db;
    } catch (IOException e) {
        // Same failure type and message as before, but keep the original
        // exception as cause so the real I/O problem shows up in the report.
        throw new AssertionError("Test data " + filename + " not found.", e);
    }
}
Also used : GZIPInputStream(java.util.zip.GZIPInputStream) InputStream(java.io.InputStream) Database(de.lmu.ifi.dbs.elki.database.Database) AbstractDatabase(de.lmu.ifi.dbs.elki.database.AbstractDatabase) StaticArrayDatabase(de.lmu.ifi.dbs.elki.database.StaticArrayDatabase) IOException(java.io.IOException)

Example 100 with Database

use of de.lmu.ifi.dbs.elki.database.Database in project elki by elki-project.

the class AbstractIndexStructureTest method testExactCosine.

/**
 * Actual test routine, for cosine distance.
 * <p>
 * Loads the test database with the given parameters, then - for each query
 * type whose expected class is not null - obtains the query from the
 * database, checks its class, runs it for the query point and compares the
 * result against the expected cosine distances and vectors.
 *
 * @param inputparams Database parameters; a fixed DBID filter is added to
 *        this list
 * @param expectKNNQuery Class the returned kNN query must be assignable to,
 *        or null to skip the kNN test
 * @param expectRangeQuery Class the returned range query must be assignable
 *        to, or null to skip the range test
 */
protected void testExactCosine(ListParameterization inputparams, Class<?> expectKNNQuery, Class<?> expectRangeQuery) {
    // Use a fixed DBID - historically, we used 1 indexed - to reduce random
    // variation in results due to different hash codes everywhere.
    inputparams.addParameter(AbstractDatabaseConnection.Parameterizer.FILTERS_ID, new FixedDBIDsFilter(1));
    final Database database = AbstractSimpleAlgorithmTest.makeSimpleDatabase(dataset, shoulds, inputparams);
    final Relation<DoubleVector> relation = database.getRelation(TypeUtil.DOUBLE_VECTOR_FIELD);
    final DistanceQuery<DoubleVector> cosine = database.getDistanceQuery(relation, CosineDistanceFunction.STATIC);

    if (expectKNNQuery != null) {
        // Nearest neighbors of the query point.
        final DoubleVector query = DoubleVector.wrap(querypoint);
        final KNNQuery<DoubleVector> knnQuery = database.getKNNQuery(cosine, k);
        assertTrue("Returned knn query is not of expected class: expected " + expectKNNQuery + " got " + knnQuery.getClass(), expectKNNQuery.isAssignableFrom(knnQuery.getClass()));
        final KNNList neighbors = knnQuery.getKNNForObject(query, k);
        assertEquals("Result size does not match expectation!", cosshouldd.length, neighbors.size());
        verifyCosineNeighbors(neighbors.iter(), relation, cosine);
    }

    if (expectRangeQuery != null) {
        // Range query around the query point.
        final DoubleVector query = DoubleVector.wrap(querypoint);
        final RangeQuery<DoubleVector> rangeQuery = database.getRangeQuery(cosine, coseps);
        assertTrue("Returned range query is not of expected class: expected " + expectRangeQuery + " got " + rangeQuery.getClass(), expectRangeQuery.isAssignableFrom(rangeQuery.getClass()));
        final DoubleDBIDList inRange = rangeQuery.getRangeForObject(query, coseps);
        assertEquals("Result size does not match expectation!", cosshouldd.length, inRange.size());
        verifyCosineNeighbors(inRange.iter(), relation, cosine);
    }
}

/**
 * Walk a result iterator and compare each entry, by position, with the
 * expected cosine distance and the expected vector.
 *
 * @param iter Iterator over the query result
 * @param relation Relation to fetch the result vectors from
 * @param cosine Distance query used to compare result and expected vector
 */
private void verifyCosineNeighbors(DoubleDBIDListIter iter, Relation<DoubleVector> relation, DistanceQuery<DoubleVector> cosine) {
    int pos = 0;
    while (iter.valid()) {
        // The reported distance must equal the expected one.
        assertEquals("Expected distance doesn't match.", cosshouldd[pos], iter.doubleValue(), 1e-15);
        // The returned vector must have zero distance to the expected one.
        final DoubleVector actual = relation.get(iter);
        final DoubleVector expected = DoubleVector.wrap(cosshouldc[pos]);
        assertEquals("Expected vector doesn't match: " + actual.toString(), 0.0, cosine.distance(actual, expected), 1e-15);
        iter.advance();
        pos++;
    }
}
Also used : DoubleDBIDListIter(de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter) FixedDBIDsFilter(de.lmu.ifi.dbs.elki.datasource.filter.FixedDBIDsFilter) KNNList(de.lmu.ifi.dbs.elki.database.ids.KNNList) Database(de.lmu.ifi.dbs.elki.database.Database) DoubleDBIDList(de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDList) DoubleVector(de.lmu.ifi.dbs.elki.data.DoubleVector)

Aggregations

Database (de.lmu.ifi.dbs.elki.database.Database)288 Test (org.junit.Test)240 AbstractClusterAlgorithmTest (de.lmu.ifi.dbs.elki.algorithm.clustering.AbstractClusterAlgorithmTest)151 ELKIBuilder (de.lmu.ifi.dbs.elki.utilities.ELKIBuilder)102 DoubleVector (de.lmu.ifi.dbs.elki.data.DoubleVector)85 OutlierResult (de.lmu.ifi.dbs.elki.result.outlier.OutlierResult)69 AbstractOutlierAlgorithmTest (de.lmu.ifi.dbs.elki.algorithm.outlier.AbstractOutlierAlgorithmTest)50 Model (de.lmu.ifi.dbs.elki.data.model.Model)29 CutDendrogramByNumberOfClusters (de.lmu.ifi.dbs.elki.algorithm.clustering.hierarchical.extraction.CutDendrogramByNumberOfClusters)26 Clustering (de.lmu.ifi.dbs.elki.data.Clustering)14 StaticArrayDatabase (de.lmu.ifi.dbs.elki.database.StaticArrayDatabase)11 AbstractFrequentItemsetAlgorithmTest (de.lmu.ifi.dbs.elki.algorithm.itemsetmining.AbstractFrequentItemsetAlgorithmTest)10 AssociationRuleGeneration (de.lmu.ifi.dbs.elki.algorithm.itemsetmining.associationrules.AssociationRuleGeneration)10 AssociationRuleResult (de.lmu.ifi.dbs.elki.result.AssociationRuleResult)10 ListParameterization (de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.ListParameterization)10 AbstractSimpleAlgorithmTest (de.lmu.ifi.dbs.elki.algorithm.AbstractSimpleAlgorithmTest)9 MedoidModel (de.lmu.ifi.dbs.elki.data.model.MedoidModel)9 DBIDIter (de.lmu.ifi.dbs.elki.database.ids.DBIDIter)9 NumberVector (de.lmu.ifi.dbs.elki.data.NumberVector)8 DBIDs (de.lmu.ifi.dbs.elki.database.ids.DBIDs)8