Search in sources :

Example 71 with Database

use of de.lmu.ifi.dbs.elki.database.Database in project elki by elki-project.

the class LiftTest method testToyExample.

/**
 * Generates association rules on the "increasing5" toy transaction file with
 * {@code Lift} as the interest measure and verifies the number of rules found.
 */
@Test
public void testToyExample() {
    final Database transactions = loadTransactions(UNITTEST + "itemsets/increasing5.txt", 5);
    // Configure the rule generator: MINSUPP_ID = 2, MINMEASURE_ID = 1.5, measure = Lift.
    final AssociationRuleGeneration miner = new ELKIBuilder<>(AssociationRuleGeneration.class)
        .with(FPGrowth.Parameterizer.MINSUPP_ID, 2)
        .with(AssociationRuleGeneration.Parameterizer.MINMEASURE_ID, 1.5)
        .with(AssociationRuleGeneration.Parameterizer.INTERESTMEASURE_ID, Lift.class)
        .build();
    final AssociationRuleResult mined = miner.run(transactions);
    assertEquals("Size not as expected.", 18, mined.getRules().size());
}
Also used : AssociationRuleResult(de.lmu.ifi.dbs.elki.result.AssociationRuleResult) AssociationRuleGeneration(de.lmu.ifi.dbs.elki.algorithm.itemsetmining.associationrules.AssociationRuleGeneration) Database(de.lmu.ifi.dbs.elki.database.Database) Test(org.junit.Test) AbstractFrequentItemsetAlgorithmTest(de.lmu.ifi.dbs.elki.algorithm.itemsetmining.AbstractFrequentItemsetAlgorithmTest)

Example 72 with Database

use of de.lmu.ifi.dbs.elki.database.Database in project elki by elki-project.

the class AddedValueTest method testToyExample.

/**
 * Generates association rules on the "increasing5" toy transaction file with
 * {@code AddedValue} as the interest measure and verifies the number of rules
 * found.
 */
@Test
public void testToyExample() {
    final Database transactions = loadTransactions(UNITTEST + "itemsets/increasing5.txt", 5);
    // Configure the rule generator: MINSUPP_ID = 1, MINMEASURE_ID = 0.6, measure = AddedValue.
    final AssociationRuleGeneration miner = new ELKIBuilder<>(AssociationRuleGeneration.class)
        .with(FPGrowth.Parameterizer.MINSUPP_ID, 1)
        .with(AssociationRuleGeneration.Parameterizer.MINMEASURE_ID, 0.6)
        .with(AssociationRuleGeneration.Parameterizer.INTERESTMEASURE_ID, AddedValue.class)
        .build();
    final AssociationRuleResult mined = miner.run(transactions);
    assertEquals("Size not as expected.", 27, mined.getRules().size());
}
Also used : AssociationRuleResult(de.lmu.ifi.dbs.elki.result.AssociationRuleResult) AssociationRuleGeneration(de.lmu.ifi.dbs.elki.algorithm.itemsetmining.associationrules.AssociationRuleGeneration) Database(de.lmu.ifi.dbs.elki.database.Database) Test(org.junit.Test) AbstractFrequentItemsetAlgorithmTest(de.lmu.ifi.dbs.elki.algorithm.itemsetmining.AbstractFrequentItemsetAlgorithmTest)

Example 73 with Database

use of de.lmu.ifi.dbs.elki.database.Database in project elki by elki-project.

the class GeoIndexing method main.

/**
 * Example program: loads randomly generated latitude/longitude points into a
 * database indexed by an R*-tree, runs two 10-nearest-neighbor queries with a
 * WGS84 latitude/longitude distance function, and prints the results.
 * Index statistics are logged once before and once after the queries.
 *
 * @param args command line arguments (not used)
 */
public static void main(String[] args) {
    // Set the logging level to statistics:
    LoggingConfiguration.setStatistics();
    // Generate a random data set; the seed is fixed, so every run sees the same data.
    Random rand = new Random(0L);
    // Note: ELKI has a nice data generator class, use that instead.
    double[][] data = new double[100000][];
    for (int i = 0; i < data.length; i++) {
        data[i] = randomLatitudeLongitude(rand);
    }
    // Adapter to load data from an existing array.
    DatabaseConnection dbc = new ArrayAdapterDatabaseConnection(data);
    // Since the R-tree has so many options, it is a bit easier to configure it
    // using the parameterization API, which handles defaults, instantiation,
    // and additional constraint checks.
    RStarTreeFactory<?> indexfactory = new ELKIBuilder<>(RStarTreeFactory.class)
        .with(AbstractPageFileFactory.Parameterizer.PAGE_SIZE_ID, 512)
        .with(RStarTreeFactory.Parameterizer.BULK_SPLIT_ID, SortTileRecursiveBulkSplit.class)
        .build();
    // Create the database, and initialize it.
    Database db = new StaticArrayDatabase(dbc, Arrays.asList(indexfactory));
    // This will build the index of the database.
    db.initialize();
    // Relation containing the number vectors we put in above:
    Relation<NumberVector> rel = db.getRelation(TypeUtil.NUMBER_VECTOR_FIELD);
    // We can use this to identify rows of the input data below.
    DBIDRange ids = (DBIDRange) rel.getDBIDs();
    // For all indexes, dump their statistics (before any query has run).
    logIndexStatistics(db);
    // We use the WGS84 earth model, and "latitude, longitude" coordinates:
    // This distance function returns meters.
    LatLngDistanceFunction df = new LatLngDistanceFunction(WGS84SpheroidEarthModel.STATIC);
    // k nearest neighbor query:
    KNNQuery<NumberVector> knnq = QueryUtil.getKNNQuery(rel, df);
    // Let's find the closest points to New York:
    DoubleVector newYork = DoubleVector.wrap(new double[] { 40.730610, -73.935242 });
    printNeighbors("Close to New York:", knnq.getKNNForObject(newYork, 10), rel, ids);
    // Many other indexes will fail if we search close to the date line.
    DoubleVector tuvalu = DoubleVector.wrap(new double[] { -7.4784205, 178.679924 });
    printNeighbors("Close to Tuvalu:", knnq.getKNNForObject(tuvalu, 10), rel, ids);
    // Dump the index statistics again, now that the two kNN queries have run.
    logIndexStatistics(db);
}

/**
 * Calls {@code logStatistics()} on every {@link Index} found among the
 * descendants of the database in its result hierarchy.
 *
 * @param db database whose indexes are reported
 */
private static void logIndexStatistics(Database db) {
    for (It<Index> it = db.getHierarchy().iterDescendants(db).filter(Index.class); it.valid(); it.advance()) {
        it.get().logStatistics();
    }
}

/**
 * Prints a heading followed by one line per neighbor: the stored vector, its
 * distance in kilometers, and its row offset in the input data.
 *
 * @param heading line printed before the neighbors
 * @param knns neighbor list to print; distances are divided by 1000 for display
 * @param rel relation used to look up the vector of each neighbor
 * @param ids id range used to translate a neighbor into its row offset
 */
private static void printNeighbors(String heading, KNNList knns, Relation<NumberVector> rel, DBIDRange ids) {
    System.out.println(heading);
    for (DoubleDBIDListIter it = knns.iter(); it.valid(); it.advance()) {
        // To kilometers
        double km = it.doubleValue() / 1000;
        System.out.println(rel.get(it) + " distance: " + km + " km row: " + ids.getOffset(it));
    }
}
Also used : DoubleDBIDListIter(de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter) ArrayAdapterDatabaseConnection(de.lmu.ifi.dbs.elki.datasource.ArrayAdapterDatabaseConnection) SortTileRecursiveBulkSplit(de.lmu.ifi.dbs.elki.index.tree.spatial.rstarvariants.strategies.bulk.SortTileRecursiveBulkSplit) Index(de.lmu.ifi.dbs.elki.index.Index) LatLngDistanceFunction(de.lmu.ifi.dbs.elki.distance.distancefunction.geo.LatLngDistanceFunction) Random(java.util.Random) NumberVector(de.lmu.ifi.dbs.elki.data.NumberVector) KNNList(de.lmu.ifi.dbs.elki.database.ids.KNNList) Database(de.lmu.ifi.dbs.elki.database.Database) StaticArrayDatabase(de.lmu.ifi.dbs.elki.database.StaticArrayDatabase) DBIDRange(de.lmu.ifi.dbs.elki.database.ids.DBIDRange) DatabaseConnection(de.lmu.ifi.dbs.elki.datasource.DatabaseConnection) DoubleVector(de.lmu.ifi.dbs.elki.data.DoubleVector) RStarTreeFactory(de.lmu.ifi.dbs.elki.index.tree.spatial.rstarvariants.rstar.RStarTreeFactory)

Example 74 with Database

use of de.lmu.ifi.dbs.elki.database.Database in project elki by elki-project.

the class SampleKMeansInitializationTest method testSampleKMeansInitialization.

/**
 * Runs single-assignment k-means, initialized through
 * {@code SampleKMeansInitialization}, with fixed parameters and compares the
 * outcome against a golden standard (F-measure and cluster sizes).
 */
@Test
public void testSampleKMeansInitialization() {
    final Database database = makeSimpleDatabase(UNITTEST + "different-densities-2d-no-noise.ascii", 1000);
    // NOTE(review): KMeans.SEED_ID is supplied twice below — presumably once for
    // the outer algorithm and once for the nested KMeansHamerly used by the
    // initialization; confirm before removing either occurrence.
    final SingleAssignmentKMeans<DoubleVector> algorithm = new ELKIBuilder<SingleAssignmentKMeans<DoubleVector>>(SingleAssignmentKMeans.class)
        .with(KMeans.K_ID, 5)
        .with(KMeans.SEED_ID, 8)
        .with(KMeans.INIT_ID, SampleKMeansInitialization.class)
        .with(SampleKMeansInitialization.Parameterizer.KMEANS_ID, KMeansHamerly.class)
        .with(KMeans.SEED_ID, 8)
        .with(SampleKMeansInitialization.Parameterizer.SAMPLE_ID, 100)
        .build();
    final Clustering<?> clusters = algorithm.run(database);
    testFMeasure(database, clusters, 0.99601);
    final int[] expectedSizes = { 199, 199, 200, 201, 201 };
    testClusterSizes(clusters, expectedSizes);
}
Also used : ELKIBuilder(de.lmu.ifi.dbs.elki.utilities.ELKIBuilder) Database(de.lmu.ifi.dbs.elki.database.Database) DoubleVector(de.lmu.ifi.dbs.elki.data.DoubleVector) AbstractClusterAlgorithmTest(de.lmu.ifi.dbs.elki.algorithm.clustering.AbstractClusterAlgorithmTest) Test(org.junit.Test)

Example 75 with Database

use of de.lmu.ifi.dbs.elki.database.Database in project elki by elki-project.

the class WithinClusterMeanDistanceQualityMeasureTest method testOverallDistance.

/**
 * Verifies the value reported by
 * {@code WithinClusterMeanDistanceQualityMeasure} for a k-means clustering of
 * the quality-measure test data.
 */
@Test
public void testOverallDistance() {
    final Database database = makeSimpleDatabase(UNITTEST + "quality-measure-test.csv", 7);
    final Relation<DoubleVector> vectors = database.getRelation(TypeUtil.DOUBLE_VECTOR_FIELD);
    // Lloyd k-means with K_ID = 2 and FirstKInitialMeans as initialization.
    final KMeansLloyd<DoubleVector> lloyd = new ELKIBuilder<KMeansLloyd<DoubleVector>>(KMeansLloyd.class)
        .with(KMeans.K_ID, 2)
        .with(KMeans.INIT_ID, FirstKInitialMeans.class)
        .build();
    // Cluster the data, then fetch the distance function the algorithm used.
    final Clustering<KMeansModel> clusters = lloyd.run(database);
    final NumberVectorDistanceFunction<? super DoubleVector> distance = lloyd.getDistanceFunction();
    // Evaluate the quality measure under test on that clustering.
    final KMeansQualityMeasure<? super DoubleVector> measure = new WithinClusterMeanDistanceQualityMeasure();
    final double actual = measure.quality(clusters, distance, vectors);
    assertEquals("Avarage overall distance not as expected.", 0.8888888888888888, actual, 1e-10);
}
Also used : KMeansLloyd(de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans.KMeansLloyd) KMeansModel(de.lmu.ifi.dbs.elki.data.model.KMeansModel) FirstKInitialMeans(de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans.initialization.FirstKInitialMeans) Database(de.lmu.ifi.dbs.elki.database.Database) DoubleVector(de.lmu.ifi.dbs.elki.data.DoubleVector) Test(org.junit.Test) AbstractClusterAlgorithmTest(de.lmu.ifi.dbs.elki.algorithm.clustering.AbstractClusterAlgorithmTest)

Aggregations

Database (de.lmu.ifi.dbs.elki.database.Database) 288, Test (org.junit.Test) 240, AbstractClusterAlgorithmTest (de.lmu.ifi.dbs.elki.algorithm.clustering.AbstractClusterAlgorithmTest) 151, ELKIBuilder (de.lmu.ifi.dbs.elki.utilities.ELKIBuilder) 102, DoubleVector (de.lmu.ifi.dbs.elki.data.DoubleVector) 85, OutlierResult (de.lmu.ifi.dbs.elki.result.outlier.OutlierResult) 69, AbstractOutlierAlgorithmTest (de.lmu.ifi.dbs.elki.algorithm.outlier.AbstractOutlierAlgorithmTest) 50, Model (de.lmu.ifi.dbs.elki.data.model.Model) 29, CutDendrogramByNumberOfClusters (de.lmu.ifi.dbs.elki.algorithm.clustering.hierarchical.extraction.CutDendrogramByNumberOfClusters) 26, Clustering (de.lmu.ifi.dbs.elki.data.Clustering) 14, StaticArrayDatabase (de.lmu.ifi.dbs.elki.database.StaticArrayDatabase) 11, AbstractFrequentItemsetAlgorithmTest (de.lmu.ifi.dbs.elki.algorithm.itemsetmining.AbstractFrequentItemsetAlgorithmTest) 10, AssociationRuleGeneration (de.lmu.ifi.dbs.elki.algorithm.itemsetmining.associationrules.AssociationRuleGeneration) 10, AssociationRuleResult (de.lmu.ifi.dbs.elki.result.AssociationRuleResult) 10, ListParameterization (de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.ListParameterization) 10, AbstractSimpleAlgorithmTest (de.lmu.ifi.dbs.elki.algorithm.AbstractSimpleAlgorithmTest) 9, MedoidModel (de.lmu.ifi.dbs.elki.data.model.MedoidModel) 9, DBIDIter (de.lmu.ifi.dbs.elki.database.ids.DBIDIter) 9, NumberVector (de.lmu.ifi.dbs.elki.data.NumberVector) 8, DBIDs (de.lmu.ifi.dbs.elki.database.ids.DBIDs) 8