Search in sources :

Example 31 with DoubleVector

use of de.lmu.ifi.dbs.elki.data.DoubleVector in project elki by elki-project.

the class GeoIndexing method main.

public static void main(String[] args) {
    // Set the logging level to statistics:
    LoggingConfiguration.setStatistics();
    // Generate a random data set.
    Random rand = new Random(0L);
    // Note: ELKI has a nice data generator class, use that instead.
    double[][] data = new double[100000][];
    for (int i = 0; i < data.length; i++) {
        data[i] = randomLatitudeLongitude(rand);
    }
    // Adapter to load data from an existing array.
    DatabaseConnection dbc = new ArrayAdapterDatabaseConnection(data);
    // Since the R-tree has so many options, it is a bit easier to configure it
    // using the parameterization API, which handles defaults, instantiation,
    // and additional constraint checks.
    RStarTreeFactory<?> indexfactory = // 
    new ELKIBuilder<>(RStarTreeFactory.class).with(AbstractPageFileFactory.Parameterizer.PAGE_SIZE_ID, // 
    512).with(RStarTreeFactory.Parameterizer.BULK_SPLIT_ID, // 
    SortTileRecursiveBulkSplit.class).build();
    // Create the database, and initialize it.
    Database db = new StaticArrayDatabase(dbc, Arrays.asList(indexfactory));
    // This will build the index of the database.
    db.initialize();
    // Relation containing the number vectors we put in above:
    Relation<NumberVector> rel = db.getRelation(TypeUtil.NUMBER_VECTOR_FIELD);
    // We can use this to identify rows of the input data below.
    DBIDRange ids = (DBIDRange) rel.getDBIDs();
    // For all indexes, dump their statistics.
    for (It<Index> it = db.getHierarchy().iterDescendants(db).filter(Index.class); it.valid(); it.advance()) {
        it.get().logStatistics();
    }
    // We use the WGS84 earth model, and "latitude, longitude" coordinates:
    // This distance function returns meters.
    LatLngDistanceFunction df = new LatLngDistanceFunction(WGS84SpheroidEarthModel.STATIC);
    // k nearest neighbor query:
    KNNQuery<NumberVector> knnq = QueryUtil.getKNNQuery(rel, df);
    // Let's find the closest points to New York:
    DoubleVector newYork = DoubleVector.wrap(new double[] { 40.730610, -73.935242 });
    KNNList knns = knnq.getKNNForObject(newYork, 10);
    // Iterate over all results.
    System.out.println("Close to New York:");
    for (DoubleDBIDListIter it = knns.iter(); it.valid(); it.advance()) {
        // To kilometers
        double km = it.doubleValue() / 1000;
        System.out.println(rel.get(it) + " distance: " + km + " km row: " + ids.getOffset(it));
    }
    // Many other indexes will fail if we search close to the date line.
    DoubleVector tuvalu = DoubleVector.wrap(new double[] { -7.4784205, 178.679924 });
    knns = knnq.getKNNForObject(tuvalu, 10);
    // Iterate over all results.
    System.out.println("Close to Tuvalu:");
    for (DoubleDBIDListIter it = knns.iter(); it.valid(); it.advance()) {
        // To kilometers
        double km = it.doubleValue() / 1000;
        System.out.println(rel.get(it) + " distance: " + km + " km row: " + ids.getOffset(it));
    }
    // the distances to a few points in the data set.
    for (It<Index> it = db.getHierarchy().iterDescendants(db).filter(Index.class); it.valid(); it.advance()) {
        it.get().logStatistics();
    }
}
Also used : DoubleDBIDListIter(de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter) ArrayAdapterDatabaseConnection(de.lmu.ifi.dbs.elki.datasource.ArrayAdapterDatabaseConnection) SortTileRecursiveBulkSplit(de.lmu.ifi.dbs.elki.index.tree.spatial.rstarvariants.strategies.bulk.SortTileRecursiveBulkSplit) Index(de.lmu.ifi.dbs.elki.index.Index) LatLngDistanceFunction(de.lmu.ifi.dbs.elki.distance.distancefunction.geo.LatLngDistanceFunction) Random(java.util.Random) NumberVector(de.lmu.ifi.dbs.elki.data.NumberVector) KNNList(de.lmu.ifi.dbs.elki.database.ids.KNNList) Database(de.lmu.ifi.dbs.elki.database.Database) StaticArrayDatabase(de.lmu.ifi.dbs.elki.database.StaticArrayDatabase) DBIDRange(de.lmu.ifi.dbs.elki.database.ids.DBIDRange) ArrayAdapterDatabaseConnection(de.lmu.ifi.dbs.elki.datasource.ArrayAdapterDatabaseConnection) DatabaseConnection(de.lmu.ifi.dbs.elki.datasource.DatabaseConnection) DoubleVector(de.lmu.ifi.dbs.elki.data.DoubleVector) RStarTreeFactory(de.lmu.ifi.dbs.elki.index.tree.spatial.rstarvariants.rstar.RStarTreeFactory) StaticArrayDatabase(de.lmu.ifi.dbs.elki.database.StaticArrayDatabase)

Example 32 with DoubleVector

use of de.lmu.ifi.dbs.elki.data.DoubleVector in project elki by elki-project.

the class CenterOfMassMetaClustering method run.

/**
 * This run method will do the wrapping.
 *
 * Its called from {@link AbstractAlgorithm#run(Database)} and performs the
 * call to the algorithms particular run method as well as the storing and
 * comparison of the resulting Clusterings.
 *
 * @param database Database
 * @param relation Data relation of uncertain objects
 * @return Clustering result
 */
public C run(Database database, Relation<? extends UncertainObject> relation) {
    final int dim = RelationUtil.dimensionality(relation);
    DBIDs ids = relation.getDBIDs();
    // Build a relation storing the center of mass:
    WritableDataStore<DoubleVector> store1 = DataStoreUtil.makeStorage(ids, DataStoreFactory.HINT_DB, DoubleVector.class);
    for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
        store1.put(iter, relation.get(iter).getCenterOfMass());
    }
    return runClusteringAlgorithm(database.getHierarchy(), relation, ids, store1, dim, "Uncertain Model: Center of Mass");
}
Also used : DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) DoubleVector(de.lmu.ifi.dbs.elki.data.DoubleVector) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter)

Example 33 with DoubleVector

use of de.lmu.ifi.dbs.elki.data.DoubleVector in project elki by elki-project.

the class CenterOfMassMetaClustering method runClusteringAlgorithm.

/**
 * Run a clustering algorithm on a single instance.
 *
 * @param parent Parent result to attach to
 * @param ids Object IDs to process
 * @param store Input data
 * @param dim Dimensionality
 * @param title Title of relation
 * @return Clustering result
 */
protected C runClusteringAlgorithm(ResultHierarchy hierarchy, Result parent, DBIDs ids, DataStore<DoubleVector> store, int dim, String title) {
    SimpleTypeInformation<DoubleVector> t = new VectorFieldTypeInformation<>(DoubleVector.FACTORY, dim);
    Relation<DoubleVector> sample = new MaterializedRelation<>(t, ids, title, store);
    ProxyDatabase d = new ProxyDatabase(ids, sample);
    C clusterResult = inner.run(d);
    d.getHierarchy().remove(sample);
    d.getHierarchy().remove(clusterResult);
    hierarchy.add(parent, sample);
    hierarchy.add(sample, clusterResult);
    return clusterResult;
}
Also used : VectorFieldTypeInformation(de.lmu.ifi.dbs.elki.data.type.VectorFieldTypeInformation) ProxyDatabase(de.lmu.ifi.dbs.elki.database.ProxyDatabase) DoubleVector(de.lmu.ifi.dbs.elki.data.DoubleVector) MaterializedRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation)

Example 34 with DoubleVector

use of de.lmu.ifi.dbs.elki.data.DoubleVector in project elki by elki-project.

the class SampleKMeansInitializationTest method testSampleKMeansInitialization.

/**
 * Run KMeans with fixed parameters and compare the result to a golden
 * standard.
 */
@Test
public void testSampleKMeansInitialization() {
    Database db = makeSimpleDatabase(UNITTEST + "different-densities-2d-no-noise.ascii", 1000);
    Clustering<?> result = // 
    new ELKIBuilder<SingleAssignmentKMeans<DoubleVector>>(SingleAssignmentKMeans.class).with(KMeans.K_ID, // 
    5).with(KMeans.SEED_ID, // 
    8).with(KMeans.INIT_ID, // 
    SampleKMeansInitialization.class).with(SampleKMeansInitialization.Parameterizer.KMEANS_ID, // 
    KMeansHamerly.class).with(KMeans.SEED_ID, // 
    8).with(SampleKMeansInitialization.Parameterizer.SAMPLE_ID, // 
    100).build().run(db);
    testFMeasure(db, result, 0.99601);
    testClusterSizes(result, new int[] { 199, 199, 200, 201, 201 });
}
Also used : ELKIBuilder(de.lmu.ifi.dbs.elki.utilities.ELKIBuilder) Database(de.lmu.ifi.dbs.elki.database.Database) DoubleVector(de.lmu.ifi.dbs.elki.data.DoubleVector) AbstractClusterAlgorithmTest(de.lmu.ifi.dbs.elki.algorithm.clustering.AbstractClusterAlgorithmTest) Test(org.junit.Test)

Example 35 with DoubleVector

use of de.lmu.ifi.dbs.elki.data.DoubleVector in project elki by elki-project.

the class WithinClusterMeanDistanceQualityMeasureTest method testOverallDistance.

/**
 * Test cluster average overall distance.
 */
@Test
public void testOverallDistance() {
    Database db = makeSimpleDatabase(UNITTEST + "quality-measure-test.csv", 7);
    Relation<DoubleVector> rel = db.getRelation(TypeUtil.DOUBLE_VECTOR_FIELD);
    KMeansLloyd<DoubleVector> kmeans = // 
    new ELKIBuilder<KMeansLloyd<DoubleVector>>(KMeansLloyd.class).with(KMeans.K_ID, // 
    2).with(KMeans.INIT_ID, // 
    FirstKInitialMeans.class).build();
    // run KMeans on database
    Clustering<KMeansModel> result = kmeans.run(db);
    final NumberVectorDistanceFunction<? super DoubleVector> dist = kmeans.getDistanceFunction();
    // Test Cluster Average Overall Distance
    KMeansQualityMeasure<? super DoubleVector> overall = new WithinClusterMeanDistanceQualityMeasure();
    final double quality = overall.quality(result, dist, rel);
    assertEquals("Avarage overall distance not as expected.", 0.8888888888888888, quality, 1e-10);
}
Also used : KMeansLloyd(de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans.KMeansLloyd) KMeansModel(de.lmu.ifi.dbs.elki.data.model.KMeansModel) FirstKInitialMeans(de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans.initialization.FirstKInitialMeans) Database(de.lmu.ifi.dbs.elki.database.Database) DoubleVector(de.lmu.ifi.dbs.elki.data.DoubleVector) Test(org.junit.Test) AbstractClusterAlgorithmTest(de.lmu.ifi.dbs.elki.algorithm.clustering.AbstractClusterAlgorithmTest)

Aggregations

DoubleVector (de.lmu.ifi.dbs.elki.data.DoubleVector)147 Test (org.junit.Test)112 Database (de.lmu.ifi.dbs.elki.database.Database)85 ELKIBuilder (de.lmu.ifi.dbs.elki.utilities.ELKIBuilder)75 AbstractClusterAlgorithmTest (de.lmu.ifi.dbs.elki.algorithm.clustering.AbstractClusterAlgorithmTest)50 MultipleObjectsBundle (de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle)26 AbstractDataSourceTest (de.lmu.ifi.dbs.elki.datasource.AbstractDataSourceTest)24 OutlierResult (de.lmu.ifi.dbs.elki.result.outlier.OutlierResult)22 AbstractOutlierAlgorithmTest (de.lmu.ifi.dbs.elki.algorithm.outlier.AbstractOutlierAlgorithmTest)16 ArrayList (java.util.ArrayList)14 DBIDs (de.lmu.ifi.dbs.elki.database.ids.DBIDs)12 NumberVector (de.lmu.ifi.dbs.elki.data.NumberVector)11 DBIDIter (de.lmu.ifi.dbs.elki.database.ids.DBIDIter)10 VectorFieldTypeInformation (de.lmu.ifi.dbs.elki.data.type.VectorFieldTypeInformation)9 ListParameterization (de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.ListParameterization)9 Random (java.util.Random)9 AbstractSimpleAlgorithmTest (de.lmu.ifi.dbs.elki.algorithm.AbstractSimpleAlgorithmTest)8 Model (de.lmu.ifi.dbs.elki.data.model.Model)8 LinearScanDistanceKNNQuery (de.lmu.ifi.dbs.elki.database.query.knn.LinearScanDistanceKNNQuery)8 MedoidModel (de.lmu.ifi.dbs.elki.data.model.MedoidModel)7