Search in sources :

Example 1 with Index

use of de.lmu.ifi.dbs.elki.index.Index in project elki by elki-project.

From the class GeoIndexing, method main.

/**
 * Example program: index random latitude/longitude points with an R*-tree
 * and run two 10-nearest-neighbor queries against it (New York, and Tuvalu
 * close to the date line). Index statistics are logged once after the index
 * was built and once more after the queries.
 *
 * @param args command line arguments (not used)
 */
public static void main(String[] args) {
    // Set the logging level to statistics:
    LoggingConfiguration.setStatistics();
    // Generate a random data set (fixed seed 0).
    Random rand = new Random(0L);
    // Note: ELKI has a nice data generator class, use that instead.
    double[][] data = new double[100000][];
    for (int i = 0; i < data.length; i++) {
        data[i] = randomLatitudeLongitude(rand);
    }
    // Adapter to load data from an existing array.
    DatabaseConnection dbc = new ArrayAdapterDatabaseConnection(data);
    // Since the R-tree has so many options, it is a bit easier to configure it
    // using the parameterization API, which handles defaults, instantiation,
    // and additional constraint checks.
    RStarTreeFactory<?> indexfactory = new ELKIBuilder<>(RStarTreeFactory.class)
        .with(AbstractPageFileFactory.Parameterizer.PAGE_SIZE_ID, 512)
        .with(RStarTreeFactory.Parameterizer.BULK_SPLIT_ID, SortTileRecursiveBulkSplit.class)
        .build();
    // Create the database, and initialize it.
    Database db = new StaticArrayDatabase(dbc, Arrays.asList(indexfactory));
    // This will build the index of the database.
    db.initialize();
    // Relation containing the number vectors we put in above:
    Relation<NumberVector> rel = db.getRelation(TypeUtil.NUMBER_VECTOR_FIELD);
    // We can use this to identify rows of the input data below.
    DBIDRange ids = (DBIDRange) rel.getDBIDs();
    // For all indexes, dump their statistics (state before any query).
    logIndexStatistics(db);
    // We use the WGS84 earth model, and "latitude, longitude" coordinates:
    // This distance function returns meters.
    LatLngDistanceFunction df = new LatLngDistanceFunction(WGS84SpheroidEarthModel.STATIC);
    // k nearest neighbor query:
    KNNQuery<NumberVector> knnq = QueryUtil.getKNNQuery(rel, df);
    // Let's find the closest points to New York:
    DoubleVector newYork = DoubleVector.wrap(new double[] { 40.730610, -73.935242 });
    printNeighbors("Close to New York:", knnq.getKNNForObject(newYork, 10), rel, ids);
    // Many other indexes will fail if we search close to the date line.
    DoubleVector tuvalu = DoubleVector.wrap(new double[] { -7.4784205, 178.679924 });
    printNeighbors("Close to Tuvalu:", knnq.getKNNForObject(tuvalu, 10), rel, ids);
    // Dump the index statistics again, now that both queries have been run.
    logIndexStatistics(db);
}

/**
 * Log the statistics of every index found among the descendants of the
 * database in its result hierarchy.
 *
 * @param db database whose indexes are reported
 */
private static void logIndexStatistics(Database db) {
    for (It<Index> it = db.getHierarchy().iterDescendants(db).filter(Index.class); it.valid(); it.advance()) {
        it.get().logStatistics();
    }
}

/**
 * Print a heading followed by one line per neighbor: the vector, its
 * distance in kilometers, and the row offset in the input data.
 *
 * @param heading line printed before the neighbors
 * @param knns nearest neighbor result to print
 * @param rel relation used to look up the vectors
 * @param ids DBID range used to map each neighbor to its row offset
 */
private static void printNeighbors(String heading, KNNList knns, Relation<NumberVector> rel, DBIDRange ids) {
    System.out.println(heading);
    for (DoubleDBIDListIter it = knns.iter(); it.valid(); it.advance()) {
        // The distance function reports meters; convert to kilometers.
        double km = it.doubleValue() / 1000;
        System.out.println(rel.get(it) + " distance: " + km + " km row: " + ids.getOffset(it));
    }
}
Also used : DoubleDBIDListIter(de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter) ArrayAdapterDatabaseConnection(de.lmu.ifi.dbs.elki.datasource.ArrayAdapterDatabaseConnection) SortTileRecursiveBulkSplit(de.lmu.ifi.dbs.elki.index.tree.spatial.rstarvariants.strategies.bulk.SortTileRecursiveBulkSplit) Index(de.lmu.ifi.dbs.elki.index.Index) LatLngDistanceFunction(de.lmu.ifi.dbs.elki.distance.distancefunction.geo.LatLngDistanceFunction) Random(java.util.Random) NumberVector(de.lmu.ifi.dbs.elki.data.NumberVector) KNNList(de.lmu.ifi.dbs.elki.database.ids.KNNList) Database(de.lmu.ifi.dbs.elki.database.Database) StaticArrayDatabase(de.lmu.ifi.dbs.elki.database.StaticArrayDatabase) DBIDRange(de.lmu.ifi.dbs.elki.database.ids.DBIDRange) ArrayAdapterDatabaseConnection(de.lmu.ifi.dbs.elki.datasource.ArrayAdapterDatabaseConnection) DatabaseConnection(de.lmu.ifi.dbs.elki.datasource.DatabaseConnection) DoubleVector(de.lmu.ifi.dbs.elki.data.DoubleVector) RStarTreeFactory(de.lmu.ifi.dbs.elki.index.tree.spatial.rstarvariants.rstar.RStarTreeFactory) StaticArrayDatabase(de.lmu.ifi.dbs.elki.database.StaticArrayDatabase)

Example 2 with Index

use of de.lmu.ifi.dbs.elki.index.Index in project elki by elki-project.

From the class StaticArrayDatabase, method initialize.

/**
 * Initialize the database by loading the initial data from the database
 * connection, if one is still set. The connection is cleared after loading,
 * so a repeated call does nothing.
 */
@Override
public void initialize() {
    // No connection (anymore): nothing to load.
    if (databaseConnection == null) {
        return;
    }
    if (LOG.isDebugging()) {
        LOG.debugFine("Loading data from database connection.");
    }
    final MultipleObjectsBundle bundle = databaseConnection.loadData();
    // Clear the connection, so that loading runs at most once.
    databaseConnection = null;

    // Choose the DBIDs for the bundle: generate a fresh range if the bundle
    // has none, reuse them if already static, else make them unmodifiable.
    final DBIDs bundleIds = bundle.getDBIDs();
    if (bundleIds == null) {
        this.ids = DBIDUtil.generateStaticDBIDRange(bundle.dataLength());
    } else if (bundleIds instanceof ArrayStaticDBIDs) {
        this.ids = (ArrayStaticDBIDs) bundleIds;
    } else {
        this.ids = (ArrayStaticDBIDs) DBIDUtil.makeUnmodifiable(bundleIds);
    }

    // Replace id representation (it would be nicer if we would not need
    // DBIDView at all)
    this.idrep = new DBIDView(this.ids);
    relations.add(this.idrep);
    getHierarchy().add(this, idrep);

    final DBIDArrayIter iter = this.ids.iter();
    final int columns = bundle.metaLength();
    for (int col = 0; col < columns; col++) {
        @SuppressWarnings("unchecked")
        final SimpleTypeInformation<Object> ometa = (SimpleTypeInformation<Object>) bundle.meta(col);
        final WritableDataStore<Object> store = DataStoreUtil.makeStorage(ids, DataStoreFactory.HINT_DB, ometa.getRestrictionClass());
        // Copy this column of the bundle into the data store, row by row.
        iter.seek(0);
        while (iter.valid()) {
            store.put(iter, bundle.data(iter.getOffset(), col));
            iter.advance();
        }
        final Relation<?> relation = new MaterializedRelation<>(ometa, ids, null, store);
        relations.add(relation);
        getHierarchy().add(this, relation);

        // Build an index from every factory that accepts this data type.
        for (IndexFactory<?, ?> factory : indexFactories) {
            if (!factory.getInputTypeRestriction().isAssignableFromType(ometa)) {
                continue;
            }
            @SuppressWarnings("unchecked")
            final IndexFactory<Object, ?> ofact = (IndexFactory<Object, ?>) factory;
            @SuppressWarnings("unchecked")
            final Relation<Object> orep = (Relation<Object>) relation;
            final Index index = ofact.instantiate(orep);
            // Only time the construction when statistics logging is enabled.
            Duration timer = null;
            if (LOG.isStatistics()) {
                timer = LOG.newDuration(index.getClass().getName() + ".construction").begin();
            }
            index.initialize();
            if (timer != null) {
                LOG.statistics(timer.end());
            }
            getHierarchy().add(relation, index);
        }
    }
    // fire insertion event
    eventManager.fireObjectsInserted(ids);
}
Also used : DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) ArrayStaticDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayStaticDBIDs) MultipleObjectsBundle(de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle) SimpleTypeInformation(de.lmu.ifi.dbs.elki.data.type.SimpleTypeInformation) DBIDArrayIter(de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter) Index(de.lmu.ifi.dbs.elki.index.Index) Duration(de.lmu.ifi.dbs.elki.logging.statistics.Duration) ArrayStaticDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayStaticDBIDs) MaterializedRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation) Relation(de.lmu.ifi.dbs.elki.database.relation.Relation) MaterializedRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation) IndexFactory(de.lmu.ifi.dbs.elki.index.IndexFactory) DBIDView(de.lmu.ifi.dbs.elki.database.relation.DBIDView)

Example 3 with Index

use of de.lmu.ifi.dbs.elki.index.Index in project elki by elki-project.

From the class HashmapDatabase, method addNewRelation.

/**
 * Create a new materialized relation for the given meta data, register it
 * with this database and its hierarchy, and attach an initialized index
 * from every index factory whose input type restriction accepts the meta
 * data.
 *
 * @param meta meta data
 * @return new representation
 */
private Relation<?> addNewRelation(SimpleTypeInformation<?> meta) {
    @SuppressWarnings("unchecked")
    final SimpleTypeInformation<Object> typeInfo = (SimpleTypeInformation<Object>) meta;
    final Relation<?> created = new MaterializedRelation<>(typeInfo, ids);
    relations.add(created);
    getHierarchy().add(this, created);

    // Try to add indexes where appropriate
    for (IndexFactory<?, ?> candidate : indexFactories) {
        if (!candidate.getInputTypeRestriction().isAssignableFromType(meta)) {
            continue;
        }
        @SuppressWarnings("unchecked")
        final IndexFactory<Object, ?> objectFactory = (IndexFactory<Object, ?>) candidate;
        @SuppressWarnings("unchecked")
        final Relation<Object> objectRelation = (Relation<Object>) created;
        final Index built = objectFactory.instantiate(objectRelation);
        built.initialize();
        getHierarchy().add(created, built);
    }
    return created;
}
Also used : ModifiableRelation(de.lmu.ifi.dbs.elki.database.relation.ModifiableRelation) Relation(de.lmu.ifi.dbs.elki.database.relation.Relation) MaterializedRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation) IndexFactory(de.lmu.ifi.dbs.elki.index.IndexFactory) SimpleTypeInformation(de.lmu.ifi.dbs.elki.data.type.SimpleTypeInformation) Index(de.lmu.ifi.dbs.elki.index.Index) MaterializedRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation)

Example 4 with Index

use of de.lmu.ifi.dbs.elki.index.Index in project elki by elki-project.

From the class AlgorithmStep, method runAlgorithms.

/**
 * Run all configured algorithms on the database, one after another.
 * <p>
 * When statistics logging is enabled, the index statistics are logged
 * before the first algorithm and again after each algorithm, and the
 * runtime of each algorithm is logged. Non-null algorithm results are
 * attached to the database in the result hierarchy.
 *
 * @param database Database
 * @return Algorithm result (the newly created step result)
 */
public Result runAlgorithms(Database database) {
    final ResultHierarchy hierarchy = database.getHierarchy();
    if (LOG.isStatistics()) {
        final It<Index> indexes = hierarchy.iterDescendants(database).filter(Index.class);
        // Print the header only if there is at least one index.
        if (indexes.valid()) {
            LOG.statistics("Index statistics before running algorithms:");
            do {
                indexes.get().logStatistics();
                indexes.advance();
            } while (indexes.valid());
        }
    }
    stepresult = new BasicResult("Algorithm Step", "algorithm-step");
    for (Algorithm algorithm : algorithms) {
        // Name the thread after the algorithm that is currently running.
        Thread.currentThread().setName(algorithm.toString());
        // Only measure the runtime when statistics logging is enabled.
        Duration timer = null;
        if (LOG.isStatistics()) {
            timer = LOG.newDuration(algorithm.getClass().getName() + ".runtime").begin();
        }
        final Result outcome = algorithm.run(database);
        if (timer != null) {
            LOG.statistics(timer.end());
        }
        if (LOG.isStatistics()) {
            final It<Index> indexes = hierarchy.iterDescendants(database).filter(Index.class);
            if (indexes.valid()) {
                LOG.statistics("Index statistics after running algorithm " + algorithm.toString() + ":");
                do {
                    indexes.get().logStatistics();
                    indexes.advance();
                } while (indexes.valid());
            }
        }
        if (outcome != null) {
            // Make sure the result is attached, but usually this is a noop:
            hierarchy.add(database, outcome);
        }
    }
    return stepresult;
}
Also used : ResultHierarchy(de.lmu.ifi.dbs.elki.result.ResultHierarchy) BasicResult(de.lmu.ifi.dbs.elki.result.BasicResult) Index(de.lmu.ifi.dbs.elki.index.Index) Duration(de.lmu.ifi.dbs.elki.logging.statistics.Duration) Algorithm(de.lmu.ifi.dbs.elki.algorithm.Algorithm) AbstractAlgorithm(de.lmu.ifi.dbs.elki.algorithm.AbstractAlgorithm) Result(de.lmu.ifi.dbs.elki.result.Result) BasicResult(de.lmu.ifi.dbs.elki.result.BasicResult)

Aggregations

Index (de.lmu.ifi.dbs.elki.index.Index)4 SimpleTypeInformation (de.lmu.ifi.dbs.elki.data.type.SimpleTypeInformation)2 MaterializedRelation (de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation)2 Relation (de.lmu.ifi.dbs.elki.database.relation.Relation)2 IndexFactory (de.lmu.ifi.dbs.elki.index.IndexFactory)2 Duration (de.lmu.ifi.dbs.elki.logging.statistics.Duration)2 AbstractAlgorithm (de.lmu.ifi.dbs.elki.algorithm.AbstractAlgorithm)1 Algorithm (de.lmu.ifi.dbs.elki.algorithm.Algorithm)1 DoubleVector (de.lmu.ifi.dbs.elki.data.DoubleVector)1 NumberVector (de.lmu.ifi.dbs.elki.data.NumberVector)1 Database (de.lmu.ifi.dbs.elki.database.Database)1 StaticArrayDatabase (de.lmu.ifi.dbs.elki.database.StaticArrayDatabase)1 ArrayStaticDBIDs (de.lmu.ifi.dbs.elki.database.ids.ArrayStaticDBIDs)1 DBIDArrayIter (de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter)1 DBIDRange (de.lmu.ifi.dbs.elki.database.ids.DBIDRange)1 DBIDs (de.lmu.ifi.dbs.elki.database.ids.DBIDs)1 DoubleDBIDListIter (de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter)1 KNNList (de.lmu.ifi.dbs.elki.database.ids.KNNList)1 DBIDView (de.lmu.ifi.dbs.elki.database.relation.DBIDView)1 ModifiableRelation (de.lmu.ifi.dbs.elki.database.relation.ModifiableRelation)1