Search in sources :

Example 1 with DBIDRange

use of de.lmu.ifi.dbs.elki.database.ids.DBIDRange in project elki by elki-project.

the class GeoIndexing method main.

public static void main(String[] args) {
    // Set the logging level to statistics; the index statistics below are
    // emitted via logStatistics().
    LoggingConfiguration.setStatistics();
    // Generate a random data set from a fixed seed.
    // Note: ELKI has a nice data generator class, use that instead.
    final Random random = new Random(0L);
    final double[][] points = new double[100000][];
    int row = 0;
    while (row < points.length) {
        points[row] = randomLatitudeLongitude(random);
        row++;
    }
    // Adapter to load data from the existing array.
    final DatabaseConnection connection = new ArrayAdapterDatabaseConnection(points);
    // The R-tree has many options, so it is configured through the
    // parameterization API, which handles defaults, instantiation, and
    // additional constraint checks.
    final RStarTreeFactory<?> rtreeFactory = new ELKIBuilder<>(RStarTreeFactory.class) //
        .with(AbstractPageFileFactory.Parameterizer.PAGE_SIZE_ID, 512) //
        .with(RStarTreeFactory.Parameterizer.BULK_SPLIT_ID, SortTileRecursiveBulkSplit.class) //
        .build();
    // Create the database with the index factory attached.
    final Database database = new StaticArrayDatabase(connection, Arrays.asList(rtreeFactory));
    // Initializing the database also builds its index.
    database.initialize();
    // Relation containing the number vectors we put in above.
    final Relation<NumberVector> vectors = database.getRelation(TypeUtil.NUMBER_VECTOR_FIELD);
    // The DBID range lets us map result ids back to rows of the input array.
    final DBIDRange range = (DBIDRange) vectors.getDBIDs();
    // Dump the statistics of all indexes before running any query.
    It<Index> indexesBefore = database.getHierarchy().iterDescendants(database).filter(Index.class);
    while (indexesBefore.valid()) {
        indexesBefore.get().logStatistics();
        indexesBefore.advance();
    }
    // We use the WGS84 earth model, and "latitude, longitude" coordinates.
    // This distance function returns meters.
    final LatLngDistanceFunction geoDistance = new LatLngDistanceFunction(WGS84SpheroidEarthModel.STATIC);
    // k nearest neighbor query on the relation with the geodetic distance.
    final KNNQuery<NumberVector> neighborQuery = QueryUtil.getKNNQuery(vectors, geoDistance);

    // First query: the 10 points closest to New York.
    final DoubleVector newYork = DoubleVector.wrap(new double[] { 40.730610, -73.935242 });
    final KNNList nearNewYork = neighborQuery.getKNNForObject(newYork, 10);
    System.out.println("Close to New York:");
    DoubleDBIDListIter hit = nearNewYork.iter();
    while (hit.valid()) {
        // Distances are in meters; report kilometers.
        double km = hit.doubleValue() / 1000;
        System.out.println(vectors.get(hit) + " distance: " + km + " km row: " + range.getOffset(hit));
        hit.advance();
    }

    // Second query: the 10 points closest to Tuvalu.
    // Many other indexes will fail if we search close to the date line.
    final DoubleVector tuvalu = DoubleVector.wrap(new double[] { -7.4784205, 178.679924 });
    final KNNList nearTuvalu = neighborQuery.getKNNForObject(tuvalu, 10);
    System.out.println("Close to Tuvalu:");
    hit = nearTuvalu.iter();
    while (hit.valid()) {
        // Distances are in meters; report kilometers.
        double km = hit.doubleValue() / 1000;
        System.out.println(vectors.get(hit) + " distance: " + km + " km row: " + range.getOffset(hit));
        hit.advance();
    }

    // Dump the statistics of all indexes again, now that both queries have run.
    It<Index> indexesAfter = database.getHierarchy().iterDescendants(database).filter(Index.class);
    while (indexesAfter.valid()) {
        indexesAfter.get().logStatistics();
        indexesAfter.advance();
    }
}
Also used : DoubleDBIDListIter(de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter) ArrayAdapterDatabaseConnection(de.lmu.ifi.dbs.elki.datasource.ArrayAdapterDatabaseConnection) SortTileRecursiveBulkSplit(de.lmu.ifi.dbs.elki.index.tree.spatial.rstarvariants.strategies.bulk.SortTileRecursiveBulkSplit) Index(de.lmu.ifi.dbs.elki.index.Index) LatLngDistanceFunction(de.lmu.ifi.dbs.elki.distance.distancefunction.geo.LatLngDistanceFunction) Random(java.util.Random) NumberVector(de.lmu.ifi.dbs.elki.data.NumberVector) KNNList(de.lmu.ifi.dbs.elki.database.ids.KNNList) Database(de.lmu.ifi.dbs.elki.database.Database) StaticArrayDatabase(de.lmu.ifi.dbs.elki.database.StaticArrayDatabase) DBIDRange(de.lmu.ifi.dbs.elki.database.ids.DBIDRange) ArrayAdapterDatabaseConnection(de.lmu.ifi.dbs.elki.datasource.ArrayAdapterDatabaseConnection) DatabaseConnection(de.lmu.ifi.dbs.elki.datasource.DatabaseConnection) DoubleVector(de.lmu.ifi.dbs.elki.data.DoubleVector) RStarTreeFactory(de.lmu.ifi.dbs.elki.index.tree.spatial.rstarvariants.rstar.RStarTreeFactory) StaticArrayDatabase(de.lmu.ifi.dbs.elki.database.StaticArrayDatabase)

Example 2 with DBIDRange

use of de.lmu.ifi.dbs.elki.database.ids.DBIDRange in project elki by elki-project.

the class SimpleDBIDFactory method generateStaticDBIDRange.

@Override
public synchronized DBIDRange generateStaticDBIDRange(int size) {
    // Refuse the request when handing out "size" more ids starting at
    // rangestart would reach Integer.MAX_VALUE.
    final boolean exhausted = Integer.MAX_VALUE - size <= rangestart;
    if (exhausted) {
        throw new AbortException("DBID range allocation error - too many objects allocated!");
    }
    // The new range begins at the current start position ...
    final DBIDRange range = new IntegerDBIDRange(rangestart, size);
    // ... and the next allocation continues right behind it.
    rangestart += size;
    return range;
}
Also used : DBIDRange(de.lmu.ifi.dbs.elki.database.ids.DBIDRange) AbortException(de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException)

Example 3 with DBIDRange

use of de.lmu.ifi.dbs.elki.database.ids.DBIDRange in project elki by elki-project.

the class KNNBenchmarkAlgorithm method run.

/**
 * Run the benchmark: execute kNN queries for a random sample of query
 * objects and log a checksum plus summary statistics of the results.
 *
 * The query objects are sampled from the relation itself when no separate
 * query source is configured, and from the loaded query bundle otherwise.
 *
 * @param database Database
 * @param relation Relation
 * @return always {@code null}
 */
public Result run(Database database, Relation<O> relation) {
    // Get a distance query and, based on it, the kNN query to benchmark.
    final DistanceQuery<O> distanceQuery = database.getDistanceQuery(relation, getDistanceFunction());
    final KNNQuery<O> neighborQuery = database.getKNNQuery(distanceQuery, k);

    if (queries == null) {
        // No query set - sample the query objects from the original database.
        final DBIDs sampled = DBIDUtil.randomSample(relation.getDBIDs(), sampling, random);
        FiniteProgress progress = null;
        if (LOG.isVeryVerbose()) {
            progress = new FiniteProgress("kNN queries", sampled.size(), LOG);
        }
        int resultHash = 0;
        final MeanVariance sizeStats = new MeanVariance();
        final MeanVariance kdistStats = new MeanVariance();
        for (DBIDIter query = sampled.iter(); query.valid(); query.advance()) {
            final KNNList neighbors = neighborQuery.getKNNForDBID(query, k);
            // Sum the integer ids of the neighbors, then mix the sum into
            // the overall result hash.
            int idSum = 0;
            for (DBIDIter neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) {
                idSum += DBIDUtil.asInteger(neighbor);
            }
            resultHash = Util.mixHashCodes(resultHash, idSum);
            sizeStats.put(neighbors.size());
            kdistStats.put(neighbors.getKNNDistance());
            LOG.incrementProcessed(progress);
        }
        LOG.ensureCompleted(progress);
        if (LOG.isStatistics()) {
            LOG.statistics("Result hashcode: " + resultHash);
            LOG.statistics("Mean number of results: " + sizeStats.getMean() + " +- " + sizeStats.getNaiveStddev());
            if (kdistStats.getCount() > 0) {
                LOG.statistics("Mean k-distance: " + kdistStats.getMean() + " +- " + kdistStats.getNaiveStddev());
            }
        }
        return null;
    }

    // Separate query set: find the first column of the loaded bundle that is
    // compatible with the input type of the distance function.
    final TypeInformation required = getDistanceFunction().getInputTypeRestriction();
    final MultipleObjectsBundle bundle = queries.loadData();
    int column = -1;
    int candidate = 0;
    while (column < 0 && candidate < bundle.metaLength()) {
        if (required.isAssignableFromType(bundle.meta(candidate))) {
            column = candidate;
        }
        candidate++;
    }
    if (column < 0) {
        throw new IncompatibleDataException("No compatible data type in query input was found. Expected: " + required.toString());
    }
    // Random sampling is a bit of hack, sorry.
    // But currently, we don't (yet) have an "integer random sample" function,
    // so a DBID range of the bundle's length is allocated and sampled instead.
    final DBIDRange queryIds = DBIDUtil.generateStaticDBIDRange(bundle.dataLength());
    final DBIDs sampled = DBIDUtil.randomSample(queryIds, sampling, random);
    FiniteProgress progress = null;
    if (LOG.isVeryVerbose()) {
        progress = new FiniteProgress("kNN queries", sampled.size(), LOG);
    }
    int resultHash = 0;
    final MeanVariance sizeStats = new MeanVariance();
    final MeanVariance kdistStats = new MeanVariance();
    for (DBIDIter query = sampled.iter(); query.valid(); query.advance()) {
        // Translate the sampled id into a row offset of the bundle.
        final int offset = queryIds.binarySearch(query);
        assert (offset >= 0);
        @SuppressWarnings("unchecked") O queryObject = (O) bundle.data(offset, column);
        final KNNList neighbors = neighborQuery.getKNNForObject(queryObject, k);
        // Sum the integer ids of the neighbors, then mix the sum into the
        // overall result hash.
        int idSum = 0;
        for (DBIDIter neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) {
            idSum += DBIDUtil.asInteger(neighbor);
        }
        resultHash = Util.mixHashCodes(resultHash, idSum);
        sizeStats.put(neighbors.size());
        kdistStats.put(neighbors.getKNNDistance());
        LOG.incrementProcessed(progress);
    }
    LOG.ensureCompleted(progress);
    if (LOG.isStatistics()) {
        LOG.statistics("Result hashcode: " + resultHash);
        LOG.statistics("Mean number of results: " + sizeStats.getMean() + " +- " + sizeStats.getNaiveStddev());
        if (kdistStats.getCount() > 0) {
            LOG.statistics("Mean k-distance: " + kdistStats.getMean() + " +- " + kdistStats.getNaiveStddev());
        }
    }
    return null;
}
Also used : DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) MultipleObjectsBundle(de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle) TypeInformation(de.lmu.ifi.dbs.elki.data.type.TypeInformation) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) MeanVariance(de.lmu.ifi.dbs.elki.math.MeanVariance) KNNList(de.lmu.ifi.dbs.elki.database.ids.KNNList) IncompatibleDataException(de.lmu.ifi.dbs.elki.utilities.exceptions.IncompatibleDataException) DBIDRange(de.lmu.ifi.dbs.elki.database.ids.DBIDRange)

Example 4 with DBIDRange

use of de.lmu.ifi.dbs.elki.database.ids.DBIDRange in project elki by elki-project.

the class CacheDoubleDistanceInOnDiskMatrix method run.

@Override
public void run() {
    database.initialize();
    final Relation<O> relation = database.getRelation(distance.getInputTypeRestriction());
    final DistanceQuery<O> distanceQuery = database.getDistanceQuery(relation, distance);
    // The ids must form a range, since offsets within it address the matrix.
    final DBIDRange ids = DBIDUtil.assertRange(relation.getDBIDs());
    final int size = ids.size();
    FiniteProgress prog = null;
    if (LOG.isVerbose()) {
        // Number of entries to compute: size * (size + 1) / 2, multiplied in
        // long arithmetic before narrowing back to int.
        final int total = (int) (((size + 1) * (long) size) >>> 1);
        prog = new FiniteProgress("Precomputing distances", total, LOG);
    }
    try (OnDiskUpperTriangleMatrix matrix = new OnDiskUpperTriangleMatrix(out, DiskCacheBasedDoubleDistanceFunction.DOUBLE_CACHE_MAGIC, 0, ByteArrayUtil.SIZE_DOUBLE, size)) {
        final DBIDArrayIter id1 = ids.iter();
        final DBIDArrayIter id2 = ids.iter();
        while (id1.valid()) {
            // Upper triangle including the diagonal: the second iterator
            // starts at the position of the first one.
            id2.seek(id1.getOffset());
            while (id2.valid()) {
                final double forward = distanceQuery.distance(id1, id2);
                if (debugExtraCheckSymmetry) {
                    // Optional check: compare with the distance in reverse
                    // argument order.
                    final double backward = distanceQuery.distance(id2, id1);
                    if (Math.abs(forward - backward) > 0.0000001) {
                        LOG.warning("Distance function doesn't appear to be symmetric!");
                    }
                }
                try {
                    matrix.getRecordBuffer(id1.getOffset(), id2.getOffset()).putDouble(forward);
                } catch (IOException e) {
                    throw new AbortException("Error writing distance record " + DBIDUtil.toString(id1) + "," + DBIDUtil.toString(id2) + " to matrix.", e);
                }
                id2.advance();
            }
            if (prog != null) {
                // The row just finished contributed (size - offset) entries.
                prog.setProcessed(prog.getProcessed() + (size - id1.getOffset()), LOG);
            }
            id1.advance();
        }
    } catch (IOException e) {
        throw new AbortException("Error precomputing distance matrix.", e);
    }
    LOG.ensureCompleted(prog);
}
Also used : OnDiskUpperTriangleMatrix(de.lmu.ifi.dbs.elki.persistent.OnDiskUpperTriangleMatrix) FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) DBIDRange(de.lmu.ifi.dbs.elki.database.ids.DBIDRange) DBIDArrayIter(de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter) IOException(java.io.IOException) AbortException(de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException)

Example 5 with DBIDRange

use of de.lmu.ifi.dbs.elki.database.ids.DBIDRange in project elki by elki-project.

the class PassingDataToELKI method main.

/**
 * Main method: load a random two-dimensional data set from a Java array
 * into an ELKI database, cluster it with k-means, and print each cluster
 * together with the row offsets of its members.
 *
 * @param args Command line parameters (not supported)
 */
public static void main(String[] args) {
    // Set the logging level to statistics:
    LoggingConfiguration.setStatistics();
    // Generate a random data set: 1000 rows with 2 values each.
    // Note: ELKI has a nice data generator class, use that instead.
    final double[][] points = new double[1000][2];
    for (double[] point : points) {
        for (int d = 0; d < point.length; d++) {
            point[d] = Math.random();
        }
    }
    // Adapter to load data from an existing array.
    final DatabaseConnection connection = new ArrayAdapterDatabaseConnection(points);
    // Create a database (which may contain multiple relations!)
    final Database database = new StaticArrayDatabase(connection, null);
    // Load the data into the database (do NOT forget to initialize...)
    database.initialize();
    // Relation containing the number vectors:
    final Relation<NumberVector> vectors = database.getRelation(TypeUtil.NUMBER_VECTOR_FIELD);
    // We know that the ids must be a continuous range:
    final DBIDRange range = (DBIDRange) vectors.getDBIDs();

    // K-means should be used with squared Euclidean (least squares):
    final SquaredEuclideanDistanceFunction squaredEuclidean = SquaredEuclideanDistanceFunction.STATIC;
    // Default initialization, using global random:
    // To fix the random seed, use: new RandomFactory(seed);
    final RandomlyGeneratedInitialMeans initializer = new RandomlyGeneratedInitialMeans(RandomFactory.DEFAULT);
    // k - number of partitions
    final int partitions = 3;
    // maximum number of iterations: 0 means no limit
    final int maxIterations = 0;
    // Textbook k-means clustering:
    final KMeansLloyd<NumberVector> kmeans = new KMeansLloyd<>(squaredEuclidean, partitions, maxIterations, initializer);
    // K-means will automatically choose a numerical relation from the data set:
    // But we could make it explicit (if there were more than one numeric
    // relation!): kmeans.run(database, vectors);
    final Clustering<KMeansModel> clustering = kmeans.run(database);

    // Output all clusters:
    int number = 0;
    for (Cluster<KMeansModel> cluster : clustering.getAllClusters()) {
        // K-means will name all clusters "Cluster" in lack of noise support:
        System.out.println("#" + number + ": " + cluster.getNameAutomatic());
        System.out.println("Size: " + cluster.size());
        System.out.println("Center: " + cluster.getModel().getPrototype().toString());
        // Iterate over objects:
        System.out.print("Objects: ");
        DBIDIter member = cluster.getIDs().iter();
        while (member.valid()) {
            // To get the vector use:
            // NumberVector v = vectors.get(member);
            // Offset within our DBID range: "line number".
            // Do NOT rely on using "internalGetIndex()" directly!
            final int offset = range.getOffset(member);
            System.out.print(" " + offset);
            member.advance();
        }
        System.out.println();
        ++number;
    }
}
Also used : KMeansLloyd(de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans.KMeansLloyd) KMeansModel(de.lmu.ifi.dbs.elki.data.model.KMeansModel) RandomlyGeneratedInitialMeans(de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans.initialization.RandomlyGeneratedInitialMeans) ArrayAdapterDatabaseConnection(de.lmu.ifi.dbs.elki.datasource.ArrayAdapterDatabaseConnection) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) NumberVector(de.lmu.ifi.dbs.elki.data.NumberVector) SquaredEuclideanDistanceFunction(de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.SquaredEuclideanDistanceFunction) Database(de.lmu.ifi.dbs.elki.database.Database) StaticArrayDatabase(de.lmu.ifi.dbs.elki.database.StaticArrayDatabase) DBIDRange(de.lmu.ifi.dbs.elki.database.ids.DBIDRange) ArrayAdapterDatabaseConnection(de.lmu.ifi.dbs.elki.datasource.ArrayAdapterDatabaseConnection) DatabaseConnection(de.lmu.ifi.dbs.elki.datasource.DatabaseConnection) StaticArrayDatabase(de.lmu.ifi.dbs.elki.database.StaticArrayDatabase)

Aggregations

- DBIDRange (de.lmu.ifi.dbs.elki.database.ids.DBIDRange): 24
- DBIDArrayIter (de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter): 13
- FiniteProgress (de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress): 8
- AbortException (de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException): 8
- Test (org.junit.Test): 8
- DBIDIter (de.lmu.ifi.dbs.elki.database.ids.DBIDIter): 6
- DBIDs (de.lmu.ifi.dbs.elki.database.ids.DBIDs): 6
- Random (java.util.Random): 4
- NumberVector (de.lmu.ifi.dbs.elki.data.NumberVector): 3
- TypeInformation (de.lmu.ifi.dbs.elki.data.type.TypeInformation): 3
- Database (de.lmu.ifi.dbs.elki.database.Database): 3
- KNNList (de.lmu.ifi.dbs.elki.database.ids.KNNList): 3
- MultipleObjectsBundle (de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle): 3
- MeanVariance (de.lmu.ifi.dbs.elki.math.MeanVariance): 3
- DoubleVector (de.lmu.ifi.dbs.elki.data.DoubleVector): 2
- StaticArrayDatabase (de.lmu.ifi.dbs.elki.database.StaticArrayDatabase): 2
- Relation (de.lmu.ifi.dbs.elki.database.relation.Relation): 2
- ArrayAdapterDatabaseConnection (de.lmu.ifi.dbs.elki.datasource.ArrayAdapterDatabaseConnection): 2
- DatabaseConnection (de.lmu.ifi.dbs.elki.datasource.DatabaseConnection): 2
- OnDiskUpperTriangleMatrix (de.lmu.ifi.dbs.elki.persistent.OnDiskUpperTriangleMatrix): 2