Search in sources :

Example 1 with StaticArrayDatabase

Use of de.lmu.ifi.dbs.elki.database.StaticArrayDatabase in project elki by elki-project.

Taken from the class GeoIndexing, method main.

/**
 * Demonstrates nearest-neighbor search on latitude/longitude data.
 *
 * Generates 100000 random coordinates (fixed seed 0), loads them into a
 * {@link StaticArrayDatabase} together with an R*-tree index factory, and
 * prints the 10 nearest neighbors of two query points. Index statistics are
 * logged once after building the index and once after the queries.
 *
 * @param args Command line parameters (ignored)
 */
public static void main(String[] args) {
    // Set the logging level to statistics:
    LoggingConfiguration.setStatistics();
    // Generate a random data set (seeded, so the data is reproducible).
    Random rand = new Random(0L);
    // Note: ELKI has a nice data generator class, use that instead.
    double[][] data = new double[100000][];
    for (int i = 0; i < data.length; i++) {
        data[i] = randomLatitudeLongitude(rand);
    }
    // Adapter to load data from an existing array.
    DatabaseConnection dbc = new ArrayAdapterDatabaseConnection(data);
    // Since the R-tree has so many options, it is a bit easier to configure it
    // using the parameterization API, which handles defaults, instantiation,
    // and additional constraint checks.
    RStarTreeFactory<?> indexfactory = new ELKIBuilder<>(RStarTreeFactory.class) //
        .with(AbstractPageFileFactory.Parameterizer.PAGE_SIZE_ID, 512) //
        .with(RStarTreeFactory.Parameterizer.BULK_SPLIT_ID, SortTileRecursiveBulkSplit.class) //
        .build();
    // Create the database, and initialize it.
    Database db = new StaticArrayDatabase(dbc, Arrays.asList(indexfactory));
    // This will build the index of the database.
    db.initialize();
    // Relation containing the number vectors we put in above:
    Relation<NumberVector> rel = db.getRelation(TypeUtil.NUMBER_VECTOR_FIELD);
    // We can use this to identify rows of the input data below.
    DBIDRange ids = (DBIDRange) rel.getDBIDs();
    // For all indexes, dump their statistics (state right after construction).
    logIndexStatistics(db);
    // We use the WGS84 earth model, and "latitude, longitude" coordinates:
    // This distance function returns meters.
    LatLngDistanceFunction df = new LatLngDistanceFunction(WGS84SpheroidEarthModel.STATIC);
    // k nearest neighbor query:
    KNNQuery<NumberVector> knnq = QueryUtil.getKNNQuery(rel, df);
    // Let's find the closest points to New York:
    DoubleVector newYork = DoubleVector.wrap(new double[] { 40.730610, -73.935242 });
    printNeighbors("Close to New York:", knnq.getKNNForObject(newYork, 10), rel, ids);
    // Many other indexes will fail if we search close to the date line.
    DoubleVector tuvalu = DoubleVector.wrap(new double[] { -7.4784205, 178.679924 });
    printNeighbors("Close to Tuvalu:", knnq.getKNNForObject(tuvalu, 10), rel, ids);
    // Dump the index statistics again, this time after the two kNN queries.
    logIndexStatistics(db);
}

/**
 * Calls {@code logStatistics()} on every {@link Index} found among the
 * descendants of the database in its hierarchy.
 *
 * @param db Database whose indexes are reported
 */
private static void logIndexStatistics(Database db) {
    for (It<Index> it = db.getHierarchy().iterDescendants(db).filter(Index.class); it.valid(); it.advance()) {
        it.get().logStatistics();
    }
}

/**
 * Prints a heading followed by one line per neighbor: the stored vector, its
 * distance converted from meters to kilometers, and its row offset.
 *
 * @param heading Line printed before the neighbors
 * @param knns Query result to print
 * @param rel Relation used to look up the neighbor vectors
 * @param ids DBID range used to map a neighbor back to its input row
 */
private static void printNeighbors(String heading, KNNList knns, Relation<NumberVector> rel, DBIDRange ids) {
    System.out.println(heading);
    for (DoubleDBIDListIter it = knns.iter(); it.valid(); it.advance()) {
        // To kilometers
        double km = it.doubleValue() / 1000;
        System.out.println(rel.get(it) + " distance: " + km + " km row: " + ids.getOffset(it));
    }
}
Also used : DoubleDBIDListIter(de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter) ArrayAdapterDatabaseConnection(de.lmu.ifi.dbs.elki.datasource.ArrayAdapterDatabaseConnection) SortTileRecursiveBulkSplit(de.lmu.ifi.dbs.elki.index.tree.spatial.rstarvariants.strategies.bulk.SortTileRecursiveBulkSplit) Index(de.lmu.ifi.dbs.elki.index.Index) LatLngDistanceFunction(de.lmu.ifi.dbs.elki.distance.distancefunction.geo.LatLngDistanceFunction) Random(java.util.Random) NumberVector(de.lmu.ifi.dbs.elki.data.NumberVector) KNNList(de.lmu.ifi.dbs.elki.database.ids.KNNList) Database(de.lmu.ifi.dbs.elki.database.Database) StaticArrayDatabase(de.lmu.ifi.dbs.elki.database.StaticArrayDatabase) DBIDRange(de.lmu.ifi.dbs.elki.database.ids.DBIDRange) ArrayAdapterDatabaseConnection(de.lmu.ifi.dbs.elki.datasource.ArrayAdapterDatabaseConnection) DatabaseConnection(de.lmu.ifi.dbs.elki.datasource.DatabaseConnection) DoubleVector(de.lmu.ifi.dbs.elki.data.DoubleVector) RStarTreeFactory(de.lmu.ifi.dbs.elki.index.tree.spatial.rstarvariants.rstar.RStarTreeFactory) StaticArrayDatabase(de.lmu.ifi.dbs.elki.database.StaticArrayDatabase)

Example 2 with StaticArrayDatabase

Use of de.lmu.ifi.dbs.elki.database.StaticArrayDatabase in project elki by elki-project.

Taken from the class PassingDataToELKI, method main.

/**
 * Entry point: clusters 1000 random two-dimensional points with k-means
 * (k = 3) and prints every cluster with its size, center and member rows.
 *
 * @param args Command line parameters (not supported)
 */
public static void main(String[] args) {
    // Enable statistics-level logging.
    LoggingConfiguration.setStatistics();
    // Fill a 1000 x 2 array with random values.
    // (ELKI ships a proper data generator class; prefer that in real code.)
    final double[][] points = new double[1000][2];
    for (double[] row : points) {
        for (int d = 0; d < row.length; d++) {
            row[d] = Math.random();
        }
    }
    // Wrap the existing array so that ELKI can load it.
    final DatabaseConnection connection = new ArrayAdapterDatabaseConnection(points);
    // The database may hold several relations; no index factories are passed.
    final Database database = new StaticArrayDatabase(connection, null);
    // Nothing is loaded until initialize() is called - do not forget it.
    database.initialize();
    // The relation holding the number vectors:
    final Relation<NumberVector> vectors = database.getRelation(TypeUtil.NUMBER_VECTOR_FIELD);
    // The ids are known to form one continuous range here:
    final DBIDRange range = (DBIDRange) vectors.getDBIDs();
    // Squared Euclidean (least squares) is the distance to use with k-means:
    final SquaredEuclideanDistanceFunction distance = SquaredEuclideanDistanceFunction.STATIC;
    // Default initialization with the global random generator;
    // for a fixed seed use: new RandomFactory(seed)
    final RandomlyGeneratedInitialMeans initializer = new RandomlyGeneratedInitialMeans(RandomFactory.DEFAULT);
    // Textbook k-means: k = 3 partitions, 0 = no limit on the iterations.
    final KMeansLloyd<NumberVector> kmeans = new KMeansLloyd<>(distance, 3, 0, initializer);
    // k-means picks a numerical relation from the database on its own; with
    // more than one numeric relation, make it explicit: kmeans.run(database, vectors);
    final Clustering<KMeansModel> result = kmeans.run(database);
    // Report every cluster:
    int clusterNo = 0;
    for (Cluster<KMeansModel> cluster : result.getAllClusters()) {
        // Without noise support, k-means names every cluster "Cluster":
        System.out.println("#" + clusterNo + ": " + cluster.getNameAutomatic());
        System.out.println("Size: " + cluster.size());
        System.out.println("Center: " + cluster.getModel().getPrototype().toString());
        // List the members:
        System.out.print("Objects: ");
        for (DBIDIter member = cluster.getIDs().iter(); member.valid(); member.advance()) {
            // The vector itself would be available via: vectors.get(member)
            // The offset within the DBID range serves as "line number".
            // Do NOT rely on using "internalGetIndex()" directly!
            System.out.print(" " + range.getOffset(member));
        }
        System.out.println();
        clusterNo++;
    }
}
Also used : KMeansLloyd(de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans.KMeansLloyd) KMeansModel(de.lmu.ifi.dbs.elki.data.model.KMeansModel) RandomlyGeneratedInitialMeans(de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans.initialization.RandomlyGeneratedInitialMeans) ArrayAdapterDatabaseConnection(de.lmu.ifi.dbs.elki.datasource.ArrayAdapterDatabaseConnection) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) NumberVector(de.lmu.ifi.dbs.elki.data.NumberVector) SquaredEuclideanDistanceFunction(de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.SquaredEuclideanDistanceFunction) Database(de.lmu.ifi.dbs.elki.database.Database) StaticArrayDatabase(de.lmu.ifi.dbs.elki.database.StaticArrayDatabase) DBIDRange(de.lmu.ifi.dbs.elki.database.ids.DBIDRange) ArrayAdapterDatabaseConnection(de.lmu.ifi.dbs.elki.datasource.ArrayAdapterDatabaseConnection) DatabaseConnection(de.lmu.ifi.dbs.elki.datasource.DatabaseConnection) StaticArrayDatabase(de.lmu.ifi.dbs.elki.database.StaticArrayDatabase)

Example 3 with StaticArrayDatabase

Use of de.lmu.ifi.dbs.elki.database.StaticArrayDatabase in project elki by elki-project.

Taken from the class ClassifierHoldoutEvaluationTask, method run.

/**
 * Runs the holdout evaluation of the classifier.
 *
 * Loads all data, initializes the holdout with it, and then for every
 * partition: builds a database from the training part, trains the classifier
 * on it, and classifies each object of the test part. The outcomes are counted
 * in a confusion matrix indexed as {@code [predicted][true]}, which is logged
 * at the end together with the timing statistics.
 *
 * @throws IllegalStateException if a predicted or true class label is not
 *         contained in the label list reported by the holdout
 */
@Override
public void run() {
    Duration ptime = LOG.newDuration("evaluation.time.load").begin();
    MultipleObjectsBundle allData = databaseConnection.loadData();
    holdout.initialize(allData);
    LOG.statistics(ptime.end());
    Duration time = LOG.newDuration("evaluation.time.total").begin();
    ArrayList<ClassLabel> labels = holdout.getLabels();
    int[][] confusion = new int[labels.size()][labels.size()];
    for (int p = 0; p < holdout.numberOfPartitions(); p++) {
        TrainingAndTestSet partition = holdout.nextPartitioning();
        // Common prefix of the statistics keys of this fold (folds are 1-based).
        final String prefix = this.getClass().getName() + ".fold-" + (p + 1);
        // Load the data set into a database structure (for indexing)
        Duration dur = LOG.newDuration(prefix + ".init.time").begin();
        Database db = new StaticArrayDatabase(new MultipleObjectsBundleDatabaseConnection(partition.getTraining()), indexFactories);
        db.initialize();
        LOG.statistics(dur.end());
        // Train the classifier
        dur = LOG.newDuration(prefix + ".train.time").begin();
        Relation<ClassLabel> lrel = db.getRelation(TypeUtil.CLASSLABEL);
        algorithm.buildClassifier(db, lrel);
        LOG.statistics(dur.end());
        // Evaluate the test set
        dur = LOG.newDuration(prefix + ".evaluation.time").begin();
        // FIXME: this part is still a big hack, unfortunately!
        MultipleObjectsBundle test = partition.getTest();
        int lcol = AbstractHoldout.findClassLabelColumn(test);
        // Takes column 0 or 1, whichever is not the label column.
        // NOTE(review): presumably assumes the objects are stored there - confirm.
        int tcol = (lcol == 0) ? 1 : 0;
        for (int i = 0, l = test.dataLength(); i < l; ++i) {
            @SuppressWarnings("unchecked") O obj = (O) test.data(i, tcol);
            ClassLabel truelbl = (ClassLabel) test.data(i, lcol);
            ClassLabel predlbl = algorithm.classify(obj);
            // binarySearch requires a sorted list.
            // NOTE(review): presumably holdout.getLabels() is sorted - confirm.
            int pred = Collections.binarySearch(labels, predlbl);
            int real = Collections.binarySearch(labels, truelbl);
            // A negative result means "not found"; using it as an array index
            // would fail with an uninformative ArrayIndexOutOfBoundsException.
            if (pred < 0 || real < 0) {
                throw new IllegalStateException("Class label not found in the label list of the holdout: predicted=" + predlbl + " true=" + truelbl);
            }
            confusion[pred][real]++;
        }
        LOG.statistics(dur.end());
    }
    LOG.statistics(time.end());
    ConfusionMatrix m = new ConfusionMatrix(labels, confusion);
    LOG.statistics(m.toString());
}
Also used : ConfusionMatrix(de.lmu.ifi.dbs.elki.evaluation.classification.ConfusionMatrix) MultipleObjectsBundle(de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle) Duration(de.lmu.ifi.dbs.elki.logging.statistics.Duration) TrainingAndTestSet(de.lmu.ifi.dbs.elki.evaluation.classification.holdout.TrainingAndTestSet) ClassLabel(de.lmu.ifi.dbs.elki.data.ClassLabel) AbstractDatabase(de.lmu.ifi.dbs.elki.database.AbstractDatabase) StaticArrayDatabase(de.lmu.ifi.dbs.elki.database.StaticArrayDatabase) Database(de.lmu.ifi.dbs.elki.database.Database) MultipleObjectsBundleDatabaseConnection(de.lmu.ifi.dbs.elki.datasource.MultipleObjectsBundleDatabaseConnection) StaticArrayDatabase(de.lmu.ifi.dbs.elki.database.StaticArrayDatabase)

Aggregations

Database (de.lmu.ifi.dbs.elki.database.Database)3 StaticArrayDatabase (de.lmu.ifi.dbs.elki.database.StaticArrayDatabase)3 NumberVector (de.lmu.ifi.dbs.elki.data.NumberVector)2 DBIDRange (de.lmu.ifi.dbs.elki.database.ids.DBIDRange)2 ArrayAdapterDatabaseConnection (de.lmu.ifi.dbs.elki.datasource.ArrayAdapterDatabaseConnection)2 DatabaseConnection (de.lmu.ifi.dbs.elki.datasource.DatabaseConnection)2 KMeansLloyd (de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans.KMeansLloyd)1 RandomlyGeneratedInitialMeans (de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans.initialization.RandomlyGeneratedInitialMeans)1 ClassLabel (de.lmu.ifi.dbs.elki.data.ClassLabel)1 DoubleVector (de.lmu.ifi.dbs.elki.data.DoubleVector)1 KMeansModel (de.lmu.ifi.dbs.elki.data.model.KMeansModel)1 AbstractDatabase (de.lmu.ifi.dbs.elki.database.AbstractDatabase)1 DBIDIter (de.lmu.ifi.dbs.elki.database.ids.DBIDIter)1 DoubleDBIDListIter (de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter)1 KNNList (de.lmu.ifi.dbs.elki.database.ids.KNNList)1 MultipleObjectsBundleDatabaseConnection (de.lmu.ifi.dbs.elki.datasource.MultipleObjectsBundleDatabaseConnection)1 MultipleObjectsBundle (de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle)1 LatLngDistanceFunction (de.lmu.ifi.dbs.elki.distance.distancefunction.geo.LatLngDistanceFunction)1 SquaredEuclideanDistanceFunction (de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.SquaredEuclideanDistanceFunction)1 ConfusionMatrix (de.lmu.ifi.dbs.elki.evaluation.classification.ConfusionMatrix)1