use of de.lmu.ifi.dbs.elki.datasource.ArrayAdapterDatabaseConnection in project elki by elki-project.
the class GeoIndexing method main.
public static void main(String[] args) {
// Set the logging level to statistics:
LoggingConfiguration.setStatistics();
// Generate a random data set.
Random rand = new Random(0L);
// Note: ELKI has a nice data generator class, use that instead.
double[][] data = new double[100000][];
for (int i = 0; i < data.length; i++) {
data[i] = randomLatitudeLongitude(rand);
}
// Adapter to load data from an existing array.
DatabaseConnection dbc = new ArrayAdapterDatabaseConnection(data);
// Since the R-tree has so many options, it is a bit easier to configure it
// using the parameterization API, which handles defaults, instantiation,
// and additional constraint checks.
RStarTreeFactory<?> indexfactory = //
new ELKIBuilder<>(RStarTreeFactory.class).with(AbstractPageFileFactory.Parameterizer.PAGE_SIZE_ID, //
512).with(RStarTreeFactory.Parameterizer.BULK_SPLIT_ID, //
SortTileRecursiveBulkSplit.class).build();
// Create the database, and initialize it.
Database db = new StaticArrayDatabase(dbc, Arrays.asList(indexfactory));
// This will build the index of the database.
db.initialize();
// Relation containing the number vectors we put in above:
Relation<NumberVector> rel = db.getRelation(TypeUtil.NUMBER_VECTOR_FIELD);
// We can use this to identify rows of the input data below.
DBIDRange ids = (DBIDRange) rel.getDBIDs();
// For all indexes, dump their statistics.
for (It<Index> it = db.getHierarchy().iterDescendants(db).filter(Index.class); it.valid(); it.advance()) {
it.get().logStatistics();
}
// We use the WGS84 earth model, and "latitude, longitude" coordinates:
// This distance function returns meters.
LatLngDistanceFunction df = new LatLngDistanceFunction(WGS84SpheroidEarthModel.STATIC);
// k nearest neighbor query:
KNNQuery<NumberVector> knnq = QueryUtil.getKNNQuery(rel, df);
// Let's find the closest points to New York:
DoubleVector newYork = DoubleVector.wrap(new double[] { 40.730610, -73.935242 });
KNNList knns = knnq.getKNNForObject(newYork, 10);
// Iterate over all results.
System.out.println("Close to New York:");
for (DoubleDBIDListIter it = knns.iter(); it.valid(); it.advance()) {
// To kilometers
double km = it.doubleValue() / 1000;
System.out.println(rel.get(it) + " distance: " + km + " km row: " + ids.getOffset(it));
}
// Many other indexes will fail if we search close to the date line.
DoubleVector tuvalu = DoubleVector.wrap(new double[] { -7.4784205, 178.679924 });
knns = knnq.getKNNForObject(tuvalu, 10);
// Iterate over all results.
System.out.println("Close to Tuvalu:");
for (DoubleDBIDListIter it = knns.iter(); it.valid(); it.advance()) {
// To kilometers
double km = it.doubleValue() / 1000;
System.out.println(rel.get(it) + " distance: " + km + " km row: " + ids.getOffset(it));
}
// the distances to a few points in the data set.
for (It<Index> it = db.getHierarchy().iterDescendants(db).filter(Index.class); it.valid(); it.advance()) {
it.get().logStatistics();
}
}
use of de.lmu.ifi.dbs.elki.datasource.ArrayAdapterDatabaseConnection in project elki by elki-project.
the class PassingDataToELKI method main.
/**
* Main method
*
* @param args Command line parameters (not supported)
*/
public static void main(String[] args) {
// Set the logging level to statistics:
LoggingConfiguration.setStatistics();
// Generate a random data set.
// Note: ELKI has a nice data generator class, use that instead.
double[][] data = new double[1000][2];
for (int i = 0; i < data.length; i++) {
for (int j = 0; j < data[i].length; j++) {
data[i][j] = Math.random();
}
}
// Adapter to load data from an existing array.
DatabaseConnection dbc = new ArrayAdapterDatabaseConnection(data);
// Create a database (which may contain multiple relations!)
Database db = new StaticArrayDatabase(dbc, null);
// Load the data into the database (do NOT forget to initialize...)
db.initialize();
// Relation containing the number vectors:
Relation<NumberVector> rel = db.getRelation(TypeUtil.NUMBER_VECTOR_FIELD);
// We know that the ids must be a continuous range:
DBIDRange ids = (DBIDRange) rel.getDBIDs();
// K-means should be used with squared Euclidean (least squares):
SquaredEuclideanDistanceFunction dist = SquaredEuclideanDistanceFunction.STATIC;
// Default initialization, using global random:
// To fix the random seed, use: new RandomFactory(seed);
RandomlyGeneratedInitialMeans init = new RandomlyGeneratedInitialMeans(RandomFactory.DEFAULT);
// Textbook k-means clustering:
KMeansLloyd<NumberVector> km = new //
KMeansLloyd<>(//
dist, //
3, /* k - number of partitions */
0, /* maximum number of iterations: no limit */
init);
// K-means will automatically choose a numerical relation from the data set:
// But we could make it explicit (if there were more than one numeric
// relation!): km.run(db, rel);
Clustering<KMeansModel> c = km.run(db);
// Output all clusters:
int i = 0;
for (Cluster<KMeansModel> clu : c.getAllClusters()) {
// K-means will name all clusters "Cluster" in lack of noise support:
System.out.println("#" + i + ": " + clu.getNameAutomatic());
System.out.println("Size: " + clu.size());
System.out.println("Center: " + clu.getModel().getPrototype().toString());
// Iterate over objects:
System.out.print("Objects: ");
for (DBIDIter it = clu.getIDs().iter(); it.valid(); it.advance()) {
// To get the vector use:
// NumberVector v = rel.get(it);
// Offset within our DBID range: "line number"
final int offset = ids.getOffset(it);
System.out.print(" " + offset);
// Do NOT rely on using "internalGetIndex()" directly!
}
System.out.println();
++i;
}
}
Aggregations