use of de.lmu.ifi.dbs.elki.data.DoubleVector in project elki by elki-project.
the class GeoIndexing method main.
/**
 * Geo indexing example: generates random latitude/longitude points, loads
 * them into a static database indexed by an R*-tree, and runs two
 * 10-nearest-neighbor queries using a WGS84 distance function.
 *
 * @param args Command line arguments (not used)
 */
public static void main(String[] args) {
  // Set the logging level to statistics:
  LoggingConfiguration.setStatistics();
  // Generate a random data set (fixed seed, so runs are repeatable).
  // Note: ELKI has a nice data generator class, use that instead.
  Random rand = new Random(0L);
  double[][] data = new double[100000][];
  for (int i = 0; i < data.length; i++) {
    data[i] = randomLatitudeLongitude(rand);
  }
  // Adapter to load data from an existing array.
  DatabaseConnection dbc = new ArrayAdapterDatabaseConnection(data);
  // Since the R-tree has so many options, it is a bit easier to configure it
  // using the parameterization API, which handles defaults, instantiation,
  // and additional constraint checks.
  RStarTreeFactory<?> indexfactory = new ELKIBuilder<>(RStarTreeFactory.class) //
      .with(AbstractPageFileFactory.Parameterizer.PAGE_SIZE_ID, 512) //
      .with(RStarTreeFactory.Parameterizer.BULK_SPLIT_ID, SortTileRecursiveBulkSplit.class) //
      .build();
  // Create the database, and initialize it.
  Database db = new StaticArrayDatabase(dbc, Arrays.asList(indexfactory));
  // This will build the index of the database.
  db.initialize();
  // Relation containing the number vectors we put in above:
  Relation<NumberVector> rel = db.getRelation(TypeUtil.NUMBER_VECTOR_FIELD);
  // We can use this to identify rows of the input data below.
  DBIDRange ids = (DBIDRange) rel.getDBIDs();
  // For all indexes, dump their statistics (state right after construction).
  logIndexStatistics(db);
  // We use the WGS84 earth model, and "latitude, longitude" coordinates:
  // This distance function returns meters.
  LatLngDistanceFunction df = new LatLngDistanceFunction(WGS84SpheroidEarthModel.STATIC);
  // k nearest neighbor query:
  KNNQuery<NumberVector> knnq = QueryUtil.getKNNQuery(rel, df);
  // Let's find the closest points to New York:
  DoubleVector newYork = DoubleVector.wrap(new double[] { 40.730610, -73.935242 });
  KNNList knns = knnq.getKNNForObject(newYork, 10);
  printNeighbors("Close to New York:", knns, rel, ids);
  // Many other indexes will fail if we search close to the date line.
  DoubleVector tuvalu = DoubleVector.wrap(new double[] { -7.4784205, 178.679924 });
  knns = knnq.getKNNForObject(tuvalu, 10);
  printNeighbors("Close to Tuvalu:", knns, rel, ids);
  // Dump the index statistics again, now that both queries have been run.
  logIndexStatistics(db);
}

/**
 * Log the statistics of every index found below the database in its result
 * hierarchy.
 *
 * @param db Database whose indexes are inspected
 */
private static void logIndexStatistics(Database db) {
  for (It<Index> it = db.getHierarchy().iterDescendants(db).filter(Index.class); it.valid(); it.advance()) {
    it.get().logStatistics();
  }
}

/**
 * Print a heading followed by one line per neighbor: the stored vector, the
 * distance converted to kilometers, and the row offset in the input data.
 *
 * @param heading Line printed before the neighbors
 * @param knns Nearest neighbor result to print
 * @param rel Relation used to look up the neighbor vectors
 * @param ids ID range used to map neighbors back to input rows
 */
private static void printNeighbors(String heading, KNNList knns, Relation<NumberVector> rel, DBIDRange ids) {
  System.out.println(heading);
  for (DoubleDBIDListIter it = knns.iter(); it.valid(); it.advance()) {
    // To kilometers
    double km = it.doubleValue() / 1000;
    System.out.println(rel.get(it) + " distance: " + km + " km row: " + ids.getOffset(it));
  }
}
use of de.lmu.ifi.dbs.elki.data.DoubleVector in project elki by elki-project.
the class CenterOfMassMetaClustering method run.
/**
 * Wrapper entry point for the meta clustering.
 *
 * It is called from {@link AbstractAlgorithm#run(Database)}. For every object
 * of the relation, the center of mass is stored in a new data store; the
 * actual clustering is then delegated to
 * {@code runClusteringAlgorithm}, which works on these centers.
 *
 * @param database Database
 * @param relation Data relation of uncertain objects
 * @return Clustering result
 */
public C run(Database database, Relation<? extends UncertainObject> relation) {
  final int dim = RelationUtil.dimensionality(relation);
  final DBIDs ids = relation.getDBIDs();
  // One center-of-mass vector per object ID.
  WritableDataStore<DoubleVector> centers = DataStoreUtil.makeStorage(ids, DataStoreFactory.HINT_DB, DoubleVector.class);
  DBIDIter it = ids.iter();
  while (it.valid()) {
    centers.put(it, relation.get(it).getCenterOfMass());
    it.advance();
  }
  return runClusteringAlgorithm(database.getHierarchy(), relation, ids, centers, dim, "Uncertain Model: Center of Mass");
}
use of de.lmu.ifi.dbs.elki.data.DoubleVector in project elki by elki-project.
the class CenterOfMassMetaClustering method runClusteringAlgorithm.
/**
 * Run the inner clustering algorithm on a single materialized instance.
 *
 * The vectors are wrapped in a relation and clustered inside a temporary
 * proxy database. Afterwards the relation and the clustering are detached
 * from the proxy's hierarchy and attached to the given hierarchy instead:
 * the relation below {@code parent}, the clustering below the relation.
 *
 * @param hierarchy Result hierarchy that receives the new results
 * @param parent Parent result to attach to
 * @param ids Object IDs to process
 * @param store Input data
 * @param dim Dimensionality
 * @param title Title of relation
 * @return Clustering result
 */
protected C runClusteringAlgorithm(ResultHierarchy hierarchy, Result parent, DBIDs ids, DataStore<DoubleVector> store, int dim, String title) {
  // Wrap the stored vectors as a relation of the given dimensionality.
  SimpleTypeInformation<DoubleVector> vectorType = new VectorFieldTypeInformation<>(DoubleVector.FACTORY, dim);
  Relation<DoubleVector> vectors = new MaterializedRelation<>(vectorType, ids, title, store);
  // Cluster inside a temporary database containing only this relation.
  ProxyDatabase proxy = new ProxyDatabase(ids, vectors);
  C clustering = inner.run(proxy);
  // Detach both results from the temporary database ...
  proxy.getHierarchy().remove(vectors);
  proxy.getHierarchy().remove(clustering);
  // ... and hang them into the caller's hierarchy.
  hierarchy.add(parent, vectors);
  hierarchy.add(vectors, clustering);
  return clustering;
}
use of de.lmu.ifi.dbs.elki.data.DoubleVector in project elki by elki-project.
the class SampleKMeansInitializationTest method testSampleKMeansInitialization.
/**
 * Cluster a fixed data set with a sample-initialized k-means using fixed
 * parameters, then check F-measure and cluster sizes against stored
 * reference values.
 */
@Test
public void testSampleKMeansInitialization() {
  Database db = makeSimpleDatabase(UNITTEST + "different-densities-2d-no-noise.ascii", 1000);
  // KMeans.SEED_ID appears twice in this chain.
  // NOTE(review): presumably the second occurrence seeds the nested k-means
  // given via KMEANS_ID - confirm before removing either one.
  SingleAssignmentKMeans<DoubleVector> kmeans = new ELKIBuilder<SingleAssignmentKMeans<DoubleVector>>(SingleAssignmentKMeans.class) //
      .with(KMeans.K_ID, 5) //
      .with(KMeans.SEED_ID, 8) //
      .with(KMeans.INIT_ID, SampleKMeansInitialization.class) //
      .with(SampleKMeansInitialization.Parameterizer.KMEANS_ID, KMeansHamerly.class) //
      .with(KMeans.SEED_ID, 8) //
      .with(SampleKMeansInitialization.Parameterizer.SAMPLE_ID, 100) //
      .build();
  Clustering<?> result = kmeans.run(db);
  testFMeasure(db, result, 0.99601);
  final int[] expectedSizes = { 199, 199, 200, 201, 201 };
  testClusterSizes(result, expectedSizes);
}
use of de.lmu.ifi.dbs.elki.data.DoubleVector in project elki by elki-project.
the class WithinClusterMeanDistanceQualityMeasureTest method testOverallDistance.
/**
 * Check the within-cluster mean distance quality measure on a k-means
 * result (k = 2, first-k initialization) against a stored reference value.
 */
@Test
public void testOverallDistance() {
  Database db = makeSimpleDatabase(UNITTEST + "quality-measure-test.csv", 7);
  Relation<DoubleVector> vectors = db.getRelation(TypeUtil.DOUBLE_VECTOR_FIELD);
  KMeansLloyd<DoubleVector> lloyd = new ELKIBuilder<KMeansLloyd<DoubleVector>>(KMeansLloyd.class) //
      .with(KMeans.K_ID, 2) //
      .with(KMeans.INIT_ID, FirstKInitialMeans.class) //
      .build();
  // Cluster the database, and reuse the distance function k-means was run with.
  Clustering<KMeansModel> clustering = lloyd.run(db);
  final NumberVectorDistanceFunction<? super DoubleVector> distance = lloyd.getDistanceFunction();
  // Evaluate the quality measure under test.
  KMeansQualityMeasure<? super DoubleVector> measure = new WithinClusterMeanDistanceQualityMeasure();
  final double actual = measure.quality(clustering, distance, vectors);
  final double expected = 0.8888888888888888;
  assertEquals("Avarage overall distance not as expected.", expected, actual, 1e-10);
}
Aggregations