Use of de.lmu.ifi.dbs.elki.database.ids.DBIDRange in the project elki by elki-project.
Class GeoIndexing, method main.
/**
 * Example program: build an R*-tree over random latitude/longitude points
 * and run two k nearest neighbor queries against it.
 *
 * @param args Command line parameters (not used)
 */
public static void main(String[] args) {
  // Enable statistics-level logging.
  LoggingConfiguration.setStatistics();

  // Random input data, generated with a fixed seed.
  // Note: ELKI has a nice data generator class, use that instead.
  final Random rnd = new Random(0L);
  final double[][] points = new double[100000][];
  for (int row = 0; row < points.length; ++row) {
    points[row] = randomLatitudeLongitude(rnd);
  }

  // Wrap the existing array as a database connection.
  final DatabaseConnection connection = new ArrayAdapterDatabaseConnection(points);

  // The R-tree has many options, so it is configured through the
  // parameterization API, which handles defaults, instantiation, and
  // additional constraint checks.
  final RStarTreeFactory<?> rtreeFactory = new ELKIBuilder<>(RStarTreeFactory.class) //
      .with(AbstractPageFileFactory.Parameterizer.PAGE_SIZE_ID, 512) //
      .with(RStarTreeFactory.Parameterizer.BULK_SPLIT_ID, SortTileRecursiveBulkSplit.class) //
      .build();

  // Create the database; initializing it also builds the index.
  final Database db = new StaticArrayDatabase(connection, Arrays.asList(rtreeFactory));
  db.initialize();

  // Relation holding the number vectors loaded above.
  final Relation<NumberVector> rel = db.getRelation(TypeUtil.NUMBER_VECTOR_FIELD);
  // The id range lets us map results back to rows of the input array.
  final DBIDRange ids = (DBIDRange) rel.getDBIDs();

  // Log the statistics of every index once before querying.
  It<Index> indexesBefore = db.getHierarchy().iterDescendants(db).filter(Index.class);
  while (indexesBefore.valid()) {
    indexesBefore.get().logStatistics();
    indexesBefore.advance();
  }

  // WGS84 earth model with "latitude, longitude" coordinates.
  // This distance function returns meters.
  final LatLngDistanceFunction geoDistance = new LatLngDistanceFunction(WGS84SpheroidEarthModel.STATIC);
  final KNNQuery<NumberVector> knnQuery = QueryUtil.getKNNQuery(rel, geoDistance);

  // The 10 points closest to New York.
  final DoubleVector newYork = DoubleVector.wrap(new double[] { 40.730610, -73.935242 });
  final KNNList nearNewYork = knnQuery.getKNNForObject(newYork, 10);
  System.out.println("Close to New York:");
  DoubleDBIDListIter nyHit = nearNewYork.iter();
  while (nyHit.valid()) {
    // Meters to kilometers.
    final double km = nyHit.doubleValue() / 1000;
    System.out.println(rel.get(nyHit) + " distance: " + km + " km row: " + ids.getOffset(nyHit));
    nyHit.advance();
  }

  // Many other indexes will fail if we search close to the date line.
  final DoubleVector tuvalu = DoubleVector.wrap(new double[] { -7.4784205, 178.679924 });
  final KNNList nearTuvalu = knnQuery.getKNNForObject(tuvalu, 10);
  System.out.println("Close to Tuvalu:");
  DoubleDBIDListIter tuvaluHit = nearTuvalu.iter();
  while (tuvaluHit.valid()) {
    // Meters to kilometers.
    final double km = tuvaluHit.doubleValue() / 1000;
    System.out.println(rel.get(tuvaluHit) + " distance: " + km + " km row: " + ids.getOffset(tuvaluHit));
    tuvaluHit.advance();
  }

  // Log the index statistics a second time, now that the queries have run.
  It<Index> indexesAfter = db.getHierarchy().iterDescendants(db).filter(Index.class);
  while (indexesAfter.valid()) {
    indexesAfter.get().logStatistics();
    indexesAfter.advance();
  }
}
Use of de.lmu.ifi.dbs.elki.database.ids.DBIDRange in the project elki by elki-project.
Class SimpleDBIDFactory, method generateStaticDBIDRange.
/**
 * Reserve a block of {@code size} consecutive ids, beginning at the current
 * {@code rangestart}, and move {@code rangestart} past the reserved block.
 *
 * @param size Number of ids to reserve
 * @return Range covering the reserved ids
 * @throws AbortException when the requested block does not fit below
 *         {@link Integer#MAX_VALUE}
 */
@Override
public synchronized DBIDRange generateStaticDBIDRange(int size) {
  // Highest start position that still leaves room for "size" ids.
  final int limit = Integer.MAX_VALUE - size;
  if (!(rangestart < limit)) {
    throw new AbortException("DBID range allocation error - too many objects allocated!");
  }
  // Build the range first, only then advance the allocation pointer.
  final DBIDRange reserved = new IntegerDBIDRange(rangestart, size);
  rangestart = rangestart + size;
  return reserved;
}
Use of de.lmu.ifi.dbs.elki.database.ids.DBIDRange in the project elki by elki-project.
Class KNNBenchmarkAlgorithm, method run.
/**
 * Run the algorithm.
 *
 * Performs kNN queries either for a random sample of the database objects
 * (when no separate query set is configured) or for a random sample of the
 * objects loaded from the query data source. For every result, a checksum of
 * the neighbor ids, the result size, and the k-distance are accumulated and
 * finally written to the statistics log.
 *
 * @param database Database
 * @param relation Relation
 * @return Null result
 * @throws IncompatibleDataException if a query set is configured, but none
 *         of its columns matches the input type of the distance function
 */
public Result run(Database database, Relation<O> relation) {
  // Get a distance and kNN query instance.
  DistanceQuery<O> distQuery = database.getDistanceQuery(relation, getDistanceFunction());
  KNNQuery<O> knnQuery = database.getKNNQuery(distQuery, k);
  // Accumulators shared by both query modes.
  int hash = 0;
  MeanVariance mv = new MeanVariance(), mvdist = new MeanVariance();
  if (queries == null) {
    // No query set - use original database.
    final DBIDs sample = DBIDUtil.randomSample(relation.getDBIDs(), sampling, random);
    FiniteProgress prog = LOG.isVeryVerbose() ? new FiniteProgress("kNN queries", sample.size(), LOG) : null;
    for (DBIDIter iditer = sample.iter(); iditer.valid(); iditer.advance()) {
      KNNList knns = knnQuery.getKNNForDBID(iditer, k);
      hash = accumulateResult(knns, hash, mv, mvdist);
      LOG.incrementProcessed(prog);
    }
    LOG.ensureCompleted(prog);
  } else {
    // Separate query set.
    TypeInformation res = getDistanceFunction().getInputTypeRestriction();
    MultipleObjectsBundle bundle = queries.loadData();
    // Find the first column compatible with the distance function.
    int col = -1;
    for (int i = 0; i < bundle.metaLength(); i++) {
      if (res.isAssignableFromType(bundle.meta(i))) {
        col = i;
        break;
      }
    }
    if (col < 0) {
      throw new IncompatibleDataException("No compatible data type in query input was found. Expected: " + res.toString());
    }
    // Random sampling is a bit of hack, sorry.
    // But currently, we don't (yet) have an "integer random sample" function.
    DBIDRange sids = DBIDUtil.generateStaticDBIDRange(bundle.dataLength());
    final DBIDs sample = DBIDUtil.randomSample(sids, sampling, random);
    FiniteProgress prog = LOG.isVeryVerbose() ? new FiniteProgress("kNN queries", sample.size(), LOG) : null;
    for (DBIDIter iditer = sample.iter(); iditer.valid(); iditer.advance()) {
      // Map the sampled id back to a row of the query bundle.
      int off = sids.binarySearch(iditer);
      assert (off >= 0);
      @SuppressWarnings("unchecked") O o = (O) bundle.data(off, col);
      KNNList knns = knnQuery.getKNNForObject(o, k);
      hash = accumulateResult(knns, hash, mv, mvdist);
      LOG.incrementProcessed(prog);
    }
    LOG.ensureCompleted(prog);
  }
  logBenchmarkStatistics(hash, mv, mvdist);
  return null;
}

/**
 * Fold one kNN result into the running benchmark accumulators.
 *
 * @param knns kNN result to account for
 * @param hash Hash code accumulated so far
 * @param mv Statistics on the result sizes
 * @param mvdist Statistics on the k-distances
 * @return Updated hash code
 */
private int accumulateResult(KNNList knns, int hash, MeanVariance mv, MeanVariance mvdist) {
  // Summing the integer ids makes the checksum independent of result order.
  int ichecksum = 0;
  for (DBIDIter it = knns.iter(); it.valid(); it.advance()) {
    ichecksum += DBIDUtil.asInteger(it);
  }
  final int mixed = Util.mixHashCodes(hash, ichecksum);
  mv.put(knns.size());
  mvdist.put(knns.getKNNDistance());
  return mixed;
}

/**
 * Write the accumulated benchmark statistics to the log, if statistics
 * logging is enabled.
 *
 * @param hash Hash code over all results
 * @param mv Statistics on the result sizes
 * @param mvdist Statistics on the k-distances
 */
private void logBenchmarkStatistics(int hash, MeanVariance mv, MeanVariance mvdist) {
  if (!LOG.isStatistics()) {
    return;
  }
  LOG.statistics("Result hashcode: " + hash);
  LOG.statistics("Mean number of results: " + mv.getMean() + " +- " + mv.getNaiveStddev());
  // Only report the k-distance if at least one value was recorded.
  if (mvdist.getCount() > 0) {
    LOG.statistics("Mean k-distance: " + mvdist.getMean() + " +- " + mvdist.getNaiveStddev());
  }
}
Use of de.lmu.ifi.dbs.elki.database.ids.DBIDRange in the project elki by elki-project.
Class CacheDoubleDistanceInOnDiskMatrix, method run.
/**
 * Compute the pairwise distances of all objects and store the upper
 * triangle (diagonal included) in an on-disk matrix written to {@code out}.
 *
 * @throws AbortException when creating or writing the matrix fails with an
 *         {@link IOException}
 */
@Override
public void run() {
  database.initialize();
  final Relation<O> rel = database.getRelation(distance.getInputTypeRestriction());
  final DistanceQuery<O> dq = database.getDistanceQuery(rel, distance);
  final DBIDRange range = DBIDUtil.assertRange(rel.getDBIDs());
  final int n = range.size();

  FiniteProgress progress = null;
  if (LOG.isVerbose()) {
    // Number of cells in the upper triangle, including the diagonal.
    progress = new FiniteProgress("Precomputing distances", (int) (((n + 1) * (long) n) >>> 1), LOG);
  }

  try (OnDiskUpperTriangleMatrix matrix = new OnDiskUpperTriangleMatrix(out, DiskCacheBasedDoubleDistanceFunction.DOUBLE_CACHE_MAGIC, 0, ByteArrayUtil.SIZE_DOUBLE, n)) {
    final DBIDArrayIter rowIt = range.iter(), colIt = range.iter();
    while (rowIt.valid()) {
      // Start the column iterator on the diagonal of the current row.
      colIt.seek(rowIt.getOffset());
      while (colIt.valid()) {
        final double dist = dq.distance(rowIt, colIt);
        // Optional sanity check: compare with the reverse direction.
        if (debugExtraCheckSymmetry && Math.abs(dist - dq.distance(colIt, rowIt)) > 0.0000001) {
          LOG.warning("Distance function doesn't appear to be symmetric!");
        }
        try {
          matrix.getRecordBuffer(rowIt.getOffset(), colIt.getOffset()).putDouble(dist);
        } catch (IOException e) {
          throw new AbortException("Error writing distance record " + DBIDUtil.toString(rowIt) + "," + DBIDUtil.toString(colIt) + " to matrix.", e);
        }
        colIt.advance();
      }
      // One row done: it contributed (n - row offset) cells.
      if (progress != null) {
        progress.setProcessed(progress.getProcessed() + (n - rowIt.getOffset()), LOG);
      }
      rowIt.advance();
    }
  } catch (IOException e) {
    throw new AbortException("Error precomputing distance matrix.", e);
  }
  LOG.ensureCompleted(progress);
}
Use of de.lmu.ifi.dbs.elki.database.ids.DBIDRange in the project elki by elki-project.
Class PassingDataToELKI, method main.
/**
 * Example program: pass an in-memory array to ELKI, cluster it with k-means,
 * and print every cluster together with the input rows it contains.
 *
 * @param args Command line parameters (not supported)
 */
public static void main(String[] args) {
  // Enable statistics-level logging.
  LoggingConfiguration.setStatistics();

  // Random two-dimensional input data.
  // Note: ELKI has a nice data generator class, use that instead.
  final double[][] points = new double[1000][2];
  for (double[] point : points) {
    for (int d = 0; d < point.length; d++) {
      point[d] = Math.random();
    }
  }

  // Wrap the existing array as a database connection.
  final DatabaseConnection connection = new ArrayAdapterDatabaseConnection(points);
  // Create a database (which may contain multiple relations!)
  final Database db = new StaticArrayDatabase(connection, null);
  // Load the data into the database (do NOT forget to initialize...)
  db.initialize();

  // Relation containing the number vectors.
  final Relation<NumberVector> rel = db.getRelation(TypeUtil.NUMBER_VECTOR_FIELD);
  // We know that the ids must be a continuous range.
  final DBIDRange ids = (DBIDRange) rel.getDBIDs();

  // K-means should be used with squared Euclidean (least squares).
  final SquaredEuclideanDistanceFunction dist = SquaredEuclideanDistanceFunction.STATIC;
  // Default initialization, using global random.
  // To fix the random seed, use: new RandomFactory(seed);
  final RandomlyGeneratedInitialMeans init = new RandomlyGeneratedInitialMeans(RandomFactory.DEFAULT);

  // Textbook k-means clustering; arguments are: distance, k (number of
  // partitions), maximum number of iterations (0: no limit), initialization.
  final KMeansLloyd<NumberVector> kmeans = new KMeansLloyd<>(dist, 3, 0, init);

  // K-means will automatically choose a numerical relation from the data
  // set. With more than one numeric relation, it could be made explicit:
  // kmeans.run(db, rel);
  final Clustering<KMeansModel> result = kmeans.run(db);

  // Output all clusters.
  int number = 0;
  for (Cluster<KMeansModel> cluster : result.getAllClusters()) {
    // K-means will name all clusters "Cluster" in lack of noise support.
    System.out.println("#" + number + ": " + cluster.getNameAutomatic());
    System.out.println("Size: " + cluster.size());
    System.out.println("Center: " + cluster.getModel().getPrototype().toString());

    System.out.print("Objects: ");
    DBIDIter member = cluster.getIDs().iter();
    while (member.valid()) {
      // The vector itself would be available via rel.get(member).
      // Print the offset within our DBID range, i.e. the "line number".
      // Do NOT rely on using "internalGetIndex()" directly!
      System.out.print(" " + ids.getOffset(member));
      member.advance();
    }
    System.out.println();
    number++;
  }
}
Aggregations