use of de.lmu.ifi.dbs.elki.database.ids.DBIDRange in project elki by elki-project.
the class RepresentativeUncertainClustering method run.
/**
 * Wrapper run method: clusters sampled worlds, then clusters the clusterings.
 *
 * It is called from {@link AbstractAlgorithm#run(Database)}. It draws
 * {@code numsamples} samples of every uncertain object, clusters each sample
 * via {@code runClusteringAlgorithm}, runs the meta algorithm on the resulting
 * clusterings (using a precomputed distance matrix), and finally attaches a
 * {@code RepresentativenessEvaluation} to one representative clustering per
 * meta cluster.
 *
 * @param database Database
 * @param relation Data relation of uncertain objects
 * @return Clustering result (the clustering produced by the meta algorithm)
 */
public Clustering<?> run(Database database, Relation<? extends UncertainObject> relation) {
  final ResultHierarchy hierarchy = database.getHierarchy();
  final List<Clustering<?>> sampleClusterings = new ArrayList<>();
  final int dim = RelationUtil.dimensionality(relation);
  final DBIDs ids = relation.getDBIDs();
  // Container result below which the sampled worlds are collected.
  final Result samples = new BasicResult("Samples", "samples");

  // Phase 1: draw one sample per object, and cluster each sampled world.
  final Random rnd = random.getSingleThreadedRandom();
  FiniteProgress progress = null;
  if (LOG.isVerbose()) {
    progress = new FiniteProgress("Clustering samples", numsamples, LOG);
  }
  for (int s = 0; s < numsamples; s++) {
    final WritableDataStore<DoubleVector> world = DataStoreUtil.makeStorage(ids, DataStoreFactory.HINT_DB, DoubleVector.class);
    for (DBIDIter objIt = ids.iter(); objIt.valid(); objIt.advance()) {
      final UncertainObject obj = relation.get(objIt);
      world.put(objIt, obj.drawSample(rnd));
    }
    sampleClusterings.add(runClusteringAlgorithm(hierarchy, samples, ids, world, dim, "Sample " + s));
    LOG.incrementProcessed(progress);
  }
  LOG.ensureCompleted(progress);

  // Phase 2: meta clustering; every sample clustering becomes one object.
  final DBIDRange rids = DBIDFactory.FACTORY.generateStaticDBIDRange(sampleClusterings.size());
  final WritableDataStore<Clustering<?>> datastore = DataStoreUtil.makeStorage(rids, DataStoreFactory.HINT_DB, Clustering.class);
  {
    // Walk the new id range and the clustering list in lockstep.
    final Iterator<Clustering<?>> source = sampleClusterings.iterator();
    DBIDIter slot = rids.iter();
    while (slot.valid()) {
      datastore.put(slot, source.next());
      slot.advance();
    }
  }
  assert (rids.size() == sampleClusterings.size());
  // Relation of clusterings, plus the pairwise distance matrix on it.
  final Relation<Clustering<?>> crel = new MaterializedRelation<Clustering<?>>(Clustering.TYPE, rids, "Clusterings", datastore);
  final PrecomputedDistanceMatrix<Clustering<?>> mat = new PrecomputedDistanceMatrix<>(crel, rids, distance);
  mat.initialize();
  final ProxyDatabase proxy = new ProxyDatabase(rids, crel);
  proxy.getHierarchy().add(crel, mat);
  final Clustering<?> meta = metaAlgorithm.run(proxy);
  // Unhook the meta clustering from the temporary proxy database.
  proxy.getHierarchy().remove(proxy, meta);

  // Phase 3: choose and evaluate one representative per meta cluster.
  final Result reps = new BasicResult("Representants", "representative");
  hierarchy.add(relation, reps);
  final DistanceQuery<Clustering<?>> dq = mat.getDistanceQuery(distance);
  final List<? extends Cluster<?>> metaClusters = meta.getAllClusters();
  final List<DoubleObjPair<Clustering<?>>> evaluated = new ArrayList<>(metaClusters.size());
  for (Cluster<?> cluster : metaClusters) {
    double bestTau = Double.POSITIVE_INFINITY;
    Clustering<?> representative = null;
    for (DBIDIter candIt = cluster.getIDs().iter(); candIt.valid(); candIt.advance()) {
      final Clustering<?> candidate = crel.get(candIt);
      // Largest distance from this candidate to any other cluster member.
      double maxDist = 0.;
      for (DBIDIter otherIt = cluster.getIDs().iter(); otherIt.valid(); otherIt.advance()) {
        if (!DBIDUtil.equal(candIt, otherIt)) {
          final double dist = dq.distance(candidate, otherIt);
          if (dist > maxDist) {
            maxDist = dist;
          }
        }
      }
      // Keep the member whose maximum distance is smallest.
      if (maxDist < bestTau) {
        bestTau = maxDist;
        representative = candidate;
      }
    }
    if (representative == null) {
      // Nothing to pick from, e.g. a degenerate empty cluster.
      continue;
    }
    // Largest distance from the representative to any sample clustering.
    double globalTau = 0.;
    for (DBIDIter allIt = crel.iterDBIDs(); allIt.valid(); allIt.advance()) {
      final double dist = dq.distance(representative, allIt);
      if (dist > globalTau) {
        globalTau = dist;
      }
    }
    final double confidence = computeConfidence(cluster.size(), crel.size());
    // Attach the evaluation to the representative, and remember it.
    hierarchy.add(representative, new RepresentativenessEvaluation(globalTau, bestTau, confidence));
    evaluated.add(new DoubleObjPair<Clustering<?>>(confidence, representative));
  }
  // Order representatives by descending confidence.
  Collections.sort(evaluated, Collections.reverseOrder());
  for (DoubleObjPair<Clustering<?>> entry : evaluated) {
    // Hook the parent relations (= the sample) of each representative below
    // the "Representants" result.
    for (It<Relation<?>> parentIt = hierarchy.iterParents(entry.second).filter(Relation.class); parentIt.valid(); parentIt.advance()) {
      hierarchy.add(reps, parentIt.get());
    }
  }
  // Either publish all samples below the input relation, or discard them.
  if (!keep) {
    hierarchy.removeSubtree(samples);
  }
  else {
    hierarchy.add(relation, samples);
  }
  return meta;
}
use of de.lmu.ifi.dbs.elki.database.ids.DBIDRange in project elki by elki-project.
the class MemoryDataStoreFactory method makeStorage.
/**
 * Create a writable data store for the given ids and value class.
 *
 * {@code Double} and {@code Integer} value classes are delegated to
 * {@code makeDoubleStorage} and {@code makeIntegerStorage}. For any other
 * class, a {@link DBIDRange} gets an array-backed store with one slot per id,
 * and all other id collections get a map-backed store.
 *
 * @param ids DBIDs to store data for
 * @param hints Storage hints, passed on to the double/integer factories
 * @param dataclass Class of the values to store
 * @return new data store
 */
@SuppressWarnings("unchecked")
@Override
public <T> WritableDataStore<T> makeStorage(DBIDs ids, int hints, Class<? super T> dataclass) {
  if (Double.class.equals(dataclass)) {
    return (WritableDataStore<T>) makeDoubleStorage(ids, hints);
  }
  if (Integer.class.equals(dataclass)) {
    return (WritableDataStore<T>) makeIntegerStorage(ids, hints);
  }
  // Without a contiguous range, fall back to the map-based store.
  if (!(ids instanceof DBIDRange)) {
    return new MapIntegerDBIDStore<>(ids.size());
  }
  // A range can be backed by a plain object array.
  final DBIDRange idRange = (DBIDRange) ids;
  final Object[] slots = new Object[idRange.size()];
  return new ArrayStore<>(slots, idRange);
}
use of de.lmu.ifi.dbs.elki.database.ids.DBIDRange in project elki by elki-project.
the class FixedDBIDsFilter method filter.
/**
 * Assign a static DBID range to the bundle, starting at the current offset
 * {@code curid}, and advance the offset by the bundle length.
 *
 * @param objects Bundle to assign ids to (modified in place)
 * @return the same bundle, with its DBIDs set
 */
@Override
public MultipleObjectsBundle filter(MultipleObjectsBundle objects) {
  // New range begins at the current offset and spans the bundle.
  objects.setDBIDs(DBIDFactory.FACTORY.generateStaticDBIDRange(curid, objects.dataLength()));
  // Move the offset past the ids just assigned.
  curid += objects.dataLength();
  return objects;
}
use of de.lmu.ifi.dbs.elki.database.ids.DBIDRange in project elki by elki-project.
the class WeightedQuickUnionRangeDBIDsTest method testWorstCase.
/**
 * Worst-case with 10 nodes, from Sedgewick.
 *
 * We don't test runtime, but this is an interesting case nevertheless.
 *
 * After the final union, all 10 nodes must be in a single component; this is
 * verified for every pair of ids in the range.
 */
@Test
public void testWorstCase() {
  DBIDRange range = DBIDUtil.generateStaticDBIDRange(10);
  UnionFind uf = new WeightedQuickUnionRangeDBIDs(range);
  DBIDArrayIter i1 = range.iter(), i2 = range.iter();
  assertFalse(uf.isConnected(i1.seek(0), i2.seek(1)));
  uf.union(i1.seek(0), i2.seek(1));
  assertTrue(uf.isConnected(i1.seek(0), i2.seek(1)));
  uf.union(i1.seek(2), i2.seek(3));
  assertFalse(uf.isConnected(i1.seek(0), i2.seek(2)));
  uf.union(i1.seek(5), i2.seek(4));
  uf.union(i1.seek(7), i2.seek(6));
  uf.union(i1.seek(8), i2.seek(9));
  uf.union(i1.seek(1), i2.seek(3));
  assertTrue(uf.isConnected(i1.seek(0), i2.seek(2)));
  uf.union(i1.seek(4), i2.seek(6));
  assertTrue(uf.isConnected(i1.seek(5), i2.seek(7)));
  uf.union(i1.seek(3), i2.seek(7));
  assertTrue(uf.isConnected(i1.seek(0), i2.seek(4)));
  // {8, 9} is still a separate component at this point.
  assertFalse(uf.isConnected(i1.seek(0), i2.seek(9)));
  uf.union(i1.seek(0), i2.seek(9));
  // Check all pairs over the full range, so that the effect of the last
  // union (joining 8 and 9 to the rest) is actually asserted.
  final int size = range.size();
  for (int i = 0; i < size; i++) {
    for (int j = 0; j < size; j++) {
      assertTrue(uf.isConnected(i1.seek(i), i2.seek(j)));
    }
  }
}
use of de.lmu.ifi.dbs.elki.database.ids.DBIDRange in project elki by elki-project.
the class WeightedQuickUnionRangeDBIDsTest method testRoots.
/**
 * Check the number of roots reported after successive unions on 8 elements.
 */
@Test
public void testRoots() {
  final DBIDRange ids = DBIDUtil.generateStaticDBIDRange(8);
  final UnionFind unionFind = new WeightedQuickUnionRangeDBIDs(ids);
  final DBIDArrayIter left = ids.iter();
  final DBIDArrayIter right = ids.iter();

  // Two disjoint pairs merged: 8 - 2 = 6 components.
  unionFind.union(left.seek(0), right.seek(1));
  unionFind.union(left.seek(2), right.seek(3));
  assertEquals(6, unionFind.getRoots().size());

  // Joining the two pairs removes one more component.
  unionFind.union(left.seek(0), right.seek(2));
  assertEquals(5, unionFind.getRoots().size());

  // Build the second group {4, 5, 6, 7}: two components remain.
  unionFind.union(left.seek(4), right.seek(5));
  unionFind.union(left.seek(6), right.seek(7));
  unionFind.union(left.seek(4), right.seek(6));
  assertEquals(2, unionFind.getRoots().size());

  // Merging both groups leaves a single root.
  unionFind.union(left.seek(0), right.seek(4));
  assertEquals(1, unionFind.getRoots().size());
}
Aggregations