use of de.lmu.ifi.dbs.elki.utilities.pairs.DoubleObjPair in project elki by elki-project.
the class RepresentativeUncertainClustering method run.
/**
 * Wrapper run method: clusters sampled worlds, then clusters the clusterings.
 *
 * It's called from {@link AbstractAlgorithm#run(Database)}. It draws
 * {@code numsamples} samples of the uncertain relation, runs the particular
 * clustering algorithm on each sample, meta-clusters the resulting
 * clusterings using the configured distance, and finally selects and
 * evaluates one representative clustering per meta-cluster.
 *
 * @param database Database
 * @param relation Data relation of uncertain objects
 * @return Clustering result (the meta clustering of the sample clusterings)
 */
public Clustering<?> run(Database database, Relation<? extends UncertainObject> relation) {
  ResultHierarchy hierarchy = database.getHierarchy();
  ArrayList<Clustering<?>> clusterings = new ArrayList<>();
  final int dim = RelationUtil.dimensionality(relation);
  DBIDs ids = relation.getDBIDs();
  // Result node handed to runClusteringAlgorithm for every sample.
  Result samples = new BasicResult("Samples", "samples");

  // Step 1: draw samples ("possible worlds") and cluster each one.
  Random rand = random.getSingleThreadedRandom();
  FiniteProgress sampleP = null;
  if (LOG.isVerbose()) {
    sampleP = new FiniteProgress("Clustering samples", numsamples, LOG);
  }
  int sampleNo = 0;
  while (sampleNo < numsamples) {
    WritableDataStore<DoubleVector> world = DataStoreUtil.makeStorage(ids, DataStoreFactory.HINT_DB, DoubleVector.class);
    for (DBIDIter obj = ids.iter(); obj.valid(); obj.advance()) {
      world.put(obj, relation.get(obj).drawSample(rand));
    }
    clusterings.add(runClusteringAlgorithm(hierarchy, samples, ids, world, dim, "Sample " + sampleNo));
    LOG.incrementProcessed(sampleP);
    sampleNo++;
  }
  LOG.ensureCompleted(sampleP);

  // Step 2: perform the meta clustering (on samples only).
  // Every sample clustering gets its own id in a fresh static range.
  DBIDRange rids = DBIDFactory.FACTORY.generateStaticDBIDRange(clusterings.size());
  WritableDataStore<Clustering<?>> datastore = DataStoreUtil.makeStorage(rids, DataStoreFactory.HINT_DB, Clustering.class);
  int pos = 0;
  for (DBIDIter rid = rids.iter(); rid.valid(); rid.advance()) {
    datastore.put(rid, clusterings.get(pos++));
  }
  assert (rids.size() == clusterings.size());

  // Relation of clusterings plus a precomputed distance matrix over it.
  Relation<Clustering<?>> crel = new MaterializedRelation<Clustering<?>>(Clustering.TYPE, rids, "Clusterings", datastore);
  PrecomputedDistanceMatrix<Clustering<?>> mat = new PrecomputedDistanceMatrix<>(crel, rids, distance);
  mat.initialize();
  ProxyDatabase proxy = new ProxyDatabase(rids, crel);
  proxy.getHierarchy().add(crel, mat);
  Clustering<?> meta = metaAlgorithm.run(proxy);
  // Detach the meta clustering from the temporary database.
  proxy.getHierarchy().remove(proxy, meta);

  // Step 3: choose and evaluate one representative per meta-cluster.
  Result reps = new BasicResult("Representants", "representative");
  hierarchy.add(relation, reps);
  DistanceQuery<Clustering<?>> dq = mat.getDistanceQuery(distance);
  List<? extends Cluster<?>> metaClusters = meta.getAllClusters();
  List<DoubleObjPair<Clustering<?>>> evaluated = new ArrayList<>(metaClusters.size());
  for (Cluster<?> group : metaClusters) {
    // Representative = member with the least maximum distance to the others.
    Clustering<?> representative = null;
    double besttau = Double.POSITIVE_INFINITY;
    for (DBIDIter cand = group.getIDs().iter(); cand.valid(); cand.advance()) {
      Clustering<?> candidate = crel.get(cand);
      double tau = 0.;
      for (DBIDIter other = group.getIDs().iter(); other.valid(); other.advance()) {
        if (!DBIDUtil.equal(cand, other)) {
          double di = dq.distance(candidate, other);
          if (di > tau) {
            tau = di;
          }
        }
      }
      if (tau < besttau) {
        besttau = tau;
        representative = candidate;
      }
    }
    if (representative == null) {
      // E.g. degenerate empty clusters
      continue;
    }
    // Global tau: maximum distance of the representative to any sample.
    double gtau = 0.;
    for (DBIDIter any = crel.iterDBIDs(); any.valid(); any.advance()) {
      double di = dq.distance(representative, any);
      if (di > gtau) {
        gtau = di;
      }
    }
    final double cprob = computeConfidence(group.size(), crel.size());
    // Attach the evaluation to the representative clustering.
    hierarchy.add(representative, new RepresentativenessEvaluation(gtau, besttau, cprob));
    evaluated.add(new DoubleObjPair<Clustering<?>>(cprob, representative));
  }

  // Process representatives in reverse natural order of the pairs
  // (the confidence is the pair's double component).
  Collections.sort(evaluated, Collections.reverseOrder());
  for (DoubleObjPair<Clustering<?>> pair : evaluated) {
    // Attach parent relation (= sample) to the representative samples.
    It<Relation<?>> parent = hierarchy.iterParents(pair.second).filter(Relation.class);
    while (parent.valid()) {
      hierarchy.add(reps, parent.get());
      parent.advance();
    }
  }

  // Either keep all samples below the input relation, or drop them.
  if (!keep) {
    hierarchy.removeSubtree(samples);
  } else {
    hierarchy.add(relation, samples);
  }
  return meta;
}
use of de.lmu.ifi.dbs.elki.utilities.pairs.DoubleObjPair in project elki by elki-project.
the class KMLOutputHandler method buildHullsRecursively.
/**
 * Recursively step through the clusters to build the hulls.
 *
 * The hull of a cluster is the convex hull of its own points together with
 * the hull vertices of all its child clusters. The weight is the number of
 * own ids plus, for each child, the child's weight divided by the number of
 * children of this cluster.
 *
 * @param clu Current cluster
 * @param hier Clustering hierarchy; when {@code null}, child clusters are
 *        not visited
 * @param hulls Hull map, used both to look up already computed child hulls
 *        and to store the result for {@code clu}; when {@code null}, child
 *        clusters are not visited and nothing is stored
 * @param coords Relation supplying the coordinates of each object
 * @return Pair of the weight and the convex hull polygon of the cluster
 */
private DoubleObjPair<Polygon> buildHullsRecursively(Cluster<Model> clu, Hierarchy<Cluster<Model>> hier, Map<Object, DoubleObjPair<Polygon>> hulls, Relation<? extends NumberVector> coords) {
  final DBIDs ids = clu.getIDs();
  // Start with the points directly contained in this cluster.
  GrahamScanConvexHull2D hull = new GrahamScanConvexHull2D();
  for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
    hull.add(coords.get(iter).toArray());
  }
  double weight = ids.size();
  if (hier != null && hulls != null) {
    final int numc = hier.numChildren(clu);
    if (numc > 0) {
      for (It<Cluster<Model>> iter = hier.iterChildren(clu); iter.valid(); iter.advance()) {
        final Cluster<Model> iclu = iter.get();
        // Reuse a previously computed child hull, else recurse.
        DoubleObjPair<Polygon> poly = hulls.get(iclu);
        if (poly == null) {
          poly = buildHullsRecursively(iclu, hier, hulls, coords);
        }
        // Add inner convex hull to outer convex hull.
        for (ArrayListIter<double[]> vi = poly.second.iter(); vi.valid(); vi.advance()) {
          hull.add(vi.get());
        }
        weight += poly.first / numc;
      }
    }
  }
  DoubleObjPair<Polygon> pair = new DoubleObjPair<>(weight, hull.getHull());
  // The traversal above tolerates a null map, so the cache write must too;
  // previously this line threw a NullPointerException for hulls == null.
  if (hulls != null) {
    hulls.put(clu, pair);
  }
  return pair;
}
use of de.lmu.ifi.dbs.elki.utilities.pairs.DoubleObjPair in project elki by elki-project.
the class KMLOutputHandler method writeClusteringResult.
/**
 * Write a clustering as a KML document to the given XML stream.
 *
 * The document consists of a name and description, one {@code Style}
 * element per cluster (ids {@code s0}, {@code s1}, ... in the order of
 * {@code getAllClusters()}), and one {@code Placemark} with the hull polygon
 * per cluster. If there is exactly one toplevel cluster, no placemark is
 * written for it.
 *
 * @param xmlw XML stream writer to write to
 * @param clustering Clustering to export
 * @param database Database providing the 2D number vector relation
 * @throws XMLStreamException when the XML writer fails
 */
private void writeClusteringResult(XMLStreamWriter xmlw, Clustering<Model> clustering, Database database) throws XMLStreamException {
  // Prolog and enclosing elements.
  xmlw.writeStartDocument();
  xmlw.writeCharacters("\n");
  xmlw.writeStartElement("kml");
  xmlw.writeDefaultNamespace("http://earth.google.com/kml/2.2");
  xmlw.writeStartElement("Document");

  // Document header.
  // TODO: can we automatically generate more helpful data here?
  xmlw.writeStartElement("name");
  xmlw.writeCharacters("ELKI KML output for " + clustering.getLongName());
  xmlw.writeEndElement(); // name
  writeNewlineOnDebug(xmlw);
  // TODO: e.g. list the settings in the description?
  xmlw.writeStartElement("description");
  xmlw.writeCharacters("ELKI KML output for " + clustering.getLongName());
  xmlw.writeEndElement(); // description
  writeNewlineOnDebug(xmlw);

  // Collect the clusters and compute the hull of each one, starting at the
  // toplevel clusters and descending along the cluster hierarchy.
  List<Cluster<Model>> clusters = clustering.getAllClusters();
  Relation<NumberVector> coords = database.getRelation(TypeUtil.NUMBER_VECTOR_FIELD_2D);
  List<Cluster<Model>> topc = clustering.getToplevelClusters();
  Hierarchy<Cluster<Model>> hier = clustering.getClusterHierarchy();
  Map<Object, DoubleObjPair<Polygon>> hullmap = new HashMap<>();
  for (Cluster<Model> top : topc) {
    buildHullsRecursively(top, hier, hullmap, coords);
  }

  // One style per cluster. TODO: generate styles from color scheme
  final double projarea = 360. * 180. * .01;
  int styleNo = 0;
  for (Cluster<Model> clus : clusters) {
    // This is a prime based magic number, to produce a colorful output
    final Color col = Color.getHSBColor(styleNo / 4.294967291f, 1.f, .5f);
    final DoubleObjPair<Polygon> hullInfo = hullmap.get(clus);
    // Approximate area (using bounding box)
    final double hullarea = SpatialUtil.volume(hullInfo.second);
    final double relativeArea = Math.max(1. - (hullarea / projarea), 0.);
    // Larger hulls get a smaller opacity.
    final double opacity = .65 * FastMath.sqrt(relativeArea) + .1;
    final int alpha = (int) (255 * Math.min(.75, opacity));

    xmlw.writeStartElement("Style");
    xmlw.writeAttribute("id", "s" + styleNo);
    writeNewlineOnDebug(xmlw);

    xmlw.writeStartElement("LineStyle");
    xmlw.writeStartElement("width");
    xmlw.writeCharacters("0");
    xmlw.writeEndElement(); // width
    xmlw.writeEndElement(); // LineStyle
    writeNewlineOnDebug(xmlw);

    xmlw.writeStartElement("PolyStyle");
    xmlw.writeStartElement("color");
    // KML uses AABBGGRR format!
    xmlw.writeCharacters(String.format("%02x%02x%02x%02x", alpha, col.getBlue(), col.getGreen(), col.getRed()));
    xmlw.writeEndElement(); // color
    // The "fill" element is not written; its default is 1.
    xmlw.writeStartElement("outline");
    xmlw.writeCharacters("0");
    xmlw.writeEndElement(); // outline
    xmlw.writeEndElement(); // PolyStyle
    writeNewlineOnDebug(xmlw);

    xmlw.writeEndElement(); // Style
    writeNewlineOnDebug(xmlw);
    styleNo++;
  }

  // A sole toplevel cluster (usually: noise) gets no placemark.
  final Cluster<?> ignore;
  if (topc.size() == 1) {
    ignore = topc.get(0);
  } else {
    ignore = null;
  }

  // One placemark per remaining cluster; the counter advances for skipped
  // clusters too, so that it keeps matching the style ids written above.
  int nextStyle = 0;
  for (Cluster<?> c : clusters) {
    final int cnum = nextStyle++;
    if (c == ignore) {
      continue;
    }
    final Polygon p = hullmap.get(c).second;
    xmlw.writeStartElement("Placemark");

    xmlw.writeStartElement("name");
    xmlw.writeCharacters(c.getNameAutomatic());
    xmlw.writeEndElement(); // name
    xmlw.writeStartElement("description");
    xmlw.writeCData(makeDescription(c).toString());
    xmlw.writeEndElement(); // description
    xmlw.writeStartElement("styleUrl");
    xmlw.writeCharacters("#s" + cnum);
    xmlw.writeEndElement(); // styleUrl

    xmlw.writeStartElement("Polygon");
    writeNewlineOnDebug(xmlw);
    if (compat) {
      xmlw.writeStartElement("altitudeMode");
      xmlw.writeCharacters("relativeToGround");
      xmlw.writeEndElement(); // altitudeMode
      writeNewlineOnDebug(xmlw);
    }

    xmlw.writeStartElement("outerBoundaryIs");
    xmlw.writeStartElement("LinearRing");
    xmlw.writeStartElement("coordinates");
    // Reverse anti-clockwise polygons: walk the vertices back to front
    // whenever testClockwise() is non-negative.
    final boolean reverse = (p.testClockwise() >= 0);
    final ArrayListIter<double[]> itp = p.iter();
    if (reverse) {
      itp.seek(p.size() - 1);
    }
    for (; itp.valid();) {
      final double[] v = itp.get();
      xmlw.writeCharacters(FormatUtil.format(v, ","));
      // In compatibility mode, 2D vertices get a fixed third coordinate.
      if (compat && (v.length == 2)) {
        xmlw.writeCharacters(",100");
      }
      xmlw.writeCharacters(" ");
      if (reverse) {
        itp.retract();
      } else {
        itp.advance();
      }
    }
    xmlw.writeEndElement(); // coordinates
    xmlw.writeEndElement(); // LinearRing
    xmlw.writeEndElement(); // outerBoundaryIs
    writeNewlineOnDebug(xmlw);
    xmlw.writeEndElement(); // Polygon

    xmlw.writeEndElement(); // Placemark
    writeNewlineOnDebug(xmlw);
  }

  xmlw.writeEndElement(); // Document
  xmlw.writeEndElement(); // kml
  xmlw.writeEndDocument();
}
use of de.lmu.ifi.dbs.elki.utilities.pairs.DoubleObjPair in project elki by elki-project.
the class RdKNNTree method preInsert.
/**
 * Adapts the knn distances before insertion of entry q.
 *
 * Descends the subtree below {@code nodeEntry}. In leaf nodes, every stored
 * entry is compared against q: close enough entries are inserted into the
 * knn heap of q, and entries for which q is within their knn distance get
 * their knn distance recomputed. In directory nodes, the method recurses
 * into those children whose distance is below the child's knn distance or
 * below the current knn distance of q. Finally, the knn distance of
 * {@code nodeEntry} is set to the maximum knn distance of its entries.
 *
 * @param q the entry to be inserted
 * @param nodeEntry the entry representing the root of the current subtree
 * @param knns_q the knns of q
 */
private void preInsert(RdKNNEntry q, RdKNNEntry nodeEntry, KNNHeap knns_q) {
  double qKnnDist = knns_q.getKNNDistance();
  final RdKNNNode node = getNode(nodeEntry);
  // Aggregated (maximum) knn distance over all entries of this node.
  double nodeKnnDist = 0.;

  if (!node.isLeaf()) {
    // Directory node: visit children in the order given by getSortedEntries.
    final O queryObj = relation.get(((LeafEntry) q).getDBID());
    final List<DoubleObjPair<RdKNNEntry>> sorted = getSortedEntries(node, queryObj, settings.distanceFunction);
    for (DoubleObjPair<RdKNNEntry> cand : sorted) {
      final RdKNNEntry child = cand.second;
      final double childKnnDist = child.getKnnDistance();
      final boolean descend = cand.first < childKnnDist || cand.first < qKnnDist;
      if (descend) {
        preInsert(q, child, knns_q);
        // The recursion may have tightened the knn distance of q.
        qKnnDist = knns_q.getKNNDistance();
      }
      // Re-read the child's distance: the recursion may have updated it.
      nodeKnnDist = Math.max(nodeKnnDist, child.getKnnDistance());
    }
  } else {
    // Leaf node: compare q with every stored object.
    int idx = 0;
    while (idx < node.getNumEntries()) {
      final RdKNNLeafEntry p = (RdKNNLeafEntry) node.getEntry(idx);
      final double distPQ = distanceQuery.distance(p.getDBID(), ((LeafEntry) q).getDBID());

      // ==> p becomes a knn-candidate
      if (distPQ <= qKnnDist) {
        knns_q.insert(distPQ, p.getDBID());
        if (knns_q.size() >= settings.k_max) {
          qKnnDist = knns_q.getKNNDistance();
          q.setKnnDistance(qKnnDist);
        }
      }

      // q becomes knn of p
      if (distPQ <= p.getKnnDistance()) {
        final KNNList neighbors = knnQuery.getKNNForObject(relation.get(p.getDBID()), settings.k_max);
        final int found = neighbors.size();
        // With too few neighbors (even counting q) the distance is set to
        // NaN; otherwise the smaller of the last neighbor distance and the
        // distance to q is used.
        final double updated = (found + 1 < settings.k_max) //
            ? Double.NaN //
            : Math.min(neighbors.get(found - 1).doubleValue(), distPQ);
        p.setKnnDistance(updated);
      }

      nodeKnnDist = Math.max(nodeKnnDist, p.getKnnDistance());
      idx++;
    }
  }
  nodeEntry.setKnnDistance(nodeKnnDist);
}
Aggregations