Search in sources :

Example 1 with DoubleObjPair

use of de.lmu.ifi.dbs.elki.utilities.pairs.DoubleObjPair in project elki by elki-project.

The method run of the class RepresentativeUncertainClustering.

/**
 * Wrapper run method: clusters sampled possible worlds, then meta-clusters
 * the resulting clusterings.
 *
 * It's called from {@link AbstractAlgorithm#run(Database)}. It invokes the
 * base clustering algorithm once per sample, stores the resulting clusterings,
 * compares them using the configured distance, and attaches an evaluation to
 * one representative clustering per meta cluster.
 *
 * @param database Database
 * @param relation Data relation of uncertain objects
 * @return Clustering result (the result of the meta clustering)
 */
public Clustering<?> run(Database database, Relation<? extends UncertainObject> relation) {
    final ResultHierarchy resultTree = database.getHierarchy();
    final ArrayList<Clustering<?>> sampled = new ArrayList<>();
    final int dimensionality = RelationUtil.dimensionality(relation);
    final DBIDs objectIds = relation.getDBIDs();
    // Parent result that groups the sampled worlds.
    final Result sampleRoot = new BasicResult("Samples", "samples");

    // Phase 1: draw possible worlds and cluster each of them.
    final Random rnd = random.getSingleThreadedRandom();
    FiniteProgress progress = null;
    if (LOG.isVerbose()) {
        progress = new FiniteProgress("Clustering samples", numsamples, LOG);
    }
    for (int round = 0; round < numsamples; round++) {
        WritableDataStore<DoubleVector> world = DataStoreUtil.makeStorage(objectIds, DataStoreFactory.HINT_DB, DoubleVector.class);
        DBIDIter obj = objectIds.iter();
        while (obj.valid()) {
            world.put(obj, relation.get(obj).drawSample(rnd));
            obj.advance();
        }
        sampled.add(runClusteringAlgorithm(resultTree, sampleRoot, objectIds, world, dimensionality, "Sample " + round));
        LOG.incrementProcessed(progress);
    }
    LOG.ensureCompleted(progress);

    // Phase 2: meta clustering, where each sample clustering becomes one object.
    final DBIDRange metaIds = DBIDFactory.FACTORY.generateStaticDBIDRange(sampled.size());
    WritableDataStore<Clustering<?>> metaStore = DataStoreUtil.makeStorage(metaIds, DataStoreFactory.HINT_DB, Clustering.class);
    final Iterator<Clustering<?>> nextSample = sampled.iterator();
    DBIDIter slot = metaIds.iter();
    while (slot.valid()) {
        metaStore.put(slot, nextSample.next());
        slot.advance();
    }
    assert (metaIds.size() == sampled.size());

    // Wrap the clusterings in a relation and precompute their pairwise distances.
    final Relation<Clustering<?>> metaRel = new MaterializedRelation<Clustering<?>>(Clustering.TYPE, metaIds, "Clusterings", metaStore);
    final PrecomputedDistanceMatrix<Clustering<?>> matrix = new PrecomputedDistanceMatrix<>(metaRel, metaIds, distance);
    matrix.initialize();
    final ProxyDatabase proxy = new ProxyDatabase(metaIds, metaRel);
    proxy.getHierarchy().add(metaRel, matrix);
    final Clustering<?> meta = metaAlgorithm.run(proxy);
    // Detach the meta clustering from the temporary proxy database.
    proxy.getHierarchy().remove(proxy, meta);

    // Phase 3: pick and evaluate one representative per meta cluster.
    final Result representatives = new BasicResult("Representants", "representative");
    resultTree.add(relation, representatives);
    final DistanceQuery<Clustering<?>> distQ = matrix.getDistanceQuery(distance);
    final List<? extends Cluster<?>> metaClusters = meta.getAllClusters();
    final List<DoubleObjPair<Clustering<?>>> ranked = new ArrayList<>(metaClusters.size());
    for (Cluster<?> group : metaClusters) {
        double radius = Double.POSITIVE_INFINITY;
        Clustering<?> representative = null;
        DBIDIter cand = group.getIDs().iter();
        while (cand.valid()) {
            final Clustering<?> candidate = metaRel.get(cand);
            // Largest distance from this candidate to any other group member.
            double worst = 0.;
            DBIDIter other = group.getIDs().iter();
            while (other.valid()) {
                if (!DBIDUtil.equal(cand, other)) {
                    final double dist = distQ.distance(candidate, other);
                    if (dist > worst) {
                        worst = dist;
                    }
                }
                other.advance();
            }
            // Keep the member whose maximum distance is smallest.
            if (worst < radius) {
                radius = worst;
                representative = candidate;
            }
            cand.advance();
        }
        if (representative == null) {
            // Nothing to evaluate, e.g. a degenerate empty cluster.
            continue;
        }
        // Largest distance from the representative to any sampled clustering.
        double globalRadius = 0.;
        DBIDIter any = metaRel.iterDBIDs();
        while (any.valid()) {
            final double dist = distQ.distance(representative, any);
            if (dist > globalRadius) {
                globalRadius = dist;
            }
            any.advance();
        }
        final double confidence = computeConfidence(group.size(), metaRel.size());
        // Attach the evaluation to the representative clustering.
        resultTree.add(representative, new RepresentativenessEvaluation(globalRadius, radius, confidence));
        ranked.add(new DoubleObjPair<Clustering<?>>(confidence, representative));
    }

    // Order by descending confidence.
    Collections.sort(ranked, Collections.reverseOrder());
    for (DoubleObjPair<Clustering<?>> entry : ranked) {
        // Link the parent relation (the sample) of each representative below the representatives result.
        It<Relation<?>> parent = resultTree.iterParents(entry.second).filter(Relation.class);
        while (parent.valid()) {
            resultTree.add(representatives, parent.get());
            parent.advance();
        }
    }

    // Either publish all samples below the input relation, or discard them.
    if (!keep) {
        resultTree.removeSubtree(sampleRoot);
    } else {
        resultTree.add(relation, sampleRoot);
    }
    return meta;
}
Also used : ArrayList(java.util.ArrayList) Result(de.lmu.ifi.dbs.elki.result.Result) EvaluationResult(de.lmu.ifi.dbs.elki.result.EvaluationResult) BasicResult(de.lmu.ifi.dbs.elki.result.BasicResult) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) MaterializedRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation) MaterializedRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation) Relation(de.lmu.ifi.dbs.elki.database.relation.Relation) Random(java.util.Random) BasicResult(de.lmu.ifi.dbs.elki.result.BasicResult) Iterator(java.util.Iterator) ResultHierarchy(de.lmu.ifi.dbs.elki.result.ResultHierarchy) DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) ProxyDatabase(de.lmu.ifi.dbs.elki.database.ProxyDatabase) PrecomputedDistanceMatrix(de.lmu.ifi.dbs.elki.index.distancematrix.PrecomputedDistanceMatrix) Clustering(de.lmu.ifi.dbs.elki.data.Clustering) DoubleObjPair(de.lmu.ifi.dbs.elki.utilities.pairs.DoubleObjPair) DBIDRange(de.lmu.ifi.dbs.elki.database.ids.DBIDRange) DoubleVector(de.lmu.ifi.dbs.elki.data.DoubleVector)

Example 2 with DoubleObjPair

use of de.lmu.ifi.dbs.elki.utilities.pairs.DoubleObjPair in project elki by elki-project.

The method buildHullsRecursively of the class KMLOutputHandler.

/**
 * Recursively step through the clusters to build the hulls.
 *
 * The hull of a cluster is the convex hull of its own points together with
 * the hull vertices of all its child clusters. The weight is the number of
 * own points plus, for each child, the child's weight divided by the number
 * of children.
 *
 * @param clu Current cluster
 * @param hier Clustering hierarchy; if {@code null}, children are not visited
 * @param hulls Hull map, used both as cache and as output; if {@code null},
 *        children are not visited and the result is only returned, not stored
 * @param coords Relation providing the coordinates of the cluster members
 * @return Pair of the cluster weight and its convex hull polygon
 */
private DoubleObjPair<Polygon> buildHullsRecursively(Cluster<Model> clu, Hierarchy<Cluster<Model>> hier, Map<Object, DoubleObjPair<Polygon>> hulls, Relation<? extends NumberVector> coords) {
    final DBIDs ids = clu.getIDs();
    // Start with the points directly contained in this cluster.
    GrahamScanConvexHull2D hull = new GrahamScanConvexHull2D();
    for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
        hull.add(coords.get(iter).toArray());
    }
    double weight = ids.size();
    if (hier != null && hulls != null) {
        final int numc = hier.numChildren(clu);
        if (numc > 0) {
            for (It<Cluster<Model>> iter = hier.iterChildren(clu); iter.valid(); iter.advance()) {
                final Cluster<Model> iclu = iter.get();
                // Reuse a previously computed child hull when available.
                DoubleObjPair<Polygon> poly = hulls.get(iclu);
                if (poly == null) {
                    poly = buildHullsRecursively(iclu, hier, hulls, coords);
                }
                // Add inner convex hull to outer convex hull.
                for (ArrayListIter<double[]> vi = poly.second.iter(); vi.valid(); vi.advance()) {
                    hull.add(vi.get());
                }
                weight += poly.first / numc;
            }
        }
    }
    DoubleObjPair<Polygon> pair = new DoubleObjPair<>(weight, hull.getHull());
    // The map is treated as optional above, so it must not be dereferenced
    // unconditionally here (previously a NullPointerException for null maps).
    if (hulls != null) {
        hulls.put(clu, pair);
    }
    return pair;
}
Also used : ArrayModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs) DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) Cluster(de.lmu.ifi.dbs.elki.data.Cluster) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) DoubleObjPair(de.lmu.ifi.dbs.elki.utilities.pairs.DoubleObjPair) GrahamScanConvexHull2D(de.lmu.ifi.dbs.elki.math.geometry.GrahamScanConvexHull2D) Model(de.lmu.ifi.dbs.elki.data.model.Model) Polygon(de.lmu.ifi.dbs.elki.data.spatial.Polygon)

Example 3 with DoubleObjPair

use of de.lmu.ifi.dbs.elki.utilities.pairs.DoubleObjPair in project elki by elki-project.

The method writeClusteringResult of the class KMLOutputHandler.

/**
 * Write a clustering result as a KML document.
 *
 * The document consists of a name and description, one {@code Style} element
 * per cluster (id {@code s<index>}), and one {@code Placemark} per cluster
 * holding the cluster's hull polygon. If there is exactly one toplevel
 * cluster, no placemark is written for it.
 *
 * @param xmlw XML stream writer to write to
 * @param clustering Clustering to serialize
 * @param database Database used to obtain the 2D coordinate relation
 * @throws XMLStreamException when the underlying writer fails
 */
private void writeClusteringResult(XMLStreamWriter xmlw, Clustering<Model> clustering, Database database) throws XMLStreamException {
    // Prolog and root elements.
    xmlw.writeStartDocument();
    xmlw.writeCharacters("\n");
    xmlw.writeStartElement("kml");
    xmlw.writeDefaultNamespace("http://earth.google.com/kml/2.2");
    xmlw.writeStartElement("Document");

    // Document name.
    // TODO: can we automatically generate more helpful data here?
    xmlw.writeStartElement("name");
    xmlw.writeCharacters("ELKI KML output for " + clustering.getLongName());
    xmlw.writeEndElement();
    writeNewlineOnDebug(xmlw);
    // Document description.
    // TODO: e.g. list the settings in the description?
    xmlw.writeStartElement("description");
    xmlw.writeCharacters("ELKI KML output for " + clustering.getLongName());
    xmlw.writeEndElement();
    writeNewlineOnDebug(xmlw);

    // Collect the clusters and compute a hull for each of them.
    final List<Cluster<Model>> allClusters = clustering.getAllClusters();
    final Relation<NumberVector> points = database.getRelation(TypeUtil.NUMBER_VECTOR_FIELD_2D);
    final List<Cluster<Model>> roots = clustering.getToplevelClusters();
    final Hierarchy<Cluster<Model>> tree = clustering.getClusterHierarchy();
    final Map<Object, DoubleObjPair<Polygon>> hullByCluster = new HashMap<>();
    for (Cluster<Model> root : roots) {
        buildHullsRecursively(root, tree, hullByCluster, points);
    }

    // One style per cluster, numbered in iteration order.
    // TODO: generate styles from color scheme
    final double referenceArea = 360. * 180. * .01;
    int styleNo = 0;
    for (Cluster<Model> styled : allClusters) {
        // This is a prime based magic number, to produce a colorful output
        final Color hue = Color.getHSBColor(styleNo / 4.294967291f, 1.f, .5f);
        final DoubleObjPair<Polygon> weighted = hullByCluster.get(styled);
        // Approximate area (using bounding box)
        final double area = SpatialUtil.volume(weighted.second);
        final double areaShare = Math.max(1. - (area / referenceArea), 0.);
        final double alpha = .65 * FastMath.sqrt(areaShare) + .1;
        final int alphaByte = (int) (255 * Math.min(.75, alpha));

        xmlw.writeStartElement("Style");
        xmlw.writeAttribute("id", "s" + styleNo);
        writeNewlineOnDebug(xmlw);

        // <LineStyle><width>0</width></LineStyle>
        xmlw.writeStartElement("LineStyle");
        xmlw.writeStartElement("width");
        xmlw.writeCharacters("0");
        xmlw.writeEndElement();
        xmlw.writeEndElement();
        writeNewlineOnDebug(xmlw);

        // <PolyStyle> with fill color and disabled outline.
        xmlw.writeStartElement("PolyStyle");
        xmlw.writeStartElement("color");
        // KML uses AABBGGRR format!
        xmlw.writeCharacters(String.format("%02x%02x%02x%02x", alphaByte, hue.getBlue(), hue.getGreen(), hue.getRed()));
        xmlw.writeEndElement();
        xmlw.writeStartElement("outline");
        xmlw.writeCharacters("0");
        xmlw.writeEndElement();
        xmlw.writeEndElement();
        writeNewlineOnDebug(xmlw);

        // Close Style.
        xmlw.writeEndElement();
        writeNewlineOnDebug(xmlw);
        styleNo++;
    }

    // A sole toplevel cluster (usually: noise) gets no placemark.
    final Cluster<?> skipped = roots.size() == 1 ? roots.get(0) : null;
    int placemarkNo = -1;
    for (Cluster<?> current : allClusters) {
        // Advance the counter first so skipped clusters still consume a style index.
        placemarkNo++;
        if (current == skipped) {
            continue;
        }
        final Polygon outline = hullByCluster.get(current).second;
        xmlw.writeStartElement("Placemark");

        xmlw.writeStartElement("name");
        xmlw.writeCharacters(current.getNameAutomatic());
        xmlw.writeEndElement();

        xmlw.writeStartElement("description");
        xmlw.writeCData(makeDescription(current).toString());
        xmlw.writeEndElement();

        xmlw.writeStartElement("styleUrl");
        xmlw.writeCharacters("#s" + placemarkNo);
        xmlw.writeEndElement();

        xmlw.writeStartElement("Polygon");
        writeNewlineOnDebug(xmlw);
        if (compat) {
            xmlw.writeStartElement("altitudeMode");
            xmlw.writeCharacters("relativeToGround");
            xmlw.writeEndElement();
            writeNewlineOnDebug(xmlw);
        }

        xmlw.writeStartElement("outerBoundaryIs");
        xmlw.writeStartElement("LinearRing");
        xmlw.writeStartElement("coordinates");
        // Walk the vertices backwards when testClockwise() is non-negative.
        final boolean backwards = (outline.testClockwise() >= 0);
        final ArrayListIter<double[]> vertex = outline.iter();
        if (backwards) {
            vertex.seek(outline.size() - 1);
        }
        for (; vertex.valid();) {
            final double[] coordinate = vertex.get();
            xmlw.writeCharacters(FormatUtil.format(coordinate, ","));
            if (compat && (coordinate.length == 2)) {
                xmlw.writeCharacters(",100");
            }
            xmlw.writeCharacters(" ");
            if (backwards) {
                vertex.retract();
            } else {
                vertex.advance();
            }
        }
        // Close coordinates, LinearRing and outerBoundaryIs.
        xmlw.writeEndElement();
        xmlw.writeEndElement();
        xmlw.writeEndElement();
        writeNewlineOnDebug(xmlw);
        // Close Polygon.
        xmlw.writeEndElement();

        // Close Placemark.
        xmlw.writeEndElement();
        writeNewlineOnDebug(xmlw);
    }

    // Close Document and kml, then finish the document.
    xmlw.writeEndElement();
    xmlw.writeEndElement();
    xmlw.writeEndDocument();
}
Also used : HashMap(java.util.HashMap) Color(java.awt.Color) Cluster(de.lmu.ifi.dbs.elki.data.Cluster) DoubleObjPair(de.lmu.ifi.dbs.elki.utilities.pairs.DoubleObjPair) NumberVector(de.lmu.ifi.dbs.elki.data.NumberVector) Model(de.lmu.ifi.dbs.elki.data.model.Model) Iterator(java.util.Iterator) PolygonsObject(de.lmu.ifi.dbs.elki.data.spatial.PolygonsObject) Polygon(de.lmu.ifi.dbs.elki.data.spatial.Polygon)

Example 4 with DoubleObjPair

use of de.lmu.ifi.dbs.elki.utilities.pairs.DoubleObjPair in project elki by elki-project.

The method preInsert of the class RdKNNTree.

/**
 * Adapts the knn distances before insertion of entry q.
 *
 * Walks the subtree below {@code nodeEntry}. In leaf nodes, each stored entry
 * is compared against q, updating both q's kNN heap and the stored entry's
 * kNN distance. In directory nodes, the method recurses into child entries
 * that may be affected. In both cases the kNN distance of {@code nodeEntry}
 * is finally set to the maximum kNN distance seen among its entries.
 *
 * @param q the entry to be inserted (cast to {@code LeafEntry} to obtain its
 *        DBID, so it is expected to be a leaf entry)
 * @param nodeEntry the entry representing the root of the current subtree
 * @param knns_q the knns of q; modified by this method
 */
private void preInsert(RdKNNEntry q, RdKNNEntry nodeEntry, KNNHeap knns_q) {
    // Current kNN distance of q; refreshed as the heap is updated.
    double knnDist_q = knns_q.getKNNDistance();
    RdKNNNode node = getNode(nodeEntry);
    // Running maximum of the kNN distances of this node's entries.
    double knnDist_node = 0.;
    // leaf node
    if (node.isLeaf()) {
        for (int i = 0; i < node.getNumEntries(); i++) {
            RdKNNLeafEntry p = (RdKNNLeafEntry) node.getEntry(i);
            double dist_pq = distanceQuery.distance(p.getDBID(), ((LeafEntry) q).getDBID());
            // ==> p becomes a knn-candidate
            if (dist_pq <= knnDist_q) {
                knns_q.insert(dist_pq, p.getDBID());
                // Only once the heap holds at least k_max entries is the kNN
                // distance re-read and stored on q.
                if (knns_q.size() >= settings.k_max) {
                    knnDist_q = knns_q.getKNNDistance();
                    q.setKnnDistance(knnDist_q);
                }
            }
            // q becomes knn of p
            if (dist_pq <= p.getKnnDistance()) {
                O obj = relation.get(p.getDBID());
                // Re-query the neighbors of p; presumably q is not yet part of
                // the queried data at this point (as the variable name suggests)
                // - verify against caller.
                KNNList knns_without_q = knnQuery.getKNNForObject(obj, settings.k_max);
                if (knns_without_q.size() + 1 < settings.k_max) {
                    // Even counting q, fewer than k_max neighbors: marked as NaN.
                    p.setKnnDistance(Double.NaN);
                } else {
                    // Smaller of the last returned neighbor distance and the
                    // distance to q.
                    double knnDist_p = Math.min(knns_without_q.get(knns_without_q.size() - 1).doubleValue(), dist_pq);
                    p.setKnnDistance(knnDist_p);
                }
            }
            knnDist_node = Math.max(knnDist_node, p.getKnnDistance());
        }
    } else // directory node
    {
        O obj = relation.get(((LeafEntry) q).getDBID());
        // Child entries paired with a distance value; presumably ordered by
        // their distance to obj - TODO confirm in getSortedEntries.
        List<DoubleObjPair<RdKNNEntry>> entries = getSortedEntries(node, obj, settings.distanceFunction);
        for (DoubleObjPair<RdKNNEntry> distEntry : entries) {
            RdKNNEntry entry = distEntry.second;
            double entry_knnDist = entry.getKnnDistance();
            // Descend only when the paired distance is below the child's kNN
            // distance or below q's current kNN distance.
            if (distEntry.first < entry_knnDist || distEntry.first < knnDist_q) {
                preInsert(q, entry, knns_q);
                // The recursion may have changed q's heap; refresh the bound.
                knnDist_q = knns_q.getKNNDistance();
            }
            // Read again from the entry: the recursive call sets it.
            knnDist_node = Math.max(knnDist_node, entry.getKnnDistance());
        }
    }
    nodeEntry.setKnnDistance(knnDist_node);
}
Also used : KNNList(de.lmu.ifi.dbs.elki.database.ids.KNNList) DoubleObjPair(de.lmu.ifi.dbs.elki.utilities.pairs.DoubleObjPair)

Aggregations

DoubleObjPair (de.lmu.ifi.dbs.elki.utilities.pairs.DoubleObjPair)4 Cluster (de.lmu.ifi.dbs.elki.data.Cluster)2 Model (de.lmu.ifi.dbs.elki.data.model.Model)2 Polygon (de.lmu.ifi.dbs.elki.data.spatial.Polygon)2 DBIDIter (de.lmu.ifi.dbs.elki.database.ids.DBIDIter)2 DBIDs (de.lmu.ifi.dbs.elki.database.ids.DBIDs)2 Iterator (java.util.Iterator)2 Clustering (de.lmu.ifi.dbs.elki.data.Clustering)1 DoubleVector (de.lmu.ifi.dbs.elki.data.DoubleVector)1 NumberVector (de.lmu.ifi.dbs.elki.data.NumberVector)1 PolygonsObject (de.lmu.ifi.dbs.elki.data.spatial.PolygonsObject)1 ProxyDatabase (de.lmu.ifi.dbs.elki.database.ProxyDatabase)1 ArrayModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs)1 DBIDRange (de.lmu.ifi.dbs.elki.database.ids.DBIDRange)1 KNNList (de.lmu.ifi.dbs.elki.database.ids.KNNList)1 MaterializedRelation (de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation)1 Relation (de.lmu.ifi.dbs.elki.database.relation.Relation)1 PrecomputedDistanceMatrix (de.lmu.ifi.dbs.elki.index.distancematrix.PrecomputedDistanceMatrix)1 FiniteProgress (de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress)1 GrahamScanConvexHull2D (de.lmu.ifi.dbs.elki.math.geometry.GrahamScanConvexHull2D)1