Search in sources :

Example 21 with Cluster

use of de.lmu.ifi.dbs.elki.data.Cluster in project elki by elki-project.

The method run of the class TrivialAllNoise.

/**
 * Produces the trivial "all-in-noise" clustering: every object of the
 * relation ends up in one single top-level cluster.
 *
 * @param relation Relation whose object ids are clustered
 * @return Clustering containing exactly one top-level cluster
 */
public Clustering<Model> run(Relation<?> relation) {
    final DBIDs allIds = relation.getDBIDs();
    // The boolean argument is presumably the noise flag of the cluster
    // (matches the "All-in-noise" naming) - confirm against Cluster.
    final Cluster<Model> everything = new Cluster<Model>(allIds, true, ClusterModel.CLUSTER);
    final Clustering<Model> clustering = new Clustering<>("All-in-noise trivial Clustering", "allinnoise-clustering");
    clustering.addToplevelCluster(everything);
    return clustering;
}
Also used : DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) ClusterModel(de.lmu.ifi.dbs.elki.data.model.ClusterModel) Model(de.lmu.ifi.dbs.elki.data.model.Model) Cluster(de.lmu.ifi.dbs.elki.data.Cluster) Clustering(de.lmu.ifi.dbs.elki.data.Clustering)

Example 22 with Cluster

use of de.lmu.ifi.dbs.elki.data.Cluster in project elki by elki-project.

The method run of the class TrivialAllInOne.

/**
 * Produces the trivial "all-in-one" clustering: every object of the relation
 * ends up in one single top-level cluster.
 *
 * @param relation Relation whose object ids are clustered
 * @return Clustering containing exactly one top-level cluster
 */
public Clustering<Model> run(Relation<?> relation) {
    final DBIDs allIds = relation.getDBIDs();
    final Cluster<Model> everything = new Cluster<Model>(allIds, ClusterModel.CLUSTER);
    final Clustering<Model> clustering = new Clustering<>("All-in-one trivial Clustering", "allinone-clustering");
    clustering.addToplevelCluster(everything);
    return clustering;
}
Also used : DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) ClusterModel(de.lmu.ifi.dbs.elki.data.model.ClusterModel) Model(de.lmu.ifi.dbs.elki.data.model.Model) Cluster(de.lmu.ifi.dbs.elki.data.Cluster) Clustering(de.lmu.ifi.dbs.elki.data.Clustering)

Example 23 with Cluster

use of de.lmu.ifi.dbs.elki.data.Cluster in project elki by elki-project.

The method buildHierarchy of the class DiSH.

/**
 * Links the given clusters into a parent/child hierarchy.
 * <p>
 * Each cluster is compared with every cluster that follows it in the list. A
 * later cluster is only considered as parent when its subspace has strictly
 * fewer dimensions than the subspace of the earlier one. A later cluster with
 * a zero-dimensional subspace is attached as parent only if the earlier
 * cluster has no parent yet; otherwise the decision is based on the subspace
 * dimensionality and weighted distance of the two projected centroids.
 *
 * @param database the database containing the data objects
 * @param clustering Clustering we process
 * @param clusters the sorted list of clusters
 * @param dimensionality the dimensionality of the data
 */
private void buildHierarchy(Relation<V> database, Clustering<SubspaceModel> clustering, List<Cluster<SubspaceModel>> clusters, int dimensionality) {
    final StringBuilder debug = LOG.isDebugging() ? new StringBuilder() : null;
    final int dbDim = RelationUtil.dimensionality(database);
    final Hierarchy<Cluster<SubspaceModel>> hierarchy = clustering.getClusterHierarchy();
    for (int i = 0; i < clusters.size() - 1; i++) {
        final Cluster<SubspaceModel> child = clusters.get(i);
        final Subspace childSpace = child.getModel().getSubspace();
        final int childLevel = dimensionality - childSpace.dimensionality();
        final NumberVector childCentroid = ProjectedCentroid.make(childSpace.getDimensions(), database, child.getIDs());
        final long[] childPV = childSpace.getDimensions();
        for (int j = i + 1; j < clusters.size(); j++) {
            final Cluster<SubspaceModel> candidate = clusters.get(j);
            final Subspace candidateSpace = candidate.getModel().getSubspace();
            final int candidateLevel = dimensionality - candidateSpace.dimensionality();
            // Only clusters on a strictly higher level can become parents.
            if (childLevel >= candidateLevel) {
                continue;
            }
            if (debug != null) {
                debug.append("\n l_i=").append(childLevel).append(" pv_i=[").append(BitsUtil.toStringLow(childSpace.getDimensions(), dbDim)).append(']');
                debug.append("\n l_j=").append(candidateLevel).append(" pv_j=[").append(BitsUtil.toStringLow(candidateSpace.getDimensions(), dbDim)).append(']');
            }
            // Noise level reached: noise becomes the parent of parentless clusters only.
            if (candidateSpace.dimensionality() == 0) {
                if (hierarchy.numParents(child) == 0) {
                    clustering.addChildCluster(candidate, child);
                    if (debug != null) {
                        debug.append("\n [").append(BitsUtil.toStringLow(candidateSpace.getDimensions(), dbDim));
                        debug.append("] is parent of [").append(BitsUtil.toStringLow(childSpace.getDimensions(), dbDim));
                        debug.append(']');
                    }
                }
                continue;
            }
            final NumberVector candidateCentroid = ProjectedCentroid.make(candidate.getModel().getDimensions(), database, candidate.getIDs());
            final long[] candidatePV = candidateSpace.getDimensions();
            final long[] sharedPV = BitsUtil.andCMin(childPV, candidatePV);
            final int jointLevel = subspaceDimensionality(childCentroid, candidateCentroid, childPV, candidatePV, sharedPV);
            final double d = weightedDistance(childCentroid, candidateCentroid, sharedPV);
            if (debug != null) {
                debug.append("\n dist = ").append(jointLevel);
            }
            if (jointLevel != candidateLevel) {
                continue;
            }
            if (debug != null) {
                debug.append("\n d = ").append(d);
            }
            // Negated "<=" (rather than ">") so that a NaN distance is rejected as well.
            if (!(d <= 2 * epsilon)) {
                throw new RuntimeException("Should never happen: d = " + d);
            }
            // Attach unless one of the existing parents already covers the candidate.
            if (hierarchy.numParents(child) == 0 || !isParent(database, candidate, hierarchy.iterParents(child), dbDim)) {
                clustering.addChildCluster(candidate, child);
                if (debug != null) {
                    debug.append("\n [").append(BitsUtil.toStringLow(candidateSpace.getDimensions(), dbDim));
                    debug.append("] is parent of [");
                    debug.append(BitsUtil.toStringLow(childSpace.getDimensions(), dbDim));
                    debug.append(']');
                }
            }
        }
    }
    if (debug != null) {
        LOG.debug(debug.toString());
    }
}
Also used : NumberVector(de.lmu.ifi.dbs.elki.data.NumberVector) SubspaceModel(de.lmu.ifi.dbs.elki.data.model.SubspaceModel) Subspace(de.lmu.ifi.dbs.elki.data.Subspace) Cluster(de.lmu.ifi.dbs.elki.data.Cluster)

Example 24 with Cluster

use of de.lmu.ifi.dbs.elki.data.Cluster in project elki by elki-project.

The method writeClusteringResult of the class KMLOutputHandler.

/**
 * Writes the given clustering as a complete KML document.
 * <p>
 * The output consists of a {@code kml} root with a single {@code Document}
 * that contains, in this order: a name and description derived from the long
 * name of the clustering, one {@code Style} element per cluster (id
 * {@code s<index>}), and one {@code Placemark} with a polygon per cluster.
 * When the clustering has exactly one toplevel cluster, no placemark is
 * written for that cluster.
 *
 * @param xmlw XML stream writer receiving the output
 * @param clustering Clustering to export
 * @param database Database from which the coordinate relation is obtained
 * @throws XMLStreamException when the underlying writer fails
 */
private void writeClusteringResult(XMLStreamWriter xmlw, Clustering<Model> clustering, Database database) throws XMLStreamException {
    xmlw.writeStartDocument();
    xmlw.writeCharacters("\n");
    xmlw.writeStartElement("kml");
    xmlw.writeDefaultNamespace("http://earth.google.com/kml/2.2");
    xmlw.writeStartElement("Document");
    {
        // TODO: can we automatically generate more helpful data here?
        xmlw.writeStartElement("name");
        xmlw.writeCharacters("ELKI KML output for " + clustering.getLongName());
        // name
        xmlw.writeEndElement();
        writeNewlineOnDebug(xmlw);
        // TODO: e.g. list the settings in the description?
        xmlw.writeStartElement("description");
        xmlw.writeCharacters("ELKI KML output for " + clustering.getLongName());
        // description
        xmlw.writeEndElement();
        writeNewlineOnDebug(xmlw);
    }
    List<Cluster<Model>> clusters = clustering.getAllClusters();
    Relation<NumberVector> coords = database.getRelation(TypeUtil.NUMBER_VECTOR_FIELD_2D);
    List<Cluster<Model>> topc = clustering.getToplevelClusters();
    Hierarchy<Cluster<Model>> hier = clustering.getClusterHierarchy();
    // Hull lookup per cluster, filled by walking down from every toplevel cluster.
    // NOTE(review): the lookups below assume every cluster of getAllClusters()
    // received an entry here - confirm against buildHullsRecursively.
    Map<Object, DoubleObjPair<Polygon>> hullmap = new HashMap<>();
    for (Cluster<Model> clu : topc) {
        buildHullsRecursively(clu, hier, hullmap, coords);
    }
    {
        // Reference area for the opacity computation: 1% of 360 x 180
        // (presumably degrees of longitude x latitude - confirm).
        final double projarea = 360. * 180. * .01;
        // TODO: generate styles from color scheme
        // One Style per cluster; the index i becomes the style id "s<i>".
        Iterator<Cluster<Model>> it = clusters.iterator();
        for (int i = 0; it.hasNext(); i++) {
            Cluster<Model> clus = it.next();
            // This is a prime based magic number, to produce a colorful output
            Color col = Color.getHSBColor(i / 4.294967291f, 1.f, .5f);
            DoubleObjPair<Polygon> pair = hullmap.get(clus);
            // Approximate area (using bounding box)
            double hullarea = SpatialUtil.volume(pair.second);
            // Larger hulls yield a smaller relative value (clamped at 0) and
            // therefore a lower opacity below.
            final double relativeArea = Math.max(1. - (hullarea / projarea), 0.);
            // final double relativeSize = pair.first / coords.size();
            final double opacity = .65 * FastMath.sqrt(relativeArea) + .1;
            xmlw.writeStartElement("Style");
            xmlw.writeAttribute("id", "s" + i);
            writeNewlineOnDebug(xmlw);
            {
                xmlw.writeStartElement("LineStyle");
                xmlw.writeStartElement("width");
                xmlw.writeCharacters("0");
                // width
                xmlw.writeEndElement();
                // LineStyle
                xmlw.writeEndElement();
            }
            writeNewlineOnDebug(xmlw);
            {
                xmlw.writeStartElement("PolyStyle");
                xmlw.writeStartElement("color");
                // KML uses AABBGGRR format!
                // The alpha component is capped at 75% opacity.
                xmlw.writeCharacters(String.format("%02x%02x%02x%02x", (int) (255 * Math.min(.75, opacity)), col.getBlue(), col.getGreen(), col.getRed()));
                // color
                xmlw.writeEndElement();
                // out.writeStartElement("fill");
                // out.writeCharacters("1"); // Default 1
                // out.writeEndElement(); // fill
                xmlw.writeStartElement("outline");
                xmlw.writeCharacters("0");
                // outline
                xmlw.writeEndElement();
                // PolyStyle
                xmlw.writeEndElement();
            }
            writeNewlineOnDebug(xmlw);
            // Style
            xmlw.writeEndElement();
            writeNewlineOnDebug(xmlw);
        }
    }
    // With exactly one toplevel cluster, that cluster gets no placemark.
    Cluster<?> ignore = topc.size() == 1 ? topc.get(0) : null;
    Iterator<Cluster<Model>> it = clusters.iterator();
    // cnum is incremented for skipped clusters too, so "#s<cnum>" refers to
    // the style written for the same position of the cluster list above.
    for (int cnum = 0; it.hasNext(); cnum++) {
        Cluster<?> c = it.next();
        // Ignore sole toplevel cluster (usually: noise)
        if (c == ignore) {
            continue;
        }
        Polygon p = hullmap.get(c).second;
        xmlw.writeStartElement("Placemark");
        {
            xmlw.writeStartElement("name");
            xmlw.writeCharacters(c.getNameAutomatic());
            // name
            xmlw.writeEndElement();
            xmlw.writeStartElement("description");
            xmlw.writeCData(makeDescription(c).toString());
            // description
            xmlw.writeEndElement();
            xmlw.writeStartElement("styleUrl");
            xmlw.writeCharacters("#s" + cnum);
            // styleUrl
            xmlw.writeEndElement();
        }
        {
            xmlw.writeStartElement("Polygon");
            writeNewlineOnDebug(xmlw);
            // Compatibility mode additionally emits an altitude mode here and a
            // fixed third coordinate for 2-element points further below.
            if (compat) {
                xmlw.writeStartElement("altitudeMode");
                xmlw.writeCharacters("relativeToGround");
                // close altitude mode
                xmlw.writeEndElement();
                writeNewlineOnDebug(xmlw);
            }
            {
                xmlw.writeStartElement("outerBoundaryIs");
                xmlw.writeStartElement("LinearRing");
                xmlw.writeStartElement("coordinates");
                // Reverse anti-clockwise polygons.
                boolean reverse = (p.testClockwise() >= 0);
                ArrayListIter<double[]> itp = p.iter();
                // When reversing, start at the last point and walk backwards.
                if (reverse) {
                    itp.seek(p.size() - 1);
                }
                while (itp.valid()) {
                    double[] v = itp.get();
                    xmlw.writeCharacters(FormatUtil.format(v, ","));
                    if (compat && (v.length == 2)) {
                        xmlw.writeCharacters(",100");
                    }
                    // Points are separated by a single blank.
                    xmlw.writeCharacters(" ");
                    if (!reverse) {
                        itp.advance();
                    } else {
                        itp.retract();
                    }
                }
                // close coordinates
                xmlw.writeEndElement();
                // close LinearRing
                xmlw.writeEndElement();
                // close *BoundaryIs
                xmlw.writeEndElement();
            }
            writeNewlineOnDebug(xmlw);
            // Polygon
            xmlw.writeEndElement();
        }
        // Placemark
        xmlw.writeEndElement();
        writeNewlineOnDebug(xmlw);
    }
    // Document
    xmlw.writeEndElement();
    // kml
    xmlw.writeEndElement();
    xmlw.writeEndDocument();
}
Also used : HashMap(java.util.HashMap) Color(java.awt.Color) Cluster(de.lmu.ifi.dbs.elki.data.Cluster) DoubleObjPair(de.lmu.ifi.dbs.elki.utilities.pairs.DoubleObjPair) NumberVector(de.lmu.ifi.dbs.elki.data.NumberVector) Model(de.lmu.ifi.dbs.elki.data.model.Model) Iterator(java.util.Iterator) PolygonsObject(de.lmu.ifi.dbs.elki.data.spatial.PolygonsObject) Polygon(de.lmu.ifi.dbs.elki.data.spatial.Polygon)

Example 25 with Cluster

use of de.lmu.ifi.dbs.elki.data.Cluster in project elki by elki-project.

The method run of the class CBLOF.

/**
 * Runs the CBLOF algorithm on the given database.
 * <p>
 * Proceeds in three logged steps: compute a clustering, split the clusters
 * (ordered by descending size) into large and small ones, and compute the
 * score of every object.
 *
 * @param database Database to query
 * @param relation Data to process
 * @return CBLOF outlier result
 */
public OutlierResult run(Database database, Relation<O> relation) {
    final StepProgress progress = LOG.isVerbose() ? new StepProgress("CBLOF", 3) : null;
    final DBIDs ids = relation.getDBIDs();

    LOG.beginStep(progress, 1, "Computing clustering.");
    final Clustering<MeanModel> clustering = clusteringAlgorithm.run(database);

    LOG.beginStep(progress, 2, "Computing boundary between large and small clusters.");
    final List<? extends Cluster<MeanModel>> bySize = clustering.getAllClusters();
    // Largest cluster first.
    final Comparator<Cluster<MeanModel>> largestFirst = new Comparator<Cluster<MeanModel>>() {

        @Override
        public int compare(Cluster<MeanModel> a, Cluster<MeanModel> b) {
            return Integer.compare(b.size(), a.size());
        }
    };
    Collections.sort(bySize, largestFirst);
    // The boundary index itself still belongs to the large clusters.
    final int firstSmall = getClusterBoundary(relation, bySize) + 1;
    final List<? extends Cluster<MeanModel>> large = bySize.subList(0, firstSmall);
    final List<? extends Cluster<MeanModel>> small = bySize.subList(firstSmall, bySize.size());

    LOG.beginStep(progress, 3, "Computing Cluster-Based Local Outlier Factors (CBLOF).");
    final WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_DB);
    final DoubleMinMax minmax = new DoubleMinMax();
    computeCBLOFs(relation, distance, scores, minmax, large, small);
    LOG.setCompleted(progress);

    final DoubleRelation scoreRelation = new MaterializedDoubleRelation("Cluster-Based Local Outlier Factor", "cblof-outlier", scores, ids);
    final OutlierScoreMeta meta = new QuotientOutlierScoreMeta(minmax.getMin(), minmax.getMax(), 0.0, Double.POSITIVE_INFINITY, 1.0);
    return new OutlierResult(meta, scoreRelation);
}
Also used : WritableDoubleDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore) DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) OutlierResult(de.lmu.ifi.dbs.elki.result.outlier.OutlierResult) Cluster(de.lmu.ifi.dbs.elki.data.Cluster) MeanModel(de.lmu.ifi.dbs.elki.data.model.MeanModel) StepProgress(de.lmu.ifi.dbs.elki.logging.progress.StepProgress) QuotientOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.QuotientOutlierScoreMeta) DoubleRelation(de.lmu.ifi.dbs.elki.database.relation.DoubleRelation) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation) QuotientOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.QuotientOutlierScoreMeta) OutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta) DoubleMinMax(de.lmu.ifi.dbs.elki.math.DoubleMinMax) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation)

Aggregations

Cluster (de.lmu.ifi.dbs.elki.data.Cluster)38 Clustering (de.lmu.ifi.dbs.elki.data.Clustering)21 Model (de.lmu.ifi.dbs.elki.data.model.Model)18 ArrayList (java.util.ArrayList)14 DBIDs (de.lmu.ifi.dbs.elki.database.ids.DBIDs)13 ModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs)11 FiniteProgress (de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress)11 SubspaceModel (de.lmu.ifi.dbs.elki.data.model.SubspaceModel)8 Subspace (de.lmu.ifi.dbs.elki.data.Subspace)7 DBIDIter (de.lmu.ifi.dbs.elki.database.ids.DBIDIter)7 ClusterModel (de.lmu.ifi.dbs.elki.data.model.ClusterModel)6 StepProgress (de.lmu.ifi.dbs.elki.logging.progress.StepProgress)6 ArrayModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs)5 ArrayDBIDs (de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs)4 HashMap (java.util.HashMap)4 ByLabelOrAllInOneClustering (de.lmu.ifi.dbs.elki.algorithm.clustering.trivial.ByLabelOrAllInOneClustering)3 NumberVector (de.lmu.ifi.dbs.elki.data.NumberVector)3 ProxyDatabase (de.lmu.ifi.dbs.elki.database.ProxyDatabase)3 DBIDArrayIter (de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter)3 HashSetModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.HashSetModifiableDBIDs)3