Search in sources:

Example 36 with Clustering

use of de.lmu.ifi.dbs.elki.data.Clustering in project elki by elki-project.

The method run of the class TrivialAllInOne.

/**
 * Produce a clustering that consists of one single top-level cluster holding
 * all DBIDs of the given relation.
 *
 * @param relation Relation whose DBIDs are put into the cluster
 * @return Clustering with exactly one top-level cluster
 */
public Clustering<Model> run(Relation<?> relation) {
    final DBIDs allIds = relation.getDBIDs();
    final Clustering<Model> clustering = new Clustering<>("All-in-one trivial Clustering", "allinone-clustering");
    // The only cluster: every id, tagged with ClusterModel.CLUSTER.
    final Cluster<Model> everything = new Cluster<Model>(allIds, ClusterModel.CLUSTER);
    clustering.addToplevelCluster(everything);
    return clustering;
}
Also used : DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) ClusterModel(de.lmu.ifi.dbs.elki.data.model.ClusterModel) Model(de.lmu.ifi.dbs.elki.data.model.Model) Cluster(de.lmu.ifi.dbs.elki.data.Cluster) Clustering(de.lmu.ifi.dbs.elki.data.Clustering)

Example 37 with Clustering

use of de.lmu.ifi.dbs.elki.data.Clustering in project elki by elki-project.

The method processNewResult of the class KMLOutputHandler.

/**
 * Write the outlier or clustering result found in the hierarchy to a zipped
 * KML file ({@code doc.kml} inside the archive at {@code filename}) and, if
 * {@code autoopen} is set, open the written file with the desktop handler.
 *
 * @param hier Result hierarchy
 * @param newResult Newly added result to search for visualizable results
 * @throws AbortException if more than one visualizable result is found, or
 *         if an XML or IO error occurs while writing
 */
@Override
public void processNewResult(ResultHierarchy hier, Result newResult) {
    ArrayList<OutlierResult> ors = ResultUtil.filterResults(hier, newResult, OutlierResult.class);
    ArrayList<Clustering<?>> crs = ResultUtil.filterResults(hier, newResult, Clustering.class);
    if (ors.size() + crs.size() > 1) {
        throw new AbortException("More than one visualizable result found. The KML writer only supports a single result!");
    }
    Database database = ResultUtil.findDatabase(hier);
    for (OutlierResult outlierResult : ors) {
        try {
            XMLOutputFactory factory = XMLOutputFactory.newInstance();
            // try-with-resources: the archive (and the underlying file stream)
            // is closed even when writing the result fails.
            try (ZipOutputStream out = new ZipOutputStream(new FileOutputStream(filename))) {
                out.putNextEntry(new ZipEntry("doc.kml"));
                final XMLStreamWriter xmlw = factory.createXMLStreamWriter(out);
                writeOutlierResult(xmlw, outlierResult, database);
                xmlw.flush();
                xmlw.close();
                out.closeEntry();
                out.flush();
            }
            // Only open the file once the archive has been completely written and closed.
            if (autoopen) {
                Desktop.getDesktop().open(filename);
            }
        } catch (XMLStreamException e) {
            LOG.exception(e);
            throw new AbortException("XML error in KML output.", e);
        } catch (IOException e) {
            LOG.exception(e);
            throw new AbortException("IO error in KML output.", e);
        }
    }
    for (Clustering<?> clusteringResult : crs) {
        try {
            XMLOutputFactory factory = XMLOutputFactory.newInstance();
            // try-with-resources: the archive (and the underlying file stream)
            // is closed even when writing the result fails.
            try (ZipOutputStream out = new ZipOutputStream(new FileOutputStream(filename))) {
                out.putNextEntry(new ZipEntry("doc.kml"));
                final XMLStreamWriter xmlw = factory.createXMLStreamWriter(out);
                @SuppressWarnings("unchecked") Clustering<Model> cres = (Clustering<Model>) clusteringResult;
                writeClusteringResult(xmlw, cres, database);
                xmlw.flush();
                xmlw.close();
                out.closeEntry();
                out.flush();
            }
            // Only open the file once the archive has been completely written and closed.
            if (autoopen) {
                Desktop.getDesktop().open(filename);
            }
        } catch (XMLStreamException e) {
            LOG.exception(e);
            throw new AbortException("XML error in KML output.", e);
        } catch (IOException e) {
            LOG.exception(e);
            throw new AbortException("IO error in KML output.", e);
        }
    }
}
Also used : XMLOutputFactory(javax.xml.stream.XMLOutputFactory) ZipEntry(java.util.zip.ZipEntry) OutlierResult(de.lmu.ifi.dbs.elki.result.outlier.OutlierResult) IOException(java.io.IOException) Clustering(de.lmu.ifi.dbs.elki.data.Clustering) XMLStreamException(javax.xml.stream.XMLStreamException) ZipOutputStream(java.util.zip.ZipOutputStream) XMLStreamWriter(javax.xml.stream.XMLStreamWriter) FileOutputStream(java.io.FileOutputStream) Database(de.lmu.ifi.dbs.elki.database.Database) Model(de.lmu.ifi.dbs.elki.data.model.Model) AbortException(de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException)

Example 38 with Clustering

use of de.lmu.ifi.dbs.elki.data.Clustering in project elki by elki-project.

The method autoEvaluateOutliers of the class AutomaticEvaluation.

/**
 * Automatically evaluate new outlier results against a minority class.
 *
 * If the new result contains outlier results, this calls
 * {@code ensureClusteringResult} on the database, takes the first clustering
 * found below the database, picks the name of its smallest cluster as the
 * minority label, and runs the outlier evaluators with a pattern that matches
 * exactly this label.
 *
 * @param hier Result hierarchy
 * @param newResult Newly added result
 */
protected void autoEvaluateOutliers(ResultHierarchy hier, Result newResult) {
    final Collection<OutlierResult> outlierResults = ResultUtil.filterResults(hier, newResult, OutlierResult.class);
    if (LOG.isDebugging()) {
        LOG.debug("Number of new outlier results: " + outlierResults.size());
    }
    // Nothing to evaluate.
    if (outlierResults.isEmpty()) {
        return;
    }
    final Database database = ResultUtil.findDatabase(hier);
    ensureClusteringResult(database, database);
    final Collection<Clustering<?>> candidates = ResultUtil.filterResults(hier, database, Clustering.class);
    if (candidates.isEmpty()) {
        LOG.warning("Could not find a clustering result, even after running 'ensureClusteringResult'?!?");
        return;
    }
    final Clustering<?> reference = candidates.iterator().next();
    // Scan for the smallest cluster; its name becomes the minority label.
    // A clustering with at most one cluster yields no label.
    int smallest = Integer.MAX_VALUE;
    int objectCount = 0;
    String minorityLabel = null;
    if (reference.getAllClusters().size() > 1) {
        for (Cluster<?> candidate : reference.getAllClusters()) {
            final int members = candidate.getIDs().size();
            objectCount += members;
            if (members >= smallest) {
                continue;
            }
            smallest = members;
            minorityLabel = candidate.getName();
        }
    }
    if (minorityLabel == null) {
        LOG.warning("Could not evaluate outlier results, as I could not find a minority label.");
        return;
    }
    // Sanity warnings only; evaluation continues in both cases.
    if (smallest == 1) {
        LOG.warning("The minority class label had a single object. Try using 'ClassLabelFilter' to identify the class label column.");
    }
    if (smallest > 0.05 * objectCount) {
        LOG.warning("The minority class I discovered (labeled '" + minorityLabel + "') has " + (smallest * 100. / objectCount) + "% of objects. Outlier classes should be more rare!");
    }
    LOG.verbose("Evaluating using minority class: " + minorityLabel);
    // Anchored and quoted, so the pattern matches the label literally and completely.
    final Pattern positives = Pattern.compile("^" + Pattern.quote(minorityLabel) + "$");
    // Ranking evaluation
    new OutlierRankingEvaluation(positives).processNewResult(hier, newResult);
    // ROC curve
    new OutlierROCCurve(positives).processNewResult(hier, newResult);
    // Precision at k, with twice the minority class size as second argument
    new OutlierPrecisionAtKCurve(positives, 2 * smallest).processNewResult(hier, newResult);
    // Precision-recall curve
    new OutlierPrecisionRecallCurve(positives).processNewResult(hier, newResult);
    // Outlier score histogram
    new ComputeOutlierHistogram(positives, 50, new LinearScaling(), false).processNewResult(hier, newResult);
}
Also used : Pattern(java.util.regex.Pattern) OutlierResult(de.lmu.ifi.dbs.elki.result.outlier.OutlierResult) EvaluateClustering(de.lmu.ifi.dbs.elki.evaluation.clustering.EvaluateClustering) ByLabelOrAllInOneClustering(de.lmu.ifi.dbs.elki.algorithm.clustering.trivial.ByLabelOrAllInOneClustering) Clustering(de.lmu.ifi.dbs.elki.data.Clustering) ByLabelClustering(de.lmu.ifi.dbs.elki.algorithm.clustering.trivial.ByLabelClustering) LinearScaling(de.lmu.ifi.dbs.elki.utilities.scaling.LinearScaling) Database(de.lmu.ifi.dbs.elki.database.Database)

Example 39 with Clustering

use of de.lmu.ifi.dbs.elki.data.Clustering in project elki by elki-project.

The method makeStyleResult of the class VisualizerContext.

/**
 * Set the style library and create a cluster-based styling policy for it.
 *
 * The policy is built on the first clustering result found for the database;
 * when there is none, a default clustering is generated and used instead.
 *
 * @param stylelib Style library
 */
protected void makeStyleResult(StyleLibrary stylelib) {
    final Database db = ResultUtil.findDatabase(hier);
    stylelibrary = stylelib;
    final List<Clustering<? extends Model>> available = Clustering.getClusteringResults(db);
    // Choose the clustering first, then construct the policy in one place.
    final Clustering<? extends Model> chosen;
    if (available.isEmpty()) {
        chosen = generateDefaultClustering();
    } else {
        chosen = available.get(0);
    }
    stylepolicy = new ClusterStylingPolicy(chosen, stylelib);
}
Also used : Database(de.lmu.ifi.dbs.elki.database.Database) Model(de.lmu.ifi.dbs.elki.data.model.Model) Clustering(de.lmu.ifi.dbs.elki.data.Clustering) ByLabelHierarchicalClustering(de.lmu.ifi.dbs.elki.algorithm.clustering.trivial.ByLabelHierarchicalClustering) ClusterStylingPolicy(de.lmu.ifi.dbs.elki.visualization.style.ClusterStylingPolicy)

Example 40 with Clustering

use of de.lmu.ifi.dbs.elki.data.Clustering in project elki by elki-project.

The method run of the class NaiveAgglomerativeHierarchicalClustering1.

/**
 * Run the naive agglomerative clustering: starting with every object as its
 * own cluster, repeatedly merge the two closest active clusters (the distance
 * of merged clusters is updated with the minimum, i.e. single-linkage) until
 * only {@code numclusters} clusters remain.
 *
 * @param db Database
 * @param relation Relation
 * @return Flat clustering with one top-level cluster per cluster that is
 *         still active after merging
 */
public Result run(Database db, Relation<O> relation) {
    DistanceQuery<O> dq = db.getDistanceQuery(relation, getDistanceFunction());
    ArrayDBIDs ids = DBIDUtil.ensureArray(relation.getDBIDs());
    final int size = ids.size();
    LOG.verbose("Notice: SLINK is a much faster algorithm for single-linkage clustering!");
    // Compute the initial distance matrix (symmetric; the diagonal stays 0).
    double[][] matrix = new double[size][size];
    DBIDArrayIter ix = ids.iter(), iy = ids.iter();
    for (int x = 0; ix.valid(); x++, ix.advance()) {
        iy.seek(0);
        for (int y = 0; y < x; y++, iy.advance()) {
            final double dist = dq.distance(ix, iy);
            matrix[x][y] = dist;
            matrix[y][x] = dist;
        }
    }
    // Initialize space for result:
    // height[x] is the distance at which x was merged into another cluster;
    // positive infinity marks an object that is still an active cluster.
    double[] height = new double[size];
    Arrays.fill(height, Double.POSITIVE_INFINITY);
    // Parent node, to track merges
    // have every object point to itself initially
    // (only written in this method, never read again)
    ArrayModifiableDBIDs parent = DBIDUtil.newArray(ids);
    // Active clusters, when not trivial.
    Int2ReferenceMap<ModifiableDBIDs> clusters = new Int2ReferenceOpenHashMap<>();
    // Repeat until everything merged, except the desired number of clusters:
    final int stop = size - numclusters;
    FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Agglomerative clustering", stop, LOG) : null;
    for (int i = 0; i < stop; i++) {
        // Find the closest pair of active clusters, with y < x.
        double min = Double.POSITIVE_INFINITY;
        int minx = -1, miny = -1;
        for (int x = 0; x < size; x++) {
            if (height[x] < Double.POSITIVE_INFINITY) {
                continue;
            }
            for (int y = 0; y < x; y++) {
                if (height[y] < Double.POSITIVE_INFINITY) {
                    continue;
                }
                if (matrix[x][y] < min) {
                    min = matrix[x][y];
                    minx = x;
                    miny = y;
                }
            }
        }
        assert (minx >= 0 && miny >= 0);
        // Avoid allocating memory, by reusing existing iterators:
        ix.seek(minx);
        iy.seek(miny);
        // Perform merge in data structure: x -> y
        // Since y < x, prefer keeping y, dropping x.
        height[minx] = min;
        parent.set(minx, iy);
        // Merge into cluster
        ModifiableDBIDs cx = clusters.get(minx);
        ModifiableDBIDs cy = clusters.get(miny);
        if (cy == null) {
            cy = DBIDUtil.newHashSet();
            cy.add(iy);
        }
        if (cx == null) {
            cy.add(ix);
        } else {
            cy.addDBIDs(cx);
            clusters.remove(minx);
        }
        clusters.put(miny, cy);
        // Update distance matrix for y:
        for (int j = 0; j < size; j++) {
            matrix[j][miny] = Math.min(matrix[j][minx], matrix[j][miny]);
            matrix[miny][j] = Math.min(matrix[minx][j], matrix[miny][j]);
        }
        LOG.incrementProcessed(prog);
    }
    LOG.ensureCompleted(prog);
    // Build the clustering result
    final Clustering<Model> dendrogram = new Clustering<>("Hierarchical-Clustering", "hierarchical-clustering");
    for (int x = 0; x < size; x++) {
        // A finite height means x was merged into another cluster, whose
        // member set already contains it; only still-active objects are
        // emitted as clusters.
        if (height[x] < Double.POSITIVE_INFINITY) {
            continue;
        }
        DBIDs cids = clusters.get(x);
        if (cids == null) {
            // Never merged with anything: singleton cluster.
            ix.seek(x);
            cids = DBIDUtil.deref(ix);
        }
        Cluster<Model> cluster = new Cluster<>("Cluster", cids);
        dendrogram.addToplevelCluster(cluster);
    }
    return dendrogram;
}
Also used : FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) Cluster(de.lmu.ifi.dbs.elki.data.Cluster) Clustering(de.lmu.ifi.dbs.elki.data.Clustering) Model(de.lmu.ifi.dbs.elki.data.model.Model) Int2ReferenceOpenHashMap(it.unimi.dsi.fastutil.ints.Int2ReferenceOpenHashMap)

Aggregations

Clustering (de.lmu.ifi.dbs.elki.data.Clustering)68 ModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs)32 ArrayList (java.util.ArrayList)27 DBIDs (de.lmu.ifi.dbs.elki.database.ids.DBIDs)23 Cluster (de.lmu.ifi.dbs.elki.data.Cluster)21 Model (de.lmu.ifi.dbs.elki.data.model.Model)21 IndefiniteProgress (de.lmu.ifi.dbs.elki.logging.progress.IndefiniteProgress)20 WritableIntegerDataStore (de.lmu.ifi.dbs.elki.database.datastore.WritableIntegerDataStore)16 FiniteProgress (de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress)16 Database (de.lmu.ifi.dbs.elki.database.Database)14 DBIDIter (de.lmu.ifi.dbs.elki.database.ids.DBIDIter)14 LongStatistic (de.lmu.ifi.dbs.elki.logging.statistics.LongStatistic)14 StringStatistic (de.lmu.ifi.dbs.elki.logging.statistics.StringStatistic)14 DoubleStatistic (de.lmu.ifi.dbs.elki.logging.statistics.DoubleStatistic)13 ClusterModel (de.lmu.ifi.dbs.elki.data.model.ClusterModel)12 KMeansModel (de.lmu.ifi.dbs.elki.data.model.KMeansModel)12 ArrayModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs)9 AbortException (de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException)8 ArrayDBIDs (de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs)6 MedoidModel (de.lmu.ifi.dbs.elki.data.model.MedoidModel)5