Use of de.lmu.ifi.dbs.elki.data.Clustering in project elki by elki-project.
The class TrivialAllInOne, method run().
public Clustering<Model> run(Relation<?> relation) {
  final DBIDs ids = relation.getDBIDs();
  Clustering<Model> result = new Clustering<>("All-in-one trivial Clustering", "allinone-clustering");
  Cluster<Model> c = new Cluster<Model>(ids, ClusterModel.CLUSTER);
  result.addToplevelCluster(c);
  return result;
}
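As a consumption example (a sketch, not part of the ELKI code base): the Clustering returned above can be inspected through the same accessors used elsewhere on this page, getAllClusters(), getName() and getIDs(). The class and method names below are hypothetical.

import de.lmu.ifi.dbs.elki.data.Cluster;
import de.lmu.ifi.dbs.elki.data.Clustering;
import de.lmu.ifi.dbs.elki.data.model.Model;

public final class ClusteringInspection {
  // Hypothetical helper: print the name and size of every cluster in a result.
  public static void printClusterSizes(Clustering<? extends Model> clustering) {
    for (Cluster<? extends Model> c : clustering.getAllClusters()) {
      System.out.println(c.getName() + ": " + c.getIDs().size() + " objects");
    }
  }
}

For the all-in-one clustering built above, this prints a single line covering every object in the relation.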
Use of de.lmu.ifi.dbs.elki.data.Clustering in project elki by elki-project.
The class KMLOutputHandler, method processNewResult().
@Override
public void processNewResult(ResultHierarchy hier, Result newResult) {
  ArrayList<OutlierResult> ors = ResultUtil.filterResults(hier, newResult, OutlierResult.class);
  ArrayList<Clustering<?>> crs = ResultUtil.filterResults(hier, newResult, Clustering.class);
  if (ors.size() + crs.size() > 1) {
    throw new AbortException("More than one visualizable result found. The KML writer only supports a single result!");
  }
  Database database = ResultUtil.findDatabase(hier);
  for (OutlierResult outlierResult : ors) {
    try {
      XMLOutputFactory factory = XMLOutputFactory.newInstance();
      ZipOutputStream out = new ZipOutputStream(new FileOutputStream(filename));
      out.putNextEntry(new ZipEntry("doc.kml"));
      final XMLStreamWriter xmlw = factory.createXMLStreamWriter(out);
      writeOutlierResult(xmlw, outlierResult, database);
      xmlw.flush();
      xmlw.close();
      out.closeEntry();
      out.flush();
      out.close();
      if (autoopen) {
        Desktop.getDesktop().open(filename);
      }
    } catch (XMLStreamException e) {
      LOG.exception(e);
      throw new AbortException("XML error in KML output.", e);
    } catch (IOException e) {
      LOG.exception(e);
      throw new AbortException("IO error in KML output.", e);
    }
  }
  for (Clustering<?> clusteringResult : crs) {
    try {
      XMLOutputFactory factory = XMLOutputFactory.newInstance();
      ZipOutputStream out = new ZipOutputStream(new FileOutputStream(filename));
      out.putNextEntry(new ZipEntry("doc.kml"));
      final XMLStreamWriter xmlw = factory.createXMLStreamWriter(out);
      @SuppressWarnings("unchecked")
      Clustering<Model> cres = (Clustering<Model>) clusteringResult;
      writeClusteringResult(xmlw, cres, database);
      xmlw.flush();
      xmlw.close();
      out.closeEntry();
      out.flush();
      out.close();
      if (autoopen) {
        Desktop.getDesktop().open(filename);
      }
    } catch (XMLStreamException e) {
      LOG.exception(e);
      throw new AbortException("XML error in KML output.", e);
    } catch (IOException e) {
      LOG.exception(e);
      throw new AbortException("IO error in KML output.", e);
    }
  }
}
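Both loops above manage the ZipOutputStream and XMLStreamWriter by hand, and on an exception the streams are left open. Below is a hedged sketch of the same write path using try-with-resources; KmlWriterSketch, writeZippedKml and the placeholder writeClusteringResult are hypothetical names, while filename and autoopen mirror the fields used in the snippet. This is an illustration of the resource-handling pattern, not the actual KMLOutputHandler code.

import java.awt.Desktop;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.util.zip.ZipEntry;
import java.util.zip.ZipOutputStream;
import javax.xml.stream.XMLOutputFactory;
import javax.xml.stream.XMLStreamException;
import javax.xml.stream.XMLStreamWriter;

import de.lmu.ifi.dbs.elki.data.Clustering;
import de.lmu.ifi.dbs.elki.data.model.Model;

public class KmlWriterSketch {
  private final File filename;    // mirrors the handler's output file
  private final boolean autoopen; // mirrors the handler's auto-open flag

  public KmlWriterSketch(File filename, boolean autoopen) {
    this.filename = filename;
    this.autoopen = autoopen;
  }

  void writeZippedKml(Clustering<Model> clustering) throws IOException, XMLStreamException {
    // The ZipOutputStream is closed automatically, even if writing fails.
    try (ZipOutputStream out = new ZipOutputStream(new FileOutputStream(filename))) {
      out.putNextEntry(new ZipEntry("doc.kml"));
      XMLStreamWriter xmlw = XMLOutputFactory.newInstance().createXMLStreamWriter(out);
      writeClusteringResult(xmlw, clustering); // placeholder for the real KML serialization
      xmlw.flush();
      xmlw.close(); // XMLStreamWriter is not AutoCloseable; closing it does not close the zip stream
      out.closeEntry();
    }
    if (autoopen) {
      Desktop.getDesktop().open(filename);
    }
  }

  // Placeholder only; the real KMLOutputHandler method also takes the Database.
  private void writeClusteringResult(XMLStreamWriter xmlw, Clustering<Model> clustering) throws XMLStreamException {
    xmlw.writeEmptyElement("kml");
  }
}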
Use of de.lmu.ifi.dbs.elki.data.Clustering in project elki by elki-project.
The class AutomaticEvaluation, method autoEvaluateOutliers().
protected void autoEvaluateOutliers(ResultHierarchy hier, Result newResult) {
  Collection<OutlierResult> outliers = ResultUtil.filterResults(hier, newResult, OutlierResult.class);
  if (LOG.isDebugging()) {
    LOG.debug("Number of new outlier results: " + outliers.size());
  }
  if (!outliers.isEmpty()) {
    Database db = ResultUtil.findDatabase(hier);
    ensureClusteringResult(db, db);
    Collection<Clustering<?>> clusterings = ResultUtil.filterResults(hier, db, Clustering.class);
    if (clusterings.isEmpty()) {
      LOG.warning("Could not find a clustering result, even after running 'ensureClusteringResult'?!?");
      return;
    }
    Clustering<?> basec = clusterings.iterator().next();
    // Find minority class label
    int min = Integer.MAX_VALUE;
    int total = 0;
    String label = null;
    if (basec.getAllClusters().size() > 1) {
      for (Cluster<?> c : basec.getAllClusters()) {
        final int csize = c.getIDs().size();
        total += csize;
        if (csize < min) {
          min = csize;
          label = c.getName();
        }
      }
    }
    if (label == null) {
      LOG.warning("Could not evaluate outlier results, as I could not find a minority label.");
      return;
    }
    if (min == 1) {
      LOG.warning("The minority class label had a single object. Try using 'ClassLabelFilter' to identify the class label column.");
    }
    if (min > 0.05 * total) {
      LOG.warning("The minority class I discovered (labeled '" + label + "') has " + (min * 100. / total) + "% of objects. Outlier classes should be more rare!");
    }
    LOG.verbose("Evaluating using minority class: " + label);
    Pattern pat = Pattern.compile("^" + Pattern.quote(label) + "$");
    // Evaluate rankings.
    new OutlierRankingEvaluation(pat).processNewResult(hier, newResult);
    // Compute ROC curve
    new OutlierROCCurve(pat).processNewResult(hier, newResult);
    // Compute Precision at k
    new OutlierPrecisionAtKCurve(pat, min << 1).processNewResult(hier, newResult);
    // Compute precision-recall curve
    new OutlierPrecisionRecallCurve(pat).processNewResult(hier, newResult);
    // Compute outlier histogram
    new ComputeOutlierHistogram(pat, 50, new LinearScaling(), false).processNewResult(hier, newResult);
  }
}
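The minority-class search in the middle of this method can be read as a standalone step: pick the smallest cluster of the reference clustering and use its name as the outlier class label. A sketch of just that step, using only the accessors shown above (MinorityLabel and findMinorityLabel are hypothetical names):

import de.lmu.ifi.dbs.elki.data.Cluster;
import de.lmu.ifi.dbs.elki.data.Clustering;

public final class MinorityLabel {
  // Hypothetical helper: return the name of the smallest cluster, or null
  // if the clustering has at most one cluster (no usable minority class).
  public static String findMinorityLabel(Clustering<?> clustering) {
    if (clustering.getAllClusters().size() <= 1) {
      return null;
    }
    int min = Integer.MAX_VALUE;
    String label = null;
    for (Cluster<?> c : clustering.getAllClusters()) {
      final int csize = c.getIDs().size();
      if (csize < min) {
        min = csize;
        label = c.getName();
      }
    }
    return label;
  }
}

The label found this way is then quoted and anchored into a regular expression (Pattern.quote plus "^" and "$"), so labels containing regex metacharacters still match literally and exactly.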
Use of de.lmu.ifi.dbs.elki.data.Clustering in project elki by elki-project.
The class VisualizerContext, method makeStyleResult().
/**
 * Generate a new style result for the given style library.
 *
 * @param stylelib Style library
 */
protected void makeStyleResult(StyleLibrary stylelib) {
  final Database db = ResultUtil.findDatabase(hier);
  stylelibrary = stylelib;
  List<Clustering<? extends Model>> clusterings = Clustering.getClusteringResults(db);
  if (!clusterings.isEmpty()) {
    stylepolicy = new ClusterStylingPolicy(clusterings.get(0), stylelib);
  } else {
    Clustering<Model> c = generateDefaultClustering();
    stylepolicy = new ClusterStylingPolicy(c, stylelib);
  }
}
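generateDefaultClustering() is not shown on this page. Judging from the TrivialAllInOne snippet at the top, a plausible fallback is a single all-in-one cluster over all objects; the sketch below only illustrates that idea and is not the actual VisualizerContext code (FallbackClustering and allInOne are hypothetical names, and a Relation is assumed to be available).

import de.lmu.ifi.dbs.elki.data.Cluster;
import de.lmu.ifi.dbs.elki.data.Clustering;
import de.lmu.ifi.dbs.elki.data.model.ClusterModel;
import de.lmu.ifi.dbs.elki.data.model.Model;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
import de.lmu.ifi.dbs.elki.database.relation.Relation;

public final class FallbackClustering {
  // Hypothetical fallback: one top-level cluster containing every object.
  public static Clustering<Model> allInOne(Relation<?> relation) {
    final DBIDs ids = relation.getDBIDs();
    Clustering<Model> result = new Clustering<>("All-in-one trivial Clustering", "allinone-clustering");
    result.addToplevelCluster(new Cluster<Model>(ids, ClusterModel.CLUSTER));
    return result;
  }
}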
Use of de.lmu.ifi.dbs.elki.data.Clustering in project elki by elki-project.
The class NaiveAgglomerativeHierarchicalClustering1, method run().
/**
 * Run the algorithm
 *
 * @param db Database
 * @param relation Relation
 * @return Clustering hierarchy
 */
public Result run(Database db, Relation<O> relation) {
  DistanceQuery<O> dq = db.getDistanceQuery(relation, getDistanceFunction());
  ArrayDBIDs ids = DBIDUtil.ensureArray(relation.getDBIDs());
  final int size = ids.size();
  LOG.verbose("Notice: SLINK is a much faster algorithm for single-linkage clustering!");
  // Compute the initial distance matrix.
  double[][] matrix = new double[size][size];
  DBIDArrayIter ix = ids.iter(), iy = ids.iter();
  for (int x = 0; ix.valid(); x++, ix.advance()) {
    iy.seek(0);
    for (int y = 0; y < x; y++, iy.advance()) {
      final double dist = dq.distance(ix, iy);
      matrix[x][y] = dist;
      matrix[y][x] = dist;
    }
  }
  // Initialize space for result:
  double[] height = new double[size];
  Arrays.fill(height, Double.POSITIVE_INFINITY);
  // Parent node, to track merges
  // have every object point to itself initially
  ArrayModifiableDBIDs parent = DBIDUtil.newArray(ids);
  // Active clusters, when not trivial.
  Int2ReferenceMap<ModifiableDBIDs> clusters = new Int2ReferenceOpenHashMap<>();
  // Repeat until everything merged, except the desired number of clusters:
  final int stop = size - numclusters;
  FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Agglomerative clustering", stop, LOG) : null;
  for (int i = 0; i < stop; i++) {
    double min = Double.POSITIVE_INFINITY;
    int minx = -1, miny = -1;
    for (int x = 0; x < size; x++) {
      if (height[x] < Double.POSITIVE_INFINITY) {
        continue; // x was already merged away
      }
      for (int y = 0; y < x; y++) {
        if (height[y] < Double.POSITIVE_INFINITY) {
          continue; // y was already merged away
        }
        if (matrix[x][y] < min) {
          min = matrix[x][y];
          minx = x;
          miny = y;
        }
      }
    }
    assert (minx >= 0 && miny >= 0);
    // Avoid allocating memory, by reusing existing iterators:
    ix.seek(minx);
    iy.seek(miny);
    // Perform merge in data structure: x -> y
    // Since y < x, prefer keeping y, dropping x.
    height[minx] = min;
    parent.set(minx, iy);
    // Merge into cluster
    ModifiableDBIDs cx = clusters.get(minx);
    ModifiableDBIDs cy = clusters.get(miny);
    if (cy == null) {
      cy = DBIDUtil.newHashSet();
      cy.add(iy);
    }
    if (cx == null) {
      cy.add(ix);
    } else {
      cy.addDBIDs(cx);
      clusters.remove(minx);
    }
    clusters.put(miny, cy);
    // Update distance matrix for y (single linkage: keep the minimum distance):
    for (int j = 0; j < size; j++) {
      matrix[j][miny] = Math.min(matrix[j][minx], matrix[j][miny]);
      matrix[miny][j] = Math.min(matrix[minx][j], matrix[miny][j]);
    }
    LOG.incrementProcessed(prog);
  }
  LOG.ensureCompleted(prog);
  // Build the clustering result: only entries that were never merged away
  // (height still infinite) remain as top-level clusters.
  final Clustering<Model> dendrogram = new Clustering<>("Hierarchical-Clustering", "hierarchical-clustering");
  for (int x = 0; x < size; x++) {
    if (height[x] < Double.POSITIVE_INFINITY) {
      continue;
    }
    DBIDs cids = clusters.get(x);
    if (cids == null) {
      // Singleton cluster that never took part in a merge.
      ix.seek(x);
      cids = DBIDUtil.deref(ix);
    }
    Cluster<Model> cluster = new Cluster<>("Cluster", cids);
    dendrogram.addToplevelCluster(cluster);
  }
  return dendrogram;
}
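The matrix update near the end of the merge loop is what makes this single linkage (matching the SLINK hint logged at the start): the distance from any remaining index j to the merged cluster is the minimum of its distances to the two merged parts. A minimal, ELKI-free sketch of that rule on a plain symmetric distance matrix (SingleLinkageUpdate and mergeSingleLinkage are hypothetical names):

public final class SingleLinkageUpdate {
  // Hypothetical helper: fold cluster x into cluster y (y < x) in a symmetric
  // distance matrix, using the single-linkage rule d(j, y) = min(d(j, x), d(j, y)).
  static void mergeSingleLinkage(double[][] matrix, int x, int y) {
    for (int j = 0; j < matrix.length; j++) {
      matrix[j][y] = Math.min(matrix[j][x], matrix[j][y]);
      matrix[y][j] = Math.min(matrix[x][j], matrix[y][j]);
    }
  }
}

Swapping Math.min for Math.max would give complete linkage instead, without touching the rest of the merge loop.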