use of de.lmu.ifi.dbs.elki.data.Cluster in project elki by elki-project.
the class TrivialAllNoise method run.
/**
 * Builds the trivial "all-in-noise" clustering: every object of the relation
 * ends up in one single top-level cluster.
 *
 * @param relation Relation whose DBIDs form the only cluster
 * @return Clustering holding exactly one top-level cluster
 */
public Clustering<Model> run(Relation<?> relation) {
  final DBIDs allObjects = relation.getDBIDs();
  final Clustering<Model> clustering = new Clustering<>("All-in-noise trivial Clustering", "allinnoise-clustering");
  // NOTE(review): the boolean argument is presumably the noise flag of the
  // cluster - confirm against the Cluster constructor.
  final Cluster<Model> everything = new Cluster<Model>(allObjects, true, ClusterModel.CLUSTER);
  clustering.addToplevelCluster(everything);
  return clustering;
}
use of de.lmu.ifi.dbs.elki.data.Cluster in project elki by elki-project.
the class TrivialAllInOne method run.
/**
 * Builds the trivial "all-in-one" clustering: every object of the relation
 * ends up in one single top-level cluster.
 *
 * @param relation Relation whose DBIDs form the only cluster
 * @return Clustering holding exactly one top-level cluster
 */
public Clustering<Model> run(Relation<?> relation) {
  final DBIDs allObjects = relation.getDBIDs();
  final Clustering<Model> clustering = new Clustering<>("All-in-one trivial Clustering", "allinone-clustering");
  final Cluster<Model> everything = new Cluster<Model>(allObjects, ClusterModel.CLUSTER);
  clustering.addToplevelCluster(everything);
  return clustering;
}
use of de.lmu.ifi.dbs.elki.data.Cluster in project elki by elki-project.
the class DiSH method buildHierarchy.
/**
 * Links the given clusters into a parent/child hierarchy.
 * <p>
 * Every cluster is compared with all clusters that follow it in the list.
 * Only pairs where the later cluster has a strictly larger level
 * ({@code dimensionality} minus the subspace dimensionality) are considered.
 * A later cluster whose subspace has dimensionality 0 (noise level) becomes
 * the parent only when the earlier cluster has no parent yet. Otherwise the
 * later cluster becomes a parent when the combined subspace level equals its
 * own level, the weighted centroid distance is at most {@code 2 * epsilon},
 * and {@code isParent} does not report it for the already existing parents.
 *
 * @param database the database containing the data objects
 * @param clustering Clustering we process
 * @param clusters the sorted list of clusters
 * @param dimensionality the dimensionality of the data
 * @throws RuntimeException if the levels match but the weighted distance is
 *         not within {@code 2 * epsilon}
 */
private void buildHierarchy(Relation<V> database, Clustering<SubspaceModel> clustering, List<Cluster<SubspaceModel>> clusters, int dimensionality) {
  final StringBuilder debugLog = LOG.isDebugging() ? new StringBuilder() : null;
  final int dbDim = RelationUtil.dimensionality(database);
  final Hierarchy<Cluster<SubspaceModel>> hierarchy = clustering.getClusterHierarchy();

  for (int childIdx = 0; childIdx < clusters.size() - 1; childIdx++) {
    final Cluster<SubspaceModel> child = clusters.get(childIdx);
    final Subspace childSubspace = child.getModel().getSubspace();
    final int childLevel = dimensionality - childSubspace.dimensionality();
    final NumberVector childCentroid = ProjectedCentroid.make(childSubspace.getDimensions(), database, child.getIDs());
    final long[] childPV = childSubspace.getDimensions();

    for (int candIdx = childIdx + 1; candIdx < clusters.size(); candIdx++) {
      final Cluster<SubspaceModel> candidate = clusters.get(candIdx);
      final Subspace candSubspace = candidate.getModel().getSubspace();
      final int candLevel = dimensionality - candSubspace.dimensionality();

      // Only a cluster on a strictly higher level can be a parent.
      if (childLevel >= candLevel) {
        continue;
      }
      if (debugLog != null) {
        debugLog.append("\n l_i=").append(childLevel).append(" pv_i=[").append(BitsUtil.toStringLow(childSubspace.getDimensions(), dbDim)).append(']');
        debugLog.append("\n l_j=").append(candLevel).append(" pv_j=[").append(BitsUtil.toStringLow(candSubspace.getDimensions(), dbDim)).append(']');
      }

      // Noise level reached: noise only adopts clusters without any parent.
      if (candSubspace.dimensionality() == 0) {
        if (hierarchy.numParents(child) == 0) {
          clustering.addChildCluster(candidate, child);
          if (debugLog != null) {
            debugLog.append("\n [").append(BitsUtil.toStringLow(candSubspace.getDimensions(), dbDim));
            debugLog.append("] is parent of [").append(BitsUtil.toStringLow(childSubspace.getDimensions(), dbDim));
            debugLog.append(']');
          }
        }
        continue;
      }

      final NumberVector candCentroid = ProjectedCentroid.make(candidate.getModel().getDimensions(), database, candidate.getIDs());
      final long[] candPV = candSubspace.getDimensions();
      final long[] sharedPV = BitsUtil.andCMin(childPV, candPV);
      final int pairLevel = subspaceDimensionality(childCentroid, candCentroid, childPV, candPV, sharedPV);
      final double d = weightedDistance(childCentroid, candCentroid, sharedPV);
      if (debugLog != null) {
        debugLog.append("\n dist = ").append(pairLevel);
      }

      if (pairLevel != candLevel) {
        continue;
      }
      if (debugLog != null) {
        debugLog.append("\n d = ").append(d);
      }
      // Negated "<=" on purpose: a NaN distance must also end up here.
      if (!(d <= 2 * epsilon)) {
        throw new RuntimeException("Should never happen: d = " + d);
      }

      // Skip the link when the existing parents already account for the candidate.
      final boolean coveredByParents = hierarchy.numParents(child) != 0 && isParent(database, candidate, hierarchy.iterParents(child), dbDim);
      if (!coveredByParents) {
        clustering.addChildCluster(candidate, child);
        if (debugLog != null) {
          debugLog.append("\n [").append(BitsUtil.toStringLow(candSubspace.getDimensions(), dbDim));
          debugLog.append("] is parent of [");
          debugLog.append(BitsUtil.toStringLow(childSubspace.getDimensions(), dbDim));
          debugLog.append(']');
        }
      }
    }
  }

  if (debugLog != null) {
    LOG.debug(debugLog.toString());
  }
}
use of de.lmu.ifi.dbs.elki.data.Cluster in project elki by elki-project.
the class KMLOutputHandler method writeClusteringResult.
/**
* Writes a clustering as a KML document to the given XML stream.
* <p>
* The output consists of a document header (name and description), one
* {@code Style} element per cluster, and one {@code Placemark} with a
* {@code Polygon} per cluster. If the clustering has exactly one top-level
* cluster, no placemark is written for that cluster.
*
* @param xmlw XML stream writer receiving the output
* @param clustering Clustering to write
* @param database Database from which a 2D number vector relation is fetched
* @throws XMLStreamException if the stream writer reports an error
*/
private void writeClusteringResult(XMLStreamWriter xmlw, Clustering<Model> clustering, Database database) throws XMLStreamException {
xmlw.writeStartDocument();
xmlw.writeCharacters("\n");
xmlw.writeStartElement("kml");
xmlw.writeDefaultNamespace("http://earth.google.com/kml/2.2");
xmlw.writeStartElement("Document");
{
// TODO: can we automatically generate more helpful data here?
xmlw.writeStartElement("name");
xmlw.writeCharacters("ELKI KML output for " + clustering.getLongName());
// name
xmlw.writeEndElement();
writeNewlineOnDebug(xmlw);
// TODO: e.g. list the settings in the description?
xmlw.writeStartElement("description");
xmlw.writeCharacters("ELKI KML output for " + clustering.getLongName());
// description
xmlw.writeEndElement();
writeNewlineOnDebug(xmlw);
}
List<Cluster<Model>> clusters = clustering.getAllClusters();
Relation<NumberVector> coords = database.getRelation(TypeUtil.NUMBER_VECTOR_FIELD_2D);
List<Cluster<Model>> topc = clustering.getToplevelClusters();
Hierarchy<Cluster<Model>> hier = clustering.getClusterHierarchy();
// Per-cluster lookup filled by buildHullsRecursively, started once from
// every top-level cluster; both loops below read their polygons from it.
Map<Object, DoubleObjPair<Polygon>> hullmap = new HashMap<>();
for (Cluster<Model> clu : topc) {
buildHullsRecursively(clu, hier, hullmap, coords);
}
// Style section: one Style per cluster with id "s" + i. The placemark loop
// further down iterates the same list in the same order and refers to these
// ids through "#s" + cnum.
{
final double projarea = 360. * 180. * .01;
// TODO: generate styles from color scheme
Iterator<Cluster<Model>> it = clusters.iterator();
for (int i = 0; it.hasNext(); i++) {
Cluster<Model> clus = it.next();
// This is a prime based magic number, to produce a colorful output
Color col = Color.getHSBColor(i / 4.294967291f, 1.f, .5f);
DoubleObjPair<Polygon> pair = hullmap.get(clus);
// Approximate area (using bounding box)
double hullarea = SpatialUtil.volume(pair.second);
// The larger the hull relative to projarea, the smaller this value;
// clamped at 0 so the square root below stays defined.
final double relativeArea = Math.max(1. - (hullarea / projarea), 0.);
// final double relativeSize = pair.first / coords.size();
final double opacity = .65 * FastMath.sqrt(relativeArea) + .1;
xmlw.writeStartElement("Style");
xmlw.writeAttribute("id", "s" + i);
writeNewlineOnDebug(xmlw);
{
xmlw.writeStartElement("LineStyle");
xmlw.writeStartElement("width");
xmlw.writeCharacters("0");
// width
xmlw.writeEndElement();
// LineStyle
xmlw.writeEndElement();
}
writeNewlineOnDebug(xmlw);
{
xmlw.writeStartElement("PolyStyle");
xmlw.writeStartElement("color");
// KML uses AABBGGRR format!
// The alpha byte is additionally capped at 75% of 255.
xmlw.writeCharacters(String.format("%02x%02x%02x%02x", (int) (255 * Math.min(.75, opacity)), col.getBlue(), col.getGreen(), col.getRed()));
// color
xmlw.writeEndElement();
// out.writeStartElement("fill");
// out.writeCharacters("1"); // Default 1
// out.writeEndElement(); // fill
xmlw.writeStartElement("outline");
xmlw.writeCharacters("0");
// outline
xmlw.writeEndElement();
// PolyStyle
xmlw.writeEndElement();
}
writeNewlineOnDebug(xmlw);
// Style
xmlw.writeEndElement();
writeNewlineOnDebug(xmlw);
}
}
// With exactly one top-level cluster, that cluster gets no placemark.
Cluster<?> ignore = topc.size() == 1 ? topc.get(0) : null;
Iterator<Cluster<Model>> it = clusters.iterator();
// cnum is incremented for the skipped cluster as well (the for-update runs
// on continue), which keeps "#s" + cnum aligned with the style ids above.
for (int cnum = 0; it.hasNext(); cnum++) {
Cluster<?> c = it.next();
// Ignore sole toplevel cluster (usually: noise)
if (c == ignore) {
continue;
}
Polygon p = hullmap.get(c).second;
xmlw.writeStartElement("Placemark");
{
xmlw.writeStartElement("name");
xmlw.writeCharacters(c.getNameAutomatic());
// name
xmlw.writeEndElement();
xmlw.writeStartElement("description");
xmlw.writeCData(makeDescription(c).toString());
// description
xmlw.writeEndElement();
xmlw.writeStartElement("styleUrl");
xmlw.writeCharacters("#s" + cnum);
// styleUrl
xmlw.writeEndElement();
}
{
xmlw.writeStartElement("Polygon");
writeNewlineOnDebug(xmlw);
if (compat) {
xmlw.writeStartElement("altitudeMode");
xmlw.writeCharacters("relativeToGround");
// close altitude mode
xmlw.writeEndElement();
writeNewlineOnDebug(xmlw);
}
{
xmlw.writeStartElement("outerBoundaryIs");
xmlw.writeStartElement("LinearRing");
xmlw.writeStartElement("coordinates");
// Reverse anti-clockwise polygons.
boolean reverse = (p.testClockwise() >= 0);
ArrayListIter<double[]> itp = p.iter();
// When reversing, start at the last vertex and walk backwards.
if (reverse) {
itp.seek(p.size() - 1);
}
while (itp.valid()) {
double[] v = itp.get();
xmlw.writeCharacters(FormatUtil.format(v, ","));
// In compat mode, 2D points get a fixed third coordinate of 100.
if (compat && (v.length == 2)) {
xmlw.writeCharacters(",100");
}
xmlw.writeCharacters(" ");
if (!reverse) {
itp.advance();
} else {
itp.retract();
}
}
// close coordinates
xmlw.writeEndElement();
// close LinearRing
xmlw.writeEndElement();
// close *BoundaryIs
xmlw.writeEndElement();
}
writeNewlineOnDebug(xmlw);
// Polygon
xmlw.writeEndElement();
}
// Placemark
xmlw.writeEndElement();
writeNewlineOnDebug(xmlw);
}
// Document
xmlw.writeEndElement();
// kml
xmlw.writeEndElement();
xmlw.writeEndDocument();
}
use of de.lmu.ifi.dbs.elki.data.Cluster in project elki by elki-project.
the class CBLOF method run.
/**
 * Computes CBLOF outlier scores in three steps: cluster the database, split
 * the clusters (ordered by descending size) into large and small ones, and
 * score the objects.
 *
 * @param database Database handed to the clustering algorithm
 * @param relation Data to process
 * @return CBLOF outlier result
 */
public OutlierResult run(Database database, Relation<O> relation) {
  final StepProgress progress = LOG.isVerbose() ? new StepProgress("CBLOF", 3) : null;
  final DBIDs objectIds = relation.getDBIDs();

  LOG.beginStep(progress, 1, "Computing clustering.");
  final Clustering<MeanModel> clusterResult = clusteringAlgorithm.run(database);

  LOG.beginStep(progress, 2, "Computing boundary between large and small clusters.");
  final List<? extends Cluster<MeanModel>> bySize = clusterResult.getAllClusters();
  // Largest cluster first.
  final Comparator<Cluster<MeanModel>> descendingSize = new Comparator<Cluster<MeanModel>>() {
    @Override
    public int compare(Cluster<MeanModel> first, Cluster<MeanModel> second) {
      return Integer.compare(second.size(), first.size());
    }
  };
  Collections.sort(bySize, descendingSize);
  // The boundary index itself still belongs to the large clusters.
  final int firstSmall = getClusterBoundary(relation, bySize) + 1;
  final List<? extends Cluster<MeanModel>> large = bySize.subList(0, firstSmall);
  final List<? extends Cluster<MeanModel>> small = bySize.subList(firstSmall, bySize.size());

  LOG.beginStep(progress, 3, "Computing Cluster-Based Local Outlier Factors (CBLOF).");
  final WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(objectIds, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_DB);
  final DoubleMinMax scoreRange = new DoubleMinMax();
  computeCBLOFs(relation, distance, scores, scoreRange, large, small);
  LOG.setCompleted(progress);

  final DoubleRelation scoreRelation = new MaterializedDoubleRelation("Cluster-Based Local Outlier Factor", "cblof-outlier", scores, objectIds);
  final OutlierScoreMeta meta = new QuotientOutlierScoreMeta(scoreRange.getMin(), scoreRange.getMax(), 0.0, Double.POSITIVE_INFINITY, 1.0);
  return new OutlierResult(meta, scoreRelation);
}
Aggregations