use of de.lmu.ifi.dbs.elki.data.model.Model in project elki by elki-project.
the class ClusteringVectorParser method nextEvent.
/**
 * Advance the parser by one record, i.e. one non-comment input line.
 *
 * Every token of the line that parses as a base-10 integer is taken as the
 * cluster number of the next object; every other token is collected as a
 * label. The first label (or "Cluster" if there is none) names the
 * clustering built from the line.
 *
 * If the record changes the meta data (first record seen, or first record
 * carrying labels), {@code META_CHANGED} is returned and the matching
 * {@code NEXT_OBJECT} is queued for the following call.
 *
 * @return the next stream event
 * @throws AbortException if a line has a different number of cluster
 *         assignments than the first line
 * @throws IllegalArgumentException if reading the input fails; the
 *         underlying {@link IOException} is attached as the cause
 */
@Override
public Event nextEvent() {
  // Deliver an event deferred by the previous call first.
  if (nextevent != null) {
    Event ret = nextevent;
    nextevent = null;
    return ret;
  }
  try {
    if (!reader.nextLineExceptComments()) {
      return Event.END_OF_STREAM;
    }
    buf1.clear();
    lbl.clear();
    Int2IntOpenHashMap csize = new Int2IntOpenHashMap();
    String name = null;
    // The tokenizer was positioned by nextLineExceptComments().
    for (; tokenizer.valid(); tokenizer.advance()) {
      try {
        int cnum = tokenizer.getIntBase10();
        buf1.add(cnum);
        // Update cluster sizes:
        csize.addTo(cnum, 1);
      } catch (NumberFormatException e) {
        // Not a number: treat the token as a label.
        final String label = tokenizer.getSubstring();
        lbl.add(label);
        if (name == null) {
          name = label;
        }
      }
    }
    if (name == null) {
      name = "Cluster";
    }
    // Update meta on first record:
    boolean metaupdate = (range == null);
    if (metaupdate) {
      range = DBIDUtil.generateStaticDBIDRange(buf1.size());
    }
    if (buf1.size() != range.size()) {
      throw new AbortException("Clusterings do not contain the same number of elements!");
    }
    // Build clustering to store in the relation.
    Int2ObjectOpenHashMap<ModifiableDBIDs> clusters = new Int2ObjectOpenHashMap<>(csize.size());
    curclu = new Clustering<>(name, name);
    // Preallocate one id array per cluster number, sized by its count.
    for (ObjectIterator<Int2IntMap.Entry> sizeIter = csize.int2IntEntrySet().fastIterator(); sizeIter.hasNext(); ) {
      Int2IntMap.Entry entry = sizeIter.next();
      if (entry.getIntValue() > 0) {
        clusters.put(entry.getIntKey(), DBIDUtil.newArray(entry.getIntValue()));
      }
    }
    // The i-th assignment on the line belongs to the i-th id of the range.
    DBIDArrayIter iter = range.iter();
    for (int i = 0; i < buf1.size(); i++) {
      clusters.get(buf1.getInt(i)).add(iter.seek(i));
    }
    for (ModifiableDBIDs cids : clusters.values()) {
      curclu.addToplevelCluster(new Cluster<Model>(cids, ClusterModel.CLUSTER));
    }
    // Label handling: the first labelled record also changes the meta data.
    if (!haslbl && !lbl.isEmpty()) {
      haslbl = true;
      metaupdate = true;
    }
    curlbl = LabelList.make(lbl);
    if (metaupdate) {
      // Force a meta update, then deliver the object on the next call.
      nextevent = Event.NEXT_OBJECT;
      return Event.META_CHANGED;
    }
    return Event.NEXT_OBJECT;
  } catch (IOException e) {
    // Keep the original exception as cause so the I/O failure stays diagnosable.
    throw new IllegalArgumentException("Error while parsing line " + reader.getLineNumber() + ".", e);
  }
}
use of de.lmu.ifi.dbs.elki.data.model.Model in project elki by elki-project.
the class ByLabelOrAllInOneClustering method run.
/**
 * Cluster the database by label if a label relation is available, and fall
 * back to a single cluster holding every object otherwise.
 *
 * A class label relation is tried first, then a guessed label relation; in
 * both cases the work is delegated to the relation-based {@code run}.
 *
 * @param database the database to process
 * @return the label-based clustering, or the trivial all-in-one clustering
 */
@Override
public Clustering<Model> run(Database database) {
  // First choice: a true class label.
  try {
    Relation<ClassLabel> classLabels = database.getRelation(TypeUtil.CLASSLABEL);
    return run(classLabels);
  } catch (NoSupportedDataTypeException ignored) {
    // No such relation; fall through to the next option.
  }
  // Second choice: anything that can be guessed to be a label.
  try {
    Relation<ClassLabel> guessedLabels = database.getRelation(TypeUtil.GUESSED_LABEL);
    return run(guessedLabels);
  } catch (NoSupportedDataTypeException ignored) {
    // No such relation either; fall through to the trivial result.
  }
  // Last resort: one cluster containing all objects.
  final DBIDs everything = database.getRelation(TypeUtil.ANY).getDBIDs();
  final Cluster<Model> single = new Cluster<Model>(everything, ClusterModel.CLUSTER);
  final Clustering<Model> trivial = new Clustering<>("All-in-one trivial Clustering", "allinone-clustering");
  trivial.addToplevelCluster(single);
  return trivial;
}
use of de.lmu.ifi.dbs.elki.data.model.Model in project elki by elki-project.
the class TrivialAllNoise method run.
/**
 * Produce the trivial clustering that puts every object of the relation
 * into one cluster flagged as noise.
 *
 * @param relation the relation whose ids are clustered
 * @return a clustering with a single top-level noise cluster
 */
public Clustering<Model> run(Relation<?> relation) {
  final DBIDs everything = relation.getDBIDs();
  // The boolean argument marks the cluster as noise.
  final Cluster<Model> noise = new Cluster<Model>(everything, true, ClusterModel.CLUSTER);
  final Clustering<Model> trivial = new Clustering<>("All-in-noise trivial Clustering", "allinnoise-clustering");
  trivial.addToplevelCluster(noise);
  return trivial;
}
use of de.lmu.ifi.dbs.elki.data.model.Model in project elki by elki-project.
the class TrivialAllInOne method run.
/**
 * Produce the trivial clustering that puts every object of the relation
 * into one single cluster.
 *
 * @param relation the relation whose ids are clustered
 * @return a clustering with a single top-level cluster
 */
public Clustering<Model> run(Relation<?> relation) {
  final DBIDs everything = relation.getDBIDs();
  final Cluster<Model> single = new Cluster<Model>(everything, ClusterModel.CLUSTER);
  final Clustering<Model> trivial = new Clustering<>("All-in-one trivial Clustering", "allinone-clustering");
  trivial.addToplevelCluster(single);
  return trivial;
}
use of de.lmu.ifi.dbs.elki.data.model.Model in project elki by elki-project.
the class CASH method doRun.
/**
 * Runs the CASH algorithm on the specified relation. The method calls itself
 * recursively on a derived, lower-dimensional relation while the current
 * dimensionality is above {@code minDim + 1}; otherwise clusters are created
 * directly from the selected intervals.
 *
 * Objects that end up in no cluster are collected as noise and, depending on
 * the dimensionality and the number of noise objects, appended as a cluster
 * flagged as noise.
 *
 * @param relation the Relation to run the CASH algorithm on
 * @param progress the progress object for verbose messages, may be null
 * @return the clustering found at this dimensionality, including the
 *         clusters returned by the recursive lower-dimensional runs
 */
private Clustering<Model> doRun(Relation<ParameterizationFunction> relation, FiniteProgress progress) {
Clustering<Model> res = new Clustering<>("CASH clustering", "cash-clustering");
final int dim = dimensionality(relation);
// init heap
ObjectHeap<IntegerPriorityObject<CASHInterval>> heap = new ComparableMinHeap<>();
// Every object starts out as noise; ids are removed as they join a cluster.
ModifiableDBIDs noiseIDs = DBIDUtil.newHashSet(relation.getDBIDs());
initHeap(heap, relation, dim, noiseIDs);
if (LOG.isVerbose()) {
LOG.verbose(new StringBuilder().append("dim ").append(dim).append(" database.size ").append(relation.size()).toString());
}
// get the ''best'' d-dimensional intervals at max level
while (!heap.isEmpty()) {
CASHInterval interval = determineNextIntervalAtMaxLevel(heap);
if (LOG.isVerbose()) {
LOG.verbose("next interval in dim " + dim + ": " + interval);
}
// only noise left
if (interval == null) {
break;
}
// do a dim-1 dimensional run
// Ids clustered in this iteration; used below to prune the remaining intervals.
ModifiableDBIDs clusterIDs = DBIDUtil.newHashSet();
if (dim > minDim + 1) {
ModifiableDBIDs ids;
double[][] basis_dim_minus_1;
if (adjust) {
// ids is passed in empty; presumably runDerivator fills it - TODO confirm.
ids = DBIDUtil.newHashSet();
basis_dim_minus_1 = runDerivator(relation, dim, interval, ids);
} else {
ids = interval.getIDs();
basis_dim_minus_1 = determineBasis(SpatialUtil.centroid(interval));
}
if (ids.size() != 0) {
MaterializedRelation<ParameterizationFunction> db = buildDB(dim, basis_dim_minus_1, ids, relation);
// add result of dim-1 to this result
Clustering<Model> res_dim_minus_1 = doRun(db, progress);
for (Cluster<Model> cluster : res_dim_minus_1.getAllClusters()) {
res.addToplevelCluster(cluster);
noiseIDs.removeDBIDs(cluster.getIDs());
clusterIDs.addDBIDs(cluster.getIDs());
// processedIDs is not declared in this method; presumably state shared by all recursion levels.
processedIDs.addDBIDs(cluster.getIDs());
}
}
} else // reached whenever dim <= minDim + 1: no further recursion, build the cluster directly
{
LinearEquationSystem les = runDerivator(relation, dim - 1, interval.getIDs());
Cluster<Model> c = new Cluster<Model>(interval.getIDs(), new LinearEquationModel(les));
res.addToplevelCluster(c);
noiseIDs.removeDBIDs(interval.getIDs());
clusterIDs.addDBIDs(interval.getIDs());
processedIDs.addDBIDs(interval.getIDs());
}
// Rebuild heap
// Copy the entries out, clear the heap, then re-insert each interval after
// removing the newly clustered ids; intervals with fewer than minPts ids are dropped.
ArrayList<IntegerPriorityObject<CASHInterval>> heapVector = new ArrayList<>(heap.size());
for (ObjectHeap.UnsortedIter<IntegerPriorityObject<CASHInterval>> iter = heap.unsortedIter(); iter.valid(); iter.advance()) {
heapVector.add(iter.get());
}
heap.clear();
for (IntegerPriorityObject<CASHInterval> pair : heapVector) {
CASHInterval currentInterval = pair.getObject();
currentInterval.removeIDs(clusterIDs);
if (currentInterval.getIDs().size() >= minPts) {
// The priority is read again after the ids were removed.
heap.add(new IntegerPriorityObject<>(currentInterval.priority(), currentInterval));
}
}
if (progress != null) {
progress.setProcessed(processedIDs.size(), LOG);
}
}
// put noise to clusters
if (!noiseIDs.isEmpty()) {
if (dim == noiseDim) {
// At the noise dimensionality the remainder becomes a noise cluster with the generic model.
res.addToplevelCluster(new Cluster<Model>(noiseIDs, true, ClusterModel.CLUSTER));
processedIDs.addDBIDs(noiseIDs);
} else if (noiseIDs.size() >= minPts) {
// Otherwise, with at least minPts remaining objects, an equation system is
// derived on fulldatabase (not on relation) and attached to the noise-flagged cluster.
LinearEquationSystem les = runDerivator(fulldatabase, dim - 1, noiseIDs);
res.addToplevelCluster(new Cluster<Model>(noiseIDs, true, new LinearEquationModel(les)));
processedIDs.addDBIDs(noiseIDs);
}
// In any other case the remaining noise ids are not added to the result here.
}
if (LOG.isDebugging()) {
// Debug summary: noise count, then per cluster its subspace dimensionality
// (or model class name) and size.
StringBuilder msg = new StringBuilder();
msg.append("noise fuer dim ").append(dim).append(": ").append(noiseIDs.size());
for (Cluster<Model> c : res.getAllClusters()) {
if (c.getModel() instanceof LinearEquationModel) {
msg.append("\n Cluster: Dim: ").append(((LinearEquationModel) c.getModel()).getLes().subspacedim());
} else {
msg.append("\n Cluster: ").append(c.getModel().getClass().getName());
}
msg.append(" size: ").append(c.size());
}
LOG.debugFine(msg.toString());
}
if (progress != null) {
progress.setProcessed(processedIDs.size(), LOG);
}
return res;
}
Aggregations