Usage of de.lmu.ifi.dbs.elki.database.relation.Relation in project elki (elki-project):
class ClusteringVectorDumper, method dumpClusteringOutput.
/**
* Dump a single clustering result.
*
* @param writer Output writer
* @param hierarchy Cluster hierarchy to process
* @param c Clustering result
*/
protected void dumpClusteringOutput(PrintStream writer, ResultHierarchy hierarchy, Clustering<?> c) {
DBIDRange ids = null;
for (It<Relation<?>> iter = hierarchy.iterParents(c).filter(Relation.class); iter.valid(); iter.advance()) {
DBIDs pids = iter.get().getDBIDs();
if (pids instanceof DBIDRange) {
ids = (DBIDRange) pids;
break;
}
LOG.warning("Parent result " + iter.get().getLongName() + " has DBID type " + pids.getClass());
}
// Fallback: try to locate a database.
if (ids == null) {
for (It<Database> iter = hierarchy.iterAll().filter(Database.class); iter.valid(); iter.advance()) {
DBIDs pids = iter.get().getRelation(TypeUtil.ANY).getDBIDs();
if (pids instanceof DBIDRange) {
ids = (DBIDRange) pids;
break;
}
LOG.warning("Parent result " + iter.get().getLongName() + " has DBID type " + pids.getClass());
}
}
if (ids == null) {
LOG.warning("Cannot dump cluster assignment, as I do not have a well-defined DBIDRange to use for a unique column assignment. DBIDs must be a continuous range.");
return;
}
WritableIntegerDataStore map = DataStoreUtil.makeIntegerStorage(ids, DataStoreFactory.HINT_TEMP);
int cnum = 0;
for (Cluster<?> clu : c.getAllClusters()) {
for (DBIDIter iter = clu.getIDs().iter(); iter.valid(); iter.advance()) {
map.putInt(iter, cnum);
}
++cnum;
}
for (DBIDArrayIter iter = ids.iter(); iter.valid(); iter.advance()) {
if (iter.getOffset() > 0) {
writer.append(' ');
}
writer.append(Integer.toString(map.intValue(iter)));
}
if (forceLabel != null) {
if (forceLabel.length() > 0) {
writer.append(' ').append(forceLabel);
}
} else {
writer.append(' ').append(c.getLongName());
}
writer.append('\n');
}
Usage of de.lmu.ifi.dbs.elki.database.relation.Relation in project elki (elki-project):
class EvaluateDBCV, method evaluateClustering.
/**
* Evaluate a single clustering.
*
* @param db Database
* @param rel Data relation
* @param cl Clustering
*
* @return dbcv DBCV-index
*/
public double evaluateClustering(Database db, Relation<O> rel, Clustering<?> cl) {
final DistanceQuery<O> dq = rel.getDistanceQuery(distanceFunction);
List<? extends Cluster<?>> clusters = cl.getAllClusters();
final int numc = clusters.size();
// DBCV needs a "dimensionality".
@SuppressWarnings("unchecked") final Relation<? extends SpatialComparable> vrel = (Relation<? extends SpatialComparable>) rel;
final int dim = RelationUtil.dimensionality(vrel);
// precompute all core distances
ArrayDBIDs[] cids = new ArrayDBIDs[numc];
double[][] coreDists = new double[numc][];
for (int c = 0; c < numc; c++) {
Cluster<?> cluster = clusters.get(c);
// Singletons are considered as Noise, because they have no sparseness
if (cluster.isNoise() || cluster.size() < 2) {
coreDists[c] = null;
continue;
}
// Store for use below:
ArrayDBIDs ids = cids[c] = DBIDUtil.ensureArray(cluster.getIDs());
double[] clusterCoreDists = coreDists[c] = new double[ids.size()];
for (DBIDArrayIter it = ids.iter(), it2 = ids.iter(); it.valid(); it.advance()) {
double currentCoreDist = 0;
int neighbors = 0;
for (it2.seek(0); it2.valid(); it2.advance()) {
if (DBIDUtil.equal(it, it2)) {
continue;
}
double dist = dq.distance(it, it2);
// We ignore such objects.
if (dist > 0) {
currentCoreDist += MathUtil.powi(1. / dist, dim);
++neighbors;
}
}
// Average, and undo power.
clusterCoreDists[it.getOffset()] = FastMath.pow(currentCoreDist / neighbors, -1. / dim);
}
}
// compute density sparseness of all clusters
int[][] clusterDegrees = new int[numc][];
double[] clusterDscMax = new double[numc];
// describes if a cluster contains any internal edges
boolean[] internalEdges = new boolean[numc];
for (int c = 0; c < numc; c++) {
Cluster<?> cluster = clusters.get(c);
if (cluster.isNoise() || cluster.size() < 2) {
clusterDegrees[c] = null;
clusterDscMax[c] = Double.NaN;
continue;
}
double[] clusterCoreDists = coreDists[c];
ArrayDBIDs ids = cids[c];
// Density Sparseness of the Cluster
double dscMax = 0;
double[][] distances = new double[cluster.size()][cluster.size()];
// create mutability distance matrix for Minimum Spanning Tree
for (DBIDArrayIter it = ids.iter(), it2 = ids.iter(); it.valid(); it.advance()) {
double currentCoreDist = clusterCoreDists[it.getOffset()];
for (it2.seek(it.getOffset() + 1); it2.valid(); it2.advance()) {
double mutualReachDist = MathUtil.max(currentCoreDist, clusterCoreDists[it2.getOffset()], dq.distance(it, it2));
distances[it.getOffset()][it2.getOffset()] = mutualReachDist;
distances[it2.getOffset()][it.getOffset()] = mutualReachDist;
}
}
// generate Minimum Spanning Tree
int[] nodes = PrimsMinimumSpanningTree.processDense(distances);
// get degree of all nodes in the spanning tree
int[] degree = new int[cluster.size()];
for (int i = 0; i < nodes.length; i++) {
degree[nodes[i]]++;
}
// check if cluster contains any internal edges
for (int i = 0; i < nodes.length; i += 2) {
if (degree[nodes[i]] > 1 && degree[nodes[i + 1]] > 1) {
internalEdges[c] = true;
}
}
clusterDegrees[c] = degree;
// find maximum sparseness in the Minimum Spanning Tree
for (int i = 0; i < nodes.length; i = i + 2) {
final int n1 = nodes[i], n2 = nodes[i + 1];
// If a cluster has no internal nodes we consider all edges.
if (distances[n1][n2] > dscMax && (!internalEdges[c] || (degree[n1] > 1 && degree[n2] > 1))) {
dscMax = distances[n1][n2];
}
}
clusterDscMax[c] = dscMax;
}
// compute density separation of all clusters
double dbcv = 0;
for (int c = 0; c < numc; c++) {
Cluster<?> cluster = clusters.get(c);
if (cluster.isNoise() || cluster.size() < 2) {
continue;
}
double currentDscMax = clusterDscMax[c];
double[] clusterCoreDists = coreDists[c];
int[] currentDegree = clusterDegrees[c];
// minimal Density Separation of the Cluster
double dspcMin = Double.POSITIVE_INFINITY;
for (DBIDArrayIter it = cids[c].iter(); it.valid(); it.advance()) {
// nodes.
if (currentDegree[it.getOffset()] < 2 && internalEdges[c]) {
continue;
}
double currentCoreDist = clusterCoreDists[it.getOffset()];
for (int oc = 0; oc < numc; oc++) {
Cluster<?> ocluster = clusters.get(oc);
if (ocluster.isNoise() || ocluster.size() < 2 || cluster == ocluster) {
continue;
}
int[] oDegree = clusterDegrees[oc];
double[] oclusterCoreDists = coreDists[oc];
for (DBIDArrayIter it2 = cids[oc].iter(); it2.valid(); it2.advance()) {
if (oDegree[it2.getOffset()] < 2 && internalEdges[oc]) {
continue;
}
double mutualReachDist = MathUtil.max(currentCoreDist, oclusterCoreDists[it2.getOffset()], dq.distance(it, it2));
dspcMin = mutualReachDist < dspcMin ? mutualReachDist : dspcMin;
}
}
}
// compute DBCV
double vc = (dspcMin - currentDscMax) / MathUtil.max(dspcMin, currentDscMax);
double weight = cluster.size() / (double) rel.size();
dbcv += weight * vc;
}
EvaluationResult ev = EvaluationResult.findOrCreate(db.getHierarchy(), cl, "Internal Clustering Evaluation", "internal evaluation");
MeasurementGroup g = ev.findOrCreateGroup("Distance-based Evaluation");
g.addMeasure("Density Based Clustering Validation", dbcv, 0., Double.POSITIVE_INFINITY, 0., true);
db.getHierarchy().resultChanged(ev);
return dbcv;
}
Usage of de.lmu.ifi.dbs.elki.database.relation.Relation in project elki (elki-project):
class AbstractAlgorithm, method run.
@SuppressWarnings("unchecked")
@Override
public R run(Database database) {
final Object[] relations1;
final Class<?>[] signature1;
final Object[] relations2;
final Class<?>[] signature2;
// Build candidate method signatures
{
final TypeInformation[] inputs = getInputTypeRestriction();
relations1 = new Object[inputs.length + 1];
signature1 = new Class<?>[inputs.length + 1];
relations2 = new Object[inputs.length];
signature2 = new Class<?>[inputs.length];
// First parameter is the database
relations1[0] = database;
signature1[0] = Database.class;
// Other parameters are the bound relations
for (int i = 0; i < inputs.length; i++) {
// TODO: don't bind the same relation twice?
// But sometimes this is wanted (e.g. using projected distances)
relations1[i + 1] = database.getRelation(inputs[i]);
signature1[i + 1] = Relation.class;
relations2[i] = database.getRelation(inputs[i]);
signature2[i] = Relation.class;
}
}
// Find appropriate run method.
try {
Method runmethod1 = this.getClass().getMethod("run", signature1);
return (R) runmethod1.invoke(this, relations1);
} catch (NoSuchMethodException e) {
// continue below.
} catch (IllegalArgumentException | IllegalAccessException | SecurityException e) {
throw new APIViolationException("Invoking the real 'run' method failed.", e);
} catch (InvocationTargetException e) {
final Throwable cause = e.getTargetException();
if (cause instanceof RuntimeException) {
throw (RuntimeException) cause;
}
if (cause instanceof Error) {
throw (Error) cause;
}
throw new APIViolationException("Invoking the real 'run' method failed: " + cause.toString(), cause);
}
try {
Method runmethod2 = this.getClass().getMethod("run", signature2);
return (R) runmethod2.invoke(this, relations2);
} catch (NoSuchMethodException e) {
// continue below.
} catch (IllegalArgumentException | IllegalAccessException | SecurityException e) {
throw new APIViolationException("Invoking the real 'run' method failed.", e);
} catch (InvocationTargetException e) {
final Throwable cause = e.getTargetException();
if (cause instanceof RuntimeException) {
throw (RuntimeException) cause;
}
if (cause instanceof Error) {
throw (Error) cause;
}
throw new APIViolationException("Invoking the real 'run' method failed: " + cause.toString(), cause);
}
throw new APIViolationException("No appropriate 'run' method found.");
}
Usage of de.lmu.ifi.dbs.elki.database.relation.Relation in project elki (elki-project):
class KNNJoinTest, method doKNNJoin.
/**
* Actual test routine.
*
* @param inputparams
*/
void doKNNJoin(ListParameterization inputparams) {
Database db = AbstractSimpleAlgorithmTest.makeSimpleDatabase(dataset, shoulds, inputparams);
Relation<NumberVector> relation = db.getRelation(TypeUtil.NUMBER_VECTOR_FIELD);
// Euclidean
{
KNNJoin<DoubleVector, ?, ?> knnjoin = new KNNJoin<DoubleVector, RStarTreeNode, SpatialEntry>(EuclideanDistanceFunction.STATIC, 2);
Relation<KNNList> result = knnjoin.run(db);
MeanVariance meansize = new MeanVariance();
for (DBIDIter id = relation.getDBIDs().iter(); id.valid(); id.advance()) {
meansize.put(result.get(id).size());
}
org.junit.Assert.assertEquals("Euclidean mean 2NN set size", mean2nnEuclid, meansize.getMean(), 0.00001);
org.junit.Assert.assertEquals("Euclidean variance 2NN", var2nnEuclid, meansize.getSampleVariance(), 0.00001);
}
// Manhattan
{
KNNJoin<DoubleVector, ?, ?> knnjoin = new KNNJoin<DoubleVector, RStarTreeNode, SpatialEntry>(ManhattanDistanceFunction.STATIC, 2);
Relation<KNNList> result = knnjoin.run(db);
MeanVariance meansize = new MeanVariance();
for (DBIDIter id = relation.getDBIDs().iter(); id.valid(); id.advance()) {
meansize.put(result.get(id).size());
}
org.junit.Assert.assertEquals("Manhattan mean 2NN", mean2nnManhattan, meansize.getMean(), 0.00001);
org.junit.Assert.assertEquals("Manhattan variance 2NN", var2nnManhattan, meansize.getSampleVariance(), 0.00001);
}
}
Aggregations