Search in sources:

Example 21 with Relation

use of de.lmu.ifi.dbs.elki.database.relation.Relation in project elki by elki-project.

the class ClusteringVectorDumper method dumpClusteringOutput.

/**
 * Write one clustering as a single line of cluster numbers.
 * <p>
 * The line holds one integer per object of the id range (in range order,
 * separated by single spaces), followed by a label and a newline. The id
 * range is taken from the first parent relation of the clustering whose ids
 * are a {@link DBIDRange}; failing that, from the first database in the
 * hierarchy whose relation ids are a {@link DBIDRange}. If no such range is
 * found, a warning is logged and nothing is written.
 *
 * @param writer Output writer
 * @param hierarchy Cluster hierarchy to process
 * @param c Clustering result
 */
protected void dumpClusteringOutput(PrintStream writer, ResultHierarchy hierarchy, Clustering<?> c) {
    DBIDRange range = null;
    // First choice: a parent relation of the clustering with range ids.
    for (It<Relation<?>> parents = hierarchy.iterParents(c).filter(Relation.class); parents.valid(); parents.advance()) {
        final Relation<?> parent = parents.get();
        final DBIDs candidate = parent.getDBIDs();
        if (!(candidate instanceof DBIDRange)) {
            LOG.warning("Parent result " + parent.getLongName() + " has DBID type " + candidate.getClass());
            continue;
        }
        range = (DBIDRange) candidate;
        break;
    }
    // Second choice: any database in the hierarchy.
    if (range == null) {
        for (It<Database> databases = hierarchy.iterAll().filter(Database.class); databases.valid(); databases.advance()) {
            final Database database = databases.get();
            final DBIDs candidate = database.getRelation(TypeUtil.ANY).getDBIDs();
            if (!(candidate instanceof DBIDRange)) {
                LOG.warning("Parent result " + database.getLongName() + " has DBID type " + candidate.getClass());
                continue;
            }
            range = (DBIDRange) candidate;
            break;
        }
    }
    // Without a range there is no stable column per object: give up.
    if (range == null) {
        LOG.warning("Cannot dump cluster assignment, as I do not have a well-defined DBIDRange to use for a unique column assignment. DBIDs must be a continuous range.");
        return;
    }
    // Number the clusters in iteration order and record the number per object.
    final WritableIntegerDataStore assignment = DataStoreUtil.makeIntegerStorage(range, DataStoreFactory.HINT_TEMP);
    int clusterNumber = 0;
    for (Cluster<?> cluster : c.getAllClusters()) {
        for (DBIDIter member = cluster.getIDs().iter(); member.valid(); member.advance()) {
            assignment.putInt(member, clusterNumber);
        }
        clusterNumber++;
    }
    // One column per object, space separated (no leading space).
    for (DBIDArrayIter pos = range.iter(); pos.valid(); pos.advance()) {
        if (pos.getOffset() > 0) {
            writer.append(' ');
        }
        writer.append(Integer.toString(assignment.intValue(pos)));
    }
    // Trailing label: the forced label if set (omitted when empty), else the
    // clustering's long name.
    if (forceLabel == null) {
        writer.append(' ').append(c.getLongName());
    } else if (forceLabel.length() > 0) {
        writer.append(' ').append(forceLabel);
    }
    writer.append('\n');
}
Also used : Relation(de.lmu.ifi.dbs.elki.database.relation.Relation) WritableIntegerDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableIntegerDataStore) DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) DBIDRange(de.lmu.ifi.dbs.elki.database.ids.DBIDRange) Database(de.lmu.ifi.dbs.elki.database.Database) DBIDArrayIter(de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter)

Example 22 with Relation

use of de.lmu.ifi.dbs.elki.database.relation.Relation in project elki by elki-project.

the class EvaluateDBCV method evaluateClustering.

/**
 * Evaluate a single clustering.
 * <p>
 * Works in three passes over the clusters: (1) per-object core distances
 * within each cluster, (2) the largest relevant edge of a minimum spanning
 * tree over mutual reachability distances inside each cluster (the "density
 * sparseness"), and (3) the smallest mutual reachability distance to objects
 * of other clusters (the "density separation"). Noise clusters and clusters
 * with fewer than two objects are skipped in all passes. The per-cluster
 * scores are summed, each weighted by cluster size over {@code rel.size()}.
 * <p>
 * Side effect: the value is added as a measure to an {@link EvaluationResult}
 * looked up (or created) in the hierarchy of {@code db}, and the hierarchy is
 * notified via {@code resultChanged}.
 *
 * @param db Database
 * @param rel Data relation
 * @param cl Clustering
 *
 * @return dbcv DBCV-index
 */
public double evaluateClustering(Database db, Relation<O> rel, Clustering<?> cl) {
    final DistanceQuery<O> dq = rel.getDistanceQuery(distanceFunction);
    List<? extends Cluster<?>> clusters = cl.getAllClusters();
    final int numc = clusters.size();
    // DBCV needs a "dimensionality".
    // The relation is cast (unchecked) to a spatial relation to obtain it.
    @SuppressWarnings("unchecked") final Relation<? extends SpatialComparable> vrel = (Relation<? extends SpatialComparable>) rel;
    final int dim = RelationUtil.dimensionality(vrel);
    // precompute all core distances
    // cids[c] / coreDists[c] stay null for skipped (noise or tiny) clusters.
    ArrayDBIDs[] cids = new ArrayDBIDs[numc];
    double[][] coreDists = new double[numc][];
    for (int c = 0; c < numc; c++) {
        Cluster<?> cluster = clusters.get(c);
        // Singletons are considered as Noise, because they have no sparseness
        if (cluster.isNoise() || cluster.size() < 2) {
            coreDists[c] = null;
            continue;
        }
        // Store for use below:
        ArrayDBIDs ids = cids[c] = DBIDUtil.ensureArray(cluster.getIDs());
        double[] clusterCoreDists = coreDists[c] = new double[ids.size()];
        // it2 is one iterator reused (via seek(0)) for every outer object.
        for (DBIDArrayIter it = ids.iter(), it2 = ids.iter(); it.valid(); it.advance()) {
            double currentCoreDist = 0;
            int neighbors = 0;
            for (it2.seek(0); it2.valid(); it2.advance()) {
                if (DBIDUtil.equal(it, it2)) {
                    continue;
                }
                double dist = dq.distance(it, it2);
                // Only strictly positive distances contribute; objects at
                // distance 0 are ignored (1/dist would not be finite).
                if (dist > 0) {
                    currentCoreDist += MathUtil.powi(1. / dist, dim);
                    ++neighbors;
                }
            }
            // Average, and undo power.
            // NOTE(review): if no other member has a positive distance,
            // neighbors is 0 and the argument is 0./0 = NaN - confirm whether
            // that case needs handling.
            clusterCoreDists[it.getOffset()] = FastMath.pow(currentCoreDist / neighbors, -1. / dim);
        }
    }
    // compute density sparseness of all clusters
    int[][] clusterDegrees = new int[numc][];
    double[] clusterDscMax = new double[numc];
    // describes if a cluster contains any internal edges
    boolean[] internalEdges = new boolean[numc];
    for (int c = 0; c < numc; c++) {
        Cluster<?> cluster = clusters.get(c);
        if (cluster.isNoise() || cluster.size() < 2) {
            clusterDegrees[c] = null;
            clusterDscMax[c] = Double.NaN;
            continue;
        }
        double[] clusterCoreDists = coreDists[c];
        ArrayDBIDs ids = cids[c];
        // Density Sparseness of the Cluster
        double dscMax = 0;
        double[][] distances = new double[cluster.size()][cluster.size()];
        // Build the symmetric mutual reachability distance matrix for the
        // Minimum Spanning Tree: max of both core distances and the distance.
        // Only the upper triangle is computed; each value is mirrored.
        for (DBIDArrayIter it = ids.iter(), it2 = ids.iter(); it.valid(); it.advance()) {
            double currentCoreDist = clusterCoreDists[it.getOffset()];
            for (it2.seek(it.getOffset() + 1); it2.valid(); it2.advance()) {
                double mutualReachDist = MathUtil.max(currentCoreDist, clusterCoreDists[it2.getOffset()], dq.distance(it, it2));
                distances[it.getOffset()][it2.getOffset()] = mutualReachDist;
                distances[it2.getOffset()][it.getOffset()] = mutualReachDist;
            }
        }
        // generate Minimum Spanning Tree
        // The loops below read the result as a flat array of edges, with the
        // two endpoints of an edge at positions i and i + 1.
        int[] nodes = PrimsMinimumSpanningTree.processDense(distances);
        // get degree of all nodes in the spanning tree
        int[] degree = new int[cluster.size()];
        for (int i = 0; i < nodes.length; i++) {
            degree[nodes[i]]++;
        }
        // check if cluster contains any internal edges
        // (an edge is internal when both of its endpoints have degree > 1)
        for (int i = 0; i < nodes.length; i += 2) {
            if (degree[nodes[i]] > 1 && degree[nodes[i + 1]] > 1) {
                internalEdges[c] = true;
            }
        }
        clusterDegrees[c] = degree;
        // find maximum sparseness in the Minimum Spanning Tree
        for (int i = 0; i < nodes.length; i = i + 2) {
            final int n1 = nodes[i], n2 = nodes[i + 1];
            // If a cluster has no internal nodes we consider all edges.
            if (distances[n1][n2] > dscMax && (!internalEdges[c] || (degree[n1] > 1 && degree[n2] > 1))) {
                dscMax = distances[n1][n2];
            }
        }
        clusterDscMax[c] = dscMax;
    }
    // compute density separation of all clusters
    double dbcv = 0;
    for (int c = 0; c < numc; c++) {
        Cluster<?> cluster = clusters.get(c);
        if (cluster.isNoise() || cluster.size() < 2) {
            continue;
        }
        double currentDscMax = clusterDscMax[c];
        double[] clusterCoreDists = coreDists[c];
        int[] currentDegree = clusterDegrees[c];
        // minimal Density Separation of the Cluster
        double dspcMin = Double.POSITIVE_INFINITY;
        for (DBIDArrayIter it = cids[c].iter(); it.valid(); it.advance()) {
            // If the cluster has internal edges, only consider its internal
            // nodes: skip objects with spanning tree degree below 2.
            if (currentDegree[it.getOffset()] < 2 && internalEdges[c]) {
                continue;
            }
            double currentCoreDist = clusterCoreDists[it.getOffset()];
            for (int oc = 0; oc < numc; oc++) {
                Cluster<?> ocluster = clusters.get(oc);
                // Compare only against other, non-skipped clusters.
                if (ocluster.isNoise() || ocluster.size() < 2 || cluster == ocluster) {
                    continue;
                }
                int[] oDegree = clusterDegrees[oc];
                double[] oclusterCoreDists = coreDists[oc];
                for (DBIDArrayIter it2 = cids[oc].iter(); it2.valid(); it2.advance()) {
                    // Same internal-node restriction for the other cluster.
                    if (oDegree[it2.getOffset()] < 2 && internalEdges[oc]) {
                        continue;
                    }
                    double mutualReachDist = MathUtil.max(currentCoreDist, oclusterCoreDists[it2.getOffset()], dq.distance(it, it2));
                    dspcMin = mutualReachDist < dspcMin ? mutualReachDist : dspcMin;
                }
            }
        }
        // compute DBCV
        // Per-cluster validity: (separation - sparseness) / max of the two.
        // NOTE(review): with no other non-skipped cluster, dspcMin is still
        // +infinity here, which presumably yields NaN - confirm.
        double vc = (dspcMin - currentDscMax) / MathUtil.max(dspcMin, currentDscMax);
        // Weight by the cluster's share of all objects in the relation.
        double weight = cluster.size() / (double) rel.size();
        dbcv += weight * vc;
    }
    // Publish the result in the result hierarchy.
    EvaluationResult ev = EvaluationResult.findOrCreate(db.getHierarchy(), cl, "Internal Clustering Evaluation", "internal evaluation");
    MeasurementGroup g = ev.findOrCreateGroup("Distance-based Evaluation");
    // NOTE(review): vc above is negative whenever dspcMin < currentDscMax,
    // while the numeric arguments here are 0 and +infinity - verify them
    // against the addMeasure parameter semantics.
    g.addMeasure("Density Based Clustering Validation", dbcv, 0., Double.POSITIVE_INFINITY, 0., true);
    db.getHierarchy().resultChanged(ev);
    return dbcv;
}
Also used : DBIDArrayIter(de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter) MeasurementGroup(de.lmu.ifi.dbs.elki.result.EvaluationResult.MeasurementGroup) EvaluationResult(de.lmu.ifi.dbs.elki.result.EvaluationResult) Relation(de.lmu.ifi.dbs.elki.database.relation.Relation) SpatialComparable(de.lmu.ifi.dbs.elki.data.spatial.SpatialComparable) ArrayDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs)

Example 23 with Relation

use of de.lmu.ifi.dbs.elki.database.relation.Relation in project elki by elki-project.

the class AbstractAlgorithm method run.

/**
 * Run the algorithm on a database by dispatching reflectively to the "real"
 * {@code run} method of the concrete class.
 * <p>
 * One relation is bound per entry of {@link #getInputTypeRestriction()}. Two
 * public signatures are tried, in this order:
 * <ol>
 * <li>{@code run(Database, Relation, ...)}</li>
 * <li>{@code run(Relation, ...)}</li>
 * </ol>
 * Runtime exceptions and errors thrown by the invoked method are rethrown
 * unwrapped; other failures are reported as {@link APIViolationException}.
 *
 * @param database Database to run on
 * @return Result of the invoked {@code run} method
 * @throws APIViolationException if neither signature exists, the reflective
 *         call fails, or the invoked method throws a checked exception
 */
@SuppressWarnings("unchecked")
@Override
public R run(Database database) {
    final TypeInformation[] inputs = getInputTypeRestriction();
    // Candidate 1: run(Database, Relation...)
    final Object[] databaseArgs = new Object[inputs.length + 1];
    final Class<?>[] databaseSignature = new Class<?>[inputs.length + 1];
    // Candidate 2: run(Relation...)
    final Object[] relationArgs = new Object[inputs.length];
    final Class<?>[] relationSignature = new Class<?>[inputs.length];
    // First parameter is the database
    databaseArgs[0] = database;
    databaseSignature[0] = Database.class;
    // Other parameters are the bound relations
    for (int i = 0; i < inputs.length; i++) {
        // TODO: don't bind the same relation twice?
        // But sometimes this is wanted (e.g. using projected distances)
        // Look each input up once and share it between both candidates.
        final Object bound = database.getRelation(inputs[i]);
        databaseArgs[i + 1] = bound;
        databaseSignature[i + 1] = Relation.class;
        relationArgs[i] = bound;
        relationSignature[i] = Relation.class;
    }
    // Find appropriate run method, preferring the variant with the database.
    final Method withDatabase = findRunMethod(databaseSignature);
    if (withDatabase != null) {
        return invokeRun(withDatabase, databaseArgs);
    }
    final Method relationsOnly = findRunMethod(relationSignature);
    if (relationsOnly != null) {
        return invokeRun(relationsOnly, relationArgs);
    }
    throw new APIViolationException("No appropriate 'run' method found.");
}

/**
 * Look up a public {@code run} method with the given parameter types.
 *
 * @param signature Parameter types to look for
 * @return the method, or {@code null} if this class has no such method
 * @throws APIViolationException if the lookup is denied
 */
private Method findRunMethod(Class<?>[] signature) {
    try {
        return this.getClass().getMethod("run", signature);
    } catch (NoSuchMethodException ignored) {
        // Expected for algorithms that only offer the other signature.
        return null;
    } catch (SecurityException e) {
        throw new APIViolationException("Invoking the real 'run' method failed.", e);
    }
}

/**
 * Invoke a {@code run} method on this instance, unwrapping the exceptions
 * raised by the invoked method.
 *
 * @param method Method to invoke
 * @param args Arguments matching the method signature
 * @return Result of the invocation
 * @throws APIViolationException if the call itself fails, or the method
 *         throws a checked exception
 */
@SuppressWarnings("unchecked")
private R invokeRun(Method method, Object[] args) {
    try {
        return (R) method.invoke(this, args);
    } catch (IllegalArgumentException | IllegalAccessException | SecurityException e) {
        throw new APIViolationException("Invoking the real 'run' method failed.", e);
    } catch (InvocationTargetException e) {
        // Surface the algorithm's own failure rather than the reflection wrapper.
        final Throwable cause = e.getTargetException();
        if (cause instanceof RuntimeException) {
            throw (RuntimeException) cause;
        }
        if (cause instanceof Error) {
            throw (Error) cause;
        }
        throw new APIViolationException("Invoking the real 'run' method failed: " + cause.toString(), cause);
    }
}
Also used : Method(java.lang.reflect.Method) InvocationTargetException(java.lang.reflect.InvocationTargetException) Relation(de.lmu.ifi.dbs.elki.database.relation.Relation) Database(de.lmu.ifi.dbs.elki.database.Database) APIViolationException(de.lmu.ifi.dbs.elki.utilities.exceptions.APIViolationException)

Example 24 with Relation

use of de.lmu.ifi.dbs.elki.database.relation.Relation in project elki by elki-project.

the class KNNJoinTest method doKNNJoin.

/**
 * Actual test routine: runs a 2NN join with the Euclidean and then the
 * Manhattan distance, and compares mean and sample variance of the result
 * list sizes with the expected values.
 *
 * @param inputparams Parameters passed on when building the test database
 */
void doKNNJoin(ListParameterization inputparams) {
    final Database database = AbstractSimpleAlgorithmTest.makeSimpleDatabase(dataset, shoulds, inputparams);
    final Relation<NumberVector> vectors = database.getRelation(TypeUtil.NUMBER_VECTOR_FIELD);

    // Euclidean
    final KNNJoin<DoubleVector, ?, ?> euclideanJoin = new KNNJoin<DoubleVector, RStarTreeNode, SpatialEntry>(EuclideanDistanceFunction.STATIC, 2);
    final MeanVariance euclideanSizes = collectListSizes(vectors, euclideanJoin.run(database));
    org.junit.Assert.assertEquals("Euclidean mean 2NN set size", mean2nnEuclid, euclideanSizes.getMean(), 0.00001);
    org.junit.Assert.assertEquals("Euclidean variance 2NN", var2nnEuclid, euclideanSizes.getSampleVariance(), 0.00001);

    // Manhattan
    final KNNJoin<DoubleVector, ?, ?> manhattanJoin = new KNNJoin<DoubleVector, RStarTreeNode, SpatialEntry>(ManhattanDistanceFunction.STATIC, 2);
    final MeanVariance manhattanSizes = collectListSizes(vectors, manhattanJoin.run(database));
    org.junit.Assert.assertEquals("Manhattan mean 2NN", mean2nnManhattan, manhattanSizes.getMean(), 0.00001);
    org.junit.Assert.assertEquals("Manhattan variance 2NN", var2nnManhattan, manhattanSizes.getSampleVariance(), 0.00001);
}

/**
 * Collect the size of the join result list of every object of a relation.
 *
 * @param relation Relation whose object ids are visited
 * @param joined Join result, holding one list per object
 * @return Statistics over the list sizes
 */
private static MeanVariance collectListSizes(Relation<NumberVector> relation, Relation<KNNList> joined) {
    final MeanVariance sizes = new MeanVariance();
    for (DBIDIter it = relation.getDBIDs().iter(); it.valid(); it.advance()) {
        sizes.put(joined.get(it).size());
    }
    return sizes;
}
Also used : RStarTreeNode(de.lmu.ifi.dbs.elki.index.tree.spatial.rstarvariants.rstar.RStarTreeNode) Relation(de.lmu.ifi.dbs.elki.database.relation.Relation) MeanVariance(de.lmu.ifi.dbs.elki.math.MeanVariance) NumberVector(de.lmu.ifi.dbs.elki.data.NumberVector) Database(de.lmu.ifi.dbs.elki.database.Database) StaticArrayDatabase(de.lmu.ifi.dbs.elki.database.StaticArrayDatabase) DoubleVector(de.lmu.ifi.dbs.elki.data.DoubleVector) SpatialEntry(de.lmu.ifi.dbs.elki.index.tree.spatial.SpatialEntry) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter)

Aggregations

Relation (de.lmu.ifi.dbs.elki.database.relation.Relation)24 DBIDIter (de.lmu.ifi.dbs.elki.database.ids.DBIDIter)11 ArrayModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs)6 DBIDs (de.lmu.ifi.dbs.elki.database.ids.DBIDs)6 Database (de.lmu.ifi.dbs.elki.database.Database)5 WritableDoubleDataStore (de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore)4 MaterializedRelation (de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation)4 AbortException (de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException)4 DoubleVector (de.lmu.ifi.dbs.elki.data.DoubleVector)3 NumberVector (de.lmu.ifi.dbs.elki.data.NumberVector)3 DBIDRef (de.lmu.ifi.dbs.elki.database.ids.DBIDRef)3 DoubleRelation (de.lmu.ifi.dbs.elki.database.relation.DoubleRelation)3 Random (java.util.Random)3 PolygonsObject (de.lmu.ifi.dbs.elki.data.spatial.PolygonsObject)2 DBIDArrayIter (de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter)2 DBIDVar (de.lmu.ifi.dbs.elki.database.ids.DBIDVar)2 ModifiableRelation (de.lmu.ifi.dbs.elki.database.relation.ModifiableRelation)2 Duration (de.lmu.ifi.dbs.elki.logging.statistics.Duration)2 OutlierResult (de.lmu.ifi.dbs.elki.result.outlier.OutlierResult)2 ArrayList (java.util.ArrayList)2