Search in sources :

Example 21 with DBIDArrayIter

use of de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter in project elki by elki-project.

the class RelationSortingTest method testSorting.

/**
 * Sorts the object ids by every dimension in turn and checks that no id is
 * lost by sorting and that the values of the sorted dimension are in
 * non-decreasing order.
 */
@Test
public void testSorting() {
    final Database database = AbstractSimpleAlgorithmTest.makeSimpleDatabase(filename, -1);
    final Relation<? extends NumberVector> vectors = database.getRelation(TypeUtil.NUMBER_VECTOR_FIELD);
    final ArrayModifiableDBIDs sortedIds = DBIDUtil.newArray(vectors.getDBIDs());
    final int expectedCount = vectors.size();
    final int dimensionality = RelationUtil.dimensionality(vectors);
    final SortDBIDsBySingleDimension comparator = new VectorUtil.SortDBIDsBySingleDimension(vectors);
    int dim = 0;
    while (dim < dimensionality) {
        // Re-sort the same id array by the current dimension.
        comparator.setDimension(dim);
        sortedIds.sort(comparator);
        // Copying into a hash set collapses duplicates, so a smaller set means an id was lost.
        final int distinctCount = DBIDUtil.newHashSet(sortedIds).size();
        assertEquals("Lost some DBID during sorting?!?", expectedCount, distinctCount);
        // Walk adjacent pairs and compare their values in this dimension.
        final DBIDArrayIter cursor = sortedIds.iter();
        double previous = vectors.get(cursor).doubleValue(dim);
        cursor.advance();
        while (cursor.valid()) {
            final double current = vectors.get(cursor).doubleValue(dim);
            assertTrue("Not correctly sorted: " + previous + " > " + current + " at pos " + cursor.getOffset(), previous <= current);
            previous = current;
            cursor.advance();
        }
        dim++;
    }
}
Also used : SortDBIDsBySingleDimension(de.lmu.ifi.dbs.elki.data.VectorUtil.SortDBIDsBySingleDimension) ArrayModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs) DBIDArrayIter(de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter) Test(org.junit.Test) AbstractSimpleAlgorithmTest(de.lmu.ifi.dbs.elki.algorithm.AbstractSimpleAlgorithmTest)

Example 22 with DBIDArrayIter

use of de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter in project elki by elki-project.

the class InMemoryIDistanceIndex method rankReferencePoints.

/**
 * Rank the reference points by their distance to the query object.
 *
 * Each entry pairs the distance from {@code obj} to a reference point with
 * the offset of that reference point in {@code referencepoints}.
 *
 * @param distanceQuery Distance query
 * @param obj Query object
 * @param referencepoints Reference points
 * @return Array of (distance, offset) pairs, sorted by the natural ordering
 *         of {@code DoubleIntPair}.
 */
protected static <O> DoubleIntPair[] rankReferencePoints(DistanceQuery<O> distanceQuery, O obj, ArrayDBIDs referencepoints) {
    final DoubleIntPair[] ranked = new DoubleIntPair[referencepoints.size()];
    final DBIDArrayIter ref = referencepoints.iter();
    while (ref.valid()) {
        // The iterator offset doubles as the array slot and as the stored index.
        final int offset = ref.getOffset();
        final double distance = distanceQuery.distance(obj, ref);
        ranked[offset] = new DoubleIntPair(distance, offset);
        ref.advance();
    }
    Arrays.sort(ranked);
    return ranked;
}
Also used : DoubleIntPair(de.lmu.ifi.dbs.elki.utilities.pairs.DoubleIntPair) DBIDArrayIter(de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter)

Example 23 with DBIDArrayIter

use of de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter in project elki by elki-project.

the class P3C method partitionData.

/**
 * Partition the data set into {@code bins} bins in each dimension
 * <i>independently</i>.
 *
 * This can be used to construct a grid approximation of the data using O(d n)
 * memory.
 *
 * When a dimension is found to be constant, it will not be partitioned, but
 * instead the corresponding array will be set to {@code null}.
 *
 * Every object is assigned to exactly one bin per (non-constant) dimension.
 * Bins are equal-width intervals between the minimum and maximum value of the
 * dimension; a bin whose interval contains no object is an empty set.
 *
 * @param relation Data relation to partition
 * @param bins Number of bins
 * @return Partitions of each dimension.
 */
private SetDBIDs[][] partitionData(final Relation<V> relation, final int bins) {
    final int dim = RelationUtil.dimensionality(relation);
    SetDBIDs[][] partitions = new SetDBIDs[dim][bins];
    ArrayModifiableDBIDs ids = DBIDUtil.newArray(relation.getDBIDs());
    // will be reused.
    DBIDArrayIter iter = ids.iter();
    // The initial dimension is irrelevant, it is set again for each pass below.
    SortDBIDsBySingleDimension sorter = new VectorUtil.SortDBIDsBySingleDimension(relation, 0);
    for (int d = 0; d < dim; d++) {
        sorter.setDimension(d);
        ids.sort(sorter);
        // Minimum:
        iter.seek(0);
        double min = relation.get(iter).doubleValue(d);
        // Extend:
        iter.seek(ids.size() - 1);
        double delta = (relation.get(iter).doubleValue(d) - min) / bins;
        if (delta > 0.) {
            SetDBIDs[] dimparts = partitions[d];
            final int last = dimparts.length - 1;
            // Upper bound of the current bin.
            double split = min + delta;
            HashSetModifiableDBIDs pids = DBIDUtil.newHashSet();
            dimparts[0] = pids;
            int i = 0;
            for (iter.seek(0); iter.valid(); iter.advance()) {
                final double v = relation.get(iter).doubleValue(d);
                // Move forward to the bin that contains v. This may skip several
                // bins when there is a gap in the data; skipped bins stay empty.
                // The last bin takes everything that remains.
                while (v > split && i < last) {
                    i++;
                    split += delta;
                    pids = DBIDUtil.newHashSet();
                    dimparts[i] = pids;
                }
                // Always store the object: previously the object that caused the
                // switch to the next bin was not added to any bin.
                pids.add(iter);
            }
            // Any bins that were not reached share the set of the last bin used.
            for (++i; i < dimparts.length; ++i) {
                dimparts[i] = pids;
            }
        } else {
            // Flag whole dimension as bad
            partitions[d] = null;
        }
    }
    return partitions;
}
Also used : HashSetModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.HashSetModifiableDBIDs) SortDBIDsBySingleDimension(de.lmu.ifi.dbs.elki.data.VectorUtil.SortDBIDsBySingleDimension) ArrayModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs) DBIDArrayIter(de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter) SetDBIDs(de.lmu.ifi.dbs.elki.database.ids.SetDBIDs)

Example 24 with DBIDArrayIter

use of de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter in project elki by elki-project.

the class PROCLUS method assignPoints.

/**
 * Assigns every object of the database to the medoid with the smallest
 * Manhattan segmental distance, measured in the dimensions of that medoid.
 *
 * @param m_current Current centers
 * @param dimensions set of correlated dimensions for each medoid of the
 *        cluster
 * @param database the database containing the objects
 * @return one entry per row of {@code dimensions}: the cluster for that
 *         medoid, or {@code null} if no object was assigned to it
 */
private ArrayList<PROCLUSCluster> assignPoints(ArrayDBIDs m_current, long[][] dimensions, Relation<V> database) {
    final int numMedoids = m_current.size();
    final ModifiableDBIDs[] members = new ModifiableDBIDs[dimensions.length];
    for (int c = 0; c < numMedoids; c++) {
        members[c] = DBIDUtil.newHashSet();
    }

    // A single medoid iterator is rewound for every object.
    final DBIDArrayIter medoid = m_current.iter();
    for (DBIDIter obj = database.iterDBIDs(); obj.valid(); obj.advance()) {
        final V point = database.get(obj);
        int nearest = -1;
        double nearestDist = Double.NaN;
        medoid.seek(0);
        for (int c = 0; medoid.valid(); medoid.advance(), c++) {
            final V center = database.get(medoid);
            final double dist = manhattanSegmentalDistance(point, center, dimensions[c]);
            // While nearestDist is still NaN this comparison is false, so the
            // first medoid is always accepted.
            if (nearestDist <= dist) {
                continue;
            }
            nearestDist = dist;
            nearest = c;
        }
        // add the object to the cluster of the nearest medoid
        assert nearest >= 0;
        members[nearest].add(obj);
    }

    final ArrayList<PROCLUSCluster> clusters = new ArrayList<>(numMedoids);
    for (int c = 0; c < dimensions.length; c++) {
        final ModifiableDBIDs objectIDs = members[c];
        if (objectIDs.isEmpty()) {
            // Keep the position of the medoid, but mark it as having no cluster.
            clusters.add(null);
            continue;
        }
        final long[] clusterDimensions = dimensions[c];
        final double[] centroid = Centroid.make(database, objectIDs).getArrayRef();
        clusters.add(new PROCLUSCluster(objectIDs, clusterDimensions, centroid));
    }

    if (LOG.isDebugging()) {
        LOG.debugFine("clusters " + clusters);
    }
    return clusters;
}
Also used : ArrayList(java.util.ArrayList) DBIDArrayIter(de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) ArrayModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs) ModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs)

Example 25 with DBIDArrayIter

use of de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter in project elki by elki-project.

the class ClusteringVectorParser method nextEvent.

/**
 * Produce the next stream event by parsing one line of cluster assignments.
 *
 * An event queued in {@code nextevent} is returned first. Otherwise one line
 * is read: every token that parses as a base-10 integer is a cluster number,
 * every other token is collected as a label, and the first label is used as
 * the name of the clustering ({@code "Cluster"} if there is none). The
 * resulting clustering is stored in {@code curclu}, the labels in
 * {@code curlbl}.
 *
 * @return {@code META_CHANGED} for the first record and when labels appear
 *         for the first time (a {@code NEXT_OBJECT} event is then queued),
 *         {@code NEXT_OBJECT} for other records, and {@code END_OF_STREAM}
 *         when the reader has no more lines
 * @throws AbortException if a line does not contain as many cluster numbers
 *         as the id range established by the first record
 * @throws IllegalArgumentException if reading fails; the underlying
 *         {@link IOException} is attached as the cause
 */
@Override
public Event nextEvent() {
    // Deliver an event queued by a previous call first.
    if (nextevent != null) {
        Event ret = nextevent;
        nextevent = null;
        return ret;
    }
    try {
        if (!reader.nextLineExceptComments()) {
            return Event.END_OF_STREAM;
        }
        buf1.clear();
        lbl.clear();
        Int2IntOpenHashMap csize = new Int2IntOpenHashMap();
        String name = null;
        // The tokenizer was initialized by nextLineExceptComments().
        for (; tokenizer.valid(); tokenizer.advance()) {
            try {
                int cnum = tokenizer.getIntBase10();
                buf1.add(cnum);
                // Update cluster sizes:
                csize.addTo(cnum, 1);
            } catch (NumberFormatException e) {
                // Not a number: treat the token as a label.
                final String label = tokenizer.getSubstring();
                lbl.add(label);
                if (name == null) {
                    name = label;
                }
            }
        }
        if (name == null) {
            name = "Cluster";
        }
        // Update meta on first record:
        boolean metaupdate = (range == null);
        if (range == null) {
            range = DBIDUtil.generateStaticDBIDRange(buf1.size());
        }
        if (buf1.size() != range.size()) {
            throw new AbortException("Clusterings do not contain the same number of elements!");
        }
        // Build clustering to store in the relation.
        Int2ObjectOpenHashMap<ModifiableDBIDs> clusters = new Int2ObjectOpenHashMap<>(csize.size());
        curclu = new Clustering<>(name, name);
        // Allocate one id collection per cluster, sized by its member count.
        for (ObjectIterator<Int2IntMap.Entry> sizes = csize.int2IntEntrySet().fastIterator(); sizes.hasNext(); ) {
            Int2IntMap.Entry entry = sizes.next();
            if (entry.getIntValue() > 0) {
                clusters.put(entry.getIntKey(), DBIDUtil.newArray(entry.getIntValue()));
            }
        }
        // The i-th number on the line is the cluster of the i-th id of the range.
        DBIDArrayIter iter = range.iter();
        for (int i = 0; i < buf1.size(); i++) {
            clusters.get(buf1.getInt(i)).add(iter.seek(i));
        }
        for (ModifiableDBIDs cids : clusters.values()) {
            curclu.addToplevelCluster(new Cluster<Model>(cids, ClusterModel.CLUSTER));
        }
        // Label handling.
        if (!haslbl && !lbl.isEmpty()) {
            haslbl = true;
            metaupdate = true;
        }
        curlbl = LabelList.make(lbl);
        if (metaupdate) {
            // Force a meta update.
            nextevent = Event.NEXT_OBJECT;
            return Event.META_CHANGED;
        }
        return Event.NEXT_OBJECT;
    } catch (IOException e) {
        // Keep the original I/O failure as the cause instead of discarding it.
        throw new IllegalArgumentException("Error while parsing line " + reader.getLineNumber() + ".", e);
    }
}
Also used : Int2ObjectOpenHashMap(it.unimi.dsi.fastutil.ints.Int2ObjectOpenHashMap) DBIDArrayIter(de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter) IOException(java.io.IOException) Int2IntOpenHashMap(it.unimi.dsi.fastutil.ints.Int2IntOpenHashMap) ClusterModel(de.lmu.ifi.dbs.elki.data.model.ClusterModel) Model(de.lmu.ifi.dbs.elki.data.model.Model) ModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs) Int2IntMap(it.unimi.dsi.fastutil.ints.Int2IntMap) AbortException(de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException)

Aggregations

DBIDArrayIter (de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter)64 FiniteProgress (de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress)17 ArrayModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs)15 DBIDs (de.lmu.ifi.dbs.elki.database.ids.DBIDs)15 ArrayDBIDs (de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs)14 DBIDRange (de.lmu.ifi.dbs.elki.database.ids.DBIDRange)13 DBIDIter (de.lmu.ifi.dbs.elki.database.ids.DBIDIter)12 AbortException (de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException)9 Test (org.junit.Test)9 ModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs)8 WritableDoubleDataStore (de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore)6 MeanVariance (de.lmu.ifi.dbs.elki.math.MeanVariance)5 IOException (java.io.IOException)5 Clustering (de.lmu.ifi.dbs.elki.data.Clustering)4 DBIDVar (de.lmu.ifi.dbs.elki.database.ids.DBIDVar)4 DoubleRelation (de.lmu.ifi.dbs.elki.database.relation.DoubleRelation)4 Cluster (de.lmu.ifi.dbs.elki.data.Cluster)3 DoubleVector (de.lmu.ifi.dbs.elki.data.DoubleVector)3 SortDBIDsBySingleDimension (de.lmu.ifi.dbs.elki.data.VectorUtil.SortDBIDsBySingleDimension)3 ClusterModel (de.lmu.ifi.dbs.elki.data.model.ClusterModel)3