Search in sources :

Example 1 with SortDBIDsBySingleDimension

use of de.lmu.ifi.dbs.elki.data.VectorUtil.SortDBIDsBySingleDimension in project elki by elki-project.

the class AbstractKMeans method medians.

/**
 * Computes, for every cluster, the vector of per-dimension medians of its
 * members.
 *
 * A cluster without members keeps its previous median (the same array
 * instance is reused in the result).
 *
 * @param clusters the clusters to compute the medians for
 * @param medians the previous medians; also determines the dimensionality
 * @param database the relation containing the vectors
 * @return the median vectors of the given clusters
 */
protected double[][] medians(List<? extends DBIDs> clusters, double[][] medians, Relation<? extends NumberVector> database) {
    // Dimensionality is taken from the first of the previous medians.
    final int dim = medians[0].length;
    final SortDBIDsBySingleDimension sorter = new SortDBIDsBySingleDimension(database);
    final double[][] result = new double[k][];
    for (int c = 0; c < k; c++) {
        final DBIDs members = clusters.get(c);
        if (members.size() > 0) {
            // Work on a private array copy of the members and reuse one
            // iterator over it for all dimensions.
            final ArrayModifiableDBIDs candidates = DBIDUtil.newArray(members);
            final DBIDArrayIter cursor = candidates.iter();
            final double[] median = new double[dim];
            for (int d = 0; d < dim; d++) {
                sorter.setDimension(d);
                // Jump to the position reported as the median in dimension d.
                cursor.seek(QuickSelectDBIDs.median(candidates, sorter));
                median[d] = database.get(cursor).doubleValue(d);
            }
            result[c] = median;
        } else {
            // Empty cluster: fall back to the previous median.
            result[c] = medians[c];
        }
    }
    return result;
}
Also used : SortDBIDsBySingleDimension(de.lmu.ifi.dbs.elki.data.VectorUtil.SortDBIDsBySingleDimension)

Example 2 with SortDBIDsBySingleDimension

use of de.lmu.ifi.dbs.elki.data.VectorUtil.SortDBIDsBySingleDimension in project elki by elki-project.

the class HiCS method buildOneDimIndexes.

/**
 * Builds a simple one-dimensional "index" per attribute: for every dimension
 * of the relation, a fresh array of all DBIDs is sorted with a comparator
 * set to that dimension, and the sorted arrays are collected in dimension
 * order.
 *
 * @param relation Relation to index
 * @return List with one sorted DBID array per dimension
 */
private ArrayList<ArrayDBIDs> buildOneDimIndexes(Relation<? extends NumberVector> relation) {
    final int dim = RelationUtil.dimensionality(relation);
    final ArrayList<ArrayDBIDs> indexes = new ArrayList<>(dim + 1);
    // A single comparator instance is re-targeted for every dimension.
    final SortDBIDsBySingleDimension byDimension = new VectorUtil.SortDBIDsBySingleDimension(relation);
    for (int d = 0; d < dim; d++) {
        byDimension.setDimension(d);
        // Each dimension gets its own copy of the ids, as it is sorted in place.
        final ArrayModifiableDBIDs sorted = DBIDUtil.newArray(relation.getDBIDs());
        sorted.sort(byDimension);
        indexes.add(sorted);
    }
    return indexes;
}
Also used : SortDBIDsBySingleDimension(de.lmu.ifi.dbs.elki.data.VectorUtil.SortDBIDsBySingleDimension) ArrayModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs) ArrayDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs) ArrayList(java.util.ArrayList)

Example 3 with SortDBIDsBySingleDimension

use of de.lmu.ifi.dbs.elki.data.VectorUtil.SortDBIDsBySingleDimension in project elki by elki-project.

the class RelationSortingTest method testSorting.

/**
 * Sorts the DBIDs of the test relation by every single dimension in turn and
 * verifies that no id got lost and that the values in the sorted dimension
 * are in non-decreasing order.
 */
@Test
public void testSorting() {
    final Database database = AbstractSimpleAlgorithmTest.makeSimpleDatabase(filename, -1);
    final Relation<? extends NumberVector> relation = database.getRelation(TypeUtil.NUMBER_VECTOR_FIELD);
    final ArrayModifiableDBIDs sorted = DBIDUtil.newArray(relation.getDBIDs());
    final int expectedSize = relation.size();
    final int dimensionality = RelationUtil.dimensionality(relation);
    final SortDBIDsBySingleDimension comparator = new VectorUtil.SortDBIDsBySingleDimension(relation);
    for (int dim = 0; dim < dimensionality; dim++) {
        comparator.setDimension(dim);
        sorted.sort(comparator);
        // Sorting must only permute the ids, never drop or duplicate them.
        assertEquals("Lost some DBID during sorting?!?", expectedSize, DBIDUtil.newHashSet(sorted).size());
        // Walk the sorted array and compare each value with its predecessor.
        final DBIDArrayIter cursor = sorted.iter();
        double previous = relation.get(cursor).doubleValue(dim);
        cursor.advance();
        while (cursor.valid()) {
            final double current = relation.get(cursor).doubleValue(dim);
            assertTrue("Not correctly sorted: " + previous + " > " + current + " at pos " + cursor.getOffset(), previous <= current);
            previous = current;
            cursor.advance();
        }
    }
}
Also used : SortDBIDsBySingleDimension(de.lmu.ifi.dbs.elki.data.VectorUtil.SortDBIDsBySingleDimension) ArrayModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs) DBIDArrayIter(de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter) Test(org.junit.Test) AbstractSimpleAlgorithmTest(de.lmu.ifi.dbs.elki.algorithm.AbstractSimpleAlgorithmTest)

Example 4 with SortDBIDsBySingleDimension

use of de.lmu.ifi.dbs.elki.data.VectorUtil.SortDBIDsBySingleDimension in project elki by elki-project.

the class P3C method partitionData.

/**
 * Partition the data set into {@code bins} bins in each dimension
 * <i>independently</i>.
 *
 * This can be used to construct a grid approximation of the data using O(d n)
 * memory.
 *
 * When a dimension is found to be constant, it will not be partitioned, but
 * instead the corresponding array will be set to {@code null}.
 *
 * Within a non-constant dimension, the value range {@code [min, max]} is cut
 * into {@code bins} intervals of equal width, and every object is assigned to
 * exactly one bin. Values beyond the last split point go into the last bin.
 *
 * @param relation Data relation to partition
 * @param bins Number of bins
 * @return Partitions of each dimension.
 */
private SetDBIDs[][] partitionData(final Relation<V> relation, final int bins) {
    final int dim = RelationUtil.dimensionality(relation);
    SetDBIDs[][] partitions = new SetDBIDs[dim][bins];
    ArrayModifiableDBIDs ids = DBIDUtil.newArray(relation.getDBIDs());
    // will be reused.
    DBIDArrayIter iter = ids.iter();
    SortDBIDsBySingleDimension sorter = new VectorUtil.SortDBIDsBySingleDimension(relation, 0);
    for (int d = 0; d < dim; d++) {
        sorter.setDimension(d);
        ids.sort(sorter);
        // Minimum:
        iter.seek(0);
        double min = relation.get(iter).doubleValue(d);
        // Extend:
        iter.seek(ids.size() - 1);
        double delta = (relation.get(iter).doubleValue(d) - min) / bins;
        if (delta > 0.) {
            SetDBIDs[] dimparts = partitions[d];
            // Upper bound of the bin currently being filled.
            double split = min + delta;
            HashSetModifiableDBIDs pids = DBIDUtil.newHashSet();
            dimparts[0] = pids;
            int i = 0;
            for (iter.seek(0); iter.valid(); iter.advance()) {
                final double v = relation.get(iter).doubleValue(d);
                // Open new bins until the value fits (or the last bin is
                // reached). Skipped bins keep their own, empty set. The
                // object that triggers the bin change must still be stored:
                // previously it was dropped, and only a single bin step was
                // taken even when the value was several bin widths ahead.
                while (v > split && i < dimparts.length - 1) {
                    i++;
                    split += delta;
                    pids = DBIDUtil.newHashSet();
                    dimparts[i] = pids;
                }
                pids.add(iter);
            }
            // Bins never opened above (possible only through rounding of the
            // accumulated split points) share the last set, so that no entry
            // of a partitioned dimension is left null.
            for (++i; i < dimparts.length; ++i) {
                dimparts[i] = pids;
            }
        } else {
            // Flag whole dimension as bad
            partitions[d] = null;
        }
    }
    return partitions;
}
Also used : HashSetModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.HashSetModifiableDBIDs) SortDBIDsBySingleDimension(de.lmu.ifi.dbs.elki.data.VectorUtil.SortDBIDsBySingleDimension) ArrayModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs) DBIDArrayIter(de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter) SetDBIDs(de.lmu.ifi.dbs.elki.database.ids.SetDBIDs)

Example 5 with SortDBIDsBySingleDimension

use of de.lmu.ifi.dbs.elki.data.VectorUtil.SortDBIDsBySingleDimension in project elki by elki-project.

the class AbstractAggarwalYuOutlier method buildRanges.

/**
 * Grid discretization of the data.
 *
 * For each attribute, the objects are sorted by that attribute and the sorted
 * sequence is cut into {@code phi} consecutive ranges, so that each range
 * holds roughly a fraction f=1/phi of the records. The last range always
 * extends to the end of the data.
 *
 * @param relation Relation to process
 * @return for every dimension, the list of its {@code phi} ranges
 */
protected ArrayList<ArrayList<DBIDs>> buildRanges(Relation<V> relation) {
    final int dim = RelationUtil.dimensionality(relation);
    final int size = relation.size();
    final ArrayList<ArrayList<DBIDs>> ranges = new ArrayList<>();
    final ArrayModifiableDBIDs sorted = DBIDUtil.newArray(relation.getDBIDs());
    final SortDBIDsBySingleDimension byDimension = new SortDBIDsBySingleDimension(relation);
    // (Fractional) number of records per range.
    final double part = size * 1.0 / phi;
    for (int d = 0; d < dim; d++) {
        byDimension.setDimension(d);
        sorted.sort(byDimension);
        final ArrayList<DBIDs> cells = new ArrayList<>(phi + 1);
        final DBIDArrayIter cursor = sorted.iter();
        int from = 0;
        for (int r = 1; r <= phi; r++) {
            // Exclusive end offset of range r; the final range takes the rest.
            final int to;
            if (r < phi) {
                to = (int) (part * r);
            } else {
                to = size;
            }
            final ArrayModifiableDBIDs cell = DBIDUtil.newArray(to - from);
            cursor.seek(from);
            while (cursor.getOffset() < to) {
                cell.add(cursor);
                cursor.advance();
            }
            cells.add(cell);
            from = to;
        }
        ranges.add(cells);
    }
    return ranges;
}
Also used : SortDBIDsBySingleDimension(de.lmu.ifi.dbs.elki.data.VectorUtil.SortDBIDsBySingleDimension) ArrayModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs) ArrayModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs) DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) HashSetModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.HashSetModifiableDBIDs) ArrayList(java.util.ArrayList) DBIDArrayIter(de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter)

Aggregations

SortDBIDsBySingleDimension (de.lmu.ifi.dbs.elki.data.VectorUtil.SortDBIDsBySingleDimension)5 ArrayModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs)4 DBIDArrayIter (de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter)3 HashSetModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.HashSetModifiableDBIDs)2 ArrayList (java.util.ArrayList)2 AbstractSimpleAlgorithmTest (de.lmu.ifi.dbs.elki.algorithm.AbstractSimpleAlgorithmTest)1 ArrayDBIDs (de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs)1 DBIDs (de.lmu.ifi.dbs.elki.database.ids.DBIDs)1 SetDBIDs (de.lmu.ifi.dbs.elki.database.ids.SetDBIDs)1 Test (org.junit.Test)1