use of de.lmu.ifi.dbs.elki.data.VectorUtil.SortDBIDsBySingleDimension in project elki by elki-project.
the class AbstractKMeans method medians.
/**
 * Computes the component-wise median vector of every cluster.
 *
 * For each non-empty cluster, the median object is selected independently
 * in every dimension, and its value in that dimension becomes the
 * corresponding coordinate of the new median vector. An empty cluster keeps
 * its previous median (the very same array is reused, not copied).
 *
 * @param clusters the clusters to compute the medians for
 * @param medians the previous medians; also determines the dimensionality
 * @param database the relation containing the vectors
 * @return the median vectors of the given clusters
 */
protected double[][] medians(List<? extends DBIDs> clusters, double[][] medians, Relation<? extends NumberVector> database) {
  final int dim = medians[0].length;
  final SortDBIDsBySingleDimension byDimension = new SortDBIDsBySingleDimension(database);
  final double[][] result = new double[k][];
  for (int c = 0; c < k; c++) {
    final DBIDs members = clusters.get(c);
    if (members.size() > 0) {
      // Work on a private array copy, since median selection reorders it.
      final ArrayModifiableDBIDs order = DBIDUtil.newArray(members);
      final DBIDArrayIter cursor = order.iter();
      final double[] median = new double[dim];
      int d = 0;
      while (d < dim) {
        byDimension.setDimension(d);
        final int pos = QuickSelectDBIDs.median(order, byDimension);
        cursor.seek(pos);
        median[d] = database.get(cursor).doubleValue(d);
        d++;
      }
      result[c] = median;
    } else {
      // Nothing assigned to this cluster: carry the old median over.
      result[c] = medians[c];
    }
  }
  return result;
}
use of de.lmu.ifi.dbs.elki.data.VectorUtil.SortDBIDsBySingleDimension in project elki by elki-project.
the class HiCS method buildOneDimIndexes.
/**
 * Builds one sorted "index" per attribute: for every dimension, a fresh
 * array of all DBIDs of the relation is created and sorted by the value in
 * that dimension.
 *
 * @param relation Relation to index
 * @return List of sorted DBID arrays, one entry per dimension, in dimension
 *         order
 */
private ArrayList<ArrayDBIDs> buildOneDimIndexes(Relation<? extends NumberVector> relation) {
  final int dim = RelationUtil.dimensionality(relation);
  final ArrayList<ArrayDBIDs> indexes = new ArrayList<>(dim + 1);
  // A single comparator is reused and re-targeted for each dimension.
  final SortDBIDsBySingleDimension byDimension = new VectorUtil.SortDBIDsBySingleDimension(relation);
  int d = 0;
  while (d < dim) {
    byDimension.setDimension(d);
    final ArrayModifiableDBIDs sorted = DBIDUtil.newArray(relation.getDBIDs());
    sorted.sort(byDimension);
    indexes.add(sorted);
    d++;
  }
  return indexes;
}
use of de.lmu.ifi.dbs.elki.data.VectorUtil.SortDBIDsBySingleDimension in project elki by elki-project.
the class RelationSortingTest method testSorting.
/**
 * Sorts the DBIDs of the test relation by each dimension in turn and checks
 * that no DBID was lost and that the values in the sorted dimension are in
 * non-decreasing order.
 */
@Test
public void testSorting() {
  final Database database = AbstractSimpleAlgorithmTest.makeSimpleDatabase(filename, -1);
  final Relation<? extends NumberVector> relation = database.getRelation(TypeUtil.NUMBER_VECTOR_FIELD);
  final ArrayModifiableDBIDs order = DBIDUtil.newArray(relation.getDBIDs());
  final int expected = relation.size();
  final int dimensionality = RelationUtil.dimensionality(relation);
  final SortDBIDsBySingleDimension byDimension = new VectorUtil.SortDBIDsBySingleDimension(relation);
  int d = 0;
  while (d < dimensionality) {
    byDimension.setDimension(d);
    order.sort(byDimension);
    // Sorting must be a permutation: the set of distinct ids is unchanged.
    assertEquals("Lost some DBID during sorting?!?", expected, DBIDUtil.newHashSet(order).size());
    final DBIDArrayIter cursor = order.iter();
    double previous = relation.get(cursor).doubleValue(d);
    cursor.advance();
    while (cursor.valid()) {
      final double current = relation.get(cursor).doubleValue(d);
      assertTrue("Not correctly sorted: " + previous + " > " + current + " at pos " + cursor.getOffset(), previous <= current);
      previous = current;
      cursor.advance();
    }
    d++;
  }
}
use of de.lmu.ifi.dbs.elki.data.VectorUtil.SortDBIDsBySingleDimension in project elki by elki-project.
the class P3C method partitionData.
/**
 * Partition the data set into {@code bins} bins in each dimension
 * <i>independently</i>.
 *
 * This can be used to construct a grid approximation of the data using O(d n)
 * memory.
 *
 * Within a dimension, all bins have the same width
 * {@code (max - min) / bins}. Every object is assigned to exactly one bin of
 * every partitioned dimension; bins that receive no object hold an empty set.
 *
 * When a dimension is found to be constant, it will not be partitioned, but
 * instead the corresponding array will be set to {@code null}.
 *
 * @param relation Data relation to partition
 * @param bins Number of bins
 * @return Partitions of each dimension.
 */
private SetDBIDs[][] partitionData(final Relation<V> relation, final int bins) {
  final int dim = RelationUtil.dimensionality(relation);
  SetDBIDs[][] partitions = new SetDBIDs[dim][bins];
  ArrayModifiableDBIDs ids = DBIDUtil.newArray(relation.getDBIDs());
  // will be reused.
  DBIDArrayIter iter = ids.iter();
  SortDBIDsBySingleDimension sorter = new VectorUtil.SortDBIDsBySingleDimension(relation, 0);
  for (int d = 0; d < dim; d++) {
    sorter.setDimension(d);
    ids.sort(sorter);
    // Minimum:
    iter.seek(0);
    final double min = relation.get(iter).doubleValue(d);
    // Extend:
    iter.seek(ids.size() - 1);
    final double delta = (relation.get(iter).doubleValue(d) - min) / bins;
    if (!(delta > 0.)) {
      // Flag whole dimension as bad
      partitions[d] = null;
      continue;
    }
    final SetDBIDs[] dimparts = partitions[d];
    final int last = dimparts.length - 1;
    double split = min + delta;
    HashSetModifiableDBIDs pids = DBIDUtil.newHashSet();
    dimparts[0] = pids;
    int i = 0;
    for (iter.seek(0); iter.valid(); iter.advance()) {
      final double v = relation.get(iter).doubleValue(d);
      // Move on to the bin that contains v. This may pass over several
      // empty bins; the last bin absorbs everything beyond its boundary so
      // that rounding of the accumulated split cannot lose the maximum.
      while (v > split && i < last) {
        i++;
        split += delta;
        pids = DBIDUtil.newHashSet();
        dimparts[i] = pids;
      }
      // Always store the object, including the one that opened a new bin.
      pids.add(iter);
    }
    // Safety net: bins that were never reached share the last set, so that
    // no entry of a partitioned dimension is left null.
    for (++i; i < dimparts.length; ++i) {
      dimparts[i] = pids;
    }
  }
  return partitions;
}
use of de.lmu.ifi.dbs.elki.data.VectorUtil.SortDBIDsBySingleDimension in project elki by elki-project.
the class AbstractAggarwalYuOutlier method buildRanges.
/**
 * Grid discretization of the data:<br />
 * Every attribute is sorted and cut into phi consecutive ranges of (nearly)
 * equal size, so each range holds about a fraction f=1/phi of the
 * records.<br />
 * The last range of a dimension always extends to the final record.
 *
 * @param relation Relation to process
 * @return for each dimension, the list of its phi ranges
 */
protected ArrayList<ArrayList<DBIDs>> buildRanges(Relation<V> relation) {
  final int dim = RelationUtil.dimensionality(relation);
  final int size = relation.size();
  final ArrayList<ArrayList<DBIDs>> ranges = new ArrayList<>();
  final ArrayModifiableDBIDs sorted = DBIDUtil.newArray(relation.getDBIDs());
  final SortDBIDsBySingleDimension byDimension = new SortDBIDsBySingleDimension(relation);
  // Ideal (fractional) number of records per range.
  final double perRange = size * 1.0 / phi;
  for (int d = 0; d < dim; d++) {
    byDimension.setDimension(d);
    sorted.sort(byDimension);
    final ArrayList<DBIDs> perDimension = new ArrayList<>(phi + 1);
    final DBIDArrayIter cursor = sorted.iter();
    int from = 0;
    for (int r = 1; r <= phi; r++) {
      final int to;
      if (r < phi) {
        to = (int) (perRange * r);
      } else {
        // The final range takes whatever is left.
        to = size;
      }
      final ArrayModifiableDBIDs bucket = DBIDUtil.newArray(to - from);
      cursor.seek(from);
      while (cursor.getOffset() < to) {
        bucket.add(cursor);
        cursor.advance();
      }
      from = to;
      perDimension.add(bucket);
    }
    ranges.add(perDimension);
  }
  return ranges;
}
Aggregations