Use of de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter in project elki (elki-project): class RelationSortingTest, method testSorting.
@Test
public void testSorting() {
  Database db = AbstractSimpleAlgorithmTest.makeSimpleDatabase(filename, -1);
  Relation<? extends NumberVector> rel = db.getRelation(TypeUtil.NUMBER_VECTOR_FIELD);
  // Work on a mutable copy of the DBIDs, so we can sort them freely.
  ArrayModifiableDBIDs sorted = DBIDUtil.newArray(rel.getDBIDs());
  final int expected = rel.size();
  SortDBIDsBySingleDimension comparator = new VectorUtil.SortDBIDsBySingleDimension(rel);
  final int dimensionality = RelationUtil.dimensionality(rel);
  for (int dim = 0; dim < dimensionality; dim++) {
    comparator.setDimension(dim);
    sorted.sort(comparator);
    // Sorting must be a permutation: converting to a set must not shrink it.
    assertEquals("Lost some DBID during sorting?!?", expected, DBIDUtil.newHashSet(sorted).size());
    // Verify the values are non-decreasing along this dimension.
    DBIDArrayIter pos = sorted.iter();
    double current = rel.get(pos).doubleValue(dim);
    for (pos.advance(); pos.valid(); pos.advance()) {
      double value = rel.get(pos).doubleValue(dim);
      assertTrue("Not correctly sorted: " + current + " > " + value + " at pos " + pos.getOffset(), current <= value);
      current = value;
    }
  }
}
Use of de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter in project elki (elki-project): class InMemoryIDistanceIndex, method rankReferencePoints.
/**
 * Sort the reference points by distance to the query object.
 *
 * @param distanceQuery Distance query to use
 * @param obj Query object
 * @param referencepoints Reference points to rank
 * @return Reference point offsets, paired with their distance, sorted ascending by distance.
 */
protected static <O> DoubleIntPair[] rankReferencePoints(DistanceQuery<O> distanceQuery, O obj, ArrayDBIDs referencepoints) {
  final int numref = referencepoints.size();
  DoubleIntPair[] ranked = new DoubleIntPair[numref];
  // Pair each reference point's array offset with its distance to the query.
  DBIDArrayIter iter = referencepoints.iter();
  while (iter.valid()) {
    final int offset = iter.getOffset();
    ranked[offset] = new DoubleIntPair(distanceQuery.distance(obj, iter), offset);
    iter.advance();
  }
  // DoubleIntPair orders by the double component first, i.e. by distance.
  Arrays.sort(ranked);
  return ranked;
}
Use of de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter in project elki (elki-project): class P3C, method partitionData.
/**
 * Partition the data set into {@code bins} bins in each dimension
 * <i>independently</i>.
 *
 * This can be used to construct a grid approximation of the data using O(d n)
 * memory.
 *
 * When a dimension is found to be constant, it will not be partitioned, but
 * instead the corresponding array will be set to {@code null}.
 *
 * @param relation Data relation to partition
 * @param bins Number of bins
 * @return Partitions of each dimension.
 */
private SetDBIDs[][] partitionData(final Relation<V> relation, final int bins) {
  final int dim = RelationUtil.dimensionality(relation);
  SetDBIDs[][] partitions = new SetDBIDs[dim][bins];
  ArrayModifiableDBIDs ids = DBIDUtil.newArray(relation.getDBIDs());
  // Iterator will be reused across dimensions (seek() resets it).
  DBIDArrayIter iter = ids.iter();
  SortDBIDsBySingleDimension sorter = new VectorUtil.SortDBIDsBySingleDimension(relation, 0);
  for (int d = 0; d < dim; d++) {
    sorter.setDimension(d);
    ids.sort(sorter);
    // Minimum (first element after sorting):
    iter.seek(0);
    double min = relation.get(iter).doubleValue(d);
    // Bin width, derived from the maximum (last element):
    iter.seek(ids.size() - 1);
    double delta = (relation.get(iter).doubleValue(d) - min) / bins;
    if (delta > 0.) {
      SetDBIDs[] dimparts = partitions[d];
      double split = min + delta;
      HashSetModifiableDBIDs pids = DBIDUtil.newHashSet();
      dimparts[0] = pids;
      int i = 0;
      // Sweep the sorted ids, cutting a new bin whenever the value exceeds
      // the current split point. The last bin absorbs everything remaining
      // (guards against floating-point rounding at the maximum).
      for (iter.seek(0); iter.valid(); /* advance inside */) {
        final double v = relation.get(iter).doubleValue(d);
        if (v <= split || i == dimparts.length - 1) {
          pids.add(iter);
          iter.advance();
        } else {
          // BUGFIX: open the next bin WITHOUT advancing, so the element that
          // triggered the switch is re-examined against the new split point.
          // (Previously it was consumed here and dropped from all bins.)
          i++;
          split += delta;
          pids = DBIDUtil.newHashSet();
          dimparts[i] = pids;
        }
      }
      // Point any trailing (empty) bins at the last partition created.
      for (++i; i < dimparts.length; ++i) {
        dimparts[i] = pids;
      }
    } else {
      // Flag whole dimension as bad (constant attribute)
      partitions[d] = null;
    }
  }
  return partitions;
}
Use of de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter in project elki (elki-project): class PROCLUS, method assignPoints.
/**
 * Assigns the objects to the clusters.
 *
 * @param m_current Current centers
 * @param dimensions set of correlated dimensions for each medoid of the
 *        cluster
 * @param database the database containing the objects
 * @return the assignments of the object to the clusters
 */
private ArrayList<PROCLUSCluster> assignPoints(ArrayDBIDs m_current, long[][] dimensions, Relation<V> database) {
  ModifiableDBIDs[] clusterIDs = new ModifiableDBIDs[dimensions.length];
  for (int i = 0; i < m_current.size(); i++) {
    clusterIDs[i] = DBIDUtil.newHashSet();
  }
  DBIDArrayIter m_i = m_current.iter();
  for (DBIDIter it = database.iterDBIDs(); it.valid(); it.advance()) {
    V p = database.get(it);
    // NaN sentinel: "!(NaN <= x)" is always true, so the first medoid always
    // becomes the initial best without needing a separate first-iteration check.
    double minDist = Double.NaN;
    int best = -1, i = 0;
    for (m_i.seek(0); m_i.valid(); m_i.advance(), i++) {
      V m = database.get(m_i);
      double currentDist = manhattanSegmentalDistance(p, m, dimensions[i]);
      if (!(minDist <= currentDist)) {
        minDist = currentDist;
        best = i;
      }
    }
    // add p to cluster with mindist
    assert best >= 0;
    clusterIDs[best].add(it);
  }
  // Materialize clusters; empty assignments become null placeholders so the
  // result stays index-aligned with the medoid/dimension arrays.
  ArrayList<PROCLUSCluster> clusters = new ArrayList<>(m_current.size());
  for (int i = 0; i < dimensions.length; i++) {
    ModifiableDBIDs objectIDs = clusterIDs[i];
    if (!objectIDs.isEmpty()) {
      long[] clusterDimensions = dimensions[i];
      double[] centroid = Centroid.make(database, objectIDs).getArrayRef();
      clusters.add(new PROCLUSCluster(objectIDs, clusterDimensions, centroid));
    } else {
      clusters.add(null);
    }
  }
  if (LOG.isDebugging()) {
    // Simple concatenation instead of an explicit StringBuilder chain.
    LOG.debugFine("clusters " + clusters);
  }
  return clusters;
}
Use of de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter in project elki (elki-project): class ClusteringVectorParser, method nextEvent.
/**
 * Produce the next streaming event: parses one clustering-vector line into a
 * {@code Clustering} (stored in {@code curclu}) plus optional labels
 * ({@code curlbl}). Emits META_CHANGED before the first object and whenever
 * labels first appear; END_OF_STREAM when the reader is exhausted.
 */
@Override
public Event nextEvent() {
  // A META_CHANGED event may have queued up a NEXT_OBJECT; deliver it first.
  if (nextevent != null) {
    Event ret = nextevent;
    nextevent = null;
    return ret;
  }
  try {
    while (reader.nextLineExceptComments()) {
      buf1.clear();
      lbl.clear();
      Int2IntOpenHashMap csize = new Int2IntOpenHashMap();
      String name = null;
      // Tokenize the line: integers are cluster assignments, everything
      // else is treated as a label (the first label names the clustering).
      for (; /* initialized by nextLineExceptComments() */
      tokenizer.valid(); tokenizer.advance()) {
        try {
          int cnum = tokenizer.getIntBase10();
          buf1.add(cnum);
          // Update cluster sizes:
          csize.addTo(cnum, 1);
        } catch (NumberFormatException e) {
          final String label = tokenizer.getSubstring();
          lbl.add(label);
          if (name == null) {
            name = label;
          }
        }
      }
      if (name == null) {
        name = "Cluster";
      }
      // Update meta on first record:
      boolean metaupdate = (range == null);
      if (range == null) {
        // Allocate one static DBID per column, shared by all clusterings.
        range = DBIDUtil.generateStaticDBIDRange(buf1.size());
      }
      if (buf1.size() != range.size()) {
        throw new AbortException("Clusterings do not contain the same number of elements!");
      }
      // Build clustering to store in the relation.
      Int2ObjectOpenHashMap<ModifiableDBIDs> clusters = new Int2ObjectOpenHashMap<>(csize.size());
      curclu = new Clustering<>(name, name);
      // Pre-size each cluster's DBID array from the counted sizes.
      for (ObjectIterator<Int2IntMap.Entry> iter = csize.int2IntEntrySet().fastIterator(); iter.hasNext(); ) {
        Int2IntMap.Entry entry = iter.next();
        if (entry.getIntValue() > 0) {
          clusters.put(entry.getIntKey(), DBIDUtil.newArray(entry.getIntValue()));
        }
      }
      // Assign the i-th DBID of the range to the cluster given in column i.
      DBIDArrayIter iter = range.iter();
      for (int i = 0; i < buf1.size(); i++) {
        clusters.get(buf1.getInt(i)).add(iter.seek(i));
      }
      for (ModifiableDBIDs cids : clusters.values()) {
        curclu.addToplevelCluster(new Cluster<Model>(cids, ClusterModel.CLUSTER));
      }
      // Label handling: first appearance of labels changes the metadata.
      if (!haslbl && !lbl.isEmpty()) {
        haslbl = true;
        metaupdate = true;
      }
      curlbl = LabelList.make(lbl);
      if (metaupdate) {
        // Force a meta update; queue the object event for the next call.
        nextevent = Event.NEXT_OBJECT;
        return Event.META_CHANGED;
      }
      return Event.NEXT_OBJECT;
    }
    return Event.END_OF_STREAM;
  } catch (IOException e) {
    // Preserve the cause so the original I/O stack trace is not lost.
    throw new IllegalArgumentException("Error while parsing line " + reader.getLineNumber() + ".", e);
  }
}
Aggregations: end of collected DBIDArrayIter usage examples.