Use of de.lmu.ifi.dbs.elki.database.ids.HashSetModifiableDBIDs in project elki by elki-project.
The class OPTICSXi, method extractClusters.
/**
 * Extract clusters from a cluster order result.
 *
 * @param clusterOrderResult cluster order result
 * @param relation Relation
 * @param ixi Parameter 1 - Xi
 * @param minpts Parameter minPts
 */
private Clustering<OPTICSModel> extractClusters(ClusterOrder clusterOrderResult, Relation<?> relation, double ixi, int minpts) {
  ArrayDBIDs clusterOrder = clusterOrderResult.ids;
  DoubleDataStore reach = clusterOrderResult.reachability;
  DBIDArrayIter tmp = clusterOrder.iter();
  DBIDVar tmp2 = DBIDUtil.newVar();
  double mib = 0.0;
  List<SteepArea> salist = keepsteep ? new ArrayList<SteepArea>() : null;
  List<SteepDownArea> sdaset = new ArrayList<>();
  final Clustering<OPTICSModel> clustering = new Clustering<>("OPTICS Xi-Clusters", "optics");
  HashSet<Cluster<OPTICSModel>> curclusters = new HashSet<>();
  HashSetModifiableDBIDs unclaimedids = DBIDUtil.newHashSet(relation.getDBIDs());
  FiniteProgress scanprog = LOG.isVerbose() ? new FiniteProgress("OPTICS Xi cluster extraction", clusterOrder.size(), LOG) : null;
  for (SteepScanPosition scan = new SteepScanPosition(clusterOrderResult); scan.hasNext();) {
    if (scanprog != null) {
      scanprog.setProcessed(scan.index, LOG);
    }
    // Update maximum-inbetween
    mib = MathUtil.max(mib, scan.getReachability());
    // The last point cannot be the start of a steep area.
    if (!scan.next.valid()) {
      break;
    }
    // Xi-steep down area
    if (scan.steepDown(ixi)) {
      // Update mib values with current mib and filter
      updateFilterSDASet(mib, sdaset, ixi);
      final double startval = scan.getReachability();
      mib = 0.;
      int startsteep = scan.index, endsteep = scan.index;
      for (scan.next(); scan.hasNext(); scan.next()) {
        // still steep - continue.
        if (scan.steepDown(ixi)) {
          endsteep = scan.index;
          continue;
        }
        // Always stop looking after minpts "flat" steps.
        if (!scan.steepDown(1.0) || scan.index - endsteep > minpts) {
          break;
        }
      }
      final SteepDownArea sda = new SteepDownArea(startsteep, endsteep, startval, 0);
      if (LOG.isDebuggingFinest()) {
        LOG.debugFinest("New steep down area: " + sda.toString());
      }
      sdaset.add(sda);
      if (salist != null) {
        salist.add(sda);
      }
      continue;
    }
    // Xi-steep up area
    if (scan.steepUp(ixi)) {
      // Update mib values with current mib and filter
      updateFilterSDASet(mib, sdaset, ixi);
      final SteepUpArea sua;
      // Compute steep-up area
      {
        int startsteep = scan.index, endsteep = scan.index;
        mib = scan.getReachability();
        double esuccr = scan.getNextReachability();
        // Find end of steep-up-area, possibly updating mib again
        while (!Double.isInfinite(esuccr) && scan.hasNext()) {
          scan.next();
          // still steep - continue.
          if (scan.steepUp(ixi)) {
            endsteep = scan.index;
            mib = scan.getReachability();
            esuccr = scan.getNextReachability();
            continue;
          }
          // Stop looking after minpts non-up steps.
          if (!scan.steepUp(1.0) || scan.index - endsteep > minpts) {
            break;
          }
        }
        if (Double.isInfinite(esuccr)) {
          scan.next();
        }
        sua = new SteepUpArea(startsteep, endsteep, esuccr);
        if (LOG.isDebuggingFinest()) {
          LOG.debugFinest("New steep up area: " + sua.toString());
        }
        if (salist != null) {
          salist.add(sua);
        }
      }
      // Validate and compute clusters
      // LOG.debug("SDA size:"+sdaset.size()+" "+sdaset);
      ListIterator<SteepDownArea> sdaiter = sdaset.listIterator(sdaset.size());
      // Iterate backwards for correct hierarchy generation.
      while (sdaiter.hasPrevious()) {
        SteepDownArea sda = sdaiter.previous();
        if (LOG.isDebuggingFinest()) {
          LOG.debugFinest("Comparing: eU=" + mib + " SDA: " + sda.toString());
        }
        // Condition 3b: end-of-steep-up > maximum-in-between lower
        if (mib * ixi < sda.getMib()) {
          if (LOG.isDebuggingFinest()) {
            LOG.debugFinest("mib * ixi = " + mib * ixi + " < sda.getMib() = " + sda.getMib());
          }
          continue;
        }
        // By default, clusters cover both the steep up and steep down area
        int cstart = sda.getStartIndex(), cend = MathUtil.min(sua.getEndIndex(), clusterOrder.size() - 1);
        // However, we sometimes have to adjust this (Condition 4):
        {
          // Case b)
          if (sda.getMaximum() * ixi >= sua.getMaximum()) {
            while (cstart < cend && reach.doubleValue(tmp.seek(cstart + 1)) > sua.getMaximum()) {
              cstart++;
            }
          } else if (sua.getMaximum() * ixi >= sda.getMaximum()) {
            // Case c)
            while (cend > cstart && reach.doubleValue(tmp.seek(cend - 1)) > sda.getMaximum()) {
              cend--;
            }
          }
          // Case a) is the default
        }
        // Removes common artifacts from the Xi method
        if (!nocorrect) {
          simplify: while (cend > cstart) {
            clusterOrderResult.predecessor.assignVar(tmp.seek(cend), tmp2);
            for (int i = cstart; i < cend; i++) {
              if (DBIDUtil.equal(tmp2, tmp.seek(i))) {
                break simplify;
              }
            }
            // Not found.
            --cend;
          }
        }
        // Condition 3a: obey minpts
        if (cend - cstart + 1 < minpts) {
          if (LOG.isDebuggingFinest()) {
            LOG.debugFinest("MinPts not satisfied.");
          }
          continue;
        }
        // Build the cluster
        ModifiableDBIDs dbids = DBIDUtil.newArray();
        for (int idx = cstart; idx <= cend; idx++) {
          tmp.seek(idx);
          // Collect only unclaimed IDs.
          if (unclaimedids.remove(tmp)) {
            dbids.add(tmp);
          }
        }
        if (LOG.isDebuggingFine()) {
          LOG.debugFine("Found cluster with " + dbids.size() + " new objects, length " + (cend - cstart + 1));
        }
        OPTICSModel model = new OPTICSModel(cstart, cend);
        Cluster<OPTICSModel> cluster = new Cluster<>("Cluster_" + cstart + "_" + cend, dbids, model);
        // Build the hierarchy
        {
          Iterator<Cluster<OPTICSModel>> iter = curclusters.iterator();
          while (iter.hasNext()) {
            Cluster<OPTICSModel> clus = iter.next();
            OPTICSModel omodel = clus.getModel();
            if (model.getStartIndex() <= omodel.getStartIndex() && omodel.getEndIndex() <= model.getEndIndex()) {
              clustering.addChildCluster(cluster, clus);
              iter.remove();
            }
          }
        }
        curclusters.add(cluster);
      }
      continue;
    }
    // Flat - advance anyway.
    scan.next();
  }
  if (scanprog != null) {
    scanprog.setProcessed(clusterOrder.size(), LOG);
  }
  if (!unclaimedids.isEmpty()) {
    boolean noise = reach.doubleValue(tmp.seek(clusterOrder.size() - 1)) >= Double.POSITIVE_INFINITY;
    Cluster<OPTICSModel> allcluster = new Cluster<>(noise ? "Noise" : "Cluster", unclaimedids, noise, new OPTICSModel(0, clusterOrder.size() - 1));
    for (Cluster<OPTICSModel> cluster : curclusters) {
      clustering.addChildCluster(allcluster, cluster);
    }
    clustering.addToplevelCluster(allcluster);
  } else {
    for (Cluster<OPTICSModel> cluster : curclusters) {
      clustering.addToplevelCluster(cluster);
    }
  }
  clustering.addChildResult(clusterOrderResult);
  if (salist != null) {
    clusterOrderResult.addChildResult(new SteepAreaResult(salist));
  }
  return clustering;
}
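For context, the scan above classifies positions in the cluster order by their Xi-steepness. Below is a minimal, self-contained sketch of the two predicates, assuming the standard definitions of the OPTICS-Xi method with ixi = 1 - Xi (as in the parameter above); these array-based helpers are hypothetical illustrations, not the ELKI SteepScanPosition API:

// A point is xi-steep down if its successor's reachability drops to at
// most reach[i] * (1 - xi); xi-steep up is the mirrored condition.
static boolean steepDown(double[] reach, int i, double ixi) {
  return reach[i] * ixi >= reach[i + 1];
}

static boolean steepUp(double[] reach, int i, double ixi) {
  return reach[i] <= reach[i + 1] * ixi;
}

For example, with reach = { 1.0, 0.4, 0.35, 0.9 } and Xi = 0.1 (so ixi = 0.9), index 0 is steep down (1.0 * 0.9 >= 0.4) and index 2 is steep up (0.35 <= 0.9 * 0.9).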
Use of de.lmu.ifi.dbs.elki.database.ids.HashSetModifiableDBIDs in project elki by elki-project.
The class ByLabelClustering, method assign.
/**
 * Assigns the specified id to the labelMap according to its label
 *
 * @param labelMap the mapping of label to ids
 * @param label the label of the object to be assigned
 * @param id the id of the object to be assigned
 */
private void assign(HashMap<String, DBIDs> labelMap, String label, DBIDRef id) {
  if (labelMap.containsKey(label)) {
    DBIDs exist = labelMap.get(label);
    if (exist instanceof DBID) {
      // Second occurrence: promote the singleton DBID to a hash set.
      ModifiableDBIDs n = DBIDUtil.newHashSet();
      n.add((DBID) exist);
      n.add(id);
      labelMap.put(label, n);
    } else {
      assert (exist instanceof HashSetModifiableDBIDs);
      assert (exist.size() > 1);
      ((ModifiableDBIDs) exist).add(id);
    }
  } else {
    // First occurrence of this label: store the single id only.
    labelMap.put(label, DBIDUtil.deref(id));
  }
}
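The branching implements a small memory optimization: the first id seen for a label is stored as a plain DBID, and only a second occurrence promotes the entry to a HashSetModifiableDBIDs. A hypothetical usage sketch (the method is private, so this assumes access from within the class; the label and ids are made up):

HashMap<String, DBIDs> labelMap = new HashMap<>();
assign(labelMap, "A", DBIDUtil.importInteger(1));
// labelMap.get("A") is still a single DBID; no set was allocated.
assign(labelMap, "A", DBIDUtil.importInteger(2));
// The second occurrence promoted the entry to a HashSetModifiableDBIDs.
assert labelMap.get("A").size() == 2;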
Use of de.lmu.ifi.dbs.elki.database.ids.HashSetModifiableDBIDs in project elki by elki-project.
The class ByLabelHierarchicalClustering, method assign.
/**
 * Assigns the specified id to the labelMap according to its label
 *
 * @param labelMap the mapping of label to ids
 * @param label the label of the object to be assigned
 * @param id the id of the object to be assigned
 */
private void assign(HashMap<String, DBIDs> labelMap, String label, DBIDRef id) {
  if (labelMap.containsKey(label)) {
    DBIDs exist = labelMap.get(label);
    if (exist instanceof DBID) {
      ModifiableDBIDs n = DBIDUtil.newHashSet();
      n.add((DBID) exist);
      n.add(id);
      labelMap.put(label, n);
    } else {
      assert (exist instanceof HashSetModifiableDBIDs);
      assert (exist.size() > 1);
      ((ModifiableDBIDs) exist).add(id);
    }
  } else {
    labelMap.put(label, DBIDUtil.deref(id));
  }
}
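This is the same promote-on-second-insert pattern as in ByLabelClustering.assign above (see the usage sketch there); only the enclosing class differs, as this variant arranges the label-based clusters into a hierarchy.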
Use of de.lmu.ifi.dbs.elki.database.ids.HashSetModifiableDBIDs in project elki by elki-project.
The class SpacefillingMaterializeKNNPreprocessor, method preprocess.
@Override
protected void preprocess() {
  // Prepare space filling curve:
  final long starttime = System.currentTimeMillis();
  final int size = relation.size();
  final int numgen = curvegen.size();
  final int numcurves = numgen * variants;
  List<List<SpatialPair<DBID, NumberVector>>> curves = new ArrayList<>(numcurves);
  for (int i = 0; i < numcurves; i++) {
    curves.add(new ArrayList<SpatialPair<DBID, NumberVector>>(size));
  }
  for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
    final NumberVector v = relation.get(iditer);
    SpatialPair<DBID, NumberVector> ref = new SpatialPair<DBID, NumberVector>(DBIDUtil.deref(iditer), v);
    for (List<SpatialPair<DBID, NumberVector>> curve : curves) {
      curve.add(ref);
    }
  }
  // Sort spatially
  final double[] mms = SpatialSorter.computeMinMax(curves.get(0));
  final double[] mmscratch = new double[mms.length];
  final int numdim = mms.length >>> 1;
  final int[] permutation = new int[numdim];
  for (int j = 0; j < variants; j++) {
    for (int i = 0; i < mms.length; i += 2) {
      double len = mms[i + 1] - mms[i];
      mmscratch[i] = mms[i] - len * random.nextDouble();
      mmscratch[i + 1] = mms[i + 1] + len * random.nextDouble();
    }
    // Generate permutation:
    for (int i = 0; i < numdim; i++) {
      permutation[i] = i;
    }
    // Knuth / Fisher-Yates style shuffle
    for (int i = numdim - 1; i > 0; i--) {
      // Swap with random preceding element.
      int ri = random.nextInt(i + 1);
      int tmp = permutation[ri];
      permutation[ri] = permutation[i];
      permutation[i] = tmp;
    }
    for (int i = 0; i < numgen; i++) {
      curvegen.get(i).sort(curves.get(i + numgen * j), 0, size, mmscratch, permutation);
    }
  }
  // Build position index, DBID -> position in each of the curves
  WritableDataStore<int[]> positions = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_TEMP | DataStoreFactory.HINT_HOT, int[].class);
  for (int cnum = 0; cnum < numcurves; cnum++) {
    Iterator<SpatialPair<DBID, NumberVector>> it = curves.get(cnum).iterator();
    for (int i = 0; it.hasNext(); i++) {
      SpatialPair<DBID, NumberVector> r = it.next();
      final int[] data;
      if (cnum == 0) {
        data = new int[numcurves];
        positions.put(r.first, data);
      } else {
        data = positions.get(r.first);
      }
      data[cnum] = i;
    }
  }
  // Convert to final storage
  final int wsize = (int) Math.ceil(window * k);
  storage = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC, KNNList.class);
  HashSetModifiableDBIDs cands = DBIDUtil.newHashSet(2 * wsize * numcurves);
  for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
    // Get candidates.
    cands.clear();
    int[] posi = positions.get(iditer);
    for (int i = 0; i < posi.length; i++) {
      List<SpatialPair<DBID, NumberVector>> curve = curves.get(i);
      final int start = Math.max(0, posi[i] - wsize);
      final int end = Math.min(posi[i] + wsize + 1, curve.size());
      for (int pos = start; pos < end; pos++) {
        cands.add(curve.get(pos).first);
      }
    }
    int distc = 0;
    KNNHeap heap = DBIDUtil.newHeap(k);
    O vec = relation.get(iditer);
    for (DBIDIter iter = cands.iter(); iter.valid(); iter.advance()) {
      heap.insert(distanceQuery.distance(vec, iter), iter);
      distc++;
    }
    storage.put(iditer, heap.toKNNList());
    mean.put(distc / (double) k);
  }
  final long end = System.currentTimeMillis();
  if (LOG.isStatistics()) {
    LOG.statistics(new LongStatistic(this.getClass().getCanonicalName() + ".construction-time.ms", end - starttime));
  }
}
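The candidate-gathering loop above is the heart of the approximation: an object's kNN candidates are the union, over all curves, of the wsize positions to either side of the object's position on each curve. A simplified, self-contained sketch of the per-curve window lookup (the generic helper and its names are made up for illustration, not the ELKI API):

static <T> Set<T> windowCandidates(List<T> curve, int position, int wsize) {
  Set<T> cands = new HashSet<>();
  // Clamp the window to the ends of the curve.
  final int start = Math.max(0, position - wsize);
  final int end = Math.min(position + wsize + 1, curve.size());
  for (int pos = start; pos < end; pos++) {
    cands.add(curve.get(pos));
  }
  return cands;
}

Exact distances are then computed only within this candidate set, which is what makes the preprocessor fast but approximate.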
Use of de.lmu.ifi.dbs.elki.database.ids.HashSetModifiableDBIDs in project elki by elki-project.
The class MaximumF1EvaluationTest, method testMaximumF1.
/**
 * Test maximum F1 score computation
 */
@Test
public void testMaximumF1() {
  HashSetModifiableDBIDs positive = DBIDUtil.newHashSet();
  positive.add(DBIDUtil.importInteger(1));
  positive.add(DBIDUtil.importInteger(2));
  positive.add(DBIDUtil.importInteger(3));
  positive.add(DBIDUtil.importInteger(4));
  positive.add(DBIDUtil.importInteger(5));
  final ModifiableDoubleDBIDList distances = DBIDUtil.newDistanceDBIDList();
  // P: 1.0 R: 0.2
  distances.add(0.0, DBIDUtil.importInteger(1));
  // P: 1.0 R: 0.4
  distances.add(1.0, DBIDUtil.importInteger(2));
  // P: 2/3 R: 0.4
  distances.add(2.0, DBIDUtil.importInteger(6));
  // P: 0.5 R: 0.4
  distances.add(3.0, DBIDUtil.importInteger(7));
  // P: 0.6 R: 0.6
  distances.add(3.0, DBIDUtil.importInteger(3));
  // P: 0.5 R: 0.6
  distances.add(4.0, DBIDUtil.importInteger(8));
  // P: 4/7 R: 0.8
  distances.add(4.0, DBIDUtil.importInteger(4));
  // P: 0.5 R: 0.8
  distances.add(5.0, DBIDUtil.importInteger(9));
  // P: 5/9 R: 1.0
  distances.add(6.0, DBIDUtil.importInteger(5));
  double maxf1 = new MaximumF1Evaluation().evaluate(positive, distances);
  assertEquals("Maximum F1 not correct.", 0.7142857142857143, maxf1, 1e-14);
}
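For reference, the expected value can be reproduced by scanning the ranking once: update precision and recall after each rank and keep the best F1. A sketch under the assumption (matching the comments above) that ties are broken in list order:

static double maximumF1(boolean[] relevantAtRank, int numPositive) {
  double best = 0.;
  int hits = 0;
  for (int i = 0; i < relevantAtRank.length; i++) {
    if (relevantAtRank[i]) {
      hits++;
    }
    if (hits > 0) {
      // Precision and recall after the first i + 1 ranks.
      double precision = hits / (double) (i + 1), recall = hits / (double) numPositive;
      double f1 = 2 * precision * recall / (precision + recall);
      best = f1 > best ? f1 : best;
    }
  }
  return best;
}

With the ranking above (relevant at ranks 1, 2, 5, 7, and 9), the maximum is reached at the last rank: P = 5/9, R = 1.0, F1 = 10/14 ≈ 0.7142857, matching the asserted value.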