Use of de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs in the ELKI project (elki-project): class PAMInitialMeans, method chooseInitialMedoids.
/**
 * PAM BUILD initialization: greedily choose the initial k medoids.
 *
 * The first medoid is the object with the smallest distance sum to all other
 * objects (the optimal 1-medoid solution). Each subsequent medoid is the
 * non-medoid object whose addition minimizes the total distance of every
 * object to its nearest medoid. Three scratch stores are rotated by reference
 * swapping (never copied): mindist = distance to the nearest already-chosen
 * medoid, bestd = distances under the best candidate seen so far in the
 * current round, tempd = distances under the candidate being evaluated.
 *
 * @param k Number of medoids to choose
 * @param ids Candidate object ids
 * @param distQ Distance query to use
 * @return the chosen medoid ids
 */
@Override
public DBIDs chooseInitialMedoids(int k, DBIDs ids, DistanceQuery<? super O> distQ) {
  ArrayModifiableDBIDs medids = DBIDUtil.newArray(k);
  DBIDVar bestid = DBIDUtil.newVar();
  // We need three temporary storage arrays:
  WritableDoubleDataStore mindist, bestd, tempd;
  mindist = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP);
  bestd = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP);
  tempd = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP);
  // First mean is chosen by having the smallest distance sum to all others.
  {
    double best = Double.POSITIVE_INFINITY;
    FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Choosing initial mean", ids.size(), LOG) : null;
    for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
      double sum = 0, d;
      for (DBIDIter iter2 = ids.iter(); iter2.valid(); iter2.advance()) {
        // Accumulate the distance sum; remember each distance in tempd so it
        // can become the initial mindist if this candidate wins.
        sum += d = distQ.distance(iter, iter2);
        tempd.putDouble(iter2, d);
      }
      if (sum < best) {
        best = sum;
        bestid.set(iter);
        // Swap mindist and newd:
        // tempd now holds the winning distances; recycle the old mindist
        // buffer as the next scratch array instead of copying values.
        WritableDoubleDataStore temp = mindist;
        mindist = tempd;
        tempd = temp;
      }
      LOG.incrementProcessed(prog);
    }
    LOG.ensureCompleted(prog);
    medids.add(bestid);
  }
  assert (mindist != null);
  // Subsequent means optimize the full criterion.
  FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Choosing initial centers", k, LOG) : null;
  // First one was just chosen.
  LOG.incrementProcessed(prog);
  for (int i = 1; i < k; i++) {
    double best = Double.POSITIVE_INFINITY;
    bestid.unset();
    for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
      if (medids.contains(iter)) {
        // Already a medoid; cannot be chosen again.
        continue;
      }
      double sum = 0., v;
      for (DBIDIter iter2 = ids.iter(); iter2.valid(); iter2.advance()) {
        // Nearest-medoid distance of iter2 if the candidate iter were added:
        // either the new candidate, or the previously nearest medoid.
        sum += v = MathUtil.min(distQ.distance(iter, iter2), mindist.doubleValue(iter2));
        tempd.put(iter2, v);
      }
      if (sum < best) {
        best = sum;
        bestid.set(iter);
        // Swap bestd and newd:
        WritableDoubleDataStore temp = bestd;
        bestd = tempd;
        tempd = temp;
      }
    }
    if (!bestid.isSet()) {
      throw new AbortException("No median found that improves the criterion function?!? Too many infinite distances.");
    }
    medids.add(bestid);
    // Swap bestd and mindist:
    // bestd holds the nearest-medoid distances including the newly accepted
    // medoid; promote it to mindist and recycle the old buffer.
    WritableDoubleDataStore temp = bestd;
    bestd = mindist;
    mindist = temp;
    LOG.incrementProcessed(prog);
  }
  LOG.ensureCompleted(prog);
  // Release the temporary storage.
  mindist.destroy();
  bestd.destroy();
  tempd.destroy();
  return medids;
}
Use of de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs in the ELKI project (elki-project): class KNNKernelDensityMinimaClustering, method run.
/**
 * Run the clustering algorithm on a data relation.
 *
 * Step 1 estimates a kernel density for every object from its k nearest
 * neighbors in the single dimension {@code dim}; because the objects are
 * processed in sorted order, those neighbors lie in a contiguous window of
 * the sorted id array. Step 2 scans the resulting density sequence for local
 * minima (within a sliding window of 2*minwindow+1 values) and cuts cluster
 * boundaries there.
 *
 * @param relation Relation
 * @return Clustering result
 */
public Clustering<ClusterModel> run(Relation<V> relation) {
  ArrayModifiableDBIDs ids = DBIDUtil.newArray(relation.getDBIDs());
  final int size = ids.size();
  // Sort by the sole dimension
  ids.sort(new VectorUtil.SortDBIDsBySingleDimension(relation, dim));
  // Density storage.
  WritableDoubleDataStore density = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP, 0.);
  // Two iterators over the sorted ids: iter for the current object,
  // iter2 for its neighborhood window.
  DBIDArrayIter iter = ids.iter(), iter2 = ids.iter();
  StepProgress sprog = LOG.isVerbose() ? new StepProgress("Clustering steps", 2) : null;
  LOG.beginStep(sprog, 1, "Kernel density estimation.");
  {
    // 1d distances to up to k predecessors and k successors.
    double[] scratch = new double[2 * k];
    iter.seek(0);
    for (int i = 0; i < size; i++, iter.advance()) {
      // Current value.
      final double curv = relation.get(iter).doubleValue(dim);
      // Window of up to k neighbors on either side, clipped at the borders.
      final int pre = Math.max(i - k, 0), prek = i - pre;
      final int pos = Math.min(i + k, size - 1), posk = pos - i;
      // Distances to the preceding neighbors:
      iter2.seek(pre);
      for (int j = 0; j < prek; j++, iter2.advance()) {
        scratch[j] = curv - relation.get(iter2).doubleValue(dim);
      }
      assert (iter2.getOffset() == i);
      iter2.advance();
      // Distances to the following neighbors:
      for (int j = 0; j < posk; j++, iter2.advance()) {
        scratch[prek + j] = relation.get(iter2).doubleValue(dim) - curv;
      }
      assert (prek + posk >= k);
      // kNN distance = kernel bandwidth for this object.
      double kdist = QuickSelect.quickSelect(scratch, 0, prek + posk, k);
      switch(mode) {
      case BALLOON:
      {
        // Balloon estimator: density at i from i's own bandwidth.
        double dens = 0.;
        if (kdist > 0.) {
          for (int j = 0; j < prek + posk; j++) {
            dens += kernel.density(scratch[j] / kdist);
          }
        } else {
          // Duplicate values: zero bandwidth means infinite density.
          dens = Double.POSITIVE_INFINITY;
        }
        assert (iter.getOffset() == i);
        density.putDouble(iter, dens);
        break;
      }
      case SAMPLE:
      {
        // Sample-point estimator: i contributes density to each neighbor,
        // using i's bandwidth.
        if (kdist > 0.) {
          iter2.seek(pre);
          for (int j = 0; j < prek; j++, iter2.advance()) {
            double delta = curv - relation.get(iter2).doubleValue(dim);
            density.putDouble(iter2, density.doubleValue(iter2) + kernel.density(delta / kdist));
          }
          assert (iter2.getOffset() == i);
          iter2.advance();
          for (int j = 0; j < posk; j++, iter2.advance()) {
            double delta = relation.get(iter2).doubleValue(dim) - curv;
            density.putDouble(iter2, density.doubleValue(iter2) + kernel.density(delta / kdist));
          }
        } else {
          // Zero bandwidth: mark coinciding neighbors as infinitely dense.
          iter2.seek(pre);
          for (int j = 0; j < prek; j++, iter2.advance()) {
            double delta = curv - relation.get(iter2).doubleValue(dim);
            if (!(delta > 0.)) {
              density.putDouble(iter2, Double.POSITIVE_INFINITY);
            }
          }
          assert (iter2.getOffset() == i);
          iter2.advance();
          for (int j = 0; j < posk; j++, iter2.advance()) {
            double delta = relation.get(iter2).doubleValue(dim) - curv;
            if (!(delta > 0.)) {
              density.putDouble(iter2, Double.POSITIVE_INFINITY);
            }
          }
        }
        break;
      }
      default:
        throw new UnsupportedOperationException("Unknown mode specified.");
      }
    }
  }
  LOG.beginStep(sprog, 2, "Local minima detection.");
  Clustering<ClusterModel> clustering = new Clustering<>("onedimensional-kde-clustering", "One-Dimensional clustering using kernel density estimation.");
  {
    // Ring buffer over the last 2*minwindow+1 density values.
    double[] scratch = new double[2 * minwindow + 1];
    int begin = 0;
    int halfw = (minwindow + 1) >> 1;
    iter.seek(0);
    // Fill initial buffer.
    for (int i = 0; i < size; i++, iter.advance()) {
      // m: ring slot of the newest value; t: slot of the window center.
      final int m = i % scratch.length, t = (i - minwindow - 1) % scratch.length;
      scratch[m] = density.doubleValue(iter);
      // NOTE(review): `>` skips the very first completely filled window
      // (i == scratch.length); `>=` would test it too — confirm whether this
      // off-by-one is intentional.
      if (i > scratch.length) {
        double min = Double.POSITIVE_INFINITY;
        for (int j = 0; j < scratch.length; j++) {
          if (j != t && scratch[j] < min) {
            min = scratch[j];
          }
        }
        // Local minimum:
        if (scratch[t] < min) {
          int end = i - minwindow + 1;
          {
            // Test on which side the kNN is
            iter2.seek(end);
            double curv = relation.get(iter2).doubleValue(dim);
            iter2.seek(end - halfw);
            double left = relation.get(iter2).doubleValue(dim) - curv;
            iter2.seek(end + halfw);
            double right = curv - relation.get(iter2).doubleValue(dim);
            if (left < right) {
              end++;
            }
          }
          // Cut cluster [begin, end) at the minimum.
          iter2.seek(begin);
          ArrayModifiableDBIDs cids = DBIDUtil.newArray(end - begin);
          for (int j = 0; j < end - begin; j++, iter2.advance()) {
            cids.add(iter2);
          }
          clustering.addToplevelCluster(new Cluster<>(cids, ClusterModel.CLUSTER));
          begin = end;
        }
      }
    }
    // Extract last cluster
    int end = size;
    iter2.seek(begin);
    ArrayModifiableDBIDs cids = DBIDUtil.newArray(end - begin);
    for (int j = 0; j < end - begin; j++, iter2.advance()) {
      cids.add(iter2);
    }
    clustering.addToplevelCluster(new Cluster<>(cids, ClusterModel.CLUSTER));
  }
  LOG.ensureCompleted(sprog);
  return clustering;
}
Use of de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs in the ELKI project (elki-project): class FarthestSumPointsInitialMeans, method chooseInitialMedoids.
/**
 * Choose k initial medoids by iteratively picking the object with the largest
 * accumulated distance sum to all previously chosen means.
 *
 * A random object serves as temporary seed; if {@code dropfirst} is set, it
 * is discarded again after the first farthest point has been found. Already
 * chosen objects are marked by a NaN distance sum so they cannot be picked a
 * second time.
 *
 * @param k Number of medoids to choose
 * @param ids Candidate object ids
 * @param distQ Distance query to use
 * @return the chosen medoid ids
 */
@Override
public DBIDs chooseInitialMedoids(int k, DBIDs ids, DistanceQuery<? super O> distQ) {
  @SuppressWarnings("unchecked")
  final Relation<O> relation = (Relation<O>) distQ.getRelation();
  // Accumulated distance to all chosen means; NaN marks chosen objects.
  WritableDoubleDataStore sums = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP, 0.);
  ArrayModifiableDBIDs means = DBIDUtil.newArray(k);
  DBIDRef seed = DBIDUtil.randomSample(ids, rnd);
  means.add(seed);
  DBIDVar lastmean = DBIDUtil.newVar(seed), farthest = DBIDUtil.newVar(seed);
  for (int i = dropfirst ? 0 : 1; i < k; i++) {
    // Find the object with the largest distance sum:
    double max = Double.NEGATIVE_INFINITY;
    for (DBIDIter it = relation.iterDBIDs(); it.valid(); it.advance()) {
      final double cur = sums.doubleValue(it);
      if (Double.isNaN(cur)) {
        // Already chosen as a mean.
        continue;
      }
      final double dsum = cur + distQ.distance(lastmean, it);
      if (i > 0) {
        // Skip storing distances to the seed when it will be dropped below.
        sums.putDouble(it, dsum);
      }
      if (dsum > max) {
        max = dsum;
        farthest.set(it);
      }
    }
    if (i == 0) {
      // Discard the temporary random seed.
      means.clear();
    }
    // NaN prevents choosing the same object twice.
    sums.putDouble(farthest, Double.NaN);
    lastmean.set(farthest);
    means.add(farthest);
  }
  sums.destroy();
  return means;
}
Use of de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs in the ELKI project (elki-project): class ExternalClustering, method attachToRelation.
/**
 * Build a clustering from the file result.
 *
 * Two passes over the assignment: first count the members per cluster number
 * (so the id arrays can be allocated at exact size), then distribute the
 * DBIDs in array order. Negative cluster numbers are treated as noise.
 *
 * @param database Database
 * @param r Result to attach to
 * @param assignment Cluster assignment
 * @param name Name
 */
private void attachToRelation(Database database, Relation<?> r, IntArrayList assignment, ArrayList<String> name) {
  DBIDs ids = r.getDBIDs();
  if (!(ids instanceof ArrayDBIDs)) {
    throw new AbortException("External clusterings can only be used with static DBIDs.");
  }
  // Pass 1: count the members of each cluster number.
  Int2IntOpenHashMap csize = new Int2IntOpenHashMap();
  for (IntListIterator ait = assignment.iterator(); ait.hasNext(); ) {
    csize.addTo(ait.nextInt(), 1);
  }
  // Allocate exactly-sized DBID arrays per cluster number.
  Int2ObjectOpenHashMap<ArrayModifiableDBIDs> members = new Int2ObjectOpenHashMap<>(csize.size());
  for (ObjectIterator<Int2IntMap.Entry> sit = csize.int2IntEntrySet().fastIterator(); sit.hasNext(); ) {
    Int2IntMap.Entry ent = sit.next();
    members.put(ent.getIntKey(), DBIDUtil.newArray(ent.getIntValue()));
  }
  // Pass 2: distribute the ids into their clusters, in array order.
  {
    DBIDArrayIter dit = ((ArrayDBIDs) ids).iter();
    for (int i = 0; i < assignment.size(); i++) {
      members.get(assignment.getInt(i)).add(dit.seek(i));
    }
  }
  String nam = FormatUtil.format(name, " ");
  String snam = nam.toLowerCase().replace(' ', '-');
  Clustering<ClusterModel> result = new Clustering<>(nam, snam);
  for (ObjectIterator<Int2ObjectMap.Entry<ArrayModifiableDBIDs>> cit = members.int2ObjectEntrySet().fastIterator(); cit.hasNext(); ) {
    Int2ObjectMap.Entry<ArrayModifiableDBIDs> ent = cit.next();
    // Negative cluster numbers denote noise clusters.
    result.addToplevelCluster(new Cluster<>(ent.getValue(), ent.getIntKey() < 0, ClusterModel.CLUSTER));
  }
  database.getHierarchy().add(r, result);
}
Use of de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs in the ELKI project (elki-project): class RdKNNTree, method bulkReverseKNNQueryForID.
/**
 * Bulk reverse-kNN query for a set of query objects.
 *
 * The tree stores kNN distances for k = k_max, so candidates are exact for
 * k == k_max; for smaller k, the candidate lists are refined by running a
 * kNN query for every candidate and keeping only candidates that actually
 * have the query object among their k nearest neighbors.
 *
 * Fix: iterate the candidate map via entrySet() instead of keySet() plus a
 * get() per key (avoids a redundant hash lookup); presize the result lists.
 *
 * @param ids Query object ids
 * @param k Number of neighbors, must be &lt;= k_max
 * @param distanceFunction Distance function (checked for compatibility)
 * @param knnQuery kNN query used for candidate refinement
 * @return one (sorted) reverse-kNN list per query object
 */
public List<ModifiableDoubleDBIDList> bulkReverseKNNQueryForID(DBIDs ids, int k, SpatialPrimitiveDistanceFunction<? super O> distanceFunction, KNNQuery<O> knnQuery) {
  checkDistanceFunction(distanceFunction);
  if (k > settings.k_max) {
    throw new IllegalArgumentException("Parameter k is not supported, k > k_max: " + k + " > " + settings.k_max);
  }
  // get candidates
  Map<DBID, ModifiableDoubleDBIDList> candidateMap = new HashMap<>();
  for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
    candidateMap.put(DBIDUtil.deref(iter), DBIDUtil.newDistanceDBIDList());
  }
  doBulkReverseKNN(getRoot(), ids, candidateMap);
  if (k == settings.k_max) {
    // Stored kNN distances match the requested k: candidates are exact.
    List<ModifiableDoubleDBIDList> resultList = new ArrayList<>(candidateMap.size());
    for (ModifiableDoubleDBIDList candidates : candidateMap.values()) {
      candidates.sort();
      resultList.add(candidates);
    }
    return resultList;
  }
  // refinement of candidates, if k < k_max
  // perform a knn query for the candidates
  ArrayModifiableDBIDs candidateIDs = DBIDUtil.newArray();
  for (ModifiableDoubleDBIDList candidates : candidateMap.values()) {
    candidateIDs.addDBIDs(candidates);
  }
  // Sorted, so the kNN lists can be located via binary search below.
  candidateIDs.sort();
  List<? extends KNNList> knnLists = knnQuery.getKNNForBulkDBIDs(candidateIDs, k);
  // and add candidate c to the result if o is a knn of c
  List<ModifiableDoubleDBIDList> resultList = new ArrayList<>(candidateMap.size());
  for (Map.Entry<DBID, ModifiableDoubleDBIDList> ent : candidateMap.entrySet()) {
    DBID id = ent.getKey();
    ModifiableDoubleDBIDList candidates = ent.getValue();
    ModifiableDoubleDBIDList result = DBIDUtil.newDistanceDBIDList();
    for (DoubleDBIDListIter candidate = candidates.iter(); candidate.valid(); candidate.advance()) {
      int pos = candidateIDs.binarySearch(candidate);
      assert (pos >= 0);
      for (DoubleDBIDListIter qr = knnLists.get(pos).iter(); qr.valid(); qr.advance()) {
        if (DBIDUtil.equal(id, qr)) {
          // Keep the candidate, with its true kNN distance to the query.
          result.add(qr.doubleValue(), candidate);
          break;
        }
      }
    }
    resultList.add(result);
  }
  return resultList;
}
Aggregations