use of de.lmu.ifi.dbs.elki.database.ids.DBIDIter in project elki by elki-project.
the class PAMInitialMeans method chooseInitialMeans.
@Override
public <T extends NumberVector> double[][] chooseInitialMeans(Database database, Relation<T> relation, int k, NumberVectorDistanceFunction<? super T> distanceFunction) {
  if (relation.size() < k) {
    throw new AbortException("Database has less than k objects.");
  }
  // Ugly cast; but better than code duplication.
  @SuppressWarnings("unchecked")
  Relation<O> rel = (Relation<O>) relation;
  // Get a distance query
  @SuppressWarnings("unchecked")
  final PrimitiveDistanceFunction<? super O> distF = (PrimitiveDistanceFunction<? super O>) distanceFunction;
  final DistanceQuery<O> distQ = database.getDistanceQuery(rel, distF);
  DBIDs medids = chooseInitialMedoids(k, rel.getDBIDs(), distQ);
  double[][] medoids = new double[k][];
  DBIDIter iter = medids.iter();
  for (int i = 0; i < k; i++, iter.advance()) {
    medoids[i] = relation.get(iter).toArray();
  }
  return medoids;
}
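All of these examples share the ELKI iterator idiom visible in the loop above: a DBIDIter is advanced explicitly via valid()/advance() rather than through java.util.Iterator, so no per-element objects are allocated, and the iterator itself can be passed anywhere a DBIDRef is accepted. A minimal sketch of the idiom, assuming ids and relation are in scope:

for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
  // The iterator doubles as a DBIDRef; no boxing into a DBID object.
  NumberVector vec = relation.get(iter);
  // ... use vec ...
}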
use of de.lmu.ifi.dbs.elki.database.ids.DBIDIter in project elki by elki-project.
the class SLINKHDBSCANLinearMemory method run.
/**
 * Run the algorithm
 *
 * @param db Database
 * @param relation Relation
 * @return Clustering hierarchy
 */
public PointerDensityHierarchyRepresentationResult run(Database db, Relation<O> relation) {
  final DistanceQuery<O> distQ = db.getDistanceQuery(relation, getDistanceFunction());
  final KNNQuery<O> knnQ = db.getKNNQuery(distQ, minPts);
  // We need array addressing later.
  final ArrayDBIDs ids = DBIDUtil.ensureArray(relation.getDBIDs());
  // Compute the core distances
  // minPts + 1: ignore query point.
  final WritableDoubleDataStore coredists = computeCoreDists(ids, knnQ, minPts);
  WritableDBIDDataStore pi = DataStoreUtil.makeDBIDStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_STATIC);
  WritableDoubleDataStore lambda = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_STATIC, Double.POSITIVE_INFINITY);
  // Temporary storage for m.
  WritableDoubleDataStore m = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP);
  FiniteProgress progress = LOG.isVerbose() ? new FiniteProgress("Running HDBSCAN*-SLINK", ids.size(), LOG) : null;
  // Has to be an array for monotonicity reasons!
  ModifiableDBIDs processedIDs = DBIDUtil.newArray(ids.size());
  for (DBIDIter id = ids.iter(); id.valid(); id.advance()) {
    // Steps 1, 3, and 4 are exactly as in SLINK
    step1(id, pi, lambda);
    // Step 2 is modified to use a different distance
    step2(id, processedIDs, distQ, coredists, m);
    step3(id, pi, lambda, processedIDs, m);
    step4(id, pi, lambda, processedIDs);
    processedIDs.add(id);
    LOG.incrementProcessed(progress);
  }
  LOG.ensureCompleted(progress);
  return new PointerDensityHierarchyRepresentationResult(ids, pi, lambda, distQ.getDistanceFunction().isSquared(), coredists);
}
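computeCoreDists is not shown on this page. A plausible sketch of what it computes, assuming the HDBSCAN* definition (the core distance of a point is the distance to its minPts-nearest neighbor); the storage hints and the details of the actual ELKI implementation may differ:

WritableDoubleDataStore coredists = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_STATIC);
for (DBIDIter it = ids.iter(); it.valid(); it.advance()) {
  // minPts + 1 because the kNN result includes the query point itself.
  KNNList knn = knnQ.getKNNForDBID(it, minPts + 1);
  coredists.putDouble(it, knn.getKNNDistance());
}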
use of de.lmu.ifi.dbs.elki.database.ids.DBIDIter in project elki by elki-project.
the class SLINKHDBSCANLinearMemory method step2.
/**
 * Second step: Determine the pairwise distances from all objects in the
 * pointer representation to the new object with the specified id.
 *
 * @param id the id of the object to be inserted into the pointer
 *        representation
 * @param processedIDs the already processed ids
 * @param distQuery Distance query
 * @param coredists Core distances
 * @param m Data store
 */
private void step2(DBIDRef id, DBIDs processedIDs, DistanceQuery<? super O> distQuery, DoubleDataStore coredists, WritableDoubleDataStore m) {
  double coreP = coredists.doubleValue(id);
  for (DBIDIter it = processedIDs.iter(); it.valid(); it.advance()) {
    // M(i) = max(core(i), core(n+1), dist(i, n+1))
    double coreQ = coredists.doubleValue(it);
    double dist = MathUtil.max(coreP, coreQ, distQuery.distance(id, it));
    m.putDouble(it, dist);
  }
}
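The value stored in m is the mutual reachability distance of HDBSCAN*, d_mreach(p, q) = max(core(p), core(q), d(p, q)), which replaces the plain distance used by the original SLINK step 2. Pulled out as a standalone helper for illustration (not part of the ELKI API):

// Illustrative helper only: the HDBSCAN* mutual reachability distance.
static double mutualReachability(double coreP, double coreQ, double dist) {
  return MathUtil.max(coreP, coreQ, dist);
}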
use of de.lmu.ifi.dbs.elki.database.ids.DBIDIter in project elki by elki-project.
the class PreDeConNeighborPredicate method computeLocalModel.
@Override
protected PreDeConModel computeLocalModel(DBIDRef id, DoubleDBIDList neighbors, Relation<V> relation) {
  final int referenceSetSize = neighbors.size();
  mvSize.put(referenceSetSize);
  // Shouldn't happen, since the neighborhood includes the query point:
  if (referenceSetSize < 1) {
    LOG.warning("Empty reference set - should at least include the query point!");
    return new PreDeConModel(Integer.MAX_VALUE, DBIDUtil.EMPTYDBIDS);
  }
  V obj = relation.get(id);
  final int dim = obj.getDimensionality();
  // Per-dimension variances:
  double[] s = new double[dim];
  for (DBIDIter neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) {
    V o = relation.get(neighbor);
    for (int d = 0; d < dim; d++) {
      final double diff = obj.doubleValue(d) - o.doubleValue(d);
      s[d] += diff * diff;
    }
  }
  // Adjust for sample size
  for (int d = 0; d < dim; d++) {
    s[d] /= referenceSetSize;
    mvVar.put(s[d]);
  }
  // Preference weight vector
  double[] weights = new double[dim];
  int pdim = 0;
  for (int d = 0; d < dim; d++) {
    if (s[d] <= settings.delta) {
      weights[d] = settings.kappa;
      pdim++;
    } else {
      weights[d] = 1.;
    }
  }
  // Check which neighbors survive
  HashSetModifiableDBIDs survivors = DBIDUtil.newHashSet(referenceSetSize);
  for (DBIDIter neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) {
    V o = relation.get(neighbor);
    // Weighted Euclidean distance:
    double dev = 0.;
    for (int d = 0; d < dim; d++) {
      final double diff = obj.doubleValue(d) - o.doubleValue(d);
      dev += weights[d] * diff * diff;
    }
    // Note: epsilon was squared - this saves us the sqrt here:
    if (dev <= epsilon) {
      survivors.add(neighbor);
    }
  }
  return new PreDeConModel(pdim, survivors);
}
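The survivor test above uses a preference-weighted squared Euclidean distance: dimensions with variance at most delta receive the large weight kappa, so points deviating in a preferred dimension are pushed out of the epsilon ball. As a standalone helper for illustration (hypothetical name, not part of the ELKI API):

// Illustrative helper: preference-weighted squared Euclidean distance.
// Compare the result against a squared epsilon; no sqrt is needed.
static double weightedSquaredDistance(NumberVector u, NumberVector v, double[] weights) {
  double dev = 0.;
  for (int d = 0; d < weights.length; d++) {
    final double diff = u.doubleValue(d) - v.doubleValue(d);
    dev += weights[d] * diff * diff;
  }
  return dev;
}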
use of de.lmu.ifi.dbs.elki.database.ids.DBIDIter in project elki by elki-project.
the class BIRCHLeafClustering method run.
/**
 * Run the clustering algorithm.
 *
 * @param relation Input data
 * @return Clustering
 */
public Clustering<MeanModel> run(Relation<NumberVector> relation) {
  final int dim = RelationUtil.dimensionality(relation);
  CFTree tree = cffactory.newTree(relation.getDBIDs(), relation);
  // The CFTree does not store points, so we have to reassign them. This also
  // improves quality over the initial assignment: the centers move, in
  // particular early on, so the initial assignment would contain many outliers.
  Map<ClusteringFeature, ModifiableDBIDs> idmap = new HashMap<>(tree.leaves);
  for (DBIDIter iter = relation.iterDBIDs(); iter.valid(); iter.advance()) {
    ClusteringFeature cf = tree.findLeaf(relation.get(iter));
    ModifiableDBIDs ids = idmap.get(cf);
    if (ids == null) {
      idmap.put(cf, ids = DBIDUtil.newArray(cf.n));
    }
    ids.add(iter);
  }
  Clustering<MeanModel> result = new Clustering<>("BIRCH-leaves", "BIRCH leaves");
  for (Map.Entry<ClusteringFeature, ModifiableDBIDs> ent : idmap.entrySet()) {
    ClusteringFeature leaf = ent.getKey();
    double[] center = new double[dim];
    for (int i = 0; i < dim; i++) {
      center[i] = leaf.centroid(i);
    }
    result.addToplevelCluster(new Cluster<>(ent.getValue(), new MeanModel(center)));
  }
  return result;
}
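The null-check-and-insert used to fill idmap can be written more compactly with Map.computeIfAbsent (Java 8+); the behavior is identical:

idmap.computeIfAbsent(cf, key -> DBIDUtil.newArray(key.n)).add(iter);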