use of de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore in project elki by elki-project.
the class ODIN method run.
/**
 * Execute ODIN: every object is scored by how often it shows up in the
 * k-nearest-neighbor lists of the other objects (its in-degree).
 *
 * Tutorial note: the <em>signature</em> of this method follows from the types
 * requested in the {@link #getInputTypeRestriction} method. A single relation
 * of type {@code O} was requested there, which is the data type of the
 * distance function.
 *
 * @param database Database to run on.
 * @param relation Relation to process.
 * @return ODIN outlier result.
 */
public OutlierResult run(Database database, Relation<O> relation) {
  // Query objects: distances first, then the kNN search built on top of them.
  final DistanceQuery<O> dq = database.getDistanceQuery(relation, getDistanceFunction());
  final KNNQuery<O> knnq = database.getKNNQuery(dq, k);
  // Objects to process; the store holds the in-degree counters (initially 0)
  // and is also what ends up in the result.
  final DBIDs ids = relation.getDBIDs();
  final WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_DB, 0.);

  // Count, for every object, in how many neighbor lists it appears.
  DBIDIter query = ids.iter();
  while (query.valid()) {
    // Nearest neighbors of the current object (the query may be index-backed).
    final KNNList neighbors = knnq.getKNNForDBID(query, k);
    DBIDIter nei = neighbors.iter();
    while (nei.valid()) {
      // The query object itself does not contribute to its own in-degree.
      if (!DBIDUtil.equal(query, nei)) {
        scores.put(nei, scores.doubleValue(nei) + 1);
      }
      nei.advance();
    }
    query.advance();
  }

  // Observed score range (both minimum and maximum).
  double min = Double.POSITIVE_INFINITY, max = 0.0;
  DBIDIter scan = ids.iter();
  while (scan.valid()) {
    final double score = scores.doubleValue(scan);
    min = Math.min(min, score);
    max = Math.max(max, score);
    scan.advance();
  }

  // Package the scores together with their metadata. Supplying the theoretical
  // minimum (0), theoretical maximum (n - 1) and baseline (k) in addition to
  // the observed range gives a better visualization.
  final DoubleRelation rel = new MaterializedDoubleRelation("ODIN In-Degree", "odin", scores, ids);
  final OutlierScoreMeta meta = new InvertedOutlierScoreMeta(min, max, 0., ids.size() - 1, k);
  return new OutlierResult(meta, rel);
}
use of de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore in project elki by elki-project.
the class FarthestPointsInitialMeans method chooseInitialMedoids.
/**
 * Choose initial medoids by farthest-point traversal: starting from a random
 * object, repeatedly pick the object whose distance to its nearest already
 * chosen medoid is largest.
 *
 * If {@code dropfirst} is set, the random starting object only serves as the
 * seed for the first farthest-point search and is not part of the result.
 *
 * @param k Number of medoids to choose
 * @param ids Candidate objects; all medoids are taken from this set
 * @param distQ Distance query used to compare objects
 * @return The chosen medoids
 */
@Override
public DBIDs chooseInitialMedoids(int k, DBIDs ids, DistanceQuery<? super O> distQ) {
  // Per object: distance to the nearest medoid chosen so far. Starts at
  // +infinity (no medoid yet); NaN marks objects that were already chosen.
  WritableDoubleDataStore store = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP, Double.POSITIVE_INFINITY);
  ArrayModifiableDBIDs means = DBIDUtil.newArray(k);
  DBIDRef first = DBIDUtil.randomSample(ids, rnd);
  DBIDVar prevmean = DBIDUtil.newVar(first);
  means.add(first);
  DBIDVar best = DBIDUtil.newVar(first);
  // With dropfirst, one extra round (i == 0) replaces the random seed object.
  for (int i = (dropfirst ? 0 : 1); i < k; i++) {
    // Find farthest object:
    double maxdist = Double.NEGATIVE_INFINITY;
    // Iterate the candidate set only: the store was allocated for exactly
    // these objects, and medoids must not be taken from outside of it.
    for (DBIDIter it = ids.iter(); it.valid(); it.advance()) {
      final double prev = store.doubleValue(it);
      if (Double.isNaN(prev)) {
        // NaN: already chosen!
        continue;
      }
      double val = Math.min(prev, distQ.distance(prevmean, it));
      // Don't store distance to first mean, when it will be dropped below.
      if (i > 0) {
        store.putDouble(it, val);
      }
      if (val > maxdist) {
        maxdist = val;
        best.set(it);
      }
    }
    // Add new mean:
    if (i == 0) {
      // Remove temporary first element.
      means.clear();
    }
    // So it won't be chosen twice.
    store.putDouble(best, Double.NaN);
    prevmean.set(best);
    means.add(best);
  }
  // Explicitly release the temporary distance store.
  store.destroy();
  return means;
}
use of de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore in project elki by elki-project.
the class KMeansPlusPlusInitialMeans method chooseInitialMeans.
/**
 * Choose k initial means with k-means++ style sampling: the first mean is a
 * uniformly random object, every further mean is drawn with probability
 * proportional to the per-object weight maintained by {@code initialWeights}
 * and {@code updateWeights}.
 *
 * @param database Database providing the distance query
 * @param relation Relation holding the vectors to choose from
 * @param k Number of means to choose
 * @param distanceFunction Distance function used for weighting
 * @return The chosen means, converted by {@code unboxVectors}
 * @throws AbortException if the relation does not contain more than k objects
 */
@Override
public <T extends NumberVector> double[][] chooseInitialMeans(Database database, Relation<T> relation, int k, NumberVectorDistanceFunction<? super T> distanceFunction) {
  DBIDs ids = relation.getDBIDs();
  // Validate first, so that an abort does not leave a temporary store behind.
  if (ids.size() <= k) {
    throw new AbortException("Don't use k-means with k >= data set size.");
  }
  DistanceQuery<T> distQ = database.getDistanceQuery(relation, distanceFunction);
  WritableDoubleDataStore weights = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP, 0.);
  // Choose first mean
  List<NumberVector> means = new ArrayList<>(k);
  Random random = rnd.getSingleThreadedRandom();
  DBIDRef first = DBIDUtil.randomSample(ids, random);
  T firstvec = relation.get(first);
  means.add(firstvec);
  // Initialize weights
  double weightsum = initialWeights(weights, ids, firstvec, distQ);
  // Guarding on the size (instead of looping unconditionally) keeps k == 1
  // from producing a second mean.
  while (means.size() < k) {
    if (weightsum > Double.MAX_VALUE) {
      LoggingUtil.warning("Could not choose a reasonable mean for k-means++ - too many data points, too large squared distances?");
    }
    if (weightsum < Double.MIN_NORMAL) {
      LoggingUtil.warning("Could not choose a reasonable mean for k-means++ - to few data points?");
    }
    double r = random.nextDouble() * weightsum;
    while (r <= 0 && weightsum > Double.MIN_NORMAL) {
      // Try harder to not choose 0, which would select the first object
      // regardless of its weight.
      r = random.nextDouble() * weightsum;
    }
    // Walk the cumulative weights and stop ON the object where the running
    // sum reaches r. The iterator must not be advanced past that object.
    // The negated comparison also terminates the scan if r or s is NaN.
    double s = 0.;
    DBIDIter it = ids.iter();
    while (it.valid()) {
      s += weights.doubleValue(it);
      if (!(s < r)) {
        break;
      }
      it.advance();
    }
    if (!it.valid()) {
      // Rare case, but happens due to floating math: the tracked sum is
      // larger than the actual total s. Decrease it by the overshoot.
      weightsum -= (r - s);
      // Retry
      continue;
    }
    // Add new mean:
    final T newmean = relation.get(it);
    means.add(newmean);
    if (means.size() >= k) {
      // No need to update the weights after the last mean.
      break;
    }
    // Update weights: the chosen object must not be drawn again.
    weights.putDouble(it, 0.);
    // Choose optimized version for double distances, if applicable.
    weightsum = updateWeights(weights, ids, newmean, distQ);
  }
  // Explicitly destroy temporary data.
  weights.destroy();
  return unboxVectors(means);
}
use of de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore in project elki by elki-project.
the class KMeansPlusPlusInitialMeans method chooseInitialMedoids.
/**
 * Choose k initial medoids with k-means++ style sampling: the first medoid is
 * a uniformly random object of {@code ids}, every further medoid is drawn
 * with probability proportional to the per-object weight maintained by
 * {@code initialWeights} and {@code updateWeights}.
 *
 * @param k Number of medoids to choose
 * @param ids Candidate objects
 * @param distQ Distance query; its relation is used to fetch the objects
 * @return The chosen medoids
 */
@Override
public DBIDs chooseInitialMedoids(int k, DBIDs ids, DistanceQuery<? super O> distQ) {
  @SuppressWarnings("unchecked") final Relation<O> rel = (Relation<O>) distQ.getRelation();
  ArrayModifiableDBIDs means = DBIDUtil.newArray(k);
  WritableDoubleDataStore weights = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP, 0.);
  Random random = rnd.getSingleThreadedRandom();
  DBIDRef first = DBIDUtil.randomSample(ids, random);
  means.add(first);
  // Initialize weights
  double weightsum = initialWeights(weights, ids, rel.get(first), distQ);
  // Guarding on the size (instead of looping unconditionally) keeps k == 1
  // from producing a second medoid.
  while (means.size() < k) {
    if (weightsum > Double.MAX_VALUE) {
      LoggingUtil.warning("Could not choose a reasonable mean for k-means++ - too many data points, too large squared distances?");
    }
    if (weightsum < Double.MIN_NORMAL) {
      LoggingUtil.warning("Could not choose a reasonable mean for k-means++ - to few unique data points?");
    }
    double r = random.nextDouble() * weightsum;
    while (r <= 0 && weightsum > Double.MIN_NORMAL) {
      // Try harder to not choose 0.
      r = random.nextDouble() * weightsum;
    }
    // Subtract weights until r is used up and stop ON that object. The
    // iterator must not be advanced past it. The negated comparison also
    // terminates the scan if r is NaN.
    DBIDIter it = ids.iter();
    while (it.valid()) {
      r -= weights.doubleValue(it);
      if (!(r > 0.)) {
        break;
      }
      it.advance();
    }
    if (!it.valid()) {
      // Rare case caused by floating point math: the tracked sum exceeds the
      // actual total by the remaining r. Decrease it and retry instead of
      // adding an exhausted iterator.
      weightsum -= r;
      continue;
    }
    // Add new mean:
    means.add(it);
    if (means.size() >= k) {
      // No need to update the weights after the last medoid.
      break;
    }
    // Update weights: the chosen object must not be drawn again.
    weights.putDouble(it, 0.);
    weightsum = updateWeights(weights, ids, rel.get(it), distQ);
  }
  // Explicitly destroy temporary data.
  weights.destroy();
  return means;
}
use of de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore in project elki by elki-project.
the class SLINKHDBSCANLinearMemory method run.
/**
 * Run the algorithm: process the objects one by one, maintaining the pointer
 * representation ({@code pi}, {@code lambda}) of the hierarchy via the four
 * SLINK steps, where step 2 additionally uses the core distances.
 *
 * @param db Database
 * @param relation Relation
 * @return Clustering hierarchy
 */
public PointerDensityHierarchyRepresentationResult run(Database db, Relation<O> relation) {
  final DistanceQuery<O> distQ = db.getDistanceQuery(relation, getDistanceFunction());
  final KNNQuery<O> knnQ = db.getKNNQuery(distQ, minPts);
  // We need array addressing later.
  final ArrayDBIDs ids = DBIDUtil.ensureArray(relation.getDBIDs());
  // Compute the core distances.
  // NOTE(review): an earlier comment here said "minPts + 1: ignore query
  // point", but plain minPts is passed - confirm how computeCoreDists treats
  // the query point.
  final WritableDoubleDataStore coredists = computeCoreDists(ids, knnQ, minPts);
  // Pointer representation; both stores become part of the returned result.
  WritableDBIDDataStore pi = DataStoreUtil.makeDBIDStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_STATIC);
  WritableDoubleDataStore lambda = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_STATIC, Double.POSITIVE_INFINITY);
  // Temporary storage for m.
  WritableDoubleDataStore m = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP);
  FiniteProgress progress = LOG.isVerbose() ? new FiniteProgress("Running HDBSCAN*-SLINK", ids.size(), LOG) : null;
  // has to be an array for monotonicity reasons!
  ModifiableDBIDs processedIDs = DBIDUtil.newArray(ids.size());
  for (DBIDIter id = ids.iter(); id.valid(); id.advance()) {
    // Steps 1,3,4 are exactly as in SLINK
    step1(id, pi, lambda);
    // Step 2 is modified to use a different distance
    step2(id, processedIDs, distQ, coredists, m);
    step3(id, pi, lambda, processedIDs, m);
    step4(id, pi, lambda, processedIDs);
    // Only now does the current object count as processed for later rounds.
    processedIDs.add(id);
    LOG.incrementProcessed(progress);
  }
  LOG.ensureCompleted(progress);
  // m is not part of the result: explicitly release the temporary store.
  m.destroy();
  return new PointerDensityHierarchyRepresentationResult(ids, pi, lambda, distQ.getDistanceFunction().isSquared(), coredists);
}
Aggregations