use of de.lmu.ifi.dbs.elki.database.ids.DBIDs in project elki by elki-project.
the class Eclat method extractItemsets.
/**
 * Recursively extend the current itemset prefix by one more item and collect
 * every extension that is still frequent.
 *
 * @param iset Ids supporting the current prefix
 * @param idx Id sets by item index; {@code null} entries are skipped
 * @param buf Scratch buffer; positions {@code [0, depth)} hold the prefix
 * @param depth Number of items already in the prefix
 * @param start First item index to try as an extension
 * @param minsupp Minimum size of the joined id set to keep an extension
 * @param solution Output list receiving the itemsets found
 */
private void extractItemsets(DBIDs iset, DBIDs[] idx, int[] buf, int depth, int start, int minsupp, List<Itemset> solution) {
  // TODO: reuse arrays.
  final int depth1 = depth + 1;
  for (int i = start; i < idx.length; i++) {
    if (idx[i] == null) {
      continue;
    }
    DBIDs ids = mergeJoin(iset, idx[i]);
    if (ids.size() < minsupp) {
      continue;
    }
    buf[depth] = i;
    if (depth1 >= minlength) {
      // Only copy the buffer when the itemset is actually reported.
      solution.add(new SparseItemset(Arrays.copyOf(buf, depth1), ids.size()));
    }
    // The recursive call reports itemsets of length depth1 + 1, so it must only
    // run while that is still within maxlength (the previous "<=" emitted
    // itemsets one item too long).
    // NOTE(review): assumes maxlength is an inclusive bound - confirm.
    if (depth1 < maxlength) {
      extractItemsets(ids, idx, buf, depth1, i + 1, minsupp, solution);
    }
  }
}
use of de.lmu.ifi.dbs.elki.database.ids.DBIDs in project elki by elki-project.
the class ODIN method run.
/**
 * Run the ODIN algorithm
 *
 * Tutorial note: the <em>signature</em> of this method depends on the types
 * that we requested in the {@link #getInputTypeRestriction} method. Here we
 * requested a single relation of type {@code O} , the data type of our
 * distance function.
 *
 * @param database Database to run on.
 * @param relation Relation to process.
 * @return ODIN outlier result.
 */
public OutlierResult run(Database database, Relation<O> relation) {
  // Get the query functions:
  DistanceQuery<O> dq = database.getDistanceQuery(relation, getDistanceFunction());
  KNNQuery<O> knnq = database.getKNNQuery(dq, k);
  // Get the objects to process, and a data storage for counting and output:
  DBIDs ids = relation.getDBIDs();
  WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_DB, 0.);
  // Process all objects
  for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
    // Find the nearest neighbors (using an index, if available!)
    KNNList neighbors = knnq.getKNNForDBID(iter, k);
    // For each neighbor, except ourselves, increase the in-degree:
    for (DBIDIter nei = neighbors.iter(); nei.valid(); nei.advance()) {
      if (DBIDUtil.equal(iter, nei)) {
        continue;
      }
      // Primitive accessor: avoids boxing the counter on every update.
      scores.putDouble(nei, scores.doubleValue(nei) + 1);
    }
  }
  // Compute the observed minimum and maximum in-degree. This needs a second
  // pass, because a count can still grow until all objects were processed.
  double min = Double.POSITIVE_INFINITY, max = 0.0;
  for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
    final double degree = scores.doubleValue(iter);
    min = Math.min(min, degree);
    max = Math.max(max, degree);
  }
  // Wrap the result and add metadata.
  // By actually specifying theoretical min, max and baseline, we get a better
  // visualization (try it out - or see the screenshots in the tutorial)!
  OutlierScoreMeta meta = new InvertedOutlierScoreMeta(min, max, 0., ids.size() - 1, k);
  DoubleRelation rel = new MaterializedDoubleRelation("ODIN In-Degree", "odin", scores, ids);
  return new OutlierResult(meta, rel);
}
use of de.lmu.ifi.dbs.elki.database.ids.DBIDs in project elki by elki-project.
the class CenterOfMassMetaClustering method run.
/**
 * Wrapper entry point: stores the center of mass of every uncertain object
 * and passes that store on to {@code runClusteringAlgorithm}.
 *
 * It is called from {@link AbstractAlgorithm#run(Database)}.
 *
 * @param database Database
 * @param relation Data relation of uncertain objects
 * @return Clustering result
 */
public C run(Database database, Relation<? extends UncertainObject> relation) {
  final int dim = RelationUtil.dimensionality(relation);
  DBIDs ids = relation.getDBIDs();
  // One center-of-mass vector per object id:
  WritableDataStore<DoubleVector> centers = DataStoreUtil.makeStorage(ids, DataStoreFactory.HINT_DB, DoubleVector.class);
  DBIDIter it = ids.iter();
  while (it.valid()) {
    centers.put(it, relation.get(it).getCenterOfMass());
    it.advance();
  }
  return runClusteringAlgorithm(database.getHierarchy(), relation, ids, centers, dim, "Uncertain Model: Center of Mass");
}
use of de.lmu.ifi.dbs.elki.database.ids.DBIDs in project elki by elki-project.
the class KMeansPlusPlusInitialMeans method chooseInitialMeans.
/**
 * Choose {@code k} initial means: the first one uniformly at random, every
 * further one at random with probability proportional to its current weight
 * (as maintained by {@code initialWeights} / {@code updateWeights}).
 *
 * @param database Database used to obtain the distance query
 * @param relation Data relation to choose from
 * @param k Number of means to choose
 * @param distanceFunction Distance function used for the weights
 * @return The chosen means, as arrays
 * @throws AbortException when the relation has at most {@code k} objects
 */
@Override
public <T extends NumberVector> double[][] chooseInitialMeans(Database database, Relation<T> relation, int k, NumberVectorDistanceFunction<? super T> distanceFunction) {
  DistanceQuery<T> distQ = database.getDistanceQuery(relation, distanceFunction);
  DBIDs ids = relation.getDBIDs();
  // Validate first, so that aborting does not leave the temporary weight
  // storage allocated but never destroyed.
  if (ids.size() <= k) {
    throw new AbortException("Don't use k-means with k >= data set size.");
  }
  WritableDoubleDataStore weights = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP, 0.);
  // Chose first mean
  List<NumberVector> means = new ArrayList<>(k);
  Random random = rnd.getSingleThreadedRandom();
  DBIDRef first = DBIDUtil.randomSample(ids, random);
  T firstvec = relation.get(first);
  means.add(firstvec);
  // Initialize weights
  double weightsum = initialWeights(weights, ids, firstvec, distQ);
  while (true) {
    if (weightsum > Double.MAX_VALUE) {
      LoggingUtil.warning("Could not choose a reasonable mean for k-means++ - too many data points, too large squared distances?");
    }
    if (weightsum < Double.MIN_NORMAL) {
      LoggingUtil.warning("Could not choose a reasonable mean for k-means++ - to few data points?");
    }
    double r = random.nextDouble() * weightsum, s = 0.;
    DBIDIter it = ids.iter();
    // Walk the cumulative weights and stop ON the object whose weight makes the
    // running sum reach r. The exit test must come before advance(): testing
    // it in the loop header moved the iterator one object too far, selecting
    // the successor of the sampled object (possibly one with weight 0).
    // "!(s < r)" rather than "s >= r" keeps the loop terminating if the sum is
    // NaN.
    for (; it.valid(); it.advance()) {
      s += weights.doubleValue(it);
      if (!(s < r)) {
        break;
      }
    }
    if (!it.valid()) {
      // Rare case, but happens due to floating math
      // Decrease
      weightsum -= (r - s);
      // Retry
      continue;
    }
    // Add new mean:
    final T newmean = relation.get(it);
    means.add(newmean);
    if (means.size() >= k) {
      break;
    }
    // Update weights:
    weights.putDouble(it, 0.);
    // Choose optimized version for double distances, if applicable.
    weightsum = updateWeights(weights, ids, newmean, distQ);
  }
  // Explicitly destroy temporary data.
  weights.destroy();
  return unboxVectors(means);
}
use of de.lmu.ifi.dbs.elki.database.ids.DBIDs in project elki by elki-project.
the class PAMInitialMeans method chooseInitialMeans.
/**
 * Choose initial means by selecting {@code k} medoids with
 * {@code chooseInitialMedoids} and returning their vectors as arrays.
 *
 * @param database Database used to obtain the distance query
 * @param relation Data relation to choose from
 * @param k Number of means to choose
 * @param distanceFunction Distance function; cast to a primitive distance
 *        function on {@code O}
 * @return One array per chosen medoid
 * @throws AbortException when the relation has fewer than {@code k} objects
 */
@Override
public <T extends NumberVector> double[][] chooseInitialMeans(Database database, Relation<T> relation, int k, NumberVectorDistanceFunction<? super T> distanceFunction) {
  if (k > relation.size()) {
    throw new AbortException("Database has less than k objects.");
  }
  // The medoid selection is typed on O; cast instead of duplicating the code.
  @SuppressWarnings("unchecked") Relation<O> rel = (Relation<O>) relation;
  @SuppressWarnings("unchecked") final PrimitiveDistanceFunction<? super O> distF = (PrimitiveDistanceFunction<? super O>) distanceFunction;
  final DistanceQuery<O> distQ = database.getDistanceQuery(rel, distF);
  DBIDs medids = chooseInitialMedoids(k, rel.getDBIDs(), distQ);
  // Copy the vector of each chosen medoid into the result.
  double[][] result = new double[k][];
  DBIDIter medoidIter = medids.iter();
  int pos = 0;
  while (pos < k) {
    result[pos] = relation.get(medoidIter).toArray();
    pos++;
    medoidIter.advance();
  }
  return result;
}
Aggregations