Search in sources :

Example 1 with WritableDoubleDataStore

use of de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore in project elki by elki-project.

the class ODIN method run.

/**
 * Execute ODIN: score every object by how often it appears in the
 * k-nearest-neighbor lists of the other objects (its in-degree).
 *
 * Tutorial note: the <em>signature</em> of this method follows from the types
 * requested in the {@link #getInputTypeRestriction} method. A single relation
 * of type {@code O} was requested there, which is the data type of our
 * distance function.
 *
 * @param database Database to run on.
 * @param relation Relation to process.
 * @return ODIN outlier result.
 */
public OutlierResult run(Database database, Relation<O> relation) {
    // Set up the distance query and the kNN query built on top of it.
    DistanceQuery<O> distanceQuery = database.getDistanceQuery(relation, getDistanceFunction());
    KNNQuery<O> knnQuery = database.getKNNQuery(distanceQuery, k);
    // The objects to process, and a per-object counter (created with default
    // value 0) that doubles as the output score storage.
    DBIDs ids = relation.getDBIDs();
    WritableDoubleDataStore indegree = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_DB, 0.);
    for (DBIDIter query = ids.iter(); query.valid(); query.advance()) {
        // Nearest neighbors of the current object (using an index, if available!)
        KNNList knn = knnQuery.getKNNForDBID(query, k);
        for (DBIDIter neighbor = knn.iter(); neighbor.valid(); neighbor.advance()) {
            // The query object itself does not count towards any in-degree.
            if (!DBIDUtil.equal(query, neighbor)) {
                indegree.put(neighbor, indegree.doubleValue(neighbor) + 1);
            }
        }
    }
    // Observed score range (both minimum and maximum).
    double min = Double.POSITIVE_INFINITY, max = 0.0;
    for (DBIDIter it = ids.iter(); it.valid(); it.advance()) {
        final double degree = indegree.doubleValue(it);
        min = Math.min(min, degree);
        max = Math.max(max, degree);
    }
    // Wrap the result and add metadata.
    // Supplying the theoretical minimum, maximum and baseline in addition to
    // the observed range gives a better visualization (try it out - or see the
    // screenshots in the tutorial)!
    DoubleRelation scoreRelation = new MaterializedDoubleRelation("ODIN In-Degree", "odin", indegree, ids);
    OutlierScoreMeta scoreMeta = new InvertedOutlierScoreMeta(min, max, 0., ids.size() - 1, k);
    return new OutlierResult(scoreMeta, scoreRelation);
}
Also used : WritableDoubleDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore) DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) OutlierResult(de.lmu.ifi.dbs.elki.result.outlier.OutlierResult) InvertedOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.InvertedOutlierScoreMeta) DoubleRelation(de.lmu.ifi.dbs.elki.database.relation.DoubleRelation) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation) OutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta) InvertedOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.InvertedOutlierScoreMeta) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) KNNList(de.lmu.ifi.dbs.elki.database.ids.KNNList) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation)

Example 2 with WritableDoubleDataStore

use of de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore in project elki by elki-project.

the class FarthestPointsInitialMeans method chooseInitialMedoids.

/**
 * Choose initial medoids by farthest-point traversal: starting from a random
 * object, repeatedly pick the candidate whose distance to the nearest already
 * chosen medoid is largest.
 *
 * When {@code dropfirst} is set, the random starting object only serves to
 * find the first real medoid and is removed from the result again.
 *
 * @param k Number of medoids to choose
 * @param ids Candidate objects to choose from
 * @param distQ Distance query
 * @return The chosen medoids
 */
@Override
public DBIDs chooseInitialMedoids(int k, DBIDs ids, DistanceQuery<? super O> distQ) {
    // Per candidate: distance to the nearest medoid chosen so far; NaN marks
    // candidates that have already been chosen.
    WritableDoubleDataStore store = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP, Double.POSITIVE_INFINITY);
    ArrayModifiableDBIDs means = DBIDUtil.newArray(k);
    DBIDRef first = DBIDUtil.randomSample(ids, rnd);
    DBIDVar prevmean = DBIDUtil.newVar(first);
    means.add(first);
    DBIDVar best = DBIDUtil.newVar(first);
    for (int i = (dropfirst ? 0 : 1); i < k; i++) {
        // Find farthest object:
        double maxdist = Double.NEGATIVE_INFINITY;
        // Scan only the candidate set: the store was allocated for ids, and
        // medoids must come from ids, which may be a subset of the relation.
        for (DBIDIter it = ids.iter(); it.valid(); it.advance()) {
            final double prev = store.doubleValue(it);
            if (prev != prev) {
                // NaN: already chosen!
                continue;
            }
            double val = Math.min(prev, distQ.distance(prevmean, it));
            // Don't store distance to first mean, when it will be dropped below.
            if (i > 0) {
                store.putDouble(it, val);
            }
            if (val > maxdist) {
                maxdist = val;
                best.set(it);
            }
        }
        // Add new mean:
        if (i == 0) {
            // Remove temporary first element.
            means.clear();
        }
        // So it won't be chosen twice.
        store.putDouble(best, Double.NaN);
        prevmean.set(best);
        means.add(best);
    }
    // Explicitly destroy temporary data.
    store.destroy();
    return means;
}
Also used : Relation(de.lmu.ifi.dbs.elki.database.relation.Relation) DBIDVar(de.lmu.ifi.dbs.elki.database.ids.DBIDVar) ArrayModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs) WritableDoubleDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore) DBIDRef(de.lmu.ifi.dbs.elki.database.ids.DBIDRef) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter)

Example 3 with WritableDoubleDataStore

use of de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore in project elki by elki-project.

the class KMeansPlusPlusInitialMeans method chooseInitialMeans.

/**
 * Choose initial means with k-means++ seeding: the first mean is drawn
 * uniformly at random, every further mean is drawn with probability
 * proportional to its current weight (as maintained by
 * {@code initialWeights} / {@code updateWeights}).
 *
 * @param database Database used to obtain the distance query
 * @param relation Relation holding the vectors
 * @param k Number of means to choose
 * @param distanceFunction Distance function
 * @param <T> Vector type
 * @return The chosen means, as returned by {@code unboxVectors}
 * @throws AbortException if the data set does not contain more than k objects
 */
@Override
public <T extends NumberVector> double[][] chooseInitialMeans(Database database, Relation<T> relation, int k, NumberVectorDistanceFunction<? super T> distanceFunction) {
    DistanceQuery<T> distQ = database.getDistanceQuery(relation, distanceFunction);
    DBIDs ids = relation.getDBIDs();
    // Validate before allocating, so no temporary storage is left behind.
    if (ids.size() <= k) {
        throw new AbortException("Don't use k-means with k >= data set size.");
    }
    WritableDoubleDataStore weights = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP, 0.);
    // Chose first mean
    List<NumberVector> means = new ArrayList<>(k);
    Random random = rnd.getSingleThreadedRandom();
    DBIDRef first = DBIDUtil.randomSample(ids, random);
    T firstvec = relation.get(first);
    means.add(firstvec);
    // Initialize weights
    double weightsum = initialWeights(weights, ids, firstvec, distQ);
    // Loop condition also covers k <= 1, where no further mean is needed.
    while (means.size() < k) {
        if (weightsum > Double.MAX_VALUE) {
            LoggingUtil.warning("Could not choose a reasonable mean for k-means++ - too many data points, too large squared distances?");
        }
        if (weightsum < Double.MIN_NORMAL) {
            LoggingUtil.warning("Could not choose a reasonable mean for k-means++ - to few data points?");
        }
        double r = random.nextDouble() * weightsum, s = 0.;
        while (r <= 0 && weightsum > Double.MIN_NORMAL) {
            // Try harder to not choose 0.
            r = random.nextDouble() * weightsum;
        }
        // Roulette-wheel selection: stop ON the object whose weight makes the
        // cumulative sum reach r. The iterator must not be advanced past it.
        // The negated comparison also stops immediately when r is NaN.
        DBIDIter it = ids.iter();
        while (it.valid()) {
            s += weights.doubleValue(it);
            if (!(s < r)) {
                break;
            }
            it.advance();
        }
        if (!it.valid()) {
            // Rare case, but happens due to floating math: the weights sum up
            // to less than r. Here s < r, so this decreases weightsum.
            weightsum -= (r - s);
            // Retry
            continue;
        }
        // Add new mean:
        final T newmean = relation.get(it);
        means.add(newmean);
        if (means.size() >= k) {
            // Done; skip the weight update that nobody would use anymore.
            break;
        }
        // Update weights:
        weights.putDouble(it, 0.);
        // Choose optimized version for double distances, if applicable.
        weightsum = updateWeights(weights, ids, newmean, distQ);
    }
    // Explicitly destroy temporary data.
    weights.destroy();
    return unboxVectors(means);
}
Also used : Random(java.util.Random) WritableDoubleDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore) NumberVector(de.lmu.ifi.dbs.elki.data.NumberVector) DBIDRef(de.lmu.ifi.dbs.elki.database.ids.DBIDRef) ArrayModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs) DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) ArrayList(java.util.ArrayList) AbortException(de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter)

Example 4 with WritableDoubleDataStore

use of de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore in project elki by elki-project.

the class KMeansPlusPlusInitialMeans method chooseInitialMedoids.

/**
 * Choose initial medoids with k-means++ seeding: the first medoid is drawn
 * uniformly at random, every further medoid is drawn with probability
 * proportional to its current weight (as maintained by
 * {@code initialWeights} / {@code updateWeights}).
 *
 * @param k Number of medoids to choose
 * @param ids Candidate objects to choose from
 * @param distQ Distance query
 * @return The chosen medoids
 */
@Override
public DBIDs chooseInitialMedoids(int k, DBIDs ids, DistanceQuery<? super O> distQ) {
    @SuppressWarnings("unchecked") final Relation<O> rel = (Relation<O>) distQ.getRelation();
    ArrayModifiableDBIDs means = DBIDUtil.newArray(k);
    WritableDoubleDataStore weights = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP, 0.);
    Random random = rnd.getSingleThreadedRandom();
    DBIDRef first = DBIDUtil.randomSample(ids, random);
    means.add(first);
    // Initialize weights
    double weightsum = initialWeights(weights, ids, rel.get(first), distQ);
    // Loop condition also covers k <= 1, where no further medoid is needed.
    while (means.size() < k) {
        if (weightsum > Double.MAX_VALUE) {
            LoggingUtil.warning("Could not choose a reasonable mean for k-means++ - too many data points, too large squared distances?");
        }
        if (weightsum < Double.MIN_NORMAL) {
            LoggingUtil.warning("Could not choose a reasonable mean for k-means++ - to few unique data points?");
        }
        double r = random.nextDouble() * weightsum;
        while (r <= 0 && weightsum > Double.MIN_NORMAL) {
            // Try harder to not choose 0.
            r = random.nextDouble() * weightsum;
        }
        // Roulette-wheel selection: stop ON the object whose weight uses up
        // the remaining r. The iterator must not be advanced past it.
        // The negated comparison also stops immediately when r is NaN.
        DBIDIter it = ids.iter();
        while (it.valid()) {
            r -= weights.doubleValue(it);
            if (!(r > 0.)) {
                break;
            }
            it.advance();
        }
        if (!it.valid()) {
            // Rare case due to floating math: the weights sum up to less than
            // the drawn value. r now holds the positive shortfall; shrink the
            // total accordingly and retry instead of using an invalid iterator.
            weightsum -= r;
            continue;
        }
        // Add new mean:
        means.add(it);
        if (means.size() >= k) {
            // Done; skip the weight update that nobody would use anymore.
            break;
        }
        // Update weights:
        weights.putDouble(it, 0.);
        weightsum = updateWeights(weights, ids, rel.get(it), distQ);
    }
    // Explicitly destroy temporary data.
    weights.destroy();
    return means;
}
Also used : Relation(de.lmu.ifi.dbs.elki.database.relation.Relation) ArrayModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs) Random(java.util.Random) WritableDoubleDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore) DBIDRef(de.lmu.ifi.dbs.elki.database.ids.DBIDRef) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter)

Example 5 with WritableDoubleDataStore

use of de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore in project elki by elki-project.

the class SLINKHDBSCANLinearMemory method run.

/**
 * Run the algorithm: the four SLINK steps are applied to each object in turn,
 * with step 2 working on the core distances instead of the plain distance.
 *
 * @param db Database
 * @param relation Relation
 * @return Clustering hierarchy
 */
public PointerDensityHierarchyRepresentationResult run(Database db, Relation<O> relation) {
    final DistanceQuery<O> distQ = db.getDistanceQuery(relation, getDistanceFunction());
    final KNNQuery<O> knnQ = db.getKNNQuery(distQ, minPts);
    // We need array addressing later.
    final ArrayDBIDs ids = DBIDUtil.ensureArray(relation.getDBIDs());
    // Compute the core distances
    // minPts + 1: ignore query point.
    // NOTE(review): the comment above mentions minPts + 1, but plain minPts is
    // passed here and to the kNN query - confirm which one is intended.
    final WritableDoubleDataStore coredists = computeCoreDists(ids, knnQ, minPts);
    // Pointer representation of the hierarchy: pi and lambda are handed over
    // to the result object below.
    WritableDBIDDataStore pi = DataStoreUtil.makeDBIDStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_STATIC);
    WritableDoubleDataStore lambda = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_STATIC, Double.POSITIVE_INFINITY);
    // Temporary storage for m.
    WritableDoubleDataStore m = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP);
    FiniteProgress progress = LOG.isVerbose() ? new FiniteProgress("Running HDBSCAN*-SLINK", ids.size(), LOG) : null;
    // has to be an array for monotonicity reasons!
    ModifiableDBIDs processedIDs = DBIDUtil.newArray(ids.size());
    for (DBIDIter id = ids.iter(); id.valid(); id.advance()) {
        // Steps 1,3,4 are exactly as in SLINK
        step1(id, pi, lambda);
        // Step 2 is modified to use a different distance
        step2(id, processedIDs, distQ, coredists, m);
        step3(id, pi, lambda, processedIDs, m);
        step4(id, pi, lambda, processedIDs);
        processedIDs.add(id);
        LOG.incrementProcessed(progress);
    }
    LOG.ensureCompleted(progress);
    // m is only used inside the loop above and is not part of the result:
    // explicitly destroy the temporary data.
    m.destroy();
    return new PointerDensityHierarchyRepresentationResult(ids, pi, lambda, distQ.getDistanceFunction().isSquared(), coredists);
}
Also used : WritableDoubleDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore) FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) ArrayDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs) ModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs) WritableDBIDDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableDBIDDataStore) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter)

Aggregations

WritableDoubleDataStore (de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore)90 DBIDIter (de.lmu.ifi.dbs.elki.database.ids.DBIDIter)70 MaterializedDoubleRelation (de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation)70 OutlierResult (de.lmu.ifi.dbs.elki.result.outlier.OutlierResult)70 OutlierScoreMeta (de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta)70 DoubleRelation (de.lmu.ifi.dbs.elki.database.relation.DoubleRelation)68 DoubleMinMax (de.lmu.ifi.dbs.elki.math.DoubleMinMax)61 DBIDs (de.lmu.ifi.dbs.elki.database.ids.DBIDs)43 BasicOutlierScoreMeta (de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta)35 FiniteProgress (de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress)33 KNNList (de.lmu.ifi.dbs.elki.database.ids.KNNList)20 InvertedOutlierScoreMeta (de.lmu.ifi.dbs.elki.result.outlier.InvertedOutlierScoreMeta)13 ArrayModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs)12 DoubleDBIDListIter (de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter)12 StepProgress (de.lmu.ifi.dbs.elki.logging.progress.StepProgress)12 ProbabilisticOutlierScore (de.lmu.ifi.dbs.elki.result.outlier.ProbabilisticOutlierScore)12 QuotientOutlierScoreMeta (de.lmu.ifi.dbs.elki.result.outlier.QuotientOutlierScoreMeta)12 ArrayList (java.util.ArrayList)11 ArrayDBIDs (de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs)10 NeighborSetPredicate (de.lmu.ifi.dbs.elki.algorithm.outlier.spatial.neighborhood.NeighborSetPredicate)9