Search in sources :

Example 6 with ArrayDBIDs

Use of de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs in project elki by elki-project.

From the class PartitionApproximationMaterializeKNNPreprocessor, method preprocess.

/**
 * Materializes approximate kNN lists: the ids of the relation are split into
 * {@code partitions} parts via {@code DBIDUtil.randomSplit}, and the neighbors
 * of every object are searched within its own part only.
 * <p>
 * Inside a part, every freshly computed distance is parked in a cache under
 * the reversed id pair, so that it is consumed (and removed) instead of being
 * recomputed once the other object of the pair becomes the query object.
 */
@Override
protected void preprocess() {
    DistanceQuery<O> distanceQuery = relation.getDistanceQuery(distanceFunction);
    storage = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC, KNNList.class);
    // Statistics on the number of neighbors actually found per object.
    MeanVariance ksize = new MeanVariance();
    if (LOG.isVerbose()) {
        LOG.verbose("Approximating nearest neighbor lists to database objects");
    }
    // Split the IDs into random partitions:
    ArrayDBIDs[] parts = DBIDUtil.randomSplit(relation.getDBIDs(), partitions, rnd);
    FiniteProgress progress = LOG.isVerbose() ? new FiniteProgress("Processing partitions", partitions, LOG) : null;
    // Upper bound for the initial capacity hint of the cache. Never reached by
    // a hint that fits into int arithmetic, so it only matters for partitions
    // where the old int computation overflowed.
    // NOTE(review): chosen to stay below fastutil's maximum table size at the
    // default load factor - confirm against HashCommon.arraySize.
    final long maxCacheHint = 1L << 29;
    for (int part = 0; part < partitions; part++) {
        final ArrayDBIDs ids = parts[part];
        final int size = ids.size();
        // Capacity hint of 3/8 * size^2, computed in long arithmetic:
        // size * size * 3 wraps around in int for size > 26754, which yields a
        // negative or otherwise bogus capacity.
        final int cacheHint = (int) Math.min(((long) size * size * 3L) >> 3, maxCacheHint);
        Object2DoubleOpenHashMap<DBIDPair> cache = new Object2DoubleOpenHashMap<>(cacheHint);
        // NaN marks "no cached distance for this pair".
        cache.defaultReturnValue(Double.NaN);
        for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
            KNNHeap kNN = DBIDUtil.newHeap(k);
            for (DBIDIter iter2 = ids.iter(); iter2.valid(); iter2.advance()) {
                DBIDPair key = DBIDUtil.newPair(iter, iter2);
                double d = cache.removeDouble(key);
                if (d == d) {
                    // Not NaN
                    // consume the previous result.
                    kNN.insert(d, iter2);
                } else {
                    // compute new and store the result for the reverse lookup.
                    d = distanceQuery.distance(iter, iter2);
                    kNN.insert(d, iter2);
                    // The pair of an object with itself is never looked up
                    // again; caching it would leave one stale entry per object
                    // and trigger the "cache should be empty" diagnostic below.
                    if (!DBIDUtil.equal(iter, iter2)) {
                        // put it into the cache, but with the keys reversed
                        cache.put(DBIDUtil.newPair(iter2, iter), d);
                    }
                }
            }
            ksize.put(kNN.size());
            storage.put(iter, kNN.toKNNList());
        }
        if (LOG.isDebugging() && cache.size() > 0) {
            LOG.warning("Cache should be empty after each run, but still has " + cache.size() + " elements.");
        }
        LOG.incrementProcessed(progress);
    }
    LOG.ensureCompleted(progress);
    if (LOG.isVerbose()) {
        LOG.verbose("On average, " + ksize.getMean() + " +- " + ksize.getSampleStddev() + " neighbors returned.");
    }
}
Also used : Object2DoubleOpenHashMap(it.unimi.dsi.fastutil.objects.Object2DoubleOpenHashMap) FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) KNNHeap(de.lmu.ifi.dbs.elki.database.ids.KNNHeap) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) MeanVariance(de.lmu.ifi.dbs.elki.math.MeanVariance) DBIDPair(de.lmu.ifi.dbs.elki.database.ids.DBIDPair) KNNList(de.lmu.ifi.dbs.elki.database.ids.KNNList) ArrayDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs)

Example 7 with ArrayDBIDs

Use of de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs in project elki by elki-project.

From the class RandomSampleKNNPreprocessor, method preprocess.

/**
 * Fills the kNN storage: for every object of the relation a new random sample
 * of {@code ids.size() * share} objects (truncated to int) is drawn, and the
 * object's kNN list is built from the distances to that sample only.
 */
@Override
protected void preprocess() {
    final DistanceQuery<O> dq = relation.getDistanceQuery(distanceFunction);
    storage = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC, KNNList.class);

    // Progress is only tracked when verbose logging is enabled.
    FiniteProgress prog = null;
    if (getLogger().isVerbose()) {
        prog = new FiniteProgress("Materializing random-sample k nearest neighbors (k=" + k + ")", relation.size(), getLogger());
    }

    final ArrayDBIDs ids = DBIDUtil.ensureArray(relation.getDBIDs());
    final int samplesize = (int) (ids.size() * share);
    final Random random = rnd.getSingleThreadedRandom();

    DBIDIter query = ids.iter();
    while (query.valid()) {
        final KNNHeap heap = DBIDUtil.newHeap(k);
        // Each query object gets its own sample.
        final DBIDs sample = DBIDUtil.randomSample(ids, samplesize, random);
        DBIDIter candidate = sample.iter();
        while (candidate.valid()) {
            heap.insert(dq.distance(query, candidate), candidate);
            candidate.advance();
        }
        storage.put(query, heap.toKNNList());
        getLogger().incrementProcessed(prog);
        query.advance();
    }
    getLogger().ensureCompleted(prog);
}
Also used : Random(java.util.Random) KNNList(de.lmu.ifi.dbs.elki.database.ids.KNNList) FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) ArrayDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs) DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) ArrayDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs) KNNHeap(de.lmu.ifi.dbs.elki.database.ids.KNNHeap) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter)

Example 8 with ArrayDBIDs

Use of de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs in project elki by elki-project.

From the class MaterializeKNNPreprocessor, method objectsRemoved.

/**
 * Reacts to the removal of objects in three steps: drops the stored kNN
 * entries of the removed objects, lets {@code updateKNNsAfterDeletion} repair
 * the remaining entries, and notifies the listeners.
 *
 * @param ids the ids of the removed objects
 */
protected void objectsRemoved(DBIDs ids) {
    final Logging logger = getLogger();
    // Step progress is only tracked when verbose logging is enabled.
    StepProgress steps = null;
    if (logger.isVerbose()) {
        steps = new StepProgress(3);
    }

    // Step 1: discard the stored kNNs of the removed objects.
    logger.beginStep(steps, 1, "New deletions ocurred, remove their materialized kNNs.");
    DBIDIter removed = ids.iter();
    while (removed.valid()) {
        storage.delete(removed);
        removed.advance();
    }

    // Step 2: repair the kNNs affected by the removal.
    logger.beginStep(steps, 2, "New deletions ocurred, update the affected kNNs.");
    final ArrayDBIDs affected = updateKNNsAfterDeletion(ids);

    // Step 3: tell the listeners which objects were removed and which changed.
    logger.beginStep(steps, 3, "New deletions ocurred, inform listeners.");
    fireKNNsRemoved(ids, affected);

    logger.ensureCompleted(steps);
}
Also used : Logging(de.lmu.ifi.dbs.elki.logging.Logging) ArrayDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs) StepProgress(de.lmu.ifi.dbs.elki.logging.progress.StepProgress) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter)

Example 9 with ArrayDBIDs

Use of de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs in project elki by elki-project.

From the class HiCS, method buildOneDimIndexes.

/**
 * Builds one "index" per attribute: for each dimension, a fresh array of all
 * DBIDs of the relation is sorted with a single-dimension comparator, and the
 * sorted arrays are collected in dimension order.
 *
 * @param relation Relation to index
 * @return List of sorted id arrays, one per dimension
 */
private ArrayList<ArrayDBIDs> buildOneDimIndexes(Relation<? extends NumberVector> relation) {
    final int dimensions = RelationUtil.dimensionality(relation);
    // One comparator is reused; only its active dimension changes per pass.
    final SortDBIDsBySingleDimension sorter = new VectorUtil.SortDBIDsBySingleDimension(relation);
    final ArrayList<ArrayDBIDs> indexes = new ArrayList<>(dimensions + 1);
    for (int d = 0; d < dimensions; ++d) {
        sorter.setDimension(d);
        final ArrayModifiableDBIDs sorted = DBIDUtil.newArray(relation.getDBIDs());
        sorted.sort(sorter);
        indexes.add(sorted);
    }
    return indexes;
}
Also used : SortDBIDsBySingleDimension(de.lmu.ifi.dbs.elki.data.VectorUtil.SortDBIDsBySingleDimension) ArrayModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs) ArrayDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs) ArrayList(java.util.ArrayList)

Example 10 with ArrayDBIDs

Use of de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs in project elki by elki-project.

From the class GaussianUniformMixture, method run.

/**
 * Run the algorithm: each object in turn is tentatively flagged as anomalous,
 * the resulting change in log likelihood is stored as its score, and the flag
 * is kept only if that gain exceeds the threshold {@code c}.
 *
 * @param relation Data relation
 * @return Outlier result
 */
public OutlierResult run(Relation<V> relation) {
    // Array form of the ids, so that objects can be addressed by position
    final ArrayDBIDs objids = DBIDUtil.ensureArray(relation.getDBIDs());
    // One flag bit per object, all cleared at the start
    final long[] flags = BitsUtil.zero(objids.size());
    // Two views over the same ids and the same bit set; they differ only in
    // the last constructor argument.
    // NOTE(review): presumably "true" selects the unflagged objects and
    // "false" the flagged ones - confirm against MaskedDBIDs.
    final DBIDs normalObjs = new MaskedDBIDs(objids, flags, true);
    final DBIDs anomalousObjs = new MaskedDBIDs(objids, flags, false);
    // Per-object scores
    final WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_TEMP | DataStoreFactory.HINT_HOT);
    // Log likelihood of the starting configuration
    double bestLogLike = relation.size() * logml + loglikelihoodNormal(normalObjs, relation);

    final DoubleMinMax range = new DoubleMinMax();
    final DBIDIter cur = objids.iter();
    int pos = 0;
    while (pos < objids.size()) {
        // Tentatively flag the current object
        BitsUtil.setI(flags, pos);
        // Log likelihood with the object flagged
        final double candidate = normalObjs.size() * logml
            + loglikelihoodNormal(normalObjs, relation)
            + anomalousObjs.size() * logl
            + loglikelihoodAnomalous(anomalousObjs);
        // The gain over the best configuration so far is the outlier score
        final double gain = candidate - bestLogLike;
        scores.putDouble(cur, gain);
        range.put(gain);
        if (gain > c) {
            // Object stays flagged; continue from the improved likelihood
            bestLogLike = candidate;
        } else {
            // Not enough gain: revert the tentative flag
            BitsUtil.clearI(flags, pos);
        }
        pos++;
        cur.advance();
    }

    final OutlierScoreMeta meta = new BasicOutlierScoreMeta(range.getMin(), range.getMax(), Double.NEGATIVE_INFINITY, Double.POSITIVE_INFINITY, 0.0);
    final DoubleRelation result = new MaterializedDoubleRelation("Gaussian Mixture Outlier Score", "gaussian-mixture-outlier", scores, relation.getDBIDs());
    return new OutlierResult(meta, result);
}
Also used : WritableDoubleDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore) ArrayDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs) DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) MaskedDBIDs(de.lmu.ifi.dbs.elki.database.ids.generic.MaskedDBIDs) OutlierResult(de.lmu.ifi.dbs.elki.result.outlier.OutlierResult) DoubleRelation(de.lmu.ifi.dbs.elki.database.relation.DoubleRelation) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation) BasicOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta) OutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta) BasicOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) DoubleMinMax(de.lmu.ifi.dbs.elki.math.DoubleMinMax) ArrayDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation) MaskedDBIDs(de.lmu.ifi.dbs.elki.database.ids.generic.MaskedDBIDs)

Aggregations

ArrayDBIDs (de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs)45 DBIDIter (de.lmu.ifi.dbs.elki.database.ids.DBIDIter)23 DBIDs (de.lmu.ifi.dbs.elki.database.ids.DBIDs)16 DBIDArrayIter (de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter)14 FiniteProgress (de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress)13 KNNList (de.lmu.ifi.dbs.elki.database.ids.KNNList)12 WritableDoubleDataStore (de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore)10 ArrayModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs)9 DoubleDBIDListIter (de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter)9 ModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs)8 DoubleRelation (de.lmu.ifi.dbs.elki.database.relation.DoubleRelation)8 MaterializedDoubleRelation (de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation)7 DoubleMinMax (de.lmu.ifi.dbs.elki.math.DoubleMinMax)7 OutlierResult (de.lmu.ifi.dbs.elki.result.outlier.OutlierResult)7 OutlierScoreMeta (de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta)7 ArrayList (java.util.ArrayList)7 Clustering (de.lmu.ifi.dbs.elki.data.Clustering)6 KNNHeap (de.lmu.ifi.dbs.elki.database.ids.KNNHeap)6 ModifiableDoubleDBIDList (de.lmu.ifi.dbs.elki.database.ids.ModifiableDoubleDBIDList)6 LongStatistic (de.lmu.ifi.dbs.elki.logging.statistics.LongStatistic)5