Search in sources :

Example 81 with DBIDIter

use of de.lmu.ifi.dbs.elki.database.ids.DBIDIter in project elki by elki-project.

the class DeLiCluTreeIndex method insertAll.

/**
 * Adds all of the given objects to this index. When the index reports that
 * it can bulk load, one leaf entry is created per object and the entries are
 * loaded in a single bulk operation; otherwise the objects are inserted one
 * at a time.
 *
 * @param ids the objects to be inserted
 */
@Override
public final void insertAll(DBIDs ids) {
    // Nothing is done for an empty id set.
    if (ids.isEmpty()) {
        return;
    }
    // NOTE(review): a set of exactly one id is skipped as well - confirm this is intended.
    if (ids.size() == 1) {
        return;
    }
    if (!canBulkLoad()) {
        // Sequential fallback: insert each object individually.
        DBIDIter cursor = ids.iter();
        while (cursor.valid()) {
            insert(cursor);
            cursor.advance();
        }
    } else {
        // Collect one leaf entry per object, then load them all at once.
        final List<DeLiCluEntry> entries = new ArrayList<>(ids.size());
        DBIDIter cursor = ids.iter();
        while (cursor.valid()) {
            // The iterator is dereferenced before the leaf entry is created from it.
            entries.add(createNewLeafEntry(DBIDUtil.deref(cursor)));
            cursor.advance();
        }
        bulkLoad(entries);
    }
    doExtraIntegrityChecks();
}
Also used : ArrayList(java.util.ArrayList) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter)

Example 82 with DBIDIter

use of de.lmu.ifi.dbs.elki.database.ids.DBIDIter in project elki by elki-project.

the class EMOutlier method run.

/**
 * Runs the algorithm in the timed evaluation part.
 *
 * The EM clustering is run with soft assignments enabled. For every object,
 * the stored score is the smallest value of {@code 1 - p} over the
 * probabilities {@code p} found in the soft assignment relation for that
 * object.
 *
 * @param database Database to process
 * @param relation Relation to process
 * @return Outlier result, with the EM clustering attached as child result
 * @throws IllegalStateException if the EM result contains no relation with
 *         data type {@code EM.SOFT_TYPE}
 */
public OutlierResult run(Database database, Relation<V> relation) {
    emClustering.setSoft(true);
    Clustering<?> emresult = emClustering.run(database, relation);
    // Locate the soft assignment relation among the children of the EM result.
    // If several children match, the last one visited is used.
    Relation<double[]> soft = null;
    for (It<Relation<double[]>> iter = emresult.getHierarchy().iterChildren(emresult).filter(Relation.class); iter.valid(); iter.advance()) {
        if (iter.get().getDataTypeInformation() == EM.SOFT_TYPE) {
            soft = iter.get();
        }
    }
    // Fail with a clear message instead of a NullPointerException further down.
    if (soft == null) {
        throw new IllegalStateException("EM clustering result does not contain a soft assignment relation.");
    }
    // Largest score seen so far; starts at 0.0 so the reported maximum is never negative.
    double globmax = 0.0;
    WritableDoubleDataStore emo_score = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_TEMP | DataStoreFactory.HINT_HOT);
    for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
        // Minimum of (1 - p) over all probabilities stored for this object.
        double minComplement = Double.POSITIVE_INFINITY;
        double[] probs = soft.get(iditer);
        for (double prob : probs) {
            minComplement = Math.min(1. - prob, minComplement);
        }
        emo_score.putDouble(iditer, minComplement);
        globmax = Math.max(minComplement, globmax);
    }
    DoubleRelation scoreres = new MaterializedDoubleRelation("EM outlier scores", "em-outlier", emo_score, relation.getDBIDs());
    OutlierScoreMeta meta = new ProbabilisticOutlierScore(0.0, globmax);
    // combine results.
    OutlierResult result = new OutlierResult(meta, scoreres);
    // TODO: add a keep-EM flag?
    result.addChildResult(emresult);
    return result;
}
Also used : WritableDoubleDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore) OutlierResult(de.lmu.ifi.dbs.elki.result.outlier.OutlierResult) ProbabilisticOutlierScore(de.lmu.ifi.dbs.elki.result.outlier.ProbabilisticOutlierScore) DoubleRelation(de.lmu.ifi.dbs.elki.database.relation.DoubleRelation) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation) OutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) Relation(de.lmu.ifi.dbs.elki.database.relation.Relation)

Example 83 with DBIDIter

use of de.lmu.ifi.dbs.elki.database.ids.DBIDIter in project elki by elki-project.

the class SilhouetteOutlierDetection method run.

/**
 * Clusters the database with the configured clusterer and assigns every
 * object a silhouette-style score {@code (b - a) / max(a, b)}, where
 * {@code a} is the object's average distance to the other members of its own
 * cluster and {@code b} is the smallest average distance to any other
 * cluster. How noise clusters and clusters with at most one member are
 * handled depends on {@code noiseOption}.
 *
 * @param database Database to process
 * @return Outlier result built from the computed scores
 */
@Override
public OutlierResult run(Database database) {
    Relation<O> relation = database.getRelation(getDistanceFunction().getInputTypeRestriction());
    DistanceQuery<O> dq = database.getDistanceQuery(relation, getDistanceFunction());
    // TODO: improve ELKI api to ensure we're using the same DBIDs!
    Clustering<?> c = clusterer.run(database);
    WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_DB);
    // Tracks the smallest and largest score written, for the result metadata.
    DoubleMinMax mm = new DoubleMinMax();
    List<? extends Cluster<?>> clusters = c.getAllClusters();
    for (Cluster<?> cluster : clusters) {
        // Special handling for noise clusters and clusters with at most one member.
        if (cluster.size() <= 1 || cluster.isNoise()) {
            switch(noiseOption) {
                case IGNORE_NOISE:
                case TREAT_NOISE_AS_SINGLETONS:
                    // As suggested in Rousseeuw, we use 0 for singletons.
                    for (DBIDIter iter = cluster.getIDs().iter(); iter.valid(); iter.advance()) {
                        scores.put(iter, 0.);
                    }
                    mm.put(0.);
                    // Skips to the next cluster of the outer loop.
                    continue;
                case MERGE_NOISE:
                    // Treat as cluster below
                    break;
            }
        }
        ArrayDBIDs ids = DBIDUtil.ensureArray(cluster.getIDs());
        // temporary storage.
        // as[k] accumulates the distances from member k to all members at
        // smaller offsets, so each in-cluster pair is computed only once.
        double[] as = new double[ids.size()];
        DBIDArrayIter it1 = ids.iter(), it2 = ids.iter();
        for (it1.seek(0); it1.valid(); it1.advance()) {
            // a: In-cluster distances
            // Already computed distances
            double a = as[it1.getOffset()];
            // Only members after it1 are visited; earlier pairs are already in 'as'.
            for (it2.seek(it1.getOffset() + 1); it2.valid(); it2.advance()) {
                final double dist = dq.distance(it1, it2);
                a += dist;
                as[it2.getOffset()] += dist;
            }
            // Average over the other members of the cluster.
            // NOTE(review): for a one-member cluster reaching this point
            // (MERGE_NOISE) the divisor is 0 and 'a' becomes NaN - confirm intended.
            a /= (ids.size() - 1);
            // b: other clusters:
            // 'min' ends up as the smallest average distance to any other cluster.
            double min = Double.POSITIVE_INFINITY;
            for (Cluster<?> ocluster : clusters) {
                if (ocluster == /* yes, reference identity */
                cluster) {
                    continue;
                }
                if (ocluster.isNoise()) {
                    switch(noiseOption) {
                        case IGNORE_NOISE:
                            // Noise clusters do not contribute to b.
                            continue;
                        case MERGE_NOISE:
                            // No special treatment
                            break;
                        case TREAT_NOISE_AS_SINGLETONS:
                            // Treat noise cluster as singletons:
                            // each noise object competes on its own distance
                            // rather than on the cluster average.
                            for (DBIDIter it3 = ocluster.getIDs().iter(); it3.valid(); it3.advance()) {
                                double dist = dq.distance(it1, it3);
                                if (dist < min) {
                                    min = dist;
                                }
                            }
                            continue;
                    }
                }
                // Average distance from it1 to every member of the other cluster.
                final DBIDs oids = ocluster.getIDs();
                double b = 0.;
                for (DBIDIter it3 = oids.iter(); it3.valid(); it3.advance()) {
                    b += dq.distance(it1, it3);
                }
                b /= oids.size();
                if (b < min) {
                    min = b;
                }
            }
            // NOTE(review): if no other cluster contributed, 'min' is still
            // infinite and this expression evaluates to NaN - confirm intended.
            final double score = (min - a) / Math.max(min, a);
            scores.put(it1, score);
            mm.put(score);
        }
    }
    // Build result representation.
    DoubleRelation scoreResult = new MaterializedDoubleRelation("Silhouette Coefficients", "silhouette-outlier", scores, relation.getDBIDs());
    // The metadata receives the observed score range plus the constants -1, 1 and .5.
    OutlierScoreMeta scoreMeta = new InvertedOutlierScoreMeta(mm.getMin(), mm.getMax(), -1., 1., .5);
    return new OutlierResult(scoreMeta, scoreResult);
}
Also used : WritableDoubleDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore) ArrayDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs) DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) OutlierResult(de.lmu.ifi.dbs.elki.result.outlier.OutlierResult) DBIDArrayIter(de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter) InvertedOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.InvertedOutlierScoreMeta) DoubleRelation(de.lmu.ifi.dbs.elki.database.relation.DoubleRelation) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation) OutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) DoubleMinMax(de.lmu.ifi.dbs.elki.math.DoubleMinMax)

Example 84 with DBIDIter

use of de.lmu.ifi.dbs.elki.database.ids.DBIDIter in project elki by elki-project.

the class OutRankS1 method run.

/**
 * Runs the subspace clustering algorithm and scores every object by summing,
 * over all clusters that contain it, a weighted combination of the cluster's
 * relative size and relative dimensionality.
 *
 * @param database Database to process
 * @return Outlier result, with the clustering attached as child result
 */
@Override
public OutlierResult run(Database database) {
    final DBIDs ids = database.getRelation(TypeUtil.ANY).getDBIDs();
    // Run the primary algorithm
    final Clustering<? extends SubspaceModel> clustering = clusteralg.run(database);
    // Every object starts with a score of zero.
    final WritableDoubleDataStore score = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT);
    DBIDIter init = ids.iter();
    while (init.valid()) {
        score.putDouble(init, 0);
        init.advance();
    }
    // First pass: largest cluster size and largest subspace dimensionality.
    int maxsize = 0;
    int maxdim = 0;
    for (Cluster<? extends SubspaceModel> cl : clustering.getAllClusters()) {
        final int dims = BitsUtil.cardinality(cl.getModel().getDimensions());
        maxsize = Math.max(maxsize, cl.size());
        maxdim = Math.max(maxdim, dims);
    }
    // Second pass: add each cluster's contribution to the score of its members.
    final DoubleMinMax range = new DoubleMinMax();
    for (Cluster<? extends SubspaceModel> cl : clustering.getAllClusters()) {
        final double relsize = cl.size() / (double) maxsize;
        final double reldim = BitsUtil.cardinality(cl.getModel().getDimensions()) / (double) maxdim;
        // Both terms are constant per cluster; they are added one after the
        // other below so the summation order stays the same.
        final double sizeTerm = alpha * relsize;
        final double dimTerm = (1 - alpha) * reldim;
        DBIDIter member = cl.getIDs().iter();
        while (member.valid()) {
            final double updated = score.doubleValue(member) + sizeTerm + dimTerm;
            score.putDouble(member, updated);
            range.put(updated);
            member.advance();
        }
    }
    final DoubleRelation scoreResult = new MaterializedDoubleRelation("OutRank-S1", "OUTRANK_S1", score, ids);
    final OutlierScoreMeta meta = new InvertedOutlierScoreMeta(range.getMin(), range.getMax(), 0, Double.POSITIVE_INFINITY);
    final OutlierResult outcome = new OutlierResult(meta, scoreResult);
    outcome.addChildResult(clustering);
    return outcome;
}
Also used : WritableDoubleDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore) DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) OutlierResult(de.lmu.ifi.dbs.elki.result.outlier.OutlierResult) InvertedOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.InvertedOutlierScoreMeta) DoubleRelation(de.lmu.ifi.dbs.elki.database.relation.DoubleRelation) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation) OutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) DoubleMinMax(de.lmu.ifi.dbs.elki.math.DoubleMinMax)

Example 85 with DBIDIter

use of de.lmu.ifi.dbs.elki.database.ids.DBIDIter in project elki by elki-project.

the class AddSingleScale method run.

/**
 * Builds one linear scale and assigns it to every dimension of a single
 * vector relation. The scale bounds are taken from {@code minmax} when it is
 * set; otherwise they are the smallest and largest non-NaN values found in
 * the relation.
 *
 * @param rel Relation
 * @return Scales
 */
private ScalesResult run(Relation<? extends NumberVector> rel) {
    final int dim = RelationUtil.dimensionality(rel);
    final LinearScale[] scales = new LinearScale[dim];
    final LinearScale shared;
    if (minmax != null) {
        // Use predefined.
        shared = new LinearScale(minmax[0], minmax[1]);
    } else {
        // Scan every value of every vector to find the overall value range.
        final DoubleMinMax range = new DoubleMinMax();
        DBIDIter cursor = rel.iterDBIDs();
        while (cursor.valid()) {
            final NumberVector vector = rel.get(cursor);
            for (int d = 0; d < dim; d++) {
                final double value = vector.doubleValue(d);
                // NaN values do not take part in the range.
                if (!Double.isNaN(value)) {
                    range.put(value);
                }
            }
            cursor.advance();
        }
        shared = new LinearScale(range.getMin(), range.getMax());
    }
    // All dimensions share the same scale instance.
    for (int d = 0; d < dim; d++) {
        scales[d] = shared;
    }
    return new ScalesResult(scales);
}
Also used : LinearScale(de.lmu.ifi.dbs.elki.math.scales.LinearScale) DoubleMinMax(de.lmu.ifi.dbs.elki.math.DoubleMinMax) ScalesResult(de.lmu.ifi.dbs.elki.result.ScalesResult) NumberVector(de.lmu.ifi.dbs.elki.data.NumberVector) ListSizeConstraint(de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.ListSizeConstraint) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter)

Aggregations

DBIDIter (de.lmu.ifi.dbs.elki.database.ids.DBIDIter)329 FiniteProgress (de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress)78 DBIDs (de.lmu.ifi.dbs.elki.database.ids.DBIDs)76 DoubleRelation (de.lmu.ifi.dbs.elki.database.relation.DoubleRelation)72 WritableDoubleDataStore (de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore)70 ArrayList (java.util.ArrayList)61 KNNList (de.lmu.ifi.dbs.elki.database.ids.KNNList)56 OutlierResult (de.lmu.ifi.dbs.elki.result.outlier.OutlierResult)56 MaterializedDoubleRelation (de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation)55 OutlierScoreMeta (de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta)55 DoubleMinMax (de.lmu.ifi.dbs.elki.math.DoubleMinMax)54 ModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs)53 NumberVector (de.lmu.ifi.dbs.elki.data.NumberVector)42 ArrayModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs)40 DoubleDBIDListIter (de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter)34 MeanVariance (de.lmu.ifi.dbs.elki.math.MeanVariance)31 BasicOutlierScoreMeta (de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta)30 ArrayDBIDs (de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs)25 ModifiableDoubleDBIDList (de.lmu.ifi.dbs.elki.database.ids.ModifiableDoubleDBIDList)24 AbortException (de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException)21