Search in sources :

Example 6 with HashSetModifiableDBIDs

use of de.lmu.ifi.dbs.elki.database.ids.HashSetModifiableDBIDs in project elki by elki-project.

the class ROCEvaluationTest method testROCCurve.

/**
 * Checks ROC curve materialization (including the removal of redundant curve
 * points) and both ways of obtaining the area under the curve.
 */
@Test
public void testROCCurve() {
    // Objects 1 to 5 form the positive set.
    HashSetModifiableDBIDs positive = DBIDUtil.newHashSet();
    for (int id = 1; id <= 5; id++) {
        positive.add(DBIDUtil.importInteger(id));
    }
    // The ranking is given as two parallel arrays: rankDist[k] is the distance
    // stored for object rankId[k]. Annotated curve points ("+" positive object,
    // "-" other object; "++" point expected in the curve, "--" redundant point):
    //   start ................... 0.0,0. ++
    //   id 1 (+) ................ 0.0,.2 -- redundant
    //   id 2 (+) ................ 0.0,.4 ++
    //   id 6 (-) ................ .25,.4 ++
    //   id 7 (-), id 3 (+), tie . .50,.6 -- redundant
    //   id 8 (-), id 4 (+), tie . .75,.8 ++
    //   id 9 (-) ................ 1.0,.8 ++
    //   id 5 (+) ................ 1.0,1. ++
    final double[] rankDist = { 0.0, 1.0, 2.0, 3.0, 3.0, 4.0, 4.0, 5.0, 6.0 };
    final int[] rankId = { 1, 2, 6, 7, 3, 8, 4, 9, 5 };
    final ModifiableDoubleDBIDList distances = DBIDUtil.newDistanceDBIDList();
    for (int k = 0; k < rankId.length; k++) {
        distances.add(rankDist[k], DBIDUtil.importInteger(rankId[k]));
    }

    final double expectedAuc = 0.6;
    final double tolerance = 1e-14;

    // Curve based evaluation: six "++" points must remain after simplification.
    DBIDsTest relevant = new DBIDsTest(positive);
    DistanceResultAdapter ranking = new DistanceResultAdapter(distances.iter());
    XYCurve roccurve = ROCEvaluation.materializeROC(relevant, ranking);
    assertEquals("ROC curve too complex", 6, roccurve.size());
    assertEquals("ROC AUC (curve) not correct.", expectedAuc, XYCurve.areaUnderCurve(roccurve), tolerance);

    // Direct evaluation without materializing the curve.
    double direct = new ROCEvaluation().evaluate(positive, distances);
    assertEquals("ROC AUC (direct) not correct.", expectedAuc, direct, tolerance);
}
Also used : HashSetModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.HashSetModifiableDBIDs) XYCurve(de.lmu.ifi.dbs.elki.math.geometry.XYCurve) ModifiableDoubleDBIDList(de.lmu.ifi.dbs.elki.database.ids.ModifiableDoubleDBIDList) DBIDsTest(de.lmu.ifi.dbs.elki.evaluation.scores.adapter.DBIDsTest) DistanceResultAdapter(de.lmu.ifi.dbs.elki.evaluation.scores.adapter.DistanceResultAdapter) Test(org.junit.Test) DBIDsTest(de.lmu.ifi.dbs.elki.evaluation.scores.adapter.DBIDsTest)

Example 7 with HashSetModifiableDBIDs

use of de.lmu.ifi.dbs.elki.database.ids.HashSetModifiableDBIDs in project elki by elki-project.

the class AveragePrecisionEvaluationTest method testAveragePrecision.

/**
 * Checks the average precision score on a small hand-computed ranking.
 */
@Test
public void testAveragePrecision() {
    // Objects 1 to 5 form the positive set.
    HashSetModifiableDBIDs positive = DBIDUtil.newHashSet();
    for (int id = 1; id <= 5; id++) {
        positive.add(DBIDUtil.importInteger(id));
    }
    // The ranking is given as two parallel arrays: rankDist[k] is the distance
    // stored for object rankId[k]. Precision noted at each positive object:
    //   id 1: 1.0, id 2: 1.0, id 3: 0.6, id 4: 4/7., id 5: 5/9.
    final double[] rankDist = { 0.0, 1.0, 2.0, 3.0, 3.0, 4.0, 4.0, 5.0, 6.0 };
    final int[] rankId = { 1, 2, 6, 7, 3, 8, 4, 9, 5 };
    final ModifiableDoubleDBIDList distances = DBIDUtil.newDistanceDBIDList();
    for (int k = 0; k < rankId.length; k++) {
        distances.add(rankDist[k], DBIDUtil.importInteger(rankId[k]));
    }
    // (1+1+.6+4/7.+5/9.)/5 = 0.7453968253968254
    final double expected = 0.7453968253968254;
    double ap = new AveragePrecisionEvaluation().evaluate(positive, distances);
    assertEquals("Average precision not correct.", expected, ap, 1e-14);
}
Also used : HashSetModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.HashSetModifiableDBIDs) ModifiableDoubleDBIDList(de.lmu.ifi.dbs.elki.database.ids.ModifiableDoubleDBIDList) Test(org.junit.Test)

Example 8 with HashSetModifiableDBIDs

use of de.lmu.ifi.dbs.elki.database.ids.HashSetModifiableDBIDs in project elki by elki-project.

the class P3C method partitionData.

/**
 * Partition the data set into {@code bins} bins in each dimension
 * <i>independently</i>.
 *
 * This can be used to construct a grid approximation of the data using O(d n)
 * memory.
 *
 * When a dimension is found to be constant, it will not be partitioned, but
 * instead the corresponding array will be set to {@code null}.
 *
 * For every non-constant dimension, each object of the relation is added to
 * exactly one bin set; the object that causes the switch to the next bin is
 * placed into the bin it belongs to rather than being skipped.
 *
 * @param relation Data relation to partition
 * @param bins Number of bins
 * @return Partitions of each dimension.
 */
private SetDBIDs[][] partitionData(final Relation<V> relation, final int bins) {
    final int dim = RelationUtil.dimensionality(relation);
    SetDBIDs[][] partitions = new SetDBIDs[dim][bins];
    ArrayModifiableDBIDs ids = DBIDUtil.newArray(relation.getDBIDs());
    // will be reused.
    DBIDArrayIter iter = ids.iter();
    SortDBIDsBySingleDimension sorter = new VectorUtil.SortDBIDsBySingleDimension(relation, 0);
    for (int d = 0; d < dim; d++) {
        // Sort the ids by the current dimension, so bins can be filled in one pass.
        sorter.setDimension(d);
        ids.sort(sorter);
        // Minimum:
        iter.seek(0);
        double min = relation.get(iter).doubleValue(d);
        // Extend:
        iter.seek(ids.size() - 1);
        double delta = (relation.get(iter).doubleValue(d) - min) / bins;
        if (delta > 0.) {
            SetDBIDs[] dimparts = partitions[d];
            final int last = dimparts.length - 1;
            // Upper bound of the bin currently being filled.
            double split = min + delta;
            HashSetModifiableDBIDs pids = DBIDUtil.newHashSet();
            dimparts[0] = pids;
            int i = 0;
            for (iter.seek(0); iter.valid(); iter.advance()) {
                final double v = relation.get(iter).doubleValue(d);
                // Open new bins until the value fits, or the last bin is reached.
                // Bins skipped over a gap in the data stay as empty sets.
                while (v > split && i < last) {
                    i++;
                    split += delta;
                    pids = DBIDUtil.newHashSet();
                    dimparts[i] = pids;
                }
                // Every object goes into the bin that is current after advancing.
                pids.add(iter);
            }
            // Bins that were never opened (this should only happen through floating
            // point rounding of the split values) reference the last set, so that
            // no entry of a partitioned dimension is left null.
            for (++i; i < dimparts.length; ++i) {
                dimparts[i] = pids;
            }
        } else {
            // Flag whole dimension as bad
            partitions[d] = null;
        }
    }
    return partitions;
}
Also used : HashSetModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.HashSetModifiableDBIDs) SortDBIDsBySingleDimension(de.lmu.ifi.dbs.elki.data.VectorUtil.SortDBIDsBySingleDimension) ArrayModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs) DBIDArrayIter(de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter) SetDBIDs(de.lmu.ifi.dbs.elki.database.ids.SetDBIDs)

Example 9 with HashSetModifiableDBIDs

use of de.lmu.ifi.dbs.elki.database.ids.HashSetModifiableDBIDs in project elki by elki-project.

the class CLIQUEUnit method join.

/**
 * Joins this unit with the specified unit.
 *
 * The join is only attempted when the dimension of this unit's last interval
 * is strictly smaller than that of the other unit's last interval, and it
 * requires all intervals except the last one to be equal in both units. The
 * joined unit consists of the shared intervals followed by both last
 * intervals, and contains the ids present in both units.
 *
 * @param other the unit to be joined
 * @param all the overall number of feature vectors
 * @param tau the density threshold for the selectivity of a unit
 * @return the joined unit if the selectivity of the join result is equal or
 *         greater than tau, null otherwise
 */
public CLIQUEUnit<V> join(CLIQUEUnit<V> other, double all, double tau) {
    final int prefixLength = this.intervals.size() - 1;
    final CLIQUEInterval ownLast = this.intervals.get(prefixLength);
    final CLIQUEInterval otherLast = other.intervals.get(other.intervals.size() - 1);
    // Only join in ascending order of the last dimension.
    if (ownLast.getDimension() >= otherLast.getDimension()) {
        return null;
    }

    // All intervals before the last one must agree pairwise.
    final Iterator<CLIQUEInterval> ownIter = this.intervals.iterator();
    final Iterator<CLIQUEInterval> otherIter = other.intervals.iterator();
    final ArrayList<CLIQUEInterval> resultIntervals = new ArrayList<>();
    for (int remaining = prefixLength; remaining > 0; remaining--) {
        final CLIQUEInterval own = ownIter.next();
        final CLIQUEInterval theirs = otherIter.next();
        if (!own.equals(theirs)) {
            return null;
        }
        resultIntervals.add(own);
    }
    resultIntervals.add(ownLast);
    resultIntervals.add(otherLast);

    // The joined unit contains only ids that are in both units.
    final HashSetModifiableDBIDs resultIDs = DBIDUtil.newHashSet(this.ids);
    resultIDs.retainAll(other.ids);
    final double selectivity = resultIDs.size() / all;
    return selectivity >= tau ? new CLIQUEUnit<>(resultIntervals, resultIDs) : null;
}
Also used : HashSetModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.HashSetModifiableDBIDs) ArrayList(java.util.ArrayList)

Example 10 with HashSetModifiableDBIDs

use of de.lmu.ifi.dbs.elki.database.ids.HashSetModifiableDBIDs in project elki by elki-project.

the class AbstractAggarwalYuOutlier method computeSubspace.

/**
 * Method to get the ids in the given subspace.
 *
 * The result is the intersection of the id ranges selected by the entries of
 * {@code subspace}; each entry selects a list of ranges via {@code first}
 * and one range within it via {@code second}.
 *
 * @param subspace Subspace to process
 * @param ranges List of DBID ranges
 * @return ids
 */
protected DBIDs computeSubspace(ArrayList<IntIntPair> subspace, ArrayList<ArrayList<DBIDs>> ranges) {
    // Start with a copy of the range selected by the first entry.
    // NOTE(review): the first entry uses 'second' as is, while all further
    // entries subtract GENE_OFFSET - confirm against the callers which of the
    // two indexing conventions is intended.
    final IntIntPair head = subspace.get(0);
    final HashSetModifiableDBIDs ids = DBIDUtil.newHashSet(ranges.get(head.first).get(head.second));
    // intersect all selected dimensions
    int pos = 1;
    while (pos < subspace.size()) {
        final IntIntPair entry = subspace.get(pos);
        pos++;
        ids.retainAll(ranges.get(entry.first).get(entry.second - GENE_OFFSET));
        if (ids.size() == 0) {
            // Nothing left to intersect, the remaining entries cannot add ids.
            break;
        }
    }
    return ids;
}
Also used : HashSetModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.HashSetModifiableDBIDs) ArrayModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs) DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) HashSetModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.HashSetModifiableDBIDs)

Aggregations

HashSetModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.HashSetModifiableDBIDs)21 DBIDIter (de.lmu.ifi.dbs.elki.database.ids.DBIDIter)8 DBIDs (de.lmu.ifi.dbs.elki.database.ids.DBIDs)5 ModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs)5 ModifiableDoubleDBIDList (de.lmu.ifi.dbs.elki.database.ids.ModifiableDoubleDBIDList)5 ArrayList (java.util.ArrayList)5 DBID (de.lmu.ifi.dbs.elki.database.ids.DBID)4 Test (org.junit.Test)4 ArrayModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs)3 NumberVector (de.lmu.ifi.dbs.elki.data.NumberVector)2 DBIDArrayIter (de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter)2 KNNList (de.lmu.ifi.dbs.elki.database.ids.KNNList)2 SetDBIDs (de.lmu.ifi.dbs.elki.database.ids.SetDBIDs)2 FiniteProgress (de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress)2 PCAFilteredResult (de.lmu.ifi.dbs.elki.math.linearalgebra.pca.PCAFilteredResult)2 SortedEigenPairs (de.lmu.ifi.dbs.elki.math.linearalgebra.pca.SortedEigenPairs)2 PreDeConModel (de.lmu.ifi.dbs.elki.algorithm.clustering.gdbscan.PreDeConNeighborPredicate.PreDeConModel)1 Cluster (de.lmu.ifi.dbs.elki.data.Cluster)1 Clustering (de.lmu.ifi.dbs.elki.data.Clustering)1 DoubleVector (de.lmu.ifi.dbs.elki.data.DoubleVector)1