use of de.lmu.ifi.dbs.elki.database.ids.HashSetModifiableDBIDs in project elki by elki-project.
the class ROCEvaluationTest method testROCCurve.
/**
 * Verifies ROC curve materialization (including the removal of redundant
 * curve points) and both ways of computing the ROC AUC on a small,
 * hand-checked ranking.
 */
@Test
public void testROCCurve() {
  // Objects 1..5 are the positives; 6..9 only occur in the ranking below.
  HashSetModifiableDBIDs positive = DBIDUtil.newHashSet();
  for (int id = 1; id <= 5; id++) {
    positive.add(DBIDUtil.importInteger(id));
  }

  // Ranking fixture. Expected curve points (x,y), "++" = kept in the
  // simplified curve, "--" = redundant and expected to be removed:
  //
  // score | id | class | point  | kept
  // ------+----+-------+--------+-----
  //   -   |  - |   -   | 0.0,0. | ++ (starting point)
  //  0.0  |  1 |   +   | 0.0,.2 | --
  //  1.0  |  2 |   +   | 0.0,.4 | ++
  //  2.0  |  6 |   -   | .25,.4 | ++
  //  3.0  |  7 |   -   |        |
  //  3.0  |  3 |   +   | .50,.6 | --
  //  4.0  |  8 |   -   |        |
  //  4.0  |  4 |   +   | .75,.8 | ++
  //  5.0  |  9 |   -   | 1.0,.8 | ++
  //  6.0  |  5 |   +   | 1.0,1. | ++
  final double[] scores = { 0.0, 1.0, 2.0, 3.0, 3.0, 4.0, 4.0, 5.0, 6.0 };
  final int[] objects = { 1, 2, 6, 7, 3, 8, 4, 9, 5 };
  final ModifiableDoubleDBIDList distances = DBIDUtil.newDistanceDBIDList();
  for (int k = 0; k < objects.length; k++) {
    distances.add(scores[k], DBIDUtil.importInteger(objects[k]));
  }

  // Curve-based evaluation: six points must remain after simplification.
  XYCurve roccurve = ROCEvaluation.materializeROC(new DBIDsTest(positive), new DistanceResultAdapter(distances.iter()));
  assertEquals("ROC curve too complex", 6, roccurve.size());
  assertEquals("ROC AUC (curve) not correct.", 0.6, XYCurve.areaUnderCurve(roccurve), 1e-14);

  // Direct evaluation must yield the same area.
  assertEquals("ROC AUC (direct) not correct.", 0.6, new ROCEvaluation().evaluate(positive, distances), 1e-14);
}
use of de.lmu.ifi.dbs.elki.database.ids.HashSetModifiableDBIDs in project elki by elki-project.
the class AveragePrecisionEvaluationTest method testAveragePrecision.
/**
 * Verifies the average precision score on a small, hand-checked ranking.
 */
@Test
public void testAveragePrecision() {
  // Objects 1..5 are the positives; 6..9 only occur in the ranking below.
  HashSetModifiableDBIDs positive = DBIDUtil.newHashSet();
  for (int id = 1; id <= 5; id++) {
    positive.add(DBIDUtil.importInteger(id));
  }

  // Ranking fixture, with the precision at each positive object:
  //
  // score | id | precision
  // ------+----+----------
  //  0.0  |  1 | 1.0
  //  1.0  |  2 | 1.0
  //  2.0  |  6 |
  //  3.0  |  7 |
  //  3.0  |  3 | 0.6
  //  4.0  |  8 |
  //  4.0  |  4 | 4/7.
  //  5.0  |  9 |
  //  6.0  |  5 | 5/9.
  final double[] scores = { 0.0, 1.0, 2.0, 3.0, 3.0, 4.0, 4.0, 5.0, 6.0 };
  final int[] objects = { 1, 2, 6, 7, 3, 8, 4, 9, 5 };
  final ModifiableDoubleDBIDList distances = DBIDUtil.newDistanceDBIDList();
  for (int k = 0; k < objects.length; k++) {
    distances.add(scores[k], DBIDUtil.importInteger(objects[k]));
  }

  // Expected: (1+1+.6+4/7.+5/9.)/5 = 0.7453968253968254
  final double expected = 0.7453968253968254;
  assertEquals("Average precision not correct.", expected, new AveragePrecisionEvaluation().evaluate(positive, distances), 1e-14);
}
use of de.lmu.ifi.dbs.elki.database.ids.HashSetModifiableDBIDs in project elki by elki-project.
the class P3C method partitionData.
/**
 * Partition the data set into {@code bins} bins in each dimension
 * <i>independently</i>.
 *
 * This can be used to construct a grid approximation of the data using O(d n)
 * memory.
 *
 * When a dimension is found to be constant, it will not be partitioned, but
 * instead the corresponding array will be set to {@code null}.
 *
 * Within a non-constant dimension, every object is added to exactly one bin.
 * Bins that receive no object are represented by empty sets.
 *
 * @param relation Data relation to partition
 * @param bins Number of bins
 * @return Partitions of each dimension.
 */
private SetDBIDs[][] partitionData(final Relation<V> relation, final int bins) {
  final int dim = RelationUtil.dimensionality(relation);
  SetDBIDs[][] partitions = new SetDBIDs[dim][bins];
  ArrayModifiableDBIDs ids = DBIDUtil.newArray(relation.getDBIDs());
  // The iterator and the sorter are reused for every dimension.
  DBIDArrayIter iter = ids.iter();
  SortDBIDsBySingleDimension sorter = new VectorUtil.SortDBIDsBySingleDimension(relation, 0);
  for (int d = 0; d < dim; d++) {
    sorter.setDimension(d);
    ids.sort(sorter);
    // Minimum: first object after sorting by dimension d.
    iter.seek(0);
    double min = relation.get(iter).doubleValue(d);
    // Extent: last object after sorting; delta is the width of one bin.
    iter.seek(ids.size() - 1);
    double delta = (relation.get(iter).doubleValue(d) - min) / bins;
    if (delta > 0.) {
      SetDBIDs[] dimparts = partitions[d];
      final int last = dimparts.length - 1;
      // Upper boundary of the bin currently being filled.
      double split = min + delta;
      HashSetModifiableDBIDs pids = DBIDUtil.newHashSet();
      dimparts[0] = pids;
      int i = 0;
      for (iter.seek(0); iter.valid(); iter.advance()) {
        final double v = relation.get(iter).doubleValue(d);
        // Open new bins until the value fits, or the last bin is reached.
        // The last bin takes everything that remains, so that accumulated
        // rounding in 'split' cannot push the largest values out of range.
        while (v > split && i < last) {
          i++;
          split += delta;
          pids = DBIDUtil.newHashSet();
          dimparts[i] = pids;
        }
        // Always store the object: previously the object that triggered a
        // bin change was not added to any bin and was lost.
        pids.add(iter);
      }
      // Any bins that were never opened are pointed at the last set used, so
      // that no entry of a partitioned dimension is left null.
      // NOTE(review): these entries alias the same set instance; with the
      // loop above this should only be reachable in floating-point corner
      // cases - confirm that consumers tolerate the aliasing.
      for (++i; i < dimparts.length; ++i) {
        dimparts[i] = pids;
      }
    } else {
      // Flag whole dimension as bad
      partitions[d] = null;
    }
  }
  return partitions;
}
use of de.lmu.ifi.dbs.elki.database.ids.HashSetModifiableDBIDs in project elki by elki-project.
the class CLIQUEUnit method join.
/**
 * Joins this unit with the specified unit.
 *
 * The join is only attempted when the last interval of this unit has a
 * strictly smaller dimension than the last interval of the other unit, and
 * all intervals of this unit except the last one are equal to the intervals
 * at the same positions in the other unit. The resulting unit consists of
 * the shared intervals followed by both last intervals, and contains the
 * ids present in both units.
 *
 * @param other the unit to be joined
 * @param all the overall number of feature vectors
 * @param tau the density threshold for the selectivity of a unit
 * @return the joined unit if the selectivity of the join result is equal or
 *         greater than tau, null otherwise
 */
public CLIQUEUnit<V> join(CLIQUEUnit<V> other, double all, double tau) {
  final int lastIdx = this.intervals.size() - 1;
  final CLIQUEInterval ownLast = this.intervals.get(lastIdx);
  final CLIQUEInterval otherLast = other.intervals.get(other.intervals.size() - 1);
  // Only join in one direction: the last dimensions must be increasing.
  if (ownLast.getDimension() >= otherLast.getDimension()) {
    return null;
  }

  // All intervals before the last one must coincide.
  final ArrayList<CLIQUEInterval> joined = new ArrayList<>();
  final Iterator<CLIQUEInterval> ownIt = this.intervals.iterator();
  final Iterator<CLIQUEInterval> otherIt = other.intervals.iterator();
  for (int remaining = lastIdx; remaining > 0; remaining--) {
    final CLIQUEInterval mine = ownIt.next();
    if (!mine.equals(otherIt.next())) {
      return null;
    }
    joined.add(mine);
  }
  joined.add(ownLast);
  joined.add(otherLast);

  // Intersect the members of both units.
  final HashSetModifiableDBIDs shared = DBIDUtil.newHashSet(this.ids);
  shared.retainAll(other.ids);

  // Keep the candidate only if it is dense enough.
  final double selectivity = shared.size() / all;
  return selectivity >= tau ? new CLIQUEUnit<V>(joined, shared) : null;
}
use of de.lmu.ifi.dbs.elki.database.ids.HashSetModifiableDBIDs in project elki by elki-project.
the class AbstractAggarwalYuOutlier method computeSubspace.
/**
 * Computes the ids contained in the given subspace, by intersecting the id
 * sets of all ranges selected by the subspace.
 *
 * Each pair selects one entry of {@code ranges}: {@code first} is used as
 * index into the outer list, {@code second} determines the index into the
 * inner list.
 *
 * @param subspace Subspace to process
 * @param ranges List of DBID ranges
 * @return ids
 */
protected DBIDs computeSubspace(ArrayList<IntIntPair> subspace, ArrayList<ArrayList<DBIDs>> ranges) {
  // NOTE(review): the first pair indexes with 'second' as-is, while every
  // later pair subtracts GENE_OFFSET. This asymmetry is preserved from the
  // existing code - confirm against the callers which form is intended.
  final IntIntPair head = subspace.get(0);
  final HashSetModifiableDBIDs ids = DBIDUtil.newHashSet(ranges.get(head.first).get(head.second));
  // Intersect with every further selected range.
  for (int pos = 1, end = subspace.size(); pos < end; pos++) {
    final IntIntPair sel = subspace.get(pos);
    ids.retainAll(ranges.get(sel.first).get(sel.second - GENE_OFFSET));
    if (ids.size() == 0) {
      // Nothing left; further intersections cannot add anything.
      return ids;
    }
  }
  return ids;
}
Aggregations