Use of de.lmu.ifi.dbs.elki.database.ids.DBIDIter in project elki by elki-project.
Class DeLiCluTreeIndex, method insertAll.
/**
 * Adds all given objects to this index. When the index supports bulk
 * loading, one leaf entry is created per object and the entries are loaded
 * in a single bulk operation; otherwise the objects are inserted one at a
 * time.
 * <p>
 * Nothing is inserted when {@code ids} is empty or holds exactly one object.
 *
 * @param ids the objects to be inserted
 */
@Override
public final void insertAll(DBIDs ids) {
  // Nothing to do for zero or one object.
  if (ids.isEmpty() || ids.size() == 1) {
    return;
  }
  if (!canBulkLoad()) {
    // No bulk loading available: fall back to sequential insertion.
    DBIDIter cursor = ids.iter();
    while (cursor.valid()) {
      insert(cursor);
      cursor.advance();
    }
  } else {
    // Wrap every object in a leaf entry, then load them all at once.
    final List<DeLiCluEntry> entries = new ArrayList<>(ids.size());
    DBIDIter cursor = ids.iter();
    while (cursor.valid()) {
      entries.add(createNewLeafEntry(DBIDUtil.deref(cursor)));
      cursor.advance();
    }
    bulkLoad(entries);
  }
  doExtraIntegrityChecks();
}
Use of de.lmu.ifi.dbs.elki.database.ids.DBIDIter in project elki by elki-project.
Class EMOutlier, method run.
/**
 * Runs the algorithm in the timed evaluation part.
 * <p>
 * EM clustering is run with soft assignments enabled. For every object the
 * score is the minimum of {@code 1 - p} over its cluster probabilities
 * {@code p}. The clustering result is attached to the returned outlier
 * result as a child.
 *
 * @param database Database to process
 * @param relation Relation to process
 * @return Outlier result
 * @throws IllegalStateException if the EM result does not contain a soft
 *         assignment relation
 */
public OutlierResult run(Database database, Relation<V> relation) {
  emClustering.setSoft(true);
  Clustering<?> emresult = emClustering.run(database, relation);
  // Locate the soft assignment relation among the children of the EM result.
  // If several children match, the last one is used.
  Relation<double[]> soft = null;
  for (It<Relation<double[]>> iter = emresult.getHierarchy().iterChildren(emresult).filter(Relation.class); iter.valid(); iter.advance()) {
    if (iter.get().getDataTypeInformation() == EM.SOFT_TYPE) {
      soft = iter.get();
    }
  }
  // Fail with a clear message instead of a NullPointerException further down.
  if (soft == null) {
    throw new IllegalStateException("EM clustering result does not contain a soft assignment relation.");
  }
  double globmax = 0.0;
  WritableDoubleDataStore emo_score = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_TEMP | DataStoreFactory.HINT_HOT);
  for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
    // Smallest value of (1 - probability) over all cluster probabilities.
    double outlierScore = Double.POSITIVE_INFINITY;
    double[] probs = soft.get(iditer);
    for (double prob : probs) {
      outlierScore = Math.min(1. - prob, outlierScore);
    }
    emo_score.putDouble(iditer, outlierScore);
    // Track the largest score; it is passed to the score meta below.
    globmax = Math.max(outlierScore, globmax);
  }
  DoubleRelation scoreres = new MaterializedDoubleRelation("EM outlier scores", "em-outlier", emo_score, relation.getDBIDs());
  OutlierScoreMeta meta = new ProbabilisticOutlierScore(0.0, globmax);
  // combine results.
  OutlierResult result = new OutlierResult(meta, scoreres);
  // TODO: add a keep-EM flag?
  result.addChildResult(emresult);
  return result;
}
Use of de.lmu.ifi.dbs.elki.database.ids.DBIDIter in project elki by elki-project.
Class SilhouetteOutlierDetection, method run.
/**
 * Runs the clustering algorithm on the database and assigns every clustered
 * object the score {@code (min - a) / max(min, a)}, where {@code a} is the
 * average distance to the other members of its own cluster and {@code min}
 * is the smallest average distance to any other cluster. Noise clusters and
 * clusters with at most one member are handled according to
 * {@code noiseOption}.
 *
 * @param database Database to process
 * @return Outlier result holding the scores as "Silhouette Coefficients"
 */
@Override
public OutlierResult run(Database database) {
Relation<O> relation = database.getRelation(getDistanceFunction().getInputTypeRestriction());
DistanceQuery<O> dq = database.getDistanceQuery(relation, getDistanceFunction());
// TODO: improve ELKI api to ensure we're using the same DBIDs!
Clustering<?> c = clusterer.run(database);
WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_DB);
// Tracks the minimum and maximum score written, for the score meta below.
DoubleMinMax mm = new DoubleMinMax();
List<? extends Cluster<?>> clusters = c.getAllClusters();
for (Cluster<?> cluster : clusters) {
// Noise clusters and clusters with at most one member get special handling.
if (cluster.size() <= 1 || cluster.isNoise()) {
switch(noiseOption) {
case IGNORE_NOISE:
case TREAT_NOISE_AS_SINGLETONS:
// As suggested in Rousseeuw, we use 0 for singletons.
for (DBIDIter iter = cluster.getIDs().iter(); iter.valid(); iter.advance()) {
scores.put(iter, 0.);
}
mm.put(0.);
// This 'continue' targets the enclosing cluster loop: skip the pairwise
// computation for this cluster.
continue;
case MERGE_NOISE:
// Treat as cluster below
break;
}
}
ArrayDBIDs ids = DBIDUtil.ensureArray(cluster.getIDs());
// temporary storage.
// as[i] accumulates the distances from objects at earlier offsets to the
// object at offset i, so each in-cluster pair is computed only once
// (assumes dq.distance is symmetric - TODO confirm).
double[] as = new double[ids.size()];
DBIDArrayIter it1 = ids.iter(), it2 = ids.iter();
for (it1.seek(0); it1.valid(); it1.advance()) {
// a: In-cluster distances
// Already computed distances
double a = as[it1.getOffset()];
// Only pairs with a later offset remain to be computed.
for (it2.seek(it1.getOffset() + 1); it2.valid(); it2.advance()) {
final double dist = dq.distance(it1, it2);
a += dist;
as[it2.getOffset()] += dist;
}
// Average over the other members of the cluster.
// NOTE(review): a one-element cluster can reach this point under
// MERGE_NOISE, which makes this 0. / 0 (NaN) - confirm this is intended.
a /= (ids.size() - 1);
// b: other clusters:
// min: smallest average distance from it1 to any other cluster seen so far.
double min = Double.POSITIVE_INFINITY;
for (Cluster<?> ocluster : clusters) {
if (ocluster == /* yes, reference identity */
cluster) {
continue;
}
if (ocluster.isNoise()) {
switch(noiseOption) {
case IGNORE_NOISE:
// Noise clusters are not considered as "other cluster" candidates.
continue;
case MERGE_NOISE:
// No special treatment
break;
case TREAT_NOISE_AS_SINGLETONS:
// Treat noise cluster as singletons:
// each noise object competes with its own distance instead of an average.
for (DBIDIter it3 = ocluster.getIDs().iter(); it3.valid(); it3.advance()) {
double dist = dq.distance(it1, it3);
if (dist < min) {
min = dist;
}
}
continue;
}
}
// Average distance from it1 to all members of the other cluster.
final DBIDs oids = ocluster.getIDs();
double b = 0.;
for (DBIDIter it3 = oids.iter(); it3.valid(); it3.advance()) {
b += dq.distance(it1, it3);
}
b /= oids.size();
if (b < min) {
min = b;
}
}
// NOTE(review): if no other cluster contributed, min is still
// POSITIVE_INFINITY and this expression evaluates to NaN - confirm.
final double score = (min - a) / Math.max(min, a);
scores.put(it1, score);
mm.put(score);
}
}
// Build result representation.
DoubleRelation scoreResult = new MaterializedDoubleRelation("Silhouette Coefficients", "silhouette-outlier", scores, relation.getDBIDs());
// Observed min/max plus the constants -1, 1 and .5; presumably the
// theoretical bounds and a baseline - verify against InvertedOutlierScoreMeta.
OutlierScoreMeta scoreMeta = new InvertedOutlierScoreMeta(mm.getMin(), mm.getMax(), -1., 1., .5);
return new OutlierResult(scoreMeta, scoreResult);
}
Use of de.lmu.ifi.dbs.elki.database.ids.DBIDIter in project elki by elki-project.
Class OutRankS1, method run.
/**
 * Runs the clustering algorithm and derives a score per object from it.
 * Every score starts at zero. For each cluster an object belongs to, its
 * score grows by {@code alpha * relsize + (1 - alpha) * reldim}, where
 * {@code relsize} is the cluster size relative to the largest cluster and
 * {@code reldim} is the cluster's subspace dimensionality relative to the
 * largest dimensionality found.
 *
 * @param database Database to process
 * @return Outlier result, with the clustering attached as child result
 */
@Override
public OutlierResult run(Database database) {
  final DBIDs allIds = database.getRelation(TypeUtil.ANY).getDBIDs();
  // Run the primary algorithm
  final Clustering<? extends SubspaceModel> clustering = clusteralg.run(database);
  final WritableDoubleDataStore score = DataStoreUtil.makeDoubleStorage(allIds, DataStoreFactory.HINT_HOT);
  // Every object starts with a score of zero.
  DBIDIter init = allIds.iter();
  while (init.valid()) {
    score.putDouble(init, 0);
    init.advance();
  }
  // First pass: determine the largest cluster size and dimensionality.
  int largestSize = 0;
  int largestDim = 0;
  for (Cluster<? extends SubspaceModel> candidate : clustering.getAllClusters()) {
    final int size = candidate.size();
    if (size > largestSize) {
      largestSize = size;
    }
    final int card = BitsUtil.cardinality(candidate.getModel().getDimensions());
    if (card > largestDim) {
      largestDim = card;
    }
  }
  // Second pass: add each cluster's contribution to its members' scores.
  final DoubleMinMax range = new DoubleMinMax();
  for (Cluster<? extends SubspaceModel> current : clustering.getAllClusters()) {
    final double relsize = current.size() / (double) largestSize;
    final double reldim = BitsUtil.cardinality(current.getModel().getDimensions()) / (double) largestDim;
    DBIDIter member = current.getIDs().iter();
    while (member.valid()) {
      // Same left-to-right summation order as before: previous + size + dim.
      final double updated = score.doubleValue(member) + alpha * relsize + (1 - alpha) * reldim;
      score.putDouble(member, updated);
      range.put(updated);
      member.advance();
    }
  }
  final DoubleRelation scoreResult = new MaterializedDoubleRelation("OutRank-S1", "OUTRANK_S1", score, allIds);
  final OutlierScoreMeta meta = new InvertedOutlierScoreMeta(range.getMin(), range.getMax(), 0, Double.POSITIVE_INFINITY);
  final OutlierResult outcome = new OutlierResult(meta, scoreResult);
  outcome.addChildResult(clustering);
  return outcome;
}
Use of de.lmu.ifi.dbs.elki.database.ids.DBIDIter in project elki by elki-project.
Class AddSingleScale, method run.
/**
 * Builds the scales for a single vector relation. All dimensions share one
 * linear scale: either the predefined {@code minmax} range or, when none is
 * set, the range of all non-NaN values found in the relation.
 *
 * @param rel Relation
 * @return Scales
 */
private ScalesResult run(Relation<? extends NumberVector> rel) {
  final int dim = RelationUtil.dimensionality(rel);
  final LinearScale[] scales = new LinearScale[dim];
  final LinearScale shared;
  if (minmax != null) {
    // Use predefined.
    shared = new LinearScale(minmax[0], minmax[1]);
  } else {
    // Scan every value of every vector to find the overall range.
    final DoubleMinMax range = new DoubleMinMax();
    DBIDIter cursor = rel.iterDBIDs();
    while (cursor.valid()) {
      final NumberVector vec = rel.get(cursor);
      for (int d = 0; d < dim; d++) {
        final double value = vec.doubleValue(d);
        // NaN values do not contribute to the range.
        if (!Double.isNaN(value)) {
          range.put(value);
        }
      }
      cursor.advance();
    }
    shared = new LinearScale(range.getMin(), range.getMax());
  }
  // The same scale instance is used for every dimension.
  for (int i = 0; i < dim; i++) {
    scales[i] = shared;
  }
  return new ScalesResult(scales);
}
Aggregations