use of de.lmu.ifi.dbs.elki.database.ids.DBIDIter in project elki by elki-project.
the class COP method run.
/**
* Process a single relation.
*
* @param relation Relation to process
* @return Outlier detection result
*/
public OutlierResult run(Relation<V> relation) {
final DBIDs ids = relation.getDBIDs();
KNNQuery<V> knnQuery = QueryUtil.getKNNQuery(relation, getDistanceFunction(), k + 1);
final int dim = RelationUtil.dimensionality(relation);
if (k <= dim + 1) {
LOG.warning("PCA is underspecified with a too low k! k should be at much larger than " + dim);
}
WritableDoubleDataStore cop_score = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_STATIC);
WritableDataStore<double[]> cop_err_v = null;
WritableIntegerDataStore cop_dim = null;
if (models) {
cop_err_v = DataStoreUtil.makeStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_STATIC, double[].class);
cop_dim = DataStoreUtil.makeIntegerStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_STATIC, -1);
}
// compute neighbors of each db object
FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Correlation Outlier Probabilities", relation.size(), LOG) : null;
for (DBIDIter id = ids.iter(); id.valid(); id.advance()) {
KNNList neighbors = knnQuery.getKNNForDBID(id, k + 1);
ModifiableDBIDs nids = DBIDUtil.newHashSet(neighbors);
// Do not use query object
nids.remove(id);
double[] centroid = Centroid.make(relation, nids).getArrayRef();
double[] relative = minusEquals(relation.get(id).toArray(), centroid);
PCAResult pcares = pca.processIds(nids, relation);
double[][] evecs = pcares.getEigenvectors();
double[] projected = transposeTimes(evecs, relative);
double[] evs = pcares.getEigenvalues();
double min = Double.POSITIVE_INFINITY;
int vdim = dim;
switch(dist) {
case CHISQUARED:
{
double sqdevs = 0;
for (int d = 0; d < dim; d++) {
// Scale with Stddev
double dev = projected[d];
// Accumulate
sqdevs += dev * dev / evs[d];
// Evaluate
double score = 1 - ChiSquaredDistribution.cdf(sqdevs, d + 1);
if (score < min) {
min = score;
vdim = d + 1;
}
}
break;
}
case GAMMA:
{
double[][] dists = new double[dim][nids.size()];
int j = 0;
double[] srel = new double[dim];
for (DBIDIter s = nids.iter(); s.valid() && j < nids.size(); s.advance()) {
V vec = relation.get(s);
for (int d = 0; d < dim; d++) {
srel[d] = vec.doubleValue(d) - centroid[d];
}
double[] serr = transposeTimes(evecs, srel);
double sqdist = 0.0;
for (int d = 0; d < dim; d++) {
double serrd = serr[d];
sqdist += serrd * serrd / evs[d];
dists[d][j] = sqdist;
}
j++;
}
double sqdevs = 0;
for (int d = 0; d < dim; d++) {
// Scale with Stddev
final double dev = projected[d];
// Accumulate
sqdevs += dev * dev / evs[d];
// Sort, so we can trim the top 15% below.
Arrays.sort(dists[d]);
// Evaluate
double score = 1 - GammaChoiWetteEstimator.STATIC.estimate(dists[d], SHORTENED_ARRAY).cdf(sqdevs);
if (score < min) {
min = score;
vdim = d + 1;
}
}
break;
}
}
// Normalize the value
final double prob = expect * (1 - min) / (expect + min);
// Construct the error vector:
for (int d = vdim; d < dim; d++) {
projected[d] = 0.;
}
double[] ev = timesEquals(times(evecs, projected), -1 * prob);
cop_score.putDouble(id, prob);
if (models) {
cop_err_v.put(id, ev);
cop_dim.putInt(id, dim + 1 - vdim);
}
LOG.incrementProcessed(prog);
}
LOG.ensureCompleted(prog);
// combine results.
DoubleRelation scoreResult = new MaterializedDoubleRelation("Correlation Outlier Probabilities", COP_SCORES, cop_score, ids);
OutlierScoreMeta scoreMeta = new ProbabilisticOutlierScore();
OutlierResult result = new OutlierResult(scoreMeta, scoreResult);
if (models) {
result.addChildResult(new MaterializedRelation<>("Local Dimensionality", COP_DIM, TypeUtil.INTEGER, cop_dim, ids));
result.addChildResult(new MaterializedRelation<>("Error vectors", COP_ERRORVEC, TypeUtil.DOUBLE_ARRAY, cop_err_v, ids));
}
return result;
}
use of de.lmu.ifi.dbs.elki.database.ids.DBIDIter in project elki by elki-project.
the class AbstractLayout3DPC method computeSimilarityMatrix.
/**
* Compute a column-wise dependency matrix for the given relation.
*
* @param sim Dependence measure
* @param rel Vector relation
* @return Similarity matrix (lower triangular form)
*/
public static double[] computeSimilarityMatrix(DependenceMeasure sim, Relation<? extends NumberVector> rel) {
final int dim = RelationUtil.dimensionality(rel);
final int size = rel.size();
// TODO: we could use less memory (no copy), but this would likely be
// slower. Maybe as a fallback option?
double[][] data = new double[dim][size];
int r = 0;
for (DBIDIter it = rel.iterDBIDs(); it.valid(); it.advance(), r++) {
NumberVector v = rel.get(it);
for (int d = 0; d < dim; d++) {
data[d][r] = v.doubleValue(d);
}
}
return sim.dependence(DoubleArrayAdapter.STATIC, Arrays.asList(data));
}
use of de.lmu.ifi.dbs.elki.database.ids.DBIDIter in project elki by elki-project.
the class SharedNearestNeighborPreprocessor method initialize.
@Override
public void initialize() {
if (getLogger().isVerbose()) {
getLogger().verbose("Assigning nearest neighbor lists to database objects");
}
storage = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP, ArrayDBIDs.class);
KNNQuery<O> knnquery = QueryUtil.getKNNQuery(relation, distanceFunction, numberOfNeighbors);
FiniteProgress progress = getLogger().isVerbose() ? new FiniteProgress("assigning nearest neighbor lists", relation.size(), getLogger()) : null;
for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
ArrayModifiableDBIDs neighbors = DBIDUtil.newArray(numberOfNeighbors);
DBIDs kNN = knnquery.getKNNForDBID(iditer, numberOfNeighbors);
for (DBIDIter iter = kNN.iter(); iter.valid(); iter.advance()) {
// if(!id.equals(nid)) {
neighbors.add(iter);
// Size limitation to exactly numberOfNeighbors
if (neighbors.size() >= numberOfNeighbors) {
break;
}
}
neighbors.sort();
storage.put(iditer, neighbors);
getLogger().incrementProcessed(progress);
}
getLogger().ensureCompleted(progress);
}
use of de.lmu.ifi.dbs.elki.database.ids.DBIDIter in project elki by elki-project.
the class RdKNNTree method reverseKNNQuery.
public DoubleDBIDList reverseKNNQuery(DBID oid, int k, SpatialPrimitiveDistanceFunction<? super O> distanceFunction, KNNQuery<O> knnQuery) {
checkDistanceFunction(distanceFunction);
if (k > settings.k_max) {
throw new IllegalArgumentException("Parameter k is not supported, k > k_max: " + k + " > " + settings.k_max);
}
// get candidates
ModifiableDoubleDBIDList candidates = DBIDUtil.newDistanceDBIDList();
doReverseKNN(getRoot(), oid, candidates);
if (k == settings.k_max) {
candidates.sort();
return candidates;
}
// refinement of candidates, if k < k_max
ArrayModifiableDBIDs candidateIDs = DBIDUtil.newArray(candidates);
candidateIDs.sort();
List<? extends KNNList> knnLists = knnQuery.getKNNForBulkDBIDs(candidateIDs, k);
ModifiableDoubleDBIDList result = DBIDUtil.newDistanceDBIDList();
int i = 0;
for (DBIDIter iter = candidateIDs.iter(); iter.valid(); iter.advance(), i++) {
for (DoubleDBIDListIter qr = knnLists.get(i).iter(); qr.valid(); qr.advance()) {
if (DBIDUtil.equal(oid, qr)) {
result.add(qr.doubleValue(), iter);
break;
}
}
}
result.sort();
return result;
}
use of de.lmu.ifi.dbs.elki.database.ids.DBIDIter in project elki by elki-project.
the class RStarTreeIndex method insertAll.
/**
* Inserts the specified objects into this index. If a bulk load mode is
* implemented, the objects are inserted in one bulk.
*
* @param ids the objects to be inserted
*/
@Override
public void insertAll(DBIDs ids) {
if (ids.isEmpty() || (ids.size() == 1)) {
return;
}
// Make an example leaf
if (canBulkLoad()) {
List<SpatialEntry> leafs = new ArrayList<>(ids.size());
for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
leafs.add(createNewLeafEntry(iter));
}
bulkLoad(leafs);
} else {
for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
insert(DBIDUtil.deref(iter));
}
}
doExtraIntegrityChecks();
}
Aggregations