use of de.lmu.ifi.dbs.elki.utilities.pairs.Pair in project elki by elki-project.
the class OnlineLOF method run.
/**
 * Runs the generalized LOF computation on the given relation by delegating to
 * {@code doRunInTime} of the superclass, stores the reverse kNN queries in the
 * result, and registers a {@link LOFKNNListener} with the preprocessors that
 * back both kNN queries of the result.
 *
 * @param database Database used to obtain the queries
 * @param relation Data to process
 * @return the outlier result held by the LOF result
 */
@Override
public OutlierResult run(Database database, Relation<O> relation) {
  final StepProgress stepprog;
  if (LOG.isVerbose()) {
    stepprog = new StepProgress("OnlineLOF", 3);
  } else {
    stepprog = null;
  }

  // First pair: kNN queries (reference, reachability);
  // second pair: RkNN queries (reference, reachability).
  final Pair<Pair<KNNQuery<O>, KNNQuery<O>>, Pair<RKNNQuery<O>, RKNNQuery<O>>> queries = getKNNAndRkNNQueries(database, relation, stepprog);
  final Pair<KNNQuery<O>, KNNQuery<O>> knnQueries = queries.getFirst();
  final Pair<RKNNQuery<O>, RKNNQuery<O>> rknnQueries = queries.getSecond();

  final LOFResult<O> lofResult = super.doRunInTime(relation.getDBIDs(), knnQueries.getFirst(), knnQueries.getSecond(), stepprog);
  lofResult.setRkNNRefer(rknnQueries.getFirst());
  lofResult.setRkNNReach(rknnQueries.getSecond());

  // Attach one shared listener to the preprocessor behind each kNN query.
  final KNNListener listener = new LOFKNNListener(lofResult);

  final PreprocessorKNNQuery<O> referQuery = (PreprocessorKNNQuery<O>) lofResult.getKNNRefer();
  final MaterializeKNNPreprocessor<O> referPreproc = (MaterializeKNNPreprocessor<O>) referQuery.getPreprocessor();
  referPreproc.addKNNListener(listener);

  final PreprocessorKNNQuery<O> reachQuery = (PreprocessorKNNQuery<O>) lofResult.getKNNReach();
  final MaterializeKNNPreprocessor<O> reachPreproc = (MaterializeKNNPreprocessor<O>) reachQuery.getPreprocessor();
  reachPreproc.addKNNListener(listener);

  return lofResult.getResult();
}
use of de.lmu.ifi.dbs.elki.utilities.pairs.Pair in project elki by elki-project.
the class OnlineLOF method getKNNAndRkNNQueries.
/**
 * Get the kNN and rkNN queries for the algorithm.
 *
 * For each of the two distance functions (reference and reachability), the
 * database is first asked for optimized kNN and RkNN queries. If either one
 * is not available ({@code null}), a
 * {@link MaterializeKNNAndRKNNPreprocessor} is created, both queries are taken
 * from it, and the preprocessor is added to the database hierarchy as an
 * index of the relation.
 *
 * @param database Database to obtain the queries from
 * @param relation Data
 * @param stepprog Progress logger, may be {@code null}
 * @return the kNN and rkNN queries: the first pair holds the kNN queries
 *         (reference, reachability), the second pair holds the RkNN queries
 *         (reference, reachability)
 */
private Pair<Pair<KNNQuery<O>, KNNQuery<O>>, Pair<RKNNQuery<O>, RKNNQuery<O>>> getKNNAndRkNNQueries(Database database, Relation<O> relation, StepProgress stepprog) {
  DistanceQuery<O> drefQ = database.getDistanceQuery(relation, referenceDistanceFunction);
  // Use "HEAVY" flag, since this is an online algorithm
  KNNQuery<O> kNNRefer = database.getKNNQuery(drefQ, krefer, DatabaseQuery.HINT_HEAVY_USE, DatabaseQuery.HINT_OPTIMIZED_ONLY, DatabaseQuery.HINT_NO_CACHE);
  RKNNQuery<O> rkNNRefer = database.getRKNNQuery(drefQ, DatabaseQuery.HINT_HEAVY_USE, DatabaseQuery.HINT_OPTIMIZED_ONLY, DatabaseQuery.HINT_NO_CACHE);
  // No optimized kNN query or RkNN query - use a preprocessor!
  if (kNNRefer == null || rkNNRefer == null) {
    if (stepprog != null) {
      stepprog.beginStep(1, "Materializing neighborhood w.r.t. reference neighborhood distance function.", LOG);
    }
    MaterializeKNNAndRKNNPreprocessor<O> preproc = new MaterializeKNNAndRKNNPreprocessor<>(relation, referenceDistanceFunction, krefer);
    kNNRefer = preproc.getKNNQuery(drefQ, krefer, DatabaseQuery.HINT_HEAVY_USE);
    rkNNRefer = preproc.getRKNNQuery(drefQ, krefer, DatabaseQuery.HINT_HEAVY_USE);
    // add as index
    database.getHierarchy().add(relation, preproc);
  } else {
    if (stepprog != null) {
      stepprog.beginStep(1, "Optimized neighborhood w.r.t. reference neighborhood distance function provided by database.", LOG);
    }
  }

  DistanceQuery<O> dreachQ = database.getDistanceQuery(relation, reachabilityDistanceFunction);
  KNNQuery<O> kNNReach = database.getKNNQuery(dreachQ, kreach, DatabaseQuery.HINT_HEAVY_USE, DatabaseQuery.HINT_OPTIMIZED_ONLY, DatabaseQuery.HINT_NO_CACHE);
  RKNNQuery<O> rkNNReach = database.getRKNNQuery(dreachQ, DatabaseQuery.HINT_HEAVY_USE, DatabaseQuery.HINT_OPTIMIZED_ONLY, DatabaseQuery.HINT_NO_CACHE);
  // Same fallback as above. Note that no step is logged here when the
  // database already provides optimized queries.
  if (kNNReach == null || rkNNReach == null) {
    if (stepprog != null) {
      stepprog.beginStep(2, "Materializing neighborhood w.r.t. reachability distance function.", LOG);
    }
    // The preprocessor is constructed directly from its arguments; no
    // parameterization object is needed.
    MaterializeKNNAndRKNNPreprocessor<O> preproc = new MaterializeKNNAndRKNNPreprocessor<>(relation, reachabilityDistanceFunction, kreach);
    kNNReach = preproc.getKNNQuery(dreachQ, kreach, DatabaseQuery.HINT_HEAVY_USE);
    rkNNReach = preproc.getRKNNQuery(dreachQ, kreach, DatabaseQuery.HINT_HEAVY_USE);
    // add as index
    database.getHierarchy().add(relation, preproc);
  }

  Pair<KNNQuery<O>, KNNQuery<O>> kNNPair = new Pair<>(kNNRefer, kNNReach);
  Pair<RKNNQuery<O>, RKNNQuery<O>> rkNNPair = new Pair<>(rkNNRefer, rkNNReach);
  return new Pair<>(kNNPair, rkNNPair);
}
use of de.lmu.ifi.dbs.elki.utilities.pairs.Pair in project elki by elki-project.
the class CTLuGLSBackwardSearchAlgorithm method singleIteration.
/**
 * Run a single iteration of the GLS-SOD modeling step.
 *
 * Builds the design matrix X (constant, both coordinates, their product and
 * their squares), the attribute matrix Y and the neighborhood matrix F, then
 * estimates the regression parameters and returns the object with the largest
 * standardized residual.
 *
 * @param relationx Geo relation (asserted to be 2-dimensional)
 * @param relationy Attribute relation
 * @return Top outlier and associated score
 */
private Pair<DBIDVar, Double> singleIteration(Relation<V> relationx, Relation<? extends NumberVector> relationy) {
  final int dim = RelationUtil.dimensionality(relationx);
  final int dimy = RelationUtil.dimensionality(relationy);
  assert (dim == 2);
  // k + 1, because the query object itself is skipped below.
  KNNQuery<V> knnQuery = QueryUtil.getKNNQuery(relationx, getDistanceFunction(), k + 1);
  // We need stable indexed DBIDs
  ArrayModifiableDBIDs ids = DBIDUtil.newArray(relationx.getDBIDs());
  // Sort, so we can do a binary search below.
  ids.sort();
  // init F,X,Z
  double[][] X = new double[ids.size()][6];
  double[][] F = new double[ids.size()][ids.size()];
  double[][] Y = new double[ids.size()][dimy];
  {
    int i = 0;
    for (DBIDIter id = ids.iter(); id.valid(); id.advance(), i++) {
      // Fill the data matrix
      {
        V vec = relationx.get(id);
        double la = vec.doubleValue(0);
        double lo = vec.doubleValue(1);
        X[i][0] = 1.0;
        X[i][1] = la;
        X[i][2] = lo;
        X[i][3] = la * lo;
        X[i][4] = la * la;
        X[i][5] = lo * lo;
      }
      // Copy the attribute vector into row i of Y.
      {
        final NumberVector vecy = relationy.get(id);
        for (int d = 0; d < dimy; d++) {
          double idy = vecy.doubleValue(d);
          Y[i][d] = idy;
        }
      }
      // Fill the neighborhood matrix F:
      {
        KNNList neighbors = knnQuery.getKNNForDBID(id, k + 1);
        ModifiableDBIDs neighborhood = DBIDUtil.newArray(neighbors.size());
        for (DBIDIter neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) {
          if (DBIDUtil.equal(id, neighbor)) {
            continue;
          }
          neighborhood.add(neighbor);
        }
        // Weight object itself positively.
        F[i][i] = 1.0;
        // Neighbors share a total weight of -1. This must be a floating-point
        // division: integer division truncates -1 / n to 0 for every n > 1
        // and throws ArithmeticException for an empty neighborhood.
        final double nweight = -1.0 / neighborhood.size();
        // We need to find the index positions of the neighbors,
        // unfortunately.
        for (DBIDIter iter = neighborhood.iter(); iter.valid(); iter.advance()) {
          int pos = ids.binarySearch(iter);
          assert (pos >= 0);
          F[pos][i] = nweight;
        }
      }
    }
  }
  // Estimate the parameter beta
  // Common term that we can save recomputing.
  double[][] common = times(transposeTimesTranspose(X, F), F);
  double[][] b = times(inverse(times(common, X)), times(common, Y));
  // Estimate sigma_0 and sigma:
  // sigma_sum_square = sigma_0*sigma_0 + sigma*sigma
  double[][] sigmaMat = times(F, minusEquals(times(X, b), times(F, Y)));
  final double sigma_sum_square = normF(sigmaMat) / (relationx.size() - 6 - 1);
  final double norm = 1 / FastMath.sqrt(sigma_sum_square);
  // calculate the absolute values of standard residuals
  double[][] E = timesEquals(times(F, minus(Y, times(X, b))), norm);
  // Find the object whose residual row has the largest value of squareSum.
  DBIDVar worstid = DBIDUtil.newVar();
  double worstscore = Double.NEGATIVE_INFINITY;
  int i = 0;
  for (DBIDIter id = ids.iter(); id.valid(); id.advance(), i++) {
    double err = squareSum(getRow(E, i));
    if (err > worstscore) {
      worstscore = err;
      worstid.set(id);
    }
  }
  return new Pair<>(worstid, FastMath.sqrt(worstscore));
}
use of de.lmu.ifi.dbs.elki.utilities.pairs.Pair in project elki by elki-project.
the class DiSH method checkClusters.
/**
 * Removes the clusters with fewer than {@code mu} objects from the cluster
 * map and adds their objects to a parent cluster, or to noise when no parent
 * is found. Clusters whose preference vector has no bit set are merged into
 * noise as well. The map is rebuilt in place and the noise cluster is stored
 * under the all-zero preference vector.
 *
 * @param relation the relation storing the objects
 * @param clustersMap the map containing the clusters
 */
private void checkClusters(Relation<V> relation, Object2ObjectMap<long[], List<ArrayModifiableDBIDs>> clustersMap) {
  final int dimensionality = RelationUtil.dimensionality(relation);
  // Noise is collected under the all-zero preference vector.
  final long[] noiseVector = BitsUtil.zero(dimensionality);
  final ArrayModifiableDBIDs noiseIds = DBIDUtil.newArray();

  // Clusters that are too small and still need a new home.
  final List<Pair<long[], ArrayModifiableDBIDs>> tooSmall = new ArrayList<>();
  final Object2ObjectMap<long[], List<ArrayModifiableDBIDs>> rebuilt = new Object2ObjectOpenCustomHashMap<>(BitsUtil.FASTUTIL_HASH_STRATEGY);

  for (long[] pv : clustersMap.keySet()) {
    final List<ArrayModifiableDBIDs> members = clustersMap.get(pv);
    if (BitsUtil.cardinality(pv) == 0) {
      // Existing noise: merge everything into the noise set.
      for (ArrayModifiableDBIDs ids : members) {
        noiseIds.addDBIDs(ids);
      }
      continue;
    }
    // Regular clusters: keep those that are large enough.
    final List<ArrayModifiableDBIDs> kept = new ArrayList<>(members.size());
    for (ArrayModifiableDBIDs ids : members) {
      if (BitsUtil.isZero(pv) || ids.size() >= mu) {
        kept.add(ids);
      } else {
        tooSmall.add(new Pair<>(pv, ids));
      }
    }
    rebuilt.put(pv, kept);
  }

  clustersMap.clear();
  clustersMap.putAll(rebuilt);

  // Hand the objects of each small cluster to its parent, or to noise.
  for (Pair<long[], ArrayModifiableDBIDs> small : tooSmall) {
    if (!small.second.isEmpty()) {
      final Pair<long[], ArrayModifiableDBIDs> parent = findParent(relation, small, clustersMap);
      final ArrayModifiableDBIDs target = (parent != null) ? parent.second : noiseIds;
      target.addDBIDs(small.second);
    }
  }

  final List<ArrayModifiableDBIDs> noiseList = new ArrayList<>(1);
  noiseList.add(noiseIds);
  clustersMap.put(noiseVector, noiseList);
}
use of de.lmu.ifi.dbs.elki.utilities.pairs.Pair in project elki by elki-project.
the class IndexStatistics method processNewResult.
/**
 * Collects the index trees found below the new result and attaches an
 * {@link IndexMetaResult} to the database, whose header lists the string
 * representation of every index found. Does nothing when no index tree is
 * found.
 *
 * @param hier Result hierarchy
 * @param newResult Newly added result to inspect
 */
@Override
public void processNewResult(ResultHierarchy hier, Result newResult) {
  Database db = ResultUtil.findDatabase(hier);
  final ArrayList<IndexTree<?, ?>> indexes = ResultUtil.filterResults(hier, newResult, IndexTree.class);
  if (indexes == null || indexes.isEmpty()) {
    return;
  }
  // Allocate the header once, so that every index is listed. Allocating it
  // inside the loop would discard all entries but the last one.
  Collection<String> header = new ArrayList<>(indexes.size());
  for (IndexTree<?, ?> index : indexes) {
    header.add(index.toString());
  }
  Collection<Pair<String, String>> col = new ArrayList<>();
  IndexMetaResult analysis = new IndexMetaResult(col, header);
  db.getHierarchy().add(db, analysis);
}
Aggregations