use of de.lmu.ifi.dbs.elki.database.ids.DBIDIter in project elki by elki-project.
the class CTLuGLSBackwardSearchAlgorithm method singleIteration.
/**
 * Run a single iteration of the GLS-SOD modeling step.
 * <p>
 * Builds a design matrix X (constant, linear, mixed and squared terms of the
 * two coordinates), an attribute matrix Y and a neighborhood matrix F, fits
 * the regression coefficients, and reports the object whose standardized
 * residual has the largest squared sum.
 *
 * @param relationx Geo relation
 * @param relationy Attribute relation
 * @return Top outlier and associated score
 */
private Pair<DBIDVar, Double> singleIteration(Relation<V> relationx, Relation<? extends NumberVector> relationy) {
  final int dim = RelationUtil.dimensionality(relationx);
  final int dimy = RelationUtil.dimensionality(relationy);
  assert (dim == 2);
  KNNQuery<V> knnQuery = QueryUtil.getKNNQuery(relationx, getDistanceFunction(), k + 1);

  // We need stable indexed DBIDs
  ArrayModifiableDBIDs ids = DBIDUtil.newArray(relationx.getDBIDs());
  // Sort, so we can do a binary search below.
  ids.sort();

  // init F,X,Y
  double[][] X = new double[ids.size()][6];
  double[][] F = new double[ids.size()][ids.size()];
  double[][] Y = new double[ids.size()][dimy];

  {
    int i = 0;
    for (DBIDIter id = ids.iter(); id.valid(); id.advance(), i++) {
      // Fill the data matrix
      {
        V vec = relationx.get(id);
        double la = vec.doubleValue(0);
        double lo = vec.doubleValue(1);
        X[i][0] = 1.0;
        X[i][1] = la;
        X[i][2] = lo;
        X[i][3] = la * lo;
        X[i][4] = la * la;
        X[i][5] = lo * lo;
      }

      {
        final NumberVector vecy = relationy.get(id);
        for (int d = 0; d < dimy; d++) {
          double idy = vecy.doubleValue(d);
          Y[i][d] = idy;
        }
      }

      // Fill the neighborhood matrix F:
      {
        // Query k + 1 neighbors, because the query object itself may be
        // part of the result and is removed below.
        KNNList neighbors = knnQuery.getKNNForDBID(id, k + 1);
        ModifiableDBIDs neighborhood = DBIDUtil.newArray(neighbors.size());
        for (DBIDIter neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) {
          if (DBIDUtil.equal(id, neighbor)) {
            continue;
          }
          neighborhood.add(neighbor);
        }
        // Weight object itself positively.
        F[i][i] = 1.0;
        // Each neighbor gets the weight -1/|neighborhood|. This must be a
        // floating point division: an integer division truncates to 0 for
        // every neighborhood with more than one member (and throws for an
        // empty one), which would remove the neighbors from the model.
        final double nweight = -1. / neighborhood.size();
        // We need to find the index positions of the neighbors,
        // unfortunately.
        for (DBIDIter iter = neighborhood.iter(); iter.valid(); iter.advance()) {
          int pos = ids.binarySearch(iter);
          assert (pos >= 0);
          F[pos][i] = nweight;
        }
      }
    }
  }
  // Estimate the parameter beta
  // Common term that we can save recomputing.
  double[][] common = times(transposeTimesTranspose(X, F), F);
  double[][] b = times(inverse(times(common, X)), times(common, Y));
  // Estimate sigma_0 and sigma:
  // sigma_sum_square = sigma_0*sigma_0 + sigma*sigma
  double[][] sigmaMat = times(F, minusEquals(times(X, b), times(F, Y)));
  final double sigma_sum_square = normF(sigmaMat) / (relationx.size() - 6 - 1);
  final double norm = 1 / FastMath.sqrt(sigma_sum_square);

  // calculate the absolute values of standard residuals
  double[][] E = timesEquals(times(F, minus(Y, times(X, b))), norm);

  // Find the object with the largest squared residual over all attributes.
  DBIDVar worstid = DBIDUtil.newVar();
  double worstscore = Double.NEGATIVE_INFINITY;
  int i = 0;
  for (DBIDIter id = ids.iter(); id.valid(); id.advance(), i++) {
    double err = squareSum(getRow(E, i));
    if (err > worstscore) {
      worstscore = err;
      worstid.set(id);
    }
  }

  // The squared sum was used for comparison; report its square root.
  return new Pair<>(worstid, FastMath.sqrt(worstscore));
}
use of de.lmu.ifi.dbs.elki.database.ids.DBIDIter in project elki by elki-project.
the class CTLuMedianAlgorithm method run.
/**
 * Main method: score each object by how far its attribute value deviates
 * from the median attribute value of its neighbors, then standardize the
 * deviations.
 *
 * @param database Database
 * @param nrel Neighborhood relation
 * @param relation Data relation (1d!)
 * @return Outlier detection result
 */
public OutlierResult run(Database database, Relation<N> nrel, Relation<? extends NumberVector> relation) {
  final NeighborSetPredicate npred = getNeighborSetPredicateFactory().instantiate(database, nrel);
  WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC);

  // First pass: raw deviation from the neighborhood median.
  MeanVariance stats = new MeanVariance();
  for (DBIDIter cur = relation.iterDBIDs(); cur.valid(); cur.advance()) {
    final DBIDs neighbors = npred.getNeighborDBIDs(cur);
    // Collect the attribute values of all neighbors except the object itself.
    final double[] values = new double[neighbors.size()];
    int count = 0;
    for (DBIDIter nb = neighbors.iter(); nb.valid(); nb.advance()) {
      if (!DBIDUtil.equal(cur, nb)) {
        values[count++] = relation.get(nb).doubleValue(0);
      }
    }
    final double own = relation.get(cur).doubleValue(0);
    // Without any other neighbor, the object's own value serves as median.
    final double median = (count > 0) ? QuickSelect.median(values, 0, count) : own;
    final double deviation = own - median;
    scores.putDouble(cur, deviation);
    stats.put(deviation);
  }

  // Second pass: standardize the deviations and track the score range.
  final double mean = stats.getMean();
  final double stddev = stats.getNaiveStddev();
  DoubleMinMax range = new DoubleMinMax();
  for (DBIDIter cur = relation.iterDBIDs(); cur.valid(); cur.advance()) {
    final double normalized = Math.abs((scores.doubleValue(cur) - mean) / stddev);
    range.put(normalized);
    scores.putDouble(cur, normalized);
  }

  // Wrap the scores into the result structure.
  DoubleRelation scoreResult = new MaterializedDoubleRelation("MO", "Median-outlier", scores, relation.getDBIDs());
  OutlierScoreMeta scoreMeta = new BasicOutlierScoreMeta(range.getMin(), range.getMax(), 0.0, Double.POSITIVE_INFINITY, 0);
  OutlierResult result = new OutlierResult(scoreMeta, scoreResult);
  result.addChildResult(npred);
  return result;
}
use of de.lmu.ifi.dbs.elki.database.ids.DBIDIter in project elki by elki-project.
the class DWOF method run.
/**
 * Performs the Generalized DWOF_SCORE algorithm on the given database by
 * calling all the other methods in the proper order: initialize a radius per
 * object, then repeatedly grow the radii, cluster the data and accumulate
 * the score contribution of each cycle until no unmerged objects remain.
 *
 * @param database Database to query
 * @param relation Data to process
 * @return new OutlierResult instance
 */
public OutlierResult run(Database database, Relation<O> relation) {
  final DBIDs ids = relation.getDBIDs();
  DistanceQuery<O> distFunc = database.getDistanceQuery(relation, getDistanceFunction());
  // k nearest neighbor and range queries on the relation.
  KNNQuery<O> knnq = database.getKNNQuery(distFunc, k, DatabaseQuery.HINT_HEAVY_USE);
  RangeQuery<O> rnnQuery = database.getRangeQuery(distFunc, DatabaseQuery.HINT_HEAVY_USE);

  StepProgress stepProg = null;
  if (LOG.isVerbose()) {
    stepProg = new StepProgress("DWOF", 2);
  }
  // Output storage for the accumulated DWOF scores.
  WritableDoubleDataStore dwofs = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_DB | DataStoreFactory.HINT_HOT, 0.);
  if (stepProg != null) {
    stepProg.beginStep(1, "Initializing objects' Radii", LOG);
  }
  // Starting radius of every object.
  WritableDoubleDataStore radii = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_TEMP | DataStoreFactory.HINT_HOT, 0.);
  initializeRadii(ids, knnq, distFunc, radii);
  // Cluster sizes of the previous and the current cycle.
  WritableIntegerDataStore oldSizes = DataStoreUtil.makeIntegerStorage(ids, DataStoreFactory.HINT_HOT, 1);
  WritableIntegerDataStore newSizes = DataStoreUtil.makeIntegerStorage(ids, DataStoreFactory.HINT_HOT, 1);
  int countUnmerged = relation.size();
  if (stepProg != null) {
    stepProg.beginStep(2, "Clustering-Evaluating Cycles.", LOG);
  }
  IndefiniteProgress clusEvalProgress = null;
  if (LOG.isVerbose()) {
    clusEvalProgress = new IndefiniteProgress("Evaluating DWOFs", LOG);
  }

  while (countUnmerged > 0) {
    LOG.incrementProcessed(clusEvalProgress);
    // Scale every radius by the factor delta.
    for (DBIDIter it = ids.iter(); it.valid(); it.advance()) {
      final double grown = radii.doubleValue(it) * delta;
      radii.putDouble(it, grown);
    }
    // Per-object cluster label storage for this cycle.
    WritableDataStore<ModifiableDBIDs> labels = DataStoreUtil.makeStorage(ids, DataStoreFactory.HINT_TEMP, ModifiableDBIDs.class);
    // Cluster objects based on the current radii.
    clusterData(ids, rnnQuery, radii, labels);
    // Exchange the two size stores, so that the sizes of the last cycle
    // become the "old" ones and the other store is overwritten below.
    final WritableIntegerDataStore previous = newSizes;
    newSizes = oldSizes;
    oldSizes = previous;
    // Record the cluster size of each object for this cycle.
    countUnmerged = updateSizes(ids, labels, newSizes);
    labels.destroy();
    // Add this cycle's contribution to the DWOF scores.
    for (DBIDIter it = ids.iter(); it.valid(); it.advance()) {
      final int current = newSizes.intValue(it);
      double contribution = 0.0;
      if (current > 0) {
        contribution = (double) (oldSizes.intValue(it) - 1) / (double) current;
      }
      dwofs.putDouble(it, dwofs.doubleValue(it) + contribution);
    }
  }
  LOG.setCompleted(clusEvalProgress);
  LOG.setCompleted(stepProg);

  // Determine the score range and build the result representation.
  DoubleMinMax range = new DoubleMinMax();
  for (DBIDIter it = relation.iterDBIDs(); it.valid(); it.advance()) {
    range.put(dwofs.doubleValue(it));
  }
  OutlierScoreMeta meta = new InvertedOutlierScoreMeta(range.getMin(), range.getMax(), 0.0, Double.POSITIVE_INFINITY);
  DoubleRelation scoreRelation = new MaterializedDoubleRelation("Dynamic-Window Outlier Factors", "dwof-outlier", dwofs, ids);
  return new OutlierResult(meta, scoreRelation);
}
use of de.lmu.ifi.dbs.elki.database.ids.DBIDIter in project elki by elki-project.
the class GaussianUniformMixture method run.
/**
 * Run the algorithm: tentatively move each object from the normal set to the
 * anomalous set and score it by the resulting change in log likelihood.
 *
 * @param relation Data relation
 * @return Outlier result
 */
public OutlierResult run(Relation<V> relation) {
  // Array of object IDs, so that objects can be addressed by their offset.
  ArrayDBIDs objids = DBIDUtil.ensureArray(relation.getDBIDs());
  // One flag per object; a set bit marks the object as anomalous. Initially
  // no bit is set.
  long[] bits = BitsUtil.zero(objids.size());
  // Two views on the same bit mask. As used below, normalObjs covers the
  // objects whose bit is clear and anomalousObjs those whose bit is set
  // (presumably the last constructor argument inverts the mask - verify
  // against MaskedDBIDs).
  DBIDs normalObjs = new MaskedDBIDs(objids, bits, true);
  DBIDs anomalousObjs = new MaskedDBIDs(objids, bits, false);
  // Resulting scores
  WritableDoubleDataStore oscores = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_TEMP | DataStoreFactory.HINT_HOT);
  // Log likelihood of the initial model, where every object is normal.
  double logLike = relation.size() * logml + loglikelihoodNormal(normalObjs, relation);

  DoubleMinMax minmax = new DoubleMinMax();
  final int total = objids.size();
  int pos = 0;
  for (DBIDIter iter = objids.iter(); pos < total; pos++, iter.advance()) {
    // Tentatively make the current object anomalous.
    BitsUtil.setI(bits, pos);
    // Log likelihood of the modified model.
    final double normalPart = normalObjs.size() * logml + loglikelihoodNormal(normalObjs, relation);
    final double currentLogLike = normalPart + anomalousObjs.size() * logl + loglikelihoodAnomalous(anomalousObjs);
    // The gain in log likelihood is the outlier score of the object.
    final double loglikeGain = currentLogLike - logLike;
    oscores.putDouble(iter, loglikeGain);
    minmax.put(loglikeGain);
    if (loglikeGain > c) {
      // Gain exceeds the threshold: the object stays in the anomalous set
      // and the new likelihood becomes the reference value.
      logLike = currentLogLike;
      continue;
    }
    // Otherwise the object remains normal: undo the bit set.
    BitsUtil.clearI(bits, pos);
  }

  OutlierScoreMeta meta = new BasicOutlierScoreMeta(minmax.getMin(), minmax.getMax(), Double.NEGATIVE_INFINITY, Double.POSITIVE_INFINITY, 0.0);
  DoubleRelation res = new MaterializedDoubleRelation("Gaussian Mixture Outlier Score", "gaussian-mixture-outlier", oscores, relation.getDBIDs());
  return new OutlierResult(meta, res);
}
use of de.lmu.ifi.dbs.elki.database.ids.DBIDIter in project elki by elki-project.
the class GaussianUniformMixture method loglikelihoodNormal.
/**
 * Computes the loglikelihood of all normal objects. Gaussian model
 * <p>
 * The log density is evaluated directly in log space, i.e. as
 * {@code -0.5 * (d * log(2 pi) + log(det) + mDist)} per object, instead of
 * taking the logarithm of the density. The density itself underflows to 0
 * for objects far away from the mean, which would yield a log likelihood of
 * negative infinity.
 *
 * @param objids Object IDs for 'normal' objects.
 * @param relation Database
 * @return loglikelihood for normal objects, 0 if there are no such objects
 */
private double loglikelihoodNormal(DBIDs objids, Relation<V> relation) {
  if (objids.isEmpty()) {
    return 0;
  }
  CovarianceMatrix builder = CovarianceMatrix.make(relation, objids);
  double[] mean = builder.getMeanVector();
  double[][] covarianceMatrix = builder.destroyToSampleMatrix();

  // NOTE(review): there is no check for a singular covariance matrix here.
  double[][] covInv = inverse(covarianceMatrix);
  double covarianceDet = new LUDecomposition(covarianceMatrix).det();
  // Logarithm of the normalization factor 1 / sqrt((2 pi)^d * det).
  final int dim = RelationUtil.dimensionality(relation);
  final double logFakt = -.5 * (dim * FastMath.log(MathUtil.TWOPI) + FastMath.log(covarianceDet));

  // for each object compute the log probability and sum
  double prob = 0;
  for (DBIDIter iter = objids.iter(); iter.valid(); iter.advance()) {
    double[] x = minusEquals(relation.get(iter).toArray(), mean);
    double mDist = transposeTimesTimes(x, covInv, x);
    prob += logFakt - .5 * mDist;
  }
  return prob;
}
Aggregations