Use of de.lmu.ifi.dbs.elki.data.NumberVector in project elki by elki-project.
From class RelationUtil, method variances:
/**
 * Determines the variances in each dimension of the specified objects stored
 * in the given database.
 *
 * @param database the database storing the objects
 * @param centroid the centroid or reference vector of the ids
 * @param ids the ids of the objects
 * @return the variances in each dimension of the specified objects
 */
public static double[] variances(Relation<? extends NumberVector> database, NumberVector centroid, DBIDs ids) {
  final int size = ids.size();
  double[] variances = new double[centroid.getDimensionality()];
  for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
    NumberVector o = database.get(iter);
    for (int d = 0; d < centroid.getDimensionality(); d++) {
      final double diff = o.doubleValue(d) - centroid.doubleValue(d);
      variances[d] += diff * diff / size;
    }
  }
  return variances;
}
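The division by size inside the loop lets the method compute the population variance in a single pass over the data. Below is a minimal standalone sketch of the same accumulation on plain arrays; the class name and the example data are hypothetical, not part of ELKI.

public class VarianceSketch {
  public static double[] variances(double[][] data, double[] centroid) {
    final int size = data.length;
    double[] variances = new double[centroid.length];
    for (double[] row : data) {
      for (int d = 0; d < centroid.length; d++) {
        final double diff = row[d] - centroid[d];
        // Dividing each term by the count gives the population variance
        // in a single pass, exactly as in the ELKI method above.
        variances[d] += diff * diff / size;
      }
    }
    return variances;
  }

  public static void main(String[] args) {
    double[][] data = { { 1, 2 }, { 3, 4 }, { 5, 6 } };
    double[] centroid = { 3, 4 }; // the mean of the data above
    // prints [2.6666666666666665, 2.6666666666666665]
    System.out.println(java.util.Arrays.toString(variances(data, centroid)));
  }
}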
Use of de.lmu.ifi.dbs.elki.data.NumberVector in project elki by elki-project.
From class Scales, method calcScales:
/**
* Compute a linear scale for each dimension.
*
* @param rel Relation
* @return Scales, indexed starting with 0 (like Vector, not database
* objects!)
*/
public static LinearScale[] calcScales(Relation<? extends SpatialComparable> rel) {
  int dim = RelationUtil.dimensionality(rel);
  DoubleMinMax[] minmax = DoubleMinMax.newArray(dim);
  LinearScale[] scales = new LinearScale[dim];
  // analyze data
  for (DBIDIter iditer = rel.iterDBIDs(); iditer.valid(); iditer.advance()) {
    SpatialComparable v = rel.get(iditer);
    if (v instanceof NumberVector) {
      // For number vectors, min and max coincide with the coordinate value,
      // so reading getMin once per dimension suffices.
      for (int d = 0; d < dim; d++) {
        final double mi = v.getMin(d);
        if (mi != mi) {
          continue; // skip NaN
        }
        minmax[d].put(mi);
      }
    } else {
      // General spatial objects: track both the lower and upper bounds.
      for (int d = 0; d < dim; d++) {
        final double mi = v.getMin(d);
        if (mi == mi) { // not NaN
          minmax[d].put(mi);
        }
        final double ma = v.getMax(d);
        if (ma == ma) { // not NaN
          minmax[d].put(ma);
        }
      }
    }
  }
  // generate scales
  for (int d = 0; d < dim; d++) {
    scales[d] = new LinearScale(minmax[d].getMin(), minmax[d].getMax());
  }
  return scales;
}
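The mi != mi comparison is the standard allocation-free NaN test: NaN is the only double value that compares unequal to itself. A minimal standalone illustration of the same scan, with a hypothetical MinMax class standing in for ELKI's DoubleMinMax:

final class MinMax {
  double min = Double.POSITIVE_INFINITY, max = Double.NEGATIVE_INFINITY;

  void put(double v) {
    min = Math.min(min, v);
    max = Math.max(max, v);
  }
}

public class NaNScanDemo {
  public static void main(String[] args) {
    double[] values = { 2.0, Double.NaN, -1.5, 7.0 };
    MinMax mm = new MinMax();
    for (double v : values) {
      // v != v is true exactly for NaN: NaN compares unequal to every
      // value, including itself, and Math.min/max would propagate it.
      if (v != v) {
        continue;
      }
      mm.put(v);
    }
    System.out.println(mm.min + " .. " + mm.max); // -1.5 .. 7.0
  }
}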
Use of de.lmu.ifi.dbs.elki.data.NumberVector in project elki by elki-project.
From class WeightedCovarianceMatrixBuilder, method processQueryResults:
/**
 * Compute the covariance matrix for a query result collection.
 *
 * Objects are weighted by their query distance, using the configured
 * weight function.
 *
 * @param results a collection of query results
 * @param database the database used
 * @param k number of elements to process
 * @return covariance matrix
 */
@Override
public double[][] processQueryResults(DoubleDBIDList results, Relation<? extends NumberVector> database, int k) {
  final int dim = RelationUtil.dimensionality(database);
  final CovarianceMatrix cmat = new CovarianceMatrix(dim);
  // avoid bad parameters
  k = k <= results.size() ? k : results.size();
  // find maximum distance, and estimate the distance standard deviation
  double maxdist = 0.0, stddev = 0.0;
  {
    int i = 0;
    // Note: advance the loop counter i, not k; incrementing k here would
    // defeat the cutoff and process the entire result list.
    for (DoubleDBIDListIter it = results.iter(); it.valid() && i < k; it.advance(), i++) {
      final double dist = it.doubleValue();
      stddev += dist * dist;
      if (dist > maxdist) {
        maxdist = dist;
      }
    }
    if (maxdist == 0.0) {
      maxdist = 1.0;
    }
    stddev = FastMath.sqrt(stddev / k);
  }
  // calculate weighted PCA
  int i = 0;
  for (DoubleDBIDListIter it = results.iter(); it.valid() && i < k; it.advance(), i++) {
    final double dist = it.doubleValue();
    NumberVector obj = database.get(it);
    double weight = weightfunction.getWeight(dist, maxdist, stddev);
    cmat.put(obj, weight);
  }
  return cmat.destroyToPopulationMatrix();
}
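The loop feeds each object and its weight into the covariance accumulator via cmat.put(obj, weight), and destroyToPopulationMatrix normalizes by the total weight. The following standalone sketch computes the same population-weighted covariance on plain arrays in two passes; ELKI's CovarianceMatrix accumulates incrementally instead, and the class and method names here are hypothetical.

public class WeightedCovSketch {
  public static double[][] weightedCovariance(double[][] data, double[] weights) {
    final int dim = data[0].length;
    double wsum = 0;
    double[] mean = new double[dim];
    // First pass: weighted mean.
    for (int i = 0; i < data.length; i++) {
      wsum += weights[i];
      for (int d = 0; d < dim; d++) {
        mean[d] += weights[i] * data[i][d];
      }
    }
    for (int d = 0; d < dim; d++) {
      mean[d] /= wsum;
    }
    // Second pass: weighted scatter around the mean, normalized by the
    // total weight (population-style normalization).
    double[][] cov = new double[dim][dim];
    for (int i = 0; i < data.length; i++) {
      for (int a = 0; a < dim; a++) {
        final double da = data[i][a] - mean[a];
        for (int b = 0; b < dim; b++) {
          cov[a][b] += weights[i] * da * (data[i][b] - mean[b]);
        }
      }
    }
    for (int a = 0; a < dim; a++) {
      for (int b = 0; b < dim; b++) {
        cov[a][b] /= wsum;
      }
    }
    return cov;
  }
}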
Use of de.lmu.ifi.dbs.elki.data.NumberVector in project elki by elki-project.
From class CTLuGLSBackwardSearchAlgorithm, method singleIteration:
/**
 * Run a single iteration of the GLS-SOD modeling step.
 *
 * @param relationx Geo relation
 * @param relationy Attribute relation
 * @return Top outlier and associated score
 */
private Pair<DBIDVar, Double> singleIteration(Relation<V> relationx, Relation<? extends NumberVector> relationy) {
  final int dim = RelationUtil.dimensionality(relationx);
  final int dimy = RelationUtil.dimensionality(relationy);
  assert (dim == 2);
  KNNQuery<V> knnQuery = QueryUtil.getKNNQuery(relationx, getDistanceFunction(), k + 1);
  // We need stable indexed DBIDs
  ArrayModifiableDBIDs ids = DBIDUtil.newArray(relationx.getDBIDs());
  // Sort, so we can do a binary search below.
  ids.sort();
  // init F, X, Y
  double[][] X = new double[ids.size()][6];
  double[][] F = new double[ids.size()][ids.size()];
  double[][] Y = new double[ids.size()][dimy];
  {
    int i = 0;
    for (DBIDIter id = ids.iter(); id.valid(); id.advance(), i++) {
      // Fill the data matrix with a quadratic trend surface in (la, lo):
      {
        V vec = relationx.get(id);
        double la = vec.doubleValue(0);
        double lo = vec.doubleValue(1);
        X[i][0] = 1.0;
        X[i][1] = la;
        X[i][2] = lo;
        X[i][3] = la * lo;
        X[i][4] = la * la;
        X[i][5] = lo * lo;
      }
      {
        final NumberVector vecy = relationy.get(id);
        for (int d = 0; d < dimy; d++) {
          Y[i][d] = vecy.doubleValue(d);
        }
      }
      // Fill the neighborhood matrix F:
      {
        KNNList neighbors = knnQuery.getKNNForDBID(id, k + 1);
        ModifiableDBIDs neighborhood = DBIDUtil.newArray(neighbors.size());
        for (DBIDIter neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) {
          if (DBIDUtil.equal(id, neighbor)) {
            continue;
          }
          neighborhood.add(neighbor);
        }
        // Weight object itself positively.
        F[i][i] = 1.0;
        // Use floating-point division; integer division (-1 / size) would
        // truncate to 0 and erase the neighbor weights.
        final double nweight = -1.0 / neighborhood.size();
        // We need to find the index positions of the neighbors, unfortunately.
        for (DBIDIter iter = neighborhood.iter(); iter.valid(); iter.advance()) {
          int pos = ids.binarySearch(iter);
          assert (pos >= 0);
          F[pos][i] = nweight;
        }
      }
    }
  }
  // Estimate the parameter beta.
  // Common term that we can save recomputing.
  double[][] common = times(transposeTimesTranspose(X, F), F);
  double[][] b = times(inverse(times(common, X)), times(common, Y));
  // Estimate sigma_0 and sigma:
  // sigma_sum_square = sigma_0 * sigma_0 + sigma * sigma
  double[][] sigmaMat = times(F, minusEquals(times(X, b), times(F, Y)));
  final double sigma_sum_square = normF(sigmaMat) / (relationx.size() - 6 - 1);
  final double norm = 1 / FastMath.sqrt(sigma_sum_square);
  // calculate the absolute values of standard residuals
  double[][] E = timesEquals(times(F, minus(Y, times(X, b))), norm);
  DBIDVar worstid = DBIDUtil.newVar();
  double worstscore = Double.NEGATIVE_INFINITY;
  int i = 0;
  for (DBIDIter id = ids.iter(); id.valid(); id.advance(), i++) {
    double err = squareSum(getRow(E, i));
    // double err = Math.abs(E.get(i, 0));
    if (err > worstscore) {
      worstscore = err;
      worstid.set(id);
    }
  }
  return new Pair<>(worstid, FastMath.sqrt(worstscore));
}
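The estimation step computes b = (X^T F^T F X)^{-1} X^T F^T F Y, a generalized least-squares fit of the attribute values against the quadratic trend surface, with the neighborhood matrix F folded into the design. The sketch below shows the simplest instance of this estimator, with F = I (ordinary least squares) and a single predictor, so the normal equations reduce to a closed-form 2x2 solve; the class name and example data are hypothetical.

public class OlsSketch {
  public static void main(String[] args) {
    double[] x = { 0, 1, 2, 3 };
    double[] y = { 1, 3, 5, 7 }; // exactly y = 1 + 2x
    int n = x.length;
    // Accumulate the entries of X^T X and X^T y for the design X = [1, x].
    double sx = 0, sxx = 0, sy = 0, sxy = 0;
    for (int i = 0; i < n; i++) {
      sx += x[i];
      sxx += x[i] * x[i];
      sy += y[i];
      sxy += x[i] * y[i];
    }
    // Solve the 2x2 normal equations via the explicit inverse.
    double det = n * sxx - sx * sx;
    double b0 = (sxx * sy - sx * sxy) / det;
    double b1 = (n * sxy - sx * sy) / det;
    System.out.println("b0=" + b0 + " b1=" + b1); // b0=1.0 b1=2.0
  }
}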
Use of de.lmu.ifi.dbs.elki.data.NumberVector in project elki by elki-project.
From class ReferenceBasedOutlierDetection, method run:
/**
 * Run the algorithm on the given relation.
 *
 * @param database Database
 * @param relation Relation to process
 * @return Outlier result
 */
public OutlierResult run(Database database, Relation<? extends NumberVector> relation) {
  @SuppressWarnings("unchecked")
  PrimitiveDistanceQuery<? super NumberVector> distq = (PrimitiveDistanceQuery<? super NumberVector>) database.getDistanceQuery(relation, distanceFunction);
  Collection<? extends NumberVector> refPoints = refp.getReferencePoints(relation);
  if (refPoints.isEmpty()) {
    throw new AbortException("Cannot compute ROS without reference points!");
  }
  DBIDs ids = relation.getDBIDs();
  if (k >= ids.size()) {
    throw new AbortException("k must not be chosen larger than the database size!");
  }
  // storage of distance/score values.
  WritableDoubleDataStore rbod_score = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_STATIC | DataStoreFactory.HINT_HOT, Double.NaN);
  // Compute the density estimation:
  for (NumberVector refPoint : refPoints) {
    DoubleDBIDList referenceDists = computeDistanceVector(refPoint, relation, distq);
    updateDensities(rbod_score, referenceDists);
  }
  // Compute the maximum density:
  DoubleMinMax mm = new DoubleMinMax();
  for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
    mm.put(rbod_score.doubleValue(iditer));
  }
  // Compute ROS: invert the normalized densities, so that low density
  // (far from all reference points) yields a high outlier score.
  double scale = mm.getMax() > 0. ? 1. / mm.getMax() : 1.;
  mm.reset(); // Reuse
  for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
    double score = 1 - (rbod_score.doubleValue(iditer) * scale);
    mm.put(score);
    rbod_score.putDouble(iditer, score);
  }
  DoubleRelation scoreResult = new MaterializedDoubleRelation("Reference-points Outlier Scores", "reference-outlier", rbod_score, relation.getDBIDs());
  OutlierScoreMeta scoreMeta = new BasicOutlierScoreMeta(mm.getMin(), mm.getMax(), 0., 1., 0.);
  OutlierResult result = new OutlierResult(scoreMeta, scoreResult);
  // Add the reference points to the result, as header information for the
  // visualizer to find them.
  result.addChildResult(new ReferencePointsResult<>("Reference points", "reference-points", refPoints));
  return result;
}
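The scoring loop rescales the accumulated densities to [0, 1] and inverts them, so objects far from all reference points end up with scores near 1. A minimal standalone sketch of just this step; the density values and the class name are hypothetical.

public class RosScoreSketch {
  public static double[] toScores(double[] density) {
    double max = 0;
    for (double d : density) {
      max = Math.max(max, d);
    }
    final double scale = max > 0 ? 1.0 / max : 1.0;
    double[] score = new double[density.length];
    for (int i = 0; i < density.length; i++) {
      // 1 - normalized density: the densest object gets score 0.
      score[i] = 1 - density[i] * scale;
    }
    return score;
  }

  public static void main(String[] args) {
    // prints [0.0, 0.5, 0.75]
    System.out.println(java.util.Arrays.toString(toScores(new double[] { 4, 2, 1 })));
  }
}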