Search in sources :

Example 11 with NumberVector

use of de.lmu.ifi.dbs.elki.data.NumberVector in project elki by elki-project.

the class RelationUtil method variances.

/**
 * Determines the variances in each dimension of the specified objects stored
 * in the given database.
 *
 * @param database the database storing the objects
 * @param ids the ids of the objects
 * @param centroid the centroid or reference vector of the ids
 * @return the variances in each dimension of the specified objects
 */
public static double[] variances(Relation<? extends NumberVector> database, NumberVector centroid, DBIDs ids) {
    final int size = ids.size();
    double[] variances = new double[centroid.getDimensionality()];
    for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
        NumberVector o = database.get(iter);
        for (int d = 0; d < centroid.getDimensionality(); d++) {
            final double diff = o.doubleValue(d) - centroid.doubleValue(d);
            variances[d] += diff * diff / size;
        }
    }
    return variances;
}
Also used : NumberVector(de.lmu.ifi.dbs.elki.data.NumberVector) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter)

Example 12 with NumberVector

use of de.lmu.ifi.dbs.elki.data.NumberVector in project elki by elki-project.

the class Scales method calcScales.

/**
 * Compute a linear scale for each dimension.
 *
 * @param rel Relation
 * @return Scales, indexed starting with 0 (like Vector, not database
 *         objects!)
 */
public static LinearScale[] calcScales(Relation<? extends SpatialComparable> rel) {
    int dim = RelationUtil.dimensionality(rel);
    DoubleMinMax[] minmax = DoubleMinMax.newArray(dim);
    LinearScale[] scales = new LinearScale[dim];
    // analyze data
    for (DBIDIter iditer = rel.iterDBIDs(); iditer.valid(); iditer.advance()) {
        SpatialComparable v = rel.get(iditer);
        if (v instanceof NumberVector) {
            for (int d = 0; d < dim; d++) {
                final double mi = v.getMin(d);
                if (mi != mi) {
                    // NaN
                    continue;
                }
                minmax[d].put(mi);
            }
        } else {
            for (int d = 0; d < dim; d++) {
                final double mi = v.getMin(d);
                if (mi == mi) {
                    // No NaN
                    minmax[d].put(mi);
                }
                final double ma = v.getMax(d);
                if (ma == ma) {
                    // No NaN
                    minmax[d].put(ma);
                }
            }
        }
    }
    // generate scales
    for (int d = 0; d < dim; d++) {
        scales[d] = new LinearScale(minmax[d].getMin(), minmax[d].getMax());
    }
    return scales;
}
Also used : DoubleMinMax(de.lmu.ifi.dbs.elki.math.DoubleMinMax) NumberVector(de.lmu.ifi.dbs.elki.data.NumberVector) SpatialComparable(de.lmu.ifi.dbs.elki.data.spatial.SpatialComparable) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter)

Example 13 with NumberVector

use of de.lmu.ifi.dbs.elki.data.NumberVector in project elki by elki-project.

the class WeightedCovarianceMatrixBuilder method processQueryResults.

/**
 * Compute Covariance Matrix for a QueryResult Collection.
 *
 * By default it will just collect the ids and run processIds
 *
 * @param results a collection of QueryResults
 * @param database the database used
 * @param k number of elements to process
 * @return Covariance Matrix
 */
@Override
public double[][] processQueryResults(DoubleDBIDList results, Relation<? extends NumberVector> database, int k) {
    final int dim = RelationUtil.dimensionality(database);
    final CovarianceMatrix cmat = new CovarianceMatrix(dim);
    // avoid bad parameters
    k = k <= results.size() ? k : results.size();
    // find maximum distance
    double maxdist = 0.0, stddev = 0.0;
    {
        int i = 0;
        for (DoubleDBIDListIter it = results.iter(); it.valid() && i < k; it.advance(), k++) {
            final double dist = it.doubleValue();
            stddev += dist * dist;
            if (dist > maxdist) {
                maxdist = dist;
            }
        }
        if (maxdist == 0.0) {
            maxdist = 1.0;
        }
        stddev = FastMath.sqrt(stddev / k);
    }
    // calculate weighted PCA
    int i = 0;
    for (DoubleDBIDListIter it = results.iter(); it.valid() && i < k; it.advance(), k++) {
        final double dist = it.doubleValue();
        NumberVector obj = database.get(it);
        double weight = weightfunction.getWeight(dist, maxdist, stddev);
        cmat.put(obj, weight);
    }
    return cmat.destroyToPopulationMatrix();
}
Also used : DoubleDBIDListIter(de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter) NumberVector(de.lmu.ifi.dbs.elki.data.NumberVector) CovarianceMatrix(de.lmu.ifi.dbs.elki.math.linearalgebra.CovarianceMatrix)

Example 14 with NumberVector

use of de.lmu.ifi.dbs.elki.data.NumberVector in project elki by elki-project.

the class CTLuGLSBackwardSearchAlgorithm method singleIteration.

/**
 * Run a single iteration of the GLS-SOD modeling step
 *
 * @param relationx Geo relation
 * @param relationy Attribute relation
 * @return Top outlier and associated score
 */
private Pair<DBIDVar, Double> singleIteration(Relation<V> relationx, Relation<? extends NumberVector> relationy) {
    final int dim = RelationUtil.dimensionality(relationx);
    final int dimy = RelationUtil.dimensionality(relationy);
    assert (dim == 2);
    KNNQuery<V> knnQuery = QueryUtil.getKNNQuery(relationx, getDistanceFunction(), k + 1);
    // We need stable indexed DBIDs
    ArrayModifiableDBIDs ids = DBIDUtil.newArray(relationx.getDBIDs());
    // Sort, so we can do a binary search below.
    ids.sort();
    // init F,X,Z
    double[][] X = new double[ids.size()][6];
    double[][] F = new double[ids.size()][ids.size()];
    double[][] Y = new double[ids.size()][dimy];
    {
        int i = 0;
        for (DBIDIter id = ids.iter(); id.valid(); id.advance(), i++) {
            // Fill the data matrix
            {
                V vec = relationx.get(id);
                double la = vec.doubleValue(0);
                double lo = vec.doubleValue(1);
                X[i][0] = 1.0;
                X[i][1] = la;
                X[i][2] = lo;
                X[i][3] = la * lo;
                X[i][4] = la * la;
                X[i][5] = lo * lo;
            }
            {
                final NumberVector vecy = relationy.get(id);
                for (int d = 0; d < dimy; d++) {
                    double idy = vecy.doubleValue(d);
                    Y[i][d] = idy;
                }
            }
            // Fill the neighborhood matrix F:
            {
                KNNList neighbors = knnQuery.getKNNForDBID(id, k + 1);
                ModifiableDBIDs neighborhood = DBIDUtil.newArray(neighbors.size());
                for (DBIDIter neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) {
                    if (DBIDUtil.equal(id, neighbor)) {
                        continue;
                    }
                    neighborhood.add(neighbor);
                }
                // Weight object itself positively.
                F[i][i] = 1.0;
                final int nweight = -1 / neighborhood.size();
                // unfortunately.
                for (DBIDIter iter = neighborhood.iter(); iter.valid(); iter.advance()) {
                    int pos = ids.binarySearch(iter);
                    assert (pos >= 0);
                    F[pos][i] = nweight;
                }
            }
        }
    }
    // Estimate the parameter beta
    // Common term that we can save recomputing.
    double[][] common = times(transposeTimesTranspose(X, F), F);
    double[][] b = times(inverse(times(common, X)), times(common, Y));
    // Estimate sigma_0 and sigma:
    // sigma_sum_square = sigma_0*sigma_0 + sigma*sigma
    double[][] sigmaMat = times(F, minusEquals(times(X, b), times(F, Y)));
    final double sigma_sum_square = normF(sigmaMat) / (relationx.size() - 6 - 1);
    final double norm = 1 / FastMath.sqrt(sigma_sum_square);
    // calculate the absolute values of standard residuals
    double[][] E = timesEquals(times(F, minus(Y, times(X, b))), norm);
    DBIDVar worstid = DBIDUtil.newVar();
    double worstscore = Double.NEGATIVE_INFINITY;
    int i = 0;
    for (DBIDIter id = ids.iter(); id.valid(); id.advance(), i++) {
        double err = squareSum(getRow(E, i));
        // double err = Math.abs(E.get(i, 0));
        if (err > worstscore) {
            worstscore = err;
            worstid.set(id);
        }
    }
    return new Pair<>(worstid, FastMath.sqrt(worstscore));
}
Also used : DBIDVar(de.lmu.ifi.dbs.elki.database.ids.DBIDVar) ArrayModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs) NumberVector(de.lmu.ifi.dbs.elki.data.NumberVector) KNNList(de.lmu.ifi.dbs.elki.database.ids.KNNList) ArrayModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs) ModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) Pair(de.lmu.ifi.dbs.elki.utilities.pairs.Pair)

Example 15 with NumberVector

use of de.lmu.ifi.dbs.elki.data.NumberVector in project elki by elki-project.

the class ReferenceBasedOutlierDetection method run.

/**
 * Run the algorithm on the given relation.
 *
 * @param database Database
 * @param relation Relation to process
 * @return Outlier result
 */
public OutlierResult run(Database database, Relation<? extends NumberVector> relation) {
    @SuppressWarnings("unchecked") PrimitiveDistanceQuery<? super NumberVector> distq = (PrimitiveDistanceQuery<? super NumberVector>) database.getDistanceQuery(relation, distanceFunction);
    Collection<? extends NumberVector> refPoints = refp.getReferencePoints(relation);
    if (refPoints.isEmpty()) {
        throw new AbortException("Cannot compute ROS without reference points!");
    }
    DBIDs ids = relation.getDBIDs();
    if (k >= ids.size()) {
        throw new AbortException("k must not be chosen larger than the database size!");
    }
    // storage of distance/score values.
    WritableDoubleDataStore rbod_score = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_STATIC | DataStoreFactory.HINT_HOT, Double.NaN);
    // Compute density estimation:
    for (NumberVector refPoint : refPoints) {
        DoubleDBIDList referenceDists = computeDistanceVector(refPoint, relation, distq);
        updateDensities(rbod_score, referenceDists);
    }
    // compute maximum density
    DoubleMinMax mm = new DoubleMinMax();
    for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
        mm.put(rbod_score.doubleValue(iditer));
    }
    // compute ROS
    double scale = mm.getMax() > 0. ? 1. / mm.getMax() : 1.;
    // Reuse
    mm.reset();
    for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
        double score = 1 - (rbod_score.doubleValue(iditer) * scale);
        mm.put(score);
        rbod_score.putDouble(iditer, score);
    }
    DoubleRelation scoreResult = new MaterializedDoubleRelation("Reference-points Outlier Scores", "reference-outlier", rbod_score, relation.getDBIDs());
    OutlierScoreMeta scoreMeta = new BasicOutlierScoreMeta(mm.getMin(), mm.getMax(), 0., 1., 0.);
    OutlierResult result = new OutlierResult(scoreMeta, scoreResult);
    // adds reference points to the result. header information for the
    // visualizer to find the reference points in the result
    result.addChildResult(new ReferencePointsResult<>("Reference points", "reference-points", refPoints));
    return result;
}
Also used : WritableDoubleDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore) DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) OutlierResult(de.lmu.ifi.dbs.elki.result.outlier.OutlierResult) PrimitiveDistanceQuery(de.lmu.ifi.dbs.elki.database.query.distance.PrimitiveDistanceQuery) DoubleRelation(de.lmu.ifi.dbs.elki.database.relation.DoubleRelation) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation) BasicOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta) OutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta) BasicOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) DoubleMinMax(de.lmu.ifi.dbs.elki.math.DoubleMinMax) NumberVector(de.lmu.ifi.dbs.elki.data.NumberVector) ModifiableDoubleDBIDList(de.lmu.ifi.dbs.elki.database.ids.ModifiableDoubleDBIDList) DoubleDBIDList(de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDList) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation) AbortException(de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException)

Aggregations

NumberVector (de.lmu.ifi.dbs.elki.data.NumberVector)85 DBIDIter (de.lmu.ifi.dbs.elki.database.ids.DBIDIter)40 ArrayList (java.util.ArrayList)16 LongStatistic (de.lmu.ifi.dbs.elki.logging.statistics.LongStatistic)9 DoubleVector (de.lmu.ifi.dbs.elki.data.DoubleVector)8 MultipleObjectsBundle (de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle)8 AbortException (de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException)8 Database (de.lmu.ifi.dbs.elki.database.Database)7 DBIDs (de.lmu.ifi.dbs.elki.database.ids.DBIDs)7 DoubleStatistic (de.lmu.ifi.dbs.elki.logging.statistics.DoubleStatistic)7 Random (java.util.Random)7 Test (org.junit.Test)7 VectorFieldTypeInformation (de.lmu.ifi.dbs.elki.data.type.VectorFieldTypeInformation)5 FiniteProgress (de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress)5 MeanVariance (de.lmu.ifi.dbs.elki.math.MeanVariance)5 EvaluationResult (de.lmu.ifi.dbs.elki.result.EvaluationResult)5 MeasurementGroup (de.lmu.ifi.dbs.elki.result.EvaluationResult.MeasurementGroup)5 List (java.util.List)5 SparseNumberVector (de.lmu.ifi.dbs.elki.data.SparseNumberVector)4 RandomProjectionFamily (de.lmu.ifi.dbs.elki.data.projection.random.RandomProjectionFamily)4