Search in sources :

Example 11 with CovarianceMatrix

use of de.lmu.ifi.dbs.elki.math.linearalgebra.CovarianceMatrix in project elki by elki-project.

the class CTLuMeanMultipleAttributes method run.

/**
 * Run the algorithm.
 * <p>
 * In a first pass, the neighborhood centroid is subtracted from every
 * object's attribute vector; the resulting difference vectors are stored and
 * accumulated in a covariance builder. In a second pass, every stored
 * difference vector is scored with {@code mahalanobisDistance}, using the
 * inverted sample covariance matrix and the mean of the difference vectors.
 *
 * @param database Database
 * @param spatial Spatial relation
 * @param attributes Numerical attributes
 * @return Outlier detection result
 */
public OutlierResult run(Database database, Relation<N> spatial, Relation<O> attributes) {
    if (LOG.isDebugging()) {
        LOG.debug("Dimensionality: " + RelationUtil.dimensionality(attributes));
    }
    final NeighborSetPredicate neighborhood = getNeighborSetPredicateFactory().instantiate(database, spatial);
    final CovarianceMatrix covariance = new CovarianceMatrix(RelationUtil.dimensionality(attributes));
    final WritableDataStore<double[]> differences = DataStoreUtil.makeStorage(attributes.getDBIDs(), DataStoreFactory.HINT_TEMP, double[].class);

    // Pass 1: per-object difference to the neighborhood centroid.
    DBIDIter it = attributes.iterDBIDs();
    while (it.valid()) {
        final O current = attributes.get(it);
        // TODO: remove object itself from neighbors?
        final DBIDs neighborIds = neighborhood.getNeighborDBIDs(it);
        // Mean vector "g" of the neighborhood
        final double[] centroid = Centroid.make(attributes, neighborIds).getArrayRef();
        // Delta vector "h"; toArray() result is modified in place by minusEquals
        final double[] h = minusEquals(current.toArray(), centroid);
        differences.put(it, h);
        covariance.put(h);
        it.advance();
    }

    // The mean has to be fetched before the builder is destroyed.
    final double[] deltaMean = covariance.getMeanVector();
    final double[][] inverseCovariance = inverse(covariance.destroyToSampleMatrix());

    // Pass 2: score every object and track the score range.
    final DoubleMinMax range = new DoubleMinMax();
    final WritableDoubleDataStore outlierScores = DataStoreUtil.makeDoubleStorage(attributes.getDBIDs(), DataStoreFactory.HINT_STATIC);
    DBIDIter scoreIt = attributes.iterDBIDs();
    while (scoreIt.valid()) {
        final double value = mahalanobisDistance(inverseCovariance, differences.get(scoreIt), deltaMean);
        range.put(value);
        outlierScores.putDouble(scoreIt, value);
        scoreIt.advance();
    }

    final DoubleRelation relation = new MaterializedDoubleRelation("mean multiple attributes spatial outlier", "mean-multipleattributes-outlier", outlierScores, attributes.getDBIDs());
    final OutlierScoreMeta meta = new BasicOutlierScoreMeta(range.getMin(), range.getMax(), 0.0, Double.POSITIVE_INFINITY, 0);
    final OutlierResult result = new OutlierResult(meta, relation);
    result.addChildResult(neighborhood);
    return result;
}
Also used : WritableDoubleDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore) DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) OutlierResult(de.lmu.ifi.dbs.elki.result.outlier.OutlierResult) NeighborSetPredicate(de.lmu.ifi.dbs.elki.algorithm.outlier.spatial.neighborhood.NeighborSetPredicate) DoubleRelation(de.lmu.ifi.dbs.elki.database.relation.DoubleRelation) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation) BasicOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta) OutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta) BasicOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta) CovarianceMatrix(de.lmu.ifi.dbs.elki.math.linearalgebra.CovarianceMatrix) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) DoubleMinMax(de.lmu.ifi.dbs.elki.math.DoubleMinMax) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation)

Example 12 with CovarianceMatrix

use of de.lmu.ifi.dbs.elki.math.linearalgebra.CovarianceMatrix in project elki by elki-project.

the class GlobalPrincipalComponentAnalysisTransformTest method defaultParameters.

/**
 * Test with default parameters: after the transform, every column is
 * expected to have mean 0 and variance 1, and the covariance matrix of the
 * data is expected to be the identity matrix.
 */
@Test
public void defaultParameters() {
    final String filename = UNITTEST + "transformation-test-1.csv";
    final GlobalPrincipalComponentAnalysisTransform<DoubleVector> filter = new ELKIBuilder<GlobalPrincipalComponentAnalysisTransform<DoubleVector>>(GlobalPrincipalComponentAnalysisTransform.class).build();
    final MultipleObjectsBundle bundle = readBundle(filename, filter);
    final int dim = getFieldDimensionality(bundle, 0, TypeUtil.NUMBER_VECTOR_FIELD);

    // Collect the full covariance matrix and per-column statistics.
    final CovarianceMatrix covariance = new CovarianceMatrix(dim);
    final MeanVariance[] columnStats = MeanVariance.newArray(dim);
    final int rows = bundle.dataLength();
    for (int r = 0; r < rows; r++) {
        final DoubleVector vec = get(bundle, r, 0, DoubleVector.class);
        covariance.put(vec);
        for (int c = 0; c < dim; c++) {
            final double value = vec.doubleValue(c);
            // Only finite values enter the per-column statistics.
            if (!Double.isNaN(value) && !Double.isInfinite(value)) {
                columnStats[c].put(value);
            }
        }
    }

    // Expect the identity matrix: unit variances, zero covariances.
    final double[][] matrix = covariance.destroyToPopulationMatrix();
    for (int c = 0; c < dim; c++) {
        for (int r = 0; r < dim; r++) {
            final double expected;
            if (c == r) {
                expected = 1.;
            } else {
                expected = 0.;
            }
            assertEquals("Unexpected covariance", expected, matrix[r][c], 1e-15);
        }
        assertEquals("Mean not as expected", 0., columnStats[c].getMean(), 1e-15);
        assertEquals("Variance not as expected", 1., columnStats[c].getNaiveVariance(), 1e-15);
    }
}
Also used : MeanVariance(de.lmu.ifi.dbs.elki.math.MeanVariance) MultipleObjectsBundle(de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle) DoubleVector(de.lmu.ifi.dbs.elki.data.DoubleVector) CovarianceMatrix(de.lmu.ifi.dbs.elki.math.linearalgebra.CovarianceMatrix) AbstractDataSourceTest(de.lmu.ifi.dbs.elki.datasource.AbstractDataSourceTest) Test(org.junit.Test)

Example 13 with CovarianceMatrix

use of de.lmu.ifi.dbs.elki.math.linearalgebra.CovarianceMatrix in project elki by elki-project.

the class GaussianModel method run.

/**
 * Run the algorithm.
 * <p>
 * Fits a single Gaussian (mean and population covariance) to the relation
 * and stores the density value of every object as its score. When the
 * {@code invert} flag is set, the scores are rescaled to
 * {@code (max - score) / max} and reported with a 0..1 score range;
 * otherwise the raw densities are reported with an inverted score meta.
 *
 * @param relation Data relation
 * @return Outlier result
 */
public OutlierResult run(Relation<V> relation) {
    final DoubleMinMax range = new DoubleMinMax();
    // Storage for the resulting scores
    final WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_TEMP | DataStoreFactory.HINT_HOT);

    // Mean and covariance; the mean is read before the builder is destroyed.
    final CovarianceMatrix builder = CovarianceMatrix.make(relation);
    final double[] center = builder.getMeanVector(relation).toArray();
    final double[][] covariance = builder.destroyToPopulationMatrix();
    // Inverse of the covariance matrix, used in the quadratic form below
    final double[][] precision = inverse(covariance);

    // Normalization factor of the Gaussian PDF
    final double determinant = new LUDecomposition(covariance).det();
    final double norm = 1.0 / FastMath.sqrt(MathUtil.powi(MathUtil.TWOPI, RelationUtil.dimensionality(relation)) * determinant);

    // Evaluate the density for each object
    DBIDIter it = relation.iterDBIDs();
    while (it.valid()) {
        // Centered copy of the object; toArray() result is modified in place
        final double[] centered = minusEquals(relation.get(it).toArray(), center);
        final double quadratic = transposeTimesTimes(centered, precision, centered);
        final double density = norm * FastMath.exp(-quadratic * .5);
        range.put(density);
        scores.putDouble(it, density);
        it.advance();
    }

    final OutlierScoreMeta meta;
    if (!invert) {
        meta = new InvertedOutlierScoreMeta(range.getMin(), range.getMax(), 0.0, Double.POSITIVE_INFINITY);
    } else {
        // Guard against a division by zero when all densities are zero.
        double max = range.getMax();
        if (max == 0) {
            max = 1.;
        }
        DBIDIter rescale = relation.iterDBIDs();
        while (rescale.valid()) {
            scores.putDouble(rescale, (max - scores.doubleValue(rescale)) / max);
            rescale.advance();
        }
        meta = new BasicOutlierScoreMeta(0.0, 1.0);
    }
    final DoubleRelation result = new MaterializedDoubleRelation("Gaussian Model Outlier Score", "gaussian-model-outlier", scores, relation.getDBIDs());
    return new OutlierResult(meta, result);
}
Also used : WritableDoubleDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore) OutlierResult(de.lmu.ifi.dbs.elki.result.outlier.OutlierResult) LUDecomposition(de.lmu.ifi.dbs.elki.math.linearalgebra.LUDecomposition) InvertedOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.InvertedOutlierScoreMeta) DoubleRelation(de.lmu.ifi.dbs.elki.database.relation.DoubleRelation) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation) BasicOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta) OutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta) InvertedOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.InvertedOutlierScoreMeta) BasicOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta) CovarianceMatrix(de.lmu.ifi.dbs.elki.math.linearalgebra.CovarianceMatrix) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) DoubleMinMax(de.lmu.ifi.dbs.elki.math.DoubleMinMax) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation)

Example 14 with CovarianceMatrix

use of de.lmu.ifi.dbs.elki.math.linearalgebra.CovarianceMatrix in project elki by elki-project.

the class WeightedCovarianceMatrixBuilder method processIds.

/**
 * Weighted covariance matrix for a set of IDs.
 * <p>
 * No distance information is supplied by the caller, so the distances of the
 * objects to their centroid are computed here: once to find the maximum
 * distance and the standard deviation, and once more to derive the weight of
 * each object. Covariance is tied to Euclidean distance, so supporting other
 * distance functions probably does not make much sense.
 *
 * @param ids Database ids to process
 * @param relation Relation to process
 * @return Covariance matrix
 */
@Override
public double[][] processIds(DBIDs ids, Relation<? extends NumberVector> relation) {
    final int dim = RelationUtil.dimensionality(relation);
    final CovarianceMatrix weighted = new CovarianceMatrix(dim);
    final Centroid center = Centroid.make(relation, ids);

    // Pass 1: largest distance to the centroid and sum of squared distances.
    double maxdist = 0.0, squaredSum = 0.0;
    DBIDIter scan = ids.iter();
    while (scan.valid()) {
        final double d = weightDistance.distance(center, relation.get(scan));
        squaredSum += d * d;
        maxdist = d > maxdist ? d : maxdist;
        scan.advance();
    }
    // All objects coincide with the centroid: fall back to 1.
    if (maxdist == 0.0) {
        maxdist = 1.0;
    }
    // Standard deviation of the distances to the centroid.
    final double stddev = FastMath.sqrt(squaredSum / ids.size());

    // Pass 2: add every object with its distance-dependent weight.
    DBIDIter fill = ids.iter();
    while (fill.valid()) {
        final NumberVector vec = relation.get(fill);
        final double d = weightDistance.distance(center, vec);
        weighted.put(vec, weightfunction.getWeight(d, maxdist, stddev));
        fill.advance();
    }
    return weighted.destroyToPopulationMatrix();
}
Also used : Centroid(de.lmu.ifi.dbs.elki.math.linearalgebra.Centroid) NumberVector(de.lmu.ifi.dbs.elki.data.NumberVector) CovarianceMatrix(de.lmu.ifi.dbs.elki.math.linearalgebra.CovarianceMatrix) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter)

Example 15 with CovarianceMatrix

use of de.lmu.ifi.dbs.elki.math.linearalgebra.CovarianceMatrix in project elki by elki-project.

the class LinearDiscriminantAnalysisFilter method computeProjectionMatrix.

/**
 * Compute the projection matrix from the labeled vectors.
 * <p>
 * Builds a between-class covariance matrix from the class centroids and a
 * within-class covariance matrix from the vectors after subtracting their
 * class centroid, then decomposes {@code inverse(within) * between} into
 * eigenpairs and returns the transposed {@code tdim} eigenvectors taken from
 * the sorted eigenpairs.
 *
 * @param vectorcolumn Vectors
 * @param classcolumn Class label of each vector
 * @param dim Dimensionality of the vectors
 * @return Projection matrix
 */
@Override
protected double[][] computeProjectionMatrix(List<V> vectorcolumn, List<? extends ClassLabel> classcolumn, int dim) {
    final Map<ClassLabel, IntList> classes = partition(classcolumn);
    // Fix an ordering of the classes, so they can be addressed by index:
    final List<ClassLabel> keys = new ArrayList<>(classes.keySet());
    // One centroid per class, in the order of "keys":
    final List<Centroid> centroids = computeCentroids(dim, vectorcolumn, keys, classes);

    // Between classes covariance:
    final CovarianceMatrix betweenBuilder = new CovarianceMatrix(dim);
    for (Centroid centroid : centroids) {
        betweenBuilder.put(centroid);
    }
    final double[][] sigmaB = betweenBuilder.destroyToSampleMatrix();

    // (Average) within class variance:
    final CovarianceMatrix withinBuilder = new CovarianceMatrix(dim);
    final int numc = keys.size();
    for (int k = 0; k < numc; k++) {
        final double[] center = centroids.get(k).getArrayRef();
        // TODO: different weighting strategies? Sampling?
        final IntIterator members = classes.get(keys.get(k)).iterator();
        while (members.hasNext()) {
            // toArray() result is modified in place by minusEquals
            final double[] centered = minusEquals(vectorcolumn.get(members.nextInt()).toArray(), center);
            withinBuilder.put(centered);
        }
    }
    final double[][] sigmaI = withinBuilder.destroyToSampleMatrix();
    // Determinant exactly zero: add a small constant to the diagonal before
    // the matrix is inverted.
    if (new LUDecomposition(sigmaI).det() == 0) {
        for (int d = 0; d < dim; d++) {
            sigmaI[d][d] += 1e-10;
        }
    }

    final double[][] product = times(inverse(sigmaI), sigmaB);
    final EigenvalueDecomposition eigen = new EigenvalueDecomposition(product);
    final SortedEigenPairs pairs = new SortedEigenPairs(eigen, false);
    return transpose(pairs.eigenVectors(tdim));
}
Also used : IntIterator(it.unimi.dsi.fastutil.ints.IntIterator) EigenvalueDecomposition(de.lmu.ifi.dbs.elki.math.linearalgebra.EigenvalueDecomposition) ArrayList(java.util.ArrayList) LUDecomposition(de.lmu.ifi.dbs.elki.math.linearalgebra.LUDecomposition) IntList(it.unimi.dsi.fastutil.ints.IntList) CovarianceMatrix(de.lmu.ifi.dbs.elki.math.linearalgebra.CovarianceMatrix) Centroid(de.lmu.ifi.dbs.elki.math.linearalgebra.Centroid) ClassLabel(de.lmu.ifi.dbs.elki.data.ClassLabel) SortedEigenPairs(de.lmu.ifi.dbs.elki.math.linearalgebra.pca.SortedEigenPairs)

Aggregations

CovarianceMatrix (de.lmu.ifi.dbs.elki.math.linearalgebra.CovarianceMatrix)15 DBIDIter (de.lmu.ifi.dbs.elki.database.ids.DBIDIter)8 WritableDoubleDataStore (de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore)4 DBIDs (de.lmu.ifi.dbs.elki.database.ids.DBIDs)4 DoubleRelation (de.lmu.ifi.dbs.elki.database.relation.DoubleRelation)4 MaterializedDoubleRelation (de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation)4 DoubleMinMax (de.lmu.ifi.dbs.elki.math.DoubleMinMax)4 BasicOutlierScoreMeta (de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta)4 OutlierResult (de.lmu.ifi.dbs.elki.result.outlier.OutlierResult)4 OutlierScoreMeta (de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta)4 NeighborSetPredicate (de.lmu.ifi.dbs.elki.algorithm.outlier.spatial.neighborhood.NeighborSetPredicate)3 DoubleVector (de.lmu.ifi.dbs.elki.data.DoubleVector)3 AbstractDataSourceTest (de.lmu.ifi.dbs.elki.datasource.AbstractDataSourceTest)3 MultipleObjectsBundle (de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle)3 MeanVariance (de.lmu.ifi.dbs.elki.math.MeanVariance)3 LUDecomposition (de.lmu.ifi.dbs.elki.math.linearalgebra.LUDecomposition)3 ArrayList (java.util.ArrayList)3 Test (org.junit.Test)3 NumberVector (de.lmu.ifi.dbs.elki.data.NumberVector)2 ModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs)2