Search in sources :

Example 1 with CovarianceMatrix

use of de.lmu.ifi.dbs.elki.math.linearalgebra.CovarianceMatrix in project elki by elki-project.

the class ClassicMultidimensionalScalingTransformTest method parameters.

/**
 * Runs the classic MDS filter with an explicit target dimensionality and
 * Euclidean distance, then checks three properties of the output: the
 * resulting dimensionality, the preservation of all pairwise Euclidean
 * distances, and the agreement between the diagonal of the projected data's
 * covariance matrix and the singular values of the original data's covariance
 * matrix.
 */
@Test
public void parameters() {
    final int pdim = 2;
    final String filename = UNITTEST + "transformation-test-1.csv";
    ClassicMultidimensionalScalingTransform<DoubleVector, DoubleVector> filter = new ELKIBuilder<ClassicMultidimensionalScalingTransform<DoubleVector, DoubleVector>>(ClassicMultidimensionalScalingTransform.class)
        .with(ClassicMultidimensionalScalingTransform.Parameterizer.DIM_ID, pdim)
        .with(ClassicMultidimensionalScalingTransform.Parameterizer.DISTANCE_ID, EuclideanDistanceFunction.class)
        .build();
    // Read the file twice: once through the filter, once untouched.
    MultipleObjectsBundle projected = readBundle(filename, filter);
    MultipleObjectsBundle raw = readBundle(filename);
    int rawDim = getFieldDimensionality(raw, 0, TypeUtil.NUMBER_VECTOR_FIELD);
    int projectedDim = getFieldDimensionality(projected, 0, TypeUtil.NUMBER_VECTOR_FIELD);
    assertEquals("Dimensionality not as requested", pdim, projectedDim);

    // One pass over all pairs (quadratic in the number of objects): every
    // pairwise Euclidean distance must survive the transform. The covariance
    // matrices of both bundles are accumulated in the same outer loop.
    CovarianceMatrix rawCov = new CovarianceMatrix(rawDim);
    CovarianceMatrix projectedCov = new CovarianceMatrix(projectedDim);
    for (int i = 0; i < projected.dataLength(); i++) {
        DoubleVector projectedA = get(projected, i, 0, DoubleVector.class);
        DoubleVector rawA = get(raw, i, 0, DoubleVector.class);
        rawCov.put(rawA);
        projectedCov.put(projectedA);
        for (int j = i + 1; j < projected.dataLength(); j++) {
            DoubleVector projectedB = get(projected, j, 0, DoubleVector.class);
            DoubleVector rawB = get(raw, j, 0, DoubleVector.class);
            final double projectedDist = EuclideanDistanceFunction.STATIC.distance(projectedA, projectedB);
            final double rawDist = EuclideanDistanceFunction.STATIC.distance(rawA, rawB);
            assertEquals("Expected same distance", rawDist, projectedDist, 1e-11);
        }
    }

    // The singular values of the raw data's covariance matrix must match the
    // diagonal of the projected data's covariance matrix.
    double[][] rawMatrix = rawCov.destroyToPopulationMatrix();
    double[][] projectedMatrix = projectedCov.destroyToPopulationMatrix();
    double[] singularValues = new SingularValueDecomposition(rawMatrix).getSingularValues();
    for (int d = 0; d < singularValues.length; d++) {
        assertEquals("Unexpected covariance", singularValues[d], projectedMatrix[d][d], 1e-11);
    }
}
Also used : EuclideanDistanceFunction(de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.EuclideanDistanceFunction) MultipleObjectsBundle(de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle) CovarianceMatrix(de.lmu.ifi.dbs.elki.math.linearalgebra.CovarianceMatrix) DoubleVector(de.lmu.ifi.dbs.elki.data.DoubleVector) SingularValueDecomposition(de.lmu.ifi.dbs.elki.math.linearalgebra.SingularValueDecomposition) AbstractDataSourceTest(de.lmu.ifi.dbs.elki.datasource.AbstractDataSourceTest) Test(org.junit.Test)

Example 2 with CovarianceMatrix

use of de.lmu.ifi.dbs.elki.math.linearalgebra.CovarianceMatrix in project elki by elki-project.

the class FastMultidimensionalScalingTransformTest method parameters.

/**
 * Runs the fast MDS filter with an explicit target dimensionality, a fixed
 * random seed and Euclidean distance, then checks three properties of the
 * output: the resulting dimensionality, the preservation of all pairwise
 * Euclidean distances (tolerance 1e-10), and the agreement between the
 * diagonal of the projected data's covariance matrix and the singular values
 * of the original data's covariance matrix (tolerance 1e-8).
 */
@Test
public void parameters() {
    final int pdim = 2;
    final String filename = UNITTEST + "transformation-test-1.csv";
    FastMultidimensionalScalingTransform<DoubleVector, DoubleVector> filter = new ELKIBuilder<FastMultidimensionalScalingTransform<DoubleVector, DoubleVector>>(FastMultidimensionalScalingTransform.class)
        .with(ClassicMultidimensionalScalingTransform.Parameterizer.DIM_ID, pdim)
        .with(FastMultidimensionalScalingTransform.Parameterizer.RANDOM_ID, 0L)
        .with(ClassicMultidimensionalScalingTransform.Parameterizer.DISTANCE_ID, EuclideanDistanceFunction.class)
        .build();
    // Read the file twice: once through the filter, once untouched.
    MultipleObjectsBundle projected = readBundle(filename, filter);
    MultipleObjectsBundle raw = readBundle(filename);
    int rawDim = getFieldDimensionality(raw, 0, TypeUtil.NUMBER_VECTOR_FIELD);
    int projectedDim = getFieldDimensionality(projected, 0, TypeUtil.NUMBER_VECTOR_FIELD);
    assertEquals("Dimensionality not as requested", pdim, projectedDim);

    // One pass over all pairs (quadratic in the number of objects): every
    // pairwise Euclidean distance must survive the transform. The covariance
    // matrices of both bundles are accumulated in the same outer loop.
    CovarianceMatrix rawCov = new CovarianceMatrix(rawDim);
    CovarianceMatrix projectedCov = new CovarianceMatrix(projectedDim);
    for (int i = 0; i < projected.dataLength(); i++) {
        DoubleVector projectedA = get(projected, i, 0, DoubleVector.class);
        DoubleVector rawA = get(raw, i, 0, DoubleVector.class);
        rawCov.put(rawA);
        projectedCov.put(projectedA);
        for (int j = i + 1; j < projected.dataLength(); j++) {
            DoubleVector projectedB = get(projected, j, 0, DoubleVector.class);
            DoubleVector rawB = get(raw, j, 0, DoubleVector.class);
            final double projectedDist = EuclideanDistanceFunction.STATIC.distance(projectedA, projectedB);
            final double rawDist = EuclideanDistanceFunction.STATIC.distance(rawA, rawB);
            assertEquals("Expected same distance", rawDist, projectedDist, 1e-10);
        }
    }

    // The singular values of the raw data's covariance matrix must match the
    // diagonal of the projected data's covariance matrix.
    double[][] rawMatrix = rawCov.destroyToPopulationMatrix();
    double[][] projectedMatrix = projectedCov.destroyToPopulationMatrix();
    double[] singularValues = new SingularValueDecomposition(rawMatrix).getSingularValues();
    for (int d = 0; d < singularValues.length; d++) {
        assertEquals("Unexpected covariance", singularValues[d], projectedMatrix[d][d], 1e-8);
    }
}
Also used : EuclideanDistanceFunction(de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.EuclideanDistanceFunction) ELKIBuilder(de.lmu.ifi.dbs.elki.utilities.ELKIBuilder) MultipleObjectsBundle(de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle) DoubleVector(de.lmu.ifi.dbs.elki.data.DoubleVector) SingularValueDecomposition(de.lmu.ifi.dbs.elki.math.linearalgebra.SingularValueDecomposition) CovarianceMatrix(de.lmu.ifi.dbs.elki.math.linearalgebra.CovarianceMatrix) AbstractDataSourceTest(de.lmu.ifi.dbs.elki.datasource.AbstractDataSourceTest) Test(org.junit.Test)

Example 3 with CovarianceMatrix

use of de.lmu.ifi.dbs.elki.math.linearalgebra.CovarianceMatrix in project elki by elki-project.

the class WeightedCovarianceMatrixBuilder method processQueryResults.

/**
 * Compute a weighted covariance matrix from the first {@code k} entries of a
 * query result.
 *
 * A first pass over those entries determines the maximum distance and the
 * root mean square of the distances; a second pass adds each object to the
 * covariance matrix with the weight returned by the weight function for
 * (distance, maximum distance, root mean square distance).
 *
 * @param results a collection of QueryResults (object ids with distances)
 * @param database the database used
 * @param k number of elements to process; values larger than
 *        {@code results.size()} are reduced to {@code results.size()}
 * @return Covariance Matrix
 */
@Override
public double[][] processQueryResults(DoubleDBIDList results, Relation<? extends NumberVector> database, int k) {
    final int dim = RelationUtil.dimensionality(database);
    final CovarianceMatrix cmat = new CovarianceMatrix(dim);
    // avoid bad parameters
    k = Math.min(k, results.size());
    // find maximum distance
    double maxdist = 0.0, stddev = 0.0;
    {
        int i = 0;
        // Advance the counter i, not the limit k: incrementing k made the
        // bound ineffective and corrupted the divisor used below.
        for (DoubleDBIDListIter it = results.iter(); it.valid() && i < k; it.advance(), i++) {
            final double dist = it.doubleValue();
            stddev += dist * dist;
            if (dist > maxdist) {
                maxdist = dist;
            }
        }
        // Guard for the case that all distances are zero; the value is only
        // passed on to the weight function.
        if (maxdist == 0.0) {
            maxdist = 1.0;
        }
        // Root mean square of the k processed distances.
        stddev = FastMath.sqrt(stddev / k);
    }
    // calculate weighted PCA
    int i = 0;
    for (DoubleDBIDListIter it = results.iter(); it.valid() && i < k; it.advance(), i++) {
        final double dist = it.doubleValue();
        NumberVector obj = database.get(it);
        double weight = weightfunction.getWeight(dist, maxdist, stddev);
        cmat.put(obj, weight);
    }
    return cmat.destroyToPopulationMatrix();
}
Also used : DoubleDBIDListIter(de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter) NumberVector(de.lmu.ifi.dbs.elki.data.NumberVector) CovarianceMatrix(de.lmu.ifi.dbs.elki.math.linearalgebra.CovarianceMatrix)

Example 4 with CovarianceMatrix

use of de.lmu.ifi.dbs.elki.math.linearalgebra.CovarianceMatrix in project elki by elki-project.

the class GaussianUniformMixture method loglikelihoodNormal.

/**
 * Computes the loglikelihood of all normal objects under a Gaussian model
 * whose mean and sample covariance are estimated from these objects.
 *
 * The per-object log density is evaluated directly in log space, as
 * {@code log(fakt) - mDist / 2}, rather than as
 * {@code log(fakt * exp(-mDist / 2))}: the latter underflows to
 * {@code log(0) = -Infinity} once the Mahalanobis distance gets large.
 *
 * @param objids Object IDs for 'normal' objects.
 * @param relation Database
 * @return loglikelihood for normal objects, or 0 if {@code objids} is empty
 */
private double loglikelihoodNormal(DBIDs objids, Relation<V> relation) {
    if (objids.isEmpty()) {
        return 0;
    }
    CovarianceMatrix builder = CovarianceMatrix.make(relation, objids);
    // The mean must be read before the builder is destroyed.
    double[] mean = builder.getMeanVector();
    double[][] covarianceMatrix = builder.destroyToSampleMatrix();
    // NOTE(review): no explicit check for a singular covariance matrix is
    // done here.
    double[][] covInv = inverse(covarianceMatrix);
    double covarianceDet = new LUDecomposition(covarianceMatrix).det();
    // Normalization factor of the Gaussian density: 1 / sqrt((2 pi)^d * det).
    double fakt = 1.0 / FastMath.sqrt(MathUtil.powi(MathUtil.TWOPI, RelationUtil.dimensionality(relation)) * covarianceDet);
    // Loop invariant, so take the logarithm only once.
    final double logFakt = FastMath.log(fakt);
    // for each object compute log probability and sum
    double prob = 0;
    for (DBIDIter iter = objids.iter(); iter.valid(); iter.advance()) {
        double[] x = minusEquals(relation.get(iter).toArray(), mean);
        // Squared Mahalanobis distance x^T * covInv * x of the centered object.
        double mDist = transposeTimesTimes(x, covInv, x);
        prob += logFakt - mDist * .5;
    }
    return prob;
}
Also used : LUDecomposition(de.lmu.ifi.dbs.elki.math.linearalgebra.LUDecomposition) CovarianceMatrix(de.lmu.ifi.dbs.elki.math.linearalgebra.CovarianceMatrix) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter)

Example 5 with CovarianceMatrix

use of de.lmu.ifi.dbs.elki.math.linearalgebra.CovarianceMatrix in project elki by elki-project.

the class CTLuScatterplotOutlier method run.

/**
 * Main method: scores each object by how far the mean of its neighbors
 * deviates from a linear regression of neighborhood mean on the object's own
 * value, then standardizes these errors.
 *
 * @param database Database
 * @param nrel Neighborhood relation
 * @param relation Data relation (1d!); only dimension 0 of each vector is read
 * @return Outlier detection result
 */
public OutlierResult run(Database database, Relation<N> nrel, Relation<? extends NumberVector> relation) {
    final NeighborSetPredicate npred = getNeighborSetPredicateFactory().instantiate(database, nrel);
    WritableDoubleDataStore neighborMeans = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_TEMP);

    // Step 1: for every object, average the values of its neighbors (excluding
    // the object itself) and feed the pair (own value, neighborhood average)
    // into a 2x2 covariance matrix.
    CovarianceMatrix pairCov = new CovarianceMatrix(2);
    for (DBIDIter obj = relation.iterDBIDs(); obj.valid(); obj.advance()) {
        final double own = relation.get(obj).doubleValue(0);
        Mean avg = new Mean();
        DBIDs neighborIds = npred.getNeighborDBIDs(obj);
        for (DBIDIter nb = neighborIds.iter(); nb.valid(); nb.advance()) {
            if (!DBIDUtil.equal(obj, nb)) {
                avg.put(relation.get(nb).doubleValue(0));
            }
        }
        // Without any neighbors, fall back to the object's own value.
        final double neighborhood = avg.getCount() > 0 ? avg.getMean() : own;
        // Remember the neighborhood average for the error computation below.
        neighborMeans.putDouble(obj, neighborhood);
        pairCov.put(new double[] { own, neighborhood });
    }

    // Step 2: linear regression from the covariance matrix. The mean vector
    // is read before the matrix is destroyed.
    final double[] center = pairCov.getMeanVector();
    final double[][] cov = pairCov.destroyToSampleMatrix();
    final double varOwn = cov[0][0], covOwnNeighborhood = cov[0][1];
    final double slope = covOwnNeighborhood / varOwn;
    final double inter = center[1] - slope * center[0];

    // Step 3: regression error per object, collecting mean and deviation.
    WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC);
    MeanVariance errorStats = new MeanVariance();
    for (DBIDIter obj = relation.iterDBIDs(); obj.valid(); obj.advance()) {
        final double own = relation.get(obj).doubleValue(0);
        final double predicted = slope * own + inter;
        final double error = neighborMeans.doubleValue(obj) - predicted;
        scores.putDouble(obj, error);
        errorStats.put(error);
    }

    // Step 4: standardize the errors (absolute z-score) and track the range.
    DoubleMinMax range = new DoubleMinMax();
    final double errorMean = errorStats.getMean();
    final double errorStddev = errorStats.getNaiveStddev();
    for (DBIDIter obj = relation.iterDBIDs(); obj.valid(); obj.advance()) {
        final double standardized = Math.abs((scores.doubleValue(obj) - errorMean) / errorStddev);
        range.put(standardized);
        scores.putDouble(obj, standardized);
    }

    // Step 5: wrap the scores into the result structures.
    DoubleRelation scoreResult = new MaterializedDoubleRelation("SPO", "Scatterplot-Outlier", scores, relation.getDBIDs());
    OutlierScoreMeta scoreMeta = new BasicOutlierScoreMeta(range.getMin(), range.getMax(), 0.0, Double.POSITIVE_INFINITY, 0);
    OutlierResult result = new OutlierResult(scoreMeta, scoreResult);
    result.addChildResult(npred);
    return result;
}
Also used : Mean(de.lmu.ifi.dbs.elki.math.Mean) WritableDoubleDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore) DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) OutlierResult(de.lmu.ifi.dbs.elki.result.outlier.OutlierResult) NeighborSetPredicate(de.lmu.ifi.dbs.elki.algorithm.outlier.spatial.neighborhood.NeighborSetPredicate) DoubleRelation(de.lmu.ifi.dbs.elki.database.relation.DoubleRelation) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation) BasicOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta) OutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta) BasicOutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta) CovarianceMatrix(de.lmu.ifi.dbs.elki.math.linearalgebra.CovarianceMatrix) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) MeanVariance(de.lmu.ifi.dbs.elki.math.MeanVariance) DoubleMinMax(de.lmu.ifi.dbs.elki.math.DoubleMinMax) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation)

Aggregations

CovarianceMatrix (de.lmu.ifi.dbs.elki.math.linearalgebra.CovarianceMatrix)15 DBIDIter (de.lmu.ifi.dbs.elki.database.ids.DBIDIter)8 WritableDoubleDataStore (de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore)4 DBIDs (de.lmu.ifi.dbs.elki.database.ids.DBIDs)4 DoubleRelation (de.lmu.ifi.dbs.elki.database.relation.DoubleRelation)4 MaterializedDoubleRelation (de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation)4 DoubleMinMax (de.lmu.ifi.dbs.elki.math.DoubleMinMax)4 BasicOutlierScoreMeta (de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta)4 OutlierResult (de.lmu.ifi.dbs.elki.result.outlier.OutlierResult)4 OutlierScoreMeta (de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta)4 NeighborSetPredicate (de.lmu.ifi.dbs.elki.algorithm.outlier.spatial.neighborhood.NeighborSetPredicate)3 DoubleVector (de.lmu.ifi.dbs.elki.data.DoubleVector)3 AbstractDataSourceTest (de.lmu.ifi.dbs.elki.datasource.AbstractDataSourceTest)3 MultipleObjectsBundle (de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle)3 MeanVariance (de.lmu.ifi.dbs.elki.math.MeanVariance)3 LUDecomposition (de.lmu.ifi.dbs.elki.math.linearalgebra.LUDecomposition)3 ArrayList (java.util.ArrayList)3 Test (org.junit.Test)3 NumberVector (de.lmu.ifi.dbs.elki.data.NumberVector)2 ModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs)2